def process_retriever_component_fn(self, user_query: str) -> str:
    """Retrieve nodes relevant to *user_query*, LLM-rerank them, and return
    them concatenated into a single newline-separated string.

    Side effects: dumps the raw retrieved nodes to ``first_nodes.txt`` and the
    reranked nodes to ``second_nodes.txt`` (both overwritten on every call).

    Args:
        user_query: The natural-language query to retrieve context for.

    Returns:
        The (up to 3) reranked nodes, stringified and joined with newlines.
    """
    logger.info("Sentence Retriever Output processing...")
    sentence_retriever = self.index.as_retriever(similarity_top_k=5)
    nodes = sentence_retriever.retrieve(user_query)
    with open("first_nodes.txt", mode="w") as f:
        for node in nodes:
            f.write(str(node) + "\n")
    # Create a QueryBundle from the user query
    query_bundle = QueryBundle(query_str=user_query)
    logger.info("Relevant Node Retrieved...")
    logger.info("Starting the reranking process...")
    postprocessor = LLMRerank(top_n=3, llm=self.llm, choice_batch_size=1)
    try:
        reranked_nodes = postprocessor.postprocess_nodes(
            nodes=nodes, query_bundle=query_bundle
        )
    except (IndexError, ValueError) as exc:
        # LLMRerank's default_parse_choice_select_answer_fn raises IndexError
        # when the LLM's answer is not in the expected "Doc: N, Relevance: M"
        # format (see the traceback in utils.py line 104). Fall back to the
        # top retrieved nodes instead of crashing the whole pipeline.
        logger.warning(
            "LLM rerank output could not be parsed (%s); "
            "falling back to top retrieved nodes",
            exc,
        )
        reranked_nodes = nodes[:3]
    # Build the node dump and the returned context string in one pass.
    lines = [str(reranked_node) + "\n" for reranked_node in reranked_nodes]
    with open("second_nodes.txt", mode="w") as f:
        f.writelines(lines)
    return "".join(lines)
File "/usr/local/lib/python3.8/dist-packages/llama_index/core/postprocessor/types.py", line 56, in postprocess_nodes return self._postprocess_nodes(nodes, query_bundle) File "/usr/local/lib/python3.8/dist-packages/llama_index/core/instrumentation/dispatcher.py", line 230, in wrapper result = func(*args, **kwargs) File "/usr/local/lib/python3.8/dist-packages/llama_index/core/postprocessor/llm_rerank.py", line 99, in _postprocess_nodes raw_choices, relevances = self._parse_choice_select_answer_fn( File "/usr/local/lib/python3.8/dist-packages/llama_index/core/indices/utils.py", line 104, in default_parse_choice_select_answer_fn answer_num = int(line_tokens[0].split(":")[1].strip()) IndexError: list index out of range
fastapi uvicorn pydantic loguru llama-index-experimental llama-index-embeddings-huggingface llama-index-llms-ollama llama-index-finetuning llama-index-readers-file sqlalchemy
def get_query_pipeline(self):
    """Assemble and return the query pipeline that drives table creation."""
    # All pipeline stages, keyed by the names used in the links below.
    pipeline_modules = {
        "input": InputComponent(),
        "process_retriever": self.process_retriever_component,
        "table_creation_prompt": self.table_creation_prompt,
        "llm1": self.llm1,
        "python_output_parser": self.python_parser_component,
    }
    pipeline = QP(modules=pipeline_modules, verbose=True)
    # The raw user input feeds both the retriever and the prompt template.
    pipeline.add_link("input", "process_retriever")
    pipeline.add_link("input", "table_creation_prompt", dest_key="query_str")
    # Retrieved context is injected into the table-creation prompt.
    pipeline.add_link(
        "process_retriever", "table_creation_prompt", dest_key="retrieved_nodes"
    )
    # prompt -> LLM -> parser, executed as a linear chain.
    pipeline.add_chain(["table_creation_prompt", "llm1", "python_output_parser"])
    return pipeline
def get_query_pipeline(self):
    """Create & Return the Query Pipeline of database generation.

    Extended variant: in addition to the table-creation stages, it registers
    a table-insert stage ("table_insert_prompt" -> "llm2" ->
    "python_output_parser1"); note "llm2" reuses the same ``self.llm1`` object.
    """
    qp = QP(
        modules={
            "input": InputComponent(),
            "process_retriever": self.process_retriever_component,
            "table_creation_prompt": self.table_creation_prompt,
            "llm1": self.llm1,
            "python_output_parser": self.python_parser_component,
            "table_insert_prompt": self.table_insert_prompt,
            "llm2": self.llm1,
            "python_output_parser1": self.python_parser_component,
        },
        verbose=True,
    )
    # The raw user input feeds both the retriever and the prompt template.
    qp.add_link("input", "process_retriever")
    qp.add_link("input", "table_creation_prompt", dest_key="query_str")
    # Retrieved context is injected into the table-creation prompt.
    qp.add_link(
        "process_retriever", "table_creation_prompt", dest_key="retrieved_nodes"
    )
    qp.add_chain(["table_creation_prompt", "llm1", "python_output_parser"])
    # NOTE(review): the links wiring the insert-stage modules
    # (table_insert_prompt / llm2 / python_output_parser1) appear to have been
    # elided here — as written, those modules are registered but never linked.
    # TODO: confirm the missing add_link/add_chain calls against the original.
    ...
    return qp
# Instructions appended to the prompt: constrain the LLM to emit a single
# executable pandas expression (no surrounding prose).
instruction_str = (
    "1. Convert the query to executable Python code using Pandas.\n"
    "2. The code should represent a solution to the query.\n"
    "3. PRINT ONLY THE EXPRESSION.\n"
)

# Prompt template for pandas code generation. Placeholders:
#   {columns_explain}  - per-column explanations of `df`
#   {relevant_node}    - example rows retrieved for the query
#   {instruction_str}  - the instruction block above
#   {query_str}        - the user's natural-language query
# Fix: "Here's are some" -> "Here are some" (typo in the prompt text).
pandas_prompt_str = (
    "You are working with a pandas dataframe in Python.\n"
    "The name of the dataframe is `df`.\n"
    "Here is a list of columns from the dataframe `df` with explanations for each column.\n"
    "{columns_explain}\n\n"
    "Here are some relevant example rows (values in the same order as columns above)\n"
    "{relevant_node}\n\n"
    "Follow these instructions:\n"
    "{instruction_str}\n"
    "Query: {query_str}\n\n"
)
Is there a way to force the model to produce better-formatted output?
Is there a way to improve the performance, i.e., make the embedding step faster? Here's the Python code:
# Build the vector index from the CSV on first run; afterwards, reload the
# persisted index from the "vector_index" directory instead of re-embedding.
if os.path.exists("vector_index"):
    logger.info("Loading from the storage..")
    storage_context = StorageContext.from_defaults(persist_dir="vector_index")
    index = load_index_from_storage(storage_context)
else:
    logger.info("CSVReader working..")
    # One node per CSV row (concat_rows=False).
    reader = CSVReader(concat_rows=False)
    nodes = reader.load_data(file=Path(path))
    logger.info("CSVReader Done..")
    logger.info("Vectorizing..")
    index = VectorStoreIndex(nodes, show_progress=True)
    logger.info("Vectorizing done..")
    logger.info("Storing..")
    # Persist so subsequent runs take the load-from-storage branch above.
    index.storage_context.persist("vector_index")
    logger.info("Storing done..")