def api_get_chatbot_response(self, project_id, question, index):
    """Answer *question* against the project's persisted llama-index vector store.

    The loaded index is cached per project on the instance, so repeated calls
    reuse the in-memory index instead of reloading it from disk every time
    (the original reloaded on each request).

    Args:
        project_id: Project whose report vector store is queried.
        question: Natural-language question to answer.
        index: Unused; kept only for backward compatibility with callers.

    Returns:
        The answer as a string, or "" if anything fails.
    """
    import os

    answer = ""
    try:
        persist_dir = (
            self.config.host.path
            + f"projects/{project_id}/docs/reports/llama_vectordb"
        )

        # Read the key from the environment; the original hard-coded "".
        openai.api_key = os.environ.get("OPENAI_API_KEY", "")

        # Load the index at most once per project and keep it in memory.
        if not hasattr(self, "_llama_index_cache"):
            self._llama_index_cache = {}
        if project_id not in self._llama_index_cache:
            self._llama_index_cache[project_id] = load_index_from_storage(
                StorageContext.from_defaults(persist_dir=persist_dir)
            )
        loaded_index = self._llama_index_cache[project_id]

        # The original passed an empty PromptTemplate(), which cannot be
        # format()-ed with context_str/query_str; supply a minimal RAG prompt.
        qa_prompt = PromptTemplate(
            "Context information is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Given the context information and not prior knowledge, "
            "answer the query.\n"
            "Query: {query_str}\n"
            "Answer: "
        )

        class RAGStringQueryEngine(CustomQueryEngine):
            """RAG string query engine: retrieve top-k nodes, stuff them into
            the prompt, and complete with the configured LLM."""

            retriever: BaseRetriever
            response_synthesizer: BaseSynthesizer
            llm: OpenAI
            qa_prompt: PromptTemplate

            def custom_query(self, query_str: str):
                nodes = self.retriever.retrieve(query_str)
                context_str = "\n\n".join(n.node.get_content() for n in nodes)
                # Use the engine's own prompt attribute; the original closed
                # over the outer qa_prompt variable instead.
                response = self.llm.complete(
                    self.qa_prompt.format(
                        context_str=context_str, query_str=query_str
                    )
                )
                return str(response)

        # Configure retriever over the cached index.
        retriever = VectorIndexRetriever(
            index=loaded_index,
            similarity_top_k=2,
        )

        # NOTE(review): streaming=True appears unused since custom_query calls
        # llm.complete() synchronously — confirm before removing.
        response_synthesizer = get_response_synthesizer(
            streaming=True,
            response_mode="tree_summarize",
        )
        llm = OpenAI(model="gpt-3.5-turbo")

        # Assemble the query engine and run the question.
        query_engine = RAGStringQueryEngine(
            retriever=retriever,
            response_synthesizer=response_synthesizer,
            llm=llm,
            qa_prompt=qa_prompt,
        )
        answer = str(query_engine.query(question))
    except Exception:
        # The original 'try' had no visible 'except'; fail soft and return "".
        # TODO(owner): log the exception instead of swallowing it silently.
        pass
    return answer
Add a reply
Sign up and join the conversation on Discord
Join on Discord