```
global_handler.start_trace_params(user_id=request.email_id, tags=[env.ENVIRONMENT, "support-bot"])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'start_trace_params'
```
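The `'NoneType'` in the traceback means `global_handler` was never set. A minimal sketch of one likely fix, assuming the LlamaIndex Langfuse callback integration (`llama-index-callbacks-langfuse`) is installed; the ordering is the point, not the exact setup:

```python
# Sketch: global_handler stays None until set_global_handler() runs,
# which would produce exactly the AttributeError above.
from llama_index.core import set_global_handler

set_global_handler("langfuse")  # must run before anything touches global_handler

from llama_index.core import global_handler  # now the Langfuse handler, not None
```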
`MarkdownReader` is splitting the markdown based on headings (e.g. `#`) and code blocks. I want to change the strategy for dividing the document into chunks, as in my use case the extracted document doesn't carry enough context due to the small chunks.
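One way to decouple chunking from the reader's heading-based splits is to re-chunk the loaded documents with a size-based splitter; a sketch, assuming documents loaded from a markdown file (the file path is hypothetical):

```python
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter

# Load the markdown, then override the default per-heading parsing with a
# size-based splitter so each node keeps more surrounding context.
documents = SimpleDirectoryReader(input_files=["docs/guide.md"]).load_data()
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=128)
index = VectorStoreIndex.from_documents(documents, transformations=[splitter])
```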
`QueryFusionRetriever` with `CondensePlusContextChatEngine`, where I have 2 retrievers, `BM25Retriever` and `VectorStoreIndex.from_vector_store`, and I'm using Langfuse for traces. When using the condense plus context chat engine, the traces are not well segregated, e.g. into the multiple retrievers, the multiple queries, and then the fusion nodes, the way they are cleanly separated with `index.as_chat_engine`.
```python
import os

import nest_asyncio
import qdrant_client
from dotenv import load_dotenv
from llama_index.core import Settings, VectorStoreIndex
from llama_index.core.evaluation import (
    EmbeddingQAFinetuneDataset,
    RetrieverEvaluator,
)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore

load_dotenv()
nest_asyncio.apply()

Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002", embed_batch_size=10)
llm = OpenAI(model="gpt-4o")

client = qdrant_client.QdrantClient(
    url=os.getenv("QDRANT_URI"), api_key=os.getenv("QDRANT_API_KEY")
)
vector_store = QdrantVectorStore(client=client, collection_name="mlofo-loan-officer-july")
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

qa_dataset = EmbeddingQAFinetuneDataset.from_json("pg_eval_dataset.json")

metrics = ["mrr", "hit_rate"]
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    metrics, retriever=index.as_retriever(similarity_top_k=2)
)

# Evaluate a single sample query against its expected source nodes.
sample_id, sample_query = list(qa_dataset.queries.items())[0]
sample_expected = qa_dataset.relevant_docs[sample_id]
eval_result = retriever_evaluator.evaluate(sample_query, sample_expected)
print(eval_result)
```
```python
from llama_index.core.evaluation import generate_question_context_pairs

nodes = vector_store.get_nodes()
qa_dataset = generate_question_context_pairs(
    nodes, llm=llm, num_questions_per_chunk=2
)
```
```python
def get_chat_engine() -> "CondensePlusContextChatEngine":
    Settings.llm = OpenAI(model="gpt-4o", temperature=0.1)
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    retriever = index.as_retriever(similarity_top_k=3)
    retriever = QueryFusionRetriever(
        [retriever],
        similarity_top_k=4,
        num_queries=4,
        mode="reciprocal_rerank",
        use_async=True,
        verbose=True,
        query_gen_prompt=BOT_QUERY_GEN_PROMPT,
    )
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        retriever=retriever,
        system_prompt=SUPPORT_BOT_SYSTEM_PROMPT,
        streaming=True,
    )
    return chat_engine


async def chat(request: ChatRequestBody):
    try:
        engine = get_chat_engine()
        response_stream = engine.stream_chat(message, chat_history=history)
        return StreamingResponse(
            stream_generator(response_stream, request.history, request.timezone),
            media_type="application/x-ndjson",
        )
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred while processing the request. {str(e)}",
        ) from e
```
When you are inside a running event loop (for example a FastAPI handler), use the async entry points (`aquery`, `aretrieve`, `arun`, etc.). Otherwise, use `import nest_asyncio; nest_asyncio.apply()` to enable nested async, or run it in a Jupyter notebook.
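A minimal sketch of both options, assuming a `query_engine` built as in the snippets above and an illustrative question string:

```python
import asyncio

async def main() -> None:
    # Inside an event loop, call the async variants directly.
    response = await query_engine.aquery("example question")
    print(response)

asyncio.run(main())

# Alternatively, patch asyncio so the sync entry points can nest event loops
# (useful in scripts and notebooks that mix sync and async code):
import nest_asyncio
nest_asyncio.apply()
print(query_engine.query("example question"))
```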
HyDE query transform with a chat engine: I was unable to find an implementation with a chat engine.
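There is no built-in chat-engine HyDE option that I know of; one possible pattern is to wrap the base retriever so the transform runs on each condensed question before retrieval. A sketch, where `HyDERetriever` is a hypothetical helper and `index` comes from the earlier snippets:

```python
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, QueryBundle

class HyDERetriever(BaseRetriever):
    """Hypothetical wrapper: run HyDE on each condensed question, then delegate."""

    def __init__(self, base_retriever: BaseRetriever) -> None:
        self._base_retriever = base_retriever
        self._hyde = HyDEQueryTransform(include_original=True)
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
        # HyDE generates a hypothetical answer and retrieves with its embedding.
        hyde_bundle = self._hyde.run(query_bundle.query_str)
        return self._base_retriever.retrieve(hyde_bundle)

chat_engine = CondensePlusContextChatEngine.from_defaults(
    retriever=HyDERetriever(index.as_retriever(similarity_top_k=3)),
)
```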
`QdrantVectorStore`: I want to modify the metadata received/retrieved from the vector store before I send it to the LLM to generate the response. Any tips on how I can do that?
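One way that should work is a custom node postprocessor that mutates metadata after retrieval and before prompting; a sketch, where `MetadataRewriter` is a hypothetical name and the dropped key is purely illustrative:

```python
from typing import List, Optional

from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.postprocessor.types import BaseNodePostprocessor
from llama_index.core.schema import NodeWithScore, QueryBundle

class MetadataRewriter(BaseNodePostprocessor):
    """Hypothetical postprocessor: edit node metadata before the LLM sees it."""

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        for n in nodes:
            # Illustrative edit: drop an internal key before prompting.
            n.node.metadata.pop("file_path", None)
        return nodes

chat_engine = CondensePlusContextChatEngine.from_defaults(
    retriever=retriever,  # retriever from the earlier snippets
    node_postprocessors=[MetadataRewriter()],
)
```

Per-key visibility can also be controlled on the nodes themselves via `excluded_llm_metadata_keys`, which hides metadata from the LLM without deleting it.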
`AzureOpenAI` is very slow compared to `OpenAI`; there is about a 10x delay in response generation. I have tried with both `ReActAgent` and `OpenAIAgent`.
```python
llm = AzureOpenAI(
    model=os.getenv("AOAI_COMPLETION_MODEL"),
    deployment_name=os.getenv("AOAI_DEPLOYMENT_NAME_COMPLETION"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.getenv("AOAI_ENDPOINT"),
    api_version=os.getenv("AOAI_API_VERSION"),
)
```
```python
query_engine = index.as_query_engine(
    similarity_top_k=2,
    sparse_top_k=12,
    vector_store_query_mode="hybrid",
)
```
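Note that for the hybrid mode above, the Qdrant vector store itself generally needs hybrid search enabled at construction time; a sketch, assuming the `fastembed` extra for sparse vectors is installed:

```python
# Sketch: vector_store_query_mode="hybrid" assumes the store was created
# with enable_hybrid=True so sparse vectors exist alongside dense ones.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="mlofo-loan-officer-july",
    enable_hybrid=True,
)
```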
`QueryFusionRetriever` with `CondensePlusContextChatEngine`, where I have 2 retrievers, `BM25Retriever` and `VectorStoreIndex.from_vector_store`, and I'm using Langfuse for traces. When using the condense plus context chat engine, the traces are not well segregated, e.g. into the multiple retrievers, the multiple queries, and then the fusion nodes, the way they are cleanly separated with `index.as_chat_engine`.
```python
def get_chat_engine() -> "CondensePlusContextChatEngine":
    Settings.llm = OpenAI(model="gpt-4o", temperature=0.1)
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    retriever = QueryFusionRetriever(
        [
            index.as_retriever(similarity_top_k=3),
            BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=2, verbose=True),
        ],
        similarity_top_k=2,
        num_queries=2,
        mode="reciprocal_rerank",
        use_async=False,
        verbose=True,
        query_gen_prompt=BOT_QUERY_GEN_PROMPT,
    )
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        retriever=retriever,
        system_prompt=SUPPORT_BOT_SYSTEM_PROMPT,
        streaming=True,
    )
    return chat_engine
```