Using index.as_chat_engine and memory — how do I get the sources that have been used?

streaming_response = as_chat_engine_var.stream_chat()
streaming_response.sources

is empty.

With index.as_query_engine().query()
then we can get sources. but this way doesn't relates to history. it is only for one questionfrom llama_index.core import Document, VectorStoreIndex from llama_index.core.tools import QueryEngineTool from llama_index.agent.openai import OpenAIAgent index = VectorStoreIndex.from_documents([Document.example()]) query_engine = index.as_query_engine() tool = QueryEngineTool.from_defaults(query_engine, name='search', description='Useful for asking questions about LLMs.') agent = OpenAIAgent.from_tools([tool]) async def run(): response = await agent.astream_chat("What are some facts about LLMs?") async for token in response.async_response_gen(): print(str(token), end="", flush=True) print(response.source_nodes) print(response.sources) if __name__ == "__main__": import asyncio asyncio.run(run())
# Create a memory to store the chat history.
memory = ChatMemoryBuffer.from_defaults(
    llm=Settings.llm,
    token_limit=4095,
    chat_store=SimpleChatStore(),
)

# The simplest case.
as_chat_engine = self.index.as_chat_engine(
    # Select the best chat engine based on the current LLM.
    # Corresponds to `OpenAIAgent` if using an OpenAI model that supports
    # the function calling API; otherwise corresponds to `ReActAgent`.
    chat_mode=ChatMode.BEST,
    memory=memory,
    streaming=True,
    vector_store_query_mode=VectorStoreQueryMode.SEMANTIC_HYBRID,
    similarity_top_k=3,
    text_qa_template=text_qa_template,
    refine_template=refine_template,
)
With verbose=True you can see when the agent makes a tool call.

return AgentRunner.from_llm(
    tools=[query_engine_tool],
    llm=llm,
    **kwargs,
)
return OpenAIAgent.from_tools(
    tools=tools,
    llm=llm,
    **kwargs,
)
AgentRunner.from_llm()
is just a helper that picks between the OpenAI agent and the ReAct agent.