import os

import openai

os.environ["OPENAI_API_KEY"] = "sk-***"
openai.api_key = os.environ["OPENAI_API_KEY"]

from llama_index import Document, SummaryIndex, VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.question_gen.llm_generators import LLMQuestionGenerator
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Using the LlamaDebugHandler to print the trace of the sub questions
# captured by the SUB_QUESTION callback event type
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

service_context = ServiceContext.from_defaults(callback_manager=callback_manager)

vector_index = VectorStoreIndex.from_documents(
    documents=[Document(text="we have the color green for trees")],
    service_context=service_context,
)
vector_query_engine = vector_index.as_query_engine()

# set up the base query engine as a tool
query_engine_tools = [
    QueryEngineTool(
        query_engine=vector_query_engine,
        metadata=ToolMetadata(
            name="tree_colors",
            description="Everything around trees",
        ),
    ),
]

query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    service_context=service_context,
    question_gen=LLMQuestionGenerator.from_defaults(service_context=service_context),
)

response = query_engine.query("What is the color for trees?")
print(response)
Running this raises:

Traceback (most recent call last):
  File "/python3.11/site-packages/pydantic/v1/main.py", line 522, in parse_obj
    obj = dict(obj)
          ^^^^^^^^^
ValueError: dictionary update sequence element #0 has length 1; 2 is required

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<string>", line 130, in <module>
  File "<string>", line 116, in python_function
  File "/python3.11/site-packages/llama_index/core/base_query_engine.py", line 40, in query
    return self._query(str_or_query_bundle)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/python3.11/site-packages/llama_index/query_engine/sub_question_query_engine.py", line 129, in _query
    sub_questions = self._question_gen.generate(self._metadatas, query_bundle)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/python3.11/site-packages/llama_index/question_gen/llm_generators.py", line 78, in generate
    parse = self._prompt.output_parser.parse(prediction)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/python3.11/site-packages/llama_index/question_gen/output_parser.py", line 15, in parse
    sub_questions = [SubQuestion.parse_obj(item) for item in json_dict]
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/python3.11/site-packages/llama_index/question_gen/output_parser.py", line 15, in <listcomp>
    sub_questions = [SubQuestion.parse_obj(item) for item in json_dict]
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/python3.11/site-packages/pydantic/v1/main.py", line 525, in parse_obj
    raise ValidationError([ErrorWrapper(exc, loc=ROOT_KEY)], cls) from e
pydantic.v1.error_wrappers.ValidationError: 1 validation error for SubQuestion
__root__
  SubQuestion expected dict not str (type=type_error)
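For context, the parser in llama_index/question_gen/output_parser.py expects the LLM completion to be a JSON list of objects that map onto the SubQuestion model (a sub_question plus a tool_name). If the model answers with bare strings instead, parse_obj fails exactly as above, which is why a question generator built on function calling is more robust here. A minimal sketch of the two shapes, assuming SubQuestion is importable from llama_index.question_gen.types as in the 0.9.x release line this traceback comes from:

from llama_index.question_gen.types import SubQuestion

# Each item must be a JSON object carrying the SubQuestion fields.
good = [{"sub_question": "What is the color for trees?", "tool_name": "tree_colors"}]
print([SubQuestion.parse_obj(item) for item in good])

# This is the shape that triggers the traceback above: bare strings instead of
# objects, so pydantic raises "SubQuestion expected dict not str".
bad = ["What is the color for trees?"]
try:
    [SubQuestion.parse_obj(item) for item in bad]
except Exception as err:  # pydantic (v1) ValidationError
    print(err)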
Use OpenAIQuestionGenerator, since it uses the function calling API:

final_engine_no_metadata = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[
        QueryEngineTool(
            query_engine=engine_no_metadata,
            metadata=ToolMetadata(
                name="sec_filing_documents",
                description="financial information on companies",
            ),
        )
    ],
    question_gen=question_gen,
    use_async=True,
)

Instead of LLMQuestionGenerator, use OpenAIQuestionGenerator:
from llama_index.question_gen.llm_generators import OpenAIQuestionGenerator, DEFAULT_OPENAI_SUB_QUESTION_PROMPT_TMPL

service_context = ServiceContext.from_defaults(
    llm=llm, text_splitter=text_splitter
)

question_gen = OpenAIQuestionGenerator.from_defaults(
    llm=service_context.llm,
    prompt_template_str="""
        Follow the example, but instead of giving a question, always prefix the question
        with: 'By first identifying and quoting the most relevant sources, '.
        """
    + DEFAULT_OPENAI_SUB_QUESTION_PROMPT_TMPL,
)
ImportError                               Traceback (most recent call last)
<ipython-input-14-24df7212ac1e> in <cell line: 16>()
     14 # )
     15
---> 16 from llama_index.question_gen.llm_generators import OpenAIQuestionGenerator, DEFAULT_OPENAI_SUB_QUESTION_PROMPT_TMPL
     17
     18 service_context = ServiceContext.from_defaults(

ImportError: cannot import name 'OpenAIQuestionGenerator' from 'llama_index.question_gen.llm_generators' (/usr/local/lib/python3.10/dist-packages/llama_index/question_gen/llm_generators.py)
OpenAIQuestionGenerator has to be imported from llama_index.question_gen.openai_generator instead:

from llama_index.question_gen.openai_generator import (
    OpenAIQuestionGenerator,
    DEFAULT_OPENAI_SUB_QUESTION_PROMPT_TMPL,
)
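For completeness, a sketch of the corrected setup that combines the snippets above with the fixed import path. It assumes llm, text_splitter, and engine_no_metadata are defined earlier, as in the notebook these excerpts come from, and the query string is only illustrative:

from llama_index import ServiceContext
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.question_gen.openai_generator import (
    OpenAIQuestionGenerator,
    DEFAULT_OPENAI_SUB_QUESTION_PROMPT_TMPL,
)
from llama_index.tools import QueryEngineTool, ToolMetadata

# llm, text_splitter and engine_no_metadata are defined earlier in the notebook.
service_context = ServiceContext.from_defaults(llm=llm, text_splitter=text_splitter)

question_gen = OpenAIQuestionGenerator.from_defaults(
    llm=service_context.llm,
    prompt_template_str="""
    Follow the example, but instead of giving a question, always prefix the question
    with: 'By first identifying and quoting the most relevant sources, '.
    """
    + DEFAULT_OPENAI_SUB_QUESTION_PROMPT_TMPL,
)

final_engine_no_metadata = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[
        QueryEngineTool(
            query_engine=engine_no_metadata,
            metadata=ToolMetadata(
                name="sec_filing_documents",
                description="financial information on companies",
            ),
        )
    ],
    question_gen=question_gen,
    use_async=True,
)

# Illustrative query; the notebook compares R&D vs. sales-and-marketing costs
# for Uber and Lyft in 2019.
response_no_metadata = final_engine_no_metadata.query(
    "What were the research and development and sales and marketing costs "
    "for Uber and Lyft in 2019, in millions of USD?"
)
print(response_no_metadata)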
OpenAIQuestionGenerator improved the performance. Here is response_no_metadata with LLMQuestionGenerator from the original notebook:

Generated 4 sub questions.
[sec_filing_documents] Q: What was the cost due to research and development for Uber in 2019
[sec_filing_documents] Q: What was the cost due to sales and marketing for Uber in 2019
[sec_filing_documents] Q: What was the cost due to research and development for Lyft in 2019
[sec_filing_documents] Q: What was the cost due to sales and marketing for Lyft in 2019
[sec_filing_documents] A: The cost due to sales and marketing for Uber in 2019 was $814,122 in thousands.
[sec_filing_documents] A: The cost due to research and development for Uber in 2019 was $1,505,640 in thousands.
[sec_filing_documents] A: The cost of research and development for Lyft in 2019 was $1,505,640 in thousands.
[sec_filing_documents] A: The cost due to sales and marketing for Lyft in 2019 was $814,122 in thousands.

{
  "Uber": {
    "Research and Development": 1505.64,
    "Sales and Marketing": 814.122
  },
  "Lyft": {
    "Research and Development": 1505.64,
    "Sales and Marketing": 814.122
  }
}
And here is response_no_metadata with OpenAIQuestionGenerator:

Generated 4 sub questions.
[sec_filing_documents] Q: By first identifying and quoting the most relevant sources, what was the cost due to research and development for Uber in 2019 in millions of USD?
[sec_filing_documents] Q: By first identifying and quoting the most relevant sources, what was the cost due to sales and marketing for Uber in 2019 in millions of USD?
[sec_filing_documents] Q: By first identifying and quoting the most relevant sources, what was the cost due to research and development for Lyft in 2019 in millions of USD?
[sec_filing_documents] Q: By first identifying and quoting the most relevant sources, what was the cost due to sales and marketing for Lyft in 2019 in millions of USD?
[sec_filing_documents] A: The cost due to sales and marketing for Lyft in 2019 was $4,626 million.
[sec_filing_documents] A: The cost due to sales and marketing for Uber in 2019 was $4,626 million.
[sec_filing_documents] A: The cost due to research and development for Uber in 2019 was $4,836 million.
[sec_filing_documents] A: The cost due to research and development for Lyft in 2019 was $1,505,640 in thousands of USD, which is equivalent to $1,505.64 million.

{
  "Uber": {
    "Research and Development": 4836,
    "Sales and Marketing": 4626
  },
  "Lyft": {
    "Research and Development": 1505.64,
    "Sales and Marketing": 4626
  }
}