import os

from langchain.chat_models import ChatOpenAI
from llama_index import (
    GPTSimpleVectorIndex,
    LLMPredictor,
    PromptHelper,
    ServiceContext,
)
from llama_index.indices.query.schema import QueryMode

# api_key, documents, query_text, and QA_PROMPT are defined earlier in my script

# define llm predictor
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(
        model_name="gpt-3.5-turbo",
        max_tokens=1024,
        openai_api_key=api_key,
        temperature=0.2,
        streaming=False,
    )
)
# define prompt helper
# set maximum input size
max_input_size = 4096
# set number of output tokens
num_output = 512
# set maximum chunk overlap
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
    chunk_size_limit=2048,
)
# load the index from disk if it exists, otherwise build it and persist it
index_file = 'indices/index.json'
os.makedirs('indices', exist_ok=True)
if os.path.exists(index_file):
    index = GPTSimpleVectorIndex.load_from_disk(index_file, service_context=service_context)
else:
    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
    index.save_to_disk(index_file)
result = index.query(
    query_text,
    text_qa_template=QA_PROMPT,
    response_mode="tree_summarize",
    similarity_top_k=4,
    mode=QueryMode.EMBEDDING,
    streaming=False,
)
This is the code I use to load the index and run queries. Each query takes more than ten seconds to return an answer, even though the answers ChatGPT produces are not very long. Am I setting some parameters improperly? Sorry for taking up your time.
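I suspect response_mode="tree_summarize" might be part of the problem, since as I understand it that mode makes several LLM calls per query rather than one. Would changes along these lines be the right way to speed things up? This is only a sketch against the same 0.5-era llama_index API, and I haven't verified that print_response_stream() is the correct call for a streaming response:

# streaming-enabled predictor with a smaller completion budget
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(
        model_name="gpt-3.5-turbo",
        max_tokens=512,        # shorter completions finish sooner
        openai_api_key=api_key,
        temperature=0.2,
        streaming=True,        # stream tokens as they are generated
    )
)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
    chunk_size_limit=2048,
)
# reload the index so it picks up the new service context
index = GPTSimpleVectorIndex.load_from_disk(index_file, service_context=service_context)
response = index.query(
    query_text,
    text_qa_template=QA_PROMPT,
    response_mode="compact",   # single synthesis call instead of a summary tree
    similarity_top_k=2,        # fewer retrieved chunks -> smaller prompt
    mode=QueryMode.EMBEDDING,
    streaming=True,
)
response.print_response_stream()  # print tokens as soon as they arrive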