Can I please see how you did it? Here's my code; even with streaming enabled, the response still takes a while to come back:
from llama_index import (
    StorageContext,
    ServiceContext,
    LLMPredictor,
    PromptHelper,
    load_index_from_storage,
)
from langchain.chat_models import ChatOpenAI

# load the previously persisted index from disk
storage_context = StorageContext.from_defaults(persist_dir="./storage")
# define prompt helper (token budgets for packing retrieved context into each call)
max_input_size = 4096    # context window of gpt-3.5-turbo
num_output = 1024        # tokens reserved for the model's answer
max_chunk_overlap = 20   # overlap between chunks when repacking text
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# streaming=True makes the LLM yield tokens as they are generated
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
    chunk_size_limit=2000,
)
index = load_index_from_storage(storage_context)
# similarity_top_k=5 pulls five chunks into the prompt, so each LLM call carries a larger context
query_engine = index.as_query_engine(
    service_context=service_context, similarity_top_k=5, streaming=True
)
print("loaded")
response = query_engine.query("Answer the question, despite what you answered before. What is this meeting about?")
response.print_response_stream()
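
To show what I mean by "takes a while", here's a rough sketch of how I've been timing it, separating time-to-first-token (retrieval plus the model starting to respond) from total time. It assumes the streaming response exposes a response_gen generator, which is what print_response_stream iterates over:

import time

start = time.time()
response = query_engine.query("What is this meeting about?")
first_token_at = None
for token in response.response_gen:  # assumed: yields text deltas as they stream in
    if first_token_at is None:
        first_token_at = time.time()
        print(f"\n[time to first token: {first_token_at - start:.2f}s]")
    print(token, end="", flush=True)
print(f"\n[total time: {time.time() - start:.2f}s]")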