def stream(response_stream):
    # Iterate over the streaming response generator and yield each chunk,
    # marking it as not final with 'end': False
    for text in response_stream.response_gen:
        yield {"message": text, "end": False}
    # Report how many LLM tokens this request consumed
    print("\n", "Total LLM Token Count: ", token_counter.total_llm_token_count, "\n")
    # When the stream is exhausted, yield a final message with 'end' set to True
    yield {"message": "Finished", "end": True}
import tiktoken
from langchain.chat_models import ChatOpenAI
from llama_index import LLMPredictor, ServiceContext, set_global_service_context
from llama_index.callbacks import CallbackManager, TokenCountingHandler

# Count tokens with the gpt-3.5-turbo tokenizer so usage can be reported per request
token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
)
callback_manager = CallbackManager([token_counter])

# Wrap a streaming chat model in an LLMPredictor and register it, together with the
# token-counting callback, as the global service context
llm_predictorquery = LLMPredictor(
    llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k", streaming=True)
)
service_context_query = ServiceContext.from_defaults(
    llm_predictor=llm_predictorquery, callback_manager=callback_manager
)
set_global_service_context(service_context_query)
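# A minimal sketch of how the stream() generator above might be consumed. The
# 'index' variable and the example question are illustrative assumptions (an
# index is presumed to have been built elsewhere in the application); the
# query-engine calls follow the standard LlamaIndex streaming pattern.
query_engine = index.as_query_engine(streaming=True)
response_stream = query_engine.query("What does the document say about pricing?")
for chunk in stream(response_stream):
    # Print partial text as it arrives; stop once the final marker is seen
    print(chunk["message"], end="", flush=True)
    if chunk["end"]:
        break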