# Confluence Q&A chat loop: embed documents with multilingual-e5-large, build a
# llama_index vector index over them, persist it, then answer questions read
# from stdin until EOF.
# NOTE(review): relies on `docs`, `data_path`, and `TEMPLATE_STR` being defined
# earlier in this file (not visible in this chunk) — confirm.

# Embedding model; must be the SAME at index-build time and at query time.
embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-large")

# llm model
print('defining llm model')
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(temperature=0, model_name="gpt-4", streaming=True)
)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embed_model,
)

# Build the vector index over the Confluence documents and persist it to disk.
confluence_index = VectorStoreIndex.from_documents(
    docs, service_context=service_context, show_progress=True
)
confluence_index.set_index_id("vector_index")
confluence_index.storage_context.persist(data_path)

QA_TEMPLATE = Prompt(TEMPLATE_STR)

print('defining chat engine')
storage_context = StorageContext.from_defaults(persist_dir=data_path)
# FIX: pass service_context and the index id set above when reloading.
# Without service_context, llama_index falls back to its default (OpenAI)
# embedding model for queries, which does not match the e5-large embeddings
# the persisted index was built with — retrieval would silently degrade.
index = load_index_from_storage(
    storage_context,
    index_id="vector_index",
    service_context=service_context,
)
chat_engine = index.as_chat_engine(
    chat_mode='condense_question',
    streaming=True,
    text_qa_template=QA_TEMPLATE,
)

# Simple stdin REPL: the literal message 'RESET' clears chat history;
# anything else is treated as a question for the chat engine.
while True:
    print('waiting for question...')
    try:
        message = input()
    except EOFError:
        # FIX: stdin closed (e.g. piped input exhausted) — exit the loop
        # cleanly instead of crashing with an unhandled EOFError.
        break
    if message == 'RESET':
        chat_engine.reset()
    else:
        response = chat_engine.chat(message)
        print(response)