query_engine = index.as_chat_engine(chat_mode='context', similarity_top_k=similarity_top_k, llm=llm_engine, system_prompt=prepared_system_prompt)
response = query_engine.chat(query_text, chat_history=chat_history)
nodes = index.as_retriever().retrieve("test query str")
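If that test call returns nodes, it helps to print each node's score and a snippet of its text to sanity-check retrieval before wiring it into a chat engine; a minimal sketch, assuming the index is already built:

# Sketch: inspect what the retriever returns (assumes `index` already exists)
for n in index.as_retriever(similarity_top_k=3).retrieve("test query str"):
    print(f"score={n.score}  text={n.node.get_content()[:100]!r}")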
from llama_index.core.llms import ChatMessage

# Retrieve context manually, build the system prompt yourself, and call the LLM directly
nodes = index.as_retriever(similarity_top_k=similarity_top_k).retrieve(query_text)
context_str = "\n\n".join([n.node.get_content() for n in nodes])
full_prompt = system_prompt + 'Below is the provided context: \n\n' + context_str
chat_history.append(ChatMessage(role="system", content=full_prompt))
chat_history.append(ChatMessage(role="user", content=query_text))
response = llm_engine.chat(chat_history)
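This manual version does the same work as the context chat engine above, but it exposes each step: you control retrieval, how the context is stitched into the system prompt, and exactly what goes into the chat history.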
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
...
system_message = ChatMessage(role="system", content=full_prompt)
user_message = ChatMessage(role="user", content=query_text)
prev_messages = memory.get()
response = llm.chat([system_message, *prev_messages, user_message])
memory.put(user_message)
memory.put(response.message)
Because the buffer was created with ChatMemoryBuffer.from_defaults(token_limit=1500), memory.get() will fetch as many of the latest messages as fit within that token limit.
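A small sketch of that trimming behavior (the message contents are hypothetical, just to illustrate):

from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
# Stuff in far more history than the limit can hold
for i in range(200):
    memory.put(ChatMessage(role="user", content=f"message number {i}"))
# get() keeps the newest messages and drops older ones once the
# 1500-token budget would be exceeded
recent = memory.get()
print(len(recent))  # noticeably fewer than 200

The same memory object can also be passed to the chat engine (e.g. index.as_chat_engine(chat_mode='context', memory=memory, ...)) so the trimming happens automatically between turns.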