ChatMemoryBuffer
`ChatMemoryBuffer` is essentially a rolling window over the conversation: it buffers chat messages and, when read, returns only the most recent ones that fit within a token limit. It can be passed directly to a chat engine:

```python
from llama_index.memory import ChatMemoryBuffer

# Keep roughly the last 3900 tokens of conversation history
memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    memory=memory,
    verbose=False,
)
```
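A minimal usage sketch, assuming `index` was already built from your documents (the questions are hypothetical). The second turn relies on the buffered history, which `condense_plus_context` condenses into a standalone question before retrieval:

```python
# First turn: establishes context that lands in the memory buffer
response = chat_engine.chat("What does the report say about Q3 revenue?")
print(response)

# Second turn: "it" is resolved against the buffered chat history
response = chat_engine.chat("How does it compare to Q2?")
print(response)
```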
For lower-level control, you can run retrieval and response synthesis as separate steps: fetch the top-k nodes with a retriever, then hand them to a response synthesizer.

```python
from llama_index import get_response_synthesizer

# Fetch the 2 most similar nodes for the query
retriever = index.as_retriever(similarity_top_k=2)
nodes = retriever.retrieve("query")

# Turn the retrieved nodes into a final answer
response_synthesizer = get_response_synthesizer()
response = response_synthesizer.synthesize("query", nodes)
```
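If you want those two steps wired together behind a single call, a sketch (assuming the same legacy `llama_index` namespace used above) composes them into a `RetrieverQueryEngine`:

```python
from llama_index.query_engine import RetrieverQueryEngine

# Bundles retrieval and synthesis behind one .query() call
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)
response = query_engine.query("query")
```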
The buffer can also be used on its own, outside a chat engine:

```python
from llama_index.memory import ChatMemoryBuffer
from llama_index.llms import ChatMessage

memory = ChatMemoryBuffer.from_defaults(token_limit=1500)

# Add a message to the buffer, then read back the (token-limited) history
memory.put(ChatMessage(role="user", content="hello"))
chat_history = memory.get()
```
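To see the rolling window in action, a small sketch (message contents are hypothetical, and the token limit is deliberately tiny) that overfills the buffer and compares the trimmed view against the full history:

```python
from llama_index.memory import ChatMemoryBuffer
from llama_index.llms import ChatMessage

memory = ChatMemoryBuffer.from_defaults(token_limit=50)  # deliberately small

# Store more messages than 50 tokens can hold
for i in range(20):
    memory.put(ChatMessage(role="user", content=f"message number {i}"))

print(len(memory.get_all()))  # every message ever stored
print(len(memory.get()))      # only the most recent messages under the limit
```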