stream = await self.chat_engine.astream_chat(message_content,self.chat_history)
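For reference, the streamed response returned by astream_chat would normally be drained through its async token generator. This is a minimal sketch, not from the thread, assuming stream is a StreamingAgentChatResponse with async_response_gen() as in llama_index:

# Minimal sketch (assumption, not from the thread): consuming the async token
# generator of a StreamingAgentChatResponse returned by astream_chat.
async def print_stream(stream) -> None:
    async for token in stream.async_response_gen():
        print(token, end="", flush=True)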
from llama_index.chat_engine.types import AGENT_CHAT_RESPONSE_TYPE

# `chat_sys` (the chat engine) and `client` (the owner of the event loop) are created elsewhere.

# blocking LLM get message handler
def getmsg(message) -> AGENT_CHAT_RESPONSE_TYPE:
    return chat_sys.chat(message)

# async (non-blocking) LLM message handler: runs the blocking call on the default executor
async def getmsga(message) -> AGENT_CHAT_RESPONSE_TYPE:
    return await client.loop.run_in_executor(None, getmsg, message)
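For illustration only, a hypothetical call site: `client.loop` suggests a discord.py bot, so this sketch assumes an on_message handler, which is not part of the original snippet:

# Hypothetical usage (assumes a discord.py bot named `client`): the blocking
# chat() call runs on the executor, so the event loop keeps handling events.
@client.event
async def on_message(message):
    if message.author == client.user:
        return
    response = await getmsga(message.content)
    await message.channel.send(str(response))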
astream_chat is not implemented in the LLM class.

from llama_index.llms import OpenAILike

llm = OpenAILike(
    model="model",
    api_key="fake",
    api_base="http://127.0.0.1:8000/v1",
    # ...
)
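Until that changes, one possible workaround is to fall back to the synchronous streaming path and push it off the event loop. This is a sketch under the assumption that stream_chat works for this LLM; chat_engine and the helper name are illustrative:

import asyncio

# Sketch of a fallback when astream_chat is unavailable: run the blocking
# stream_chat call, and the draining of its sync token generator, in a worker
# thread so the event loop stays responsive. Returns the full response text.
async def stream_chat_fallback(chat_engine, message: str, chat_history=None) -> str:
    def run_blocking() -> str:
        response = chat_engine.stream_chat(message, chat_history)
        return "".join(response.response_gen)

    return await asyncio.to_thread(run_blocking)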
def completion_to_prompt(completion: str) -> str:
    system_prompt = "..."
    return f"<|system|>\n{system_prompt}</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"

def messages_to_prompt(messages) -> str:
    prompt_str = ""
    for msg in messages:
        if msg.role == "system":
            prompt_str += f"<|system|>\n{msg.content}</s>\n"
        if msg.role == "user":
            prompt_str += f"<|user|>\n{msg.content}</s>\n"
        if msg.role == "assistant":
            prompt_str += f"<|assistant|>\n{msg.content}</s>\n"
    prompt_str += "<|assistant|>\n"
    return prompt_str

llm = OpenAILike(
    # ...
    completion_to_prompt=completion_to_prompt,
    messages_to_prompt=messages_to_prompt,
)
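A quick way to exercise both formatters, sketched here under the assumption that the server behind api_base exposes a completion-style (non-chat) endpoint; the prompts are placeholders:

from llama_index.llms import ChatMessage

# Sketch: complete() is formatted via completion_to_prompt, while chat()
# messages are flattened into a single prompt via messages_to_prompt.
print(llm.complete("What does OpenAILike do?"))
print(
    llm.chat(
        [
            ChatMessage(role="system", content="You are a helpful assistant."),
            ChatMessage(role="user", content="What does OpenAILike do?"),
        ]
    )
)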