The issue is pretty much exclusive to non-OpenAI models. This is the code I'm using to run the other models (Mixtral-8x7b/22b, Llama3-8b/70b-8192, Gemini Pro/1.5 Pro, all three Claude models, Command-R):
from llama_index.core.agent.react.formatter import (
ReActChatFormatter,
)
from llama_index.core.agent.react.prompts import (
REACT_CHAT_SYSTEM_HEADER,
)
# Build the ReAct system header: use the stock REACT_CHAT_SYSTEM_HEADER as-is
# unless a custom system prompt is configured, in which case the custom prompt
# is prepended above the stock header.
if self.get_setting("system_prompt") is None:
    final_system_header = REACT_CHAT_SYSTEM_HEADER
else:
    final_system_header = "{prompt}\n{system_header}".format(
        prompt=self.get_setting("system_prompt"),
        system_header=REACT_CHAT_SYSTEM_HEADER,
    )

# Formatter that injects the (possibly customized) header into the ReAct loop.
react_chat_formatter = ReActChatFormatter(
    system_header=final_system_header,
)
# Assemble the chat-engine configuration in one place, then construct the
# engine from the index. Keyword order matches the original call exactly.
engine_kwargs = {
    "chat_mode": chat_engine_type_lower,
    "llm": self.llm,
    "query_engine": query_engine,
    "memory": memory,
    "system_prompt": self.get_setting("system_prompt"),
    "streaming": self.get_setting("streaming"),
    "similarity_top_k": self.get_setting("similarity_top_k"),
    "react_chat_formatter": react_chat_formatter,
    "context_template": context_template,
}
chat_engine = self.index.as_chat_engine(**engine_kwargs)