Does anyone know how to get memory to work with multimodal ReAct agents? It doesn't seem to persist between tasks. Here is my test code:
def do_test(user_id: str) -> bool:
    """Return True when *user_id* matches the expected test identifier."""
    expected = "f6773f16854cca0f871d10e7cf6c7e84d12f3783bcfa13ccba8d2d0e48ee7cbd"
    return user_id == expected
# Wrap do_test as a callable tool for the agent.
# (The original built the same tool twice: a `function_tool` that was never
# used, plus a one-element comprehension over a one-tuple — collapsed here.)
tools = [FunctionTool.from_defaults(fn=do_test)]
# Vision-capable LLM backing the multimodal ReAct loop.
llm = OpenAIMultiModal(model="gpt-4-vision-preview", api_key=OPENAI_API_KEY)
# Step-wise worker wrapped in a runner so tasks can be driven manually.
react_step_engine = MultimodalReActAgentWorker.from_tools(tools, llm=llm, verbose=True)
agent = AgentRunner(react_step_engine)
def execute_step(agent: AgentRunner, task: Task):
    """Advance *task* by one step.

    Returns the finalized response when this was the last step,
    otherwise None to signal that more steps remain.
    """
    output = agent.run_step(task.task_id)
    if not output.is_last:
        return None
    return agent.finalize_response(task.task_id)
def execute_steps(agent: AgentRunner, task: Task):
    """Drive *task* step by step until a final response is produced."""
    result = None
    while result is None:
        result = execute_step(agent, task)
    return result
# Task 1: run the tool, and let the agent inspect the image if the tool fails.
image_docs = [ImageDocument(image_path="/home/finn/Pictures/rhino.jpeg")]
task = agent.create_task(
    "User ID: f6773f16854cca0f871d10e7cf6c7e84d12f3783bcfa13ccba8d2d0e48ee7cbd\nrun the first test and if it fails describe the image",
    extra_state={"image_docs": image_docs},
)
response = execute_steps(agent, task)

# Task 2: should recall the earlier conversation if memory persisted.
task2 = agent.create_task(
    "what is the first word i said to you",
    extra_state={"image_docs": []},
)
response = execute_steps(agent, task2)
The first task works correctly, but in response to the second task the agent says the first word was "what" — i.e., it only sees the second task's own message, not the earlier conversation.