cheeetoo

Does anyone know how to get memory to work with multimodal ReAct agents? It doesn't seem to persist between tasks. Here is my test code:

Plain Text

def do_test(user_id: str) -> bool:
    """Run the test for ``user_id`` and report whether it succeeded.

    The test succeeds only when ``user_id`` is exactly equal to the one
    expected ID string hard-coded below.
    """
    expected_id = "f6773f16854cca0f871d10e7cf6c7e84d12f3783bcfa13ccba8d2d0e48ee7cbd"
    passed = user_id == expected_id
    return passed

# Wrap do_test as a tool.
# NOTE(review): function_tool is not referenced again in this snippet; the
# agent below is given `tools` instead.
function_tool = FunctionTool.from_defaults(fn=do_test)
# Tool list passed to the agent worker; it holds a single tool built from do_test.
tools = [FunctionTool.from_defaults(fn=f) for f in (do_test,)]
# Multimodal LLM client. OPENAI_API_KEY is expected to be defined outside this snippet.
llm = OpenAIMultiModal(model="gpt-4-vision-preview", api_key=OPENAI_API_KEY)
# Build the step worker from the tools and the LLM, then wrap it in the runner
# whose create_task / run_step / finalize_response methods are used below.
react_step_engine = MultimodalReActAgentWorker.from_tools(tools, llm=llm, verbose=True)
agent = AgentRunner(react_step_engine)

def execute_step(agent: AgentRunner, task: Task):
    """Run one step of ``task`` on ``agent``.

    Returns the finalized response when the step just run is flagged as the
    last one; otherwise returns ``None`` so the caller can run another step.
    """
    outcome = agent.run_step(task.task_id)
    if not outcome.is_last:
        # More steps remain; signal the caller to keep going.
        return None
    return agent.finalize_response(task.task_id)


def execute_steps(agent: AgentRunner, task: Task):
    """Keep stepping ``task`` on ``agent`` until a response is produced.

    Calls ``execute_step`` repeatedly and returns the first result that is
    not ``None``.
    """
    while True:
        result = execute_step(agent, task)
        if result is not None:
            return result

# First task: the prompt carries the user ID and the instruction, and one
# image document is attached through extra_state["image_docs"].
task = agent.create_task(
    "User ID: f6773f16854cca0f871d10e7cf6c7e84d12f3783bcfa13ccba8d2d0e48ee7cbd\nrun the first test and if it fails describe the image",
    extra_state={"image_docs": [ImageDocument(image_path="/home/finn/Pictures/rhino.jpeg")]},
)

# Step the first task until it yields a final response.
response = execute_steps(agent, task)

# Second task on the same agent, with an empty image list.
# NOTE(review): whether chat history is expected to carry over between
# create_task calls on this agent type should be confirmed against the
# library documentation.
task2 = agent.create_task(
    "what is the first word i said to you",
    extra_state={"image_docs": []},
)

# Rebinds `response`, replacing the first task's result.
response = execute_steps(agent, task2)

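The first task works correctly, but in response to the second task the agent says the first word was 'what'. That is the first word of the second prompt, so the agent does not appear to see the conversation from the first task.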

Find answers from the community

does anyone know how to get memory to