Find answers from the community

Home
Members
cheeetoo
c
cheeetoo
Offline, last seen 3 months ago
Joined September 25, 2024
Does anyone know how to get memory to work with multimodal ReAct agents? It doesn't seem to persist between tasks. Here is my test code:
Plain Text
def do_test(user_id: str) -> bool:
    """
    runs the test and returns whether it was successful or not
    """
    # NOTE(review): the docstring above is left untouched on purpose. This
    # function is handed to FunctionTool.from_defaults, which presumably
    # exposes the docstring to the LLM as the tool description -- confirm
    # before rewording it.
    expected_user_id = (
        "f6773f16854cca0f871d10e7cf6c7e84d12f3783bcfa13ccba8d2d0e48ee7cbd"
    )
    # The test "passes" only for an exact match with the one known ID.
    return user_id == expected_user_id

# Standalone tool handle for do_test; not referenced again in this snippet.
function_tool = FunctionTool.from_defaults(fn=do_test)

# The tool list that is actually given to the agent worker (a single tool).
tools = [FunctionTool.from_defaults(fn=do_test)]

llm = OpenAIMultiModal(
    model="gpt-4-vision-preview",
    api_key=OPENAI_API_KEY,
)

# Step-wise ReAct worker built from the tools and the multimodal LLM,
# wrapped in an AgentRunner that owns task creation and stepping.
react_step_engine = MultimodalReActAgentWorker.from_tools(
    tools,
    llm=llm,
    verbose=True,
)
agent = AgentRunner(react_step_engine)

def execute_step(agent: AgentRunner, task: Task):
    """Run a single step of ``task`` on ``agent``.

    Returns the value of ``agent.finalize_response`` when the step reports
    itself as the last one, otherwise ``None`` so the caller knows to keep
    stepping.
    """
    outcome = agent.run_step(task.task_id)

    # Not finished yet: signal the caller to run another step.
    if not outcome.is_last:
        return None

    return agent.finalize_response(task.task_id)


def execute_steps(agent: AgentRunner, task: Task):
    """Step ``task`` repeatedly until ``execute_step`` yields a response.

    Returns the first non-``None`` value produced by ``execute_step``. There
    is no step limit, so this loops for as long as steps keep returning
    ``None``.
    """
    while True:
        result = execute_step(agent, task)
        if result is not None:
            return result

# First task: the prompt carries the same user ID that do_test compares
# against, and extra_state supplies one image document for the agent.
task = agent.create_task(
    "User ID: f6773f16854cca0f871d10e7cf6c7e84d12f3783bcfa13ccba8d2d0e48ee7cbd\nrun the first test and if it fails describe the image",
    extra_state={"image_docs": [ImageDocument(image_path="/home/finn/Pictures/rhino.jpeg")]},
)

# Step the first task until execute_steps returns a response.
response = execute_steps(agent, task)

# Second task, created on the same agent object but with an empty image list.
# Its prompt asks about the earlier conversation, so it only makes sense if
# state from the first task is visible to this one.
task2 = agent.create_task(
    "what is the first word i said to you",
    extra_state={"image_docs": []},
)

# Rebinds `response`; the first task's response is no longer referenced.
response = execute_steps(agent, task2)

The first task works correctly, but in response to the second task the agent says the first word was 'what'.
6 comments
L
c