What is causing the error in my multimodal query engine?

What am I doing wrong here?
ERROR: ValueError: "MultimodalQueryEngine" object has no field "multi_modal_llm"
Plain Text
# Initialize the multimodal query engines
query_engine = MultimodalQueryEngine(
    retriever=index.as_retriever(similarity_top_k=3),
    multi_modal_llm=azure_openai_mm_llm,  # Use Azure GPT-4V for multimodal understanding
)

base_query_engine = MultimodalQueryEngine(
    retriever=baseline_index.as_retriever(similarity_top_k=3),
    multi_modal_llm=azure_openai_mm_llm,  # Use Azure GPT-4V for multimodal understanding
)

# Example usage: Try a query with text and image understanding
response = query_engine.custom_query("Describe the content and key insights from the provided slides.")
print(str(response))
Plain Text
class MultimodalQueryEngine(CustomQueryEngine):
    """Custom multimodal Query Engine."""

    def __init__(self, retriever, multi_modal_llm, qa_prompt=None):
        super().__init__(retriever=retriever, qa_prompt=qa_prompt or QA_PROMPT)
        # Explicitly set multi_modal_llm as an instance attribute
        self.multi_modal_llm = multi_modal_llm

    def custom_query(self, query_str: str):
        # Retrieve text nodes
        nodes = self.retriever.retrieve(query_str)
        # Create ImageNode items from text nodes
        image_nodes = [
            NodeWithScore(node=ImageNode(image_path=n.metadata["image_path"]))
            for n in nodes
        ]

        # Create context string from text nodes
        context_str = "\n\n".join(
            [n.node.get_content(metadata_mode=MetadataMode.LLM) for n in nodes]
        )
        fmt_prompt = self.qa_prompt.format(context_str=context_str, query_str=query_str)

        # Synthesize an answer using AzureOpenAIMultiModal for image and text understanding
        llm_response = self.multi_modal_llm.complete(
            prompt=fmt_prompt,
            image_documents=[image_node.node for image_node in image_nodes],
        )
        return Response(
            response=str(llm_response),
            source_nodes=nodes,
            metadata={"text_nodes": nodes, "image_nodes": image_nodes},
        )
Not immediately clear what the issue is

If you are in a notebook, try fully rerunning it?

Maybe Claude can spot the issue before I can lol

Claude got it, I'm just dumb
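For anyone who finds this thread later: CustomQueryEngine is a Pydantic model, so every attribute has to be declared as a typed class field; assigning an undeclared self.multi_modal_llm inside __init__ is exactly what raises the "object has no field" error. A minimal sketch of the fix, assuming the llama-index-multi-modal-llms-azure-openai package and the QA_PROMPT defined elsewhere in the notebook:
Plain Text
from llama_index.core.prompts import PromptTemplate
from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.multi_modal_llms.azure_openai import AzureOpenAIMultiModal

# Stand-in for the QA_PROMPT defined elsewhere in the notebook
QA_PROMPT = PromptTemplate("Context:\n{context_str}\n\nQuery: {query_str}\nAnswer: ")


class MultimodalQueryEngine(CustomQueryEngine):
    """Custom multimodal Query Engine."""

    # CustomQueryEngine is a Pydantic model: declare attributes as typed
    # class fields instead of assigning them inside __init__.
    retriever: BaseRetriever
    multi_modal_llm: AzureOpenAIMultiModal
    qa_prompt: PromptTemplate = QA_PROMPT

    def custom_query(self, query_str: str):
        ...  # unchanged from the snippet above
With the fields declared, the original MultimodalQueryEngine(retriever=..., multi_modal_llm=...) calls work unchanged, since Pydantic accepts declared fields as keyword arguments.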
It loads the image from the image path 👍 and sends the bytes to OpenAI
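Under the hood, ImageNode can resolve a local path to raw bytes itself; a minimal sketch (the file path here is hypothetical):
Plain Text
from llama_index.core.schema import ImageNode

node = ImageNode(image_path="slides/page_1.png")  # hypothetical local file
image_bytes = node.resolve_image()  # BytesIO of the raw image bytes
# The multimodal LLM base64-encodes these bytes into the OpenAI request payload.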
Brilliant, do you know if Arize Phoenix supports viewing the images if they are just local, with LlamaTrace?
Attachment: image.png
Ohhh yeah, Arize won't be supporting that I think
But that's a good point of feedback!