You're sure that self.embedder.get_query_embedding(query) returns the expected dimension? 🤔
async def aget_text_embedding(self, text: str) -> Embedding:
    """
    Generates an embedding for the given text.
    """
    txt_embedding = await self._aget_text_embedding(text)
    return txt_embedding
text_model: str = "sentence-transformers/all-mpnet-base-v2",
https://huggingface.co/sentence-transformers/all-mpnet-base-v2 (768 dimensionality)
def _get_query_embedding(self, query: str) -> Embedding:
    """
    Generates an embedding for the given query.
    """
    qry_embedding = self._text_model.get_query_embedding(query)
    return qry_embedding
interesting 🤔 Somehow, it says your query embedding has 512 dimensions (while the data stored in the vector store has 768)
similarity = similarity_fn(query_embedding_np, emb)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/llama_index/core/embeddings/base.py", line 48, in similarity
product = np.dot(embedding1, embedding2)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: shapes (512,) and (768,) not aligned: 512 (dim 0) != 768 (dim 0)
So query embed == 512, and vector_store == 768, if I'm reading that correctly 🤔
right which doesn't make any sense
ZEmbedding is part of my ZPipeline which is the embedder
and I'm using the same thing
Hmm... I think the issue is, in order to retrieve images, it needs to use the CLIP embeddings (not mpnet)
I can't find the dimensionality of OpenCLIP embeddings anywhere tbh
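One quick way to check, for what it's worth -- just a sketch, assuming the embedders follow the llama_index BaseEmbedding interface, and embed_model / qdrant_client stand in for whatever you already have wired up:
query_dim = len(embed_model.get_query_embedding("test query"))
text_dim = len(embed_model.get_text_embedding("test text"))
print(query_dim, text_dim)  # both should match the collection's configured vector size
info = qdrant_client.get_collection("test_image_store")
print(info.config.params.vectors)  # shows the vector size(s) Qdrant actually expects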
but if that was the case then the OpenCLIP embeddings that are embedding my images should be erroring on insertion no?
cause I legit specifically create the collection myself, since LlamaIndex lazy-inits them and I want payload indexes
I think it's just a small bug when you are specifically using the query bundle here
I would use the retriever directly and let the retriever figure it out
the way you guys have the retriever doesn't work for me
I can't have it thinking about which images to get period
I need it to filter them by user ID, sort by Cluster ID, then retrieve the top_k from each cluster
I basically just made it a "retriever"
but really it's my own code, it pulls the nodes, converts them to ImageNodes
which, with hybrid search, maybe this is my issue?
def record_to_image_node(self, record: Record) -> ImageNode:
    if record.payload is None:
        raise ValueError(
            "Record payload is None, cannot convert to ImageNode"
        )
    if not isinstance(record.payload, dict):
        raise TypeError("Record payload is not a dictionary")
    if record.vector.get("text-dense", None) is not None:  # type: ignore
        node_content = json.loads(record.payload.get("_node_content", "{}"))
        node = ImageNode(
            id_=record.id,
            embedding=record.vector.get("text-dense"),  # type: ignore
            metadata=node_content.get("metadata", {}),
            text=record.payload.get("text", ""),
            image=record.payload.get("image", None),
            image_path=record.payload.get("image_path", None),
            image_url=record.payload.get("image_url", None),
            image_mimetype=record.payload.get("image_mimetype", None),
        )
    else:
        node_content = json.loads(record.payload.get("_node_content", "{}"))
        node = ImageNode(
            id_=record.id,
            embedding=record.vector,
            metadata=node_content.get("metadata", {}),
            text=record.payload.get("text", ""),
            image=record.payload.get("image", None),
            image_path=record.payload.get("image_path", None),
            image_url=record.payload.get("image_url", None),
            image_mimetype=record.payload.get("image_mimetype", None),
        )
    return node
retriever = index.as_retriever(...)
image_source_nodes = retriever.text_to_image_retrieve("query")
no because the retriever is "dumb"
it tries to ask itself which of the images are relevant to my query
I want it to ask itself which images of each cluster are most relevant to the query
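basically something like this (rough sketch -- the cluster_id metadata key is just what I'm calling it here):
from collections import defaultdict

def top_k_per_cluster(nodes_with_scores, top_k=3):
    """Group scored nodes by their cluster ID, then keep the top_k best-scoring per cluster."""
    clusters = defaultdict(list)
    for nws in nodes_with_scores:
        clusters[nws.node.metadata.get("cluster_id")].append(nws)
    results = []
    for cluster_nodes in clusters.values():
        cluster_nodes.sort(key=lambda n: n.score or 0.0, reverse=True)
        results.extend(cluster_nodes[:top_k])
    return results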
So the issue here is, if you want to retrieve images, you need to use CLIP embeddings, not mpnet
I'm not asking it to retrieve, I know which images I need
so I should be using the image embedding model to embed the query string instead?
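For reference, embedding the query with the image-side model would look roughly like this -- a sketch using the ClipEmbedding class that ships with llama_index (I believe it defaults to an OpenCLIP ViT-B/32 checkpoint, which is 512-dim):
from llama_index.embeddings import ClipEmbedding  # 0.9.x import path; newer versions moved this

clip_embed = ClipEmbedding()
query_embedding = clip_embed.get_query_embedding("a photo of a dog")
print(len(query_embedding))  # should match the image collection's vector size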
do you know what that pooling thing is?
@classmethod
async def async_init(cls, user_id: str):
    """
    Initializes the async qdrant client and what not
    """
    self = cls()
    if not cls._qdrant_client_normal:
        cls._qdrant_client_normal = QdrantClient(
            url="my_url",
            api_key=os.getenv("QDRANT_API_KEY"),
            port=443,
            https=True,
        )
    if not cls._qdrant_client:
        cls._qdrant_client = AsyncQdrantClient(
            url="my_url",
            api_key=os.getenv("QDRANT_API_KEY"),
            port=443,
            https=True,
        )
    print("Qdrant Client Created for testing")
    # Initialize the text and image stores
    text_store_name = "test_text_store"
    cls._text_store = QdrantVectorStore(
        text_store_name,
        client=cls._qdrant_client_normal,
        aclient=cls._qdrant_client,
        enable_hybrid=True,
        sparse_query_fn=sparse_query_vectors,
        sparse_doc_fn=sparse_doc_vectors,
        hybrid_fusion_fn=relative_score_fusion,
    )
    if not cls._text_store._collection_initialized:
        cls._text_store._create_collection(
            collection_name=cls._text_store.collection_name,
            vector_size=768,
        )
    image_store_name = "test_image_store"
    cls._image_store = QdrantVectorStore(
        image_store_name,
        client=cls._qdrant_client_normal,
        aclient=cls._qdrant_client,
        enable_hybrid=True,
        sparse_query_fn=sparse_query_vectors,
        sparse_doc_fn=sparse_doc_vectors,
        hybrid_fusion_fn=relative_score_fusion,
    )
    if not cls._image_store._collection_initialized:
        cls._image_store._create_collection(
            collection_name=cls._image_store.collection_name,
            vector_size=768,
        )
    await self.setup_payload_indices()
    self._image_clusterer = ImageClusterer(
        user_id=user_id,
        txt_vector_store=cls._text_store,
        img_vector_store=cls._image_store,
    )
    # Initialize the ZPipeline with the test stores and embeddings
    cls._pipeline = ZPipeline(
        user_id=TEST_USER_ID,
        txt_vector_store=cls._text_store,
        img_vector_store=cls._image_store,
    )
    cls._image_retriever = ZImageRetriever(
        img_vector_store=cls._image_store,
        user_id=user_id,
    )
    print("Test VectorStore and ZPipeline initialized")
    return self
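(for context, setup_payload_indices does the payload-index creation -- a sketch of that kind of call with qdrant-client; the field names here are just examples of what gets indexed:)
from qdrant_client import models

async def setup_payload_indices(self) -> None:
    """Create keyword payload indexes on the fields we filter on (illustrative field names)."""
    for collection_name in ("test_text_store", "test_image_store"):
        await self._qdrant_client.create_payload_index(
            collection_name=collection_name,
            field_name="user_id",
            field_schema=models.PayloadSchemaType.KEYWORD,
        )
        await self._qdrant_client.create_payload_index(
            collection_name=collection_name,
            field_name="cluster_id",
            field_schema=models.PayloadSchemaType.KEYWORD,
        )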
Something in there is causing my load times to be like, huge, and I don't know why
could it be because when I parse them into the image nodes I'm not storing both dense and sparse vector?
mmm maybe? It should be generating the sparse vectors automatically though 🤔
async def aget_nodes_paginated(
    self,
    vector_client: AsyncQdrantClient,
    collection_name: str,
    query_filter: Filter,
    limit: int = 100,
    get_all: bool = False,
) -> list[Record]:
    """
    Asynchronously get the embeddings from the user, paginated to manage request size.
    """
    nodes: list[Record] = []
    nodes_pulled: tuple[
        list[Record], PointId | None
    ] = await vector_client.scroll(
        collection_name=collection_name,
        scroll_filter=query_filter,
        with_vectors=True,
        with_payload=True,
        limit=limit,
    )
    offset_id = nodes_pulled[1]
    nodes.extend(nodes_pulled[0])
    if get_all:
        while offset_id:
            nodes_pulled = await vector_client.scroll(
                collection_name=collection_name,
                scroll_filter=query_filter,
                with_vectors=True,
                with_payload=True,
                limit=limit,
                offset=offset_id,
            )
            offset_id = nodes_pulled[1]
            nodes.extend(nodes_pulled[0])
    return nodes
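called roughly like this (a sketch -- the user_id payload key is just illustrative of the user-ID filtering mentioned above):
from qdrant_client import models

user_filter = models.Filter(
    must=[
        models.FieldCondition(
            key="user_id",
            match=models.MatchValue(value=user_id),
        )
    ]
)
records = await self.aget_nodes_paginated(
    vector_client=self._qdrant_client,
    collection_name="test_image_store",
    query_filter=user_filter,
    limit=100,
    get_all=True,
)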
and then I convert them to ImageNodes, but I don't store the sparse vector too, should I?
cause I just do
def record_to_image_node(self, record: Record) -> ImageNode:
    if record.payload is None:
        raise ValueError(
            "Record payload is None, cannot convert to ImageNode"
        )
    if not isinstance(record.payload, dict):
        raise TypeError("Record payload is not a dictionary")
    if record.vector.get("text-dense", None) is not None:  # type: ignore
        node_content = json.loads(record.payload.get("_node_content", "{}"))
        node = ImageNode(
            id_=record.id,
            embedding=record.vector.get("text-dense"),  # type: ignore
            metadata=node_content.get("metadata", {}),
            text=record.payload.get("text", ""),
            image=record.payload.get("image", None),
            image_path=record.payload.get("image_path", None),
            image_url=record.payload.get("image_url", None),
            image_mimetype=record.payload.get("image_mimetype", None),
        )
    else:
        node_content = json.loads(record.payload.get("_node_content", "{}"))
        node = ImageNode(
            id_=record.id,
            embedding=record.vector,
            metadata=node_content.get("metadata", {}),
            text=record.payload.get("text", ""),
            image=record.payload.get("image", None),
            image_path=record.payload.get("image_path", None),
            image_url=record.payload.get("image_url", None),
            image_mimetype=record.payload.get("image_mimetype", None),
        )
    return node
Yea not totally sure here -- we're getting super custom now haha
well yeah but how can I debug it? I'm using a MultiModalVectorStoreIndex over a grouping of ImageNodes, that's about it
the only diff is I don't know how ImageNodes look when they have hybrid vectors
or if that's even relevant
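the setup is roughly this, for reference (simplified sketch with 0.9.x-style imports; the exact constructor args may differ from what I actually have):
from llama_index import StorageContext
from llama_index.indices.multi_modal.base import MultiModalVectorStoreIndex

storage_context = StorageContext.from_defaults(
    vector_store=text_store,   # the hybrid Qdrant text store from async_init
    image_store=image_store,   # the Qdrant image store from async_init
)
index = MultiModalVectorStoreIndex(
    nodes=image_nodes,          # the ImageNodes converted from the Qdrant records
    storage_context=storage_context,
)
retriever = index.as_retriever(similarity_top_k=10, image_similarity_top_k=10)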
Do you need hybrid vectors? It really only applies to text
oh, really? I was thinking the hybrid vectors might help with the summary and keywords/entities
but no, I def don't need it, I just thought it would return more optimal results
It might help yea 🤔 But the logic feels pretty tricky with multimodal, because now you have text embeddings, sparse text embeddings, and image embeddings
it could work, but will take some confusing setup I think
Basically you'd need one collection for image embeddings, and another collection for text+sparse embeddings?
Yeah I essentially enabled hybrid on both because I assumed it didn't matter, but lemme try inserting some and stuff without hybrid to see
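i.e. just dropping the hybrid args for the image collection, something like:
image_store = QdrantVectorStore(
    "test_image_nohybrid",
    client=qdrant_client_normal,
    aclient=qdrant_client,
    # no enable_hybrid / sparse fns: image records only carry a dense vector
)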
so I disabled hybrid and made a new collection (test_image_nohybrid) and inserted there, ran the same thing, and still error
I'm trying it one more time now
I set the image_embed_model in the index @Logan M and that worked, but now I get
Getting image records
Converted 15 records to ImageNodes
Found 1 clusters
Traceback (most recent call last):
File "/Users/zachhandley/Documents/GitHub/my_project/api/app/db/vector_stores_temp.py", line 295, in <module>
asyncio.run(main())
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/nest_asyncio.py", line 30, in run
return loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/nest_asyncio.py", line 98, in run_until_complete
return f.result()
^^^^^^^^^^
File "/usr/local/Cellar/python@3.11/3.11.7_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/futures.py", line 203, in result
raise self._exception.with_traceback(self._exception_tb)
File "/usr/local/Cellar/python@3.11/3.11.7_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/tasks.py", line 277, in __step
result = coro.send(None)
^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/app/db/vector_stores_temp.py", line 238, in main
user_images = await vector_store_temp.get_user_images()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/app/db/vector_stores_temp.py", line 191, in get_user_images
return await self._image_retriever.aretrieve(query_str)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/app/ai/zimage_retriever.py", line 212, in aretrieve
return await self._atext_to_image_retrieve(query)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/app/ai/zimage_retriever.py", line 203, in _atext_to_image_retrieve
response = await engine.aquery(query_bundle)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/llama_index/core/base_query_engine.py", line 46, in aquery
return await self._aquery(str_or_query_bundle)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/llama_index/query_engine/multi_modal.py", line 220, in _aquery
response = await self.asynthesize(
^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/llama_index/query_engine/multi_modal.py", line 146, in asynthesize
llm_response = await self._multi_modal_llm.acomplete(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/llama_index/multi_modal_llms/openai.py", line 380, in acomplete
return await self._acomplete(prompt, image_documents, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/llama_index/multi_modal_llms/openai.py", line 362, in _acomplete
message_dict = self._get_multi_modal_chat_messages(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/llama_index/multi_modal_llms/openai.py", line 173, in _get_multi_modal_chat_messages
generate_openai_multi_modal_chat_message(
File "/Users/zachhandley/Documents/GitHub/my_project/api/.venv/lib/python3.11/site-packages/llama_index/multi_modal_llms/openai_utils.py", line 41, in generate_openai_multi_modal_chat_message
mimetype = image_document.image_mimetype or "image/jpeg"
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NodeWithScore' object has no attribute 'image_mimetype'
I think that's low key a bug
It's passing in a NodeWithScore, but image_mimetype is set on ImageNode/ImageDocument
yeah I'm passing it ImageDocuments
a list of nodes that I retrieve, I confirmed they are correct
How do you guys parse data from Qdrant? Is there a helper function I can use?
is it possible that that is being used to parse the image nodes and for some reason dropping the mimetype?
I don't think so. I think it's an object type error (NodeWithScore doesn't have an image_mimetype value, NodeWithScore.node does)
either way I think it should be
mimetype = image_document.node.image_mimetype or "image/jpeg"
That would be the correct code. But actually, NodeWithScore shouldn't even be passed into that function (it should be the node)
I changed it and then AttributeError: 'NodeWithScore' object has no attribute 'image'
Inside multi_modal.py
inside the query_engine
on line 220
it's calling response = await self.asynthesize
which has a list[NodeWithScore]
parameter inside that same file
that seems to be the issue
def _get_image_and_text_nodes(
    nodes: List[NodeWithScore],
) -> Tuple[List[NodeWithScore], List[NodeWithScore]]:
    image_nodes = []
    text_nodes = []
    for res_node in nodes:
        if isinstance(res_node.node, ImageNode):
            image_nodes.append(res_node)
        else:
            text_nodes.append(res_node)
    return image_nodes, text_nodes
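so presumably the synthesize path needs to unwrap those NodeWithScore objects before they hit the OpenAI utils -- roughly this (a sketch based on the acomplete signature in the traceback above; variable names are approximate):
image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
# NodeWithScore.node is the actual ImageNode/ImageDocument that carries image_mimetype
image_documents = [n.node for n in image_nodes]
llm_response = await self._multi_modal_llm.acomplete(
    prompt,
    image_documents=image_documents,
)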
and the synthesize in multi_modal
so it looks like it's calling that wait
could it be because I'm using ImageDocuments and not ImageNodes?
just curious if it could be that simple
I'm giving it image nodes
should I just hold off for now and wait for a fix or dig into it more?