vector_store = <My new vectordb integration> nodes_dict = index.docstore.docs nodes_with_embeddings = [] for id_, vector in index.vector_store._data.embedding_dict.items(): node = nodes_dict[id_] node.embedding = vector nodes_with_embeddings.append(node) new_index = VectorStoreIndex( nodes, storage_context=StorageContext.from_defaults(vector_store=vector_store) )
Traceback (most recent call last):
.........
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\chromadb\api\segment.py", line 361, in _add
validate_batch(
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\chromadb\api\types.py", line 505, in validate_batch
raise ValueError(
ValueError: Batch size 41665 exceeds maximum batch size 5461
vector_store = <My new vectordb integration> nodes_dict = index.docstore.docs nodes_with_embeddings = [] for id_, vector in index.vector_store._data.embedding_dict.items(): node = nodes_dict[id_] node.embedding = vector nodes_with_embeddings.append(node) # batch add batch_size = 5000 for batch_idx in range(0, len(nodes_with_embeddings), batch_size): vector_store.add(nodes_with_embeddings[batch_idx:batch_idx+batch_size]) # then to use your vector store in an index index = VectorStoreIndex.from_vector_store(vector_store)
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\llama_index\indices\vector_store\base.py", line 255, in build_index_from_nodes
return self._build_index_from_nodes(nodes, **insert_kwargs)
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\llama_index\indices\vector_store\base.py", line 236, in _build_index_from_nodes
self._add_nodes_to_index(
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\llama_index\indices\vector_store\base.py", line 190, in _add_nodes_to_index
new_ids = self._vector_store.add(nodes, **insert_kwargs)
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\llama_index\vector_stores\chroma.py", line 243, in add
self._collection.add(
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\chromadb\api\models\Collection.py", line 168, in add
self._client._add(ids, self.id, embeddings, metadatas, documents, uris)
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\chromadb\telemetry\opentelemetry\__init__.py", line 127, in wrapper
return f(*args, **kwargs)
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\chromadb\api\segment.py", line 361, in _add
validate_batch(
File "C:\Users\General\AppData\Roaming\Python\Python310\site-packages\chromadb\api\types.py", line 505, in validate_batch
raise ValueError(
ValueError: Batch size 41665 exceeds maximum batch size 5461
# Use the vector store in an index (the statement appears twice here).
index = VectorStoreIndex.from_vector_store(vector_store)
index = VectorStoreIndex.from_vector_store(vector_store)
# Storage context that points the index at the Chroma vector store.
chroma_storage_context = StorageContext.from_defaults(vector_store=chromadb_vs)

# Build the index from the nodes that already carry embeddings; every node
# is handed over in this single constructor call.
new_index = VectorStoreIndex(
    nodes_with_embeddings,
    storage_context=chroma_storage_context,
)
vector_store.add()
in batches