email_docs = process_emails_sync(filtered_unprocessed_emails, user)
docstore = MongoDocumentStore.from_uri(uri=LLAMAINDEX_MONGODB_STORAGE_SRV)
parser = SentenceSplitter()
nodes = parser.get_nodes_from_documents(my_docs)
docstore.add_documents(nodes)
Settings.llm = OpenAI(model=ModelType.OPENAI_GPT_4_o_MINI.value)
Settings.embed_model = OpenAIEmbedding(api_key=OPENAI_API_KEY)
client = qdrant_client.QdrantClient(url=QDRANT_API_URL, api_key=QDRANT_API_TOKEN)
vector_store = QdrantVectorStore(client=client, collection_name=LLAMAINDEX_QDRANT_COLLECTION_NAME)
index_store = MongoIndexStore.from_uri(uri=LLAMAINDEX_MONGODB_STORAGE_SRV)
storage_context = StorageContext.from_defaults(vector_store=vector_store, index_store=index_store, docstore=docstore)
index = VectorStoreIndex(nodes, storage_context=storage_context, show_progress=True)
index.storage_context.persist()
index_id
because a new index is created every time I run the code above. How do I pass the index_id to the store so that it updates the existing index? Please note that I am already using doc_id
correctly to ensure upserting of documents. VectorStoreIndex(nodes, storage_context=storage_context, show_progress=True, index_id="<index_id>")
but that approach didn't work.
doc_store
portion of index store. My index looks like {"_id":"602a8035-4b00-45d6-8b57-3c9646e4c07e","__data__":"{\"index_id\": \"602a8035-4b00-45d6-8b57-3c9646e4c07e\", \"summary\": null, \"nodes_dict\": {}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}","__type__":"vector_store"}
store_nodes_override=True
VectorStoreIndex.from_vector_store(vector_store)
We prefer not to write our own doc storage wrapper.

storage_context = StorageContext.from_defaults(
    vector_store=vector_store, docstore=docstore, index_store=index_store
)

# create
index = VectorStoreIndex(nodes, storage_context=storage_context, ...)
index.set_index_id("some index id")

# persist
# nothing to do actually, since it should persist automatically

# load
index = load_index_from_storage(storage_context, index_id="some index id")
VectorStoreIndex(nodes, storage_context=storage_context, show_progress=True)
Will index.set_index_id("some index id") change the key in MongoDB?
index.set_index_id("some index id")
changed the key in MongoDB and saved the index with the new key?
VectorStoreIndex(nodes, storage_context=storage_context, show_progress=True, store_nodes_override=True)
doesn't store docs in MongoDB
dict
empty in the index store?
{"_id":"ae60ab16-88b8-41ea-9fd9-e64968a68e5f","__data__":"{\"index_id\": \"ae60ab16-88b8-41ea-9fd9-e64968a68e5f\", \"summary\": null, \"nodes_dict\": {}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}","__type__":"vector_store"}
>>> from llama_index.core import StorageContext, VectorStoreIndex, Document
>>> from llama_index.vector_stores.qdrant import QdrantVectorStore
>>> import qdrant_client
>>> client = qdrant_client.QdrantClient(path="./qdrant_db_test")
>>> vector_store = QdrantVectorStore(collection_name="test", client=client)
>>> storage_context = StorageContext.from_defaults(vector_store=vector_store)
>>> index = VectorStoreIndex.from_documents([Document.example()], storage_context=storage_context, store_nodes_override=True)
>>> index.set_index_id("test_index")
>>> index.storage_context.persist(persist_dir="./qdrant_test_storage")
{"index_store/data": {"test_index": {"__type__": "vector_store", "__data__": "{\"index_id\": \"test_index\", \"summary\": null, \"nodes_dict\": {\"2127aa66-2e6b-4337-b910-8af1e2cb5328\": \"2127aa66-2e6b-4337-b910-8af1e2cb5328\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}