# Load the source documents and stamp each one with a versioned doc_id.
db_documents = db.load_data(query=query)
for document in db_documents:
    # NOTE(review): the suffix is the constant "string", so every document
    # receives the same doc_id. Presumably a placeholder for a per-document
    # key -- confirm before relying on doc_id to tell documents apart.
    document.doc_id = VERSION_NUMBER + "_" + "string"

# Settings for the Postgres-backed vector store, gathered in one place.
pg_store_params = {
    "database": "postgres",
    "host": HOSTNAME,
    "password": PASS,
    "port": 5432,
    "user": USER,
    "table_name": TABLE,
    "embed_dim": 1536,
    "hybrid_search": True,
}
vector_store = PGVectorStore.from_params(**pg_store_params)

# Build the index on top of the existing vector store.
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# Hand the tagged documents to the index's refresh and keep its result.
refreshed_docs = index.refresh(db_documents)

# Persist the index's storage context using the default arguments.
index.storage_context.persist()
import chromadb
from llama_index import (
    Document,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.vector_stores import ChromaVectorStore

# Directory used both to save and to reload the docstore/index store.
PERSIST_DIR = "./storage"


def _retrieve_and_print(label, source_index):
    """Query *source_index* for "document" and print what comes back.

    Asks the index's retriever for up to 10 results, prints *label*
    followed by a list of (text, node_id) pairs, and returns the
    retrieved nodes.
    """
    hits = source_index.as_retriever(similarity_top_k=10).retrieve("document")
    print(label, [(hit.text, hit.node_id) for hit in hits])
    return hits


# Set up a Chroma collection on disk and wrap it as the vector store.
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

documents = [Document(text="document 1", doc_id="doc1")]

# Build the index and confirm the single document is retrieved.
# NOTE(review): store_nodes_override=True presumably keeps the nodes in the
# docstore in addition to the vector store -- confirm against the
# llama_index documentation.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    store_nodes_override=True,
)
nodes = _retrieve_and_print("Initial: ", index)

# Save the docstore/index store.
index.storage_context.persist(persist_dir=PERSIST_DIR)

# Reload the index from the persisted directory, reusing the vector store.
new_storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir=PERSIST_DIR,
)
# A service_context may optionally be passed as well:
# load_index_from_storage(new_storage_context, service_context=service_context)
loaded_index = load_index_from_storage(new_storage_context)

# Confirm the single document is still retrieved after loading.
nodes = _retrieve_and_print("Loaded: ", loaded_index)

# Exercise refresh: "doc1" gets new text and "doc2" is a new document.
documents = [
    Document(text="new document 1", doc_id="doc1"),
    Document(text="document 2", doc_id="doc2"),
]
loaded_index.refresh_ref_docs(documents)

# Confirm the refreshed documents are retrieved.
nodes = _retrieve_and_print("Refreshed: ", loaded_index)
Number of requested results 10 is greater than number of elements in index 1, updating n_results = 1 Initial: [('document 1', 'f6d7740e-f483-4c2a-a017-eaddd3916382')] Number of requested results 10 is greater than number of elements in index 1, updating n_results = 1 Loaded: [('document 1', 'f6d7740e-f483-4c2a-a017-eaddd3916382')] Number of requested results 10 is greater than number of elements in index 2, updating n_results = 2 Refreshed: [('document 2', 'a9e63361-7c7f-4a4a-925f-7956820ef8c1'), ('new document 1', '349c1488-0433-4dbc-a081-ecc5d1829496')]