vector_store = storage_service.get_vector_store(collection_name, db_name) storage_context = StorageContext.from_defaults(vector_store=vector_store) embed_model = OpenAIEmbedding(mode='similarity', embed_batch_size=2000, api_key=user_settings_data.item.get('openai_key')) service_context = ServiceContext.from_defaults(chunk_size=chunk_size, embed_model=embed_model, llm=None, callback_manager=token_counter_callback_manager) node_parser = SimpleNodeParser.from_defaults(chunk_size=chunk_size, chunk_overlap=20) VectorStoreIndex(nodes, storage_context=storage_context, service_context=service_context) # <== THIS
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context) index._add_nodes_to_index(nodes=content_nodes) # <== EXCEPTION: index structure is not provided
update_vector_index: memory before: 539,373,568, after: 539,381,760, consumed: 8,192; exec time: 00:00:05
update_vector_index: memory before: 539,381,760, after: 539,402,240, consumed: 20,480; exec time: 00:00:04
update_vector_index: memory before: 539,402,240, after: 539,480,064, consumed: 77,824; exec time: 00:00:08
update_vector_index: memory before: 539,484,160, after: 539,648,000, consumed: 163,840; exec time: 00:00:06
update_vector_index: memory before: 539,648,000, after: 539,648,000, consumed: 0; exec time: 00:00:04
index.insert(document) or index.insert_nodes(nodes) would be the correct methods to use, though, btw.
update_vector_index: memory before: 488,484,864, after: 491,327,488, consumed: 2,842,624; exec time: 00:00:02
update_vector_index: memory before: 491,470,848, after: 492,474,368, consumed: 1,003,520; exec time: 00:00:00
update_vector_index: memory before: 528,179,200, after: 528,179,200, consumed: 0; exec time: 00:00:00 <== The last update of loop 1
prepare_index_objects: memory before: 486,137,856, after: 486,297,600, consumed: 159,744; exec time: 00:00:00 <== Before calling insert_nodes
insert_nodes() automatically updates the docstore, even if you aren't using it, and that docstore is held in memory.
VectorStoreIndex class
token_counter.reset_counts() will clear the data it is holding.