Hi all, trying to delete nodes in LlamaIndex. I'm in a situation where my documents and nodes are one-to-one (my documents will never have multiple nodes, since my
chunk_size
will not exceed the size I specified). I specify their
ref_doc_id
on ingestion and try to delete with the following methods:
def ingest_documents(self, db_name: str, documents: List[Document]) -> None:
index = self.get_database(db_name)
nodes = []
for doc in documents:
node = Node(
text=doc.text,
metadata=doc.metadata,
id_=doc.id, # Set node_id to UUID
ref_doc_id=doc.id, # Set ref_doc_id to the document's ID
node_info={'document_id': doc.id} # Optionally include node_info
)
nodes.append(node)
index.insert_nodes(nodes)
index.storage_context.persist(persist_dir=f"{self.persist_directory}/{db_name}")
def delete_document(self, db_name: str, document_id: str) -> bool:
try:
index = self.get_database(db_name)
logger.info(f"Attempting to delete document {document_id} from database {db_name}")
# Use the delete_ref_doc method
index.delete_ref_doc(ref_doc_id=document_id)
logger.info(f"Deleted document with ID {document_id}")
# Persist the changes
index.storage_context.persist(persist_dir=f"{self.persist_directory}/{db_name}")
logger.info(f"Persisted changes for database {db_name}")
# Update the in-memory index
self.databases[db_name] = index
return True
except Exception as e:
logger.error(f"Error deleting document {document_id} from database {db_name}: {str(e)}", exc_info=True)
return False
The deletion "fails" despite no errors being thrown and the id is still present. Node
ref_doc_id
is None, which I don't understand. Here are some logs from my program:
INFO:__main__:Document count before deletion: 1
INFO:__main__:Node ID: 520f18d2-4121-4f05-96ac-73da5af787ba
INFO:__main__:Node ref_doc_id: None
INFO:__main__:Node metadata: {'source_file': 'test_source.json', 'updated': True}
INFO:utils.vector_store_handler:Attempting to delete document 520f18d2-4121-4f05-96ac-73da5af787ba from database test_db
INFO:utils.vector_store:Attempting to delete document 520f18d2-4121-4f05-96ac-73da5af787ba from database test_db
INFO:utils.vector_store:Deleted document with ID 520f18d2-4121-4f05-96ac-73da5af787ba
INFO:utils.vector_store:Persisted changes for database test_db
INFO:utils.vector_store_handler:Deleted document 520f18d2-4121-4f05-96ac-73da5af787ba from database test_db
INFO:__main__:Document '520f18d2-4121-4f05-96ac-73da5af787ba' successfully deleted from the database
INFO:__main__:Document count after deletion: 1
ERROR:__main__:Document '520f18d2-4121-4f05-96ac-73da5af787ba' still exists in the database after deletion
Any help towards getting this node deletion functioning would be much appreciated!