insert_nodes
# function that you can use to dynamically add new documents
def process_project_directory(
    project_code: str,
    network_path: str,
    vectordb_path: str,
    max_emails: int,
) -> None:
    """Insert a project's not-yet-indexed XML emails into its Chroma collection.

    Lists the ``.xml`` files in ``<network_path>/<project_code>``, skips every
    file whose name is reported by ``get_existing_ids`` and inserts each
    remaining file as one document into the collection named after
    ``project_code``. Stops once ``max_emails`` documents have been inserted.

    Args:
        project_code: Name of the project folder; also used as the collection
            name.
        network_path: Directory that contains the project folders.
        vectordb_path: Path handed to ``chromadb.PersistentClient``.
        max_emails: Maximum number of new emails to insert during this call.

    Raises:
        FileNotFoundError: If the project directory does not exist. This is
            raised before the vector store is opened, so no collection is
            created for a project that has no folder.
    """
    project_path = os.path.join(network_path, project_code)

    # List the directory first: a missing project folder fails here, before a
    # collection gets created for a project code with nothing behind it.
    # Sorting makes the subset selected by ``max_emails`` reproducible, because
    # os.listdir() returns entries in arbitrary order.
    xml_files = sorted(f for f in os.listdir(project_path) if f.endswith('.xml'))
    print(xml_files)

    client = chromadb.PersistentClient(path=vectordb_path)
    collection = client.get_or_create_collection(project_code)
    vector_store = ChromaVectorStore(chroma_collection=collection)
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        embed_model="local:BAAI/bge-large-en-v1.5",
    )

    # NOTE(review): the skip test below compares file names against whatever
    # get_existing_ids() returns. Confirm that helper yields file names and
    # not store-generated ids; otherwise no file is ever skipped.
    existing_ids = set(get_existing_ids(collection))

    count = 0
    for xml_file in xml_files:
        if count >= max_emails:
            break
        if xml_file in existing_ids:
            continue

        xml_file_path = os.path.join(project_path, xml_file)
        email_content = extract_xml_content(xml_file_path, xml_file)
        document = Document(
            text=(
                f"Body: {email_content.body}, "
                f"Date: {email_content.date_sent}, "
                f"From: {email_content.from_email}, "
                f"To: {email_content.to_email}, "
                f"Subject: {email_content.subject}"
            ),
            metadata={
                "file_name": email_content.xml_file,
                "id": email_content.id,
                "subject": email_content.subject,
                "date_sent": email_content.date_sent,
                "from_email": email_content.from_email,
                "to_email": email_content.to_email,
            },
        )
        index.insert(document)
        # Only inserted emails count toward the cap, so a later call is not
        # used up by files that were already indexed.
        count += 1