# Build a graph with one node per retrieved source chunk, keyed by
# (filename, page number) so that the same page in different files stays distinct.
G = nx.Graph()
for source_node in response.source_nodes:
    node_content = source_node.node.get_text()
    # Direct indexing: a chunk without these metadata keys raises KeyError.
    citation_page = source_node.node.metadata['page_number']
    file_name = source_node.node.metadata['filename']
    G.add_node((file_name, citation_page), content=node_content)
class Document:
    """A chunk of text together with a dictionary of metadata describing it."""

    def __init__(self, text, metadata):
        """Store the chunk text and its metadata mapping.

        Args:
            text: The textual content of the chunk.
            metadata: A dict of metadata; it is stored by reference, not copied.
        """
        self.text = text
        self.metadata = metadata

    def get_content(self):
        """Return the chunk text."""
        return self.text

    def get_metadata(self):
        """Return the metadata dict (the same object passed to the constructor)."""
        return self.metadata

    def get_metadata_str(self):
        """Return the metadata serialized as a JSON string.

        Raises:
            TypeError: If the metadata contains values that ``json`` cannot serialize.
        """
        return json.dumps(self.metadata)

    def get_doc_id(self):
        """Return the ``'doc_id'`` metadata entry, or ``None`` when it is absent."""
        return self.metadata.get('doc_id', None)

    def hash(self):
        """Return the SHA-256 hex digest of the UTF-8 encoded text.

        This is a plain method, not ``__hash__``; it does not affect how
        instances behave in sets or as dict keys.
        """
        return hashlib.sha256(self.text.encode()).hexdigest()
# Create Document objects with extended metadata.
# Each entry of `chunks` is unpacked as a (text, page_number) pair.
documents = []
for doc_idx, (chunk, page_number) in enumerate(chunks):
    metadata = {
        "source_doc_idx": doc_idx,
        "filename": "1.2.2.2 Customer Contract - Stockwood Dr - Woodstock - GA.pdf",
        "page_number": page_number,
        "document_title": "Customer Contract - Stockwood Dr - Woodstock - GA",
        # Number of lines in the chunk: newline count plus one.
        "line_count": chunk.count('\n') + 1,
        # NOTE(review): this presumes every chunk spans exactly 10 lines; it is
        # not derived from the actual line_count of earlier chunks -- confirm.
        "starting_line_number": doc_idx * 10 + 1,
    }
    documents.append(Document(text=chunk, metadata=metadata))
# Initialize VectorStoreIndex
try:
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
except Exception as e:
    # Everything below needs `index`; report the failure and re-raise so the
    # script stops here instead of failing later with a NameError on `index`.
    print(f"Error: {e}")
    raise

# Initialize the CitationQueryEngine
query_engine = CitationQueryEngine.from_args(
    index,
    similarity_top_k=3,
    citation_chunk_size=512,
)

# Query and Retrieve Information
response = query_engine.query("who is party to the agreement")
print("Query Response:", response)
# Create Knowledge Graph Nodes
G = nx.Graph()

# Offset added to a chunk's recorded starting line when printing the citation
# (the original code assumed a line offset of 2).
line_offset = 2

# Add one node per retrieved source chunk and print its citation details.
for i, source_node in enumerate(response.source_nodes):
    # get_content() is called without a metadata_mode keyword argument.
    node_content = source_node.node.get_content()
    metadata = source_node.node.metadata
    citation = metadata.get('page_number', 'Unknown')
    file_name = metadata.get('filename', 'Unknown')
    title = metadata.get('document_title', 'Unknown')

    # NOTE(review): nodes are keyed by page number alone, so source chunks that
    # share a page (or all lack one) end up on the same graph node -- confirm
    # this is intended.
    G.add_node(citation, content=node_content, title=title)

    # Apply the offset only when a starting line is recorded; adding an int to
    # the 'Unknown' fallback string would raise TypeError.
    starting_line = metadata.get('starting_line_number')
    if starting_line is None:
        line_number = 'Unknown'
    else:
        line_number = starting_line + line_offset

    # Nicely formatted metadata output
    print(f"--- Citation for Source Node {i + 1} ---")
    print(f"Filename: {file_name}")
    print(f"Document Title: {title}")
    print(f"Page Number: {citation}")
    print(f"Line Number: {line_number}")
# TODO(review): unresolved question left in the original paste: "document.get_content() ?"