from llama_index.extractors import (
TitleExtractor,
QuestionsAnsweredExtractor,
)
from llama_index.text_splitter import TokenTextSplitter
# Import the metadata-extraction classes.
# NOTE(review): TitleExtractor and QuestionsAnsweredExtractor are already
# imported above from llama_index.extractors — the re-imports below shadow
# them. Keep only one import path (which one depends on the llama_index version).
from llama_index.node_parser.extractors import (
MetadataExtractor,
QuestionsAnsweredExtractor,
TitleExtractor,
)
from llama_index.llms import OpenAI
# Set up the LLM that powers both extractors.
llm = OpenAI(model="gpt-3.5-turbo")

# Build the extractor pipeline as a named list for readability:
# one extractor generates a title, the other generates candidate questions.
extractors = [
    TitleExtractor(nodes=5, llm=llm),
    QuestionsAnsweredExtractor(questions=3, llm=llm),
]
metadata_extractor = MetadataExtractor(
    extractors=extractors,
    # in_place=False: return new node objects instead of mutating the inputs.
    in_place=False,
)

# Run the pipeline over the existing nodes to enrich their metadata.
# NOTE(review): `nodes` is assumed to be defined earlier in the file.
nodes = metadata_extractor.process_nodes(nodes)
print(f"Debug: Processed {len(nodes)} nodes.")  # Debugging
for node in nodes:
    print(f"Processed Node Metadata: {node.metadata}")
Note: the name `TitleExtractor` is somewhat misleading. It does not extract an
existing title from the document — it sends the LLM a sample of the text and
asks it to generate a plausible title for it.