MarvinMetadataExtractor and MetadataExtractor, based on the information extracts provided:

MarvinMetadataExtractor
# MarvinMetadataExtractor example: configure a Marvin-backed metadata
# extractor that fills in the SportsSupplement schema defined below.
#
# Prerequisite (notebook cell or shell): !pip install marvin
import os

import marvin
import openai
from llama_index import SimpleDirectoryReader, set_global_service_context
from llama_index.bridge.pydantic import BaseModel, Field
from llama_index.extractors.marvin_metadata_extractor import MarvinMetadataExtractor
from llama_index.indices.service_context import ServiceContext
from llama_index.llms import OpenAI
from llama_index.node_parser import TokenTextSplitter
from marvin import ai_model

# "sk-..." is a placeholder; substitute a real OpenAI API key before running.
# The same key is handed to both the openai client and Marvin's settings.
os.environ["OPENAI_API_KEY"] = "sk-..."
openai.api_key = os.environ["OPENAI_API_KEY"]
marvin.settings.openai.api_key = os.environ["OPENAI_API_KEY"]

# Load every document under ./data and keep only the first 10,000 characters
# of the first document's text.
documents = SimpleDirectoryReader("data").load_data()
documents[0].text = documents[0].text[:10000]


# Schema handed to the extractor as `marvin_model`. Documented with comments
# rather than a class docstring on purpose: a docstring on an @ai_model /
# pydantic class may become part of the schema or prompt (NOTE(review):
# confirm against the installed marvin version before adding one).
@ai_model
class SportsSupplement(BaseModel):
    name: str = Field(..., description="The name of the sports supplement")
    description: str = Field(
        ..., description="A description of the sports supplement"
    )
    pros_cons: str = Field(
        ..., description="The pros and cons of the sports supplement"
    )


llm_model = "gpt-3.5-turbo"

# Fix: the model is passed as `model=`, not `model_name=`, so that the chosen
# model string is actually applied to the LLM wrapper.
llm = OpenAI(temperature=0.1, model=llm_model, max_tokens=512)

# Build a service context around the LLM and register it as the global one.
service_context = ServiceContext.from_defaults(llm=llm)
set_global_service_context(service_context)

# Token-based splitter: 512-token chunks, 128-token overlap, split on spaces.
node_parser = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128)

# The extractor receives the schema class and the same model string as the LLM.
metadata_extractor = MarvinMetadataExtractor(
    marvin_model=SportsSupplement,
    llm_model_string=llm_model,
)
MetadataExtractor
# MetadataExtractor example: build two metadata extractors (questions only,
# and summaries plus questions) and a node parser using a token splitter.
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    QuestionsAnsweredExtractor,
    SummaryExtractor,
)
from llama_index.schema import MetadataMode
from llama_index.text_splitter import TokenTextSplitter

# LLM shared by every extractor below.
llm = OpenAI(
    temperature=0.1,
    model="gpt-3.5-turbo",
    max_tokens=512,
)

# Token-based splitter: 256-token chunks, 128-token overlap, split on spaces.
text_splitter = TokenTextSplitter(
    separator=" ",
    chunk_size=256,
    chunk_overlap=128,
)

# Variant 1: only the questions-answered extractor (3 questions requested).
metadata_extractor_1 = MetadataExtractor(
    extractors=[QuestionsAnsweredExtractor(questions=3, llm=llm)],
    in_place=False,
)

# Variant 2: summaries for "prev", "self" and "next", followed by the
# questions-answered extractor.
metadata_extractor = MetadataExtractor(
    extractors=[
        SummaryExtractor(summaries=["prev", "self", "next"], llm=llm),
        QuestionsAnsweredExtractor(questions=3, llm=llm),
    ],
    in_place=False,
)

# The node parser is created with the text splitter only; the metadata
# extractor argument is left disabled, exactly as in the original example.
node_parser = SimpleNodeParser.from_defaults(
    text_splitter=text_splitter,
    # metadata_extractor=metadata_extractor,
)