Generate code about MarvinMetadataExtractor and MetadataExtractor

@kapa.ai Generate code about MarvinMetadataExtractor and MetadataExtractor
Please stand by @Matthews as I search the knowledge base. I normally take about 20 seconds or so. :wave:
----------------------
Sure, here is example code for setting up MarvinMetadataExtractor and MetadataExtractor, based on the documentation extracts provided:

For MarvinMetadataExtractor:

Plain Text
# !pip install marvin

from llama_index import ServiceContext, SimpleDirectoryReader
from llama_index.llms import OpenAI
from llama_index.node_parser import TokenTextSplitter
from llama_index.extractors.marvin_metadata_extractor import MarvinMetadataExtractor
import os
import openai
import marvin
from marvin import ai_model
from llama_index.bridge.pydantic import BaseModel, Field

os.environ["OPENAI_API_KEY"] = "sk-..."
openai.api_key = os.environ["OPENAI_API_KEY"]

documents = SimpleDirectoryReader("data").load_data()

# Truncate the first document to limit token usage during extraction
documents[0].text = documents[0].text[:10000]

marvin.settings.openai.api_key = os.environ["OPENAI_API_KEY"]

@ai_model
class SportsSupplement(BaseModel):
    name: str = Field(..., description="The name of the sports supplement")
    description: str = Field(..., description="A description of the sports supplement")
    pros_cons: str = Field(..., description="The pros and cons of the sports supplement")

llm_model = "gpt-3.5-turbo"
llm = OpenAI(temperature=0.1, model=llm_model, max_tokens=512)
service_context = ServiceContext.from_defaults(llm=llm)

node_parser = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128)

# Use this service context by default for subsequent operations
from llama_index import set_global_service_context
set_global_service_context(service_context)

metadata_extractor = MarvinMetadataExtractor(marvin_model=SportsSupplement, llm_model_string=llm_model)

Source: MarvinMetadataExtractor Setup
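
To actually run the extractor, you would parse the documents into nodes and process them. A minimal usage sketch, assuming the legacy (pre-0.10) llama_index API used above, where extractors expose a process_nodes method and this extractor stores its output under a "marvin_metadata" key; verify both against your installed version:

Plain Text
from pprint import pprint

# Split the loaded documents into nodes using the parser defined above
nodes = node_parser.get_nodes_from_documents(documents)

# Run the Marvin extractor over the nodes (process_nodes is the legacy
# extractor entry point; check your llama_index version if this call fails)
nodes = metadata_extractor.process_nodes(nodes)

# Each node's metadata should now carry the structured SportsSupplement fields
pprint(nodes[0].metadata)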

For MetadataExtractor:
Plain Text
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.schema import MetadataMode
from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    SummaryExtractor,
    QuestionsAnsweredExtractor,
)
from llama_index.text_splitter import TokenTextSplitter

llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo", max_tokens=512)

text_splitter = TokenTextSplitter(separator=" ", chunk_size=256, chunk_overlap=128)

# Variant with only the question extractor (shown for comparison; unused below)
metadata_extractor_1 = MetadataExtractor(
    extractors=[
        QuestionsAnsweredExtractor(questions=3, llm=llm),
    ],
    in_place=False,
)

metadata_extractor = MetadataExtractor(
    extractors=[
        SummaryExtractor(summaries=["prev", "self", "next"], llm=llm),
        QuestionsAnsweredExtractor(questions=3, llm=llm),
    ],
    in_place=False,
)

node_parser = SimpleNodeParser.from_defaults(
    text_splitter=text_splitter,
    metadata_extractor=metadata_extractor,  # attach the extractor so it runs during parsing
)

Source: MetadataExtractor Setup
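
With the extractor attached to the node parser, parsing documents will populate each node's metadata with section summaries and generated questions. A minimal usage sketch, assuming the same legacy llama_index API as above and a hypothetical local "data" directory:

Plain Text
from llama_index import SimpleDirectoryReader

# Hypothetical input directory; point this at your own files
documents = SimpleDirectoryReader("data").load_data()

# Parsing runs the text splitter and the attached metadata extractors
nodes = node_parser.get_nodes_from_documents(documents)

# Inspect a node as the LLM would see it, extracted metadata included
print(nodes[0].get_content(metadata_mode=MetadataMode.LLM))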
----------------------
Note: you can ask me a follow up question by @ mentioning me again :speech_balloon:
----------------------