Find answers from the community

K
Kavin
Offline, last seen 3 months ago
Joined September 25, 2024
Hey all — I'm having an issue with pickling objects in what seems to be a relatively simple scenario. Here's the code:
Plain Text
from llama_index import VectorStoreIndex
from llama_index.schema import Document
import os
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter

class NewlineTextSplitter(TokenTextSplitter):
    """Text splitter that chunks on blank lines rather than token counts.

    NOTE(review): despite the name, ``'\n\n'`` splits at paragraph
    boundaries (blank lines), not at every single newline.
    """

    def split_text(self, text):
        """Return the chunks of *text*, one per blank-line-separated paragraph."""
        return text.split('\n\n')

class CharacterSheetIndexer:
    """Builds one VectorStoreIndex per character-sheet file in a directory."""

    def __init__(self, character_sheets_dir):
        # Directory containing one plain-text character sheet per file.
        self.character_sheets_dir = character_sheets_dir
        # Maps filename -> VectorStoreIndex built from that file's text.
        self.indexes = {}

    def create_indexes(self):
        """Read each file in the directory and build a vector index for it.

        Populates ``self.indexes``, keyed by filename.
        """
        # NodeParser that chunks documents at blank-line boundaries.
        node_parser = SimpleNodeParser(text_splitter=NewlineTextSplitter())

        for filename in os.listdir(self.character_sheets_dir):
            path = os.path.join(self.character_sheets_dir, filename)
            # listdir() also yields subdirectories etc.; only read real files.
            if not os.path.isfile(path):
                continue
            with open(path, 'r', encoding='utf-8') as f:
                character_sheet = f.read()

            # Create a document from the character sheet
            document = Document(text=character_sheet)

            # SimpleNodeParser exposes get_nodes_from_documents(), not
            # parse(); it takes a list of Documents and returns nodes.
            nodes = node_parser.get_nodes_from_documents([document])

            # from_documents() expects Document objects, not nodes; nodes
            # are passed to the VectorStoreIndex constructor directly.
            self.indexes[filename] = VectorStoreIndex(nodes)

def main():
    """Build a vector index for every character sheet on disk."""
    indexer = CharacterSheetIndexer(character_sheets_dir='resources/characters')
    indexer.create_indexes()


if __name__ == "__main__":
    main()
21 comments
K
L
I'm getting a divide-by-zero error from this line in prompt_helper.py:
Plain Text
        result = (
            self.max_input_size - num_prompt_tokens - self.num_output
        ) // num_chunks

I'm trying to load a GPTTreeIndex from persisted storage. Not sure if it's related, but it looks like it might be trying to load a nonexistent vector index. This is the log line that leads me to think that's what's going on:
Plain Text
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 0 tokens

Does anyone have any ideas?
11 comments
K
d