from llama_index import VectorStoreIndex
from llama_index.schema import Document
import os
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter


class NewlineTextSplitter(TokenTextSplitter):
    def split_text(self, text):
        # Split the text into chunks on blank lines (double newlines)
        chunks = text.split('\n\n')
        return chunks


class CharacterSheetIndexer:
    def __init__(self, character_sheets_dir):
        self.character_sheets_dir = character_sheets_dir
        self.indexes = {}

    def create_indexes(self):
        # Create a NodeParser that uses NewlineTextSplitter
        node_parser = SimpleNodeParser(text_splitter=NewlineTextSplitter())

        # Read all character sheets
        for filename in os.listdir(self.character_sheets_dir):
            with open(os.path.join(self.character_sheets_dir, filename), 'r') as f:
                character_sheet = f.read()

            # Create a document from the character sheet
            document = Document(text=character_sheet)

            # Chunk the document into nodes using the NodeParser
            nodes = node_parser.get_nodes_from_documents([document])

            # Create a separate index for each character sheet,
            # built directly from the pre-chunked nodes
            self.indexes[filename] = VectorStoreIndex(nodes)


def main():
    # Create an instance of CharacterSheetIndexer
    indexer = CharacterSheetIndexer(character_sheets_dir='resources/characters')

    # Index all character sheets
    indexer.create_indexes()


if __name__ == "__main__":
    main()
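For completeness, here is a minimal sketch of querying one of the per-sheet indexes built above, using llama_index's standard as_query_engine entry point. The sheet filename and the question text are hypothetical placeholders, not from the source:

# Hypothetical usage sketch: query one of the indexes created by
# CharacterSheetIndexer. The filename key and question are placeholders.
indexer = CharacterSheetIndexer(character_sheets_dir='resources/characters')
indexer.create_indexes()

index = indexer.indexes['alice.txt']      # assumed sheet filename
query_engine = index.as_query_engine()    # standard llama_index query API
response = query_engine.query("What weapons does this character carry?")
print(response)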
result = (self.max_input_size - num_prompt_tokens - self.num_output) // num_chunks
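This line computes the per-chunk token budget: whatever remains of the model's context window after subtracting the prompt tokens and the tokens reserved for the output is divided evenly across the chunks (this matches llama_index's PromptHelper chunk-size arithmetic). A small worked example, with assumed values that are not from the source:

# Worked example of the budget arithmetic above; the numbers are
# assumed for illustration, not taken from the source.
max_input_size = 4096     # model context window
num_prompt_tokens = 500   # tokens consumed by the prompt
num_output = 256          # tokens reserved for the model's answer
num_chunks = 3            # chunks that must share the leftover budget

chunk_size = (max_input_size - num_prompt_tokens - num_output) // num_chunks
print(chunk_size)         # (4096 - 500 - 256) // 3 == 1113 tokens per chunk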
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 0 tokens