node_postprocessor over an HTTP call. The reason for this is that the main LlamaIndex RAG app is a Lambda, and the startup cost of loading a BGE re-ranker is high, so I was going to have it pre-loaded behind a mini Flask app. Are there any guides on how to make a Hugging Face reranker model like BGE or JINA work behind an API, the way Cohere and others do? Would I have to write my own postprocessor implementation that wraps this? (A sketch of what such a wrapper might look like follows the snippet below.)

For reference, here is the ingestion code I'm currently running:

embedding = OpenAIEmbedding(api_key="XXX", model="text-embedding-3-large")
node_parser = SentenceWindowNodeParser.from_defaults(window_size=3)
dir_reader = SimpleDirectoryReader(input_files=[tmpfile])
docs = dir_reader.load_data(show_progress=True)
for doc in docs:
    doc.metadata["external_id"] = external_id
nodes = node_parser.get_nodes_from_documents(docs, show_progress=True)
print("Getting batched embeddings for nodes from embedding " + embedding.model_name + "..")
text_chunks = [node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes]
embeddings = embedding.get_text_embedding_batch(text_chunks, show_progress=True)
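On the last question: a small custom node postprocessor that wraps the HTTP call to the pre-loaded reranker looks workable. A minimal sketch, assuming a hypothetical Flask endpoint at http://localhost:8000/rerank that accepts {"query": ..., "texts": [...]} and returns {"scores": [...]}; the class name, URL, and payload shape are invented here, and the imports are the llama_index.core paths (older releases use different module paths):

from typing import List, Optional

import requests
from llama_index.core.postprocessor.types import BaseNodePostprocessor
from llama_index.core.schema import NodeWithScore, QueryBundle


class RemoteRerankPostprocessor(BaseNodePostprocessor):
    """Defers reranking to an external HTTP service (e.g. a mini Flask app)."""

    url: str = "http://localhost:8000/rerank"  # hypothetical reranker endpoint
    top_n: int = 5

    @classmethod
    def class_name(cls) -> str:
        return "RemoteRerankPostprocessor"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        if query_bundle is None or not nodes:
            return nodes
        # Ship the query and candidate texts to the remote reranker.
        payload = {
            "query": query_bundle.query_str,
            "texts": [n.node.get_content() for n in nodes],
        }
        scores = requests.post(self.url, json=payload, timeout=30).json()["scores"]
        # Attach the returned scores and keep only the best top_n nodes.
        for node, score in zip(nodes, scores):
            node.score = float(score)
        return sorted(nodes, key=lambda n: n.score or 0.0, reverse=True)[: self.top_n]

An instance can then be wired in the same way the built-in rerankers are, e.g. node_postprocessors=[RemoteRerankPostprocessor()] on as_query_engine() or as_chat_engine().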
openai.BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 8192 tokens, however you requested 71420 tokens (71420 in your prompt; 0 for the completion). Please reduce your prompt; or completion length.", 'type': 'invalid_request_error', 'param': None, 'code': None}}
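That 400 means a request sent to text-embedding-3-large exceeded its 8,192-token limit. A quick way to check whether a single chunk is oversized (or the batch as a whole is huge) is to count tokens before calling get_text_embedding_batch; a small sketch reusing text_chunks from the snippet above (cl100k_base should be the right tiktoken encoding for the v3 embedding models):

import tiktoken

# text-embedding-3-large / -small use the cl100k_base encoding
enc = tiktoken.get_encoding("cl100k_base")

token_counts = [len(enc.encode(chunk)) for chunk in text_chunks]
print("total tokens:", sum(token_counts), "largest chunk:", max(token_counts))

# Chunks that cannot fit in a single embedding request on their own
oversized = [i for i, n in enumerate(token_counts) if n > 8191]
print("oversized chunk indices:", oversized)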
def add_nodes(self, nodes):
    return self.vector_store.add(nodes)

def add_nodes_from_file(
    self, tmpfile, external_id: str, node_parser: NodeParser, embedding: HuggingFaceEmbedding
):
    dir_reader = SimpleDirectoryReader(input_files=[tmpfile])
    docs = dir_reader.load_data()
    for doc in docs:
        doc.metadata["external_id"] = external_id
    nodes = node_parser.get_nodes_from_documents(docs)
    for node in nodes:
        node_embeddings = embedding.get_text_embedding(
            node.get_content(metadata_mode="all")
        )
        node.embedding = node_embeddings
    res = self.add_nodes(nodes)
    print("Result from add nodes: " + str(res))
    return res
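For what it's worth, the per-node loop above could also be collapsed into a single batched call, like the first snippet uses; a sketch over the same nodes and embedding objects, keeping the "all" metadata mode (MetadataMode.ALL is the enum form of that string; the import path shown is the llama_index.core one):

from llama_index.core.schema import MetadataMode

# Batched alternative to the per-node embedding loop above
texts = [node.get_content(metadata_mode=MetadataMode.ALL) for node in nodes]
vectors = embedding.get_text_embedding_batch(texts, show_progress=True)
for node, vector in zip(nodes, vectors):
    node.embedding = vector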
.from_vector_store() method, but DocumentSummaryIndex does not. When I try load_index_from_storage(storage_context=storage_context, service_context=service_context) I get an error about a missing persist_dir:

ValueError: No index in storage context, check if you specified the right persist_dir.

which requires me to .persist() the DocumentSummaryIndex to a file.

db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("test")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection, persist_dir="./chroma_db")
storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir="./chroma_db")
service_context = ServiceContext.from_defaults(
    llm=chatgpt,
    transformations=extractors,
    embed_model=embedding,
    system_prompt=system_prompt,
)
doc_summary_index = DocumentSummaryIndex.from_documents(
    documents=docs,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
)
doc_summary_index.storage_context.persist(persist_dir="./chroma_db")
doc_summary_index = load_index_from_storage(storage_context=storage_context, service_context=service_context)
query_engine = doc_summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
    service_context=service_context,
)
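As far as I understand, the DocumentSummaryIndex keeps its index struct and summary nodes in the docstore/index store files that .persist() writes into persist_dir, while Chroma only holds the embeddings, so the reload has to recreate a storage context pointing at that same directory. A sketch of what the reload could look like in a fresh process, reusing the paths and collection name from the snippet above (same imports as the earlier snippets):

# Rebuild the same storage context, then load the persisted index
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("test")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir="./chroma_db",  # must contain the JSON stores written by .persist()
)
doc_summary_index = load_index_from_storage(
    storage_context=storage_context, service_context=service_context
)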
vector_index = VectorStoreIndex.from_vector_store(
vector_store=store.vector_store, service_context=service_context
)
# Create the chat engine
chat_engine = vector_index.as_chat_engine(**chat_engine_params)
response = chat_engine.chat(query)
returns AssertionError()
and nothing else.
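Since a bare AssertionError() gives nothing to go on, one way to at least see where it is raised is to catch it around the same call and dump the traceback; a minimal sketch:

import traceback

try:
    response = chat_engine.chat(query)
except AssertionError:
    traceback.print_exc()  # shows which assert inside the chat engine fired
    raise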