# Per-chunk metadata attached to each Document before indexing.
# NOTE(review): the original dict listed "provider" twice — the second value
# was a literal "<provider>" placeholder (a syntax error), and a duplicate
# key would silently overwrite the first anyway. Only one entry is kept.
metadata = {
    "provider": provider,
    "admin_id": admin_id,
    "chunk_size": int(self.chunk_size),
    "chunk_overlap": int(self.chunk_overlap),
    "num_indexes": int(num_indexes),
    "category": tag,
    "page_label": page_no,
    "document_name": document_name,
    "organisation_name": organisation_name,
    "uploaded_at": get_current_date(),
}

# Bookkeeping-only keys: excluded from both the LLM prompt and the embedding
# text so they never influence retrieval or generation.
_excluded_keys = [
    "category",
    "page_label",
    "num_indexes",
    "chunk_overlap",
    "chunk_size",
    "admin_id",
]

Document(
    text=clean_text(text),
    doc_id=f"{document_id}",
    metadata=metadata,
    # Pass copies so the two lists never alias each other.
    excluded_llm_metadata_keys=list(_excluded_keys),
    excluded_embed_metadata_keys=list(_excluded_keys),
    metadata_seperator=" | ",  # (sic) llama_index spells the kwarg "seperator"
    metadata_template="{key} = {value}",
    text_template="Metadata: {metadata_str}\n=====\nContent: {content}",
)
from llama_index.data_structs.node import Node

# Sample node used to exercise metadata filtering.
# NOTE(review): the original line never closed the list literal — fixed here.
nodes = [
    Node(
        "Michael Jordan is a retired professional basketball player,"
        " widely regarded as one of the greatest basketball players of all time.",
        extra_info={
            "category": ["Sports", "Entertainment"],
            "country": "United States",
        },
    ),
]
{"query": "sport persons business men", "filters": [{"key": "category", "value": "Sports"}, {"key": "category", "value": "Business"}, {"key": "country", "value": "United States"}], "top_k": 2}
Using filters: {'category': 'Business', 'country': 'United States'}  # note: the 'Sports' filter was not applied here
# Combine the two retrievers' results according to the configured mode:
# "AND" keeps only ids found by BOTH retrievers; any other mode keeps
# everything found by EITHER.
retrieve_ids = (
    vector_ids.intersection(keyword_ids)
    if self._mode == "AND"
    else vector_ids.union(keyword_ids)
)
# Build the answer synthesizer (streaming, custom QA prompt) and wire it into
# a query engine that discards low-similarity nodes before synthesis.
response_synthesizer = get_response_synthesizer(
    service_context=self.service_context,
    text_qa_template=qa_chat_prompt,
    response_mode=self.index_response_mode,
    streaming=True,
)

custom_index = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[
        # Nodes scoring below the cutoff never reach the synthesizer.
        SimilarityPostprocessor(similarity_cutoff=self.similarity_cutoff),
    ],
)
from langchain.chat_models import ChatOpenAI

# Chat-model client built from instance settings; api_key/base_url allow
# pointing at a proxy or any OpenAI-compatible endpoint.
llm = ChatOpenAI(
    model=self.model_name,
    temperature=self.temperature,
    model_kwargs=model_kwargs,
    max_tokens=self.max_output_tokens,
    api_key=api_key,
    base_url=base_url,
)
def store_index(self, documents, payload, service_context):
    """Parse *documents* into nodes, persist them, and return the new index.

    The whole operation runs under ``self.lock`` so concurrent uploads
    cannot interleave docstore / vector-store writes. When *payload*
    contains an ``oldDocumentId``, the superseded document's vectors are
    deleted after the new index has been built.
    """
    with self.lock:
        node_parser = service_context.node_parser
        parsed_nodes = node_parser.get_nodes_from_documents(documents)

        # toquery=False — presumably selects a write-capable storage
        # context (vs. a read-only/query one); confirm against the helper.
        storage_ctx = self.get_pinecone_storage_context(payload, toquery=False)
        storage_ctx.docstore.add_documents(parsed_nodes)

        new_index = GPTVectorStoreIndex(
            parsed_nodes,
            storage_context=storage_ctx,
            service_context=service_context,
        )

        # Replacing an existing document: drop its stale vectors.
        if "oldDocumentId" in payload:
            self.delete_old_vector(payload)

        return new_index
Error - Unknown model: gpt-4-0613. Please provide a valid OpenAI model name. Known models are: gpt-4, gpt-4-0314, gpt-4-32k, gpt-4-32k-0314, gpt-3.5-turbo, gpt-3.5-turbo-0301, text-ada-001, ada, text-babbage-001, babbage, text-curie-001, curie, davinci, text-davinci-003, text-davinci-002, code-davinci-002, code-davinci-001, code-cushman-002, code-cushman-001.
# Chat model plus llama_index predictor wrapper.
# NOTE(review): the original passed ``headers = { <some_header> }`` — a
# "<some_header>" placeholder is not valid Python. A defined (empty) dict is
# used so the snippet runs; restore the real custom header(s) here.
request_headers = {}  # TODO(review): populate with the original custom header(s)

self.llm = ChatOpenAI(
    model=self.model_name,
    temperature=self.temperature,
    max_tokens=self.max_tokens,
    frequency_penalty=self.frequency_penalty,
    top_p=self.top_p,
    headers=request_headers,
)

# LLM Predictor
self.llm_predictor = LLMPredictor(llm=self.llm)
from llama_index.vector_stores import PineconeVectorStore
from llama_index.storage.storage_context import StorageContext

# NOTE(review): the original used ``index`` in PineconeVectorStore(...) BEFORE
# ``index = pinecone.Index(...)`` was assigned, built a storage_context it
# never used, and passed query-time kwargs (pinecone_index, metadata_filters,
# namespace) directly to from_documents. Reordered so the Pinecone index
# exists first and insertion goes through the storage context.
index = pinecone.Index("dobby-springworks-be-dev")

vector_store = PineconeVectorStore(
    pinecone_index=index,
    add_sparse_vector=True,  # hybrid (dense + sparse) upserts
    namespace="temp",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# "Insert"ing: from_documents() chunks and upserts every document.
# NOTE(review): the original also passed
# metadata_filters={'doc_name': 'Handbook', 'admin_name': 'Siddhant',
# 'user_name': 'Siddhant'} — filters belong on the query/retriever side,
# not on insertion; apply them there instead.
pcv_index = GPTVectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

print(index.describe_index_stats())