```python
# Code 1:
from llama_index import GPTVectorStoreIndex, ServiceContext

# storage_context = mongodb_storage_context()
embed_model = huggingface_embed_model()

# Set up the service context, i.e., the embedding model (and completion model if used)
service_context = ServiceContext.from_defaults(embed_model=embed_model)

index_GPTVectorStoreIndex = GPTVectorStoreIndex(
    nodes=nodes,
    service_context=service_context,
    show_progress=True,
)
```

```python
# Code 2:
from llama_index import GPTSimpleKeywordTableIndex, ServiceContext

# storage_context = mongodb_storage_context()
embed_model = huggingface_embed_model()

# Set up the service context, i.e., the embedding model (and completion model if used)
service_context = ServiceContext.from_defaults(embed_model=embed_model)

index_GPTSimpleKeywordTableIndex = GPTSimpleKeywordTableIndex(
    nodes=nodes,
    service_context=service_context,
    show_progress=True,
)
```

Verification (my node metadata has a `url` field):

```python
# Nodes whose text contains the query term, found by plain substring matching
filter_nodes = [x for x in nodes if " " + query_term.lower() in x.text.lower()]
filter_nodes_urls = list(set([x.metadata["url"] for x in filter_nodes]))

# URLs of the nodes returned by the vector-store index retriever
retriever_nodes_GPTVectorStoreIndex_urls = []
for each_node in retriever_nodes_GPTVectorStoreIndex:
    for _node in nodes:
        if _node.id_ == each_node.id_:
            retriever_nodes_GPTVectorStoreIndex_urls.append(_node.metadata["url"])
            break
retriever_nodes_GPTVectorStoreIndex_urls = list(set(retriever_nodes_GPTVectorStoreIndex_urls))

retriever_nodes_GPTVectorStoreIndex_urls == filter_nodes_urls
# False
```
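One likely reason for the `False` above (my inference, not something the trace proves): the vector retriever only returns the top `similarity_top_k` nodes by embedding similarity (2 by default), which will rarely coincide with an exhaustive substring match, and comparing two `list(set(...))` results with `==` also depends on set iteration order, which can differ for sets built in different insertion orders. A minimal sketch of an order-insensitive comparison with the retriever widened to all nodes, reusing `nodes`, `query_term`, and `filter_nodes` from above:

```python
# Widen the vector retriever so it may return every node instead of the default top-2
retriever = index_GPTVectorStoreIndex.as_retriever(similarity_top_k=len(nodes))
retrieved = retriever.retrieve(query_term)  # returns NodeWithScore objects

# Compare as sets, which ignores ordering entirely
retrieved_urls = {scored.node.metadata["url"] for scored in retrieved}
filtered_urls = {x.metadata["url"] for x in filter_nodes}
print(retrieved_urls == filtered_urls)
print(retrieved_urls ^ filtered_urls)  # symmetric difference: URLs only one side found
```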
```python
index = GPTSimpleKeywordTableIndex(
    nodes=nodes,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
)
retriever = index.as_retriever()
# Note: this rebinds `nodes`, shadowing the original node list
nodes = retriever.retrieve("formation")
```
```
/usr/local/lib/python3.10/dist-packages/llama_index/indices/service_context.py in from_defaults(cls, llm_predictor, llm, prompt_helper, embed_model, node_parser, llama_logger, callback_manager, system_prompt, query_wrapper_prompt, chunk_size, chunk_overlap, context_window, num_output, chunk_size_limit)
    153             raise ValueError("Cannot specify both llm and llm_predictor")
    154         llm = resolve_llm(llm)
--> 155         llm_predictor = llm_predictor or LLMPredictor(llm=llm)
    156         if isinstance(llm_predictor, LLMPredictor):
    157             llm_predictor.llm.callback_manager = callback_manager

/usr/local/lib/python3.10/dist-packages/llama_index/llm_predictor/base.py in __init__(self, llm, callback_manager, system_prompt, query_wrapper_prompt)
     93     ) -> None:
     94         """Initialize params."""
---> 95         self._llm = resolve_llm(llm)
     96
     97         if callback_manager:
```
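The traceback ends inside `resolve_llm`, i.e. `ServiceContext.from_defaults` is trying to construct a default (OpenAI) LLM even though only an embedding model is wanted. A possible workaround sketch, assuming a llama_index 0.8.x release where passing `llm=None` explicitly makes `from_defaults` fall back to a `MockLLM`; `retriever_mode="simple"` additionally keeps query-time keyword extraction regex-based, so no completion model is needed:

```python
from llama_index import GPTSimpleKeywordTableIndex, ServiceContext

# llm=None (passed explicitly) resolves to a MockLLM in recent 0.8.x releases,
# so no OpenAI key is required just to build the service context.
service_context = ServiceContext.from_defaults(llm=None, embed_model=embed_model)

index = GPTSimpleKeywordTableIndex(
    nodes=nodes,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
)

# "simple" uses regex-based keyword extraction on the query instead of an LLM
retriever = index.as_retriever(retriever_mode="simple")
retrieved_nodes = retriever.retrieve("formation")
```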
```python
similarity_top_k = 1000
response_mode = "no_text"
query_term = "formation"

query_engine = picone_index_llama.as_query_engine(
    retriever_mode="embedding",
    service_context=service_context,
    verbose=True,
    similarity_top_k=similarity_top_k,
    response_mode=response_mode,
)
query_engine.query(query_term)
```

```
ApiException: (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'content-type': 'application/json', 'Content-Length': '103', 'date': 'Sat, 09 Sep 2023 10:45:46 GMT', 'x-envoy-upstream-service-time': '0', 'server': 'envoy', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"code":3,"message":"Vector dimension 1536 does not match the dimension of the index 768","details":[]}
```
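My reading of the 400 (an inference, not confirmed by the trace): the Pinecone index holds 768-d vectors from the HuggingFace embedding model, but the query was embedded with the 1536-d OpenAI default (text-embedding-ada-002), i.e. the HuggingFace service context was not in effect at query time. A sketch that pins it globally so index-time and query-time embeddings stay consistent, assuming `set_global_service_context` from the same 0.8.x line:

```python
from llama_index import ServiceContext, set_global_service_context

# Register the HuggingFace embed model globally so every query engine
# built afterwards embeds queries with the same 768-d model.
service_context = ServiceContext.from_defaults(embed_model=embed_model)
set_global_service_context(service_context)

query_engine = picone_index_llama.as_query_engine(
    retriever_mode="embedding",
    verbose=True,
    similarity_top_k=similarity_top_k,
    response_mode=response_mode,
)
response = query_engine.query(query_term)
```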
```python
from llama_index import StorageContext, load_index_from_storage
from llama_index.storage.docstore import MongoDocumentStore
from llama_index.storage.index_store import MongoIndexStore

storage_context = StorageContext.from_defaults(
    docstore=MongoDocumentStore.from_uri(uri=mongo_url, db_name=db_name),
    index_store=MongoIndexStore.from_uri(uri=mongo_url, db_name=db_name),
)
index_test = load_index_from_storage(storage_context)
```
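If the Mongo index store ends up holding more than one index (e.g. both the vector index and the keyword table index from above), `load_index_from_storage` cannot pick one on its own and will error. A sketch of disambiguating with `index_id`, where `"my_index_id"` is a hypothetical placeholder:

```python
# "my_index_id" stands in for the real id: index.index_id recorded when the
# index was built, or a value set earlier via index.set_index_id(...).
index_test = load_index_from_storage(
    storage_context,
    index_id="my_index_id",
    service_context=service_context,  # reuse the HuggingFace embed model at load time
)
```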