How exactly did you use load_from_string? Just want to understand the use case and see how we can migrate it
In a very boring/simple way.
async def query(
    self,
    data: str,
    query: str,
):
    index = GPTSimpleVectorIndex.load_from_string(
        data,
        llm_predictor=self.create_llm_predictor(),
    )
    response = index.query(query, similarity_top_k=1)  # default chunks is 1 anyway - make this easier to tweak
    return response
We need to handle the file upstream like this:
response = await llama_index.query(
file_store.retrieve_file(index_key).decode(), query_text
)
because among other things we're using a hash as the name of the file, to make it easier to hit a commonly used file that others have already indexed (and which the querier possesses)
One bit of added complexity here is that we're on Azure, and we had to do some funky stuff with langchain-defined LLMs to make that all work with our API management solution.
(but you can ignore that bit!)
file_store is s3, but when developing locally it's just the local filesystem.
i.e. we want to mediate it ourselves, not just use a built-in loader
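For context, the save side is roughly like this (just a sketch; save_to_string is the old API's counterpart of load_from_string, and store_file plus the SHA-256 key are illustrative stand-ins for the hashing scheme I described):

import hashlib

def save_index(file_store, source_text: str, index) -> str:
    # hash the source content so identical docs map to the same stored index
    index_key = hashlib.sha256(source_text.encode()).hexdigest()
    # old GPTSimpleVectorIndex API: save_to_string() mirrors load_from_string()
    serialized = index.save_to_string()
    # hypothetical counterpart to the file_store.retrieve_file() call above
    file_store.store_file(index_key, serialized.encode())
    return index_key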
ok, so you have two options
Option 1: Directly integrate with S3 using fsspec
https://gpt-index.readthedocs.io/en/latest/how_to/storage/save_load.html#using-a-remote-backend (rough sketch after the option 2 snippet below)
Option 2: Dumping the storage_context to dict, and then moving that to/from a json blob
# "save"
storage_dict = index.storage_context.to_dict()
storage_json = json.dumps(storage_dict)
# "load"
storage_dict = json.loads(storage_json)
storage_context = StorageContext.from_dict(storage_dict)
index = VectorStoreIndex([], storage_context=storage_context)
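And for option 1, roughly what the fsspec route from those docs looks like (sketch only; the bucket name and credentials are placeholders, so double-check the linked page for the exact signatures):

import s3fs
from llama_index import StorageContext, load_index_from_storage

# s3fs implements the fsspec interface; key/secret/bucket are placeholders
s3 = s3fs.S3FileSystem(key="...", secret="...")

# "save": persist the index's storage context straight to S3
index.storage_context.persist(persist_dir="my-bucket/indexes", fs=s3)

# "load": rebuild the storage context from the same remote dir
storage_context = StorageContext.from_defaults(persist_dir="my-bucket/indexes", fs=s3)
index = load_index_from_storage(storage_context)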
I mean I guess 1 is possible but it's a pretty big/messy rewrite at this point.
Assuming I have the file text decoded, ready to load, I don't get how / where that plugs into storage context
yea, option 2 is closer to your current setup
Did my example not quite explain well enough? Let me explain using your code
is there a structure I need to put the file in to be able to load it into storage_dict?
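There shouldn't be any extra structure needed: the file is just the dict from to_dict() serialized to JSON. A sketch of the new save side, reusing the same hypothetical hash-keyed file_store as in the earlier sketch:

import json

# the stored "file" is just the JSON-serialized storage context dict
storage_json = json.dumps(index.storage_context.to_dict())
# store_file / index_key are the same illustrative stand-ins as before
file_store.store_file(index_key, storage_json.encode())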
ok, solved a buncha issues including changes in langchain and how it handles Azure OpenAI calls
I think I've got it like 99% of the way there - it now correctly creates embeddings, I've been able to inspect the files
async def query(self, data: str, query: str):
    storage_dict = json.loads(data)
    llm = OpenAI(
        openai_api_key=self.access_token,
        temperature=0,
        model_kwargs={
            "deployment_id": "text-davinci-003",
            "engine": "text-davinci-003",
            "api_base": settings.OPENAI_API_BASE,
            "api_type": settings.OPENAI_API_TYPE,
            "api_version": settings.OPENAI_API_VERSION,
        },
    )
    service_context = ServiceContext.from_defaults(
        embed_model=LangchainEmbedding(
            OpenAIEmbeddings(
                openai_api_key=self.access_token,
                model="text-embedding-ada-002",
                embedding_ctx_length=4095,
                chunk_size=1,
            )
        ),
        llm_predictor=LLMPredictor(llm=llm),
    )
    storage_context = StorageContext.from_dict(storage_dict)
    index = VectorStoreIndex(
        [], storage_context=storage_context, service_context=service_context
    )
    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    return response
2023-06-28 23:35:09,169 - ERROR - query[error]
Traceback (most recent call last):
File "C:\Temp\Genie\LlamaAPI\main.py", line 238, in query
response = await llama_index.query(
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Temp\Genie\LlamaAPI\llamaapi\data\model\llama_index.py", line 130, in query
response = query_engine.query(query)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Temp\Genie\LlamaAPI\venv\Lib\site-packages\llama_index\indices\query\base.py", line 23, in query
response = self._query(str_or_query_bundle)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Temp\Genie\LlamaAPI\venv\Lib\site-packages\llama_index\query_engine\retriever_query_engine.py", line 142, in
_query
nodes = self._retriever.retrieve(query_bundle)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Temp\Genie\LlamaAPI\venv\Lib\site-packages\llama_index\indices\base_retriever.py", line 21, in retrieve
return self._retrieve(str_or_query_bundle)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Temp\Genie\LlamaAPI\venv\Lib\site-packages\llama_index\token_counter\token_counter.py", line 78, in wrapped_llm_predict
f_return_val = f(_self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Temp\Genie\LlamaAPI\venv\Lib\site-packages\llama_index\indices\vector_store\retrievers\retriever.py", line 95, in _retrieve
node_ids = [
^
File "C:\Temp\Genie\LlamaAPI\venv\Lib\site-packages\llama_index\indices\vector_store\retrievers\retriever.py", line 96, in <listcomp>
self._index.index_struct.nodes_dict[idx] for idx in query_result.ids
~~~~~~~^^^^^
KeyError: 'eb9cbe5b-f97b-464f-9b9f-7e23737f94fb' - Extra:
msg: query[error]
args: ()
levelno: 40
pathname: C:\Temp\Genie\LlamaAPI\main.py
filename: main.py
module: main
exc_info: (<class 'KeyError'>, KeyError('eb9cbe5b-f97b-464f-9b9f-7e23737f94fb'), <traceback object at 0x00000227D02BE400>)
interestingly, this is at the query stage
and I can actually find those keys in the json index files I'm writing
Are you loading an index created with the old llama_index version, or the new one?
I'll try and reproduce locally
confirmed I'm writing the file and it looks good
so I know I'm managing to navigate the langchain / llama / azureopenai / gateway successfully
at least for embeddings..
I could even attach the index file created if you like...
cool cool, was able to reproduce locally
ok thanks for being awesome!
wanders in awe through the new docs and things llama can do
oh spooky, for some reason to_dict is saving TWO index stores, even though there is only one
Also, I forgot we should be using load_index_from_storage for this
Will dig a little deeper before writing up this workaround I found lol
ok, not a bug, but I figured it out!
Here goes, slightly corrected example
# "save"
storage_dict = index.storage_context.to_dict()
storage_json = json.dumps(storage_dict)
# "load"
from llama_index import load_index_from_storage
storage_dict = json.loads(storage_json)
storage_context = StorageContext.from_dict(storage_dict)
index = load_index_from_storage(storage_context)
basically, just needed to use that function lol. It rebuilds the existing index from the index store, whereas VectorStoreIndex([], storage_context=...) was creating a brand-new empty index, which is why the stored node ids weren't in nodes_dict
idk what I saw with that double index thing, I think I goofed something because I couldn't replicate it haha
I've had a few interesting findings too:
{"doc_hash": "6096a341fff252ad516c15e56b2f015917944b79b7090c04b59829ee2f9f074c"}}}, "index_store": {"index_store/data": {"a113998c-6c18-4a41-8ffd-c52426244957": {"type": "vector_store", "data": "{"index_id": "a113998c-6c18-4a41-8ffd-c52426244957", "summary": null, "nodes_dict": {"6219baa4-63ce-4e00-889c-fb88d3454c93": "6219baa4-63ce-4e00-889c-fb88d3454c93", "eb9cbe5b-f97b-464f-9b9f-7e23737f94fb": "eb9cbe5b-f97b-464f-9b9f-7e23737f94fb"}, "doc_id_dict": {}, "embeddings_dict": {}}"}}}, "graph_store": {"graph_dict": {}}}
^^ this appears escaped in the output JSON where other bits didn't.
I also noticed that there is a storage_context.to_json()
@Logan M where does load_index_from_storage_context come from? can't seem to find it
from llama_index import load_index_from_storage
oh there's a to_json? lol I guess I missed that
almost there. Now just need to figure out what service contexts I need to plug in where
oh, you can plug that in when you load too!
index = load_index_from_storage(storage_context, service_context=service_context)
'NodeWithScore' object has no attribute 'to_dict'
I haven't directly written any code referring to NodeWithScore
how did you hit this error?
async def query(self, data: str, query: str):
    storage_dict = json.loads(data)
    llm = OpenAI(
        openai_api_key=self.access_token,
        temperature=0,
        model_kwargs={
            "deployment_id": "text-davinci-003",
            "engine": "text-davinci-003",
            "api_base": settings.OPENAI_API_BASE,
            "api_type": settings.OPENAI_API_TYPE,
            "api_version": settings.OPENAI_API_VERSION,
        },
    )
    service_context = ServiceContext.from_defaults(
        embed_model=LangchainEmbedding(
            OpenAIEmbeddings(
                openai_api_key=self.access_token,
                model="text-embedding-ada-002",
                embedding_ctx_length=4095,
                chunk_size=1,
            )
        ),
        llm_predictor=LLMPredictor(llm=llm),
    )
    storage_context = StorageContext.from_dict(storage_dict)
    index = load_index_from_storage(
        storage_context, service_context=service_context
    )
    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    return response
ah no, my bad. It's gotten further along now, and I now need to revisit the downstream code that pulled out nodes, so the issue is probably ours. Sir, you are a legend in your spare time
and kerpow, it's all working now. Superstar
hahaha thanks man. Happy to help figure out anything else you run into!
Next up I'm going to take the chat functionality for a spin, and also MMR
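If it helps, a rough sketch of both from my read of the docs (parameter names may vary by version, so treat this as a starting point rather than the exact API):

# chat engine over the same index
chat_engine = index.as_chat_engine()
chat_response = chat_engine.chat("What does the document say about X?")

# MMR retrieval: trades off relevance against diversity of the retrieved nodes
query_engine = index.as_query_engine(
    vector_store_query_mode="mmr",
    vector_store_kwargs={"mmr_threshold": 0.7},
)
response = query_engine.query("...")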