Using this part in my program to define the API
# Module-level index handle; populated by initialize_index() before the
# Flask routes below may dereference it.
index = None


def initialize_index():
    """Load the vector index from ./storage, or build it from ./data.

    Side effects: assigns the module-level ``index`` and, when building
    from scratch, persists the new index under ./storage.
    """
    global index
    if os.path.exists('./storage'):
        # BUG FIX: the storage context must point at the persisted
        # directory; a bare StorageContext.from_defaults() has nothing
        # to load from, so load_index_from_storage would fail.
        storage_context = StorageContext.from_defaults(persist_dir='./storage')
        index = load_index_from_storage(storage_context, service_context=service_context)
        print(f"Finished loading doc n°1 index from storage with {len(index.docstore.docs)} nodes")
    else:
        documents = SimpleDirectoryReader("./data").load_data()
        print(f"Loaded doc n°1 with {len(documents)} pages")
        # Fresh in-memory storage context; persisted right after indexing.
        storage_context = StorageContext.from_defaults()
        index = GPTVectorStoreIndex.from_documents(documents, storage_context=storage_context)
        index.storage_context.persist(persist_dir="./storage")
        print(f"Finished building doc n°1 index with {len(index.docstore.docs)} nodes")
##
app = Flask(__name__)
##
@app.route("/query", methods=["GET"])
def query_index():
global index
query_text = request.args.get("text", None)
if query_text is None:
return "No text found, please include a ?text=blahblahblah parameter in the URL", 400
query_engine = index.as_query_engine(similarity_top_k=3, text_qa_template=qa_template)
response = query_engine.query(query_text)
return str(response), 200
@app.route("/")
def home():
return "Hello World!, Bienvenue sur le EquansGPT !"
if __name__ == "__main__":
app.run(host="0.0.0.0", port=5601)
When entering http://localhost:5601/ in my browser, nothing loads...
And here is my llm implementation just over the api in my program:
.....
# API key is read from the environment; never hardcode it.
openai.api_key = os.getenv('OPENAI_API_KEY')

# Maximum context window (tokens) for the service context.
context_window = 2048
# Maximum number of tokens generated per completion.
num_output = 512

# Azure OpenAI chat model. "test1" is the Azure deployment name of the
# gpt-35-turbo model.
llm = AzureChatOpenAI(
    deployment_name="test1",
    temperature=0.1,
    max_tokens=num_output,
    openai_api_version=openai.api_version,
    model_kwargs={
        "api_key": openai.api_key,
        "api_base": openai.api_base,
        "api_type": openai.api_type,
        "api_version": openai.api_version,
    },
)
llm_predictor = LLMPredictor(llm=llm)

# Embedding model for document indexing: text-embedding-ada-002,
# deployed on Azure under the name "ada-test".
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment="ada-test",
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version=openai.api_version,
    ),
    embed_batch_size=1,
)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
    context_window=context_window,
    num_output=num_output,
)

# French QA prompt. BUG FIX: the original literals were mojibake-damaged
# (e.g. "mĂȘme" for "même", "socitété" for "société"); accents restored.
template = (
    "Tu trouveras ci-dessous des informations contextuelles. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Tu es un assistant technique de la société Equans via un chatbot. Tu donnes une assistance technique aux questions que te posera l'utilisateur."
    "D'après le contexte, réponds à la question en français uniquement, même si la question est posée dans une autre langue. Réponds donc à la question:{query_str}\n"
    "Il se peut que l'utilisateur te pose des questions sur des parties spécifiques du document. Essaye de les retrouver et de répondre à la question"
    "Si la question n'a rien à voir avec les documents, réponds simplement : 'Je suis désolé, je n'ai pas pu trouver la réponse dans les documents que vous m'avez donné.' sauf si on te demande des questions sur toi (ex: Bonjour ou qui es-tu ?)."
)
qa_template = Prompt(template)
index = None
app = Flask(__name__)
def initialize_index():
global index
storage_context = StorageContext.from_defaults()
if os.path.exists('./storage'):
index = load_index_from_storage(storage_context, service_context=service_context)
print((f"Finished loading doc n°1 index from storage with {len(index.docstore.docs)} nodes"))
else:.....................
did you run the server using Flask?
flask --app ./src/api run --host=0.0.0.0 -p 5601
i launch it via flask --app ./test_flask.py run --host=0.0.0.0 -p 5601
there's no error on your terminal?
PS C:\Projets\IA Chat Local\Sources\AzureOpenAI> flask --app ./tes_flask.py run --host=0.0.0.0 -p 5601
Usage: flask run [OPTIONS]
File "C:\Users\sxd-i\AppData\Local\Programs\Python\Python310\lib\site-packages\flask\app.py", line 2190, in wsgi_app
response = self.full_dispatch_request()
File "C:\Users\sxd-i\AppData\Local\Programs\Python\Python310\lib\site-packages\flask\app.py", line 1486, in full_dispatch_request
rv = self.handle_user_exception(e)
File "C:\Users\sxd-i\AppData\Local\Programs\Python\Python310\lib\site-packages\flask\app.py", line 1484, in full_dispatch_request
rv = self.dispatch_request()
File "C:\Users\sxd-i\AppData\Local\Programs\Python\Python310\lib\site-packages\flask\app.py", line 1469, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
File "C:\Projets\IA Chat Local\Sources\AzureOpenAI\test_flask.py", line 92, in query_index
query_engine = index.as_query_engine(similarity_top_k=3, text_qa_template=qa_template)
AttributeError: 'NoneType' object has no attribute 'as_query_engine'
127.0.0.1 - - [07/Jun/2023 13:39:06] "GET /query?text=what_is_this_doc_about? HTTP/1.1" 500 -
which I guess means the problem comes from the query_engine
but everything is set up the same as my usual program
in terms of imported libraries
You're not initializing the index
no, because you need to call initialize_index()
then it will assign the variable and flask will be able to use
is it inherent to flask ?
i don't see where you're calling it in your code
you need to run this first code first, then the index will be assigned globally
ie: python3 ./initialize_index.py
then you can run flask API
This function will initialize our index. If we call this just before starting the flask server in the main function, then our index will be ready for user queries!
this is exactly what I've done here haha!
do i need to have it in a separate file ?
i changed some little things but yeah i think this is quite the same, i just implemented the persist_dir and printed some things to have the info
import os

from flask import request
from llama_index import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
# NOTE: for local testing only, do NOT deploy with your key hardcoded
os.environ['OPENAI_API_KEY'] = "your key here"
index = None
index_dir = "./index_data" # specify the directory where you want to store the index
def initialize_index():
    """Load the persisted index from ``index_dir``, or build it from ./documents.

    Assigns the module-level ``index``; a freshly built index is persisted
    so later startups can reload it instead of re-embedding.
    """
    global index
    if os.path.exists(index_dir):
        # BUG FIX: the storage context must point at the persisted
        # directory, otherwise load_index_from_storage finds nothing.
        storage_context = StorageContext.from_defaults(persist_dir=index_dir)
        index = load_index_from_storage(storage_context)
    else:
        storage_context = StorageContext.from_defaults()
        documents = SimpleDirectoryReader("./documents").load_data()
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
        storage_context.persist(index_dir)
@app.route("/query", methods=["GET"])
def query_index():
global index
query_text = request.args.get("text", None)
if query_text is None:
return "No text found, please include a ?text=blah parameter in the URL", 400
query_engine = index.as_query_engine()
response = query_engine.query(query_text)
return str(response), 200
def main():
initialize_index()
if __name__ == "__main__":
main()
can be on the same file, do these steps:
- run
python3 ./test_flask.py
- run
flask --app ./test_flask.py run --host=0.0.0.0 -p 5601
i did something like that
and now it's not working anymore, idk why
i guess the error come from this
which is written in white
move initialize_index() to above of app.run()
File "C:\Projets\IA Chat Local\Sources\AzureOpenAI\test_flask.py", line 95, in query_index
query_engine = index.as_query_engine(similarity_top_k=3, text_qa_template=qa_template)
query_engine to works need to have initialize_index() ran first
so you need to follow the steps that I shared, or with your current code call the home route first then call your query route
i literally changed nothing
just rebooted my computer
and relaunched using the new port
index = None
index_name = "./storage"
app = Flask(__name__)


def initialize_index():
    """Populate the global ``index``: reload from ./storage when present,
    otherwise build it from ./data and persist it."""
    global index
    if os.path.exists("./storage"):
        # BUG FIX: only attach persist_dir when the folder exists.
        # StorageContext.from_defaults(persist_dir=...) raises if the
        # directory is missing, which previously ran *before* the
        # existence check and made the build branch unreachable.
        storage_context = StorageContext.from_defaults(persist_dir="./storage")
        index = load_index_from_storage(storage_context, service_context=service_context)
        print(f"Finished loading doc n°1 index from storage with {len(index.docstore.docs)} nodes")
    else:
        documents = SimpleDirectoryReader("./data").load_data()
        print(f"Loaded doc n°1 with {len(documents)} pages")
        index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
        index.storage_context.persist(persist_dir="./storage")
        print(f"Finished building doc n°1 index with {len(index.docstore.docs)} nodes")


# Build/load the index at import time so the routes below can use it.
initialize_index()
@app.route("/query", methods=["GET"])
def query_index():
global index
query_text = request.args.get("text", None)
if query_text is None:
return "No text found, please include a ?text=blahblahblah parameter in the URL", 400
query_engine = index.as_query_engine(similarity_top_k=3, text_qa_template=qa_template)
response = query_engine.query(query_text)
return str(response), 200
@app.route("/")
def main():
return query_index()
if __name__ == "__main__":
app.run(host="0.0.0.0", port=5608)
I saw now that you're already calling initialize_index()
above query route
yeah ! sorry if this was silly but thank you for your help !
Hey, made it work on an azure web app, just to query an existing index.
My issue is, I want to initialize or create an index from an existing folder such as "data", or one specified in the URL, e.g. http://example.com/index?dossier=my_folder
Idk what's not good with my code. If you or @Logan M could help me find the issue ^^
index = None
index_name = "./storage"
app = Flask(__name__)


@app.route("/index", methods=["GET"])
def initialize_index():
    """Build or reload the index.

    ``GET /index?dossier=<folder>`` — builds from ./<folder> (default
    "data") when no persisted storage exists; otherwise reloads ./storage.
    """
    global index
    dossier = request.args.get("dossier", None)
    if dossier is None:
        dossier = "data"
    if os.path.exists("./storage"):
        # BUG FIX: only attach persist_dir when the folder exists;
        # otherwise from_defaults raises and ./storage is never rebuilt.
        storage_context = StorageContext.from_defaults(persist_dir="./storage")
        index = load_index_from_storage(storage_context, service_context=service_context)
        print(f"Finished loading doc n°1 index from storage with {len(index.docstore.docs)} nodes")
    else:
        documents = SimpleDirectoryReader(f"./{dossier}").load_data()
        print(f"Loaded doc n°1 with {len(documents)} pages")
        index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
        index.storage_context.persist(persist_dir="./storage")
        print(f"Finished building doc n°1 index with {len(index.docstore.docs)} nodes")
    return "Index initialized successfully", 200

# BUG FIX: do NOT call initialize_index() at import time — it reads
# flask.request, which only exists inside a request context, so the
# module-level call raised "Working outside of request context".
@app.route("/query", methods=["GET"])
def query_index():
global index
query_text = request.args.get("text", None)
if query_text is None:
return "No text found, please include a ?text=blahblahblah parameter in the URL", 400
query_engine = index.as_query_engine(similarity_top_k=3, text_qa_template=qa_template)
response = query_engine.query(query_text)
return str(response), 200
@app.route("/")
def main():
return query_index()
if __name__ == "__main__":
app.run()
idk why, but it doesn't create a "storage" folder when it's been deleted
@app.route("/")
def main():
initialize_index
return query_index()
ah a small error I think (the docs could probably be clearer about this)
storage_context = StorageContext.from_defaults(persist_dir="./storage")
Should only do this if the folder ./storage
already exists
like your example stored indexed files as pkl and now we use json files too, so I got a little confused haha
@app.route("/index", methods=["GET"])
def initialize_index():
global index
dossier = request.args.get("dossier", None)
if dossier is None:
dossier = "data"
if os.path.exists("./storage"):
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context, service_context=service_context)
print((f"Finished loading doc n°1 index from storage with {len(index.docstore.docs)} nodes"))
else:
documents = SimpleDirectoryReader(f"./{dossier}").load_data()
print((f"Loaded doc n°1 with {len(documents)} pages"))
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./storage")
print((f"Finished building doc n°1 index with {len(index.docstore.docs)} nodes"))
return "Index initialized successfully", 200
heh I was just using pkl to track the documents I inserted, just a bit of a hack haha
i need to specify a storage_context
| for this part too
else:
documents = SimpleDirectoryReader("./data").load_data()
print((f"Loaded doc n°1 with {len(documents)} pages"))
index = GPTVectorStoreIndex.from_documents(documents, storage_context=storage_context,service_context=service_context)
storage_context.persist(persist_dir="./storage")
print((f"Finished building doc n°1 index with {len(index.docstore.docs)} nodes"))``
as
else:
storage_context = StorageContext.from_defaults()
documents = SimpleDirectoryReader("./data").load_data()
print((f"Loaded doc n°1 with {len(documents)} pages"))
index = GPTVectorStoreIndex.from_documents(documents, storage_context=storage_context,service_context=service_context)
storage_context.persist(persist_dir="./storage")
print((f"Finished building doc n°1 index with {len(index.docstore.docs)} nodes"))
oops typo, you can just do this
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./storage")
oh ok dont need it anymore
Ok so, when I launch the program in local and access it via a local url, everything is working.
even
https://example.com/index?dossier=data, then after i can query the new vector database (storage) that has been created.
When it gets tricky is that when I deploy my program on the azure webapp service, it deploys successfully.
But, when I query
https://example.com/query it says me right that "not text has been found etc..." wich is normal
And when i query
https://example.com/query?text=what_is_this_doc_about I get a 500 internal error (which is normal too because no index has been created)
So, when i want to create an index from my data storage with
https://example.com/index?dossier=data , I get a 500 internal error too. This is strange because it doesnt do this in local.
Does it mean that I did something wrong? Or maybe just that my program does not have access to my data storage on the server?
This is still the code that is working, in local
# Global index handle; filled in by the /index route.
index = None
app = Flask(__name__)


@app.route("/index", methods=["GET"])
def initialize_index():
    """Reload the persisted index, or build one from ``?dossier=`` (default "data")."""
    global index
    dossier = request.args.get("dossier", None)
    if dossier is None:
        dossier = "data"
    if not os.path.exists("./storage"):
        # No persisted storage yet: embed the documents and persist.
        documents = SimpleDirectoryReader(f"./{dossier}").load_data()
        print(f"Loaded doc n°1 with {len(documents)} pages")
        index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
        index.storage_context.persist(persist_dir="./storage")
        print(f"Finished building doc n°1 index with {len(index.docstore.docs)} nodes")
    else:
        # Existing storage: just reload it.
        storage_context = StorageContext.from_defaults(persist_dir="./storage")
        index = load_index_from_storage(storage_context, service_context=service_context)
        print(f"Finished loading doc n°1 index from storage with {len(index.docstore.docs)} nodes")
    return "Index initialized successfully", 200
@app.route("/query", methods=["GET"])
def query_index():
global index
query_text = request.args.get("text", None)
if query_text is None:
return "No text found, please include a ?text=blahblahblah parameter in the URL", 400
query_engine = index.as_query_engine(similarity_top_k=3, text_qa_template=qa_template)
response = query_engine.query(query_text)
return str(response), 200
@app.route("/")
def main():
return query_index()
if __name__ == "__main__":
app.run()
If the respons code is 500, is there any way to see logs from the server? I would expect to see some traceback that will help track this down.
If there are no logs, you can wrap everything inside initialize_index with a try/except, and return the error and 500
if an exception happens
Maybe it's a permissions error with reading ./{dossier}
?
Yeah, fixed it — it was a permission error: I didn't have write permission, which isn't allowed at all on that service... so for now I give the model already-embedded data. I will need to fix it another way later.
ayyy called it lol. Glad you got it slightly figured out!