The goal is a separate chat_engine instance for each user, so that every chat will be unique to that particular user:

if not cl.user_session.get("index"):
    # do the instantiation of llm and set the index in user_session
    cl.user_session.set('index', index)
else:
    index = cl.user_session.get('index')
@cl.on_chat_start
async def factory():
  line 139, in <module>
    callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]),
  File "/usr/local/lib/python3.10/dist-packages/chainlit/llama_index/callbacks.py", line 31, in __init__
    self.context = context_var.get()
LookupError: <ContextVar name='chainlit' at 0x7fd24f880630>
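The "in <module>" frame shows that the LlamaIndexCallbackHandler is being constructed at import time, before any Chainlit session exists, so the 'chainlit' ContextVar has never been set. A minimal sketch of deferring that construction into the on_chat_start hook (assuming the llama_index 0.9-era imports used elsewhere in this thread):

import chainlit as cl
from llama_index import ServiceContext
from llama_index.callbacks import CallbackManager

@cl.on_chat_start
async def factory():
    # Build the handler inside a Chainlit callback, where the 'chainlit'
    # ContextVar is populated, rather than at module import time.
    callback_manager = CallbackManager([cl.LlamaIndexCallbackHandler()])
    service_context = ServiceContext.from_defaults(
        callback_manager=callback_manager,
        # llm=..., embed_model=...  (same arguments as in the original script)
    )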
@cl.on_chat_start
async def factory():
    global QA_TEMPLATE, MEM_PROMPT

    # Detect hardware acceleration device
    if torch.cuda.is_available():
        device = 'cuda'
        gpu_layers = 50
    elif torch.backends.mps.is_available():  # Assuming MPS backend exists
        device = 'mps'
        gpu_layers = 1
    else:
        device = 'cpu'
        gpu_layers = 0
    print(f'Using device: {device}')

    if not cl.user_session.get("index"):
        # do the instantiation of llm and set the index in user_session
        cl.user_session.set('index', index)
    else:
        index = cl.user_session.get('index')

    # Do the query engine part from here
    # percentile_cutoff: a measure for using the top percentage of relevant sentences.
    query_engine = index.as_query_engine(
        streaming=True,
        similarity_top_k=2,
        text_qa_template=QA_TEMPLATE,
If cl.user_session really does stay alive for as long as the server is running, and isn't re-created with every new connection, then this part should work. Instead, it fails with:

  line 91, in factory
    cl.user_session.set('index', index)
UnboundLocalError: local variable 'index' referenced before assignment
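The traceback points at the placeholder branch: when "index" is not yet in the session, the code calls cl.user_session.set('index', index) before index has ever been assigned. The instantiation has to actually happen in that branch first. A minimal sketch, where build_index() is a hypothetical helper standing in for whatever loads the vector store and constructs the index:

index = cl.user_session.get('index')
if index is None:
    # build_index() is hypothetical: it should load the Chroma collection
    # and return the VectorStoreIndex used by the query engine below.
    index = build_index()
    cl.user_session.set('index', index)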
@cl.on_chat_start
async def factory():
    global QA_TEMPLATE, MEM_PROMPT, CHAT_HISTORY

    # Detect hardware acceleration device
    if torch.cuda.is_available():
        device = 'cuda'
        gpu_layers = 50
    elif torch.backends.mps.is_available():  # Assuming MPS backend exists
        device = 'mps'
        gpu_layers = 1
    else:
        device = 'cpu'
        gpu_layers = 0
    print(f'Using device: {device}')

    if not cl.user_session.get("index"):
        # do the instantiation of llm and set the index in user_session
        cl.user_session.set('index', index)
    else:
        index = cl.user_session.get('index')

    embed_model_name = 'BAAI/bge-small-en-v1.5'

    # Create an instance of the HuggingFace embedding model
    embed_model = HuggingFaceEmbedding(
        model_name=embed_model_name,
        device=device,
        normalize='True'
    )

    # load from disk
    path = 'RAG_VectorDB'
    db = chromadb.PersistentClient(path=path)
    chroma_collection = db.get_collection('arxiv_PDF_DB')
    print(chroma_collection.metadata)

    if embed_model_name != chroma_collection.metadata['embedding_used']:
        raise Warning('Not using the same embedding model!')

    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
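The snippet stops at the ChromaVectorStore, but the query engine further down needs an actual index object, which is also what the user_session branch above is supposed to store. With an already-populated Chroma collection that is typically done with VectorStoreIndex.from_vector_store; a minimal sketch, assuming the same service_context defined elsewhere in the script:

from llama_index import VectorStoreIndex

# Wrap the persisted Chroma collection in an index; nothing is re-embedded here,
# the vectors are read straight from the existing collection.
index = VectorStoreIndex.from_vector_store(
    vector_store,
    service_context=service_context,
)
cl.user_session.set('index', index)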
@cl.on_chat_start
async def factory():
# percentile_cutoff: a measure for using the top percentage of relevant sentences.
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=2,
    text_qa_template=QA_TEMPLATE,
    node_postprocessors=[SentenceEmbeddingOptimizer(percentile_cutoff=0.5, embed_model=embed_model)]
)

CHAT_HISTORY = []

chat_engine = CondenseQuestionChatEngine.from_defaults(
    query_engine=query_engine,
    embed_model=embed_model,
    service_context=service_context,
    condense_question_prompt=MEM_PROMPT,
    chat_history=CHAT_HISTORY,
    verbose=False,
)

print('Model Loaded')
cl.user_session.set('chat_engine', chat_engine)
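Once the chat_engine is in the user session, the message handler only has to read it back out. A minimal sketch of the on_message counterpart, using the blocking chat() call wrapped in cl.make_async (the same pattern that appears in a traceback further down); streaming is left out here because of the vLLM issue discussed below:

@cl.on_message
async def main(message: cl.Message):
    chat_engine = cl.user_session.get('chat_engine')

    # Run the synchronous chat call in a worker thread so the event loop
    # is not blocked while the model generates.
    response = await cl.make_async(chat_engine.chat)(message.content)

    await cl.Message(content=str(response)).send()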
@cl.on_chat_start
if not cl.user_session.get("index"):
npx create-llama@latest
from llama_index.llms.vllm import Vllm

model_path = 'mistralai/Mistral-7B-v0.1'
llm = Vllm(model_path)
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm=llm,
    # callback manager shows progress in the UI
    callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]),
)
ERROR: Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/websockets/websockets_impl.py", line 247, in run_asgi
    result = await self.app(self.scope, self.asgi_receive, self.asgi_send)
  ...
  File "/usr/local/lib/python3.10/dist-packages/engineio/async_drivers/asgi.py", line 247, in send
    await self.asgi_send({'type': 'websocket.send',
  File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/exceptions.py", line 65, in sender
    await send(message)
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/websockets/websockets_impl.py", line 320, in asgi_send
    await self.send(data)  # type: ignore[arg-type]
  File "/usr/local/lib/python3.10/dist-packages/websockets/legacy/protocol.py", line 635, in send
    await self.ensure_open()
  File "/usr/local/lib/python3.10/dist-packages/websockets/legacy/protocol.py", line 948, in ensure_open
    raise self.connection_closed_exc()
websockets.exceptions.ConnectionClosedOK: received 1005 (no status received [internal]); then sent 1005 (no status received [internal])

First question
2023-12-15 17:17:10 - Not Implemented
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/chainlit/utils.py", line 39, in wrapper
    return await user_function(**params_values)
  File "/home/Josh-ee_Llama_RAG/test-gpu.py", line 165, in main
    response = await cl.make_async(chat_engine._query_engine.query)(question)
  File "/usr/local/lib/python3.10/dist-packages/asyncer/_main.py", line 358, in wrapper
    return await anyio.to_thread.run_sync(
  File "/usr/local/lib/python3.10/dist-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  ...
  File "/usr/local/lib/python3.10/dist-packages/llama_index/query_engine/retriever_query_engine.py", line 171, in _query
    response = self._response_synthesizer.synthesize(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/base.py", line 146, in synthesize
    response_str = self.get_response(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/compact_and_refine.py", line 38, in get_response
    return super().get_response(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/refine.py", line 127, in get_response
    response = self._give_response_single(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/refine.py", line 196, in _give_response_single
    response = self._service_context.llm_predictor.stream(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/llm_predictor/base.py", line 251, in stream
    stream_response = self._llm.stream_complete(formatted_prompt)
  File "/usr/local/lib/python3.10/dist-packages/llama_index/llms/base.py", line 313, in wrapped_llm_predict
    f_return_val = f(_self, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/llama_index/llms/vllm.py", line 256, in stream_complete
    raise (ValueError("Not Implemented"))
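The ValueError comes from Vllm.stream_complete, which in this llama_index version simply raises "Not Implemented"; the refine synthesizer only reaches it because the query engine was built with streaming=True. One way to sidestep it, sketched here rather than prescribed, is to drop streaming from the query engine and send the full response in one message:

# Vllm.stream_complete is not implemented in this llama_index version,
# so avoid the streaming code path entirely.
query_engine = index.as_query_engine(
    streaming=False,
    similarity_top_k=2,
    text_qa_template=QA_TEMPLATE,
)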
@cl.cache
def cachellm():
    model_path = 'mistralai/Mistral-7B-v0.1'
    from llama_index.llms.vllm import Vllm
    llm = Vllm(model_path)
    return llm


@cl.on_chat_start
async def factory():
    global llm
    llm = cachellm()


@cl.on_message
async def main(message: cl.Message):
    question = message.content
    output = llm.complete(question)
    response_message = cl.Message(content=output[0].text)
    await response_message.send()
question = "What is the paper about?" query_engine = index.as_query_engine(service_context=service_context) response = query_engine.query(question)
2023-12-15 18:15:29 - 'list' object has no attribute 'text'
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/chainlit/utils.py", line 39, in wrapper
    return await user_function(**params_values)
  File "/home/Josh-ee_Llama_RAG/vllm.py", line 102, in factory
    response = query_engine.query(question)
  File "/usr/local/lib/python3.10/dist-packages/llama_index/core/base_query_engine.py", line 30, in query
    return self._query(str_or_query_bundle)
  File "/usr/local/lib/python3.10/dist-packages/llama_index/query_engine/retriever_query_engine.py", line 171, in _query
    response = self._response_synthesizer.synthesize(
  ...
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/refine.py", line 182, in _give_response_single
    program(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/refine.py", line 53, in __call__
    answer = self._llm_predictor.predict(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/llm_predictor/base.py", line 225, in predict
    output = response.text
AttributeError: 'list' object has no attribute 'text'
File "/usr/local/lib/python3.10/dist-packages/llama_index/llm_predictor/base.py", line 225, in predict output = response.text
output = response[0].text
2023-12-15 21:13:51 - 'list' object has no attribute 'text'
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/chainlit/utils.py", line 39, in wrapper
    return await user_function(**params_values)
  File "/home/Josh-ee_Llama_RAG/vllm-gpu.py", line 111, in factory
    response = query_engine.query(question)
  File "/usr/local/lib/python3.10/dist-packages/llama_index/core/base_query_engine.py", line 30, in query
    return self._query(str_or_query_bundle)
  File "/usr/local/lib/python3.10/dist-packages/llama_index/query_engine/retriever_query_engine.py", line 171, in _query
    response = self._response_synthesizer.synthesize(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/base.py", line 146, in synthesize
    response_str = self.get_response(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/compact_and_refine.py", line 38, in get_response
    return super().get_response(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/refine.py", line 146, in get_response
    response = self._give_response_single(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/refine.py", line 202, in _give_response_single
    program(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/response_synthesizers/refine.py", line 64, in __call__
    answer = self._llm.predict(
  File "/usr/local/lib/python3.10/dist-packages/llama_index/llms/llm.py", line 221, in predict
    output = response.text
AttributeError: 'list' object has no attribute 'text'
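Rather than editing the installed llama_index sources with the response[0].text change above, a thin wrapper around the LLM can do the same unwrapping before the predictor ever sees the value. This is only a sketch, assuming it is Vllm.complete that returns a list in this version; PatchedVllm is an illustrative name, not part of either library:

from llama_index.llms.vllm import Vllm

class PatchedVllm(Vllm):
    def complete(self, prompt, **kwargs):
        response = super().complete(prompt, **kwargs)
        # In this version Vllm.complete can return a list of CompletionResponse
        # objects; hand back only the first one so downstream code that does
        # `response.text` keeps working.
        if isinstance(response, list):
            return response[0]
        return response

llm = PatchedVllm(model_path)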