Part 1 (saving the index into Chroma):
# import
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.embeddings import HuggingFaceEmbedding
import chromadb
from llama_index.llms import LlamaCPP
print("Calling LLM")
llm = LlamaCPP(
    # a model_url can be passed to download the model automatically;
    # here model_path points at a pre-downloaded GGUF file instead
    model_path="./models/em_german_13b_v01.Q8_0.gguf",
    temperature=0.1,
    max_new_tokens=4048,
    # context window given to llama.cpp; make sure the model actually supports this size
    context_window=8128,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set n_gpu_layers to at least 1 to use the GPU
    # model_kwargs={"n_gpu_layers": 1},
    # transform inputs into Llama2 format
    # messages_to_prompt=messages_to_prompt,
    # completion_to_prompt=completion_to_prompt,
    verbose=True,
)
print("Called LLM")
print("Making PersistentClient")
db = chromadb.PersistentClient(path="./chroma_db")
print("Made PersistentClient")
print("Making client")
print("Made client")
print("Creating collection")
chroma_collection = db.get_or_create_collection("sampledata")#
print(chroma_collection)
print("Created collection")
# define embedding function
print("Creating embedding model")
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
print("Created embedding model")
# load documents
print("Loading documents")
documents = SimpleDirectoryReader("./sample_data").load_data()
print("Loaded documents")
# set up ChromaVectorStore and load in data
print("Creating vector store")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
print("Created vector store")
print("Loading data into vector store")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
print("Loaded data into vector store")
print("Making ServiceContext")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
print("Made ServiceContext")
print("Making index")
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
print("Made index")
# Query Data
print("Querying data")
query_engine = index.as_query_engine()
print("Queried data")
print("Getting response")
response = query_engine.query("Was ist der Prozess \"Düngen\"? Bitte nutze Leerzeichen zwischen den Wörtern. Und nutze Satzzeichen.")
print("Got response")
print("Printing response")
print(response)
print("Printed response")
print("Printing response source nodes")
print(response.source_nodes)
print("Printed response source nodes")
Part 2 (calling it in another file):
import chromadb
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.llms import LlamaCPP
# initialize client
db = chromadb.PersistentClient(path="./chroma_db")
llm = LlamaCPP(
    # a model_url can be passed to download the model automatically;
    # here model_path points at a pre-downloaded GGUF file instead
    model_path="./models/em_german_13b_v01.Q8_0.gguf",
    temperature=0.1,
    max_new_tokens=4048,
    # context window given to llama.cpp; make sure the model actually supports this size
    context_window=8128,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set n_gpu_layers to at least 1 to use the GPU
    # model_kwargs={"n_gpu_layers": 1},
    # transform inputs into Llama2 format
    # messages_to_prompt=messages_to_prompt,
    # completion_to_prompt=completion_to_prompt,
    verbose=True,
)
# get collection
chroma_collection = db.get_or_create_collection("sampledata")
print(chroma_collection)
# assign chroma as the vector_store to the context
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-base-en-v1.5")
# load your index from stored vectors
index = VectorStoreIndex.from_vector_store(
vector_store, storage_context=storage_context, service_context=service_context
)
# create a query engine
query_engine = index.as_query_engine()
response = query_engine.query("Was ist der Prozess \"Düngen\"? Bitte nutze Leerzeichen zwischen den Wörtern. Und nutze Satzzeichen.")
print(response)
print(response.source_nodes)
@Tay you never used the storage context in the first part
Should be
index = VectorStoreIndex.from_documents(documents, service_context=service_context, storage_context=storage_context)
In the first part I got a response tho, in the second I didn't
Because in the first, it just created the default in-memory vector db
Then in the second part, you actually use the storage context properly, but it's empty because it was unused in part 1
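A minimal sketch of that fix for part 1, reusing the variables already defined above (documents, service_context, storage_context, chroma_collection); the count() call is just an optional sanity check:
# build the index against the persistent Chroma collection instead of the default in-memory store
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,  # this was the missing piece in part 1
)
# optional sanity check: the collection should now contain the ingested nodes
print(chroma_collection.count())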
Part 1 and part 2 are in the same file, btw
Heya @Logan M,
I now have it in a FastAPI app. I would like to keep a chat history per IP. How do I add the chat history to the LLM as context if I have my chat history as an array?
Ah, and atm I get this.
Can I request the filenames instead of the doc IDs?
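For the filenames question, a rough sketch that might work, assuming the documents were loaded with SimpleDirectoryReader (which normally puts a "file_name" entry into each node's metadata; the key name isn't verified against this llama_index version):
# print file names (falling back to the node id) for each source node of a response
for source in response.source_nodes:
    name = source.node.metadata.get("file_name", source.node.node_id)
    print(name, "score:", source.score)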
chat_engine.chat("hello", chat_history=chat_history)
will let you pass in the chat history as a list
or you can call the LLM directly with llm.chat(chat_history)
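A sketch of one way to keep a chat history per client IP in FastAPI, assuming chat_engine is a chat engine built from the index above; the endpoint name, the chat_histories dict, and the explicit appends are illustrative (some engine versions may update the passed-in list themselves):
from fastapi import FastAPI, Request
from llama_index.llms import ChatMessage

app = FastAPI()
chat_histories = {}  # maps client IP -> list of ChatMessage

@app.post("/chat")
async def chat(request: Request, message: str):
    ip = request.client.host
    history = chat_histories.setdefault(ip, [])
    result = chat_engine.chat(message, chat_history=history)
    # record both turns ourselves (assumes chat() does not mutate the passed-in list)
    history.append(ChatMessage(role="user", content=message))
    history.append(ChatMessage(role="assistant", content=result.response))
    return {"answer": result.response, "chat_history": [m.json() for m in history]}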
I have a query engine, which comes from my index. Would this work:
chat_engine = index.as_chat_engine()
Instead of as_query_engine?
Yea if you want chat history, you'll need to use a chat engine or agent 👍
You might have to try a few different chat modes to find one that works best for you
Is there a layout for how the chat history has to look?
Hmm just a list of ChatMessage objects 👀
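For reference, such a list would look roughly like this (the MessageRole enum values are assumed; plain strings like "user"/"assistant" usually work too):
from llama_index.llms import ChatMessage, MessageRole

chat_history = [
    ChatMessage(role=MessageRole.USER, content="Was ist der Prozess \"Düngen\"?"),
    ChatMessage(role=MessageRole.ASSISTANT, content="Düngen ist der Prozess ..."),
]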
Is it somewhere in the docs?
Does the assistant provide its source text? Like where it got the information from?
aaand now it's fully broken
Error:
TypeError: Object of type ChatMessage is not JSON serializable
Code:
Attached
Yea, need to convert it to JSON first (can't leave it as pydantic for the API response)
response_data = {
    "message": message.json(),
    "answer": responsestuff.response,
    "timeinfo": time,
    # "sourcetext": responsestuff.get_formatted_sources()
    "chat_history": [x.json() for x in custom_chat_history],
}
Then, if you need to go from JSON back to a ChatMessage object:
from llama_index.llms import ChatMessage
chat_history = [ChatMessage.parse_raw(x) for x in json_chat_history]
I think it'd be responsestuff.json().response cuz message is a string
AttributeError: 'AgentChatResponse' object has no attribute 'json'
whoops -- probably just the chat_history that needs to be modified then
I didn't look close enough 😅
response_data = {
    "message": message,
    "answer": responsestuff.response,
    "timeinfo": time,
    # "sourcetext": responsestuff.get_formatted_sources()
    "chat_history": [x.json() for x in custom_chat_history],
}
As seen here, it responds to the initial message after 2 more questions
For that --- I don't have an answer 😅 Maybe try printing query_engine.chat_history after each chat, to make sure it looks correct?
tbh it could be llamacpp too -- if you have access to openai or similar, maybe confirm the behaviour with it
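If an OpenAI key were available, swapping the LLM in the ServiceContext would look roughly like this (the model name is just an example):
from llama_index.llms import OpenAI

# temporarily swap LlamaCPP for a hosted model to see if the blank responses persist
service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-base-en-v1.5",
)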
Sadly I don't. So I can't test it
If it's the same as custom_chat_history, then it shouldn't be an issue
I could try appending the user's message to the chat history before it asks the chat engine tho
maaaybe, although it should already be doing that under the hood. But worth a shot
Yea it should be the same, but just a sanity check
Lemme try that in a sec, rebooting for an update
okay it's back, now imma try
Just got that after the second message
Something is going into response_data that's a chat message object
you kept the x.json() stuff?
But the responsestuff.json() or something gave me the error, so I didn't
ah yea that's what I meant -- might have to manually debug what part of response_data is a chat message object
if responsestuff.response == "":
response_data = {
"message": message,
"answer": "Ich weiß nicht, es tut mir leid.",
"timeinfo": time,
# "sourcetext": responsestuff.get_formatted_sources()
"chat_history": custom_chat_history,
}
responsestuff.response = "Ich weiß nicht, es tut mir leid."
I FORGOT TO ADD THE x.json() STUFF ON THE NO RESPONSE FALLBACK
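For completeness, a sketch of that fallback branch with the same serialization applied as in the normal branch:
if responsestuff.response == "":
    responsestuff.response = "Ich weiß nicht, es tut mir leid."
    response_data = {
        "message": message,
        "answer": responsestuff.response,
        "timeinfo": time,
        # "sourcetext": responsestuff.get_formatted_sources()
        "chat_history": [x.json() for x in custom_chat_history],
    }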
Sooo can I somehow implement a prompt?
As it doesn't know about what I asked before
It might have gotten dumber tho, idk
Good thing I recently went to Austria, somehow I am understanding some of these messages 😆
responsestuff.response = "Ich weiß nicht, es tut mir leid."
is some hardcoded message when the response is empty right?
Ooo that's good. Yes. It's just so the bot doesn't just send "insert void here"
So the real issue is that it's responding with a blank on the second message 🤔
but it does that every time it doesn't know an answer
even tho that was the question we always used, and it worked
Every time it doesn't know an answer it just gives me a void
That seems weird 🤔 Did you tell it to do that?
interesting 😅 Hmm, not sure then. I think it's another case where we need to double-check the chat history?
The default as_chat_engine() is a react agent. You might have better luck with another mode -- maybe try index.as_chat_engine(chat_mode="condense_plus_context")
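A sketch of that mode, with an optional system_prompt added for the "can I implement a prompt" question above (whether system_prompt is accepted, and whether the mode exists at all, depends on the installed llama_index version):
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    # condenses the follow-up question using the chat history, then answers from retrieved context
    system_prompt="You are a helpful assistant. Answer in German, with spaces between words and proper punctuation.",
    verbose=True,
)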
Nope, didn't help in any way
@Logan M Imma let my laptop run through the big DB, maybe that helps. ATM I use a small portion of my whole data
nvm, misread where to add that. buuut now it's telling me ValueError: Unknown chat mode: condense_plus_context
Okay, then imma look into updating llama_index
Now it just doesn't reply after the first message
I feeeeel like this is an LLM error -- working with llamacpp is really annoying tbh 😅 If this were me, I'd be putting a breakpoint in the actual LLM code, or making sure the inputs aren't too big
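A very rough way to check the input size without touching the real tokenizer, assuming roughly 4 characters per token and the context_window/max_new_tokens values from the LlamaCPP setup above:
def rough_prompt_tokens(chat_history, new_message):
    # crude estimate: ~4 characters per token
    chars = sum(len(m.content or "") for m in chat_history) + len(new_message)
    return chars // 4

estimate = rough_prompt_tokens(custom_chat_history, message)
if estimate > 8128 - 4048:  # context_window minus max_new_tokens
    print(f"Warning: roughly {estimate} prompt tokens, the history may need trimming")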