import chromadb
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.llms import LlamaCPP
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import ChromaVectorStore
# initialize client
db = chromadb.PersistentClient(path="./chroma_db")
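# the PersistentClient keeps collections on disk under ./chroma_db, so
# vectors written in an earlier indexing run are reused here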
llm = LlamaCPP(
    # you can pass model_url to download a GGUF model automatically, or
    # set model_path to a pre-downloaded local model instead
    model_path="./models/em_german_13b_v01.Q8_0.gguf",
    temperature=0.1,
    # keep generation well below the context window so the prompt and
    # retrieved chunks still fit
    max_new_tokens=256,
    # llama2-based models have a 4096-token context window; set it a bit
    # lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set n_gpu_layers to at least 1 to use the GPU
    # model_kwargs={"n_gpu_layers": 1},
    # transform inputs into the Llama2 chat format
    # messages_to_prompt=messages_to_prompt,
    # completion_to_prompt=completion_to_prompt,
    verbose=True,
)
# get collection
chroma_collection = db.get_or_create_collection("quickstart")
# use the Chroma collection as the vector store behind the index
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
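# the storage context points the index at Chroma instead of the default
# in-memory vector store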
service_context = ServiceContext.from_defaults(llm=llm)
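# note: only the llm is overridden here; embed_model keeps its default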
# load your index from stored vectors
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context, service_context=service_context
)
# create a query engine
query_engine = index.as_query_engine()
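# query() first embeds the question with the service context's embed_model,
# retrieves matching chunks from Chroma, then has the llm write the answer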
response = query_engine.query("Hallo, wie geht es dir?")
print(response)
This gives me a connection error. It seems like it's trying to talk to OpenAI. Is there a way to make it work with my local model?
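My guess is that the embeddings are the problem: I never set an embed_model, so ServiceContext.from_defaults presumably falls back to OpenAI embeddings when the query is embedded. Would something like the following keep everything local? (The "local" shortcut should, as far as I understand, load a small HuggingFace embedding model via sentence-transformers instead of calling the OpenAI API.)

service_context = ServiceContext.from_defaults(
    llm=llm,
    # "local" swaps the default OpenAI embeddings for a local HuggingFace
    # model, so embedding the query should not make any API call
    embed_model="local",
)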