There is no over-dependence on third parties.
All the integrations have been split out as separate PyPI packages.
To use your own hosted LLM, you can use Ollama or OpenAILike.
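For example, here is a minimal sketch of pointing LlamaIndex at a locally hosted model. It assumes the llama-index-llms-ollama and llama-index-llms-openai-like packages are installed; the model names and URLs below are just placeholders.

from llama_index.core import Settings
from llama_index.llms.ollama import Ollama

# Use a model served by a local Ollama instance (default endpoint http://localhost:11434).
Settings.llm = Ollama(model="llama3", request_timeout=120.0)

# Or target any OpenAI-compatible server (vLLM, llama-cpp server, ...):
from llama_index.llms.openai_like import OpenAILike

Settings.llm = OpenAILike(
    model="my-hosted-model",              # placeholder model name
    api_base="http://localhost:8000/v1",  # placeholder endpoint
    api_key="not-needed",
)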
If you have your own local model, you can plug in a custom LLM and interact with it directly, e.g. via LlamaCPP:
import torch

from llama_index.core import Settings
from llama_index.llms.llama_cpp import LlamaCPP
# Assuming the default prompt helpers from llama-index; replace with your own if customized.
from llama_index.llms.llama_cpp.llama_utils import messages_to_prompt, completion_to_prompt

Settings.llm = LlamaCPP(
    model_path="wikibot_models/zephyr-7b-gguf/zephyr-7b-beta.Q2_K.gguf",
    # model_path="wikibot_models/gemma-2b/gemma-2b.gguf",
    # model_path="wikibot_models/gemma-7b/gemma-7b.gguf",
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,
    generate_kwargs={},
    # Offload 20 layers to the GPU; torch_dtype is passed through in model_kwargs.
    model_kwargs={"n_gpu_layers": 20, "torch_dtype": torch.bfloat16},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
import os
import re
import time

from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)

def readDocs(prompt: str):
    start = time.time()
    if os.path.isfile("index/docstore.json"):
        print("found index at index/docstore.json\n")
        # Reuse the persisted index.
        storage_context = StorageContext.from_defaults(persist_dir="index")
        index = load_index_from_storage(storage_context)
        query_engine = index.as_query_engine()
        response = query_engine.query(prompt)
        print('\nResponse: ', response)
    else:
        print("create index\n")
        # Build the index from scratch and persist it for the next run.
        documents = SimpleDirectoryReader(
            input_dir="docs",
            recursive=True,
            exclude=[".docx", ".png", ".jpeg", ".tmp", ".lnk", ".unk",
                     ".vsdx", ".heic", ".avif", ".so.1"],
        ).load_data()
        index = VectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir="index")
        query_engine = index.as_query_engine()
        response = query_engine.query(prompt)
        print('\nResponse: ', response)
    if hasattr(response, 'metadata'):
        document_info = str(response.metadata)
        # Pull the page_label / file_name pairs out of the metadata string.
        find = re.findall(r"'page_label': '[^']*', 'file_name': '[^']*'", document_info)
        print('\n' + '=' * 60 + '\n')
        print('Context Information')
        print(str(find))
        print('\n' + '=' * 60 + '\n')
    end = time.time()
    print("Elapsed Time: ", end - start, " seconds")
I have this working right now, but I want to use GPU-heavy models, not llama.cpp.
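If the goal is to run the model fully on GPU through transformers instead of llama.cpp, one option is LlamaIndex's HuggingFaceLLM wrapper. A minimal sketch, assuming the llama-index-llms-huggingface package is installed and the model fits on the available GPU; the model name and generation settings are placeholders, not a definitive setup:

import torch

from llama_index.core import Settings
from llama_index.llms.huggingface import HuggingFaceLLM

# Load the model in bfloat16 onto the GPU via transformers.
Settings.llm = HuggingFaceLLM(
    model_name="HuggingFaceH4/zephyr-7b-beta",
    tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
    context_window=3900,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1, "do_sample": True},
    device_map="auto",  # place layers on the available GPU(s) automatically
    model_kwargs={"torch_dtype": torch.bfloat16},
)

The rest of the code (readDocs, the index, the query engine) stays the same, since it only talks to Settings.llm.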