Hey guys, how can I load a Hugging Face model locally (i.e. from a path on disk) using LlamaIndex? I haven't found a solution to this problem yet. This is my code:
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

documents = SimpleDirectoryReader("../Datasets/Merged").load_data()
def completion_to_prompt(completion):
    return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"

# Transform a list of chat messages into zephyr-specific input
def messages_to_prompt(messages):
    prompt = ""
    for message in messages:
        if message.role == "system":
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == "user":
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == "assistant":
            prompt += f"<|assistant|>\n{message.content}</s>\n"

    # ensure we start with a system prompt, insert blank if needed
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    # add final assistant prompt
    prompt = prompt + "<|assistant|>\n"

    return prompt
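# For reference (just a sketch of what the helper above returns, not part of my pipeline):
# messages_to_prompt([ChatMessage(role="user", content="Hello")]) would give
#     "<|system|>\n</s>\n<|user|>\nHello</s>\n<|assistant|>\n"
# where ChatMessage comes from llama_index.core.llms.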
Settings.llm = HuggingFaceLLM(
    model_name="./zephyr-7b-beta",
    tokenizer_name="./zephyr-7b-beta",
    context_window=3900,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0, "top_k": 30, "top_p": 0.95, "do_sample": True},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    device_map="auto",
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="./BAAI/bge-small-en-v1.5"
)
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("What are Zero-Knowledge Proofs ?")
print(response)
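For reference, by "locally" I mean that ./zephyr-7b-beta and ./BAAI/bge-small-en-v1.5 are folders on disk that already contain the model files. I'm assuming it shouldn't matter how those folders were created, but as an illustration, a local copy can be made with huggingface_hub (this is just a sketch, not the code I'm running):

from huggingface_hub import snapshot_download

# Download full copies of the repos into local folders (illustrative paths only)
snapshot_download(repo_id="HuggingFaceH4/zephyr-7b-beta", local_dir="./zephyr-7b-beta")
snapshot_download(repo_id="BAAI/bge-small-en-v1.5", local_dir="./BAAI/bge-small-en-v1.5")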