selected_model = "ToolBench/ToolLLaMA-2-7b-v2"
SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
"""
from llama_index.prompts import PromptTemplate
# Wrap every query in the Llama-2 instruct template ([INST] / <<SYS>> tags).
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)
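# Quick sanity check (illustrative, not part of the original setup):
# PromptTemplate.format fills the {query_str} placeholder, showing the
# exact string the model will receive.
print(query_wrapper_prompt.format(query_str="What does ToolLLaMA do?"))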
from llama_index.llms import HuggingFaceLLM
import torch
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # do_sample=False selects greedy decoding; temperature is then ignored.
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
    # Llama tokenizers emit no token_type_ids; disable them so generate()
    # does not receive an unexpected input.
    tokenizer_kwargs={"max_length": 2000, "return_token_type_ids": False},
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True,
                  "use_auth_token": "XXXXX"},  # redacted Hugging Face token
)
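# Optional smoke test before wiring up the agent (an addition, not part of
# the original flow): HuggingFaceLLM.complete runs a single generation
# through the wrapped model.
print(llm.complete("What is a ReAct agent?"))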
from llama_index.agent import ReActAgent
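# `tools` is not defined in this snippet; a minimal sketch, assuming a
# single hypothetical FunctionTool that looks up the source documents:
from llama_index.tools import FunctionTool

def search_docs(query: str) -> str:
    """Hypothetical helper: retrieve context for `query` from the documents."""
    return "...retrieved context..."

tools = [FunctionTool.from_defaults(fn=search_docs)]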
context_agent = ReActAgent.from_tools(
    tools=tools,
    max_iterations=len(tools),  # cap the ReAct loop at one pass per tool
    llm=llm,
    verbose=True,
    system_prompt=plug.prompt,
)
response = context_agent.chat(data)  # `data` is the user's query string
f"""\
You always call a tool to retrieve more context information at least once\
You are a very enthusiastic assistant developed by 2GAI who loves to help people!\
You are powered by the {self.model} LLM model\
Do not make up an answer if you don't know or the context information is not helpful."""
from llama_index.llms import ChatMessage

resp = llm.chat([
    ChatMessage(role="system", content=plug.prompt),
    ChatMessage(role="user", content="Hello!"),
])
print(str(resp))