selected_model = "ToolBench/ToolLLaMA-2-7b-v2"
SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
"""
from llama_index.prompts import PromptTemplate
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT +
    "<</SYS>>\n\n{query_str}[/INST] "
)
from llama_index.llms import HuggingFaceLLM
import torch
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
    # tokenizer options go through tokenizer_kwargs; HuggingFaceLLM has no tokenizer=dict form
    tokenizer_kwargs={"max_length": 2000, "return_token_type_ids": False},
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True,
                  "use_auth_token": "XXXXX"},
)
from llama_index.agent import ReActAgent
context_agent = ReActAgent.from_tools(
    tools=tools,
    max_function_calls=len(tools),
    llm=llm,
    verbose=True,
    system_prompt=plug.prompt,
)
response = context_agent.chat(data)
response = context_agent.stream_chat("HI", chat_history=[])
This didn't happen with previous versions, am I missing something? 🤔
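For context, this is roughly how I read the streamed tokens afterwards (a minimal sketch, assuming the usual StreamingAgentChatResponse interface; "HI" is just the placeholder prompt from above):
# consume the streamed agent response token by token
streaming_response = context_agent.stream_chat("HI", chat_history=[])
for token in streaming_response.response_gen:
    print(token, end="", flush=True)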
"title": " Orders by Status",
"names": ["Cancelled", "Shipped", "Shipped and Delivered"],
"amount": [3, 14, 6]
}
Here's a visualization of the orders by status:
- Cancelled: 3 orders
- Shipped: 14 orders
- Shipped and Delivered: 6 orders
647f3c5170b47d535c175523/salesforceclimatedatasample
but it keeps querying salesforceclimatedatasample instead
My llama_index version is 0.9.21; how do I fix this?
llamaindex-cli upgrade-file <file_path>
gave this error, please help.
VectorStoreIndex.from_documents
too, same result. I'm using chroma PersistentClient btw, very weird 🤔
Rate limit reached for text-embedding-ada-002
no matter how I adjust the batch size. Changing to use_async = False fixes this but makes it very, very slow, and the batch size doesn't seem to apply when use_async = False for some reason. How can I embed a lot of documents quickly without hitting the rate limit error? Thank you!
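For reference, this is roughly how I'm building the index at the moment (a minimal sketch; `documents` is my loaded document list, and the batch size is just an example):
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.embeddings import OpenAIEmbedding

# smaller embedding batches plus async indexing; batch size here is only an example
embed_model = OpenAIEmbedding(embed_batch_size=10)
service_context = ServiceContext.from_defaults(embed_model=embed_model)
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
    use_async=True,
    show_progress=True,
)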
llm = OpenAI(model="")
context_agent = OpenAIAgent.from_tools(
    qa_prompt=qa_prompt,
    tools=tools,
    max_function_calls=3,
    llm=llm,
    verbose=True,
)
I thought gpt-3.5-turbo-0613 should have function-calling support, so why does this happen? Thanks.
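For what it's worth, this is the check I ran to confirm the model name is treated as function-calling capable (a sketch, assuming the OpenAI LLM metadata fields in this llama_index version; the explicit model name is just an example):
from llama_index.llms import OpenAI

# with an explicit -0613 model name, the metadata should report function-calling support
llm = OpenAI(model="gpt-3.5-turbo-0613")
print(llm.metadata.is_function_calling_model)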
# requires: import os, uuid and import matplotlib.pyplot as plt at module level
def draw_bar_graph(self, title, x_label, y_label, x_data, y_data):
    plt.figure()
    plt.bar(x_data, y_data)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    # save under a random name so repeated calls don't overwrite each other
    image_name = str(uuid.uuid4())
    image_path = os.path.join('src/assets', f'{image_name}.png')
    plt.savefig(image_path, format='png')
    plt.close()
    return f"{image_name}.png"