My code example:
from llama_index.llms import LlamaCPP, MessageRole
from llama_index.chat_engine.types import ChatMode
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index import SimpleDirectoryReader, VectorStoreIndex, ServiceContext
def messages_to_prompt(messages):
    roles = {MessageRole.SYSTEM: '',
             MessageRole.USER: "User: ",
             MessageRole.ASSISTANT: "Assistant: "}
    prompt = ''
    for message in messages:
        prompt += roles[message.role] + message.content + '<|end_of_turn|>'
    prompt += "Assistant: "
    return prompt
def completion_to_prompt(completion):
    LLM_TEMPLATE = "### Instruction:\n\n{instruction}\n\n### Response:"
    prompt = LLM_TEMPLATE.format(instruction=completion)
    return prompt
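# For example, completion_to_prompt("Summarize the document") would return:
#   "### Instruction:\n\nSummarize the document\n\n### Response:"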
llama_debug = LlamaDebugHandler()
callback_manager = CallbackManager([llama_debug])
llm = LlamaCPP(
    model_path='C:/LLMs/oobabooga_windows/text-generation-webui/models/openorca-platypus2-13b.Q4_K_M.gguf',
    temperature=0.75,
    max_new_tokens=256,
    context_window=2048,
    generate_kwargs={"top_p": 1},
    model_kwargs={"n_gpu_layers": 32, "n_batch": 512, "n_threads": 10},
    callback_manager=callback_manager,
    verbose=False,
    messages_to_prompt=messages_to_prompt,      # function that converts a list of chat messages into a prompt string
    completion_to_prompt=completion_to_prompt,  # function that converts a plain completion into a prompt string
)
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")
data = SimpleDirectoryReader(input_dir="C:/temp_my/text_embeddings").load_data()
index = VectorStoreIndex.from_documents(data, service_context=service_context)
chat_engine = index.as_chat_engine(service_context=service_context, chat_mode=ChatMode.BEST, verbose=True)
response = chat_engine.chat("Use the tool to answer who Gaia is and tell the story briefly.")
print(response)
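The LlamaDebugHandler is attached above but never queried. As a minimal sketch (reusing the same llama_debug object, and assuming a legacy llama_index version in which LlamaDebugHandler exposes get_llm_inputs_outputs()), the recorded LLM calls could be inspected like this:

# Print the prompt/response pairs the debug handler recorded for each LLM call
event_pairs = llama_debug.get_llm_inputs_outputs()
for start_event, end_event in event_pairs:
    print(start_event.payload)  # what was sent to the model
    print(end_event.payload)    # what the model returned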