# Imports assume a legacy (ServiceContext-era, ~0.8.x) llama-index release
from llama_index import ServiceContext, StorageContext, load_index_from_storage
from llama_index.chat_engine import ContextChatEngine
from llama_index.evaluation import ResponseEvaluator
from llama_index.llms import OpenAI
from llama_index.memory import ChatMemoryBuffer
from llama_index.prompts.chat_prompts import CHAT_TEXT_QA_PROMPT
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index.retrievers import VectorIndexRetriever
from llama_index.vector_stores import FaissVectorStore

llm = OpenAI(
    temperature=0.2,
    model="gpt-4",
    streaming=True,
)

# Load the FAISS-backed index previously persisted to ./faissMarkdown
vector_store = FaissVectorStore.from_persist_dir("./faissMarkdown")
storage_context = StorageContext.from_defaults(
    vector_store=vector_store, persist_dir="./faissMarkdown"
)
service_context = ServiceContext.from_defaults(llm=llm)
evaluator = ResponseEvaluator(service_context=service_context)
index = load_index_from_storage(storage_context=storage_context)

# Retrieve the 7 most similar chunks for each query
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=7,
)
response_synthesizer = get_response_synthesizer(
    service_context=service_context,
    response_mode="compact",
    text_qa_template=CHAT_TEXT_QA_PROMPT,
)
chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    verbose=True,
)
# Query the chat engine and append the source URLs to the answer
def chatbot(input_text):
    try:
        response = chat_engine.chat(input_text)
        top_urls = []
        for source in response.source_nodes:
            metadata = source.node.metadata
            if "url" in metadata:
                url = metadata["url"]
                top_urls.append(url)
                print(url, source.score)
        top_urls = "\n".join(top_urls)
        join_response = f"{response.response}\n\n\nSources:\n{top_urls}"
        return join_response
    except Exception as e:
        print(f"Error: {e}")
        # Return a string so the return type matches the success path
        return "Error occurred"
Error: This model's maximum context length is 4097 tokens. However, your messages resulted in 4121 tokens. Please reduce the length of the messages.
This error appears even though the llm above is configured as gpt-4.
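The 4,097-token limit in the message is gpt-3.5-turbo's context window, not gpt-4's (8,192), which suggests the engine fell back to the default LLM because ContextChatEngine.from_defaults above was never given the gpt-4 service_context. A minimal sketch of wiring it through, assuming a legacy llama-index release whose from_defaults accepts service_context:

# Pass the gpt-4 service_context so the engine does not fall back to the
# default gpt-3.5-turbo LLM (assumes the legacy from_defaults signature)
chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    service_context=service_context,
    verbose=True,
)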
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    verbose=True,
    # memory takes the buffer instance; memory_cls expects a class,
    # so passing the instance there as well was dropped
    memory=memory,
)
memory = ChatMemoryBuffer.from_defaults(token_limit=500)
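Note that token_limit only caps the chat history; the retrieved context still scales with similarity_top_k, so seven chunks per query can overflow the prompt on their own. A sketch that trims retrieval instead, mirroring the similarity_top_k=3 used further down:

# Fewer retrieved chunks -> smaller context block in every prompt
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=3,
)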
llm = OpenAI(
    model="gpt-3.5-turbo-16k", temperature=0.2, streaming=True, max_tokens=16383
)
print(llm._get_model_name(), llm._get_max_token_for_prompt("hello"))
gpt-3.5-turbo-16k 16383
Error: This model's maximum context length is 4097 tokens. However, your messages resulted in 4313 tokens. Please reduce the length of the messages.
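Despite the 16k model being configured, the error still reports a 4,097-token window, so the new llm is evidently not reaching the chat engine. On top of that, max_tokens=16383 reserves essentially the whole 16,384-token window for the completion, leaving no room for the prompt. The as_chat_engine call below takes the same route via a service_context; a sketch of the ContextChatEngine variant that addresses both problems (the max_tokens=1000 matches the ServiceContext below):

# Rebuild everything around the 16k llm; max_tokens=1000 leaves ~15k tokens
# for context, history, and the question (values are illustrative)
llm = OpenAI(model="gpt-3.5-turbo-16k", temperature=0.2, streaming=True, max_tokens=1000)
service_context = ServiceContext.from_defaults(llm=llm)
chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    service_context=service_context,
    memory=ChatMemoryBuffer.from_defaults(token_limit=3000),
    verbose=True,
)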
chat_engine = index.as_chat_engine(similarity_top_k=3, service_context=service_context)
service_context = ServiceContext.from_defaults(
    callback_manager=callback_manager,
    llm=OpenAI(model="gpt-3.5-turbo-16k", temperature=0, max_tokens=1000),
    chunk_size=1024,
    node_parser=node_parser,
)
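With the 16k llm and a 1,000-token completion cap in this ServiceContext, the as_chat_engine call above inherits the right model. A closing usage sketch, assuming callback_manager and node_parser are defined elsewhere in the thread and the ServiceContext is built before the engine:

# The engine now runs against gpt-3.5-turbo-16k instead of the 4k default;
# the question text is an illustrative placeholder
print(chatbot("What does the indexed markdown say about configuration?"))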