%pip -q install langchain==0.0.148 openai
%pip -q install llama_index==0.5.6

from llama_index import SimpleDirectoryReader, GPTListIndex, readers, GPTSimpleVectorIndex, LLMPredictor, PromptHelper, ServiceContext
from langchain import OpenAI
import sys
import os
from IPython.display import Markdown, display

def construct_index(directory_path):
    # set maximum input size
    max_input_size = 4096
    # set number of output tokens
    num_outputs = 30000
    # set maximum chunk overlap
    max_chunk_overlap = 60
    # set chunk size limit
    chunk_size_limit = 6024

    # define prompt helper
    prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)

    # define LLM
    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="gpt-3.5-turbo-0613", max_tokens=num_outputs))

    documents = SimpleDirectoryReader(directory_path).load_data()

    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)

    index.save_to_disk('index.json')

    return index

def ask_ai():
    index = GPTSimpleVectorIndex.load_from_disk('index.json')
    while True:
        query = input("What do you want to ask? ")
        response = index.query(query)
        display(Markdown(f"Response: <b>{response.response}</b>"))

construct_index("data/")
ask_ai()
index = GPTSimpleVectorIndex.load_from_disk('index.json')
index = GPTSimpleVectorIndex.load_from_disk('index.json', service_context=service_context)
from llama_index import SimpleDirectoryReader, GPTListIndex, readers, GPTVectorStoreIndex, LLMPredictor, PromptHelper, ServiceContext, StorageContext, load_index_from_storage
from langchain import OpenAI
import sys
import os
from IPython.display import Markdown, display

def construct_index(directory_path):
    # set maximum input size
    max_input_size = 4096
    # set number of output tokens
    num_outputs = 10000
    # set maximum chunk overlap
    max_chunk_overlap = 20
    # set chunk size limit
    chunk_size_limit = 600

    # define prompt helper
    # prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)

    # define LLM
    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="gpt-3.5-turbo-0613", max_tokens=num_outputs))

    documents = SimpleDirectoryReader(directory_path).load_data()

    # storage_context = StorageContext.from_defaults()
    # storage_context.persist(persist_dir="/Users/og/Downloads/RozGPT-main")
    # service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, chunk_size=1024
    )
    index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)

    storage_context = StorageContext.from_defaults()
    storage_context.persist(persist_dir="/Users/og/Downloads/RozGPT-main")
    storage_context = StorageContext.from_defaults(persist_dir="")

    # index.save_to_disk('index.json')

    return index

def ask_ai():
    index = load_index_from_storage(storage_context, service_context=service_context)
    # index = load_index_from_storage(storage_context, service_context=service_context)
    while True:
        query = input("What do you want to ask? ")
        response = index.query(query)
        display(Markdown(f"Response: <b>{response.response}</b>"))
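For reference, here is a minimal sketch of the persist/reload round-trip this cell seems to be aiming for, assuming llama_index ~0.6 (where GPTVectorStoreIndex and StorageContext exist) and an illustrative ./storage directory; none of the paths or the chunk_size value beyond what the cell already uses are from the original notebook:

from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, load_index_from_storage

# build the index once and persist it to disk
documents = SimpleDirectoryReader("data/").load_data()
service_context = ServiceContext.from_defaults(chunk_size=1024)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./storage")  # illustrative path

# later (or in ask_ai): reload from the same directory
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context, service_context=service_context)
# note: in this API version, queries go through index.as_query_engine().query(...), not index.query(...)

The key point is that the same persist_dir has to be used when persisting and when reloading, and that storage_context and service_context must be in scope wherever the index is reloaded.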
Cell In[23], line 34, in ask_ai()
     32 def ask_ai():
     33     # index = GPTSimpleVectorIndex.load_from_disk('index.json')
---> 34     index = GPTSimpleVectorIndex.load_from_disk('index.json', service_context=service_context)
     35     while True:
     36         query = input("What do you want to ask? ")

NameError: name 'service_context' is not defined
gpt-3.5-turbo is a chat model, so it has to go through langchain's ChatOpenAI wrapper rather than the completion-style OpenAI class:

from langchain.chat_models import ChatOpenAI

llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo-0613", max_tokens=num_outputs))
def construct_index(directory_path):
    # set maximum input size
    max_input_size = 4096
    # set number of output tokens
    num_outputs = 30000
    # set maximum chunk overlap
    max_chunk_overlap = 60
    # set chunk size limit
    chunk_size_limit = 6024

    # define prompt helper
    prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)

    # define LLM
    # llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="gpt-3.5-turbo-0613", max_tokens=num_outputs))
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo-0613", max_tokens=num_outputs))

    documents = SimpleDirectoryReader(directory_path).load_data()

    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)

    index.save_to_disk('index.json')

    return index

def ask_ai():
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    # index = GPTSimpleVectorIndex.load_from_disk('index.json')
    index = GPTSimpleVectorIndex.load_from_disk('index.json', service_context=service_context)
    while True:
        query = input("What do you want to ask? ")
        response = index.query(query)
        display(Markdown(f"Response: <b>{response.response}</b>"))
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 ask_ai()

Cell In[2], line 34, in ask_ai()
     33 def ask_ai():
---> 34     service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
     35     # index = GPTSimpleVectorIndex.load_from_disk('index.json')
     36     index = GPTSimpleVectorIndex.load_from_disk('index.json', service_context=service_context)

NameError: name 'llm_predictor' is not defined
# set maximum input size
max_input_size = 4096
# set number of output tokens
num_outputs = 512
# set maximum chunk overlap
max_chunk_overlap = 60
# set chunk size limit
chunk_size_limit = 1024

# define prompt helper
prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)

# define LLM
# llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="gpt-3.5-turbo-0613", max_tokens=num_outputs))
llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo-0613", max_tokens=num_outputs))

service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit=chunk_size_limit)

def construct_index(directory_path):
    documents = SimpleDirectoryReader(directory_path).load_data()
    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
    index.save_to_disk('index.json')
    return index

def ask_ai():
    index = GPTSimpleVectorIndex.load_from_disk('index.json', service_context=service_context)
    while True:
        query = input("What do you want to ask? ")
        response = index.query(query)
        display(Markdown(f"Response: <b>{response.response}</b>"))
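With the settings, llm_predictor, and service_context all defined at module level, both functions now see the same objects, and the notebook runs the same way as the very first version (assuming the documents are still under data/, as in the first cell):

construct_index("data/")
ask_ai()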
Models like gpt-3.5-turbo are decoder models. What this means is that they generate one token at a time, add it to the input, and generate the next token. Since I set num_output to 512, that means I have to leave room for 512 tokens. This means the maximum prompt length that can be sent to the LLM is 4096 minus 512, i.e. 3,584 tokens. If that is not enough, OpenAI offers models with larger context windows:

gpt-3.5-turbo-16k       # 16k input
gpt-3.5-turbo-16k-0613  # 16k input
gpt-4                   # 8k input
gpt-4-0613              # 8k input
gpt-4-32k               # 32k input
gpt-4-32k-0613          # 32k input
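To make the arithmetic concrete, here is a small, purely illustrative sketch (the CONTEXT_WINDOWS table and the helper are mine, not part of the notebook) of the prompt budget that is left once num_output tokens are reserved:

# approximate context windows, in tokens, for the models above
CONTEXT_WINDOWS = {
    "gpt-3.5-turbo-0613": 4096,
    "gpt-3.5-turbo-16k-0613": 16384,
    "gpt-4-0613": 8192,
    "gpt-4-32k-0613": 32768,
}

def max_prompt_tokens(model_name: str, num_outputs: int) -> int:
    # room left for the prompt after reserving num_outputs tokens for the answer
    return CONTEXT_WINDOWS[model_name] - num_outputs

print(max_prompt_tokens("gpt-3.5-turbo-0613", 512))    # 4096 - 512 = 3584
print(max_prompt_tokens("gpt-3.5-turbo-0613", 30000))  # negative: the original num_outputs = 30000 could never fit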
llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo-0613", max_tokens=num_outputs))
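If the retrieved chunks plus the question need more room than the 4k window leaves, the same line can point at one of the larger-context models listed above; a sketch, assuming the rest of the cell stays unchanged:

llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo-16k-0613", max_tokens=num_outputs))
# max_input_size in the PromptHelper should then be raised to match the larger window (16384 for the 16k models)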