The task involves some made-up data about drones. Here is how I'm building the index and prompting it:
def prepare_data():
    '''Load the saved vector index and refresh it, or build it from scratch.

    When ``index.json`` exists, the saved index is loaded and every file in
    ``config['data']['books']`` that is not listed in the ``docs.txt``
    manifest is read and inserted into it. Otherwise the index is built from
    all documents in that directory. Whenever the index changes, both
    ``index.json`` and ``docs.txt`` are rewritten in the working directory.

    Returns:
        The loaded or newly built ``GPTSimpleVectorIndex``.

    Raises:
        FileNotFoundError: if ``index.json`` exists but ``docs.txt`` does not.
    '''
    books_dir = config['data']['books']
    # Snapshot of every path currently in the data directory; this is what
    # gets persisted to docs.txt as the "already indexed" manifest.
    docs = glob.glob(books_dir + '/*')

    # Load from disk if the index already exists.
    if Path('index.json').is_file():
        index = GPTSimpleVectorIndex.load_from_disk('index.json')
        with open('docs.txt', 'r') as f:
            old_docs = set(json.load(f))

        # Check for new files. Only regular files can be read, so any other
        # glob match (e.g. a sub-directory) is skipped.
        new_paths = [
            path for path in docs
            if path not in old_docs and Path(path).is_file()
        ]
        if new_paths:
            # Read the new files with the same reader used for the initial
            # build, so their *contents* are indexed. Wrapping the path in
            # Document(...) would index the path string itself.
            new_docs = SimpleDirectoryReader(input_files=new_paths).load_data()
            for doc in new_docs:
                index.insert(doc)
            index.save_to_disk('index.json')
            with open('docs.txt', 'w') as f:
                json.dump(docs, f)
        return index

    # No saved index yet: load everything and build it.
    documents = SimpleDirectoryReader(books_dir).load_data()
    # Save the list of files contained in the directory as JSON.
    with open('docs.txt', 'w') as f:
        json.dump(docs, f)
    # NOTE(review): num_output=2000 with max_input_size=2048 appears to leave
    # very little room for the prompt and retrieved context -- confirm that
    # these values are intended.
    index = GPTSimpleVectorIndex(
        documents, max_input_size=2048, num_output=2000, max_chunk_overlap=12
    )
    index.save_to_disk('index.json')
    return index
And here is the prompt:
# Prompt sent to the index: the user's question, the drone model it concerns,
# and the instruction describing the expected answer format.
query_prompt = '''### USER QUERY: it just started snowing, what is the snow checklist?
### DRONE: AeroGuardian AG950
### DIRECTION: Find and provide the relevant weather checklist. Provide the number of elements in the checklist, then list each element. Answer in the form of a JSON object. '''

# Run the query against the index and show the response.
response = index.query(query_prompt)
print(response)