I build a GPTVectorStoreIndex, and refresh it. My index is always refreshed with all docs irrespective of if they changed - Is there an obvious flaw in this code which i'm not seeing? based on the documentation and the code
https://github.com/jerryjliu/llama_index/blob/79c40a0a0382c5952b3f3c5b10663344aee19c1a/llama_index/indices/base.py#L17 , i don't see anything
def get_service_context():
max_input_size = 4096
num_output = 256
max_chunk_overlap = 20
chunk_size_limit = 600
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
return service_context
def get_documents():
file_metadata = lambda x: {"filename": x}
reader = SimpleDirectoryReader('notes', file_metadata=file_metadata)
documents = list(reader.load_data())
return documents
def build_index():
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
documents = get_documents()
index = GPTVectorStoreIndex.refresh(documents)
index.storage_context.persist()
def load_index():
# index if dir 'storage' does not exist
if not os.path.exists('storage'):
print('Building index...')
build_index()
storage_context = StorageContext.from_defaults(persist_dir='./storage')
# refresh index
index = load_index_from_storage(storage_context)
documents = get_documents()
updated_documents = index.refresh(documents)
print('Updated documents: ', updated_documents)