def load_index():
    """Return the index stored under ``./storage``.

    When no ``storage`` path exists yet, the index is built first via
    ``build_index()`` and then loaded from the persisted files.
    """
    storage_missing = not os.path.exists('storage')
    if storage_missing:
        print('Building index...')
        build_index()
    return load_index_from_storage(
        StorageContext.from_defaults(persist_dir='./storage')
    )


def ask_question(index, query):
    """Run ``query`` through the index's query engine and return the response."""
    return index.as_query_engine().query(query)
def get_service_context():
    """Create the ServiceContext used when building the index.

    Combines the ``llm_predictor`` defined outside this function with a
    PromptHelper configured from the fixed limits below.

    Returns:
        The object produced by ``ServiceContext.from_defaults``.
    """
    max_input_size = 4096
    num_output = 256
    max_chunk_overlap = 20
    chunk_size_limit = 600
    prompt_helper = PromptHelper(
        max_input_size,
        num_output,
        max_chunk_overlap,
        chunk_size_limit=chunk_size_limit,
    )
    return ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )


def get_documents():
    """Load the documents found in the ``notes`` directory.

    Each document gets ``{"filename": <value>}`` as metadata, where the value
    is whatever the reader passes to the ``file_metadata`` callback.

    Returns:
        A list of the documents returned by the reader.
    """
    def file_metadata(filename):
        return {"filename": filename}

    reader = SimpleDirectoryReader('notes', file_metadata=file_metadata)
    return list(reader.load_data())


def build_index():
    """Build a vector index over the notes and persist it to ``./storage``.

    The index is created with ``GPTVectorStoreIndex.from_documents``;
    ``refresh`` is an instance method and cannot create an index when called
    on the class. The service context comes from ``get_service_context()``
    so the prompt helper settings are actually applied.
    """
    service_context = get_service_context()
    documents = get_documents()
    index = GPTVectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
    )
    # Written to the same directory that load_index() reads from.
    index.storage_context.persist(persist_dir='./storage')


def load_index():
    """Load the persisted index, building it first if needed, then refresh it.

    Returns:
        The loaded index after ``refresh`` has been run against the current
        contents of the ``notes`` directory.
    """
    if not os.path.exists('storage'):
        print('Building index...')
        build_index()
    storage_context = StorageContext.from_defaults(persist_dir='./storage')
    index = load_index_from_storage(storage_context)

    # NOTE(review): refresh presumably matches documents by their doc id, so
    # ids need to be stable across runs for unchanged files to be skipped --
    # confirm how the reader assigns ids.
    documents = get_documents()
    updated_documents = index.refresh(documents)
    print('Updated documents: ', updated_documents)
    # Hand the index back so callers such as ask_question() can use it.
    return index