Is it normal for extraction from documents to take 20+ minutes?
I'm trying to build some embeddings from the PostgreSQL internals documentation:
import os

from llama_index import (SimpleDirectoryReader, StorageContext,
                         VectorStoreIndex, load_index_from_storage)
from llama_index.query_engine import RetrieverQueryEngine
# Resolve every path relative to this script so the result does not depend on
# the current working directory.
base_path = os.path.dirname(os.path.realpath(__file__))
persist_dir = os.path.join(base_path, "../index")
postgres_doc_dir = os.path.join(base_path, "../postgres-documents")

# A persisted index on disk means the expensive build below can be skipped.
index_exists = os.path.exists(persist_dir)

if index_exists:
    # Reload the previously persisted index instead of rebuilding it.
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)
else:
    # First run: read both PDFs, build the vector index, and persist it.
    # NOTE(review): this branch is presumably where the long wait happens
    # (PDF parsing plus one embedding per chunk) -- confirm with the progress
    # output enabled below.
    print("No persisted index found; loading documents and building the "
          "index. This may take a while...")
    document_loader = SimpleDirectoryReader(input_files=[
        os.path.join(postgres_doc_dir, "postgresql_internals-14_en.pdf"),
        os.path.join(postgres_doc_dir, "postgresql-16-US.pdf"),
    ])
    documents = document_loader.load_data()
    # show_progress=True makes the build report progress, so a slow run can be
    # told apart from a hung one.
    index = VectorStoreIndex.from_documents(documents, show_progress=True)
    # Persist so that later runs take the fast branch above.
    index.storage_context.persist(persist_dir)

# Ask one question interactively and print the engine's answer.
query = input("Enter your query: ")
query_engine = index.as_query_engine()
response = query_engine.query(query)
print(response)
It's been running for ages and still hasn't gotten to the "Enter your query" part.
I can hear my fans blowing though, lol