Well, besides faiss itself missing from the dependencies of
llama-index-vector-stores-faiss
(maybe because there is a CPU and a GPU version?), I did this:
# Ingestion timing script: load documents from disk, run them through a
# sentence-splitting + OpenAI-embedding pipeline, report how long the pipeline
# took, then build a FAISS-backed VectorStoreIndex from the resulting nodes.
import os
import time

# Placeholder key; assigned before any OpenAI embedding object is constructed.
os.environ["OPENAI_API_KEY"] = "sk-..."

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.text_splitter import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss

# Vector dimension handed to the flat L2 FAISS index.
# NOTE(review): presumably the output size of the default OpenAI embedding
# model -- confirm if a different embedding model is configured.
EMBEDDING_DIM = 1536

vector_store = FaissVectorStore(
    faiss_index=faiss.IndexFlatL2(EMBEDDING_DIM),
)

documents = SimpleDirectoryReader("./docs/examples/data/10k").load_data()

# Create the pipeline with its transformations, applied in list order:
# chunk the text first, then embed the chunks.
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=256, chunk_overlap=20),
        OpenAIEmbedding(embed_batch_size=256),
    ],
)

# Run the pipeline and time it. perf_counter() is a monotonic,
# high-resolution clock, so the interval cannot be skewed by system clock
# adjustments the way a time.time() difference can.
start = time.perf_counter()
nodes = pipeline.run(documents=documents)
end = time.perf_counter()
print(f"Time taken: {end - start} seconds for {len(nodes)} nodes")

# Build the index over the processed nodes, stored in the FAISS vector store.
index = VectorStoreIndex(
    nodes=nodes,
    storage_context=StorageContext.from_defaults(vector_store=vector_store),
)
> Time taken: 17.46170997619629 seconds for 2721 nodes