import os

# Generate an ingestion pipeline that embeds documents and writes the
# resulting nodes into the vector store.
pipeline = IngestionPipeline(
    transformations=[
        OpenAIEmbedding(),
    ],
    vector_store=vector_store,
)

# Restore the pipeline state, but only when an earlier run has persisted it;
# on the very first run there is nothing to load yet.
if os.path.exists("pipeline_storage"):
    pipeline.load("pipeline_storage")

# Ingest directly into a vector db.
# `documents` must be passed by keyword: the first positional parameter of
# `IngestionPipeline.run()` is `show_progress`, so `pipeline.run(documents)`
# would run the pipeline without any input documents.
pipeline.run(documents=documents)

# Save the pipeline so the next run can restore it.
pipeline.persist("pipeline_storage")
# Build the pipeline pieces first, then wire them together: a single
# embedding transformation, the existing vector store as the sink, and an
# in-memory document store attached to the pipeline.
embedding_step = OpenAIEmbedding()
document_store = SimpleDocumentStore()

pipeline = IngestionPipeline(
    transformations=[embedding_step],
    vector_store=vector_store,
    docstore=document_store,
)
# Load documents (recursively, using each file name as the document id).
documents = SimpleDirectoryReader(
    "data",
    recursive=True,
    filename_as_id=True,
).load_data()

# Reported output for the sample data set: "Found 43 Documents"
print(f"Found {len(documents)} Documents")

# Generate an ingestion pipeline with a document store attached.
pipeline = IngestionPipeline(
    transformations=[
        OpenAIEmbedding(),
    ],
    docstore=SimpleDocumentStore(),
)

# Run the pipeline.
# `documents` must be passed by keyword: the first positional parameter of
# `IngestionPipeline.run()` is `show_progress`, so the positional call
# `pipeline.run(documents)` ran with no input documents and was what
# produced "Ingested 0 Nodes".
nodes = pipeline.run(documents=documents)

print(f"Ingested {len(nodes)} Nodes")
# Read every file under "data" (recursively); file names become document ids.
reader = SimpleDirectoryReader("data", recursive=True, filename_as_id=True)
documents = reader.load_data()
print(f"Found {len(documents)} Documents")

# Assemble the ingestion pipeline: one embedding step, the vector store as
# the sink, and a document store attached to the pipeline.
embedding_step = OpenAIEmbedding()
document_store = SimpleDocumentStore()
pipeline = IngestionPipeline(
    transformations=[embedding_step],
    vector_store=vector_store,
    docstore=document_store,
)

# Directory used both to restore earlier pipeline state and to save it again.
persist_dir = "pipeline_storage"

# Only restore when a previous run left persisted state behind.
if os.path.exists(persist_dir):
    pipeline.load(persist_dir)

# Ingest directly into the vector db and report what was processed.
nodes = pipeline.run(documents=documents)
print(f"Ingested {len(nodes)} Nodes")
for node in nodes:
    print(f"Node: {node.text}")
    print(f"Node: {node.id_}")

# Persist the pipeline state for the next run.
pipeline.persist(persist_dir)