I'm looking for ways to speed up embedding generation; a large file currently takes several minutes. Should this be moved to a pipeline? This is running on AWS Lambda with LlamaIndex 0.9 (still working on the 0.10 upgrade). The embedding model is OpenAI text-embedding-3-large.
```python
from llama_index import SimpleDirectoryReader
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.node_parser import NodeParser


def add_nodes(self, nodes):
    return self.vector_store.add(nodes)

def add_nodes_from_file(
    self, tmpfile, external_id: str, node_parser: NodeParser, embedding: HuggingFaceEmbedding
):
    dir_reader = SimpleDirectoryReader(input_files=[tmpfile])
    docs = dir_reader.load_data()
    # Tag each document so its chunks can be traced back to the source file.
    for doc in docs:
        doc.metadata["external_id"] = external_id
    nodes = node_parser.get_nodes_from_documents(docs)
    # Embed one node at a time: a separate embedding request per node.
    for node in nodes:
        node_embedding = embedding.get_text_embedding(
            node.get_content(metadata_mode="all")
        )
        node.embedding = node_embedding
    res = self.add_nodes(nodes)
    print("Result from add nodes: " + str(res))
    return res
```
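
The main cost in this loop is that `get_text_embedding` makes one API round trip per node, so a file that parses into a few thousand chunks means a few thousand sequential HTTP requests. Before reaching for a pipeline, the batch method on the embedding object already collapses those calls. A minimal sketch (`embed_nodes` is a hypothetical helper name; `get_text_embedding_batch` and `embed_batch_size` are part of LlamaIndex 0.9's embedding interface, assuming a 0.9.x release recent enough to know the text-embedding-3 model names):

```python
from llama_index.embeddings import OpenAIEmbedding

# Each API call now carries embed_batch_size texts instead of one.
embedding = OpenAIEmbedding(
    model="text-embedding-3-large",
    embed_batch_size=100,  # tune for your chunk size / payload limits
)

def embed_nodes(nodes, embedding):
    texts = [node.get_content(metadata_mode="all") for node in nodes]
    vectors = embedding.get_text_embedding_batch(texts, show_progress=True)
    for node, vector in zip(nodes, vectors):
        node.embedding = vector
    return nodes
```

Since the wall-clock time is dominated by request latency rather than token count, cutting the request count by roughly 100x should shrink the embedding step roughly proportionally.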
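
On the pipeline question: you don't need 0.10 for that. `IngestionPipeline` is available in 0.9 under `llama_index.ingestion`, treats the embedding model as a transformation (batching internally), and can insert straight into the vector store. A sketch of how the method could look, assuming `self.vector_store` is a LlamaIndex vector store instance (exact `run()` options may differ across 0.9.x releases):

```python
from llama_index import SimpleDirectoryReader
from llama_index.ingestion import IngestionPipeline

def add_nodes_from_file(self, tmpfile, external_id, node_parser, embedding):
    docs = SimpleDirectoryReader(input_files=[tmpfile]).load_data()
    for doc in docs:
        doc.metadata["external_id"] = external_id
    # The pipeline parses the docs into nodes, embeds them in batches,
    # and inserts the embedded nodes into the attached vector store.
    pipeline = IngestionPipeline(
        transformations=[node_parser, embedding],
        vector_store=self.vector_store,
    )
    return pipeline.run(documents=docs, show_progress=True)
```

The pipeline also supports transformation caching, though on Lambda the default in-memory cache won't survive across invocations.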