@Logan M This code loads and embeds approximately 3000 markdown files, but embedding takes too long and the index is slow to (re)build on every run. Is there a way to improve this?
# Build one vector index per chunk size over the ./markdown corpus.
#
# NOTE(review): the main performance levers for ~3000 files are:
#   * The whole corpus is embedded once PER chunk size (4x here). Persist each
#     index after the first run (vector_index.storage_context.persist(...))
#     and reload it with load_index_from_storage() instead of re-embedding.
#   * A chunk_overlap of 50% roughly doubles the number of nodes — and thus
#     the number of embedding calls. Typical overlap is 10-20%; see
#     OVERLAP_RATIO below.
documents = SimpleDirectoryReader("./markdown").load_data()

# Merge every file into one continuous text so the splitter chunks across
# file boundaries. NOTE(review): this discards per-file metadata (filenames
# etc.); passing `documents` straight to the splitter would preserve it.
doc_text = "\n\n".join([d.get_content() for d in documents])
docs = [Document(text=doc_text)]

# NOTE(review): `llm` is not used in this snippet — presumably consumed by
# later query/eval code; verify before removing.
llm = OpenAI(model="gpt-3.5-turbo")

chunk_sizes = [128, 256, 512, 1024]

# Overlap as a fraction of chunk size. 0.5 reproduces the original
# chunk_size // 2 behaviour exactly; lowering it to ~0.1-0.2 cuts the node
# count (and embedding time/cost) nearly in half.
OVERLAP_RATIO = 0.5

nodes_list = []       # parallel to chunk_sizes: the nodes built per size
vector_indices = []   # parallel to chunk_sizes: one index per size

for chunk_size in chunk_sizes:
    print(f"Chunk Size: {chunk_size}")
    splitter = SentenceSplitter(
        chunk_size=chunk_size,
        chunk_overlap=int(chunk_size * OVERLAP_RATIO),
    )
    nodes = splitter.get_nodes_from_documents(docs)

    # Tag each node with the chunk size it came from, but keep that tag out
    # of both the embedded text and the LLM-visible text.
    for node in nodes:
        node.metadata["chunk_size"] = chunk_size
        node.excluded_embed_metadata_keys = ["chunk_size"]
        node.excluded_llm_metadata_keys = ["chunk_size"]

    nodes_list.append(nodes)
    # Embedding happens here, once per chunk size — the expensive step.
    vector_index = VectorStoreIndex(nodes)
    vector_indices.append(vector_index)

print(vector_indices)