It looks like they don't generate certain attributes, like `get_doc_id`. Maybe I'm doing something wrong; here's my code snippet:
from langchain.document_loaders import UnstructuredFileLoader, BSHTMLLoader, UnstructuredMarkdownLoader
documents = []
for filename in os.listdir(data_directory):
file_path = os.path.join(data_directory, filename)
if filename.endswith(".md"):
loader = UnstructuredMarkdownLoader(file_path)
elif filename.endswith(".html"):
loader = BSHTMLLoader(file_path)
elif filename.endswith('.txt'):
loader = UnstructuredFileLoader(file_path)
documents.extend(loader.load())
index = GPTWeaviateIndex.from_documents(documents, weaviate_client=client)
This returns the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[108], line 1
----> 1 index = GPTWeaviateIndex.from_documents(documents, weaviate_client=client)
File ~/projects/GPeaT/backend/venv/lib/python3.8/site-packages/llama_index/indices/base.py:101, in BaseGPTIndex.from_documents(cls, documents, docstore, service_context, **kwargs)
98 docstore = docstore or get_default_docstore()
100 for doc in documents:
--> 101 docstore.set_document_hash(doc.get_doc_id(), doc.get_doc_hash())
103 nodes = service_context.node_parser.get_nodes_from_documents(documents)
105 return cls(
106 nodes=nodes,
107 docstore=docstore,
108 service_context=service_context,
109 **kwargs,
110 )
AttributeError: 'Document' object has no attribute 'get_doc_id'