I am running into an issue creating documents out of an array of articles.
I am attempting to load in a set of docs, create LlamaIndex documents out of them, create embeddings, and then add them to a DeepLake dataset via
. It seems that
vector_store = DeepLakeVectorStore(dataset_path=dataset_path, ingestion_batch_size=1024).add(nodes)
expects a node structure different from what I currently have.
The error:
AttributeError: 'Node' object has no attribute 'node'
code:
# Validate the publication slug before querying Medium.
# (`not medium_input` already covers both None and the empty string,
# so the extra `== ''` comparison was redundant.)
if not medium_input:
    print("The string is empty.")
else:
    print("The string is not empty.")
    print(medium_input)

publication = medium.publication(publication_slug=medium_input)
# NOTE(review): _from is "now" and _to is 70 days in the past — confirm the
# Medium API expects the newer bound first; most "between" APIs take
# (start, end) in chronological order.
medium_articles = medium.publication(
    publication_id=str(publication._id)
).get_articles_between(_from=datetime.now(), _to=datetime.now() - timedelta(days=70))

# Wrap each article in a LlamaIndex Document, attaching the metadata we want
# stored alongside every chunk. (The original bound `article.content` to
# `document` and immediately overwrote it — that dead assignment is removed.)
docs = []
for article in medium_articles:
    document = Document(article.content)
    document.extra_info = {
        key: article.info[key] for key in ('url', 'published_at', 'title')
    }
    docs.append(document)

# Split the documents into nodes.
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(docs)
print('nodes', nodes)

dataset_path = f"hub://tali/{deeplake_datasets}"

# Fix for "AttributeError: 'Node' object has no attribute 'node'":
# DeepLakeVectorStore.add() expects embedding results (objects exposing
# `.node` and `.embedding`), not plain Node objects — which is why passing
# `nodes` directly failed. The supported pattern is to attach the vector
# store to the index through a StorageContext and let the index compute the
# embeddings and insert them for you.
from llama_index import GPTVectorStoreIndex, StorageContext

vector_store = DeepLakeVectorStore(
    dataset_path=dataset_path, ingestion_batch_size=1024
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = GPTVectorStoreIndex(nodes, storage_context=storage_context)