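I'm doing the simplest thing possible here (deleting every indexed document that is missing from the latest scrape, then refreshing the rest), but maybe there is a better approach.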
# IDs that are in the index but absent from the latest scrape are stale.
# NOTE(review): assumes each item in newly_scraped_docs exposes a `doc_id`
# comparable to the keys of index.ref_doc_info — confirm.
doc_ids_for_deletion = set(index.ref_doc_info) - {p.doc_id for p in newly_scraped_docs}
#%%
# Delete each stale document from the index and report the outcome.
for doc_id in doc_ids_for_deletion:
    # Re-check membership against the live index in case it changed after the
    # ID set was computed (e.g. cells were run out of order).
    if doc_id in index.ref_doc_info:
        # NOTE(review): delete_from_docstore=True presumably removes the stored
        # document as well as its index entries — confirm against the library docs.
        index.delete_ref_doc(doc_id, delete_from_docstore=True)
        print(f"Deleted doc {doc_id}")
    else:
        print(f"Doc {doc_id} not found in index")
#%%
# Refresh the index with the newly scraped documents.
# NOTE(review): the nested update_kwargs -> delete_kwargs dict is presumably
# forwarded so that replaced documents are also removed from the docstore —
# confirm against the refresh_ref_docs documentation.
refreshed_docs = index.refresh_ref_docs(
    newly_scraped_docs,
    update_kwargs={"delete_kwargs": {"delete_from_docstore": True}},
)