data = [] for filename in os.listdir(pdf_directory): if filename.endswith(".pdf"): pdf_path = os.path.join(pdf_directory, filename) print(f"\nProcessing document: {filename}") doc = pdf_reader.read_pdf(pdf_path),
extra_info_user = get_extra_info()
for chunk in doc.chunks(): chunk_text = chunk.to_text(include_children=True, recurse=True) docs = nlp(chunk_text) extra_info_cats = {"summary": docs.text, "classification": docs.cats}
extra_info = {extra_info_user, extra_info_cats}
document = Document( text= chunk_text, extra_info=extra_info ) data.append(document) return data