My code is:
# Module-level reader instance; load_data() below calls its load_data() method.
loader = GoogleDriveReader()
def load_data(folder_id: str):
    """Load the documents of a folder and use each file name as the document id.

    Calls ``loader.load_data(folder_id=folder_id)`` and, for every returned
    document whose metadata has a ``"file_name"`` entry, assigns that value
    to ``id_``.

    Documents without a ``"file_name"`` entry keep their existing ``id_``.
    They are NOT removed from the result: every loaded document is returned.

    Args:
        folder_id: Identifier of the folder, forwarded to the loader as-is.

    Returns:
        The documents produced by the loader, in the loader's order.
    """
    loaded = loader.load_data(folder_id=folder_id)
    for item in loaded:
        meta = item.metadata
        # Only re-key documents that actually carry a file name.
        if "file_name" in meta:
            item.id_ = meta["file_name"]
    return loaded
# Load the folder's documents, convert each one with to_langchain_format(),
# and print the first 1000 characters of the first converted document.
docs = load_data(folder_id="...")
documents = list(map(lambda entry: entry.to_langchain_format(), docs))
# NOTE(review): if this prints text starting with "%PDF", the file content was
# presumably read as plain text instead of being parsed as a PDF; verify how
# the reader picks a parser for each file.
print(documents[0].page_content[:1000])
The page content looks like this (raw PDF bytes rather than extracted text):
%PDF-1.7
d 1/L 704968/O 424/E 410578/N 25/T 704466/H [ 555 457]>>
<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode/ID[<0DDF85C876E76C5F8037E2A25DC7C602><955405ABD1F33C4FB9ED0FD3BEE042C9>]/Index[422 61]/Info 421 0 R/Length 135/Prev 704467/Root 423 0 R/Size 483/Type/XRef/W[1 3 1]>>stream
hbbd```b``~"90̾&A$Dru8`$H,,""AdA8d,"]@d"4 Q`q~9dq Y#CX
startxref
0
%%EOF
<</C 593/Filter/FlateDecode/I 615/Length 362/S 457/V 567>>stream
4,Uk4EQ}AVX*aH3 /dd2pYKƙko4D=z`+gBiʥXn.D>Ohݮ>IN2"7sOVȨ``R,.`1.a1ԑ 5,3p@gb><;[|nOh6/ȿ.G
<</Annots 453 0 R/Contents[431 0 R 432 0 R 433 0 R 434 0 R 4ta 65 0 R/Pages 418 0 R/StructTreeRoot 104 0 R/Type/Catalog>>