Find answers from the community

Home
Members
padiyar
p
padiyar
Offline, last seen 3 months ago
Joined September 25, 2024
p
padiyar
·

My code is

My code is
Plain Text
 # Shared reader instance; load_data() below calls loader.load_data on it.
 # NOTE(review): GoogleDriveReader is not imported in this snippet — confirm the import.
 loader = GoogleDriveReader()

def load_data(folder_id: str):
    """Load the documents of a folder and re-key them by file name.

    Passes ``folder_id`` through to ``loader.load_data`` and then sets each
    returned document's ``id_`` to the ``"file_name"`` entry of its metadata.

    Args:
        folder_id: Identifier of the folder handed to the loader.

    Returns:
        Every document the loader returned. Documents whose metadata has no
        ``"file_name"`` key are still included; only their ``id_`` is left
        as the loader set it.
    """
    loaded = loader.load_data(folder_id=folder_id)
    for item in loaded:
        try:
            file_name = item.metadata["file_name"]
        except KeyError:
            # No file name recorded: keep the id this document already has.
            continue
        else:
            item.id_ = file_name
    return loaded

# Load every document from the folder ("..." is a placeholder id).
docs = load_data(folder_id="...")

# Convert each loaded document to its LangChain representation.
documents = [doc.to_langchain_format() for doc in docs]

# Preview the first 1000 characters of the first converted document.
print(documents[0].page_content[:1000])



The page content looks like this:
Plain Text
 %PDF-1.7
            d 1/L 704968/O 424/E 410578/N 25/T 704466/H [ 555 457]>>
<</DecodeParms<</Columns 5/Predictor 12>>/Filter/FlateDecode/ID[<0DDF85C876E76C5F8037E2A25DC7C602><955405ABD1F33C4FB9ED0FD3BEE042C9>]/Index[422 61]/Info 421 0 R/Length 135/Prev 704467/Root 423 0 R/Size 483/Type/XRef/W[1 3 1]>>stream
hbbd```b``~"90̾&A$Dru8`$H,,""AdA8d,"]@d"4     Q`q~9dq Y#؅CX 
startxref
0
%%EOF
                  
<</C 593/Filter/FlateDecode/I 615/Length 362/S 457/V 567>>stream
4,Uk4EQ}AVX*aH3 /dd2pYKƙko4D=z`+gBiʥXn.D>Ohݮ>IN2"7sOVȨ``R,.`1؁.a1ԑ 5,3p@gb><;[|nOh6/ȿ.G
<</Annots 453 0 R/Contents[431 0 R 432 0 R 433 0 R 434 0 R 4ta 65 0 R/Pages 418 0 R/StructTreeRoot 104 0 R/Type/Catalog>>
9 comments
p
W