I am using LlamaParse in production. When I call LlamaParse for the first time, I get this error:
Started parsing the file under job_id 3b8e220b-01cd-4c81-b946-8fabb20f5b9f
...Error while parsing the file '/tmp/tmpv6au6tw5/document.pdf':
But when I call LlamaParse again, it parses the same PDF with no problem.
try:
bucket = s3_url.split("/")[2]
key = "/".join(s3_url.split("/")[3:])
with tempfile.TemporaryDirectory() as temp_dir:
s3_client = get_s3_client()
pdf_path = os.path.join(temp_dir, "document.pdf")
s3_client.download_file(bucket, key, pdf_path)
try:
parser = LlamaParse(
result_type="markdown",
auto_mode=True,
auto_mode_trigger_on_table_in_page=True,
skip_diagonal_text=True,
disable_ocr=True,
verbose=True,
disable_image_extraction=True,
do_not_cache=True,
)
documents = parser.load_data(pdf_path)
except Exception as e:
raise e
markdown_text = "\n".join(doc.text for doc in documents)
return markdown_text
except Exception as e:
error_msg = str(e)
if "exceeded the maximum number of pages" in error_msg:
raise ExceededPageLimitError(error_msg) from e
elif "Internal Server Error" in error_msg:
raise PDFProcessingError(f"LlamaParse server error: {error_msg}") from e
else:
raise PDFProcessingError(f"Error processing PDF: {error_msg}") from e