from llama_index.readers.schema.base import Document
from llmsherpa.readers import LayoutPDFReader

# Endpoint URL handed to LayoutPDFReader below.
llmsherpa_api_url = "https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all"
# A full file path such as /home/downloads/xyz.pdf is also allowed here.
pdf_path = "2023190_riteaid_complaint_filed.pdf"

pdf_reader = LayoutPDFReader(llmsherpa_api_url)
# Read the PDF once; the same call used to be issued twice back to back,
# repeating the whole request for an identical result.
doc = pdf_reader.read_pdf(pdf_path)

# Keep every per-chunk Document; previously each one was overwritten by the
# next loop iteration and only the last survived.
documents = []
for chunk in doc.chunks():
    # Create a Document object for each chunk.
    document = Document(text=chunk.to_context_text(), extra_info={})
    documents.append(document)
# Question-answering prompt template with two placeholders: {context_str}
# (retrieved context) and {query_str} (the user's question).
#
# NOTE(review): compared with the usual "<|im_start|>role ... <|im_end|>"
# layout, the markers here look irregular -- the first <|im_start|> carries no
# role name and the user turn is never closed with <|im_end|>. The text is
# kept byte-for-byte; the assignment that follows rebinds this name.
qa_prompt_tmpl_str = (
    "<|im_start|>Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query\n<|im_end|>"
    "<|im_start|>user: {query_str}\n"
    "<|im_start|>assistant Answer: "
)
# Question-answering prompt: one user turn that carries the retrieved context
# ({context_str}) followed by the question ({query_str}), then an opened
# assistant turn for the model to complete.
_qa_prompt_segments = (
    # User turn: context fenced between two dashed rules.
    "<|im_start|>user\n",
    "Context information is below.\n",
    "---------------------\n",
    "{context_str}\n",
    "---------------------\n",
    # Instruction plus the query, closing the user turn.
    "Given the context information and not prior knowledge, ",
    "answer the query:\n",
    "{query_str}<|im_end|>\n",
    # Assistant turn is left open on purpose.
    "<|im_start|>assistant\n",
)
qa_prompt_tmpl_str = "".join(_qa_prompt_segments)