from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader

# Build the LlamaParse client that will be registered for PDF files below.
parser = LlamaParse(
    api_key="llx-...",  # can also be set in your env as LLAMA_CLOUD_API_KEY
    result_type="markdown",  # "markdown" and "text" are available
    verbose=True,
)

# Register `parser` under the ".pdf" extension, then load everything that
# SimpleDirectoryReader finds in ./data.
file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader(
    "./data",
    file_extractor=file_extractor,
).load_data()
from pathlib import Path  # stdlib; `Path` is used just below

# Root directory handed to SimpleDirectoryReader (read with recursive=True).
files = Path("./DB_Useful/tests")

# Each value is a reader *instance*: UnstructuredReader is called here, the
# same way the later `file_extractor` assignment in this file does, instead
# of registering the bare class.
# NOTE(review): `parser` and `UnstructuredReader` are not defined in this
# snippet — they must already be in scope (defined/imported elsewhere).
file_extractor = {
    ".pdf": parser,
    ".docx": UnstructuredReader(),
    ".jpg": UnstructuredReader(),
    ".csv": UnstructuredReader(),
}
documents = SimpleDirectoryReader(
    files,
    file_extractor=file_extractor,
    recursive=True,
).load_data()
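# Dependency note: docx2txt (presumably must be installed for .docx parsing, e.g. `pip install docx2txt` — confirm)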
# Extension-to-reader mapping: ".pdf" uses the existing `parser` object and
# ".docx" uses a newly constructed UnstructuredReader instance.
file_extractor = {
    ".pdf": parser,
    ".docx": UnstructuredReader(),
}