from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader("/Users/home/Library/Mobile Documents/com~apple~CloudDocs/Academia/Legal Research").load_data()
index = VectorStoreIndex.from_documents(documents)

---------------------------------------------------------------------------
ValidationError Traceback (most recent call last)
/Users/home/Downloads/OpenAI_Finetuning_Distill_GPT_4_to_GPT_3_5_(v2).ipynb Cell 13 line 1
----> 1 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
      3 documents = SimpleDirectoryReader("/Users/home/Library/Mobile Documents/com~apple~CloudDocs/Academia/Legal Research").load_data()
      4 index = VectorStoreIndex.from_documents(documents)

File ~/.ooba/text-generation-ui/installer_files/env/lib/python3.10/site-packages/llama_index/__init__.py:21
     17 from llama_index.embeddings import OpenAIEmbedding
     19 # indices
     20 # loading
---> 21 from llama_index.indices import (
     22 ComposableGraph,
     23 DocumentSummaryIndex,
     24 GPTDocumentSummaryIndex,
     25 GPTKeywordTableIndex,
     26 GPTKnowledgeGraphIndex,
     27 GPTListIndex,
     28 GPTRAKEKeywordTableIndex,
     29 GPTSimpleKeywordTableIndex,
     30 GPTTreeIndex,
     31 GPTVectorStoreIndex,
     32 KeywordTableIndex,
     33 KnowledgeGraphIndex,
     34 ListIndex,
...

File ~/.ooba/text-generation-ui/installer_files/env/lib/python3.10/site-packages/pydantic/main.py:341, in pydantic.main.BaseModel.__init__()

ValidationError: 1 validation error for DataSource
# Install llama-index core plus the file-reader, Ollama LLM, and HuggingFace embedding packages.
# NOTE(review): this list does not include `unstructured`; the UnstructuredReader usage
# below fails without it ("ImportError: No module named 'unstructured'").
pip install llama-index-core llama-index-readers-file llama-index-llms-ollama llama-index-embeddings-huggingface
# Load documents from ./data, routing ".pdf" files through UnstructuredReader.
# NOTE: UnstructuredReader depends on the separately installed `unstructured`
# package; without it load_data() raises
# "ImportError: No module named 'unstructured'".
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import UnstructuredReader

# Map the file extension to the reader instance that should handle it.
file_extractor = {".pdf": UnstructuredReader()}
documents = SimpleDirectoryReader("./data", file_extractor=file_extractor).load_data()
# Load documents from ./data, routing ".pdf" files through UnstructuredReader.
# NOTE: UnstructuredReader depends on the separately installed `unstructured`
# package; without it load_data() raises
# "ImportError: No module named 'unstructured'".
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import UnstructuredReader

# Map the file extension to the reader instance that should handle it.
file_extractor = {".pdf": UnstructuredReader()}
documents = SimpleDirectoryReader("./data", file_extractor=file_extractor).load_data()
ImportError Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/llama_index/core/readers/file/base.py in load_file(input_file, file_metadata, file_extractor, filename_as_id, encoding, errors)
    324 # ensure that ImportError is raised so user knows
    325 # about missing dependencies
--> 326 raise ImportError(str(e))
    327 except Exception as e:
    328 # otherwise, just skip the file and report the error

ImportError: No module named 'unstructured'
{'type': 'ListItem', 'element_id': '685c346992da8cb638277234e18455dc', 'text': '3. Analysis and application of the rule of law to the facts of the case. This step is composed of three parts:', 'metadata': {'filetype': 'application/pdf', 'languages': ['eng'], 'page_number': 48, 'parent_id': '1f5eb66d4519bd762414d483640a7cd1', 'filename': 'Legal Research_Part 1.pdf'}}, {'type': 'NarrativeText', 'element_id': '269a00dfa7fbab67e141c6c5600e8440', 'text': 'a. A determination of the elements or requirements of the rule of law b. A matching of the facts of the client’s case to the elements and a determi- nation of how the rule of law applies to the facts ¢. A counteranalysis that addresses any counterarguments to the analysis 4. A conclusion that summarizes the previous steps. The conclusion may also include a weighing of the merits of the case and an identification of other information or avenues of research that should be pursued.', 'metadata': {'filetype': 'application/pdf', 'languages': ['eng'], 'page_number': 48, ... 50, 'parent_id': '8aebadf99302a64184ebba5341df7d89', 'filename': 'Legal Research_Part 1.pdf'}}]