# Set up an OpenAI LLM and register a fine-tuning callback handler globally
# so that LLM calls made through llama_index are seen by the handler.
from llama_index.core import Settings
from llama_index.core.callbacks import CallbackManager
# OpenAIFineTuningHandler is not exported by `llama_index.core.callbacks`
# (importing it from there raises ImportError); it is provided by the
# separate `llama-index-finetuning` distribution.
from llama_index.finetuning.callbacks import OpenAIFineTuningHandler
from llama_index.llms.openai import OpenAI

finetuning_handler = OpenAIFineTuningHandler()
callback_manager = CallbackManager([finetuning_handler])

llm = OpenAI(model="gpt-4-0125-preview", temperature=0.1)

# Assign the manager itself. The previous `(callback_manager,)` stored a
# 1-tuple because of a stray trailing comma, which is not a CallbackManager.
Settings.callback_manager = callback_manager
ImportError: cannot import name 'OpenAIFineTuningHandler' from 'llama_index.core.callbacks' (/usr/local/lib/python3.10/dist-packages/llama_index/core/callbacks/__init__.py)
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader documents = SimpleDirectoryReader("/Users/home/Library/Mobile Documents/com~apple~CloudDocs/Academia/Legal Research").load_data() index = VectorStoreIndex.from_documents(documents) --------------------------------------------------------------------------- ValidationError Traceback (most recent call last) /Users/home/Downloads/OpenAI_Finetuning_Distill_GPT_4_to_GPT_3_5_(v2).ipynb Cell 13 line 1 ----> 1 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader 3 documents = SimpleDirectoryReader("/Users/home/Library/Mobile Documents/com~apple~CloudDocs/Academia/Legal Research").load_data() 4 index = VectorStoreIndex.from_documents(documents) File ~/.ooba/text-generation-ui/installer_files/env/lib/python3.10/site-packages/llama_index/__init__.py:21 17 from llama_index.embeddings import OpenAIEmbedding 19 # indices 20 # loading ---> 21 from llama_index.indices import ( 22 ComposableGraph, 23 DocumentSummaryIndex, 24 GPTDocumentSummaryIndex, 25 GPTKeywordTableIndex, 26 GPTKnowledgeGraphIndex, 27 GPTListIndex, 28 GPTRAKEKeywordTableIndex, 29 GPTSimpleKeywordTableIndex, 30 GPTTreeIndex, 31 GPTVectorStoreIndex, 32 KeywordTableIndex, 33 KnowledgeGraphIndex, 34 ListIndex, ... File ~/.ooba/text-generation-ui/installer_files/env/lib/python3.10/site-packages/pydantic/main.py:341, in pydantic.main.BaseModel.__init__() ValidationError: 1 validation error for DataSource
pip install llama-index-core llama-index-readers-file llama-index-llms-ollama llama-index-embeddings-huggingface
# VectorStoreIndex has no `query` method; build a query engine from the
# index and run the question through it.
index.as_query_engine().query("What is the document about?")
'VectorStoreIndex' object has no attribute 'query'
from pathlib import Path

from llama_index.readers.file import PDFReader

# FlatReader opens the file as UTF-8 text, which raises UnicodeDecodeError on
# a binary PDF. PDFReader parses the PDF format instead.
# NOTE(review): PDFReader presumably needs a PDF backend such as `pypdf`
# installed — confirm against the environment.
reader = PDFReader()
docs_2021 = reader.load_data(Path("my_file.pdf"))
File ~/.local/lib/python3.10/site-packages/llama_index/readers/file/flat/base.py:28, in FlatReader.load_data(self, file, extra_info) 26 """Parse file into string.""" 27 with open(file, encoding="utf-8") as f: ---> 28 content = f.read() 29 metadata = {"filename": file.name, "extension": file.suffix} 30 if extra_info: File /usr/lib/python3.10/codecs.py:322, in BufferedIncrementalDecoder.decode(self, input, final) 319 def decode(self, input, final=False): 320 # decode input (taking the buffer into account) 321 data = self.buffer + input --> 322 (result, consumed) = self._buffer_decode(data, self.errors, final) 323 # keep undecoded input until the next call 324 self.buffer = data[consumed:] UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe2 in position 10: invalid continuation byte
# Build the vector index from the loaded documents, using the caller-provided
# storage context rather than the default one.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)
# Instruction given to the question generator: produce exactly one question
# per context chunk, restricted to the information in that chunk.
question_gen_query = (
    "You are a Teacher/ Professor. Your task is to setup "
    "a quiz/examination. Using the provided context, formulate "
    "a single question that captures an important fact from the "
    "context. Restrict the question to the context information provided."
)

# Only the first 50 documents are passed to the generator.
dataset_generator = DatasetGenerator.from_documents(
    documents[:50],
    question_gen_query=question_gen_query,
    service_context=gpt_35_context,
)
1973 def __init__(self, *args: Any, **kwargs: Any) -> None: -> 1974 deprecation_with_replacement("PdfFileReader", "PdfReader", "3.0.0") 1975 if "strict" not in kwargs and len(args) < 2: 1976 kwargs["strict"] = True # maintain the default File ~/anaconda3/lib/python3.10/site-packages/PyPDF2/_utils.py:369, in deprecation_with_replacement(old_name, new_name, removed_in) 363 def deprecation_with_replacement( 364 old_name: str, new_name: str, removed_in: str = "3.0.0" 365 ) -> None: 366 """ 367 Raise an exception that a feature was already removed, but has a replacement. 368 """ --> 369 deprecation(DEPR_MSG_HAPPENED.format(old_name, removed_in, new_name)) File ~/anaconda3/lib/python3.10/site-packages/PyPDF2/_utils.py:351, in deprecation(msg) 350 def deprecation(msg: str) -> None: --> 351 raise DeprecationError(msg) DeprecationError: PdfFileReader is deprecated and was removed in PyPDF2 3.0.0. Use PdfReader instead.