# Parse the 10-K PDF into markdown documents with LlamaParse
documents = LlamaParse(result_type="markdown").load_data("./apple_2021_10k.pdf")
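A follow-on sketch, assuming a recent llama_index release (the VectorStoreIndex import path varies by version) and a configured OpenAI key, showing how the parsed 10-K is typically indexed and queried; the question is illustrative:

from llama_index.core import VectorStoreIndex

# Build a vector index over the parsed documents and query it
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
print(query_engine.query("What were Apple's total net sales in fiscal 2021?"))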
# Load documents
documents = SimpleDirectoryReader("/content/data/paul_graham", recursive=True).load_data()
ValueError                                Traceback (most recent call last)
<ipython-input-7-0bfb093e8ab6> in <cell line: 14>()
     12
     13 # Load documents
---> 14 documents = SimpleDirectoryReader("/content/data/paul_graham", recursive=True).load_data()
     15
     16 # Create an index from the documents using the service context

1 frames
/usr/local/lib/python3.10/dist-packages/llama_index/core/readers/file/base.py in _add_files(self, input_dir)
    303
    304         if len(new_input_files) == 0:
--> 305             raise ValueError(f"No files found in {input_dir}.")
    306
    307         if self.num_files_limit is not None and self.num_files_limit > 0:

ValueError: No files found in /content/data/paul_graham.
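The reader fails because the target directory is empty. A minimal fix sketch, assuming the Paul Graham essay from the LlamaIndex docs examples is the intended data (the URL below is the docs' copy and may move between releases):

import os
import urllib.request

# Create the data directory and download the sample essay into it
os.makedirs("/content/data/paul_graham", exist_ok=True)
urllib.request.urlretrieve(
    "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt",
    "/content/data/paul_graham/paul_graham_essay.txt",
)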
from llama_index.core.chat_engine.types import BaseChatEngine
from llama_index.core.llms import ChatMessage, MessageRole
# Pass wandb_callback to the service context
callback_manager = CallbackManager([llama_debug, wandb_callback])
service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-3.5-turbo-0613", temperature=0),
    chunk_size=1024,
    callback_manager=callback_manager,
)
# Define the LLM with HuggingFaceInferenceAPI
llm = HuggingFaceInferenceAPI(
    model_name="HuggingFaceH4/zephyr-7b-beta",
    token=os.environ.get("HUGGINGFACE_ACCESS_TOKEN"),
)

# Create a QdrantClient with location=":memory:", so the vector DB is stored in memory
vectordb_client = qdrant_client.QdrantClient(location=":memory:")

# Create a QdrantVectorStore using the QdrantClient and the collection name "wonderful_life"
vector_store = QdrantVectorStore(
    client=vectordb_client, collection_name="wonderful_life"
)

# Create a StorageContext object using the QdrantVectorStore
storage_context = StorageContext.from_defaults(vector_store=vector_store)

node_parser = SentenceSplitter()
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:WhereIsAI/UAE-Large-V1",
    callback_manager=callback_manager,
)

# Build agents dictionary
query_engine_tools = []
for idx, wiki_title in enumerate(wiki_titles):
    nodes = node_parser.get_nodes_from_documents(city_docs[wiki_title])

    if not os.path.exists(f"./data/{wiki_title}"):
        # Build vector index
        vector_index = VectorStoreIndex(
            nodes,
            service_context=service_context,
            callback_manager=callback_manager,
            storage_context=storage_context,
        )
        vector_index.storage_context.persist(persist_dir=f"./data/{wiki_title}")
    else:
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=f"./data/{wiki_title}"),
            service_context=service_context,
            callback_manager=callback_manager,
        )

    # Define query engines
    vector_query_engine = vector_index.as_query_engine()
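The loop above initializes query_engine_tools but never appends to it. A hedged sketch of the likely missing step, assuming the QueryEngineTool and ToolMetadata classes from llama_index (import path varies by version); the tool name and description strings are illustrative:

from llama_index.tools import QueryEngineTool, ToolMetadata

# Inside the loop: wrap each per-city query engine as a tool so an
# agent or router can dispatch questions to it
query_engine_tools.append(
    QueryEngineTool(
        query_engine=vector_query_engine,
        metadata=ToolMetadata(
            name=f"vector_tool_{wiki_title}",
            description=f"Useful for questions about {wiki_title}",
        ),
    )
)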
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    QuestionsAnsweredExtractor,
    TitleExtractor,
)
from llama_index.text_splitter import TokenTextSplitter
from llama_index.node_parser import SimpleNodeParser

text_splitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=20)

metadata_extractor = MetadataExtractor(
    extractors=[
        TitleExtractor(nodes=5),
        QuestionsAnsweredExtractor(questions=3),
    ],
)

node_parser = SimpleNodeParser(
    text_splitter=text_splitter,
    metadata_extractor=metadata_extractor,
)
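A short usage sketch for the parser, assuming documents have already been loaded (e.g. with SimpleDirectoryReader); each resulting node carries the extracted title and questions in its metadata:

# Split documents into 512-token nodes and run both extractors
nodes = node_parser.get_nodes_from_documents(documents)
print(nodes[0].metadata)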
import guidance

program = GuidancePydanticProgram(
    output_cls=Character,
    prompt_template_str=(
        "Give me a character description, using"
        " the movie {{game_name}} as inspiration"
    ),
    # GuidancePydanticProgram expects a guidance LLM wrapper; an
    # outlines model will not work here
    guidance_llm=guidance.llms.Transformers("mistralai/Mistral-7B-v0.1", device="cuda"),
    verbose=True,
)
from pathlib import Path

JSONReader = download_loader("JSONReader")
loader = JSONReader()
documents = loader.load_data(Path("/workspace/data/train.json"))
from llama_index.retrievers import RecursiveRetriever

# Note: can pass the `agents` dict as `query_engine_dict`,
# since every agent can be used as a query engine
recursive_retriever = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": index_retriever, **retriever_dict},
    # query_engine_dict=query_engine_dict,
    verbose=True,
)
os.environ["OPENAI_API_KEY"] = "" nodes = recursive_retriever.retrieve("Tell me about some issues on 12/11") print(f"Number of source nodes: {len(nodes)}") nodes[0].node.metadata
Retrieving with query id None: Tell me about some issues on 12/11
---------------------------------------------------------------------------
AuthenticationError                       Traceback (most recent call last)
<ipython-input-57-e465b1a12d99> in <cell line: 1>()
----> 1 nodes = recursive_retriever.retrieve("Tell me about some issues on 12/11")
      2
      3 print(f"Number of source nodes: {len(nodes)}")
      4 nodes[0].node.metadata

15 frames
/usr/local/lib/python3.10/dist-packages/openai/_base_client.py in _request(self, cast_to, options, remaining_retries, stream, stream_cls)
    928             err.response.read()
    929
--> 930             raise self._make_status_error_from_response(err.response) from None
    931
    932         return self._process_response(

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-NgwJh***************************************ViNP. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
openai_key = "" #<--- Your API KEY openai.api_key = openai_key
Retrieving with query id None: Tell me about some issues on 12/11
---------------------------------------------------------------------------
AuthenticationError                       Traceback (most recent call last)
<ipython-input-50-e465b1a12d99> in <cell line: 1>()
----> 1 nodes = recursive_retriever.retrieve("Tell me about some issues on 12/11")
      2
      3 print(f"Number of source nodes: {len(nodes)}")
      4 nodes[0].node.metadata

15 frames
/usr/local/lib/python3.10/dist-packages/openai/_base_client.py in _request(self, cast_to, options, remaining_retries, stream, stream_cls)
    928             err.response.read()
    929
--> 930             raise self._make_status_error_from_response(err.response) from None
    931
    932         return self._process_response(

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-NgwJh***************************************ViNP. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
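Both attempts fail with a 401 because an empty (or revoked) key is being sent. A minimal fix sketch, assuming a valid key has been exported as the OPENAI_API_KEY environment variable before any OpenAI-backed component is built or called:

import os
import openai

# Read a real key from the environment instead of hard-coding ""
openai.api_key = os.environ["OPENAI_API_KEY"]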
from llama_index.evaluation import FaithfulnessEvaluator

evaluator = FaithfulnessEvaluator(service_context=service_context)

response = flare_query_engine.query(
    "Can you tell me about the author's trajectory in the startup world?"
)
response_str = response.response
for source_node in response.source_nodes:
    eval_result = evaluator.evaluate(
        response=response_str, contexts=[source_node.get_content()]
    )
    print(str(eval_result.passing))
RuntimeError                              Traceback (most recent call last)
<ipython-input-30-43b304ac96cc> in <cell line: 7>()
      6 response_str = response.response
      7 for source_node in response.source_nodes:
----> 8     eval_result = evaluator.evaluate(response=response_str, contexts=[source_node.get_content()])
      9     print(str(eval_result.passing))

1 frames
/usr/lib/python3.10/asyncio/runners.py in run(main, debug)
     31     """
     32     if events._get_running_loop() is not None:
---> 33         raise RuntimeError(
     34             "asyncio.run() cannot be called from a running event loop")
     35

RuntimeError: asyncio.run() cannot be called from a running event loop
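The evaluator calls asyncio.run() internally, which cannot run inside the notebook's already-running event loop. The standard workaround in Jupyter/Colab is to patch the loop with nest_asyncio before evaluating:

import nest_asyncio

# Allow nested event loops so libraries that call asyncio.run()
# internally can run inside the notebook
nest_asyncio.apply()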
import numpy as np
import requests

REPLICATE_API_URL = "https://api.replicate.ai/v1/chat/completions"  # Insert your Replicate API URL here


def compose_payload(images: np.ndarray, prompt: str) -> dict:
    # One text part plus one base64-encoded image part per input image
    text_content = {"type": "text", "text": prompt}
    image_content = [
        {"type": "image", "image": encode_image_to_base64(image=image)}
        for image in images
    ]
    return {
        "model": REPLICATE_MULTI_MODAL_LLM_MODELS["llava-13b"],
        "messages": [{"role": "user", "content": [text_content] + image_content}],
        "max_tokens": 300,
    }


def prompt_image(api_key: str, images: list, prompt: str) -> list:
    headers = compose_headers(api_key=api_key)
    res = []
    for image in images:
        payload = compose_payload(images=[image], prompt=prompt)
        response = requests.post(url=REPLICATE_API_URL, headers=headers, json=payload).json()
        if "error" in response:
            raise ValueError(response["error"]["message"])
        # Adjust the field access below to the actual response structure
        res.append(
            {
                "response": response["choices"][0]["message"]["content"],
                "image": str(image.image_path),  # adjust to your image structure
            }
        )
    return res
pdf_file = "llama2.pdf" # Split the base name and extension output_directory_path, _ = os.path.splitext(pdf_file) if not os.path.exists(output_directory_path): os.makedirs(output_directory_path) # Open the PDF file pdf_document = fitz.open(pdf_file) # Iterate through each page and convert to an image for page_number in range(pdf_document.page_count): # Get the page page = pdf_document[page_number] # Convert the page to an image pix = page.get_pixmap() # Create a Pillow Image object from the pixmap image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) # Save the image image.save(f"./{output_directory_path}/page_{page_number + 1}.png") # Close the PDF file pdf_document.close()
TypeError                                 Traceback (most recent call last)
Cell In[38], line 1
----> 1 prediction_dataset = await rag_dataset.amake_predictions_with(
      2     query_engine=flare_query_engine, show_progress=True
      3 )

TypeError: BaseLlamaDataset.amake_predictions_with() got an unexpected keyword argument 'query_engine'
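The dataset API does not accept query_engine; in the llama_index releases that ship BaseLlamaDataset, the engine is passed as the predictor argument. A likely fix, worth checking against the installed version's signature:

# The keyword is `predictor` in recent llama_index releases;
# verify against your installed version
prediction_dataset = await rag_dataset.amake_predictions_with(
    predictor=flare_query_engine, show_progress=True
)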