base_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
)
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store, service_context=service_context
)
retriever = index.as_retriever(similarity_top_k=similarity_top_k)
query_engine_base = RetrieverQueryEngine.from_args(
    retriever, service_context=service_context
)
response = query_engine_base.query(query)
for node in nodes:
    display_source_node(node, source_length=10000)
Node ID: d4d67180-71c8-4328-b3f1-1e98fa42ab69
Similarity: 0.8694979150607424
Text: We also list two qualitative examples where safety [...]
Node ID:
Source Document: example5.pdf
Similarity:
Text:
from llama_index.core import Document
from llama_index.core import VectorStoreIndex

index = VectorStoreIndex(embed_model=embed_model)
for chunk in doc.chunks():
    index.insert(Document(text=chunk.to_context_text(), extra_info={}))

query_engine = index.as_query_engine()
# Let's run one query
response = query_engine.query("Tell me about Europe.")
print(response)
ValueError                                Traceback (most recent call last)
Cell In[18], line 4
      1 from llama_index.core import Document
      2 from llama_index.core import VectorStoreIndex
----> 4 index = VectorStoreIndex(embed_model=embed_model)
      5 for chunk in doc.chunks():
      6     index.insert(Document(text=chunk.to_context_text(), extra_info={}))
...
---> 59     raise ValueError("One of nodes, objects, or index_struct must be provided.")
     60 if index_struct is not None and nodes is not None:
     61     raise ValueError("Only one of nodes or index_struct can be provided.")

ValueError: One of nodes, objects, or index_struct must be provided.
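One way to resolve this, a minimal sketch assuming the goal is an initially empty index that is then populated through incremental insert() calls, is to pass an explicit empty node list at construction time:

# A minimal sketch, not the only possible fix: constructing the index with an
# explicit empty node list satisfies the constructor's requirement, after which
# insert() can populate it chunk by chunk.
index = VectorStoreIndex(nodes=[], embed_model=embed_model)
for chunk in doc.chunks():
    index.insert(Document(text=chunk.to_context_text(), extra_info={}))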
from llama_index.core import SummaryIndex, Document

index = SummaryIndex([])
text_chunks = ["cars are red", "cars are big", "elephants are red"]
doc_chunks = []
for i, text in enumerate(text_chunks):
    doc = Document(text=text, id_=f"doc_id_{i}")
    doc_chunks.append(doc)

kg_index = KnowledgeGraphIndex.from_documents(
    documents=doc_chunks,
    storage_context=storage_context,
    max_triplets_per_chunk=3,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    llm=llm,
    embed_model=embed_model,
    include_embeddings=True,
)

import networkx as nx

G = kg_index.get_networkx_graph()
nx.draw(G, with_labels=True, font_weight='bold')
documents = SimpleDirectoryReader("./data").load_data()

kg_index = KnowledgeGraphIndex.from_documents(
    documents=documents,
    storage_context=storage_context,
    max_triplets_per_chunk=2,
    space_name=space_name,
    edge_types=edge_types,
    llm=llm,
    embed_model=embed_model,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
)
kg_index.storage_context.persist(persist_dir='./storage_graph')

hybrid_query_engine = kg_index.as_query_engine(
    include_text=True,
    llm=llm,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=3,
    explore_global_knowledge=True,
)
query_text = "What is education?"
response = hybrid_query_engine.query(query_text)
Query failed. Query:
WITH map{`true`: '-[', `false`: '<-['} AS arrow_l,
     map{`true`: ']->', `false`: ']-'} AS arrow_r,
     map{`relationship`: "relationship"} AS edge_type_map
MATCH p=(start)-[e:`relationship`*..2]-()
WHERE id(start) IN $subjs
WITH start, id(start) AS vid, nodes(p) AS nodes, e AS rels [...]
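Since the server response is truncated, the root cause here is an assumption, but a common culprit when this traversal fails is a mismatch between the index configuration and the schema that actually exists in the NebulaGraph space. The values below are illustrative placeholders, not taken from the original setup:

# Illustrative placeholders (assumptions): each value must match the schema
# defined in the NebulaGraph space that the index queries.
space_name = "llamaindex"          # an existing, initialized graph space
edge_types = ["relationship"]      # edge type referenced in the MATCH pattern above
rel_prop_names = ["relationship"]  # edge property holding the predicate text
tags = ["entity"]                  # vertex tag used for subject/object nodes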
from llama_index.llms import OpenAILike

llm = OpenAILike(
    api_base="http://localhost:8000",
    model="gpt_35_turbo",
    api_key="sk-xxx",
)
import pandas as pd


def display_eval_df(query, response, eval_result):
    eval_df = pd.DataFrame(
        {
            "Query": str(query),
            "Response": str(response),
            "Source": response.source_nodes[0].node.get_content()[:500] + "...",
            "Evaluation Result": eval_result.feedback,
        },
        index=[0],
    )
    eval_df = eval_df.style.set_properties(
        **{
            "inline-size": "600px",
            "overflow-wrap": "break-word",
        },
        subset=["Response", "Source"],
    )
    display(eval_df)


prompt_helper = PromptHelper(
    context_window=4096,
    num_output=256,
    chunk_overlap_ratio=0.1,
    chunk_size_limit=None,
)
service_context = ServiceContext.from_defaults(
    llm=llm, embed_model=embed_model, prompt_helper=prompt_helper
)

query_engine = rag.custom_retriever(
    query=None,
    documents=documents,
    method='top_k',
    rag=True,
    chunk_size=512,
    similarity_top_k=1,
    eval=True,
)

faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context)
response_vector = query_engine.query(eval_questions[1])
eval_result = faithfulness_evaluator.evaluate_response(response=response_vector)
display_eval_df(eval_questions[1], response_vector, eval_result)
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000", api_key="sk-xxx")
response = client.chat.completions.create(
    model="gpt_35_turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem",
        }
    ],
)
print(response)
llm = OpenAI(model="text-davinci-003", temperature=0, max_tokens=256)
embed_model = OpenAIEmbedding()
text_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)
prompt_helper = PromptHelper(
    context_window=4096,
    num_output=256,
    chunk_overlap_ratio=0.1,
    chunk_size_limit=None,
)
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    text_splitter=text_splitter,
    prompt_helper=prompt_helper,
)
VectorStoreIndex.from_documents(
    documents,
    storage_context=self.storage_context,
    service_context=service_context,
    show_progress=False,
)
vector_index_chunk = VectorStoreIndex(
    all_nodes,
    service_context=self.service_context,
    storage_context=self.storage_context,
)
index = VectorStoreIndex.from_vector_store(
    vector_store=self.vector_store, service_context=self.service_context
)
vector_retriever_chunk = vector_index_chunk.as_retriever(similarity_top_k=2)
retriever_chunk = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": vector_retriever_chunk},
    node_dict=all_nodes_dict,
    verbose=True,
)
if rag:
    query_engine_chunk = RetrieverQueryEngine.from_args(
        retriever_chunk, service_context=self.service_context
    )
    response = query_engine_chunk.query(query)
    return str(response)
else:
    result = retriever_chunk.retrieve(query)
    res = []
    for node in result:
        res += [display_source_node_custom(node, 200, True)]
    return res
ValueError: Query id c092cd56-9404-43b8-84a0-d591c2cc2dc9 not found in either `retriever_dict` or `query_engine_dict`.
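A sketch of the usual cause, under the assumption that not every IndexNode returned by the top-level retriever is registered with the RecursiveRetriever: each index_id must resolve through node_dict (or through retriever_dict / query_engine_dict). Rebuilding node_dict from the full node list typically resolves it:

# A sketch (assumption: all_nodes is the complete node list, including the
# IndexNode references): node_dict must cover every id the retriever can return.
all_nodes_dict = {n.node_id: n for n in all_nodes}
retriever_chunk = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": vector_retriever_chunk},
    node_dict=all_nodes_dict,
    verbose=True,
)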
from openai import OpenAI

client = OpenAI(base_url="http://localhost:xxx", api_key="xxx")
response = client.embeddings.create(
    input=["Your string here"], model="Azure-Text-Embedding-ada-002"
)
embedding_vector = response.data[0].embedding
class CustomEmbedding:
    def __init__(self, client, model="Azure-Text-Embedding-ada-002"):
        self.client = client
        self.model = model

    def embed(self, texts):
        response = self.client.embeddings.create(input=texts, model=self.model)
        return [r.embedding for r in response.data]

[...]

client = OpenAI(base_url="http://localhost:xxx", api_key="xxx")
embed_model = CustomEmbedding(client, model="Azure-Text-Embedding-ada-002")
AttributeError: 'OpenAI' object has no attribute 'embeddings'
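This error usually means the installed openai package predates the 1.x client API, which is where the embeddings attribute on the OpenAI class was introduced; since the environment is not shown here, that is an assumption. A quick diagnostic:

# A quick diagnostic sketch: client.embeddings exists only in openai>=1.0.
import openai

print(openai.__version__)  # if below 1.0.0, upgrade with: pip install -U openai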
from llama_index import VectorStoreIndex

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
)
AttributeError: 'str' object has no attribute 'get_doc_id'
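This typically means documents contains raw strings rather than Document objects, since from_documents calls get_doc_id on each entry. A sketch of the fix, where raw_texts is a hypothetical stand-in for that list of strings:

# A sketch (raw_texts is hypothetical): wrap plain strings in Document objects
# before passing them to from_documents.
from llama_index import Document

documents = [Document(text=t) for t in raw_texts]
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
)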