# (forum context) Neo4jGraphStore class to connect to my database correctly
# but I'm not sure what to do next. Any tips greatly appreciated! cheers 😄
# PASTE REPAIR: the prose above was fused onto the `class` line, making the
# statement a syntax error; it is preserved here as a comment.
class TechnicalNoteClassifcations(Enum):
    """Closed set of classification labels for technical notes.

    Used as the type of ``TechnicalNoteResponseData.classification`` so the
    query engine's structured output is constrained to these four labels.
    (Name typo "Classifcations" kept — other code references it.)
    """

    CRITICAL = "Critical"
    NEW_FEATURE = "New feature"
    SOLUTION_PROVIDED = "Solution provided"
    INFORMATION_ONLY = "Information only"
class TechnicalNoteResponseData(BaseModel):
    """Structured output schema for technical-note queries.

    Passed as ``output_cls`` to the query engine so the LLM's answer is
    parsed into a classification label plus a free-text summary.
    """

    # One of the TechnicalNoteClassifcations labels chosen by the LLM.
    classification: TechnicalNoteClassifcations
    # LLM-generated summary of the technical note.
    summary: str
# Build a structured-output query engine over the index, then run the note
# title through it; the answer is parsed into TechnicalNoteResponseData.
structured_engine = index.as_query_engine(
    text_qa_template=qa_prompt_templates[item],
    similarity_top_k=num_k,
    output_cls=TechnicalNoteResponseData,
)
response = structured_engine.query(f"{title}")
❱ 51 │ │ │ │ │ ).query(f"{title}")
KeyError: 'classification'
InvalidRequestError: This model's maximum context length is 4097 tokens. However, you requested 4529 tokens (529 in
the messages, 4000 in the completion). Please reduce the length of the messages or completion.
# LLM used for synthesis; completions capped at 3000 tokens.
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo", max_tokens=3000)
# PromptHelper budgets the packed prompt so prompt + completion fit the
# context window.
# NOTE(review): num_output (1000) is smaller than the LLM's max_tokens
# (3000); if the model is allowed to emit 3000 tokens, the packed prompt can
# still overflow the 4097-token window (cf. the InvalidRequestError above) —
# these two values should be aligned.
prompt_helper = PromptHelper(
    context_window=4097,
    num_output=1000,
    # BUG FIX: count tokens with the encoding of the model actually in use
    # (gpt-3.5-turbo → cl100k_base), not text-davinci-002's p50k_base; the
    # mismatched tokenizer miscounts prompt length and contributes to
    # context-length errors.
    tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
    chunk_overlap_ratio=0.01,
)
# Open the persisted Chroma database from disk.
vectordb = chromadb.PersistentClient(path="some/path/here")
# NOTE(review): no embedding_function is passed here, so Chroma falls back to
# its default; an explicit one CAN be specified (see the get_collection call
# with embedding_function further below) — confirm it matches the function
# used at ingest time, otherwise query embeddings won't line up.
chroma_collection = vectordb.get_collection('collection_name')
# Wrap the Chroma collection as a LlamaIndex vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Rebuild the index view over the existing vector store (no re-embedding).
vector_store_index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)
# Raw Chroma query: top-5 nearest to 'some string', restricted to documents
# containing the CVE id; returns metadata and distances only.
data = chroma_collection.query(query_texts = 'some string', n_results=5, where_document={'$contains': 'CVE-2023-4351'}, include=['metadatas', 'distances'])
from chromadb.utils import embedding_functions

# Attach an explicit OpenAI embedding function so query_texts are embedded
# with the same model the collection was built with.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=openai.api_key, model_name="text-embedding-ada-002"
)
collection = vectordb.get_collection(name='msrc_security_update', embedding_function=openai_ef)
# BUG FIX: query the collection that carries the explicit embedding function;
# the original queried `chroma_collection` again, leaving `collection` unused
# and the embedding function without effect.
data = collection.query(
    query_texts='some string',
    n_results=5,
    where_document={'$contains': 'CVE-2023-4351'},
    include=['metadatas', 'distances'],
)
# Restrict retrieval to nodes whose `source` metadata exactly matches the
# MSRC page for this CVE.
filters = MetadataFilters(filters=[ExactMatchFilter(key="source", value="https://msrc.microsoft.com/update-guide/vulnerability/CVE-2023-4351")])
# BUG FIX: VectorIndexRetriever's parameter is `filters=`, not
# `metadata_filters=`; with the wrong keyword the filter was never applied
# (swallowed by **kwargs) and retrieval ran unfiltered.
retriever = VectorIndexRetriever(
    index=vector_store_indicies['msrc_security_update'],
    similarity_top_k=5,
    filters=filters,
)
# Wire the filtered retriever into a query engine with the metadata
# replacement post-processor.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[metadata_replace],
)
response = query_engine.query(
    "fully explain with details 'CVE-2023-4351'",
)
# BUG FIX: the keyword was misspelled `service_contenxt`, so the intended
# ServiceContext was dropped (either rejected or silently swallowed by
# **kwargs) and defaults were used instead.
vector_store_index = VectorStoreIndex.from_documents(docs, storage_context=storage_context, service_context=service_context)
# (forum context) I always get the errors: ConnectionResetError: [WinError
# 10054] An existing connection was forcibly closed by the remote host,
# ProtocolError: ('Connection aborted.', ConnectionResetError(10054, ...)),
# PineconeProtocolError: Failed to connect; did you specify the correct
# index name?
# PASTE REPAIR: the two lines below were REPL transcripts ("expr -> result")
# pasted as code, which is a syntax error; observed results kept as comments.
active_indexes = pinecone.list_indexes()
# -> ['report-vector-store']
pinecone.describe_index("report-vector-store")
# -> IndexDescription(name='report-vector-store', metric='cosine', replicas=1,
#    dimension=1536.0, shards=1, pods=1, pod_type='starter',
#    status={'ready': True, 'state': 'Ready'}, metadata_config=None,
#    source_collection='')
# Persist every store in the index's StorageContext (docstore, index store,
# vector store, graph store) as JSON files under persist_dir.
vector_index.storage_context.persist(persist_dir=persist_dir)
...but I can never get any of it to load back. I've tried the following, to no avail.
# Attempt to reload a previously persisted index from disk.
persist_dir2 = "C:/projects/technical-notes-llm-report/data/06_models/"
chroma_client2 = chromadb.PersistentClient(path=persist_dir2)
chroma_collection2 = chroma_client2.get_or_create_collection(collection_name)
vector_store2 = ChromaVectorStore(chroma_collection=chroma_collection2)
# Rehydrate the docstore and index store from their persisted JSON files and
# pair them with the live Chroma-backed vector store.
storage_context2a = StorageContext.from_defaults(
    docstore=SimpleDocumentStore.from_persist_path("C:/projects/technical-notes-llm-report/data/06_models/docstore.json"),
    vector_store=vector_store2,
    index_store=SimpleIndexStore.from_persist_path("C:/projects/technical-notes-llm-report/data/06_models/index_store.json"),
)
# Two reload strategies tried; both report empty ref_doc_info (below).
vector_index2 = VectorStoreIndex.from_vector_store(vector_store2, storage_context=storage_context2a, service_context=service_context, store_nodes_override=True)
vector_index3a = VectorStoreIndex([], storage_context=storage_context2a, store_nodes_override=True)
# PASTE REPAIR: the lines below were REPL transcripts ("expr -> result")
# pasted as code (syntax errors); observed results kept as comments.
# vector_index3a.ref_doc_info -> {}
# vector_index3b.ref_doc_info -> {}
docstorea = storage_context2a.docstore
# docstorea.get_all_ref_doc_info() -> {}
# NOTE(review): with a vector-store-backed index the nodes typically live in
# the vector store rather than the docstore, so an empty docstore /
# ref_doc_info after reload may be expected — confirm against the
# ingest-time configuration (store_nodes_override at build time).
# (forum context) thanks for any insight!
# PASTE REPAIR: the prose above was fused onto the `fs1 = ...` line (syntax
# error) and a printed listing was pasted as a bare expression; both are now
# comments.
# Persist the index's storage context to Azure Blob Storage via fsspec/abfs.
fs1 = fsspec.filesystem("abfs", account_name="name", account_key="key")
AZURE_CONTAINER = "report-stores"
sentence_index.storage_context.persist(persist_dir=f'{AZURE_CONTAINER}', fs=fs1)
print(fs1.ls(AZURE_CONTAINER))
# Observed listing:
# ['report-stores/docstore.json', 'report-stores/graph_store.json',
#  'report-stores/index_store.json', 'report-stores/vector_store.json']
# Reload the persisted stores from the same container/filesystem.
sc = StorageContext.from_defaults(persist_dir=f'{AZURE_CONTAINER}', fs=fs1)
# (forum context) <-- HttpResponseError  Does anyone have any
# recommendations? cheers!
# PASTE REPAIR: the prose above was fused onto the `for` line and the loop
# body had lost its indentation; restored a syntactically valid loop.
# NOTE(review): the statements below are assumed to all sit inside the
# per-collection loop (each one indexes by `item`) — confirm the original
# nesting.
for item in collection_names:
    # Auto-retriever: the LLM infers metadata filters from the query using
    # the per-collection vector_store_info.
    retriever = VectorIndexAutoRetriever(
        index=vector_store_indicies[item],
        vector_store_info=index_infos['vector_index'][item],
        prompt_template_str=retriever_prompt_strings[item],
        similarity_top_k=num_k,
        max_top_k=5,
    )
    # Hierarchical (tree) summarization with the per-collection QA prompt.
    response_synthesizer = TreeSummarize(summary_template=qa_prompt_templates[item])
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
        node_postprocessors=[metadata_replace],
    )
# Shorthand alternative: build a tree-summarize query engine directly from
# the index instead of wiring retriever + response synthesizer by hand.
query_engine = vector_store_indicies['msrc_security_update'].as_query_engine(
    similarity_top_k=5, node_postprocessors=[metadata_replace], response_mode="tree_summarize"
)
# Per-collection Mongo-backed document and index stores (namespaces keyed by
# `item`). NOTE(review): uri="mongodb+srv://" looks like a placeholder —
# confirm the real connection string is injected elsewhere.
docstore = MongoDocumentStore.from_uri(db_name="report_docstore", namespace=f"docstore_{item}", uri="mongodb+srv://")
index_store = MongoIndexStore.from_uri(db_name="report_docstore", namespace=f"index_store_{item}", uri="mongodb+srv://")
storage_context = StorageContext.from_defaults(docstore=docstore, index_store=index_store)
service_context = ServiceContext.from_defaults(embed_model=OpenAIEmbedding(), callback_manager=callback_manager, node_parser=node_parser)
# Build the summary index over this collection's documents.
index = SummaryIndex.from_documents(docs_for_collection, storage_context=storage_context, service_context=service_context)
nodes = node_parser.get_nodes_from_documents(docs_for_collection)
# BUG FIX: `docstores` was an undefined name (NameError at runtime); the
# parsed nodes were meant to be added to the `docstore` created above.
docstore.add_documents(nodes)
# Rebuild storage and service contexts from the persisted stores, then
# refresh the index with (possibly updated) documents.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(
    docstore=SimpleDocumentStore.from_persist_dir(storage_params["persist_dir"]),
    vector_store=vector_store,
    index_store=SimpleIndexStore.from_persist_dir(storage_params["persist_dir"]),
)
service_context = ServiceContext.from_defaults(callback_manager=callback_manager, llm=llm, embed_model=OpenAIEmbedding(embed_batch_size=50), node_parser=node_parser)
# Start from an empty node list: documents are inserted via the refresh call.
vector_index = VectorStoreIndex([], storage_context=storage_context, service_context=service_context, store_nodes_override=vector_index_params["store_nodes_override"])
# NOTE(review): per LlamaIndex docs, refresh_ref_docs returns one boolean
# per input document (True = inserted/updated) — inspecting `results` is the
# way to verify the refresh worked; confirm persistence by re-persisting the
# storage context and re-reading the stores.
results = vector_index.refresh_ref_docs(data)
refresh_ref_docs()
How do I verify the refresh worked and persisted the docs/nodes/embeddings? Thanks kindly!

Does the `embed_model` argument of the LlamaIndex service_context override Chroma's embedding function specification? Sorry if that sounds convoluted! I'm just not clear where the work is happening and who is doing it... LOL! 😆

collection = client.get_collection(name="my_collection", embedding_function=emb_fn)
<- from the Chroma documentation