embed_model = OpenAIEmbedding(
    api_key=OPENAI_API_KEY,
    temperature=0,
    model=EMBEDDING_MODEL,
)

callback_manager_basic = CallbackManager([
    LlamaDebugHandler(print_trace_on_end=True),
    get_token_counter(MODEL_BASIC),
])
callback_manager_premium = CallbackManager([
    LlamaDebugHandler(print_trace_on_end=True),
    get_token_counter(MODEL_PREMIUM),
])

# One ServiceContext per model tier.
service_context_basic = ServiceContext.from_defaults(
    llm=OpenAI(temperature=0, model=MODEL_BASIC, timeout=180),
    callback_manager=callback_manager_basic,
    embed_model=embed_model,
    context_window=16385,
    chunk_size_limit=16385,
)
service_context_premium = ServiceContext.from_defaults(
    llm=OpenAI(temperature=0, model=MODEL_PREMIUM, timeout=180),
    callback_manager=callback_manager_premium,
    embed_model=embed_model,
    context_window=128000,
    chunk_size_limit=128000,
)


def initialize_index(model_name: str = MODEL_BASIC) -> VectorStoreIndex:
    """Initialize the index.

    Args:
    ----
        model_name (str, optional): The model name. Defaults to MODEL_BASIC.

    Returns:
    -------
        VectorStoreIndex: The initialized index.
    """
    service_context = service_context_basic if model_name == MODEL_BASIC else service_context_premium
    vector_store = PGVectorStore.from_params(
        async_connection_string=f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{database}",
        connection_string=f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}?sslmode=require",
        table_name=PG_VECTOR_DATABASE_DOC_TABLE_NAME,
        embed_dim=1536,
        hybrid_search=True,
    )
    storage_context = StorageContext.from_defaults(
        docstore=document_store,
        index_store=index_store,
        vector_store=vector_store,
    )
    return VectorStoreIndex(
        nodes=[],
        storage_context=storage_context,
        service_context=service_context,
        use_async=True,
    )
def initialize_chat_engine(index: VectorStoreIndex, document_uuid: str) -> BaseChatEngine:
    """Initialize chat engine with chat history."""
    chat_history = get_chat_history(document_uuid)
    filters = MetadataFilters(
        filters=[ExactMatchFilter(key="doc_id", value=document_uuid)],
    )
    return index.as_chat_engine(
        chat_mode=ChatMode.CONTEXT,
        condense_question_prompt=PromptTemplate(CHAT_PROMPT_TEMPLATE),
        chat_history=chat_history,
        agent_chat_response_mode="StreamingAgentChatResponse",
        similarity_top_k=10,
        filters=filters,
    )
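For context, this is roughly how the two pieces get wired together at request time (simplified sketch; the question text and document_uuid are placeholders for what our request handler passes in):

# Simplified request-time flow.
index = initialize_index(MODEL_BASIC)
chat_engine = initialize_chat_engine(index, document_uuid)

# stream_chat returns a StreamingAgentChatResponse; tokens come from response_gen.
streaming_response = chat_engine.stream_chat("Summarize this document for me")
for token in streaming_response.response_gen:
    print(token, end="", flush=True)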
from llama_index.vector_stores.types import VectorStoreQuery

query = VectorStoreQuery(
    query_embedding=embed_model.get_query_embedding("my query"),
    similarity_top_k=10,
    filters=filters,
)
res = vector_store.query(query)
def query_directly(uuid: str):
    database = PG_VECTOR_DATABASE_NAME
    host = PG_VECTOR_DATABASE_HOST
    password = PG_VECTOR_DATABASE_PASSWORD
    port = PG_VECTOR_DATABASE_PORT
    user = PG_VECTOR_DATABASE_USER
    vector_store = PGVectorStore.from_params(
        async_connection_string=f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{database}",
        connection_string=f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}?sslmode=require",
        table_name=PG_VECTOR_DATABASE_DOC_TABLE_NAME,
        embed_dim=1536,
    )
    query = VectorStoreQuery(
        query_embedding=embed_model.get_query_embedding("Summarize this document for me"),
        similarity_top_k=10,
        filters=MetadataFilters(
            filters=[ExactMatchFilter(key="doc_id", value=uuid)],
        ),
    )

    # calculate time for query
    start_time = time.time()
    res = vector_store.query(query)
    end_time = time.time()
    duration = end_time - start_time
    print(f"Query time: {duration} seconds.")
    return res
web-1 | Query time: 3.0898852348327637 seconds.
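To check whether the doc_id filter itself is the slow part (i.e. Postgres doing a sequential scan over the JSON metadata), I was planning to look at the query plan with something like the sketch below. It only EXPLAINs the filter, not the vector ordering, and it assumes the data_-prefixed table name that I believe PGVectorStore creates, so it may need adjusting:

from sqlalchemy import create_engine, text

def explain_doc_id_filter(uuid: str):
    # If the plan shows "Seq Scan", the doc_id filter can't use any index on metadata_.
    engine = create_engine(
        f"postgresql+psycopg2://{PG_VECTOR_DATABASE_USER}:{PG_VECTOR_DATABASE_PASSWORD}"
        f"@{PG_VECTOR_DATABASE_HOST}:{PG_VECTOR_DATABASE_PORT}/{PG_VECTOR_DATABASE_NAME}?sslmode=require"
    )
    sql = text(
        f"EXPLAIN ANALYZE SELECT count(*) "
        f"FROM data_{PG_VECTOR_DATABASE_DOC_TABLE_NAME} "
        f"WHERE metadata_->>'doc_id' = :doc_id"
    )
    with engine.connect() as conn:
        for row in conn.execute(sql, {"doc_id": uuid}):
            print(row[0])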
metadata_ JSON column. I think I mentioned to you before that previously we had a table per document (sort of as a different namespace). You actually recommended using the metadata filter instead and storing it all in one table. I'm wondering if it's maybe better to use the "one table per doc" approach. Let me know your thoughts.
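For reference, this is roughly what I was picturing for indexing the doc_id key inside the metadata_ JSON column, in case that changes your recommendation (untested sketch; again assuming the data_-prefixed table name and the default metadata_ column):

from sqlalchemy import create_engine, text

def create_doc_id_index():
    # Expression index on the doc_id key of metadata_, so the
    # ExactMatchFilter(key="doc_id", ...) lookup can use an index scan.
    engine = create_engine(
        f"postgresql+psycopg2://{PG_VECTOR_DATABASE_USER}:{PG_VECTOR_DATABASE_PASSWORD}"
        f"@{PG_VECTOR_DATABASE_HOST}:{PG_VECTOR_DATABASE_PORT}/{PG_VECTOR_DATABASE_NAME}?sslmode=require"
    )
    with engine.begin() as conn:
        conn.execute(
            text(
                f"CREATE INDEX IF NOT EXISTS idx_doc_metadata_doc_id "
                f"ON data_{PG_VECTOR_DATABASE_DOC_TABLE_NAME} ((metadata_->>'doc_id'))"
            )
        )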