Hey everyone,
Hoping to find some help with the following:
I'm using the Azure OpenAI API, and maintaining a service context across these variables has proved tricky.
import os

import settings  # my shared-state module (see sketch below)

# Imports assume the pre-0.10 (0.9.x) llama_index layout, where ServiceContext still exists
from llama_index import ServiceContext
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.text_splitter import TokenTextSplitter

def setup_service_context():
    api_key = os.environ['openai_api_key']
    azure_endpoint = "xxx"
    api_version = "xxx"
    # Declaring the LLM to use - GPT-4-32K by default
    llm = AzureOpenAI(
        engine=settings.CHAT_MODEL,
        api_key=api_key,
        azure_endpoint=azure_endpoint,
        api_version=api_version,
        temperature=0.2,
        system_prompt='''prompt here''',
    )
    # Using HuggingFace embeddings instead of the OpenAI default
    embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-small")
    # Text splitter for chunking documents into ~650-token nodes
    node_parser = TokenTextSplitter.from_defaults(
        chunk_size=650,
        separator=" ",
        backup_separators=["\n", "\n\n"],
        chunk_overlap=0,
    )
    service_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )
    settings.SERVICE_CONTEXT = service_context
That's how I set up the service context. I then assign it to a settings variable that I import in other files.
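For reference, settings is just a plain Python module used as shared state; a rough sketch (placeholder values, not my real config):

# settings.py - plain module shared between files (placeholder values)
CHAT_MODEL = "gpt-4-32k"   # Azure deployment name
QUERY_GEN_PROMPT = "..."   # prompt template for query generation
RESPONSE_MODE = "compact"  # placeholder response mode
SERVICE_CONTEXT = None     # set by setup_service_context()
INDEX = None               # set when the index is built/loaded
FUSION_RETRIEVER = None    # set by setup_retreivers()
QUERY_ENGINE = None        # set by setup_query_engine()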
In a different .py file, I do the following:
import settings  # same shared-state module
import streamlit as st

from llama_index import get_response_synthesizer
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import BM25Retriever, QueryFusionRetriever

def setup_retreivers():
    vector_retriever = settings.INDEX.as_retriever(
        similarity_top_k=st.session_state.sim_top_k,
        service_context=settings.SERVICE_CONTEXT,
    )
    bm25_retriever = BM25Retriever.from_defaults(
        docstore=settings.INDEX.docstore,
        similarity_top_k=st.session_state.sim_top_k,
    )
    retriever = QueryFusionRetriever(
        [vector_retriever, bm25_retriever],
        similarity_top_k=st.session_state.sim_top_k,
        num_queries=3,  # set this to 1 to disable query generation
        mode="reciprocal_rerank",  # can be changed to different rerank modes
        use_async=True,
        verbose=True,
        llm=settings.SERVICE_CONTEXT.llm,  # have to set the LLM here for the generated queries - the AzureOpenAI llm is needed
        query_gen_prompt=settings.QUERY_GEN_PROMPT,  # overriding the query generation prompt
    )
    settings.FUSION_RETRIEVER = retriever
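As a sanity check, the fusion retriever can be exercised on its own to separate the retrieval step from the synthesis step (a sketch, not my exact test code):

# Hypothetical check: run the fusion retriever directly, outside the
# query engine, to see which step the failure actually comes from
nodes = settings.FUSION_RETRIEVER.retrieve("test question")
for n in nodes:
    print(n.score, n.node.get_content()[:80])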
def setup_query_engine():
    response_synthesizer = get_response_synthesizer(
        service_context=settings.SERVICE_CONTEXT,
        response_mode=settings.RESPONSE_MODE,
    )
    # TODO: Add a node postprocessor
    settings.QUERY_ENGINE = RetrieverQueryEngine.from_args(
        retriever=settings.FUSION_RETRIEVER,
        response_synthesizer=response_synthesizer,
        service_context=settings.SERVICE_CONTEXT,
    )
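For completeness, the call that triggers the failure is just a plain query, roughly:

# This is the call that raises the 401 below
response = settings.QUERY_ENGINE.query("any test question")
print(response)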
I get the following error, specifically in the query engine:
openai.AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: ********************. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
To me that looks like the request is going to the regular OpenAI API instead of Azure, but I have tried everything I can think of to anchor the Azure client into the query engine. (It looks like the lazy embedding resolution is what triggers it.)
I have also checked that the service context passed to the query engine has the correct HuggingFace embedding model.
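Concretely, that check was along these lines (a sketch, using the 0.9.x ServiceContext attributes):

# Inspect what the service context actually resolved to
print(type(settings.SERVICE_CONTEXT.llm))          # expect AzureOpenAI
print(type(settings.SERVICE_CONTEXT.embed_model))  # expect HuggingFaceEmbedding
print(settings.SERVICE_CONTEXT.embed_model.model_name)  # expect "thenlper/gte-small"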
Would GREATLY appreciate any advice / help. I've been staring at this for hours.