This works fine for me:
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import AzureOpenAIEmbedding
api_key = "..."
azure_endpoint = "https://test-simon.openai.azure.com/"
api_version = "2023-07-01-preview"
llm = AzureOpenAI(
    model="gpt-35-turbo-16k",
    deployment_name="my-custom-llm",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)
# You need to deploy your own embedding model as well as your own chat completion model
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="my-custom-embedding",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)
from llama_index import set_global_service_context, ServiceContext

# Make the Azure models the default for all indexing and querying
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
set_global_service_context(service_context)
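If you haven't persisted the two indices yet, you'll need to build them once before the load calls below will work. A minimal sketch of that first run, assuming the 10-K filings live at the hypothetical paths ./data/lyft_10k.pdf and ./data/uber_10k.pdf:

from llama_index import SimpleDirectoryReader, VectorStoreIndex

# Hypothetical paths -- point these at your own copies of the 10-K filings
lyft_docs = SimpleDirectoryReader(input_files=["./data/lyft_10k.pdf"]).load_data()
uber_docs = SimpleDirectoryReader(input_files=["./data/uber_10k.pdf"]).load_data()

lyft_index = VectorStoreIndex.from_documents(lyft_docs, service_context=service_context)
uber_index = VectorStoreIndex.from_documents(uber_docs, service_context=service_context)

# Persist so the load_index_from_storage calls below can find them
lyft_index.storage_context.persist(persist_dir="./storage/lyft")
uber_index.storage_context.persist(persist_dir="./storage/uber")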
from llama_index import StorageContext, load_index_from_storage

storage_context = StorageContext.from_defaults(persist_dir="./storage/lyft")
lyft_index = load_index_from_storage(storage_context, service_context=service_context)

storage_context = StorageContext.from_defaults(persist_dir="./storage/uber")
uber_index = load_index_from_storage(storage_context, service_context=service_context)

# Build query engines over the loaded indices (used by the tools below)
lyft_engine = lyft_index.as_query_engine()
uber_engine = uber_index.as_query_engine()
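Optionally, you can sanity-check the indices directly before handing them to the agent (the question here is just an example):

# Optional sanity check -- query one engine directly
print(lyft_engine.query("What was Lyft's total revenue in 2021?"))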
from llama_index.tools import QueryEngineTool, ToolMetadata

query_engine_tools = [
    QueryEngineTool(
        query_engine=lyft_engine,
        metadata=ToolMetadata(
            name="lyft_10k",
            description=(
                "Provides information about Lyft financials for year 2021. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
    QueryEngineTool(
        query_engine=uber_engine,
        metadata=ToolMetadata(
            name="uber_10k",
            description=(
                "Provides information about Uber financials for year 2021. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
]
from llama_index.agent import OpenAIAgent

agent = OpenAIAgent.from_tools(query_engine_tools, llm=llm)
# agent.chat is synchronous -- no await needed here
response = agent.chat("How did Uber and Lyft compare in revenue growth in 2021?")
print(response)
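If you're running inside an event loop (which is presumably why the original snippet used await), the agent also exposes an async variant:

# Async variant, for use inside an async function or a notebook event loop
response = await agent.achat("How did Uber and Lyft compare in revenue growth in 2021?")
print(response)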