Hi there — is it possible to get the number of embedding tokens used without using mocks? I can get the LLM token usage without them, but not the embedding usage. Here's how I'm doing it now:
from gpt_index import GPTSimpleVectorIndex, LLMPredictor, MockEmbedding, MockLLMPredictor
# Flask app; static files are served from the current directory.
app = Flask(__name__, static_folder='.')
# Real LLM predictor used to generate answers (up to 1024 completion tokens).
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=1024))
# Pre-built vector index loaded from disk; real queries run through llm_predictor.
index = GPTSimpleVectorIndex.load_from_disk('data.json', llm_predictor=llm_predictor)
# Mock predictor/embedding: running a query with these counts tokens without
# hitting the OpenAI API. embed_dim=1536 presumably matches the embedding model
# used to build the index (e.g. text-embedding-ada-002) — TODO confirm.
mock_llm_predictor = MockLLMPredictor(max_tokens=256)
mock_embed_model = MockEmbedding(embed_dim=1536)
def question():
    """Answer a user question from the vector index and report token usage.

    Authorizes the caller via the ``Authorization`` header (401 via ``abort``
    on mismatch), runs a mock query to measure embedding-token cost without
    calling the API, then runs the real query and returns a JSON payload:
    ``{'answer', 'tokens_used', 'embedding_tokens_used'}``.

    NOTE(review): no ``@app.route`` decorator is visible in this snippet —
    confirm the handler is registered elsewhere, and that ``AUTH_TOKEN`` is
    defined at module level.
    """
    token = request.headers.get('Authorization')
    if not token or token != AUTH_TOKEN:
        abort(401, 'Unauthorized')
    # Renamed from `question` to avoid shadowing this function's own name.
    user_question = request.args.get('question')
    prompt = f"""You are a helpful support agent for a project named Nouns DAO. You are asked: "{user_question}". Try to use only the
information provided. If you don't know an answer, do not make one up. Kindly let the user know you don't know the
answer."""
    # The mock/real token counters accumulate for the lifetime of the process,
    # so snapshot them before each query and report the per-request delta —
    # otherwise every response includes tokens spent on earlier requests.
    embed_tokens_before = mock_embed_model.total_tokens_used
    # Mock run: exercises the embedding path so total_tokens_used reflects
    # what the real query's embedding step will cost, without an API call.
    index.query(prompt, llm_predictor=mock_llm_predictor, embed_model=mock_embed_model)
    embedding_tokens_used = mock_embed_model.total_tokens_used - embed_tokens_before
    llm_tokens_before = llm_predictor.total_tokens_used
    response = index.query(prompt).response.strip()
    total_tokens_used = llm_predictor.total_tokens_used - llm_tokens_before
    return jsonify({'answer': response, 'tokens_used': total_tokens_used, 'embedding_tokens_used': embedding_tokens_used})