
hi there. is it possible to get the embedding tokens used without using mocks? I can get the LLM token usage without them, but not the embedding usage. Here's how I'm doing it now:

Plain Text
from flask import Flask, abort, jsonify, request
from langchain.llms import OpenAI
from gpt_index import GPTSimpleVectorIndex, LLMPredictor, MockEmbedding, MockLLMPredictor

app = Flask(__name__, static_folder='.')
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=1024))
index = GPTSimpleVectorIndex.load_from_disk('data.json', llm_predictor=llm_predictor)

# Mock predictor and embed model: they count tokens without calling the OpenAI API.
mock_llm_predictor = MockLLMPredictor(max_tokens=256)
mock_embed_model = MockEmbedding(embed_dim=1536)

@app.route('/question')  # route decorator added; the path here is assumed
def question():
    # AUTH_TOKEN is defined elsewhere in the app.
    token = request.headers.get('Authorization')
    if not token or token != AUTH_TOKEN:
        abort(401, 'Unauthorized')
    question = request.args.get('question')
    prompt = f"""You are a helpful support agent for a project named Nouns DAO. You are asked: "{question}". Try to use only the
    information provided. If you don't know an answer, do not make one up. Kindly let the user know you don't know the
    answer."""

    # Run a mock query first to measure embedding token usage without spending real tokens.
    index.query(prompt, llm_predictor=mock_llm_predictor, embed_model=mock_embed_model)
    embedding_tokens_used = mock_embed_model.total_tokens_used

    # Run the real query; the LLM predictor tracks its own token usage.
    response = index.query(prompt).response.strip()
    total_tokens_used = llm_predictor.total_tokens_used

    return jsonify({'answer': response, 'tokens_used': total_tokens_used, 'embedding_tokens_used': embedding_tokens_used})
oh! yeah you can do that through
Plain Text
index.embed_model.last_token_usage
and for the LLM,
Plain Text
index.llm_predictor.last_token_usage
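With those attributes you can drop the mock pass entirely. Here's a minimal sketch of the handler above rewritten that way, reusing the app, index, and AUTH_TOKEN already defined (the route path and the shortened prompt are illustrative; last_token_usage reflects the most recent query rather than a running total):
Plain Text
@app.route('/question')
def question():
    token = request.headers.get('Authorization')
    if not token or token != AUTH_TOKEN:
        abort(401, 'Unauthorized')
    question = request.args.get('question')
    # Same prompt as in the original snippet, shortened here.
    prompt = f'You are a helpful support agent for a project named Nouns DAO. You are asked: "{question}".'

    # A single real query updates both usage trackers as a side effect.
    response = index.query(prompt).response.strip()
    return jsonify({
        'answer': response,
        'tokens_used': index.llm_predictor.last_token_usage,
        'embedding_tokens_used': index.embed_model.last_token_usage,
    })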
Fantastic, thank you jerry