---------------------------------------------------------------------------
AuthenticationError                       Traceback (most recent call last)
File c:\Users\happy\Documents\Projects\askLavinia\.venv\lib\site-packages\tenacity\__init__.py:382, in Retrying.__call__(self, fn, *args, **kwargs)
    381 try:
--> 382     result = fn(*args, **kwargs)
    383 except BaseException:  # noqa: B902

File c:\Users\happy\Documents\Projects\askLavinia\.venv\lib\site-packages\llama_index\embeddings\openai.py:106, in get_embedding(text, engine, **kwargs)
    105 text = text.replace("\n", " ")
--> 106 return openai.Embedding.create(input=[text], model=engine, **kwargs)["data"][0][
    107     "embedding"
    108 ]

File c:\Users\happy\Documents\Projects\askLavinia\.venv\lib\site-packages\openai\api_resources\embedding.py:33, in Embedding.create(cls, *args, **kwargs)
     32 try:
---> 33     response = super().create(*args, **kwargs)
     35 # If a user specifies base64, we'll just return the encoded string.
     36 # This is only for the default case.

File c:\Users\happy\Documents\Projects\askLavinia\.venv\lib\site-packages\openai\api_resources\abstract\engine_api_resource.py:149, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)
    127 @classmethod
    128 def create(
    129     cls,
   (...)
    136     **params,
...
--> 326     raise retry_exc from fut.exception()
    328 if self.wait:
    329     sleep = self.wait(retry_state)

RetryError: RetryError[]
st.session_state['query_engine'] = index.as_query_engine(verbose=True)
Has anyone gotten the ability to retrieve tokens and then figure out the cost? Thank you.

import openai

openai.api_key = "sk-...."
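Hard-coding the key works for a quick test, but a common alternative is to read it from an environment variable so it never lands in the notebook. A minimal sketch, assuming the key is stored in OPENAI_API_KEY:

import os
import openai

# Read the key from the environment instead of hard-coding it.
# Assumption: OPENAI_API_KEY is already set in your shell or .env file.
openai.api_key = os.environ["OPENAI_API_KEY"]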
from llama_index import MockLLMPredictor, MockEmbedding, ServiceContext

llm_predictor = MockLLMPredictor(max_tokens=256)
embed_model = MockEmbedding(embed_dim=1536)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor, embed_model=embed_model
)
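If the goal is just a rough estimate before spending real tokens, the mock predictor can be exercised directly. The sketch below is a minimal example, assuming a vector index already exists (one is built later in this post) and that your llama_index version still exposes the legacy last_token_usage attribute on MockLLMPredictor:

# Dry-run estimate: route a query through the mock service context so no
# paid OpenAI calls are made, then read the token count the mock recorded.
# NOTE: last_token_usage is assumed from the legacy llama_index cost-analysis
# API; confirm it exists in your installed version.
response = index.as_query_engine(service_context=service_context).query(
    "What are the overtime policies?"
)
print("Estimated LLM tokens:", llm_predictor.last_token_usage)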
import tiktoken
from llama_index.callbacks import CallbackManager, TokenCountingHandler

# You can set a tokenizer directly, or optionally let it default
# to the same tokenizer that was used previously for token counting.
# NOTE: The tokenizer should be a function that takes in text and returns a list of tokens.
token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model("text-davinci-003").encode,
    verbose=True  # set to True to see usage printed to the console
)

callback_manager = CallbackManager([token_counter])
service_context = ServiceContext.from_defaults(callback_manager=callback_manager)

# Also track prompt, completion, and total LLM tokens, in addition to embeddings.
response = index.as_query_engine().query("What are the overtime policies?")

print('Embedding Tokens: ', token_counter.total_embedding_token_count, '\n',
      'LLM Prompt Tokens: ', token_counter.prompt_llm_token_count, '\n',
      'LLM Completion Tokens: ', token_counter.completion_llm_token_count, '\n',
      'Total LLM Token Count: ', token_counter.total_llm_token_count)
from llama_index import SimpleDirectoryReader, VectorStoreIndex

callback_manager = CallbackManager([token_counter])
service_context = ServiceContext.from_defaults(callback_manager=callback_manager)

documents = SimpleDirectoryReader("./data").load_data()

# If verbose is turned on, you will see embedding token usage printed.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
response = index.as_query_engine(verbose=True, service_context=service_context).query("What are the overtime policies?")
LLM Prompt Token Usage: 1945
# Get the prompt text of the last LLM call
token_counter.llm_token_counts[-1].prompt

# Get the completion text of the last LLM call
token_counter.llm_token_counts[-1].completion
class TokenCost:
    """
    A class used to calculate the token cost of an LLM model.

    Attributes
    ----------
    token_counter : TokenCountingHandler
        a TokenCountingHandler object that counts tokens in the model
    callback_manager : CallbackManager
        a CallbackManager object that manages callbacks for the token counter
    """

    def __init__(self, model_name, verbose=True):
        """
        Initializes the TokenCost object.

        Parameters
        ----------
        model_name : str
            The name of the model to be token counted. Common names are 'text-davinci-003'.
        verbose : bool, optional
            Whether to print the token counting progress to the console. Default is True.
        """
        self._callback_manager = None
        # Set up callback
        self.token_counter = TokenCountingHandler(
            tokenizer=tiktoken.encoding_for_model(model_name).encode,
            verbose=verbose
        )
        self.callback_manager = CallbackManager([self.token_counter])

    @property
    def callback_manager(self):
        return self._callback_manager

    @callback_manager.setter
    def callback_manager(self, value):
        self._callback_manager = value

    @property
    def embedding_token_count(self):
        return self.token_counter.total_embedding_token_count

    @property
    def prompt_token_count(self):
        return self.token_counter.prompt_llm_token_count

    @property
    def completion_token_count(self):
        return self.token_counter.completion_llm_token_count

    @property
    def total_token_count(self):
        return self.token_counter.total_llm_token_count

    @property
    def prompt(self):
        return self.token_counter.llm_token_counts[-1].prompt

    @property
    def completion(self):
        return self.token_counter.llm_token_counts[-1].completion
from myutils import TokenCost, utils_load_index
from llama_index import ServiceContext, Prompt

model_name = "text-davinci-003"
token_cost = TokenCost(model_name, verbose=False)
service_context = ServiceContext.from_defaults(
    callback_manager=token_cost.callback_manager
)
index = utils_load_index("indices/vector_index")

# Also track prompt, completion, and total LLM tokens, in addition to embeddings.
PROMPT_TMPL_STR = (
    "Given this context information --> {context_str} <-- \n\n"
    "and no prior knowledge, "
    "answer the question: {query_str}. The response should be formatted as a list of bullet points. Adhere to these guidelines:\n"
    "- bullet points start on new lines\n"
    "- each bullet point includes a fact and the article number where the fact is discussed\n"
    "- the text should be comprehensible to a high school student\n"
)
QA_TEMPLATE = Prompt(PROMPT_TMPL_STR)

response = index.as_query_engine(
    verbose=True, service_context=service_context, text_qa_template=QA_TEMPLATE
).query("What are the overtime policies?")

print(
    f"""
Embedding Tokens: {token_cost.embedding_token_count}
LLM Prompt Tokens: {token_cost.prompt_token_count}
LLM Completion Tokens: {token_cost.completion_token_count}
Total LLM Token Count: {token_cost.total_token_count}
{'*' * 50}
Prompt: {token_cost.prompt}
{'*' * 50}
Completion: {token_cost.completion}
"""
)
{ "openai_LLMs": { "text-davinci-003":{"prompt":0.00002, "completion": 0.00002}, "gpt4": {"prompt":0.00003, "completion": 0.00006 }, "gpt-4-32k": {"prompt":0.00006, "completion": 0.00012}, "gpt-3.5-turbo": {"prompt":0.0000015 , "completion": 0.000002 }, "gpt-3.5-16K" : {"prompt":0.000003 , "completion": 0.000004 } } }
import json


def calculate_cost(model_name, num_prompt_tokens, num_completion_tokens):
    """Calculate the total cost for a specified model based on the number of prompt and completion tokens."""
    # Load per-token price data from the JSON file
    with open("openai_costs.json") as f:
        data = json.load(f)

    try:
        costs = data["openai_LLMs"][model_name]
        prompt_cost = costs["prompt"]
        completion_cost = costs["completion"]

        # Calculate total cost
        total_cost = (prompt_cost * num_prompt_tokens) + (
            completion_cost * num_completion_tokens
        )
        return total_cost
    except KeyError:
        return "Model not found in data."
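Putting the pieces together, the counts collected by TokenCost can be fed straight into calculate_cost. A small illustrative example, reusing the names from the earlier snippets (token_cost is the TokenCost instance created above):

# Estimate the dollar cost of the last query from the counters gathered above.
model_name = "text-davinci-003"
query_cost = calculate_cost(
    model_name,
    num_prompt_tokens=token_cost.prompt_token_count,
    num_completion_tokens=token_cost.completion_token_count,
)
print(f"Estimated cost of this query: ${query_cost:.6f}")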
from tiktoken.model import MODEL_TO_ENCODING

# MODEL_TO_ENCODING is a dictionary where keys are model names and values are their encodings.
# We can transform it into a list of dictionaries.
list_of_dicts = [{"model": k, "encoding": v} for k, v in MODEL_TO_ENCODING.items()]
print(list_of_dicts)
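Since TokenCost calls tiktoken.encoding_for_model(model_name), it helps to confirm the model name is one tiktoken recognizes before wiring up the handler. A small sketch; the fallback to cl100k_base is my assumption, not something prescribed above:

import tiktoken
from tiktoken.model import MODEL_TO_ENCODING

model_name = "text-davinci-003"
if model_name in MODEL_TO_ENCODING:
    # tiktoken knows this model directly, so encoding_for_model() will succeed.
    encoding = tiktoken.encoding_for_model(model_name)
    print(f"{model_name} uses the {encoding.name} encoding")
else:
    # Fall back to a general-purpose encoding (assumption: cl100k_base).
    encoding = tiktoken.get_encoding("cl100k_base")
    print(f"{model_name} not listed; falling back to {encoding.name}")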