This is roughly our code (simplified a bit). When I run it and then print `token_counter.total_llm_token_count` and `token_counter.total_embedding_token_count`, both are 0. I also never see any trace output printed from the LlamaDebugHandler. I've tried it both ways, passing the callback manager in explicitly and not (the commented-out lines below), and the result is the same in either case. Could it be because we're using AzureOpenAI and AzureOpenAIEmbedding rather than OpenAI directly?
```python
from typing import List

import tiktoken
from llama_index.core import Settings, SimpleDirectoryReader
from llama_index.core.callbacks import LlamaDebugHandler, TokenCountingHandler
from llama_index.core.extractors import BaseExtractor, QuestionsAnsweredExtractor, SummaryExtractor
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import BaseNode, Document
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI

# Count tokens with the gpt-3.5-turbo tokenizer; verbose should print per-event counts.
token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode,
    verbose=True,
)
llama_debug = LlamaDebugHandler(print_trace_on_end=True)

# Register both handlers on the global callback manager.
callback_manager = Settings.callback_manager
callback_manager.add_handler(token_counter)
callback_manager.add_handler(llama_debug)


def get_worker_llm():
    # Azure deployment/endpoint/key config elided.
    return AzureOpenAI(model="gpt-3.5-turbo", ...)


def get_embed_model():
    return AzureOpenAIEmbedding(...)


def run_ingestion_transforms_on_documents(documents: List[Document]) -> List[BaseNode]:
    node_parser = SentenceSplitter(
        separator=" ",
        chunk_size=1024,
        chunk_overlap=200,
        # callback_manager=callback_manager,
    )
    parsed_nodes = node_parser.get_nodes_from_documents(documents)

    # Each extractor issues LLM calls for each node to attach metadata.
    extractors: List[BaseExtractor] = [
        SummaryExtractor(summaries=["prev", "self", "next"], llm=get_worker_llm()),
        QuestionsAnsweredExtractor(questions=3, llm=get_worker_llm()),
    ]
    for extractor in extractors:
        parsed_nodes = extractor(
            parsed_nodes,
            # callback_manager=callback_manager
        )

    # Embedding models are transforms, so calling one embeds the nodes.
    embed_model = get_embed_model()
    embedded_nodes = embed_model(
        parsed_nodes,
        # callback_manager=callback_manager
    )
    return embedded_nodes


def load_documents(path: str):
    documents = SimpleDirectoryReader(path).load_data()
    return documents


def ingest(path: str):
    documents = load_documents(path)
    run_ingestion_transforms_on_documents(documents)


ingest("path/to/our/docs")
# Both of these print 0:
print("LLM tokens:", token_counter.total_llm_token_count)
print("Embedding tokens:", token_counter.total_embedding_token_count)
```
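And for completeness, this is roughly what the "passing the callback manager in" variant looks like on our side: besides uncommenting the `callback_manager` lines above, we also hand it to the model constructors (my understanding is that both `AzureOpenAI` and `AzureOpenAIEmbedding` accept a `callback_manager` kwarg through their base classes, but correct me if that's wrong):

```python
def get_worker_llm():
    # Same Azure config as above (elided); only difference is the explicit manager.
    return AzureOpenAI(model="gpt-3.5-turbo", callback_manager=callback_manager, ...)


def get_embed_model():
    return AzureOpenAIEmbedding(callback_manager=callback_manager, ...)
```

Either way, both token totals still come back as 0 and no trace is printed.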