I am using `Bedrock(model="anthropic.claude-v2")`. When I check the generated prompt using `token_counter.llm_token_counts`, I notice that there are two duplicate prompt events. Does anyone have any idea why we are sending two back-to-back identical prompt calls to the LLM?

```python
import tiktoken
from llama_index import ServiceContext, set_global_service_context
from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index.llms import Bedrock


class QA:
    def __init__(self):
        self.CHUNK_SIZE = 256
        self.MODEL_NAME = "gpt-3.5-turbo"
        self.NUM_OF_PRODUCT_REQUIRED_PAGES = 5
        self.SIMILARITY_TOP_K = 2

    def get_model(self):
        token_counter = TokenCountingHandler(
            tokenizer=tiktoken.encoding_for_model(self.MODEL_NAME).encode
        )
        callback_manager = CallbackManager([token_counter])
        llm_predictor = Bedrock(model="anthropic.claude-v2", profile_name="ABC")
        # llm_predictor = OpenAI(temperature=0, model=self.MODEL_NAME)
        # service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size=self.CHUNK_SIZE)
        service_context = ServiceContext.from_defaults(
            llm=llm_predictor,
            chunk_size=self.CHUNK_SIZE,
            callback_manager=callback_manager,
        )
        set_global_service_context(service_context)
        return service_context, token_counter
```
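For reference, the duplication can be surfaced by dumping what the handler recorded after a query. This is only a sketch: the `TokenCountingEvent` attributes used below (`event_id`, `prompt`, `prompt_token_count`) are assumed from the LlamaIndex token-counting documentation.

```python
# Sketch: after building an index and running a query, list each recorded
# LLM event to see the back-to-back duplicate prompts.
service_context, token_counter = QA().get_model()
# ... build the index and run a query here ...
for i, event in enumerate(token_counter.llm_token_counts):
    print(f"event {i}: id={event.event_id}, prompt_tokens={event.prompt_token_count}")
    print(event.prompt[:200])  # first 200 characters of the prompt sent to the LLM
```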
How are you building the index and calling `index.as_query_engine()`?

```python
from llama_index.callbacks import TokenCountingHandler, CallbackManager
from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex

documents = SimpleDirectoryReader("./docs/examples/data/paul_graham").load_data()
service_context = ServiceContext.from_defaults(
    callback_manager=CallbackManager([TokenCountingHandler()]),
    chunk_size=256,
)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine(similarity_top_k=2)
response = query_engine.query("What is the best way to raise money for a startup?")
```
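Note that the handler in this snippet is created inline, so there is no reference left to read the counts back afterwards. Keeping a handle on it makes it possible to check the aggregate counters; treat this as a sketch that assumes the handler's `prompt_llm_token_count`, `completion_llm_token_count` and `total_llm_token_count` properties.

```python
# Sketch: keep a reference to the handler so the counts can be inspected.
token_counter = TokenCountingHandler()
service_context = ServiceContext.from_defaults(
    callback_manager=CallbackManager([token_counter]),
    chunk_size=256,
)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine(similarity_top_k=2)
response = query_engine.query("What is the best way to raise money for a startup?")

# Aggregate counters across all recorded events (assumed property names).
print("prompt tokens:    ", token_counter.prompt_llm_token_count)
print("completion tokens:", token_counter.completion_llm_token_count)
print("total tokens:     ", token_counter.total_llm_token_count)
print("num LLM events:   ", len(token_counter.llm_token_counts))
```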
```python
import tiktoken
from llama_index import Document, ServiceContext, VectorStoreIndex, set_global_service_context
from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index.llms import Bedrock

token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode
)
callback_manager = CallbackManager([token_counter])
llm_predictor = Bedrock(model="anthropic.claude-v2")
service_context = ServiceContext.from_defaults(
    llm=llm_predictor,
    chunk_size=256,
    callback_manager=callback_manager,
)
set_global_service_context(service_context)

pages_text = [
    'Investors write checks when the idea they hear is compelling, when they are persuaded '
    'that the team of founders can realize its vision, and that the opportunity described is '
    'real and sufficiently large. When founders are ready to tell this story, they can raise '
    'money. And usually when you can raise money, you should.'
]
documents = [Document(text=t) for t in pages_text]
custom_llm_index = VectorStoreIndex.from_documents(documents, service_context=service_context)
custom_llm_query_engine = custom_llm_index.as_query_engine(similarity_top_k=2)
question = "how to raise money for a startup"
response = custom_llm_query_engine.query(question)
token_counter.llm_token_counts  # inspect the recorded LLM events
```
When I add a breakpoint in the `Bedrock` LLM class, it only gets hit once.
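To cross-check how many times the model is actually invoked versus how many events the token counter records, the completion call can be wrapped with a counter. This is a sketch: it assumes the `Bedrock` class from `llama_index.llms` routes queries through a `complete()` method like the other LLM integrations, and the wrapper itself is only for debugging.

```python
# Sketch: count real Bedrock invocations and compare with the number of
# events recorded by the TokenCountingHandler.
from llama_index.llms import Bedrock

call_count = 0
original_complete = Bedrock.complete


def counting_complete(self, *args, **kwargs):
    global call_count
    call_count += 1
    return original_complete(self, *args, **kwargs)


Bedrock.complete = counting_complete  # monkeypatch for debugging only

response = custom_llm_query_engine.query("how to raise money for a startup")
print("Bedrock.complete called:", call_count, "time(s)")
print("LLM events recorded:    ", len(token_counter.llm_token_counts))
```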