@llm_completion_callback()
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
    prompt_length = len(prompt)

    # run the HuggingFace pipeline; "generated_text" includes the prompt
    response = pipeline(prompt, max_new_tokens=num_output)[0]["generated_text"]

    # only return newly generated tokens
    text = response[prompt_length:]
    return CompletionResponse(text=text)
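
# A minimal sketch of the context this method assumes: `pipeline` and
# `num_output` are expected to exist at module scope before `complete` is
# called, and the imports below supply the decorator and response type
# (llama-index >= 0.10 import paths). The model name is a hypothetical
# placeholder; substitute your own.
from typing import Any

from transformers import pipeline as make_pipeline

from llama_index.core.llms import CompletionResponse
from llama_index.core.llms.callbacks import llm_completion_callback

num_output = 256  # cap on newly generated tokens per call
pipeline = make_pipeline("text-generation", model="gpt2")  # hypothetical model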