CompletionResponseGen
section is not implemented in the doc. yield CompletionResponseGen(text=generated_text)
but I get the error: Generator() takes no arguments
yield ChatResponse(..)
objects. for token in response.response_gen
the token
are empty strings.
llm = BamLLM() resp = llm.stream_complete('1 + 1') for delta in resp: print(delta, end='')
# Send the prompt to the query engine, then print every item produced by the
# result's `response_gen` iterator, one per line.
# NOTE(review): `self` implies this fragment lives inside a method whose
# definition is not shown here.
streaming_response = self.query_engine.query( prompt )
for token in streaming_response.response_gen:
    print(token)
@llm_completion_callback()
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
    """Stream generated text for ``prompt`` from the endpoint in ``GENAI_API``.

    Configuration is read from the environment: ``MODEL_NAME``,
    ``TEMPERATURE``, ``MAX_OUTPUT_TOKENS``, ``GENAI_KEY`` and ``GENAI_API``.
    ``kwargs`` is accepted but not used.

    Yields:
        One ``CompletionResponse`` per successfully parsed body chunk, with
        ``text`` set to that chunk's ``results[0].generated_text``. No
        ``delta`` is set on the yielded responses.

    Nothing is yielded when the HTTP status is not 200. Any exception raised
    while handling a chunk is printed and that chunk is skipped.
    """
    sse_prefix = 'data: '

    payload = {
        "model_id": os.getenv('MODEL_NAME'),
        "inputs": [prompt],
        "parameters": {
            "temperature": float(os.getenv('TEMPERATURE')),
            "max_new_tokens": int(os.getenv('MAX_OUTPUT_TOKENS')),
            "stream": True,
        },
    }
    auth_headers = {"Authorization": f"Bearer {os.getenv('GENAI_KEY')}"}

    response = requests.post(
        os.getenv('GENAI_API'),
        json=payload,
        headers=auth_headers,
        stream=True,
    )

    # Guard clause: a non-200 reply ends the generator without yielding.
    if response.status_code != 200:
        return

    for raw in response.iter_content(chunk_size=4096):
        try:
            if not raw:
                continue
            body = raw.decode('utf-8')
            # Drop the server-sent-event field name when it is present.
            if body.startswith(sse_prefix):
                body = body[len(sse_prefix):]
            parsed = json.loads(body)
            piece = parsed['results'][0]['generated_text']
            yield CompletionResponse(text=piece)
        except Exception as ex:
            print(str(ex))
llm = BamLLM()
resp = llm.stream_complete('1 + 1')
for partial in resp:
    # Print only the newest fragment: when the stream reports cumulative
    # `text`, printing the whole response would repeat earlier output.
    # Fall back to `text` for responses that carry no `delta`.
    fragment = partial.delta if partial.delta is not None else partial.text
    print(fragment, end='')
# Send the prompt to the query engine, then print every item produced by the
# result's `response_gen` iterator, one per line.
# NOTE(review): `self` implies this fragment lives inside a method whose
# definition is not shown here.
streaming_response = self.query_engine.query( prompt )
for token in streaming_response.response_gen:
    print(token)
@llm_completion_callback()
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
    """Stream a completion for ``prompt`` from the endpoint in ``GENAI_API``.

    Configuration is read from the environment: ``MODEL_NAME``,
    ``TEMPERATURE``, ``MAX_OUTPUT_TOKENS``, ``GENAI_KEY`` and ``GENAI_API``.
    The HTTP request is sent when this method is called; the returned
    generator only consumes the response body. ``kwargs`` is accepted but
    not used.

    Args:
        prompt: Text sent as the single element of the request's ``inputs``.

    Returns:
        A generator of ``CompletionResponse`` objects. Each one carries the
        text accumulated so far in ``text`` and the newest fragment in
        ``delta``. If the HTTP status is not 200, a single response whose
        text is ``"Network Error"`` is produced instead.

    Raises:
        TypeError, ValueError: if ``TEMPERATURE`` or ``MAX_OUTPUT_TOKENS`` is
            unset or not numeric.
    """
    import logging

    log = logging.getLogger(__name__)

    payload = {
        "model_id": os.getenv('MODEL_NAME'),
        "inputs": [prompt],
        "parameters": {
            "temperature": float(os.getenv('TEMPERATURE')),
            "max_new_tokens": int(os.getenv('MAX_OUTPUT_TOKENS')),
            "stream": True,
        },
    }
    headers = {
        "Authorization": f"Bearer {os.getenv('GENAI_KEY')}",
    }
    response = requests.post(os.getenv('GENAI_API'), json=payload, headers=headers, stream=True)

    def gen() -> CompletionResponseGen:
        content = ""
        # Release the connection once the stream is exhausted or the consumer
        # stops iterating early.
        with response:
            if response.status_code != 200:
                # Populate delta as well so consumers that read only the
                # incremental field still see the message.
                yield CompletionResponse(text="Network Error", delta="Network Error")
                return

            # iter_lines() buffers until a line break, so every item is one
            # complete line even when an event is split across network reads
            # or several events arrive in a single read.
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue  # blank separator between events
                try:
                    line = raw_line.decode('utf-8')
                    if line.startswith('data:'):
                        line = line[len('data:'):].lstrip()
                    event = json.loads(line)
                    generated_text = event['results'][0]['generated_text']
                    content += generated_text
                except (ValueError, KeyError, IndexError, TypeError) as ex:
                    # Best effort: skip lines that are not a usable payload,
                    # and report them through logging rather than stdout,
                    # where they would mix with the streamed text.
                    log.warning("Skipping unparseable stream line: %s", ex)
                    continue
                yield CompletionResponse(text=content, delta=generated_text)

    return gen()