I return JSON. I use the {context_str} and {query_str} correctly, I think — they are being included in the prompt output that goes over the wire: {'score': 0.85932, 'answer': 'the answer'}
If the _call method returns anything other than a string, I get an error in langchain_core/language_models/llms.py: {"foo": {}, "bar": {}}
class CustomLLM(LLM):
    """LangChain-compatible wrapper around a local HuggingFace text-generation pipeline.

    Implements the minimal custom-LLM interface: `_call` runs the pipeline on
    the prompt and returns only the newly generated text as a plain `str`
    (LangChain requires `_call` to return a string).
    """

    model_name = "facebook/opt-iml-max-30b"
    # NOTE(review): this pipeline is constructed once at class-definition
    # time and shared by every instance; loading a 30B model here blocks
    # module import — confirm that is intended.
    pipeline = pipeline(
        "text-generation",
        model=model_name,
        device="cuda:0",
        model_kwargs={"torch_dtype": torch.bfloat16},
    )
    # Cap on newly generated tokens. The original body referenced an
    # undefined global `num_output` (NameError at call time); defining it
    # as a class attribute fixes that while staying backward-compatible.
    num_output = 256

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Generate a completion for `prompt` and return only the new tokens.

        Args:
            prompt: The full prompt string sent to the model.
            stop: Optional stop sequences (accepted for interface
                compatibility; not forwarded to the pipeline here).

        Returns:
            The generated text with the echoed prompt prefix removed.
        """
        prompt_length = len(prompt)
        response = self.pipeline(
            prompt, max_new_tokens=self.num_output
        )[0]["generated_text"]
        # The HF text-generation pipeline echoes the prompt at the start of
        # `generated_text`; slice it off so only new tokens are returned.
        return response[prompt_length:]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Parameters that identify this LLM configuration."""
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        """Type tag LangChain uses to label this LLM."""
        return "custom"