For the sake of accuracy, the code should then look like the following:
class OurLLM(CustomLLM):
    """Custom LLM that forwards completion requests to the Hugging Face
    Inference API (zephyr-7b-alpha) and wraps the raw HTTP response.

    Streaming is not supported; ``stream_complete`` raises
    ``NotImplementedError``.
    """

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata.

        ``context_window`` and ``num_output`` are module-level constants
        defined elsewhere in the file.
        """
        return LLMMetadata(
            context_window=context_window,
            num_output=num_output,
            # Use the model identifier, not the endpoint URL.
            model_name="zephyr-7b-alpha",
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Send ``prompt`` to the HF Inference API and return the raw
        response body as the completion text.

        Extra keyword arguments from the LlamaIndex callback machinery are
        accepted via ``**kwargs`` and ignored.
        """
        # Original code split this literal across two lines, which is a
        # SyntaxError; implicit string concatenation keeps it readable.
        api_url = (
            "https://api-inference.huggingface.co/models/"
            "HuggingFaceH4/zephyr-7b-alpha"
        )
        # NOTE(review): hard-coded bearer token placeholder — load the real
        # token from the environment, never commit it.
        headers = {"Authorization": "Bearer xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
        # The HF Inference API expects a JSON object of the form
        # {"inputs": <prompt>}, not a bare JSON string.
        response = requests.post(api_url, headers=headers, json={"inputs": prompt})
        return CompletionResponse(text=response.text)

    @llm_completion_callback()
    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        """Streaming completions are not implemented for this backend."""
        raise NotImplementedError()