from typing import Any, List, Mapping, Optional

from langchain.llms.base import LLM
from llama_index import QuestionAnswerPrompt
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Load the GPT4All LoRA adapter config, then the base model and tokenizer it points at.
peft_model_id = "nomic-ai/gpt4all-lora"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, cache_dir="/Users/../PycharmProjects/jtcPoc/data/model")
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, cache_dir="/Users/../PycharmProjects/jtcPoc/data/tokenizer")
# Apply the LoRA adapter weights on top of the base model.
gpt4all_model = PeftModel.from_pretrained(model, peft_model_id, cache_dir="/Users/../PycharmProjects/jtcPoc/data/model")
JTC_QA_PROMPT = (
    "Perform the following instructions: \n"
    # .... blah blah ....
    "Please return only the return_object in desiredObjectFormat JSON format."
)
FULL_PROMPT = QuestionAnswerPrompt(JTC_QA_PROMPT)
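# FULL_PROMPT would typically be handed to a llama_index query as the QA template
# (a sketch of assumed usage; index and question are not defined in this snippet):
#   response = index.query(question, text_qa_template=FULL_PROMPT)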
# Maximum tokens to generate per call (an assumed value; num_output is not
# defined in this snippet).
num_output = 256

class Gpt4AllLlm(LLM):
    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
        print('--- prompt was: ----')
        print(prompt)
        print('***** end prompt ****')
        # Tokenize the prompt and generate with low-temperature nucleus sampling.
        inputs = tokenizer(prompt, return_tensors="pt")
        input_ids = inputs["input_ids"]
        generation_config = GenerationConfig(
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.1,
            top_p=0.95,
            repetition_penalty=1.2,
        )
        generation_output = gpt4all_model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            max_new_tokens=num_output,
        )
        # Decode the full sequence, then strip the echoed prompt so only the
        # completion is returned.
        response = tokenizer.decode(generation_output[0], skip_special_tokens=True).strip()
        return response[len(prompt):]
    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": "GPT4ALL"}

    @property
    def _llm_type(self) -> str:
        return "custom"
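# A minimal usage sketch, assuming the class above; the question text is
# hypothetical. LangChain's LLM base class routes a direct call through _call().
gpt4all_llm = Gpt4AllLlm()
print(gpt4all_llm("Summarise the return_object rules in one sentence."))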