gpt2
from HF) and getting this error on query: Asking to pad but the tokenizer does not have a padding token
model_name = "gpt2" tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path=model_name) tokenizer.pad_token = tokenizer.eos_token print(f"Tokenizer settings: {tokenizer}") prompt_helper = PromptHelper(max_input_size=max_input_size,num_output=num_output, max_chunk_overlap=max_chunk_overlap,chunk_size_limit=200, tokenizer=tokenizer) class CustomLLM(LLM): pipeline = pipeline( task="text-generation", model=model_name, device="cuda:0", ) def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: prompt_length = len(prompt) response = self.pipeline(prompt, max_new_tokens=num_output)[0]["generated_text"] # only return newly generated tokens return response[prompt_length:] @property def _identifying_params(self) -> Mapping[str, Any]: return {"name_of_model": model_name} @property def _llm_type(self) -> str: return "custom" def setup_index(service_context: ServiceContext | None) -> BaseGPTIndex: INDEX_PATH = "DATA_PATH" if os.path.exists(INDEX_PATH): return GPTSimpleVectorIndex.load_from_disk(save_path=INDEX_PATH, service_context=service_context) PandasCSVReader = download_loader("PandasCSVReader") loader = PandasCSVReader() documents: List[Document] = loader.load_data( file=Path('./data/articles.csv')) index: BaseGPTIndex = GPTSimpleVectorIndex.from_documents( documents=documents, service_context=service_context) index.save_to_disk(INDEX_PATH) return index llm_predictor = LLMPredictor(llm=CustomLLM()) embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name=model_name), tokenizer=tokenizer) service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor,prompt_helper=prompt_helper,embed_model=embed_model) index = setup_index(service_context)
WARNING:sentence_transformers.SentenceTransformer:No sentence-transformers model found with name /home/ubuntu/.cache/torch/sentence_transformers/gpt2. Creating a new one with MEAN pooling
No model was supplied, defaulted to gpt2 and revision 6c0e608 (https://huggingface.co/gpt2). Using a pipeline without specifying a model name and revision in production is not recommended. INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
../aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [8,0,0], thread: [95,0,0] Assertion `srcIndex < srcSelectDimSize` failed .... RuntimeError: CUDA error: device-side assert triggered CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1. Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.