```
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
/usr/local/lib/python3.9/dist-packages/transformers/generation/utils.py:1313: UserWarning: Using `max_length`'s default (300) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.
  warnings.warn(
Input length of input_ids is 3635, but `max_length` is set to 300. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-53-0f83071d088d> in <cell line: 1>()
----> 1 response = index.query("What did the author do growing up?")
      2 print(response)

--------------------------- 42 frames ---------------------------

/usr/local/lib/python3.9/dist-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2208     # remove once script supports set_grad_enabled
   2209     _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2210     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   2211
   2212

IndexError: index out of range in self
```
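The `IndexError` is raised inside the position-embedding lookup: the query packs 3635 tokens of context into a single prompt (per the warning above), which appears to exceed the positional table of Cerebras-GPT-111M. A quick sanity check, assuming the GPT-2 style config field is the right one to read (it should report 2048 for this model, but worth verifying):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained('cerebras/Cerebras-GPT-111M')
# Size of the positional-embedding table; anything longer than this
# in input_ids will index out of range, as in the traceback above.
print(config.max_position_embeddings)
```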
```python
from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, GPTListIndex
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

documents = SimpleDirectoryReader('./data/documents/').load_data()

model_id = 'cerebras/Cerebras-GPT-111M'
llm = HuggingFacePipeline.from_model_id(
    model_id,
    task='text-generation',
    model_kwargs={
        'max_length': 300,
        'do_sample': False,
    },
    verbose=True)

llm_predictor = LLMPredictor(llm=llm)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

index = GPTListIndex.from_documents(
    documents, service_context=service_context
)

response = index.query('What did the author do growing up?')
print(response)
```
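As the deprecation warning notes, `max_length` counts the prompt plus the completion, so 300 is already smaller than the 3635-token prompt. A minimal sketch that builds the transformers pipeline directly with `max_new_tokens` and wraps it for langchain (same model and task as above; this only addresses the generation-length warning, not the context-window limit):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

model_id = 'cerebras/Cerebras-GPT-111M'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# max_new_tokens bounds only the generated continuation,
# independent of how long the prompt is.
pipe = pipeline('text-generation', model=model, tokenizer=tokenizer,
                max_new_tokens=50, do_sample=False)
llm = HuggingFacePipeline(pipeline=pipe)
```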
```python
from llama_index import PromptHelper

# define prompt helper
# set maximum input size
max_input_size = 300
# set number of output tokens
num_output = 50
# set maximum chunk overlap
max_chunk_overlap = 10
# max size of each chunk
chunk_size_limit = 100

prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap,
                             chunk_size_limit=chunk_size_limit)

service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor,
                                               prompt_helper=prompt_helper)
```
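A minimal usage sketch to go with the prompt helper, assuming the index is rebuilt with the updated `service_context` so the chunking limits actually apply when the query prompt is assembled:

```python
# Rebuild the list index under the prompt-helper-aware service context,
# then re-run the same query as before.
index = GPTListIndex.from_documents(documents, service_context=service_context)

response = index.query('What did the author do growing up?')
print(response)
```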