RuntimeError: The size of tensor a (2048) must match the size of tensor b (2049) at non-singleton dimension 3
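The off-by-one in the error (2048 vs. 2049 at dimension 3) usually means the model is being asked to handle more tokens than its context window allows. As a minimal check, assuming transformers is installed and the model name matches the one used below, you can read the window size straight from the model config and count how many tokens a prompt actually consumes (the prompt string here is just a placeholder):

from transformers import AutoConfig, AutoTokenizer

model_name = "EleutherAI/pythia-70m"

# Pythia models expose their context window as max_position_embeddings
config = AutoConfig.from_pretrained(model_name)
print("context window:", config.max_position_embeddings)  # 2048 for the Pythia family

# count how many tokens a given prompt actually consumes
tokenizer = AutoTokenizer.from_pretrained(model_name)
prompt = "example prompt"  # placeholder
prompt_tokens = len(tokenizer.encode(prompt))
print("prompt tokens:", prompt_tokens)
print("room left for generation:", config.max_position_embeddings - prompt_tokens)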
# define prompt helper
# set maximum input size
max_input_size = 2048
# set number of output tokens
num_output = 1500
# set maximum chunk overlap
max_chunk_overlap = 20
# Set the chunk size limit
chunk_size_limit = 100
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap,
                             chunk_size_limit=chunk_size_limit)
# typical imports for the pieces used below (assumed from the libraries in use)
from typing import Any, Mapping

from langchain.llms.base import LLM
from transformers import pipeline


# Custom LLM Class
class CustomLLM(LLM):
    model_name = "EleutherAI/pythia-70m"
    pipeline = pipeline(model=model_name,
                        model_kwargs={'pad_token_id': 0},
                        # torch_dtype=torch.bfloat16,
                        trust_remote_code=True,
                        max_new_tokens=1026,
                        device_map="auto")

    def _call(self, prompt, stop=None):
        prompt_length = len(prompt)
        response = self.pipeline(prompt)[0]['generated_text']
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        return "custom"
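One thing to note in _call is that prompt_length = len(prompt) counts characters, not tokens. A rough sketch of an alternative _call that measures the prompt with the pipeline's own tokenizer and caps generation so the total stays inside a 2048-token window (an illustration under the assumption that pythia-70m's window is 2048 tokens, not a confirmed fix):

def _call(self, prompt, stop=None):
    # count prompt tokens with the pipeline's own tokenizer
    n_prompt_tokens = len(self.pipeline.tokenizer.encode(prompt))
    # leave room inside the assumed 2048-token context window
    budget = max(2048 - n_prompt_tokens, 1)
    out = self.pipeline(prompt, max_new_tokens=min(256, budget))
    return out[0]['generated_text']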
# define prompt helper
# set maximum input size
max_input_size = 2048
# set number of output tokens
num_output = 256
# set maximum chunk overlap
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap,
                             chunk_size_limit=512)
2023-05-07 18:38:47 Building Attachments Index...
--- Hashing /home/gabri/AkoGPT/attachments
2023-05-07 18:38:47 Building Base Knowledge Index...
--- Hashing /home/gabri/AkoGPT/base
INFO:llama_index.indices.loading:Loading all indices.

Querying...

Batches: 100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 7.93it/s]
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 13 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens

Query Complete...

None
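The log reports zero LLM tokens used and a final response of None, which may mean the LLM call failed silently or returned nothing usable. A small check that can help surface what actually came back, assuming the llama_index response object of that release (text on .response, retrieved chunks on .source_nodes):

response = query_engine.query(prompt)
print("raw response object:", repr(response))
print("response text:", getattr(response, "response", None))
for node in getattr(response, "source_nodes", []):
    # each source node carries the retrieved chunk and its similarity score
    print(node.score, node.node.get_text()[:200])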
def build_index(prompt):
    additions = False
    documents_array = []
    parser = SimpleNodeParser()
    docstore = MongoDocumentStore.from_uri(uri=MONGO_URI)
    index_store = MongoIndexStore.from_uri(uri=MONGO_URI)
    storage_context = StorageContext.from_defaults(
        index_store=index_store,
        docstore=docstore
    )

    if arg_present('show_index'):
        for i, v in enumerate(index_store.index_structs()):
            print(f'\nIndex {i}:\n{v}\n')

    if arg_present('refresh_documents'):
        log(f'Refresh documents called', True, False)
        log(f'Clearing Document Store...', False, True)
        for d in docstore.docs:
            storage_context.docstore.delete_document(d)
        log(f'Clearing Index Store...', False, True)
        client = pymongo.MongoClient("mongodb://localhost:27017/")
        db = client["db_docstore"]
        col = db["index_store/data"]
        col.drop()
        client.close()
        log(f'Removing Hash files...', False, True)
        os.remove(attachments_hash)
        os.remove(base_knowledge_hash)

    # Check if attachments index file exists, if not, build it.
    log(f'Building Attachments Index...', True, False)
    if os.path.exists(attachments_hash):
        if not compare_hashes(attachments_folder, attachments_hash, attachments_hash):
            storage_context.docstore.add_documents(get_nodes_from_documents_in_folder(attachments_folder))
            additions = True
    else:
        storage_context.docstore.add_documents(get_nodes_from_documents_in_folder(attachments_folder))
        hash_folder(attachments_folder, attachments_hash, True)
        additions = True
    # Base Knowledge Folder Index
    log(f'Building Base Knowledge Index...', True, False)
    if os.path.exists(base_knowledge_hash):
        if not compare_hashes(base_knowledge_folder, base_knowledge_hash, base_knowledge_hash):
            storage_context.docstore.add_documents(get_nodes_from_documents_in_folder(base_knowledge_folder))
            additions = True
    else:
        storage_context.docstore.add_documents(get_nodes_from_documents_in_folder(base_knowledge_folder))
        hash_folder(base_knowledge_folder, base_knowledge_hash, True)
        additions = True

    # build index from folders
    if additions:
        for d in docstore.docs:
            documents_array.append(storage_context.docstore.get_document(d))
        index = GPTVectorStoreIndex.from_documents(
            documents_array,
            storage_context=storage_context,
            service_context=service_context
        )
    else:
        index = load_index_from_storage(storage_context=storage_context, service_context=service_context)

    return index
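build_index relies on hash_folder and compare_hashes helpers that are not shown. For anyone trying to reproduce the setup, a hypothetical version is sketched below; it assumes they fingerprint the contents of a folder and store the digest in a text file, which is a guess and not the original implementation:

import hashlib
import os

def hash_folder(folder, hash_file, write=False):
    """Return a digest of every file under folder; optionally write it to hash_file."""
    digest = hashlib.sha256()
    for root, _, files in os.walk(folder):
        for name in sorted(files):
            with open(os.path.join(root, name), 'rb') as f:
                digest.update(f.read())
    result = digest.hexdigest()
    if write:
        with open(hash_file, 'w') as f:
            f.write(result)
    return result

def compare_hashes(folder, hash_file, _unused=None):
    """True if the stored digest still matches the folder's current contents."""
    if not os.path.exists(hash_file):
        return False
    with open(hash_file) as f:
        stored = f.read().strip()
    return stored == hash_folder(folder, hash_file, write=False)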
def ask_gpt_custom(prompt):
    index = build_index(prompt)
    print(f'\n\nQuerying...\n\n')
    query_engine = index.as_query_engine(
        verbose=True,
        service_context=service_context
    )
    response = query_engine.query(prompt)
    print(f'\n\nQuery Complete...\n\n')
    print(f'{response}')
    return f'{response}'
class CustomLLM(LLM):
    model_name = "EleutherAI/pythia-70m"
    pipeline = pipeline(model=model_name,
                        model_kwargs={'pad_token_id': 0},
                        # torch_dtype=torch.bfloat16,
                        trust_remote_code=True,
                        max_new_tokens=256,
                        device_map="auto")

    def _call(self, prompt, stop=None):
        prompt_length = len(prompt)
        response = self.pipeline(prompt)[0]['generated_text']
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        return "custom"


# define prompt helper
# set maximum input size
max_input_size = 2048
# set number of output tokens
num_output = 256
# set maximum chunk overlap
max_chunk_overlap = 20
# Set the chunk size limit
chunk_size_limit = 512
# note: chunk_size_limit is defined above but not passed to PromptHelper here
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

# define our LLM
llm_predictor = LLMPredictor(llm=CustomLLM())

# build service context
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor,
                                               prompt_helper=prompt_helper,
                                               embed_model=embed_model)
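If PromptHelper advertises a 2048-token input while the pipeline is allowed to generate more than num_output tokens, or chunk_size_limit never reaches the helper, the assembled prompt can end up just past the window. Below is a sketch of one internally consistent budget, assuming the same classes as above and that pythia-70m's window really is 2048 tokens; it only illustrates aligning the numbers, it is not a confirmed fix:

CONTEXT_WINDOW = 2048      # pythia-70m context length (assumption)
NUM_OUTPUT = 256           # should match the pipeline's max_new_tokens
CHUNK_OVERLAP = 20
CHUNK_SIZE = 512

prompt_helper = PromptHelper(
    CONTEXT_WINDOW,
    NUM_OUTPUT,
    CHUNK_OVERLAP,
    chunk_size_limit=CHUNK_SIZE,   # actually pass the limit this time
)

llm_predictor = LLMPredictor(llm=CustomLLM())   # CustomLLM's max_new_tokens should also be 256
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
    embed_model=embed_model,
)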
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

llm_predictor = HuggingFaceLLMPredictor(
    max_input_size=4096,
    max_new_tokens=256,
    temperature=0.7,
    do_sample=False,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="EleutherAI/pythia-160m",
    model_name="EleutherAI/pythia-160m",
    device_map="auto",
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
)

embed_model = LangchainEmbedding(HuggingFaceEmbeddings())
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)

index = build_index(prompt)
query_engine = index.as_query_engine(
    retriever_mode="embedding",
    service_context=service_context
)
response = query_engine.query(prompt)
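This variant declares max_input_size=4096 and a tokenizer max_length of 4096, but the Pythia family (including pythia-160m) is trained with a 2048-token context, so the same overflow can occur. A hedged adjustment, keeping everything else from the snippet above and only pulling the sizes down to the model's window (the 2048 figure is taken from the published Pythia config, not from this post):

llm_predictor = HuggingFaceLLMPredictor(
    max_input_size=2048,                      # match pythia-160m's context window
    max_new_tokens=256,
    temperature=0.7,
    do_sample=False,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="EleutherAI/pythia-160m",
    model_name="EleutherAI/pythia-160m",
    device_map="auto",
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 2048},    # keep tokenizer truncation consistent
)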