```python
from injector import inject, singleton
from llama_index import MockEmbedding
from llama_index.embeddings.base import BaseEmbedding

from private_gpt.paths import models_cache_path
from private_gpt.settings.settings import settings
from torch.nn.parallel import DataParallel
from torch.nn.parallel import DistributedDataParallel


@singleton
class EmbeddingComponent:
    embedding_model: BaseEmbedding

    @inject
    def __init__(self) -> None:
        match settings.llm.mode:
            case "local":
                from llama_index.embeddings import HuggingFaceEmbedding

                embedding_model = HuggingFaceEmbedding(
                    model_name=settings.local.embedding_hf_model_name,
                    cache_folder=str(models_cache_path),
                    embed_batch_size=20,
                )
                self.embedding_model = DataParallel(embedding_model)
            case "sagemaker":
                from private_gpt.components.embedding.custom.sagemaker import (
                    SagemakerEmbedding,
                )

                self.embedding_model = SagemakerEmbedding(
                    endpoint_name=settings.sagemaker.embedding_endpoint_name,
                )
```
File "/home/bennison/Documents/yavar/poc/privateGPT/private_gpt/components/embedding/embedding_component.py", line 25, in __init__ self.embedding_model = DataParallel(embedding_model) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/bennison/.cache/pypoetry/virtualenvs/private-gpt-_Dc3_tu1-py3.11/lib/python3.11/site-packages/torch/nn/parallel/data_parallel.py", line 148, in __init__ self.module.to(self.src_device_obj) ^^^^^^^^^^^^^^ AttributeError: 'HuggingFaceEmbedding' object has no attribute 'to' make: *** [Makefile:36: run] Error 1
`HuggingFaceEmbedding` is not a PyTorch model, it's a wrapper around a model. Could you load the underlying model yourself with `AutoModel` and put it in this class you've created, i.e. `HuggingFaceEmbedding(model=model)`?
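A minimal sketch of the distinction being drawn here (illustrative only, not PrivateGPT code; the model name is taken from the comment in the thread): `DataParallel` expects a `torch.nn.Module`, which is what `AutoModel.from_pretrained` returns, while the llama_index wrapper is not one, hence the missing `.to()` attribute in the traceback above.

```python
import torch
from torch.nn.parallel import DataParallel
from transformers import AutoModel
from llama_index.embeddings import HuggingFaceEmbedding

# The raw transformer is a torch.nn.Module, so DataParallel can wrap it.
raw_model = AutoModel.from_pretrained("BAAI/bge-small-en")
print(isinstance(raw_model, torch.nn.Module))   # True
parallel_model = DataParallel(raw_model)        # accepted; moves to the first GPU if one is available

# The llama_index wrapper is not an nn.Module, so DataParallel(wrapper)
# raises the AttributeError shown above ('no attribute "to"').
wrapper = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")
print(isinstance(wrapper, torch.nn.Module))     # False
```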
```python
model = AutoModel.from_pretrained(
    # BAAI/bge-small-en
    settings.local.embedding_hf_model_name,
    cache_dir=models_cache_path,
)
self.embedding_model = HuggingFaceEmbedding(
    model=model,
)
```
File "/home/bennison/.cache/pypoetry/virtualenvs/private-gpt-_Dc3_tu1-py3.11/lib/python3.11/site-packages/llama_index/embeddings/huggingface.py", line 98, in __init__ super().__init__( File "/home/bennison/.cache/pypoetry/virtualenvs/private-gpt-_Dc3_tu1-py3.11/lib/python3.11/site-packages/pydantic/v1/main.py", line 341, in __init__ raise validation_error pydantic.v1.error_wrappers.ValidationError: 1 validation error for HuggingFaceEmbedding model_name none is not an allowed value (type=type_error.none.not_allowed) make: *** [Makefile:36: run] Error 1
You could also specify the GPU explicitly:

```python
self.embedding_model = HuggingFaceEmbedding(
    model=model,
    device="cuda:0",
)
```

That puts the model on `cuda:0`; a second instance could go on another GPU on `cuda:1`. The `torch.nn.parallel` stuff I am less knowledgeable about. It not using any GPU is likely related to this not specifying any GPU:

```python
case "local":
    from llama_index.embeddings import HuggingFaceEmbedding

    embedding_model = HuggingFaceEmbedding(
        model_name=settings.local.embedding_hf_model_name,
        cache_folder=str(models_cache_path),
        embed_batch_size=20,
    )
    self.embedding_model = DataParallel(embedding_model)
```
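A minimal sketch of what specifying a GPU in the `local` branch could look like, assuming the installed `HuggingFaceEmbedding` accepts the same `device` argument used above (single GPU only; this does not attempt the `torch.nn.parallel` multi-GPU setup):

```python
case "local":
    from llama_index.embeddings import HuggingFaceEmbedding

    # Pin the embedding model to the first GPU explicitly; the thread above
    # suggests that without a device argument it was not using the GPU.
    self.embedding_model = HuggingFaceEmbedding(
        model_name=settings.local.embedding_hf_model_name,
        cache_folder=str(models_cache_path),
        embed_batch_size=20,
        device="cuda:0",
    )
```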