# Configure LlamaIndex to use a local HuggingFace embedding model on the GPU.
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index import ServiceContext, set_global_service_context

# Load the sentence-transformers model and place it on the GPU
# (model_kwargs are forwarded to the underlying sentence-transformers model).
embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": "cuda"},
)

# Wire the embedding model into a ServiceContext so LlamaIndex uses it
# instead of the default (OpenAI) embeddings.
service_context = ServiceContext.from_defaults(embed_model=embed_model)

# Optional: make this the global default context for all subsequent
# LlamaIndex operations.
set_global_service_context(service_context)
Have a look at the `BaseEmbedding` class to verify whether you can use a GPU with a local model. I think that will be a good place to start looking.