How can I use a custom embedding model with the Supabase Vector Store? (The default is OpenAI's text-embedding-ada-002, which has an embedding dimension of 1536.)
I want to use sentence-transformers/all-mpnet-base-v2 instead, which has an embedding dimension of 768.
code:
def get_query_engine_supabase(llm, filename, *, collection_name='reviewIndexes', dimension=768):
    """Build a streaming query engine over one document stored in Supabase.

    The document ``./docs/<filename>`` is embedded with the HuggingFace model
    ``sentence-transformers/all-mpnet-base-v2`` and indexed into a Supabase
    (vecs) collection.

    Args:
        llm: Language model handed to the service context.
        filename: Name of the file inside ``./docs`` to load and index.
        collection_name: Name of the vecs collection (table) to write to.
        dimension: Embedding size of the collection. It must equal the output
            size of the embedding model (768 for all-mpnet-base-v2) and must
            be an integer, not a string such as ``'768'``.

    Returns:
        The query engine created by ``index.as_query_engine`` with
        ``streaming=True`` and ``similarity_top_k=1``.

    Note:
        An error like ``expected 1536 dimensions, not 768`` on
        ``INSERT INTO vecs."<collection_name>"`` means the table already
        exists with 1536-dimensional vectors. Presumably it was created by an
        earlier run with the default dimension, and ``dimension`` is only
        applied when the collection is first created -- TODO confirm against
        the installed llama_index / vecs versions. In that case either pass a
        new ``collection_name`` or drop the old collection first (e.g. with
        ``vecs.create_client(DB_CONNECTION).delete_collection(name)``).
    """
    print("-----LOGGING----- start query_engine - SUPABASE")

    # Use HuggingFace embeddings instead of the default OpenAI model.
    embed_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )

    # The service context carries both the LLM and the custom embed model.
    service_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # Load the single requested document.
    documents = SimpleDirectoryReader(
        input_files=[f"./docs/{filename}"]
    ).load_data()

    DB_CONNECTION = "postgresql://<user>:<password>@<host>:<port>/<db_name>"  # fill in real credentials

    print("-----LOGGING----- initializing vector_store")
    vector_store = SupabaseVectorStore(
        postgres_connection_string=DB_CONNECTION,
        collection_name=collection_name,
        # Must be an int; the original passed the string '768'.
        dimension=int(dimension),
    )
    print("-----LOGGING----- initialized vector_store")

    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    print("-----LOGGING----- initialized storage_context")

    # Embeds the documents with embed_model and inserts the vectors into the
    # collection; this is the step that fails if the table's vector size does
    # not match the embedding size.
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        service_context=service_context,
    )
    print("-----LOGGING----- generated index:", index)

    # Set up the query engine.
    query_engine = index.as_query_engine(
        streaming=True,
        similarity_top_k=1,
    )
    return query_engine
ERROR:
raise ValueError('expected %d dimensions, not %d' % (dim, len(value)))
sqlalchemy.exc.StatementError: (builtins.ValueError) expected 1536 dimensions, not 768
[SQL: INSERT INTO vecs."reviewIndexes" (id, vec, metadata) VALUES (%(id_m0)s, %(vec_m0)s,