Thanks for the help, it works now. I managed to work out a very basic solution, which will do until someone with better skills comes along. For those in need, here is the code.
- Define a new query function, and customise the metadata to suit your needs:
def modified_query(
    self,
    query: VectorStoreQuery,
    **kwargs: Any,
) -> VectorStoreQueryResult:
    """Query index for top k most similar nodes.

    Intended as a drop-in replacement for ``LanceDBVectorStore.query`` that
    additionally copies selected table columns into each returned node's
    ``metadata``.

    Args:
        query: The vector store query. Its ``query_embedding``,
            ``similarity_top_k`` and ``filters`` attributes are used.
        **kwargs: LanceDB-specific options. Only ``where`` is read here; it
            is used as the filter when ``query.filters`` is ``None``.

    Returns:
        A ``VectorStoreQueryResult`` holding the reconstructed nodes, their
        similarities, and their ids.

    Raises:
        ValueError: If a filter is supplied both through ``query.filters``
            and through the ``where`` keyword argument.
    """
    if query.filters is not None:
        if "where" in kwargs:
            raise ValueError(
                "Cannot specify filter via both query and kwargs. "
                "Use kwargs only for lancedb specific items that are "
                "not supported via the generic query interface."
            )
        where = _to_lance_filter(query.filters)
    else:
        where = kwargs.pop("where", None)

    table = self.connection.open_table(self.table_name)
    lance_query = (
        table.search(query.query_embedding)
        .limit(query.similarity_top_k)
        .where(where)
        .nprobes(self.nprobes)
    )

    if self.refine_factor is not None:
        # NOTE(review): the return value is discarded, so this relies on the
        # query builder being updated in place -- confirm against LanceDB.
        lance_query.refine_factor(self.refine_factor)

    results = lance_query.to_df()

    nodes = []
    for _, item in results.iterrows():
        node = TextNode(
            text=item.text,
            id_=item.id,
            relationships={
                NodeRelationship.SOURCE: RelatedNodeInfo(node_id=item.doc_id),
            },
            # CUSTOMIZE THIS BASED ON YOUR PREFERENCE: every key listed here
            # must exist as a column in the LanceDB table.
            metadata={"file_path": item.file_path},
        )
        nodes.append(node)

    return VectorStoreQueryResult(
        nodes=nodes,
        similarities=_to_llama_similarities(results),
        ids=results["id"].tolist(),
    )
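- Monkey patch the vector store class so that it uses the new function (run this once, before issuing any queries): LanceDBVectorStore.query = modified_query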