Fixes scope of query Session in PGVector (#5194)

`vectorstore.PGVector`: The transactional boundary should be widened
to cover the query itself.

Currently, within `similarity_search_with_score_by_vector`, the
transactional boundary (created via the `Session` call) does not include
the select query being made.

This can result in unintended consequences when interacting with the
PGVector instance methods directly.


---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
searx_updates
Matt Wells 1 year ago committed by GitHub
parent 52714cedd4
commit c173bf1c62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -291,40 +291,43 @@ class PGVector(VectorStore):
if not collection: if not collection:
raise ValueError("Collection not found") raise ValueError("Collection not found")
filter_by = EmbeddingStore.collection_id == collection.uuid filter_by = EmbeddingStore.collection_id == collection.uuid
if filter is not None: if filter is not None:
filter_clauses = [] filter_clauses = []
for key, value in filter.items(): for key, value in filter.items():
IN = "in" IN = "in"
if isinstance(value, dict) and IN in map(str.lower, value): if isinstance(value, dict) and IN in map(str.lower, value):
value_case_insensitive = {k.lower(): v for k, v in value.items()} value_case_insensitive = {
filter_by_metadata = EmbeddingStore.cmetadata[key].astext.in_( k.lower(): v for k, v in value.items()
value_case_insensitive[IN] }
) filter_by_metadata = EmbeddingStore.cmetadata[key].astext.in_(
filter_clauses.append(filter_by_metadata) value_case_insensitive[IN]
else: )
filter_by_metadata = EmbeddingStore.cmetadata[key].astext == str( filter_clauses.append(filter_by_metadata)
value else:
) filter_by_metadata = EmbeddingStore.cmetadata[
filter_clauses.append(filter_by_metadata) key
].astext == str(value)
filter_by = sqlalchemy.and_(filter_by, *filter_clauses) filter_clauses.append(filter_by_metadata)
results: List[QueryResult] = ( filter_by = sqlalchemy.and_(filter_by, *filter_clauses)
session.query(
EmbeddingStore, results: List[QueryResult] = (
self.distance_strategy(embedding).label("distance"), # type: ignore session.query(
) EmbeddingStore,
.filter(filter_by) self.distance_strategy(embedding).label("distance"), # type: ignore
.order_by(sqlalchemy.asc("distance")) )
.join( .filter(filter_by)
CollectionStore, .order_by(sqlalchemy.asc("distance"))
EmbeddingStore.collection_id == CollectionStore.uuid, .join(
CollectionStore,
EmbeddingStore.collection_id == CollectionStore.uuid,
)
.limit(k)
.all()
) )
.limit(k)
.all()
)
docs = [ docs = [
( (
Document( Document(

Loading…
Cancel
Save