community[patch]: add possibility to search by vector in OpenSearchVectorSearch (#17878)

- **Description:** implements the missing `similarity_search_by_vector`
function for `OpenSearchVectorSearch`
- **Issue:** N/A
- **Dependencies:** N/A
pull/17902/head
Karim Assi 8 months ago committed by GitHub
parent 144f59b5fe
commit afc1ba0329
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -516,6 +516,15 @@ class OpenSearchVectorSearch(VectorStore):
docs_with_scores = self.similarity_search_with_score(query, k, **kwargs)
return [doc[0] for doc in docs_with_scores]
def similarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to the embedding vector."""
docs_with_scores = self.similarity_search_with_score_by_vector(
embedding, k, **kwargs
)
return [doc[0] for doc in docs_with_scores]
def similarity_search_with_score(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Tuple[Document, float]]:
@ -534,19 +543,43 @@ class OpenSearchVectorSearch(VectorStore):
Optional Args:
same as `similarity_search`
"""
embedding = self.embedding_function.embed_query(query)
return self.similarity_search_with_score_by_vector(embedding, k, **kwargs)
def similarity_search_with_score_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Tuple[Document, float]]:
"""Return docs and it's scores most similar to the embedding vector.
By default, supports Approximate Search.
Also supports Script Scoring and Painless Scripting.
Args:
embedding: Embedding vector to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
Returns:
List of Documents along with its scores most similar to the query.
Optional Args:
same as `similarity_search`
"""
text_field = kwargs.get("text_field", "text")
metadata_field = kwargs.get("metadata_field", "metadata")
hits = self._raw_similarity_search_with_score(query=query, k=k, **kwargs)
hits = self._raw_similarity_search_with_score_by_vector(
embedding=embedding, k=k, **kwargs
)
documents_with_scores = [
(
Document(
page_content=hit["_source"][text_field],
metadata=hit["_source"]
if metadata_field == "*" or metadata_field not in hit["_source"]
else hit["_source"][metadata_field],
metadata=(
hit["_source"]
if metadata_field == "*" or metadata_field not in hit["_source"]
else hit["_source"][metadata_field]
),
),
hit["_score"],
)
@ -554,26 +587,25 @@ class OpenSearchVectorSearch(VectorStore):
]
return documents_with_scores
def _raw_similarity_search_with_score(
self, query: str, k: int = 4, **kwargs: Any
def _raw_similarity_search_with_score_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[dict]:
"""Return raw opensearch documents (dict) including vectors,
scores most similar to query.
scores most similar to the embedding vector.
By default, supports Approximate Search.
Also supports Script Scoring and Painless Scripting.
Args:
query: Text to look up documents similar to.
embedding: Embedding vector to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
Returns:
List of dict with its scores most similar to the query.
List of dict with its scores most similar to the embedding.
Optional Args:
same as `similarity_search`
"""
embedding = self.embedding_function.embed_query(query)
search_type = kwargs.get("search_type", "approximate_search")
vector_field = kwargs.get("vector_field", "vector_field")
index_name = kwargs.get("index_name", self.index_name)
@ -702,7 +734,9 @@ class OpenSearchVectorSearch(VectorStore):
embedding = self.embedding_function.embed_query(query)
# Do ANN/KNN search to get top fetch_k results where fetch_k >= k
results = self._raw_similarity_search_with_score(query, fetch_k, **kwargs)
results = self._raw_similarity_search_with_score_by_vector(
embedding, fetch_k, **kwargs
)
embeddings = [result["_source"][vector_field] for result in results]

Loading…
Cancel
Save