core: deprecate vectorstore relevance scoring

pull/20302/head
Erick Friis 2 months ago
parent 03b247cca1
commit 426ee095cf

@ -18,6 +18,7 @@ and retrieve the data that are 'most similar' to the embedded query.
Embeddings, Document
""" # noqa: E501
from __future__ import annotations
import logging
@ -39,6 +40,7 @@ from typing import (
TypeVar,
)
from langchain_core._api.deprecation import deprecated
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.retrievers import BaseRetriever
@ -184,6 +186,11 @@ class VectorStore(ABC):
) -> List[Document]:
"""Return docs most similar to query."""
@deprecated(
since="0.1.43",
removal="0.2",
alternative="euclidean_distance_to_relevance_score",
)
@staticmethod
def _euclidean_relevance_score_fn(distance: float) -> float:
"""Return a similarity score on a scale [0, 1]."""
@ -199,12 +206,22 @@ class VectorStore(ABC):
# to a similarity function (0 to 1)
return 1.0 - distance / math.sqrt(2)
# NOTE: @staticmethod must be the outermost decorator. With @deprecated on
# top, the deprecation wrapper receives a staticmethod object instead of the
# function; a plain function wrapper stored on the class then binds `self`
# on instance access, so `self._cosine_relevance_score_fn(d)` would be
# called with an extra argument (and on Python < 3.10 a staticmethod object
# is not even callable).
@staticmethod
@deprecated(
    since="0.1.43",
    removal="0.2",
    alternative="cosine_to_relevance_score",
)
def _cosine_relevance_score_fn(distance: float) -> float:
    """Normalize a cosine distance to a score on a scale [0, 1].

    Args:
        distance: Cosine distance, where 0 is the most similar.

    Returns:
        ``1.0 - distance``, so a distance of 0 maps to 1.0 and a distance
        of 1 maps to 0.0. The value is not clamped.

    Note:
        The suggested replacement, ``cosine_to_relevance_score``, expects a
        cosine *similarity* in [-1, 1] rather than a distance, so the input
        has to be converted when migrating.
    """
    return 1.0 - distance
@deprecated(
since="0.1.43",
removal="0.2",
message="Max inner product relevance scoring is not recommended.",
)
@staticmethod
def _max_inner_product_relevance_score_fn(distance: float) -> float:
"""Normalize the distance to a score on a scale [0, 1]."""
@ -213,6 +230,15 @@ class VectorStore(ABC):
return -1.0 * distance
@deprecated(
since="0.1.43",
removal="0.2",
message=(
"Relevance scoring is no longer implemented on the VectorStore - "
"please use external helper methods such as "
"euclidean_distance_to_relevance_score instead."
),
)
def _select_relevance_score_fn(self) -> Callable[[float], float]:
"""
The 'correct' relevance function
@ -244,6 +270,16 @@ class VectorStore(ABC):
None, self.similarity_search_with_score, *args, **kwargs
)
@deprecated(
since="0.1.43",
removal="0.2",
message=(
"Relevance scoring is no longer implemented on the VectorStore - "
"please use retrieve documents with similarity_search_with_score"
" and convert scores with external helper methods such as "
"euclidean_distance_to_relevance_score instead."
),
)
def _similarity_search_with_relevance_scores(
self,
query: str,
@ -271,6 +307,16 @@ class VectorStore(ABC):
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
@deprecated(
since="0.1.43",
removal="0.2",
message=(
"Relevance scoring is no longer implemented on the VectorStore - "
"please use retrieve documents with similarity_search_with_score"
" and convert scores with external helper methods such as "
"euclidean_distance_to_relevance_score instead."
),
)
async def _asimilarity_search_with_relevance_scores(
self,
query: str,
@ -298,6 +344,16 @@ class VectorStore(ABC):
docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
@deprecated(
since="0.1.43",
removal="0.2",
message=(
"Relevance scoring is no longer implemented on the VectorStore - "
"please use retrieve documents with similarity_search_with_score"
" and convert scores with external helper methods such as "
"euclidean_distance_to_relevance_score instead."
),
)
def similarity_search_with_relevance_scores(
self,
query: str,
@ -345,6 +401,16 @@ class VectorStore(ABC):
)
return docs_and_similarities
@deprecated(
since="0.1.43",
removal="0.2",
message=(
"Relevance scoring is no longer implemented on the VectorStore - "
"please use retrieve documents with similarity_search_with_score"
" and convert scores with external helper methods such as "
"euclidean_distance_to_relevance_score instead."
),
)
async def asimilarity_search_with_relevance_scores(
self,
query: str,
@ -740,3 +806,27 @@ class VectorStoreRetriever(BaseRetriever):
) -> List[str]:
"""Add documents to vectorstore."""
return await self.vectorstore.aadd_documents(documents, **kwargs)
def euclidean_distance_to_relevance_score(score: float) -> float:
    """Map a euclidean distance onto a relevance score on a scale [0, 1].

    Assumptions:
    - The embeddings are unit normed (length 1), so every component of a
      vector lies in [-1, 1].
    - The incoming distance is therefore in [0, 2], where 0 is the most
      similar and 2 is the most dissimilar.

    Args:
        score: Euclidean distance between two embeddings.

    Returns:
        ``1 - score / 2``: 1 for a distance of 0, 0 for a distance of 2.
        Inputs outside [0, 2] are not clamped.
    """
    halved_distance = score / 2
    return 1 - halved_distance
def cosine_to_relevance_score(score: float) -> float:
    """Map a cosine similarity onto a relevance score on a scale [0, 1].

    Assumptions:
    - The incoming cosine similarity is in [-1, 1], where 1 is the most
      similar and -1 is the most dissimilar.

    Args:
        score: Cosine similarity between two embeddings.

    Returns:
        ``(score + 1) / 2``: 1 for a similarity of 1, 0 for a similarity
        of -1. Inputs outside [-1, 1] are not clamped.
    """
    shifted_similarity = score + 1
    return shifted_similarity / 2

Loading…
Cancel
Save