CrateDB: Vector Store -- make _euclidean_relevance_score_fn the identity function

We don't need anything on top of it, i.e. we don't need this function and
should instead use the value from CrateDB as-is.

The similarity is already in the (0, 1] interval, so dividing by math.sqrt(2)
does not normalize it but produces a wrong result: for example, a score of 1
becomes ~0.707.
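
To make the arithmetic concrete, here is a small sketch in Python; the function
names are illustrative and not part of the codebase. PGVector's original formula
converts a Euclidean distance, whereas CrateDB already hands back a similarity:

    import math

    # PGVector's original conversion: turns a Euclidean distance between
    # normalized embeddings (0 = most similar, sqrt(2) = most dissimilar)
    # into a relevance score in [0, 1].
    def distance_to_relevance(distance: float) -> float:
        return 1.0 - distance / math.sqrt(2)

    # Previous CrateDB adapter behaviour: dividing an already-normalized
    # similarity by sqrt(2) just rescales a correct value.
    def old_relevance(similarity: float) -> float:
        return similarity / math.sqrt(2)

    # New behaviour: pass CrateDB's similarity through unchanged.
    def new_relevance(similarity: float) -> float:
        return similarity

    print(distance_to_relevance(0.0))  # 1.0 -- zero distance is a perfect match
    print(old_relevance(1.0))          # ~0.7071 -- a perfect match loses its top score
    print(new_relevance(1.0))          # 1.0 -- identity keeps the score intact
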
Author: Andreas Motl, 2024-11-11 05:49:07 +01:00
Parent: 0561dcc754
Commit: 1ee02dd76c
2 changed files with 5 additions and 6 deletions


@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import enum
-import math
 from typing import (
     Any,
     Callable,
@@ -466,10 +465,10 @@ class CrateDBVectorStore(PGVector):
         # others are not!)
         # - embedding dimensionality
         # - etc.
-        # This function converts the euclidean norm of normalized embeddings
+        # This function converts the Euclidean norm of normalized embeddings
         # (0 is most similar, sqrt(2) most dissimilar)
         # to a similarity function (0 to 1)
         # Original:
         # return 1.0 - distance / math.sqrt(2)
-        return similarity / math.sqrt(2)
+        return similarity
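
Pieced together with its surrounding context, the overridden method plausibly
reads as follows after this change; the signature, docstring, and parameter name
are assumptions, since the hunk only shows part of the body:

    def _euclidean_relevance_score_fn(self, similarity: float) -> float:
        """Return the CrateDB similarity score as-is (assumed signature)."""
        # The 'correct' relevance function may differ depending on the
        # distance/similarity metric, embedding scale, and dimensionality
        # (see the comments preserved in the hunk above).
        # CrateDB already delivers a similarity in the (0, 1] interval,
        # so no conversion is applied.
        # Original:
        # return 1.0 - distance / math.sqrt(2)
        return similarity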


@@ -470,9 +470,9 @@ def test_cratedb_relevance_score() -> None:
     output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
     # Original score values: 1.0, 0.9996744261675065, 0.9986996093328621
     assert output == [
-        (Document(page_content="foo", metadata={"page": "0"}), 0.7071067811865475),
-        (Document(page_content="bar", metadata={"page": "1"}), 0.35355339059327373),
-        (Document(page_content="baz", metadata={"page": "2"}), 0.1414213562373095),
+        (Document(page_content="foo", metadata={"page": "0"}), 1.0),
+        (Document(page_content="bar", metadata={"page": "1"}), 0.5),
+        (Document(page_content="baz", metadata={"page": "2"}), 0.2),
     ]
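
As a quick cross-check, not part of the commit: the previously asserted scores
are exactly the new ones divided by math.sqrt(2), confirming that the old
function merely rescaled already-correct values:

    import math

    old_scores = [0.7071067811865475, 0.35355339059327373, 0.1414213562373095]
    new_scores = [1.0, 0.5, 0.2]

    # Each old score was the CrateDB similarity erroneously divided by sqrt(2).
    assert all(
        math.isclose(old, new / math.sqrt(2))
        for old, new in zip(old_scores, new_scores)
    )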