CrateDB: Vector Store -- make _euclidean_relevance_score_fn the identity function

We don't need anything on top of it, i.e. we don't need this function and
should instead use the value from CrateDB as-is.

The similarity is already in the (0, 1] interval, so dividing by math.sqrt(2)
does not normalize it but produces a wrong result: for example, a score of 1
becomes ~0.707.
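
To make the arithmetic concrete, here is a small sketch in Python; the function
names are illustrative and not part of the codebase. PGVector's original formula
converts a Euclidean distance, whereas CrateDB already hands back a similarity:

    import math

    # PGVector's original conversion: turns a Euclidean distance between
    # normalized embeddings (0 = most similar, sqrt(2) = most dissimilar)
    # into a relevance score in [0, 1].
    def distance_to_relevance(distance: float) -> float:
        return 1.0 - distance / math.sqrt(2)

    # Previous CrateDB adapter behaviour: dividing an already-normalized
    # similarity by sqrt(2) just rescales a correct value.
    def old_relevance(similarity: float) -> float:
        return similarity / math.sqrt(2)

    # New behaviour: pass CrateDB's similarity through unchanged.
    def new_relevance(similarity: float) -> float:
        return similarity

    print(distance_to_relevance(0.0))  # 1.0 -- zero distance is a perfect match
    print(old_relevance(1.0))          # ~0.7071 -- a perfect match loses its top score
    print(new_relevance(1.0))          # 1.0 -- identity keeps the score intact
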
Author: Andreas Motl, 2024-11-11 05:49:07 +01:00
Parent: 0561dcc754
Commit: 1ee02dd76c
2 changed files with 5 additions and 6 deletions


@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import enum
-import math
 from typing import (
     Any,
     Callable,
@@ -466,10 +465,10 @@ class CrateDBVectorStore(PGVector):
         # others are not!)
         # - embedding dimensionality
         # - etc.
-        # This function converts the euclidean norm of normalized embeddings
+        # This function converts the Euclidean norm of normalized embeddings
         # (0 is most similar, sqrt(2) most dissimilar)
         # to a similarity function (0 to 1)
         # Original:
         # return 1.0 - distance / math.sqrt(2)
-        return similarity / math.sqrt(2)
+        return similarity
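
Pieced together with its surrounding context, the overridden method plausibly
reads as follows after this change; the signature, docstring, and parameter name
are assumptions, since the hunk only shows part of the body:

    def _euclidean_relevance_score_fn(self, similarity: float) -> float:
        """Return the CrateDB similarity score as-is (assumed signature)."""
        # The 'correct' relevance function may differ depending on the
        # distance/similarity metric, embedding scale, and dimensionality
        # (see the comments preserved in the hunk above).
        # CrateDB already delivers a similarity in the (0, 1] interval,
        # so no conversion is applied.
        # Original:
        # return 1.0 - distance / math.sqrt(2)
        return similarity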


@@ -470,9 +470,9 @@ def test_cratedb_relevance_score() -> None:
     output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
     # Original score values: 1.0, 0.9996744261675065, 0.9986996093328621
     assert output == [
-        (Document(page_content="foo", metadata={"page": "0"}), 0.7071067811865475),
-        (Document(page_content="bar", metadata={"page": "1"}), 0.35355339059327373),
-        (Document(page_content="baz", metadata={"page": "2"}), 0.1414213562373095),
+        (Document(page_content="foo", metadata={"page": "0"}), 1.0),
+        (Document(page_content="bar", metadata={"page": "1"}), 0.5),
+        (Document(page_content="baz", metadata={"page": "2"}), 0.2),
     ]
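
As a quick cross-check, not part of the commit: the previously asserted scores
are exactly the new ones divided by math.sqrt(2), confirming that the old
function merely rescaled already-correct values:

    import math

    old_scores = [0.7071067811865475, 0.35355339059327373, 0.1414213562373095]
    new_scores = [1.0, 0.5, 0.2]

    # Each old score was the CrateDB similarity erroneously divided by sqrt(2).
    assert all(
        math.isclose(old, new / math.sqrt(2))
        for old, new in zip(old_scores, new_scores)
    )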