Mirror of https://github.com/hwchase17/langchain (synced 2024-11-13 19:10:52 +00:00)
CrateDB: Vector Store -- make _euclidean_relevance_score_fn an identity function

We don't need anything on top of the value CrateDB returns, i.e. we don't need this function at all and should use the value from CrateDB as is. The similarity is already in the (0, 1] interval, and dividing it by math.sqrt(2) does not normalize it but yields a wrong result: a similarity of 1, for example, becomes roughly 0.707.
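To make the effect concrete, here is a minimal standalone sketch (the names old_relevance_score and new_relevance_score are illustrative, not the actual CrateDBVectorStore method) contrasting the removed division with the identity behaviour introduced by this commit:

import math

def old_relevance_score(similarity: float) -> float:
    # Removed behaviour: divide the CrateDB similarity by sqrt(2).
    return similarity / math.sqrt(2)

def new_relevance_score(similarity: float) -> float:
    # New behaviour: CrateDB's similarity is already in (0, 1],
    # so it is returned unchanged.
    return similarity

print(old_relevance_score(1.0))  # ~0.7071 -- a perfect match no longer scores 1.0
print(new_relevance_score(1.0))  # 1.0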
This commit is contained in:
parent 0561dcc754
commit 1ee02dd76c
@@ -1,7 +1,6 @@
 from __future__ import annotations

 import enum
-import math
 from typing import (
     Any,
     Callable,
@@ -466,10 +465,10 @@ class CrateDBVectorStore(PGVector):
         # others are not!)
         # - embedding dimensionality
         # - etc.
-        # This function converts the euclidean norm of normalized embeddings
+        # This function converts the Euclidean norm of normalized embeddings
         # (0 is most similar, sqrt(2) most dissimilar)
         # to a similarity function (0 to 1)

         # Original:
         # return 1.0 - distance / math.sqrt(2)
-        return similarity / math.sqrt(2)
+        return similarity
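The commented-out "Original" formula converts a Euclidean distance between normalized embeddings (0 for identical vectors, sqrt(2) for maximally dissimilar ones) into a score between 0 and 1, whereas CrateDB already hands back a similarity in (0, 1]. A short illustrative snippet (free functions for demonstration only, not the actual class methods) shows why reusing the sqrt(2) division on a similarity gives the wrong result:

import math

def distance_to_relevance(distance: float) -> float:
    # The commented-out "Original" conversion: the Euclidean distance between
    # normalized embeddings lies in [0, sqrt(2)]; map it onto [0, 1].
    return 1.0 - distance / math.sqrt(2)

def similarity_to_relevance(similarity: float) -> float:
    # CrateDB already returns a similarity in (0, 1]; nothing to convert.
    return similarity

print(distance_to_relevance(0.0))    # 1.0 -- identical vectors
print(similarity_to_relevance(1.0))  # 1.0 -- identical vectors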
@@ -470,9 +470,9 @@ def test_cratedb_relevance_score() -> None:
     output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
     # Original score values: 1.0, 0.9996744261675065, 0.9986996093328621
     assert output == [
-        (Document(page_content="foo", metadata={"page": "0"}), 0.7071067811865475),
-        (Document(page_content="bar", metadata={"page": "1"}), 0.35355339059327373),
-        (Document(page_content="baz", metadata={"page": "2"}), 0.1414213562373095),
+        (Document(page_content="foo", metadata={"page": "0"}), 1.0),
+        (Document(page_content="bar", metadata={"page": "1"}), 0.5),
+        (Document(page_content="baz", metadata={"page": "2"}), 0.2),
     ]
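The new expected scores are simply the raw CrateDB similarities; the old expectations were exactly those values divided by math.sqrt(2), i.e. the distorted output of the removed conversion. A quick arithmetic check (plain Python, independent of the test suite):

import math

old_expected = [0.7071067811865475, 0.35355339059327373, 0.1414213562373095]
new_expected = [1.0, 0.5, 0.2]

for old, new in zip(old_expected, new_expected):
    # Each old value is the corresponding raw similarity divided by sqrt(2).
    assert math.isclose(old, new / math.sqrt(2))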