Add tests for embedding distance evaluation (#8285)

Add tests for embedding distance evaluation

  - Description: Add tests for embedding distance evaluation
  - Issue: None
  - Dependencies: None
  - Tag maintainer: @baskaryan
  - Twitter handle: @MlopsJ
pull/8314/head
shibuiwilliam 1 year ago committed by GitHub
parent f3d2fdd54c
commit 6b88fbd9bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -5,6 +5,7 @@ import pytest
from langchain.evaluation.embedding_distance import (
EmbeddingDistance,
EmbeddingDistanceEvalChain,
PairwiseEmbeddingDistanceEvalChain,
)
@ -44,18 +45,25 @@ def vectors() -> Tuple[np.ndarray, np.ndarray]:
# Fixture that returns a PairwiseEmbeddingDistanceEvalChain constructed with
# no arguments.
# NOTE(review): this is a diff hunk whose +/- markers were stripped. The
# `def chain()` line looks like the removed signature and
# `def pairwise_embedding_distance_eval_chain()` the added one — confirm
# against the commit before treating this as runnable code.
@pytest.fixture
def chain() -> PairwiseEmbeddingDistanceEvalChain:
def pairwise_embedding_distance_eval_chain() -> PairwiseEmbeddingDistanceEvalChain:
"""Create a PairwiseEmbeddingDistanceEvalChain."""
return PairwiseEmbeddingDistanceEvalChain()
@pytest.fixture
def embedding_distance_eval_chain() -> EmbeddingDistanceEvalChain:
    """Provide an EmbeddingDistanceEvalChain constructed with no arguments."""
    evaluator = EmbeddingDistanceEvalChain()
    return evaluator
@pytest.mark.requires("scipy")
def test_cosine_similarity(
chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
def test_pairwise_embedding_distance_eval_chain_cosine_similarity(
pairwise_embedding_distance_eval_chain: PairwiseEmbeddingDistanceEvalChain,
vectors: Tuple[np.ndarray, np.ndarray],
) -> None:
"""Test the cosine similarity."""
chain.distance_metric = EmbeddingDistance.COSINE
result = chain._compute_score(np.array(vectors))
pairwise_embedding_distance_eval_chain.distance_metric = EmbeddingDistance.COSINE
result = pairwise_embedding_distance_eval_chain._compute_score(np.array(vectors))
expected = 1.0 - np.dot(vectors[0], vectors[1]) / (
np.linalg.norm(vectors[0]) * np.linalg.norm(vectors[1])
)
@ -63,61 +71,81 @@ def test_cosine_similarity(
# Sets the chain's metric to EUCLIDEAN, scores the stacked `vectors` fixture
# through `_compute_score`, and compares the result with scipy's `euclidean`.
# NOTE(review): this is a diff hunk whose +/- markers were stripped. The
# `chain`-based lines look like the removed version and the
# `pairwise_embedding_distance_eval_chain` lines the added one — confirm
# against the commit.
@pytest.mark.requires("scipy")
def test_euclidean_distance(
chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
def test_pairwise_embedding_distance_eval_chain_euclidean_distance(
pairwise_embedding_distance_eval_chain: PairwiseEmbeddingDistanceEvalChain,
vectors: Tuple[np.ndarray, np.ndarray],
) -> None:
"""Test the euclidean distance."""
from scipy.spatial.distance import euclidean
chain.distance_metric = EmbeddingDistance.EUCLIDEAN
result = chain._compute_score(np.array(vectors))
pairwise_embedding_distance_eval_chain.distance_metric = EmbeddingDistance.EUCLIDEAN
result = pairwise_embedding_distance_eval_chain._compute_score(np.array(vectors))
# Reference value computed independently with scipy on the same two vectors.
expected = euclidean(*vectors)
assert np.isclose(result, expected)
# Sets the chain's metric to MANHATTAN, scores the stacked `vectors` fixture
# through `_compute_score`, and compares the result with scipy's `cityblock`.
# NOTE(review): this is a diff hunk whose +/- markers were stripped. The
# `chain`-based lines look like the removed version and the
# `pairwise_embedding_distance_eval_chain` lines the added one — confirm
# against the commit.
@pytest.mark.requires("scipy")
def test_manhattan_distance(
chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
def test_pairwise_embedding_distance_eval_chain_manhattan_distance(
pairwise_embedding_distance_eval_chain: PairwiseEmbeddingDistanceEvalChain,
vectors: Tuple[np.ndarray, np.ndarray],
) -> None:
"""Test the manhattan distance."""
from scipy.spatial.distance import cityblock
chain.distance_metric = EmbeddingDistance.MANHATTAN
result = chain._compute_score(np.array(vectors))
pairwise_embedding_distance_eval_chain.distance_metric = EmbeddingDistance.MANHATTAN
result = pairwise_embedding_distance_eval_chain._compute_score(np.array(vectors))
# Reference value computed independently with scipy on the same two vectors.
expected = cityblock(*vectors)
assert np.isclose(result, expected)
# Sets the chain's metric to CHEBYSHEV, scores the stacked `vectors` fixture
# through `_compute_score`, and compares the result with scipy's `chebyshev`.
# NOTE(review): this is a diff hunk whose +/- markers were stripped. The
# `chain`-based lines look like the removed version and the
# `pairwise_embedding_distance_eval_chain` lines the added one — confirm
# against the commit.
@pytest.mark.requires("scipy")
def test_chebyshev_distance(
chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
def test_pairwise_embedding_distance_eval_chain_chebyshev_distance(
pairwise_embedding_distance_eval_chain: PairwiseEmbeddingDistanceEvalChain,
vectors: Tuple[np.ndarray, np.ndarray],
) -> None:
"""Test the chebyshev distance."""
from scipy.spatial.distance import chebyshev
chain.distance_metric = EmbeddingDistance.CHEBYSHEV
result = chain._compute_score(np.array(vectors))
pairwise_embedding_distance_eval_chain.distance_metric = EmbeddingDistance.CHEBYSHEV
result = pairwise_embedding_distance_eval_chain._compute_score(np.array(vectors))
# Reference value computed independently with scipy on the same two vectors.
expected = chebyshev(*vectors)
assert np.isclose(result, expected)
# Sets the chain's metric to HAMMING, scores the stacked `vectors` fixture
# through `_compute_score`, and compares the result with scipy's `hamming`.
# NOTE(review): this is a diff hunk whose +/- markers were stripped. The
# `chain`-based lines look like the removed version and the
# `pairwise_embedding_distance_eval_chain` lines the added one — confirm
# against the commit.
@pytest.mark.requires("scipy")
def test_hamming_distance(
chain: PairwiseEmbeddingDistanceEvalChain, vectors: Tuple[np.ndarray, np.ndarray]
def test_pairwise_embedding_distance_eval_chain_hamming_distance(
pairwise_embedding_distance_eval_chain: PairwiseEmbeddingDistanceEvalChain,
vectors: Tuple[np.ndarray, np.ndarray],
) -> None:
"""Test the hamming distance."""
from scipy.spatial.distance import hamming
chain.distance_metric = EmbeddingDistance.HAMMING
result = chain._compute_score(np.array(vectors))
pairwise_embedding_distance_eval_chain.distance_metric = EmbeddingDistance.HAMMING
result = pairwise_embedding_distance_eval_chain._compute_score(np.array(vectors))
# Reference value computed independently with scipy on the same two vectors.
expected = hamming(*vectors)
assert np.isclose(result, expected)
# Passes the same string as both `prediction` and `prediction_b` to
# `evaluate_string_pairs` and expects the returned "score" to be close to 0.0.
# NOTE(review): this is a diff hunk whose +/- markers were stripped. The
# `chain`-based lines look like the removed version and the
# `pairwise_embedding_distance_eval_chain` lines the added one — confirm
# against the commit.
@pytest.mark.requires("openai", "tiktoken")
def test_embedding_distance(chain: PairwiseEmbeddingDistanceEvalChain) -> None:
def test_pairwise_embedding_distance_eval_chain_embedding_distance(
pairwise_embedding_distance_eval_chain: PairwiseEmbeddingDistanceEvalChain,
) -> None:
"""Test the embedding distance."""
result = chain.evaluate_string_pairs(
result = pairwise_embedding_distance_eval_chain.evaluate_string_pairs(
prediction="A single cat", prediction_b="A single cat"
)
assert np.isclose(result["score"], 0.0)
# Marker extended to match the pairwise string test in this file, which also
# evaluates raw strings and is gated on "openai" and "tiktoken"; "scipy" is
# kept so the original gating still applies.
# NOTE(review): assumes the chain built with no arguments embeds strings the
# same way the pairwise chain does — confirm against the chain's defaults.
@pytest.mark.requires("scipy", "openai", "tiktoken")
def test_embedding_distance_eval_chain(
    embedding_distance_eval_chain: EmbeddingDistanceEvalChain,
) -> None:
    """Test that a prediction scored against a reference is below 1.0."""
    embedding_distance_eval_chain.distance_metric = EmbeddingDistance.COSINE
    result = embedding_distance_eval_chain.evaluate_strings(
        prediction="Hi",
        reference="Hello",
    )
    assert result["score"] < 1.0

Loading…
Cancel
Save