langchain/libs/community/tests/integration_tests/embeddings/test_laser.py
Dan Stambler 69344a0661
community: Add Laser Embedding Integration (#18111)
- **Description:** Added an integration with Meta AI's LASER
(Language-Agnostic SEntence Representations) embedding library, which
supports multilingual embeddings for any of the languages listed here:
https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200,
including several low-resource languages.
- **Dependencies:** laser_encoders
2024-02-26 12:16:37 -08:00

30 lines
1.1 KiB
Python

"""Test LASER embeddings."""
from typing import Optional

import pytest
from langchain_community.embeddings.laser import LaserEmbeddings
@pytest.mark.filterwarnings("ignore::UserWarning:")
@pytest.mark.parametrize("lang", [None, "lus_Latn", "english"])
def test_laser_embedding_documents(lang: Optional[str]) -> None:
    """Test LASER embeddings for a batch of documents.

    Runs once per parametrized ``lang`` value; ``None`` is one of those
    values, which is why the parameter is annotated as optional.

    ``UserWarning`` is filtered out because the LASER library implementation
    emits it, and it is not what this test is checking.
    """
    documents = ["hello", "world"]
    embedding = LaserEmbeddings(lang=lang)
    output = embedding.embed_documents(documents)
    # One embedding vector is expected per input document.
    assert len(output) == len(documents)  # type: ignore[arg-type]
    # The test expects each vector to have 1024 components.
    assert len(output[0]) == 1024  # type: ignore[index]
@pytest.mark.filterwarnings("ignore::UserWarning:")
@pytest.mark.parametrize("lang", [None, "lus_Latn", "english"])
def test_laser_embedding_query(lang: Optional[str]) -> None:
    """Test LASER embeddings for a single query string.

    Runs once per parametrized ``lang`` value; ``None`` is one of those
    values, which is why the parameter is annotated as optional.

    ``UserWarning`` is filtered out because the LASER library implementation
    emits it, and it is not what this test is checking.
    """
    query = "hello world"
    embedding = LaserEmbeddings(lang=lang)
    output = embedding.embed_query(query)
    # The test expects the query vector to have 1024 components.
    assert len(output) == 1024