mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
69344a0661
- **Description:** Added integration with Meta AI's LASER (Language-Agnostic SEntence Representations) embedding library, which supports multilingual embeddings for any of the languages listed here: https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200, including several low-resource languages. - **Dependencies:** laser_encoders
30 lines
1.1 KiB
Python
30 lines
1.1 KiB
Python
"""Test LASER embeddings."""
|
|
import pytest
|
|
|
|
from langchain_community.embeddings.laser import LaserEmbeddings
|
|
|
|
|
|
@pytest.mark.filterwarnings("ignore::UserWarning:")
@pytest.mark.parametrize("lang", [None, "lus_Latn", "english"])
def test_laser_embedding_documents(lang: str) -> None:
    """Check that LASER embeds a small batch of documents.

    The test runs once for each parametrized ``lang`` value; note that
    ``None`` is one of those values even though the parameter is annotated
    as ``str``. Two input texts must produce two vectors, and the first
    vector must have 1024 entries.

    ``UserWarning`` is filtered out because the LASER library
    implementation emits it.
    """
    embedder = LaserEmbeddings(lang=lang)
    vectors = embedder.embed_documents(["hello", "world"])

    # One vector per input document.
    assert len(vectors) == 2  # type: ignore[arg-type]
    # Each vector is expected to have 1024 dimensions.
    assert len(vectors[0]) == 1024  # type: ignore[index]
|
|
|
|
|
|
@pytest.mark.filterwarnings("ignore::UserWarning:")
@pytest.mark.parametrize("lang", [None, "lus_Latn", "english"])
def test_laser_embedding_query(lang: str) -> None:
    """Check that LASER embeds a single query string.

    The test runs once for each parametrized ``lang`` value; note that
    ``None`` is one of those values even though the parameter is annotated
    as ``str``. The resulting vector must have 1024 entries.

    ``UserWarning`` is filtered out because the LASER library
    implementation emits it.
    """
    embedder = LaserEmbeddings(lang=lang)
    vector = embedder.embed_query("hello world")

    # The query vector is expected to have 1024 dimensions.
    assert len(vector) == 1024
|