langchain/tests/integration_tests/embeddings/test_dashscope.py
wenmeng zhou bb7ac9edb5
add dashscope text embedding (#5929)
#### What I do
Add an embedding API for
[DashScope](https://help.aliyun.com/product/610100.html), DAMO Academy's
unified multilingual text-embedding (vector) model built on an LLM base.
It supports multiple mainstream languages worldwide and offers
high-quality vector services, helping developers quickly transform text
data into high-quality vector data. Currently supported languages
include Chinese, English, Spanish, French, Portuguese, Indonesian, and
more.

#### Who can review?

  Models
  - @hwchase17
  - @agola11

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2023-06-11 21:14:20 -07:00

56 lines
1.8 KiB
Python

"""Test dashscope embeddings."""
import numpy as np
from langchain.embeddings.dashscope import DashScopeEmbeddings
def test_dashscope_embedding_documents() -> None:
    """Embed a single document and check the shape of the result."""
    texts = ["foo bar"]
    embedder = DashScopeEmbeddings(model="text-embedding-v1")
    vectors = embedder.embed_documents(texts)
    # One input text must yield exactly one vector with 1536 components.
    assert len(vectors) == 1
    assert len(vectors[0]) == 1536
def test_dashscope_embedding_documents_multiple() -> None:
    """Embed several documents at once and check every returned vector."""
    texts = ["foo bar", "bar foo", "foo"]
    embedder = DashScopeEmbeddings(model="text-embedding-v1")
    vectors = embedder.embed_documents(texts)
    # Expect one vector per input text.
    assert len(vectors) == 3
    # Each vector is checked in input order for the expected 1536 components.
    for vector in vectors:
        assert len(vector) == 1536
def test_dashscope_embedding_query() -> None:
    """Embed a single query string and check the vector's length."""
    query_text = "foo bar"
    embedder = DashScopeEmbeddings(model="text-embedding-v1")
    query_vector = embedder.embed_query(query_text)
    # embed_query returns one flat vector, so its length is checked directly.
    assert len(query_vector) == 1536
def test_dashscope_embedding_with_empty_string() -> None:
    """Embed a batch with an empty string and compare against the SDK."""
    import dashscope

    texts = ["", "abc"]
    embedder = DashScopeEmbeddings(model="text-embedding-v1")
    vectors = embedder.embed_documents(texts)
    assert len(vectors) == 2
    assert len(vectors[0]) == 1536

    # Reference value: embed the empty string directly through the
    # dashscope SDK and compare it with what the wrapper returned.
    response = dashscope.TextEmbedding.call(
        input="", model="text-embedding-v1", text_type="document"
    )
    reference_vector = response.output["embeddings"][0]["embedding"]
    assert np.allclose(vectors[0], reference_vector)
    assert len(vectors[1]) == 1536
if __name__ == "__main__":
    # Allow running this module directly: execute each test in the same
    # order in which it is defined above.
    for run_test in (
        test_dashscope_embedding_documents,
        test_dashscope_embedding_documents_multiple,
        test_dashscope_embedding_query,
        test_dashscope_embedding_with_empty_string,
    ):
        run_test()