langchain/libs/community/tests/integration_tests/embeddings/test_dashscope.py

"""Test dashscope embeddings."""
import numpy as np
from langchain_community.embeddings.dashscope import DashScopeEmbeddings


def test_dashscope_embedding_documents() -> None:
    """Test dashscope embeddings."""
    documents = ["foo bar"]
    embedding = DashScopeEmbeddings(model="text-embedding-v1")
    output = embedding.embed_documents(documents)
    assert len(output) == 1
    assert len(output[0]) == 1536


def test_dashscope_embedding_documents_multiple() -> None:
    """Test dashscope embeddings."""
    # The DashScope embedding API accepts at most 25 texts per request, so the
    # 28 documents below exercise the >25 batching path (see PR #14662).
    documents = [
        "foo bar",
        "bar foo",
        "foo",
        "foo0",
        "foo1",
        "foo2",
        "foo3",
        "foo4",
        "foo5",
        "foo6",
        "foo7",
        "foo8",
        "foo9",
        "foo10",
        "foo11",
        "foo12",
        "foo13",
        "foo14",
        "foo15",
        "foo16",
        "foo17",
        "foo18",
        "foo19",
        "foo20",
        "foo21",
        "foo22",
        "foo23",
        "foo24",
    ]
    embedding = DashScopeEmbeddings(model="text-embedding-v1")
    output = embedding.embed_documents(documents)
    assert len(output) == 28
    assert len(output[0]) == 1536
    assert len(output[1]) == 1536
    assert len(output[2]) == 1536
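

# Illustrative sketch only (not part of the original test file): the DashScope
# text embedding endpoint accepts at most 25 texts per request, so larger
# inputs have to be split into batches and the results stitched back together.
# A minimal, hypothetical batching helper -- assuming the endpoint accepts a
# list of texts per call, as in the direct call in the empty-string test
# below -- might look like this:
def _embed_documents_in_batches(texts: list, batch_size: int = 25) -> list:
    """Hypothetical helper: embed `texts` in chunks of at most `batch_size`."""
    import dashscope

    embeddings: list = []
    for start in range(0, len(texts), batch_size):
        resp = dashscope.TextEmbedding.call(
            input=texts[start : start + batch_size],
            model="text-embedding-v1",
            text_type="document",
        )
        # Each item in resp.output["embeddings"] carries one embedding vector.
        embeddings.extend(item["embedding"] for item in resp.output["embeddings"])
    return embeddings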


def test_dashscope_embedding_query() -> None:
    """Test dashscope embeddings."""
    document = "foo bar"
    embedding = DashScopeEmbeddings(model="text-embedding-v1")
    output = embedding.embed_query(document)
    assert len(output) == 1536


def test_dashscope_embedding_with_empty_string() -> None:
    """Test dashscope embeddings with empty string."""
    import dashscope

    document = ["", "abc"]
    embedding = DashScopeEmbeddings(model="text-embedding-v1")
    output = embedding.embed_documents(document)
    assert len(output) == 2
    assert len(output[0]) == 1536
    # The empty string should embed the same way as a direct DashScope API call.
    expected_output = dashscope.TextEmbedding.call(
        input="", model="text-embedding-v1", text_type="document"
    ).output["embeddings"][0]["embedding"]
    assert np.allclose(output[0], expected_output)
    assert len(output[1]) == 1536
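

# Note: these are live integration tests; they call the real DashScope API, so
# running them requires the `dashscope` package and a valid API key, typically
# supplied via the DASHSCOPE_API_KEY environment variable, e.g.:
#
#     import os
#     os.environ["DASHSCOPE_API_KEY"] = "sk-..."  # placeholder key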
if __name__ == "__main__":
    test_dashscope_embedding_documents()
    test_dashscope_embedding_documents_multiple()
    test_dashscope_embedding_query()
    test_dashscope_embedding_with_empty_string()