langchain/libs/community/langchain_community/embeddings/modelscope_hub.py

from typing import Any, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra


class ModelScopeEmbeddings(BaseModel, Embeddings):
    """ModelScopeHub embedding models.

    To use, you should have the ``modelscope`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings import ModelScopeEmbeddings
            model_id = "damo/nlp_corom_sentence-embedding_english-base"
            embed = ModelScopeEmbeddings(model_id=model_id, model_revision="v1.0.0")
    """

    embed: Any
    """Sentence-embedding pipeline; assigned in ``__init__``."""
    model_id: str = "damo/nlp_corom_sentence-embedding_english-base"
    """Model name to use."""
    model_revision: Optional[str] = None
    """Model revision forwarded to the pipeline (``None`` is passed as-is)."""

    def __init__(self, **kwargs: Any):
        """Validate the fields and build the modelscope pipeline.

        Args:
            **kwargs: Field values for this model (e.g. ``model_id``,
                ``model_revision``).

        Raises:
            ImportError: If the ``modelscope`` package cannot be imported.
        """
        super().__init__(**kwargs)
        # Imported lazily so the module can be imported without modelscope.
        try:
            from modelscope.pipelines import pipeline
            from modelscope.utils.constant import Tasks
        except ImportError as e:
            raise ImportError(
                "Could not import some python packages. "
                "Please install it with `pip install modelscope`."
            ) from e
        # Always (re)assigned here, replacing any ``embed`` value in kwargs.
        self.embed = pipeline(
            Tasks.sentence_embedding,
            model=self.model_id,
            model_revision=self.model_revision,
        )

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a modelscope embedding model.

        Newlines in each text are replaced with spaces before embedding.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text. An empty input yields an
            empty list without calling the pipeline.
        """
        if not texts:
            return []
        cleaned = [text.replace("\n", " ") for text in texts]
        result = self.embed(input={"source_sentence": cleaned})
        # "text_embedding" is presumably an array-like exposing ``tolist()``.
        return result["text_embedding"].tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a modelscope embedding model.

        Newlines in the text are replaced with spaces before embedding.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        cleaned = text.replace("\n", " ")
        result = self.embed(input={"source_sentence": [cleaned]})
        # A single sentence was submitted, so take the first (only) row.
        return result["text_embedding"][0].tolist()
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv 
langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes 2023-12-11 21:53:30 +00:00			`from typing import Any, List, Optional`

			`from langchain_core.embeddings import Embeddings`
			`from langchain_core.pydantic_v1 import BaseModel, Extra`


			`class ModelScopeEmbeddings(BaseModel, Embeddings):`
			`"""ModelScopeHub embedding models.`

			To use, you should have the ``modelscope`` python package installed.

			`Example:`
			`.. code-block:: python`

			`from langchain_community.embeddings import ModelScopeEmbeddings`
			`model_id = "damo/nlp_corom_sentence-embedding_english-base"`
			`embed = ModelScopeEmbeddings(model_id=model_id, model_revision="v1.0.0")`
			`"""`

			`embed: Any`
			`model_id: str = "damo/nlp_corom_sentence-embedding_english-base"`
			`"""Model name to use."""`
			`model_revision: Optional[str] = None`

			`def __init__(self, **kwargs: Any):`
			`"""Initialize the modelscope"""`
			`super().__init__(**kwargs)`
			`try:`
			`from modelscope.pipelines import pipeline`
			`from modelscope.utils.constant import Tasks`
			`except ImportError as e:`
			`raise ImportError(`
			`"Could not import some python packages."`
			"Please install it with `pip install modelscope`."
			`) from e`
			`self.embed = pipeline(`
			`Tasks.sentence_embedding,`
			`model=self.model_id,`
			`model_revision=self.model_revision,`
			`)`

			`class Config:`
			`"""Configuration for this pydantic object."""`

			`extra = Extra.forbid`

			`def embed_documents(self, texts: List[str]) -> List[List[float]]:`
			`"""Compute doc embeddings using a modelscope embedding model.`

			`Args:`
			`texts: The list of texts to embed.`

			`Returns:`
			`List of embeddings, one for each text.`
			`"""`
			`texts = list(map(lambda x: x.replace("\n", " "), texts))`
			`inputs = {"source_sentence": texts}`
			`embeddings = self.embed(input=inputs)["text_embedding"]`
			`return embeddings.tolist()`

			`def embed_query(self, text: str) -> List[float]:`
			`"""Compute query embeddings using a modelscope embedding model.`

			`Args:`
			`text: The text to embed.`

			`Returns:`
			`Embeddings for the text.`
			`"""`
			`text = text.replace("\n", " ")`
			`inputs = {"source_sentence": [text]}`
			`embedding = self.embed(input=inputs)["text_embedding"][0]`
			`return embedding.tolist()`