mirror of https://github.com/hwchase17/langchain
Harrison/modelscope (#5156)
Co-authored-by: thomas-yanxin <yx20001210@163.com> Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>pull/5050/head
parent
2d5588c5f0
commit
11c26ebb55
@ -0,0 +1,20 @@
|
||||
# ModelScope
|
||||
|
||||
This page covers how to use the modelscope ecosystem within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific modelscope wrappers.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
* Install the Python SDK with `pip install modelscope`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### Embeddings
|
||||
|
||||
There is a ModelScope embeddings wrapper, which you can access with:
|
||||
|
||||
```python
|
||||
from langchain.embeddings import ModelScopeEmbeddings
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of this, see [this notebook](../modules/models/text_embedding/examples/modelscope_hub.ipynb).
|
@ -0,0 +1,82 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ModelScope\n",
|
||||
"\n",
|
||||
"Let's load the ModelScope Embedding class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import ModelScopeEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_id = \"damo/nlp_corom_sentence-embedding_english-base\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = ModelScopeEmbeddings(model_id=model_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text = \"This is a test document.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_result = embeddings.embed_query(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc_results = embeddings.embed_documents([\"foo\"])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "chatgpt",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.9.15"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -0,0 +1,72 @@
|
||||
"""Wrapper around ModelScopeHub embedding models."""
|
||||
from typing import Any, List
|
||||
|
||||
from pydantic import BaseModel, Extra
|
||||
|
||||
from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class ModelScopeEmbeddings(BaseModel, Embeddings):
    """Wrapper around modelscope_hub embedding models.

    To use, you should have the ``modelscope`` python package installed.

    Example:
        .. code-block:: python

            from langchain.embeddings import ModelScopeEmbeddings
            model_id = "damo/nlp_corom_sentence-embedding_english-base"
            embed = ModelScopeEmbeddings(model_id=model_id)
    """

    # Callable sentence-embedding pipeline; assigned in ``__init__``.
    embed: Any
    model_id: str = "damo/nlp_corom_sentence-embedding_english-base"
    """Model name to use."""

    def __init__(self, **kwargs: Any):
        """Validate the fields and build the modelscope pipeline.

        Args:
            **kwargs: Field values for the model (for example ``model_id``).

        Raises:
            ImportError: If the ``modelscope`` modules cannot be imported.
        """
        super().__init__(**kwargs)
        # Only the imports are guarded, so an ImportError raised while the
        # pipeline itself is being built keeps its own, more specific message.
        try:
            from modelscope.pipelines import pipeline
            from modelscope.utils.constant import Tasks
        except ImportError as e:
            raise ImportError(
                "Could not import some python packages. "
                "Please install it with `pip install modelscope`."
            ) from e

        self.embed = pipeline(Tasks.sentence_embedding, model=self.model_id)

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a modelscope embedding model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text. An empty input yields an
            empty list without invoking the pipeline.
        """
        if not texts:
            return []
        # Newlines are replaced with spaces before the texts are embedded.
        cleaned = [text.replace("\n", " ") for text in texts]
        inputs = {"source_sentence": cleaned}
        # NOTE(review): the "text_embedding" value is assumed to be an
        # array-like exposing ``tolist()`` - confirm against modelscope.
        embeddings = self.embed(input=inputs)["text_embedding"]
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a modelscope embedding model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        text = text.replace("\n", " ")
        # The pipeline is given a one-element batch; the first row of the
        # result is the embedding of the query.
        inputs = {"source_sentence": [text]}
        embedding = self.embed(input=inputs)["text_embedding"][0]
        return embedding.tolist()
|
@ -0,0 +1,19 @@
|
||||
"""Test modelscope embeddings."""
|
||||
from langchain.embeddings.modelscope_hub import ModelScopeEmbeddings
|
||||
|
||||
|
||||
def test_modelscope_embedding_documents() -> None:
    """Test modelscope embeddings for documents.

    Embeds a single document and checks that exactly one embedding comes
    back (one per input text) with the expected vector length.
    """
    documents = ["foo bar"]
    embedding = ModelScopeEmbeddings()
    output = embedding.embed_documents(documents)
    # One embedding per input document; the previous hard-coded 2 did not
    # match the single document passed in.
    assert len(output) == len(documents)
    assert len(output[0]) == 512
|
||||
|
||||
|
||||
def test_modelscope_embedding_query() -> None:
    """Check that a query string embeds to a vector of the expected length."""
    query_text = "foo bar"
    embedder = ModelScopeEmbeddings()
    vector = embedder.embed_query(query_text)
    assert len(vector) == 512
|
Loading…
Reference in New Issue