langchain/libs/community/tests/integration_tests/embeddings/test_xinference.py

"""Test Xinference embeddings."""
import time
from typing import AsyncGenerator, Tuple

import pytest_asyncio

from langchain_community.embeddings import XinferenceEmbeddings


@pytest_asyncio.fixture
async def setup() -> AsyncGenerator[Tuple[str, str], None]:
    import xoscar as xo
    from xinference.deploy.supervisor import start_supervisor_components
    from xinference.deploy.utils import create_worker_actor_pool
    from xinference.deploy.worker import start_worker_components

    pool = await create_worker_actor_pool(
        f"test://127.0.0.1:{xo.utils.get_next_port()}"
    )
    print(f"Pool running on localhost:{pool.external_address}")

    endpoint = await start_supervisor_components(
        pool.external_address, "127.0.0.1", xo.utils.get_next_port()
    )
    await start_worker_components(
        address=pool.external_address, supervisor_address=pool.external_address
    )

    # wait for the api.
    time.sleep(3)
    async with pool:
        yield endpoint, pool.external_address


def test_xinference_embedding_documents(setup: Tuple[str, str]) -> None:
    """Test xinference embeddings for documents."""
    from xinference.client import RESTfulClient

    endpoint, _ = setup

    client = RESTfulClient(endpoint)

    model_uid = client.launch_model(
        model_name="vicuna-v1.3",
        model_size_in_billions=7,
        model_format="ggmlv3",
        quantization="q4_0",
    )

    xinference = XinferenceEmbeddings(server_url=endpoint, model_uid=model_uid)

    documents = ["foo bar", "bar foo"]
    output = xinference.embed_documents(documents)
    assert len(output) == 2
    assert len(output[0]) == 4096


def test_xinference_embedding_query(setup: Tuple[str, str]) -> None:
    """Test xinference embeddings for query."""
    from xinference.client import RESTfulClient

    endpoint, _ = setup

    client = RESTfulClient(endpoint)

    model_uid = client.launch_model(
        model_name="vicuna-v1.3", model_size_in_billions=7, quantization="q4_0"
    )

    xinference = XinferenceEmbeddings(server_url=endpoint, model_uid=model_uid)

    document = "foo bar"
    output = xinference.embed_query(document)
    assert len(output) == 4096
FEAT: Integrate Xinference LLMs and Embeddings (#8171) - [Xorbits Inference(Xinference)](https://github.com/xorbitsai/inference) is a powerful and versatile library designed to serve language, speech recognition, and multimodal models. Xinference supports a variety of GGML-compatible models including chatglm, whisper, and vicuna, and utilizes heterogeneous hardware and a distributed architecture for seamless cross-device and cross-server model deployment. - This PR integrates Xinference models and Xinference embeddings into LangChain. - Dependencies: To install the depenedencies for this integration, run `pip install "xinference[all]"` - Example Usage: To start a local instance of Xinference, run `xinference`. To deploy Xinference in a distributed cluster, first start an Xinference supervisor using `xinference-supervisor`: `xinference-supervisor -H "${supervisor_host}"` Then, start the Xinference workers using `xinference-worker` on each server you want to run them on. `xinference-worker -e "http://${supervisor_host}:9997"` To use Xinference with LangChain, you also need to launch a model. You can use command line interface (CLI) to do so. Fo example: `xinference launch -n vicuna-v1.3 -f ggmlv3 -q q4_0`. This launches a model named vicuna-v1.3 with `model_format="ggmlv3"` and `quantization="q4_0"`. A model UID is returned for you to use. Now you can use Xinference with LangChain: ```python from langchain.llms import Xinference llm = Xinference( server_url="http://0.0.0.0:9997", # suppose the supervisor_host is "0.0.0.0" model_uid = {model_uid} # model UID returned from launching a model ) llm( prompt="Q: where can we visit in the capital of France? A:", generate_config={"max_tokens": 1024}, ) ``` You can also use RESTful client to launch a model: ```python from xinference.client import RESTfulClient client = RESTfulClient("http://0.0.0.0:9997") model_uid = client.launch_model(model_name="vicuna-v1.3", model_size_in_billions=7, quantization="q4_0") ``` The following code block demonstrates how to use Xinference embeddings with LangChain: ```python from langchain.embeddings import XinferenceEmbeddings xinference = XinferenceEmbeddings( server_url="http://0.0.0.0:9997", model_uid = model_uid ) ``` ```python query_result = xinference.embed_query("This is a test query") ``` ```python doc_result = xinference.embed_documents(["text A", "text B"]) ``` Xinference is still under rapid development. Feel free to [join our Slack community](https://xorbitsio.slack.com/join/shared_invite/zt-1z3zsm9ep-87yI9YZ_B79HLB2ccTq4WA) to get the latest updates! - Request for review: @hwchase17, @baskaryan - Twitter handle: https://twitter.com/Xorbitsio --------- Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-07-28 04:23:19 +00:00			`"""Test Xinference embeddings."""`
			`import time`
			`from typing import AsyncGenerator, Tuple`

			`import pytest_asyncio`

community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes 2023-12-11 21:53:30 +00:00			`from langchain_community.embeddings import XinferenceEmbeddings`
FEAT: Integrate Xinference LLMs and Embeddings (#8171) - [Xorbits Inference(Xinference)](https://github.com/xorbitsai/inference) is a powerful and versatile library designed to serve language, speech recognition, and multimodal models. Xinference supports a variety of GGML-compatible models including chatglm, whisper, and vicuna, and utilizes heterogeneous hardware and a distributed architecture for seamless cross-device and cross-server model deployment. - This PR integrates Xinference models and Xinference embeddings into LangChain. - Dependencies: To install the depenedencies for this integration, run `pip install "xinference[all]"` - Example Usage: To start a local instance of Xinference, run `xinference`. To deploy Xinference in a distributed cluster, first start an Xinference supervisor using `xinference-supervisor`: `xinference-supervisor -H "${supervisor_host}"` Then, start the Xinference workers using `xinference-worker` on each server you want to run them on. `xinference-worker -e "http://${supervisor_host}:9997"` To use Xinference with LangChain, you also need to launch a model. You can use command line interface (CLI) to do so. Fo example: `xinference launch -n vicuna-v1.3 -f ggmlv3 -q q4_0`. This launches a model named vicuna-v1.3 with `model_format="ggmlv3"` and `quantization="q4_0"`. A model UID is returned for you to use. Now you can use Xinference with LangChain: ```python from langchain.llms import Xinference llm = Xinference( server_url="http://0.0.0.0:9997", # suppose the supervisor_host is "0.0.0.0" model_uid = {model_uid} # model UID returned from launching a model ) llm( prompt="Q: where can we visit in the capital of France? A:", generate_config={"max_tokens": 1024}, ) ``` You can also use RESTful client to launch a model: ```python from xinference.client import RESTfulClient client = RESTfulClient("http://0.0.0.0:9997") model_uid = client.launch_model(model_name="vicuna-v1.3", model_size_in_billions=7, quantization="q4_0") ``` The following code block demonstrates how to use Xinference embeddings with LangChain: ```python from langchain.embeddings import XinferenceEmbeddings xinference = XinferenceEmbeddings( server_url="http://0.0.0.0:9997", model_uid = model_uid ) ``` ```python query_result = xinference.embed_query("This is a test query") ``` ```python doc_result = xinference.embed_documents(["text A", "text B"]) ``` Xinference is still under rapid development. Feel free to [join our Slack community](https://xorbitsio.slack.com/join/shared_invite/zt-1z3zsm9ep-87yI9YZ_B79HLB2ccTq4WA) to get the latest updates! - Request for review: @hwchase17, @baskaryan - Twitter handle: https://twitter.com/Xorbitsio --------- Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-07-28 04:23:19 +00:00

			`@pytest_asyncio.fixture`
			`async def setup() -> AsyncGenerator[Tuple[str, str], None]:`
			`import xoscar as xo`
			`from xinference.deploy.supervisor import start_supervisor_components`
			`from xinference.deploy.utils import create_worker_actor_pool`
			`from xinference.deploy.worker import start_worker_components`

			`pool = await create_worker_actor_pool(`
			`f"test://127.0.0.1:{xo.utils.get_next_port()}"`
			`)`
			`print(f"Pool running on localhost:{pool.external_address}")`

			`endpoint = await start_supervisor_components(`
			`pool.external_address, "127.0.0.1", xo.utils.get_next_port()`
			`)`
			`await start_worker_components(`
			`address=pool.external_address, supervisor_address=pool.external_address`
			`)`

			`# wait for the api.`
			`time.sleep(3)`
			`async with pool:`
			`yield endpoint, pool.external_address`


			`def test_xinference_embedding_documents(setup: Tuple[str, str]) -> None:`
			`"""Test xinference embeddings for documents."""`
			`from xinference.client import RESTfulClient`

			`endpoint, _ = setup`

			`client = RESTfulClient(endpoint)`

			`model_uid = client.launch_model(`
			`model_name="vicuna-v1.3",`
			`model_size_in_billions=7,`
			`model_format="ggmlv3",`
			`quantization="q4_0",`
			`)`

			`xinference = XinferenceEmbeddings(server_url=endpoint, model_uid=model_uid)`

			`documents = ["foo bar", "bar foo"]`
			`output = xinference.embed_documents(documents)`
			`assert len(output) == 2`
			`assert len(output[0]) == 4096`


			`def test_xinference_embedding_query(setup: Tuple[str, str]) -> None:`
			`"""Test xinference embeddings for query."""`
			`from xinference.client import RESTfulClient`

			`endpoint, _ = setup`

			`client = RESTfulClient(endpoint)`

			`model_uid = client.launch_model(`
			`model_name="vicuna-v1.3", model_size_in_billions=7, quantization="q4_0"`
			`)`

			`xinference = XinferenceEmbeddings(server_url=endpoint, model_uid=model_uid)`

			`document = "foo bar"`
			`output = xinference.embed_query(document)`
			`assert len(output) == 4096`