diff --git a/docs/integrations/ctransformers.md b/docs/integrations/ctransformers.md
new file mode 100644
index 00000000..1159e195
--- /dev/null
+++ b/docs/integrations/ctransformers.md
@@ -0,0 +1,57 @@
+# C Transformers
+
+This page covers how to use the [C Transformers](https://github.com/marella/ctransformers) library within LangChain.
+It is broken into two parts: installation and setup, and then references to specific C Transformers wrappers.
+
+## Installation and Setup
+
+- Install the Python package with `pip install ctransformers`
+- Download a supported [GGML model](https://huggingface.co/TheBloke) (see [Supported Models](https://github.com/marella/ctransformers#supported-models))
+
+## Wrappers
+
+### LLM
+
+There exists a CTransformers LLM wrapper, which you can access with:
+
+```python
+from langchain.llms import CTransformers
+```
+
+It provides a unified interface for all models:
+
+```python
+llm = CTransformers(model='/path/to/ggml-gpt-2.bin', model_type='gpt2')
+
+print(llm('AI is going to'))
+```
+
+If you are getting an `illegal instruction` error, try using `lib='avx'` or `lib='basic'`:
+
+```python
+llm = CTransformers(model='/path/to/ggml-gpt-2.bin', model_type='gpt2', lib='avx')
+```
+
+It can be used with models hosted on the Hugging Face Hub:
+
+```python
+llm = CTransformers(model='marella/gpt-2-ggml')
+```
+
+If a model repo has multiple model files (`.bin` files), specify a model file using:
+
+```python
+llm = CTransformers(model='marella/gpt-2-ggml', model_file='ggml-model.bin')
+```
+
+Additional parameters can be passed using the `config` parameter:
+
+```python
+config = {'max_new_tokens': 256, 'repetition_penalty': 1.1}
+
+llm = CTransformers(model='marella/gpt-2-ggml', config=config)
+```
+
+See the [Documentation](https://github.com/marella/ctransformers#config) for a list of available parameters.
+
+For a more detailed walkthrough, see [this notebook](../modules/models/llms/integrations/ctransformers.ipynb).
diff --git a/docs/modules/models/llms/integrations/ctransformers.ipynb b/docs/modules/models/llms/integrations/ctransformers.ipynb
new file mode 100644
index 00000000..af0810ec
--- /dev/null
+++ b/docs/modules/models/llms/integrations/ctransformers.ipynb
@@ -0,0 +1,125 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# C Transformers\n",
+    "\n",
+    "The [C Transformers](https://github.com/marella/ctransformers) library provides Python bindings for GGML models.\n",
+    "\n",
+    "This example goes over how to use LangChain to interact with `C Transformers` [models](https://github.com/marella/ctransformers#supported-models)."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Install**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install ctransformers"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Load Model**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.llms import CTransformers\n",
+    "\n",
+    "llm = CTransformers(model='marella/gpt-2-ggml')"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Generate Text**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(llm('AI is going to'))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Streaming**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+    "\n",
+    "llm = CTransformers(model='marella/gpt-2-ggml', callbacks=[StreamingStdOutCallbackHandler()])\n",
+    "\n",
+    "response = llm('AI is going to')"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**LLMChain**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain import PromptTemplate, LLMChain\n",
+    "\n",
+    "template = \"\"\"Question: {question}\n",
+    "\n",
+    "Answer:\"\"\"\n",
+    "\n",
+    "prompt = PromptTemplate(template=template, input_variables=['question'])\n",
+    "\n",
+    "llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
+    "\n",
+    "response = llm_chain.run('What is AI?')"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/langchain/llms/__init__.py b/langchain/llms/__init__.py
index 6786f8cd..29e66ce9 100644
--- a/langchain/llms/__init__.py
+++ b/langchain/llms/__init__.py
@@ -10,6 +10,7 @@ from langchain.llms.base import BaseLLM
 from langchain.llms.beam import Beam
 from langchain.llms.cerebriumai import CerebriumAI
 from langchain.llms.cohere import Cohere
+from langchain.llms.ctransformers import CTransformers
 from langchain.llms.deepinfra import DeepInfra
 from langchain.llms.fake import FakeListLLM
 from langchain.llms.forefrontai import ForefrontAI
@@ -48,6 +49,7 @@ __all__ = [
     "Beam",
     "CerebriumAI",
     "Cohere",
+    "CTransformers",
     "DeepInfra",
     "ForefrontAI",
     "GooglePalm",
@@ -92,6 +94,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
     "beam": Beam,
     "cerebriumai": CerebriumAI,
     "cohere": Cohere,
+    "ctransformers": CTransformers,
     "deepinfra": DeepInfra,
     "forefrontai": ForefrontAI,
     "google_palm": GooglePalm,
diff --git a/langchain/llms/ctransformers.py b/langchain/llms/ctransformers.py
new file mode 100644
index 00000000..617d56dc
--- /dev/null
+++ b/langchain/llms/ctransformers.py
@@ -0,0 +1,104 @@
+"""Wrapper around the C Transformers library."""
+from typing import Any, Dict, Optional, Sequence
+
+from pydantic import root_validator
+
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+from langchain.llms.base import LLM
+
+
+class CTransformers(LLM):
+    """Wrapper around the C Transformers LLM interface.
+
+    To use, you should have the ``ctransformers`` python package installed.
+    See https://github.com/marella/ctransformers
+
+    Example:
+        .. code-block:: python
+
+            from langchain.llms import CTransformers
+
+            llm = CTransformers(model="/path/to/ggml-gpt-2.bin", model_type="gpt2")
+    """
+
+    client: Any  #: :meta private:
+
+    model: str
+    """The path to a model file or directory or the name of a Hugging Face Hub
+    model repo."""
+
+    model_type: Optional[str] = None
+    """The model type."""
+
+    model_file: Optional[str] = None
+    """The name of the model file in repo or directory."""
+
+    config: Optional[Dict[str, Any]] = None
+    """The config parameters.
+    See https://github.com/marella/ctransformers#config"""
+
+    lib: Optional[str] = None
+    """The path to a shared library or one of `avx2`, `avx`, `basic`."""
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Get the identifying parameters."""
+        return {
+            "model": self.model,
+            "model_type": self.model_type,
+            "model_file": self.model_file,
+            "config": self.config,
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "ctransformers"
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that ``ctransformers`` package is installed."""
+        try:
+            from ctransformers import AutoModelForCausalLM
+        except ImportError:
+            raise ImportError(
+                "Could not import `ctransformers` package. "
+                "Please install it with `pip install ctransformers`"
+            )
+
+        config = values["config"] or {}
+        values["client"] = AutoModelForCausalLM.from_pretrained(
+            values["model"],
+            model_type=values["model_type"],
+            model_file=values["model_file"],
+            lib=values["lib"],
+            **config,
+        )
+        return values
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[Sequence[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+    ) -> str:
+        """Generate text from a prompt.
+
+        Args:
+            prompt: The prompt to generate text from.
+            stop: A list of sequences to stop generation when encountered.
+
+        Returns:
+            The generated text.
+
+        Example:
+            .. code-block:: python
+
+                response = llm("Tell me a joke.")
+        """
+        text = []
+        _run_manager = run_manager or CallbackManagerForLLMRun.get_noop_manager()
+        for chunk in self.client(prompt, stop=stop, stream=True):
+            text.append(chunk)
+            _run_manager.on_llm_new_token(chunk, verbose=self.verbose)
+        return "".join(text)
diff --git a/tests/integration_tests/llms/test_ctransformers.py b/tests/integration_tests/llms/test_ctransformers.py
new file mode 100644
index 00000000..ead4dbce
--- /dev/null
+++ b/tests/integration_tests/llms/test_ctransformers.py
@@ -0,0 +1,21 @@
+"""Test C Transformers wrapper."""
+
+from langchain.llms import CTransformers
+from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler
+
+
+def test_ctransformers_call() -> None:
+    """Test valid call to C Transformers."""
+    config = {"max_new_tokens": 5}
+    callback_handler = FakeCallbackHandler()
+
+    llm = CTransformers(
+        model="marella/gpt-2-ggml",
+        config=config,
+        callbacks=[callback_handler],
+    )
+
+    output = llm("Say foo:")
+    assert isinstance(output, str)
+    assert len(output) > 1
+    assert 0 < callback_handler.llm_streams <= config["max_new_tokens"]
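A note on what the wrapper delegates to: `validate_environment` builds the client with `ctransformers.AutoModelForCausalLM.from_pretrained`, and `_call` iterates the client with `stream=True`, joining the chunks. A minimal sketch of those underlying calls, using the same example model as the docs above (the config value here is illustrative):

```python
from ctransformers import AutoModelForCausalLM

# Equivalent of what validate_environment does: config parameters are
# forwarded as keyword arguments to from_pretrained.
model = AutoModelForCausalLM.from_pretrained(
    "marella/gpt-2-ggml", model_type="gpt2", max_new_tokens=20
)

# Equivalent of what _call does: consume the token stream and join it.
chunks = []
for chunk in model("AI is going to", stream=True):
    chunks.append(chunk)
print("".join(chunks))
```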
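Because `_call` forwards every streamed chunk to `on_llm_new_token`, any callback handler can observe generation token by token; this is what the test's `llm_streams` assertion counts. A rough sketch with a custom handler (the `TokenCounter` class is hypothetical and not part of this diff):

```python
from langchain.callbacks.base import BaseCallbackHandler
from langchain.llms import CTransformers


class TokenCounter(BaseCallbackHandler):
    """Illustrative handler that counts and prints tokens as they arrive."""

    def __init__(self) -> None:
        self.tokens = 0

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Called once per chunk yielded by the ctransformers stream in _call.
        self.tokens += 1
        print(token, end="", flush=True)


handler = TokenCounter()
llm = CTransformers(
    model="marella/gpt-2-ggml",
    config={"max_new_tokens": 20},
    callbacks=[handler],
)
llm("AI is going to")
print(f"\nstreamed {handler.tokens} tokens")
```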
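`_call` also passes `stop` through to the client, so generation can be cut off at given sequences using the standard LLM call signature. A small sketch (prompt and stop strings are arbitrary):

```python
from langchain.llms import CTransformers

llm = CTransformers(model="marella/gpt-2-ggml", config={"max_new_tokens": 64})

# Generation should halt once one of the stop sequences is produced.
print(llm("Question: What is AI?\nAnswer:", stop=["\n"]))
```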
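The new `type_to_cls_dict` entry in `langchain/llms/__init__.py` registers the wrapper under the `ctransformers` key, which should also make it constructible from a serialized config through `langchain.llms.loading`, as other LLM types are. A hypothetical sketch (field values are illustrative, and round-tripping of the nested `config` dict is assumed rather than verified here):

```python
from langchain.llms.loading import load_llm_from_config

# "_type" selects the class via type_to_cls_dict; remaining keys are passed
# to the CTransformers constructor.
llm_config = {
    "_type": "ctransformers",
    "model": "marella/gpt-2-ggml",
    "config": {"max_new_tokens": 32},
}
llm = load_llm_from_config(llm_config)
print(llm("AI is going to"))
```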