Add PipelineAI LLM integration (#3644)

Add PipelineAI LLM integration
1 year ago · 6d6fd1b9e1
parent a35bbbfa9e
commit 6d6fd1b9e1
6 changed files with 312 additions and 0 deletions
--- a/docs/ecosystem/pipelineai.md
+++ b/docs/ecosystem/pipelineai.md
@ -0,0 +1,19 @@
+# PipelineAI
+
+This page covers how to use the PipelineAI ecosystem within LangChain.
+It is broken into two parts: installation and setup, and then references to specific PipelineAI wrappers.
+
+## Installation and Setup
+
+- Install with `pip install pipeline-ai`
+- Get a Pipeline Cloud API key and set it as an environment variable (`PIPELINE_API_KEY`).
+
+## Wrappers
+
+### LLM
+
+There exists a PipelineAI LLM wrapper, which you can access with:
+
+```python
+from langchain.llms import PipelineAI
+```
--- a/docs/modules/models/llms/integrations/pipelineai_example.ipynb
+++ b/docs/modules/models/llms/integrations/pipelineai_example.ipynb
@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# PipelineAI\n",
+    "\n",
+    "PipelineAI allows you to run your ML models at scale in the cloud. It also provides API access to [several LLM models](https://pipeline.ai).\n",
+    "\n",
+    "This notebook goes over how to use Langchain with [PipelineAI](https://docs.pipeline.ai/docs)."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Install pipeline-ai\n",
+    "The `pipeline-ai` library is required to use the `PipelineAI` API, AKA `Pipeline Cloud`. Install `pipeline-ai` using `pip install pipeline-ai`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install the package\n",
+    "!pip install pipeline-ai"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from langchain.llms import PipelineAI\n",
+    "from langchain import PromptTemplate, LLMChain"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set the Environment API Key\n",
+    "Make sure to get your API key from PipelineAI. Check out the [cloud quickstart guide](https://docs.pipeline.ai/docs/cloud-quickstart). You'll be given a 30 day free trial with 10 hours of serverless GPU compute to test different models."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"PIPELINE_API_KEY\"] = \"YOUR_API_KEY_HERE\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create the PipelineAI instance\n",
+    "When instantiating PipelineAI, you need to specify the id or tag of the pipeline you want to use, e.g. `pipeline_key = \"public/gpt-j:base\"`. You then have the option of passing additional pipeline-specific keyword arguments:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = PipelineAI(pipeline_key=\"YOUR_PIPELINE_KEY\", pipeline_kwargs={...})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create a Prompt Template\n",
+    "We will create a prompt template for Question and Answer."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "template = \"\"\"Question: {question}\n",
+    "\n",
+    "Answer: Let's think step by step.\"\"\"\n",
+    "\n",
+    "prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initiate the LLMChain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm_chain = LLMChain(prompt=prompt, llm=llm)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Run the LLMChain\n",
+    "Provide a question and run the LLMChain."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n",
+    "\n",
+    "llm_chain.run(question)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/langchain/init.py
+++ b/langchain/init.py
@ -35,6 +35,7 @@ from langchain.llms import (
    Modal,
    OpenAI,
    Petals,
+    PipelineAI,
    SagemakerEndpoint,
    StochasticAI,
    Writer,
@ -94,6 +95,7 @@ __all__ = [
    "Modal",
    "OpenAI",
    "Petals",
+    "PipelineAI",
    "StochasticAI",
    "Writer",
    "BasePromptTemplate",
--- a/langchain/llms/init.py
+++ b/langchain/llms/init.py
@ -20,6 +20,7 @@ from langchain.llms.modal import Modal
 from langchain.llms.nlpcloud import NLPCloud
 from langchain.llms.openai import AzureOpenAI, OpenAI, OpenAIChat
 from langchain.llms.petals import Petals
+from langchain.llms.pipelineai import PipelineAI
 from langchain.llms.predictionguard import PredictionGuard
 from langchain.llms.promptlayer_openai import PromptLayerOpenAI, PromptLayerOpenAIChat
 from langchain.llms.replicate import Replicate
@ -46,6 +47,7 @@ __all__ = [
    "OpenAI",
    "OpenAIChat",
    "Petals",
+    "PipelineAI",
    "HuggingFaceEndpoint",
    "HuggingFaceHub",
    "SagemakerEndpoint",
@ -82,6 +84,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
    "nlpcloud": NLPCloud,
    "openai": OpenAI,
    "petals": Petals,
+    "pipelineai": PipelineAI,
    "huggingface_pipeline": HuggingFacePipeline,
    "azure": AzureOpenAI,
    "replicate": Replicate,
--- a/langchain/llms/pipelineai.py
+++ b/langchain/llms/pipelineai.py
@ -0,0 +1,107 @@
+"""Wrapper around Pipeline Cloud API."""
+import logging
+from typing import Any, Dict, List, Mapping, Optional
+
+from pydantic import BaseModel, Extra, Field, root_validator
+
+from langchain.llms.base import LLM
+from langchain.llms.utils import enforce_stop_tokens
+from langchain.utils import get_from_dict_or_env
+
+logger = logging.getLogger(__name__)
+
+
+class PipelineAI(LLM, BaseModel):
+    """Wrapper around PipelineAI large language models.
+
+    To use, you should have the ``pipeline-ai`` python package installed,
+    and the environment variable ``PIPELINE_API_KEY`` set with your API key.
+
+    Any parameters that are valid to be passed to the call can be passed
+    in, even if not explicitly saved on this class; they are collected
+    into ``pipeline_kwargs``.
+
+    Example:
+        .. code-block:: python
+
+            from langchain import PipelineAI
+            pipeline = PipelineAI(pipeline_key="public/gpt-j:base")
+    """
+
+    pipeline_key: str = ""
+    """The id or tag of the target pipeline"""
+
+    pipeline_kwargs: Dict[str, Any] = Field(default_factory=dict)
+    """Holds any pipeline parameters valid for `create` call not
+    explicitly specified."""
+
+    pipeline_api_key: Optional[str] = None
+    """API key handed to the Pipeline Cloud client; resolved from this field
+    or the ``PIPELINE_API_KEY`` environment variable during validation."""
+
+    class Config:
+        """Configuration for this pydantic config."""
+
+        extra = Extra.forbid
+
+    @root_validator(pre=True)
+    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """Move unrecognised keyword arguments into ``pipeline_kwargs``.
+
+        Args:
+            values: Raw constructor arguments, before field validation.
+
+        Returns:
+            ``values`` with every key that is not a declared field moved
+            under ``pipeline_kwargs``.
+
+        Raises:
+            ValueError: If a key is supplied both as a keyword argument
+                and inside ``pipeline_kwargs``.
+        """
+        known_field_names = {field.alias for field in cls.__fields__.values()}
+
+        # Copy so the dict the caller passed as ``pipeline_kwargs`` is not
+        # mutated when extra keyword arguments are folded into it.
+        extra = dict(values.get("pipeline_kwargs", {}))
+        # Iterate over a snapshot of the keys: ``values`` is popped from below.
+        for field_name in list(values):
+            if field_name in known_field_names:
+                continue
+            if field_name in extra:
+                raise ValueError(f"Found {field_name} supplied twice.")
+            logger.warning(
+                "%s was transferred to pipeline_kwargs. "
+                "Please confirm that %s is what you intended.",
+                field_name,
+                field_name,
+            )
+            extra[field_name] = values.pop(field_name)
+        values["pipeline_kwargs"] = extra
+        return values
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Resolve the API key from the arguments or the environment.
+
+        Only the key is resolved here; the ``pipeline-ai`` package itself
+        is imported lazily inside ``_call``.
+        """
+        values["pipeline_api_key"] = get_from_dict_or_env(
+            values, "pipeline_api_key", "PIPELINE_API_KEY"
+        )
+        return values
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        return {
+            "pipeline_key": self.pipeline_key,
+            "pipeline_kwargs": self.pipeline_kwargs,
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "pipeline_ai"
+
+    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        """Run ``prompt`` through the configured pipeline on Pipeline Cloud.
+
+        Args:
+            prompt: Text passed as the first pipeline input.
+            stop: Optional stop sequences; when given, the returned text is
+                post-processed with ``enforce_stop_tokens``.
+
+        Returns:
+            The ``[0][0]`` element of the run's ``result_preview``, after
+            optional stop-token enforcement.
+
+        Raises:
+            ValueError: If the ``pipeline-ai`` package cannot be imported.
+            AttributeError: If the run has no ``result_preview`` attribute.
+        """
+        try:
+            from pipeline import PipelineCloud
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import pipeline-ai python package. "
+                "Please install it with `pip install pipeline-ai`."
+            ) from exc
+        client = PipelineCloud(token=self.pipeline_api_key)
+        params = self.pipeline_kwargs or {}
+
+        run = client.run_pipeline(self.pipeline_key, [prompt, params])
+        try:
+            text = run.result_preview[0][0]
+        except AttributeError as exc:
+            raise AttributeError(
+                "A pipeline run should have a `result_preview` attribute. "
+                f"Run was: {run}"
+            ) from exc
+        if stop is not None:
+            # Stop sequences are applied client-side; presumably the pipeline
+            # parameters do not enforce them (original author's assumption).
+            text = enforce_stop_tokens(text, stop)
+        return text
--- a/tests/integration_tests/llms/test_pipelineai.py
+++ b/tests/integration_tests/llms/test_pipelineai.py
@ -0,0 +1,10 @@
+"""Test Pipeline Cloud API wrapper."""
+
+from langchain.llms.pipelineai import PipelineAI
+
+
+def test_pipelineai_call() -> None:
+    """Check that a Pipeline Cloud completion comes back as a string."""
+    model = PipelineAI()
+    completion = model("Say foo:")
+    assert isinstance(completion, str)