enable streaming in anthropic llm wrapper (#2065)

1 year ago · b7ebb8fe30
parent 41c8a42e22
commit b7ebb8fe30
3 changed files with 137 additions and 50 deletions
--- a/docs/modules/models/llms/examples/streaming_llm.ipynb
+++ b/docs/modules/models/llms/examples/streaming_llm.ipynb
@ -5,18 +5,34 @@
   "id": "6eaf7e66-f49c-42da-8d11-22ea13bef718",
   "metadata": {},
   "source": [
-    "# How to stream LLM responses\n",
+    "# How to stream LLM and Chat Model responses\n",
    "\n",
-    "LangChain provides streaming support for LLMs. Currently, we only support streaming for the `OpenAI` and `ChatOpenAI` LLM implementation, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()."
+    "LangChain provides streaming support for LLMs. Currently, we support streaming for the `OpenAI`, `ChatOpenAI`, and `Anthropic` implementations, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "id": "4ac0ff54-540a-4f2b-8d9a-b590fec7fe07",
   "metadata": {
    "tags": []
   },
+   "outputs": [],
+   "source": [
+    "from langchain.llms import OpenAI, Anthropic\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.callbacks.base import CallbackManager\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+    "from langchain.schema import HumanMessage"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "77f60a4b-f786-41f2-972e-e5bb8a48dcd5",
+   "metadata": {
+    "tags": []
+   },
   "outputs": [
    {
     "name": "stdout",
@ -63,13 +79,6 @@
    }
   ],
   "source": [
-    "from langchain.llms import OpenAI\n",
-    "from langchain.chat_models import ChatOpenAI\n",
-    "from langchain.callbacks.base import CallbackManager\n",
-    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
-    "from langchain.schema import HumanMessage\n",
-    "\n",
-    "\n",
    "llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
    "resp = llm(\"Write me a song about sparkling water.\")"
   ]
@ -86,7 +95,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
   "id": "a35373f1-9ee6-4753-a343-5aee749b8527",
   "metadata": {
    "tags": []
@ -105,10 +114,10 @@
    {
     "data": {
      "text/plain": [
-       "LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}})"
+       "LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})"
      ]
     },
-     "execution_count": 6,
+     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -122,12 +131,12 @@
   "id": "a93a4d61-0476-49db-8321-7de92bd74059",
   "metadata": {},
   "source": [
-    "Here's an example with `ChatOpenAI`:"
+    "Here's an example with the `ChatOpenAI` chat model implementation:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
   "id": "22665f16-e05b-473c-a4bd-ad75744ea024",
   "metadata": {
    "tags": []
@ -137,49 +146,47 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "\n",
-      "\n",
      "Verse 1:\n",
      "Bubbles rising to the top\n",
      "A refreshing drink that never stops\n",
-      "Clear and crisp, it's pure delight\n",
-      "A taste that's sure to excite\n",
+      "Clear and crisp, it's oh so pure\n",
+      "Sparkling water, I can't ignore\n",
      "\n",
      "Chorus:\n",
-      "Sparkling water, oh so fine\n",
-      "A drink that's always on my mind\n",
-      "With every sip, I feel alive\n",
-      "Sparkling water, you're my vibe\n",
+      "Sparkling water, oh how you shine\n",
+      "A taste so clean, it's simply divine\n",
+      "You quench my thirst, you make me feel alive\n",
+      "Sparkling water, you're my favorite vibe\n",
      "\n",
      "Verse 2:\n",
-      "No sugar, no calories, just pure bliss\n",
-      "A drink that's hard to resist\n",
-      "It's the perfect way to quench my thirst\n",
-      "A drink that always comes first\n",
+      "No sugar, no calories, just H2O\n",
+      "A drink that's good for me, don't you know\n",
+      "With lemon or lime, you're even better\n",
+      "Sparkling water, you're my forever\n",
      "\n",
      "Chorus:\n",
-      "Sparkling water, oh so fine\n",
-      "A drink that's always on my mind\n",
-      "With every sip, I feel alive\n",
-      "Sparkling water, you're my vibe\n",
+      "Sparkling water, oh how you shine\n",
+      "A taste so clean, it's simply divine\n",
+      "You quench my thirst, you make me feel alive\n",
+      "Sparkling water, you're my favorite vibe\n",
      "\n",
      "Bridge:\n",
-      "From the mountains to the sea\n",
-      "Sparkling water, you're the key\n",
-      "To a healthy life, a happy soul\n",
-      "A drink that makes me feel whole\n",
+      "You're my go-to drink, day or night\n",
+      "You make me feel so light\n",
+      "I'll never give you up, you're my true love\n",
+      "Sparkling water, you're sent from above\n",
      "\n",
      "Chorus:\n",
-      "Sparkling water, oh so fine\n",
-      "A drink that's always on my mind\n",
-      "With every sip, I feel alive\n",
-      "Sparkling water, you're my vibe\n",
+      "Sparkling water, oh how you shine\n",
+      "A taste so clean, it's simply divine\n",
+      "You quench my thirst, you make me feel alive\n",
+      "Sparkling water, you're my favorite vibe\n",
      "\n",
      "Outro:\n",
-      "Sparkling water, you're the one\n",
-      "A drink that's always so much fun\n",
-      "I'll never let you go, my friend\n",
-      "Sparkling"
+      "Sparkling water, you're the one for me\n",
+      "I'll never let you go, can't you see\n",
+      "You're my drink of choice, forevermore\n",
+      "Sparkling water, I adore."
     ]
    }
   ],
@ -188,13 +195,59 @@
    "resp = chat([HumanMessage(content=\"Write me a song about sparkling water.\")])"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "909ae48b-0f07-4990-bbff-e627f706c93e",
+   "metadata": {},
+   "source": [
+    "Here is an example with the `Anthropic` LLM implementation, which uses their `claude` model."
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
   "id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc",
-   "metadata": {},
-   "outputs": [],
-   "source": []
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Sparkling water, bubbles so bright,\n",
+      "\n",
+      "Fizzing and popping in the light.\n",
+      "\n",
+      "No sugar or calories, a healthy delight,\n",
+      "\n",
+      "Sparkling water, refreshing and light.\n",
+      "\n",
+      "Carbonation that tickles the tongue,\n",
+      "\n",
+      "In flavors of lemon and lime unsung.\n",
+      "\n",
+      "Sparkling water, a drink quite all right,\n",
+      "\n",
+      "Bubbles sparkling in the light."
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'\\nSparkling water, bubbles so bright,\\n\\nFizzing and popping in the light.\\n\\nNo sugar or calories, a healthy delight,\\n\\nSparkling water, refreshing and light.\\n\\nCarbonation that tickles the tongue,\\n\\nIn flavors of lemon and lime unsung.\\n\\nSparkling water, a drink quite all right,\\n\\nBubbles sparkling in the light.'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "llm = Anthropic(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
+    "llm(\"Write me a song about sparkling water.\")"
+   ]
  }
 ],
 "metadata": {
@ -213,7 +266,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.10.9"
  }
 },
 "nbformat": 4,
--- a/langchain/llms/anthropic.py
+++ b/langchain/llms/anthropic.py
@ -48,6 +48,9 @@ class Anthropic(LLM, BaseModel):
    top_p: float = 1
    """Total probability mass of tokens to consider at each step."""

+    streaming: bool = False
+    """Whether to stream the results."""
+
    anthropic_api_key: Optional[str] = None

    HUMAN_PROMPT: Optional[str] = None
@ -143,14 +146,29 @@ class Anthropic(LLM, BaseModel):

        """
        stop = self._get_anthropic_stop(stop)
+        if self.streaming:
+            stream_resp = self.client.completion_stream(
+                model=self.model,
+                prompt=self._wrap_prompt(prompt),
+                stop_sequences=stop,
+                stream=True,
+                **self._default_params,
+            )
+            current_completion = ""
+            for data in stream_resp:
+                delta = data["completion"][len(current_completion) :]
+                current_completion = data["completion"]
+                self.callback_manager.on_llm_new_token(
+                    delta, verbose=self.verbose, **data
+                )
+            return current_completion
        response = self.client.completion(
            model=self.model,
            prompt=self._wrap_prompt(prompt),
            stop_sequences=stop,
            **self._default_params,
        )
-        text = response["completion"]
-        return text
+        return response["completion"]

    def stream(self, prompt: str, stop: Optional[List[str]] = None) -> Generator:
        r"""Call Anthropic completion_stream and return the resulting generator.
--- a/tests/integration_tests/llms/test_anthropic.py
+++ b/tests/integration_tests/llms/test_anthropic.py
@ -2,7 +2,9 @@

 from typing import Generator

+from langchain.callbacks.base import CallbackManager
 from langchain.llms.anthropic import Anthropic
+from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler


 def test_anthropic_call() -> None:
@ -21,3 +23,17 @@ def test_anthropic_streaming() -> None:

    for token in generator:
        assert isinstance(token["completion"], str)
+
+
+def test_anthropic_streaming_callback() -> None:
+    """Test that streaming correctly invokes on_llm_new_token callback."""
+    callback_handler = FakeCallbackHandler()
+    callback_manager = CallbackManager([callback_handler])
+    llm = Anthropic(
+        model="claude-v1",
+        streaming=True,
+        callback_manager=callback_manager,
+        verbose=True,
+    )
+    llm("Write me a sentence with 100 words.")
+    assert callback_handler.llm_streams > 1