mirror of https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00

enable streaming in anthropic llm wrapper (#2065)

This commit is contained in:
parent 41c8a42e22
commit b7ebb8fe30
@@ -5,18 +5,34 @@
    "id": "6eaf7e66-f49c-42da-8d11-22ea13bef718",
    "metadata": {},
    "source": [
-    "# How to stream LLM responses\n",
+    "# How to stream LLM and Chat Model responses\n",
     "\n",
-    "LangChain provides streaming support for LLMs. Currently, we only support streaming for the `OpenAI` and `ChatOpenAI` LLM implementation, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()."
+    "LangChain provides streaming support for LLMs. Currently, we support streaming for the `OpenAI`, `ChatOpenAI`, and `Anthropic` implementations, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "id": "4ac0ff54-540a-4f2b-8d9a-b590fec7fe07",
    "metadata": {
     "tags": []
    },
+   "outputs": [],
+   "source": [
+    "from langchain.llms import OpenAI, Anthropic\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.callbacks.base import CallbackManager\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+    "from langchain.schema import HumanMessage"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "77f60a4b-f786-41f2-972e-e5bb8a48dcd5",
+   "metadata": {
+    "tags": []
+   },
    "outputs": [
     {
      "name": "stdout",
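The markdown cell updated above describes the callback contract: streaming works through a `CallbackHandler` that implements `on_llm_new_token`. As a hedged illustration of that contract (not part of this commit; the class name and token list are made up), here is a minimal sketch of a handler that collects tokens instead of printing them. Subclassing `StreamingStdOutCallbackHandler` sidesteps any abstract methods on the base handler interface.

# Minimal sketch of the on_llm_new_token contract; illustrative only.
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

class TokenCollectorHandler(StreamingStdOutCallbackHandler):
    """Accumulate streamed tokens in memory as they arrive."""

    def __init__(self) -> None:
        super().__init__()
        self.tokens: list = []

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Called once per streamed delta; store it instead of writing stdout.
        self.tokens.append(token)

Passing `CallbackManager([TokenCollectorHandler()])` to any streaming-enabled wrapper in this notebook would then fill `.tokens` one chunk at a time.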
@@ -63,13 +79,6 @@
     }
    ],
    "source": [
-    "from langchain.llms import OpenAI\n",
-    "from langchain.chat_models import ChatOpenAI\n",
-    "from langchain.callbacks.base import CallbackManager\n",
-    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
-    "from langchain.schema import HumanMessage\n",
-    "\n",
-    "\n",
     "llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
     "resp = llm(\"Write me a song about sparkling water.\")"
    ]
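The cell above drives streaming through the callback manager. The wrappers of this era also expose a raw `stream` generator; a hedged sketch of consuming it directly with the OpenAI wrapper follows. The chunk shape (`choices[0]["text"]`) is an assumption about the OpenAI SDK response format, not something this commit touches.

# Hedged sketch: consuming the raw stream generator instead of callbacks.
# The choices[0]["text"] access assumes the OpenAI SDK chunk format.
llm = OpenAI(temperature=0)
for chunk in llm.stream("Write me a haiku about sparkling water."):
    print(chunk["choices"][0]["text"], end="", flush=True)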
@@ -86,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "id": "a35373f1-9ee6-4753-a343-5aee749b8527",
    "metadata": {
     "tags": []
@@ -105,10 +114,10 @@
     {
      "data": {
       "text/plain": [
-       "LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}})"
+       "LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
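The changed output line shows `llm_output` now carrying `model_name` alongside a still-empty `token_usage`: usage stats are not returned by the OpenAI API in streaming mode. A hedged sketch of the `generate` call that yields an `LLMResult` like the one recorded above (the prompt is inferred from the joke in the output):

# token_usage stays empty when streaming because the OpenAI API does not
# report usage stats on streamed responses; the model name is still recorded.
result = llm.generate(["Tell me a joke."])
print(result.llm_output)
# expected: {'token_usage': {}, 'model_name': 'text-davinci-003'}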
@@ -122,12 +131,12 @@
    "id": "a93a4d61-0476-49db-8321-7de92bd74059",
    "metadata": {},
    "source": [
-    "Here's an example with `ChatOpenAI`:"
+    "Here's an example with the `ChatOpenAI` chat model implementation:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "id": "22665f16-e05b-473c-a4bd-ad75744ea024",
    "metadata": {
     "tags": []
@@ -137,49 +146,47 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n",
-      "\n",
       "Verse 1:\n",
       "Bubbles rising to the top\n",
       "A refreshing drink that never stops\n",
-      "Clear and crisp, it's pure delight\n",
-      "A taste that's sure to excite\n",
+      "Clear and crisp, it's oh so pure\n",
+      "Sparkling water, I can't ignore\n",
       "\n",
       "Chorus:\n",
-      "Sparkling water, oh so fine\n",
-      "A drink that's always on my mind\n",
-      "With every sip, I feel alive\n",
-      "Sparkling water, you're my vibe\n",
+      "Sparkling water, oh how you shine\n",
+      "A taste so clean, it's simply divine\n",
+      "You quench my thirst, you make me feel alive\n",
+      "Sparkling water, you're my favorite vibe\n",
       "\n",
       "Verse 2:\n",
-      "No sugar, no calories, just pure bliss\n",
-      "A drink that's hard to resist\n",
-      "It's the perfect way to quench my thirst\n",
-      "A drink that always comes first\n",
+      "No sugar, no calories, just H2O\n",
+      "A drink that's good for me, don't you know\n",
+      "With lemon or lime, you're even better\n",
+      "Sparkling water, you're my forever\n",
       "\n",
       "Chorus:\n",
-      "Sparkling water, oh so fine\n",
-      "A drink that's always on my mind\n",
-      "With every sip, I feel alive\n",
-      "Sparkling water, you're my vibe\n",
+      "Sparkling water, oh how you shine\n",
+      "A taste so clean, it's simply divine\n",
+      "You quench my thirst, you make me feel alive\n",
+      "Sparkling water, you're my favorite vibe\n",
       "\n",
       "Bridge:\n",
-      "From the mountains to the sea\n",
-      "Sparkling water, you're the key\n",
-      "To a healthy life, a happy soul\n",
-      "A drink that makes me feel whole\n",
+      "You're my go-to drink, day or night\n",
+      "You make me feel so light\n",
+      "I'll never give you up, you're my true love\n",
+      "Sparkling water, you're sent from above\n",
       "\n",
       "Chorus:\n",
-      "Sparkling water, oh so fine\n",
-      "A drink that's always on my mind\n",
-      "With every sip, I feel alive\n",
-      "Sparkling water, you're my vibe\n",
+      "Sparkling water, oh how you shine\n",
+      "A taste so clean, it's simply divine\n",
+      "You quench my thirst, you make me feel alive\n",
+      "Sparkling water, you're my favorite vibe\n",
       "\n",
       "Outro:\n",
-      "Sparkling water, you're the one\n",
-      "A drink that's always so much fun\n",
-      "I'll never let you go, my friend\n",
-      "Sparkling"
+      "Sparkling water, you're the one for me\n",
+      "I'll never let you go, can't you see\n",
+      "You're my drink of choice, forevermore\n",
+      "Sparkling water, I adore."
      ]
     }
    ],
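The lyrics above are the streamed output of the `ChatOpenAI` cell, whose source lines fall outside this hunk. A hedged reconstruction of that call, pieced together from the imports added earlier in this commit; the exact cell contents are an assumption.

# Hedged reconstruction of the ChatOpenAI cell that produced the lyrics above.
chat = ChatOpenAI(
    streaming=True,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
    temperature=0,
)
resp = chat([HumanMessage(content="Write me a song about sparkling water.")])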
@@ -189,12 +196,58 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc",
+   "cell_type": "markdown",
+   "id": "909ae48b-0f07-4990-bbff-e627f706c93e",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "Here is an example with the `Anthropic` LLM implementation, which uses their `claude` model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Sparkling water, bubbles so bright,\n",
+      "\n",
+      "Fizzing and popping in the light.\n",
+      "\n",
+      "No sugar or calories, a healthy delight,\n",
+      "\n",
+      "Sparkling water, refreshing and light.\n",
+      "\n",
+      "Carbonation that tickles the tongue,\n",
+      "\n",
+      "In flavors of lemon and lime unsung.\n",
+      "\n",
+      "Sparkling water, a drink quite all right,\n",
+      "\n",
+      "Bubbles sparkling in the light."
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'\\nSparkling water, bubbles so bright,\\n\\nFizzing and popping in the light.\\n\\nNo sugar or calories, a healthy delight,\\n\\nSparkling water, refreshing and light.\\n\\nCarbonation that tickles the tongue,\\n\\nIn flavors of lemon and lime unsung.\\n\\nSparkling water, a drink quite all right,\\n\\nBubbles sparkling in the light.'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "llm = Anthropic(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
+    "llm(\"Write me a song about sparkling water.\")"
+   ]
   }
  ],
  "metadata": {
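Besides the callback path in the new Anthropic cell, the wrapper keeps a `stream` method (visible in the second Python hunk below) that returns Anthropic's raw generator. A hedged usage sketch; each payload's `completion` field is cumulative, holding all text generated so far, which is exactly what the streaming branch in `_call` relies on.

# Hedged sketch: consuming Anthropic's raw stream generator directly.
# Each payload's "completion" field holds the full text generated so far.
llm = Anthropic(temperature=0)
for token in llm.stream("Write me a song about sparkling water."):
    print(token["completion"])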
@@ -213,7 +266,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,
@@ -48,6 +48,9 @@ class Anthropic(LLM, BaseModel):
     top_p: float = 1
     """Total probability mass of tokens to consider at each step."""

+    streaming: bool = False
+    """Whether to stream the results."""
+
     anthropic_api_key: Optional[str] = None

     HUMAN_PROMPT: Optional[str] = None
@@ -143,14 +146,29 @@ class Anthropic(LLM, BaseModel):

         """
         stop = self._get_anthropic_stop(stop)
+        if self.streaming:
+            stream_resp = self.client.completion_stream(
+                model=self.model,
+                prompt=self._wrap_prompt(prompt),
+                stop_sequences=stop,
+                stream=True,
+                **self._default_params,
+            )
+            current_completion = ""
+            for data in stream_resp:
+                delta = data["completion"][len(current_completion) :]
+                current_completion = data["completion"]
+                self.callback_manager.on_llm_new_token(
+                    delta, verbose=self.verbose, **data
+                )
+            return current_completion
         response = self.client.completion(
             model=self.model,
             prompt=self._wrap_prompt(prompt),
             stop_sequences=stop,
             **self._default_params,
         )
-        text = response["completion"]
-        return text
+        return response["completion"]

     def stream(self, prompt: str, stop: Optional[List[str]] = None) -> Generator:
         r"""Call Anthropic completion_stream and return the resulting generator.
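The streaming branch above derives each callback token by diffing cumulative payloads: `completion_stream` yields the full text so far on every event, so the new token is the suffix past the previously seen length. A self-contained sketch of that slicing logic, with made-up event payloads:

# Standalone illustration of the delta computation in _call above; the
# event payloads are made up, but the slicing matches the new code.
events = [
    {"completion": "Sparkling"},
    {"completion": "Sparkling water,"},
    {"completion": "Sparkling water, bubbles so bright"},
]
current_completion = ""
for data in events:
    delta = data["completion"][len(current_completion):]
    current_completion = data["completion"]
    print(repr(delta))
# prints 'Sparkling', then ' water,', then ' bubbles so bright'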
@@ -2,7 +2,9 @@

 from typing import Generator

+from langchain.callbacks.base import CallbackManager
 from langchain.llms.anthropic import Anthropic
+from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler


 def test_anthropic_call() -> None:
@@ -21,3 +23,17 @@ def test_anthropic_streaming() -> None:

     for token in generator:
         assert isinstance(token["completion"], str)
+
+
+def test_anthropic_streaming_callback() -> None:
+    """Test that streaming correctly invokes on_llm_new_token callback."""
+    callback_handler = FakeCallbackHandler()
+    callback_manager = CallbackManager([callback_handler])
+    llm = Anthropic(
+        model="claude-v1",
+        streaming=True,
+        callback_manager=callback_manager,
+        verbose=True,
+    )
+    llm("Write me a sentence with 100 words.")
+    assert callback_handler.llm_streams > 1
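The new test asserts on `FakeCallbackHandler.llm_streams`, a counter the fixture bumps on every `on_llm_new_token` call. A hedged sketch of that mechanism in isolation, exercising the same `CallbackManager.on_llm_new_token` signature the wrapper uses; the sample deltas are made up:

# Hedged sketch of what the assertion checks: the manager fans each
# streamed delta out to on_llm_new_token, and the fake handler counts them.
handler = FakeCallbackHandler()
manager = CallbackManager([handler])
for delta in ["Sparkling", " water", "!"]:
    manager.on_llm_new_token(delta, verbose=True)
print(handler.llm_streams)  # expected: 3, one per delta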