From b7ebb8fe3009dd791b562968524718e20bfb4df8 Mon Sep 17 00:00:00 2001 From: Ankush Gola <9536492+agola11@users.noreply.github.com> Date: Mon, 27 Mar 2023 20:25:00 -0400 Subject: [PATCH] enable streaming in anthropic llm wrapper (#2065) --- .../models/llms/examples/streaming_llm.ipynb | 151 ++++++++++++------ langchain/llms/anthropic.py | 22 ++- .../integration_tests/llms/test_anthropic.py | 16 ++ 3 files changed, 138 insertions(+), 51 deletions(-) diff --git a/docs/modules/models/llms/examples/streaming_llm.ipynb b/docs/modules/models/llms/examples/streaming_llm.ipynb index 02d53d0e..c48d1ee5 100644 --- a/docs/modules/models/llms/examples/streaming_llm.ipynb +++ b/docs/modules/models/llms/examples/streaming_llm.ipynb @@ -5,18 +5,34 @@ "id": "6eaf7e66-f49c-42da-8d11-22ea13bef718", "metadata": {}, "source": [ - "# How to stream LLM responses\n", + "# How to stream LLM and Chat Model responses\n", "\n", - "LangChain provides streaming support for LLMs. Currently, we only support streaming for the `OpenAI` and `ChatOpenAI` LLM implementation, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()." + "LangChain provides streaming support for LLMs. Currently, we support streaming for the `OpenAI`, `ChatOpenAI`, and `Anthropic` implementations, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/streaming_stdout.py)." 
] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "4ac0ff54-540a-4f2b-8d9a-b590fec7fe07", "metadata": { "tags": [] }, + "outputs": [], + "source": [ + "from langchain.llms import OpenAI, Anthropic\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.callbacks.base import CallbackManager\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "from langchain.schema import HumanMessage" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "77f60a4b-f786-41f2-972e-e5bb8a48dcd5", + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -63,13 +79,6 @@ } ], "source": [ - "from langchain.llms import OpenAI\n", - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.callbacks.base import CallbackManager\n", - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", - "from langchain.schema import HumanMessage\n", - "\n", - "\n", "llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n", "resp = llm(\"Write me a song about sparkling water.\")" ] @@ -86,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": "a35373f1-9ee6-4753-a343-5aee749b8527", "metadata": { "tags": [] @@ -105,10 +114,10 @@ { "data": { "text/plain": [ - "LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}})" + "LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})" ] }, - "execution_count": 6, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -122,12 +131,12 @@ "id": "a93a4d61-0476-49db-8321-7de92bd74059", "metadata": {}, "source": [ - "Here's an example with `ChatOpenAI`:" + "Here's an example with the `ChatOpenAI` chat model implementation:" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "id": "22665f16-e05b-473c-a4bd-ad75744ea024", "metadata": { "tags": [] @@ -137,49 +146,47 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "\n", "Verse 1:\n", "Bubbles rising to the top\n", "A refreshing drink that never stops\n", - "Clear and crisp, it's pure delight\n", - "A taste that's sure to excite\n", + "Clear and crisp, it's oh so pure\n", + "Sparkling water, I can't ignore\n", "\n", "Chorus:\n", - "Sparkling water, oh so fine\n", - "A drink that's always on my mind\n", - "With every sip, I feel alive\n", - "Sparkling water, you're my vibe\n", + "Sparkling water, oh how you shine\n", + "A taste so clean, it's simply divine\n", + "You quench my thirst, you make me feel alive\n", + "Sparkling water, you're my favorite vibe\n", "\n", "Verse 2:\n", - "No sugar, no calories, just pure bliss\n", - "A drink that's hard to resist\n", - "It's the perfect way to quench my thirst\n", - "A drink that always comes first\n", + "No sugar, no calories, just H2O\n", + "A drink that's good for me, don't you know\n", + "With lemon or lime, you're even better\n", + "Sparkling water, you're my forever\n", "\n", "Chorus:\n", - "Sparkling water, oh so fine\n", - "A drink that's always on my mind\n", - "With every sip, I feel alive\n", - "Sparkling water, you're my vibe\n", + "Sparkling water, oh how you 
shine\n", + "A taste so clean, it's simply divine\n", + "You quench my thirst, you make me feel alive\n", + "Sparkling water, you're my favorite vibe\n", "\n", "Bridge:\n", - "From the mountains to the sea\n", - "Sparkling water, you're the key\n", - "To a healthy life, a happy soul\n", - "A drink that makes me feel whole\n", + "You're my go-to drink, day or night\n", + "You make me feel so light\n", + "I'll never give you up, you're my true love\n", + "Sparkling water, you're sent from above\n", "\n", "Chorus:\n", - "Sparkling water, oh so fine\n", - "A drink that's always on my mind\n", - "With every sip, I feel alive\n", - "Sparkling water, you're my vibe\n", + "Sparkling water, oh how you shine\n", + "A taste so clean, it's simply divine\n", + "You quench my thirst, you make me feel alive\n", + "Sparkling water, you're my favorite vibe\n", "\n", "Outro:\n", - "Sparkling water, you're the one\n", - "A drink that's always so much fun\n", - "I'll never let you go, my friend\n", - "Sparkling" + "Sparkling water, you're the one for me\n", + "I'll never let you go, can't you see\n", + "You're my drink of choice, forevermore\n", + "Sparkling water, I adore." ] } ], @@ -189,12 +196,58 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc", + "cell_type": "markdown", + "id": "909ae48b-0f07-4990-bbff-e627f706c93e", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "Here is an example with the `Anthropic` LLM implementation, which uses their `claude` model." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Sparkling water, bubbles so bright,\n", + "\n", + "Fizzing and popping in the light.\n", + "\n", + "No sugar or calories, a healthy delight,\n", + "\n", + "Sparkling water, refreshing and light.\n", + "\n", + "Carbonation that tickles the tongue,\n", + "\n", + "In flavors of lemon and lime unsung.\n", + "\n", + "Sparkling water, a drink quite all right,\n", + "\n", + "Bubbles sparkling in the light." 
+ ] + }, + { + "data": { + "text/plain": [ + "'\\nSparkling water, bubbles so bright,\\n\\nFizzing and popping in the light.\\n\\nNo sugar or calories, a healthy delight,\\n\\nSparkling water, refreshing and light.\\n\\nCarbonation that tickles the tongue,\\n\\nIn flavors of lemon and lime unsung.\\n\\nSparkling water, a drink quite all right,\\n\\nBubbles sparkling in the light.'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llm = Anthropic(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n", + "llm(\"Write me a song about sparkling water.\")" + ] } ], "metadata": { @@ -213,7 +266,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/langchain/llms/anthropic.py b/langchain/llms/anthropic.py index a5c57a94..9927af91 100644 --- a/langchain/llms/anthropic.py +++ b/langchain/llms/anthropic.py @@ -48,6 +48,9 @@ class Anthropic(LLM, BaseModel): top_p: float = 1 """Total probability mass of tokens to consider at each step.""" + streaming: bool = False + """Whether to stream the results.""" + anthropic_api_key: Optional[str] = None HUMAN_PROMPT: Optional[str] = None @@ -143,14 +146,29 @@ class Anthropic(LLM, BaseModel): """ stop = self._get_anthropic_stop(stop) + if self.streaming: + stream_resp = self.client.completion_stream( + model=self.model, + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + stream=True, + **self._default_params, + ) + current_completion = "" + for data in stream_resp: + delta = data["completion"][len(current_completion) :] + current_completion = data["completion"] + self.callback_manager.on_llm_new_token( + delta, verbose=self.verbose, **data + ) + return current_completion response = self.client.completion( model=self.model, prompt=self._wrap_prompt(prompt), stop_sequences=stop, **self._default_params, ) - text = response["completion"] - return text + return response["completion"] def stream(self, prompt: str, stop: Optional[List[str]] = None) -> Generator: r"""Call Anthropic completion_stream and return the resulting generator. diff --git a/tests/integration_tests/llms/test_anthropic.py b/tests/integration_tests/llms/test_anthropic.py index 9077633a..325a098d 100644 --- a/tests/integration_tests/llms/test_anthropic.py +++ b/tests/integration_tests/llms/test_anthropic.py @@ -2,7 +2,9 @@ from typing import Generator +from langchain.callbacks.base import CallbackManager from langchain.llms.anthropic import Anthropic +from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler def test_anthropic_call() -> None: @@ -21,3 +23,17 @@ def test_anthropic_streaming() -> None: for token in generator: assert isinstance(token["completion"], str) + + +def test_anthropic_streaming_callback() -> None: + """Test that streaming correctly invokes on_llm_new_token callback.""" + callback_handler = FakeCallbackHandler() + callback_manager = CallbackManager([callback_handler]) + llm = Anthropic( + model="claude-v1", + streaming=True, + callback_manager=callback_manager, + verbose=True, + ) + llm("Write me a sentence with 100 words.") + assert callback_handler.llm_streams > 1