enable streaming in anthropic llm wrapper (#2065)

searx
Ankush Gola 1 year ago committed by GitHub
parent 41c8a42e22
commit b7ebb8fe30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -5,18 +5,34 @@
"id": "6eaf7e66-f49c-42da-8d11-22ea13bef718", "id": "6eaf7e66-f49c-42da-8d11-22ea13bef718",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# How to stream LLM responses\n", "# How to stream LLM and Chat Model responses\n",
"\n", "\n",
"LangChain provides streaming support for LLMs. Currently, we only support streaming for the `OpenAI` and `ChatOpenAI` LLM implementation, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()." "LangChain provides streaming support for LLMs. Currently, we support streaming for the `OpenAI`, `ChatOpenAI`, and `Anthropic` implementations, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 1,
"id": "4ac0ff54-540a-4f2b-8d9a-b590fec7fe07", "id": "4ac0ff54-540a-4f2b-8d9a-b590fec7fe07",
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [],
"source": [
"from langchain.llms import OpenAI, Anthropic\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.callbacks.base import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "77f60a4b-f786-41f2-972e-e5bb8a48dcd5",
"metadata": {
"tags": []
},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
@ -63,13 +79,6 @@
} }
], ],
"source": [ "source": [
"from langchain.llms import OpenAI\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.callbacks.base import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain.schema import HumanMessage\n",
"\n",
"\n",
"llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n", "llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
"resp = llm(\"Write me a song about sparkling water.\")" "resp = llm(\"Write me a song about sparkling water.\")"
] ]
@ -86,7 +95,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 4,
"id": "a35373f1-9ee6-4753-a343-5aee749b8527", "id": "a35373f1-9ee6-4753-a343-5aee749b8527",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -105,10 +114,10 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}})" "LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})"
] ]
}, },
"execution_count": 6, "execution_count": 4,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -122,12 +131,12 @@
"id": "a93a4d61-0476-49db-8321-7de92bd74059", "id": "a93a4d61-0476-49db-8321-7de92bd74059",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Here's an example with `ChatOpenAI`:" "Here's an example with the `ChatOpenAI` chat model implementation:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 6,
"id": "22665f16-e05b-473c-a4bd-ad75744ea024", "id": "22665f16-e05b-473c-a4bd-ad75744ea024",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -137,49 +146,47 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\n",
"\n",
"Verse 1:\n", "Verse 1:\n",
"Bubbles rising to the top\n", "Bubbles rising to the top\n",
"A refreshing drink that never stops\n", "A refreshing drink that never stops\n",
"Clear and crisp, it's pure delight\n", "Clear and crisp, it's oh so pure\n",
"A taste that's sure to excite\n", "Sparkling water, I can't ignore\n",
"\n", "\n",
"Chorus:\n", "Chorus:\n",
"Sparkling water, oh so fine\n", "Sparkling water, oh how you shine\n",
"A drink that's always on my mind\n", "A taste so clean, it's simply divine\n",
"With every sip, I feel alive\n", "You quench my thirst, you make me feel alive\n",
"Sparkling water, you're my vibe\n", "Sparkling water, you're my favorite vibe\n",
"\n", "\n",
"Verse 2:\n", "Verse 2:\n",
"No sugar, no calories, just pure bliss\n", "No sugar, no calories, just H2O\n",
"A drink that's hard to resist\n", "A drink that's good for me, don't you know\n",
"It's the perfect way to quench my thirst\n", "With lemon or lime, you're even better\n",
"A drink that always comes first\n", "Sparkling water, you're my forever\n",
"\n", "\n",
"Chorus:\n", "Chorus:\n",
"Sparkling water, oh so fine\n", "Sparkling water, oh how you shine\n",
"A drink that's always on my mind\n", "A taste so clean, it's simply divine\n",
"With every sip, I feel alive\n", "You quench my thirst, you make me feel alive\n",
"Sparkling water, you're my vibe\n", "Sparkling water, you're my favorite vibe\n",
"\n", "\n",
"Bridge:\n", "Bridge:\n",
"From the mountains to the sea\n", "You're my go-to drink, day or night\n",
"Sparkling water, you're the key\n", "You make me feel so light\n",
"To a healthy life, a happy soul\n", "I'll never give you up, you're my true love\n",
"A drink that makes me feel whole\n", "Sparkling water, you're sent from above\n",
"\n", "\n",
"Chorus:\n", "Chorus:\n",
"Sparkling water, oh so fine\n", "Sparkling water, oh how you shine\n",
"A drink that's always on my mind\n", "A taste so clean, it's simply divine\n",
"With every sip, I feel alive\n", "You quench my thirst, you make me feel alive\n",
"Sparkling water, you're my vibe\n", "Sparkling water, you're my favorite vibe\n",
"\n", "\n",
"Outro:\n", "Outro:\n",
"Sparkling water, you're the one\n", "Sparkling water, you're the one for me\n",
"A drink that's always so much fun\n", "I'll never let you go, can't you see\n",
"I'll never let you go, my friend\n", "You're my drink of choice, forevermore\n",
"Sparkling" "Sparkling water, I adore."
] ]
} }
], ],
@ -188,13 +195,59 @@
"resp = chat([HumanMessage(content=\"Write me a song about sparkling water.\")])" "resp = chat([HumanMessage(content=\"Write me a song about sparkling water.\")])"
] ]
}, },
{
"cell_type": "markdown",
"id": "909ae48b-0f07-4990-bbff-e627f706c93e",
"metadata": {},
"source": [
"Here is an example with the `Anthropic` LLM implementation, which uses their `claude` model."
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 3,
"id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc", "id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc",
"metadata": {}, "metadata": {
"outputs": [], "tags": []
"source": [] },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Sparkling water, bubbles so bright,\n",
"\n",
"Fizzing and popping in the light.\n",
"\n",
"No sugar or calories, a healthy delight,\n",
"\n",
"Sparkling water, refreshing and light.\n",
"\n",
"Carbonation that tickles the tongue,\n",
"\n",
"In flavors of lemon and lime unsung.\n",
"\n",
"Sparkling water, a drink quite all right,\n",
"\n",
"Bubbles sparkling in the light."
]
},
{
"data": {
"text/plain": [
"'\\nSparkling water, bubbles so bright,\\n\\nFizzing and popping in the light.\\n\\nNo sugar or calories, a healthy delight,\\n\\nSparkling water, refreshing and light.\\n\\nCarbonation that tickles the tongue,\\n\\nIn flavors of lemon and lime unsung.\\n\\nSparkling water, a drink quite all right,\\n\\nBubbles sparkling in the light.'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm = Anthropic(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
"llm(\"Write me a song about sparkling water.\")"
]
} }
], ],
"metadata": { "metadata": {
@ -213,7 +266,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.9.1" "version": "3.10.9"
} }
}, },
"nbformat": 4, "nbformat": 4,

@ -48,6 +48,9 @@ class Anthropic(LLM, BaseModel):
top_p: float = 1 top_p: float = 1
"""Total probability mass of tokens to consider at each step.""" """Total probability mass of tokens to consider at each step."""
streaming: bool = False
"""Whether to stream the results."""
anthropic_api_key: Optional[str] = None anthropic_api_key: Optional[str] = None
HUMAN_PROMPT: Optional[str] = None HUMAN_PROMPT: Optional[str] = None
@ -143,14 +146,29 @@ class Anthropic(LLM, BaseModel):
""" """
stop = self._get_anthropic_stop(stop) stop = self._get_anthropic_stop(stop)
if self.streaming:
stream_resp = self.client.completion_stream(
model=self.model,
prompt=self._wrap_prompt(prompt),
stop_sequences=stop,
stream=True,
**self._default_params,
)
current_completion = ""
for data in stream_resp:
delta = data["completion"][len(current_completion) :]
current_completion = data["completion"]
self.callback_manager.on_llm_new_token(
delta, verbose=self.verbose, **data
)
return current_completion
response = self.client.completion( response = self.client.completion(
model=self.model, model=self.model,
prompt=self._wrap_prompt(prompt), prompt=self._wrap_prompt(prompt),
stop_sequences=stop, stop_sequences=stop,
**self._default_params, **self._default_params,
) )
text = response["completion"] return response["completion"]
return text
def stream(self, prompt: str, stop: Optional[List[str]] = None) -> Generator: def stream(self, prompt: str, stop: Optional[List[str]] = None) -> Generator:
r"""Call Anthropic completion_stream and return the resulting generator. r"""Call Anthropic completion_stream and return the resulting generator.

@ -2,7 +2,9 @@
from typing import Generator from typing import Generator
from langchain.callbacks.base import CallbackManager
from langchain.llms.anthropic import Anthropic from langchain.llms.anthropic import Anthropic
from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler
def test_anthropic_call() -> None: def test_anthropic_call() -> None:
@ -21,3 +23,17 @@ def test_anthropic_streaming() -> None:
for token in generator: for token in generator:
assert isinstance(token["completion"], str) assert isinstance(token["completion"], str)
def test_anthropic_streaming_callback() -> None:
    """Check that a streaming Anthropic call fires the new-token callback repeatedly."""
    handler = FakeCallbackHandler()
    llm = Anthropic(
        model="claude-v1",
        streaming=True,
        callback_manager=CallbackManager([handler]),
        verbose=True,
    )
    llm("Write me a sentence with 100 words.")
    # NOTE(review): llm_streams is presumably the handler's count of
    # on_llm_new_token invocations -- confirm against FakeCallbackHandler.
    # Requiring more than one guards against the whole completion arriving
    # as a single chunk.
    assert handler.llm_streams > 1

Loading…
Cancel
Save