enable streaming in anthropic llm wrapper (#2065)

searx
Ankush Gola 1 year ago committed by GitHub
parent 41c8a42e22
commit b7ebb8fe30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -5,18 +5,34 @@
"id": "6eaf7e66-f49c-42da-8d11-22ea13bef718",
"metadata": {},
"source": [
"# How to stream LLM responses\n",
"# How to stream LLM and Chat Model responses\n",
"\n",
"LangChain provides streaming support for LLMs. Currently, we only support streaming for the `OpenAI` and `ChatOpenAI` LLM implementation, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()."
"LangChain provides streaming support for LLMs. Currently, we support streaming for the `OpenAI`, `ChatOpenAI`, and `Anthropic` implementations, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/streaming_stdout.py)."
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "4ac0ff54-540a-4f2b-8d9a-b590fec7fe07",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.llms import OpenAI, Anthropic\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.callbacks.base import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "77f60a4b-f786-41f2-972e-e5bb8a48dcd5",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
@ -63,13 +79,6 @@
}
],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.callbacks.base import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain.schema import HumanMessage\n",
"\n",
"\n",
"llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
"resp = llm(\"Write me a song about sparkling water.\")"
]
@ -86,7 +95,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"id": "a35373f1-9ee6-4753-a343-5aee749b8527",
"metadata": {
"tags": []
@ -105,10 +114,10 @@
{
"data": {
"text/plain": [
"LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}})"
"LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})"
]
},
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@ -122,12 +131,12 @@
"id": "a93a4d61-0476-49db-8321-7de92bd74059",
"metadata": {},
"source": [
"Here's an example with `ChatOpenAI`:"
"Here's an example with the `ChatOpenAI` chat model implementation:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"id": "22665f16-e05b-473c-a4bd-ad75744ea024",
"metadata": {
"tags": []
@ -137,49 +146,47 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"Verse 1:\n",
"Bubbles rising to the top\n",
"A refreshing drink that never stops\n",
"Clear and crisp, it's pure delight\n",
"A taste that's sure to excite\n",
"Clear and crisp, it's oh so pure\n",
"Sparkling water, I can't ignore\n",
"\n",
"Chorus:\n",
"Sparkling water, oh so fine\n",
"A drink that's always on my mind\n",
"With every sip, I feel alive\n",
"Sparkling water, you're my vibe\n",
"Sparkling water, oh how you shine\n",
"A taste so clean, it's simply divine\n",
"You quench my thirst, you make me feel alive\n",
"Sparkling water, you're my favorite vibe\n",
"\n",
"Verse 2:\n",
"No sugar, no calories, just pure bliss\n",
"A drink that's hard to resist\n",
"It's the perfect way to quench my thirst\n",
"A drink that always comes first\n",
"No sugar, no calories, just H2O\n",
"A drink that's good for me, don't you know\n",
"With lemon or lime, you're even better\n",
"Sparkling water, you're my forever\n",
"\n",
"Chorus:\n",
"Sparkling water, oh so fine\n",
"A drink that's always on my mind\n",
"With every sip, I feel alive\n",
"Sparkling water, you're my vibe\n",
"Sparkling water, oh how you shine\n",
"A taste so clean, it's simply divine\n",
"You quench my thirst, you make me feel alive\n",
"Sparkling water, you're my favorite vibe\n",
"\n",
"Bridge:\n",
"From the mountains to the sea\n",
"Sparkling water, you're the key\n",
"To a healthy life, a happy soul\n",
"A drink that makes me feel whole\n",
"You're my go-to drink, day or night\n",
"You make me feel so light\n",
"I'll never give you up, you're my true love\n",
"Sparkling water, you're sent from above\n",
"\n",
"Chorus:\n",
"Sparkling water, oh so fine\n",
"A drink that's always on my mind\n",
"With every sip, I feel alive\n",
"Sparkling water, you're my vibe\n",
"Sparkling water, oh how you shine\n",
"A taste so clean, it's simply divine\n",
"You quench my thirst, you make me feel alive\n",
"Sparkling water, you're my favorite vibe\n",
"\n",
"Outro:\n",
"Sparkling water, you're the one\n",
"A drink that's always so much fun\n",
"I'll never let you go, my friend\n",
"Sparkling"
"Sparkling water, you're the one for me\n",
"I'll never let you go, can't you see\n",
"You're my drink of choice, forevermore\n",
"Sparkling water, I adore."
]
}
],
@ -188,13 +195,59 @@
"resp = chat([HumanMessage(content=\"Write me a song about sparkling water.\")])"
]
},
{
"cell_type": "markdown",
"id": "909ae48b-0f07-4990-bbff-e627f706c93e",
"metadata": {},
"source": [
"Here is an example with the `Anthropic` LLM implementation, which uses their `claude` model."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc",
"metadata": {},
"outputs": [],
"source": []
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Sparkling water, bubbles so bright,\n",
"\n",
"Fizzing and popping in the light.\n",
"\n",
"No sugar or calories, a healthy delight,\n",
"\n",
"Sparkling water, refreshing and light.\n",
"\n",
"Carbonation that tickles the tongue,\n",
"\n",
"In flavors of lemon and lime unsung.\n",
"\n",
"Sparkling water, a drink quite all right,\n",
"\n",
"Bubbles sparkling in the light."
]
},
{
"data": {
"text/plain": [
"'\\nSparkling water, bubbles so bright,\\n\\nFizzing and popping in the light.\\n\\nNo sugar or calories, a healthy delight,\\n\\nSparkling water, refreshing and light.\\n\\nCarbonation that tickles the tongue,\\n\\nIn flavors of lemon and lime unsung.\\n\\nSparkling water, a drink quite all right,\\n\\nBubbles sparkling in the light.'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm = Anthropic(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
"llm(\"Write me a song about sparkling water.\")"
]
}
],
"metadata": {
@ -213,7 +266,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.9"
}
},
"nbformat": 4,

@ -48,6 +48,9 @@ class Anthropic(LLM, BaseModel):
top_p: float = 1
"""Total probability mass of tokens to consider at each step."""
streaming: bool = False
"""Whether to stream the results."""
anthropic_api_key: Optional[str] = None
HUMAN_PROMPT: Optional[str] = None
@ -143,14 +146,29 @@ class Anthropic(LLM, BaseModel):
"""
stop = self._get_anthropic_stop(stop)
if self.streaming:
stream_resp = self.client.completion_stream(
model=self.model,
prompt=self._wrap_prompt(prompt),
stop_sequences=stop,
stream=True,
**self._default_params,
)
current_completion = ""
for data in stream_resp:
delta = data["completion"][len(current_completion) :]
current_completion = data["completion"]
self.callback_manager.on_llm_new_token(
delta, verbose=self.verbose, **data
)
return current_completion
response = self.client.completion(
model=self.model,
prompt=self._wrap_prompt(prompt),
stop_sequences=stop,
**self._default_params,
)
text = response["completion"]
return text
return response["completion"]
def stream(self, prompt: str, stop: Optional[List[str]] = None) -> Generator:
r"""Call Anthropic completion_stream and return the resulting generator.

@ -2,7 +2,9 @@
from typing import Generator
from langchain.callbacks.base import CallbackManager
from langchain.llms.anthropic import Anthropic
from tests.unit_tests.callbacks.fake_callback_handler import FakeCallbackHandler
def test_anthropic_call() -> None:
@ -21,3 +23,17 @@ def test_anthropic_streaming() -> None:
for token in generator:
assert isinstance(token["completion"], str)
def test_anthropic_streaming_callback() -> None:
    """Check that a streaming Anthropic call fires on_llm_new_token repeatedly."""
    handler = FakeCallbackHandler()
    llm = Anthropic(
        model="claude-v1",
        streaming=True,
        callback_manager=CallbackManager([handler]),
        verbose=True,
    )

    # The completion text itself is irrelevant; only the callbacks matter.
    llm("Write me a sentence with 100 words.")

    # A streamed response should arrive in more than one chunk.
    # NOTE(review): assumes llm_streams counts on_llm_new_token calls --
    # confirm against FakeCallbackHandler.
    assert handler.llm_streams > 1

Loading…
Cancel
Save