forked from Archives/langchain
# Streaming only final output of agent (#2483) As requested in issue #2483, this callback makes it possible to stream only the final output of an agent (i.e., not the intermediate steps). Fixes #2483 Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
parent
3bc0bf0079
commit
7388248b3e
154
docs/modules/agents/streaming_stdout_final_only.ipynb
Normal file
154
docs/modules/agents/streaming_stdout_final_only.ipynb
Normal file
@ -0,0 +1,154 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23234b50-e6c6-4c87-9f97-259c15f36894",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"# Only streaming final agent output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29dd6333-307c-43df-b848-65001c01733b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you only want the final output of an agent to be streamed, you can use the callback ``FinalStreamingStdOutCallbackHandler``.\n",
|
||||
"For this, the underlying LLM has to support streaming as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e4592215-6604-47e2-89ff-5db3af6d1e40",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.callbacks.streaming_stdout_final_only import FinalStreamingStdOutCallbackHandler\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "19a813f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create the underlying LLM with ``streaming = True`` and pass a new instance of ``FinalStreamingStdOutCallbackHandler``."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7fe81ef4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(streaming=True, callbacks=[FinalStreamingStdOutCallbackHandler()], temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ff45b85d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago in 2023."
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago in 2023.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = load_tools([\"wikipedia\", \"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)\n",
|
||||
"agent.run(\"It's 2023 now. How many years ago did Konrad Adenauer become Chancellor of Germany.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "53a743b8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Handling custom answer prefixes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23602c62",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, we assume that the token sequence ``\"\\nFinal\", \" Answer\", \":\"`` indicates that the agent has reached an answer. We can, however, also pass a custom sequence to use as the answer prefix."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "5662a638",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(\n",
|
||||
" streaming=True,\n",
|
||||
" callbacks=[FinalStreamingStdOutCallbackHandler(answer_prefix_tokens=[\"\\nThe\", \" answer\", \":\"])],\n",
|
||||
" temperature=0\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b1a96cc0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Be aware that you will likely need to include whitespace and newline characters in your tokens."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9278b522",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
49
langchain/callbacks/streaming_stdout_final_only.py
Normal file
49
langchain/callbacks/streaming_stdout_final_only.py
Normal file
@ -0,0 +1,49 @@
|
||||
"""Callback handler that streams only an agent's final answer to stdout."""
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
|
||||
# Default token sequence treated as the marker that the agent's final answer
# follows; used when no custom ``answer_prefix_tokens`` is passed to the handler.
DEFAULT_ANSWER_PREFIX_TOKENS = ["\nFinal", " Answer", ":"]
|
||||
|
||||
|
||||
class FinalStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler):
    """Callback handler for streaming in agents.

    Only works with agents using LLMs that support streaming.

    Only the final output of the agent will be streamed: incoming tokens are
    suppressed until the most recent tokens equal ``answer_prefix_tokens``;
    every token received after that prefix is written to stdout.
    """

    def __init__(self, answer_prefix_tokens: Optional[List[str]] = None) -> None:
        """Initialize the handler.

        Args:
            answer_prefix_tokens: Token sequence that marks the start of the
                final answer. Defaults to ``DEFAULT_ANSWER_PREFIX_TOKENS``.
                An empty sequence means there is no prefix to wait for, so
                every token is streamed.
        """
        super().__init__()
        if answer_prefix_tokens is None:
            answer_prefix_tokens = DEFAULT_ANSWER_PREFIX_TOKENS
        # Copy so that mutating the caller's list (or the shared module-level
        # default) cannot silently change this handler's prefix.
        self.answer_prefix_tokens = list(answer_prefix_tokens)
        # Sliding window over the most recently seen tokens.
        self.last_tokens = [""] * len(self.answer_prefix_tokens)
        # With an empty prefix the answer starts immediately.
        self.answer_reached = not self.answer_prefix_tokens

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Run when LLM starts running.

        Resets the matching state so that each LLM call is inspected
        independently of the previous one.
        """
        # Clear the window as well as the flag: otherwise tokens left over
        # from the previous call could complete the prefix together with the
        # first tokens of this call and trigger a false match.
        self.last_tokens = [""] * len(self.answer_prefix_tokens)
        self.answer_reached = not self.answer_prefix_tokens

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Run on new LLM token. Only available when streaming is enabled.

        Args:
            token: The newly generated token.
        """
        # Remember the last n tokens, where n = len(answer_prefix_tokens)
        self.last_tokens.append(token)
        if len(self.last_tokens) > len(self.answer_prefix_tokens):
            self.last_tokens.pop(0)

        # Once the answer has started, every token belongs to it -- even if
        # the answer text happens to repeat the prefix sequence.
        if self.answer_reached:
            sys.stdout.write(token)
            sys.stdout.flush()
            return

        # The token completing the prefix is not part of the answer itself,
        # so it only flips the flag and is not printed.
        if self.last_tokens == self.answer_prefix_tokens:
            self.answer_reached = True
|
Loading…
Reference in New Issue
Block a user