forked from Archives/langchain
# Streaming only final output of agent (#2483)

As requested in issue #2483, this callback allows streaming only the final output of an agent (i.e., not the intermediate steps). Fixes #2483.

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
parent
3bc0bf0079
commit
7388248b3e
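For reference, the usage demonstrated in the notebook below condenses to a few lines (the OpenAI API key is assumed to be configured in the environment):

```python
from langchain.agents import load_tools, initialize_agent, AgentType
from langchain.callbacks.streaming_stdout_final_only import FinalStreamingStdOutCallbackHandler
from langchain.llms import OpenAI

# The underlying LLM must be created with streaming=True so the
# callback receives tokens one at a time.
llm = OpenAI(streaming=True, callbacks=[FinalStreamingStdOutCallbackHandler()], temperature=0)
tools = load_tools(["wikipedia", "llm-math"], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)
agent.run("It's 2023 now. How many years ago did Konrad Adenauer become Chancellor of Germany.")
```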
docs/modules/agents/streaming_stdout_final_only.ipynb (new file, 154 lines)
@@ -0,0 +1,154 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "23234b50-e6c6-4c87-9f97-259c15f36894",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Only streaming final agent output"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "29dd6333-307c-43df-b848-65001c01733b",
   "metadata": {},
   "source": [
    "If you only want the final output of an agent to be streamed, you can use the callback ``FinalStreamingStdOutCallbackHandler``.\n",
    "For this, the underlying LLM has to support streaming as well."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e4592215-6604-47e2-89ff-5db3af6d1e40",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain.agents import load_tools\n",
    "from langchain.agents import initialize_agent\n",
    "from langchain.agents import AgentType\n",
    "from langchain.callbacks.streaming_stdout_final_only import FinalStreamingStdOutCallbackHandler\n",
    "from langchain.llms import OpenAI"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "19a813f7",
   "metadata": {},
   "source": [
    "Let's create the underlying LLM with ``streaming = True`` and pass a new instance of ``FinalStreamingStdOutCallbackHandler``."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7fe81ef4",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI(streaming=True, callbacks=[FinalStreamingStdOutCallbackHandler()], temperature=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ff45b85d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago in 2023."
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago in 2023.'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tools = load_tools([\"wikipedia\", \"llm-math\"], llm=llm)\n",
    "agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)\n",
    "agent.run(\"It's 2023 now. How many years ago did Konrad Adenauer become Chancellor of Germany.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "53a743b8",
   "metadata": {},
   "source": [
    "### Handling custom answer prefixes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "23602c62",
   "metadata": {},
   "source": [
    "By default, we assume that the token sequence ``\"\\nFinal\", \" Answer\", \":\"`` indicates that the agent has reached an answer. We can, however, also pass a custom sequence to use as answer prefix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5662a638",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI(\n",
    "    streaming=True,\n",
    "    callbacks=[FinalStreamingStdOutCallbackHandler(answer_prefix_tokens=[\"\\nThe\", \" answer\", \":\"])],\n",
    "    temperature=0\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b1a96cc0",
   "metadata": {},
   "source": [
    "Be aware that you likely need to include whitespace and newline characters in your tokens."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9278b522",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
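The notebook's trailing empty cell leaves finding the right prefix tokens as an exercise. One way to discover how a given model tokenizes the answer prefix is to log each raw token; a minimal sketch, using a hypothetical TokenLogger subclass (not part of this commit) of the StreamingStdOutCallbackHandler imported above:

```python
from typing import Any

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


class TokenLogger(StreamingStdOutCallbackHandler):
    """Hypothetical helper (not part of this commit): print the repr of
    every raw token so whitespace and newlines become visible."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        print(repr(token))  # e.g. '\nFinal', ' Answer', ':'
```

Running an agent once with `callbacks=[TokenLogger()]` shows exactly which token sequence to pass as `answer_prefix_tokens`.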
langchain/callbacks/streaming_stdout_final_only.py (new file, 49 lines)
@@ -0,0 +1,49 @@
"""Callback Handler streams to stdout on new llm token."""
import sys
from typing import Any, Dict, List, Optional

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

DEFAULT_ANSWER_PREFIX_TOKENS = ["\nFinal", " Answer", ":"]


class FinalStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler):
    """Callback handler for streaming in agents.
    Only works with agents using LLMs that support streaming.

    Only the final output of the agent will be streamed.
    """

    def __init__(self, answer_prefix_tokens: Optional[List[str]] = None) -> None:
        super().__init__()
        if answer_prefix_tokens is None:
            answer_prefix_tokens = DEFAULT_ANSWER_PREFIX_TOKENS
        self.answer_prefix_tokens = answer_prefix_tokens
        self.last_tokens = [""] * len(answer_prefix_tokens)
        self.answer_reached = False

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Run when LLM starts running."""
        self.answer_reached = False

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Run on new LLM token. Only available when streaming is enabled."""

        # Remember the last n tokens, where n = len(answer_prefix_tokens)
        self.last_tokens.append(token)
        if len(self.last_tokens) > len(self.answer_prefix_tokens):
            self.last_tokens.pop(0)

        # Check if the last n tokens match the answer_prefix_tokens list ...
        if self.last_tokens == self.answer_prefix_tokens:
            self.answer_reached = True
            # Do not print the last token in answer_prefix_tokens,
            # as it's not part of the answer yet
            return

        # ... if yes, then print tokens from now on
        if self.answer_reached:
            sys.stdout.write(token)
            sys.stdout.flush()
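To see the sliding-window matching in isolation, the handler can be driven with a hand-written token stream; a minimal sketch (not part of this commit), assuming the module path introduced above:

```python
from langchain.callbacks.streaming_stdout_final_only import (
    FinalStreamingStdOutCallbackHandler,
)

handler = FinalStreamingStdOutCallbackHandler()
handler.on_llm_start(serialized={}, prompts=[])  # resets answer_reached

# Everything up to and including the "\nFinal", " Answer", ":" window is
# suppressed; only the tokens that follow it reach stdout.
for token in ["I", " know", ".", "\nFinal", " Answer", ":", " 74", " years", "."]:
    handler.on_llm_new_token(token)
# prints: " 74 years."
```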