diff --git a/docs/modules/agents/streaming_stdout_final_only.ipynb b/docs/modules/agents/streaming_stdout_final_only.ipynb new file mode 100644 index 00000000..1746c8e1 --- /dev/null +++ b/docs/modules/agents/streaming_stdout_final_only.ipynb @@ -0,0 +1,154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "23234b50-e6c6-4c87-9f97-259c15f36894", + "metadata": { + "tags": [] + }, + "source": [ + "# Only streaming final agent output" + ] + }, + { + "cell_type": "markdown", + "id": "29dd6333-307c-43df-b848-65001c01733b", + "metadata": {}, + "source": [ + "If you only want the final output of an agent to be streamed, you can use the callback ``FinalStreamingStdOutCallbackHandler``.\n", + "For this, the underlying LLM has to support streaming as well." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e4592215-6604-47e2-89ff-5db3af6d1e40", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.agents import load_tools\n", + "from langchain.agents import initialize_agent\n", + "from langchain.agents import AgentType\n", + "from langchain.callbacks.streaming_stdout_final_only import FinalStreamingStdOutCallbackHandler\n", + "from langchain.llms import OpenAI" + ] + }, + { + "cell_type": "markdown", + "id": "19a813f7", + "metadata": {}, + "source": [ + "Let's create the underlying LLM with ``streaming = True`` and pass a new instance of ``FinalStreamingStdOutCallbackHandler``." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7fe81ef4", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(streaming=True, callbacks=[FinalStreamingStdOutCallbackHandler()], temperature=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ff45b85d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago in 2023." 
+ ] + }, + { + "data": { + "text/plain": [ + "'Konrad Adenauer became Chancellor of Germany in 1949, 74 years ago in 2023.'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools = load_tools([\"wikipedia\", \"llm-math\"], llm=llm)\n", + "agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)\n", + "agent.run(\"It's 2023 now. How many years ago did Konrad Adenauer become Chancellor of Germany.\")" + ] + }, + { + "cell_type": "markdown", + "id": "53a743b8", + "metadata": {}, + "source": [ + "### Handling custom answer prefixes" + ] + }, + { + "cell_type": "markdown", + "id": "23602c62", + "metadata": {}, + "source": [ + "By default, we assume that the token sequence ``\"\\nFinal\", \" Answer\", \":\"`` indicates that the agent has reached an answer. We can, however, also pass a custom sequence to use as the answer prefix." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5662a638", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(\n", + " streaming=True,\n", + " callbacks=[FinalStreamingStdOutCallbackHandler(answer_prefix_tokens=[\"\\nThe\", \" answer\", \":\"])],\n", + " temperature=0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b1a96cc0", + "metadata": {}, + "source": [ + "Be aware that you will likely need to include whitespace and newline characters in your tokens. 
class FinalStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler):
    """Callback handler that streams only an agent's final answer to stdout.

    Only works with agents using LLMs that support streaming.

    Incoming tokens are kept in a sliding window as long as
    ``answer_prefix_tokens``. Once the window equals that sequence, every
    subsequent token of the current LLM call is written to ``sys.stdout``.
    """

    def __init__(self, answer_prefix_tokens: Optional[List[str]] = None) -> None:
        """Initialize the handler.

        Args:
            answer_prefix_tokens: Exact token sequence that marks the start
                of the final answer. Defaults to
                ``DEFAULT_ANSWER_PREFIX_TOKENS``. An empty sequence means
                there is no prefix to wait for, so every token is streamed.
        """
        super().__init__()
        if answer_prefix_tokens is None:
            answer_prefix_tokens = DEFAULT_ANSWER_PREFIX_TOKENS
        # Copy the sequence so that mutating this attribute can never alter
        # the shared module-level default or the caller's own list.
        self.answer_prefix_tokens = list(answer_prefix_tokens)
        self._reset_state()

    def _reset_state(self) -> None:
        """Clear the token window and the answer flag for a fresh LLM run."""
        self.last_tokens = [""] * len(self.answer_prefix_tokens)
        # With an empty prefix there is nothing to wait for.
        self.answer_reached = not self.answer_prefix_tokens

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Run when LLM starts running.

        Resets the token window as well as the answer flag, so tokens left
        over from a previous LLM call cannot combine with the first tokens
        of this call and produce a false prefix match.
        """
        self._reset_state()

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Run on new LLM token. Only available when streaming is enabled.

        Args:
            token: The newly generated token.
        """
        # Remember the last n tokens, where n = len(answer_prefix_tokens)
        self.last_tokens.append(token)
        if len(self.last_tokens) > len(self.answer_prefix_tokens):
            self.last_tokens.pop(0)

        # Once the answer has started, every token belongs to it. Checking
        # this first ensures a repeat of the prefix sequence inside the
        # answer does not swallow a token.
        if self.answer_reached:
            sys.stdout.write(token)
            sys.stdout.flush()
            return

        # The token completing the prefix is not part of the answer, so it
        # is not printed; streaming starts with the next token.
        if self.last_tokens == self.answer_prefix_tokens:
            self.answer_reached = True