From d3bdb8ea6d88f0088a155c217d6fe22df113b513 Mon Sep 17 00:00:00 2001 From: mbchang Date: Sat, 3 Jun 2023 16:48:48 -0700 Subject: [PATCH] FileCallbackHandler (#5589) # like [StdoutCallbackHandler](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/stdout.py), but writes to a file When running experiments I have found myself wanting to log the outputs of my chains in a more lightweight way than using WandB tracing. This PR contributes a callback handler that writes to file what `StdoutCallbackHandler` would print. ## Example Notebook See the included `filecallbackhandler.ipynb` notebook for usage. Would it be better to include this notebook under `modules/callbacks` or under `integrations/`? ![image](https://github.com/hwchase17/langchain/assets/6439365/c624de0e-343f-4eab-a55b-8808a887489f) ## Who can review? Community members can review the PR once tests pass. Tag maintainers/contributors who might be interested: @agola11 --- .../callbacks/filecallbackhandler.ipynb | 175 ++++++++++++++++++ langchain/callbacks/__init__.py | 2 + langchain/callbacks/file.py | 75 ++++++++ langchain/input.py | 15 +- 4 files changed, 260 insertions(+), 7 deletions(-) create mode 100644 docs/modules/callbacks/filecallbackhandler.ipynb create mode 100644 langchain/callbacks/file.py diff --git a/docs/modules/callbacks/filecallbackhandler.ipynb b/docs/modules/callbacks/filecallbackhandler.ipynb new file mode 100644 index 00000000..a3f90176 --- /dev/null +++ b/docs/modules/callbacks/filecallbackhandler.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "63b87b91", + "metadata": {}, + "source": [ + "# Logging to file\n", + "This example shows how to print logs to file. It shows how to use the `FileCallbackHandler`, which does the same thing as [`StdOutCallbackHandler`](https://python.langchain.com/en/latest/modules/callbacks/getting_started.html#using-an-existing-handler), but instead writes the output to file. 
It also uses the `loguru` library to log other outputs that are not captured by the handler." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6cb156cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new LLMChain chain...\u001b[0m\n", + "Prompt after formatting:\n", + "\u001b[32;1m\u001b[1;3m1 + 2 = \u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2023-06-01 18:36:38.929\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m20\u001b[0m - \u001b[1m\n", + "\n", + "3\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "from loguru import logger\n", + "\n", + "from langchain.callbacks import FileCallbackHandler\n", + "from langchain.chains import LLMChain\n", + "from langchain.llms import OpenAI\n", + "from langchain.prompts import PromptTemplate\n", + "\n", + "logfile = 'output.log'\n", + "\n", + "logger.add(logfile, colorize=True, enqueue=True)\n", + "handler = FileCallbackHandler(logfile)\n", + "\n", + "llm = OpenAI()\n", + "prompt = PromptTemplate.from_template(\"1 + {number} = \")\n", + "\n", + "# this chain will both print to stdout (because verbose=True) and write to 'output.log'\n", + "# if verbose=False, the FileCallbackHandler will still write to 'output.log'\n", + "chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler], verbose=True)\n", + "answer = chain.run(number=2)\n", + "logger.info(answer)" + ] + }, + { + "cell_type": "markdown", + "id": "9c50d54f", + "metadata": {}, + "source": [ + "Now we can open the file `output.log` to see that the output has been captured." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "aa32dc0a", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install ansi2html > /dev/null" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4af00719", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n",
+       "\n",
+       "\n",
+       "> Entering new LLMChain chain...\n",
+       "Prompt after formatting:\n",
+       "1 + 2 = \n",
+       "\n",
+       "> Finished chain.\n",
+       "2023-06-01 18:36:38.929 | INFO     | __main__:<module>:20 - \n",
+       "\n",
+       "3\n",
+       "\n",
+       "
\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import display, HTML\n", + "from ansi2html import Ansi2HTMLConverter\n", + "\n", + "with open('output.log', 'r') as f:\n", + " content = f.read()\n", + "\n", + "conv = Ansi2HTMLConverter()\n", + "html = conv.convert(content, full=True)\n", + "\n", + "display(HTML(html))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/callbacks/__init__.py b/langchain/callbacks/__init__.py index 8e1f6ad9..4165a3d0 100644 --- a/langchain/callbacks/__init__.py +++ b/langchain/callbacks/__init__.py @@ -4,6 +4,7 @@ from langchain.callbacks.aim_callback import AimCallbackHandler from langchain.callbacks.argilla_callback import ArgillaCallbackHandler from langchain.callbacks.clearml_callback import ClearMLCallbackHandler from langchain.callbacks.comet_ml_callback import CometCallbackHandler +from langchain.callbacks.file import FileCallbackHandler from langchain.callbacks.human import HumanApprovalCallbackHandler from langchain.callbacks.manager import ( get_openai_callback, @@ -21,6 +22,7 @@ __all__ = [ "ArgillaCallbackHandler", "OpenAICallbackHandler", "StdOutCallbackHandler", + "FileCallbackHandler", "AimCallbackHandler", "WandbCallbackHandler", "MlflowCallbackHandler", diff --git a/langchain/callbacks/file.py b/langchain/callbacks/file.py new file mode 100644 index 00000000..77edb879 --- /dev/null +++ b/langchain/callbacks/file.py @@ -0,0 +1,75 @@ +"""Callback Handler that writes to a file.""" +from typing import Any, 
Dict, Optional, TextIO, cast + +from langchain.callbacks.base import BaseCallbackHandler +from langchain.input import print_text +from langchain.schema import AgentAction, AgentFinish + + +class FileCallbackHandler(BaseCallbackHandler): + """Callback Handler that writes to a file.""" + + def __init__( + self, filename: str, mode: str = "a", color: Optional[str] = None + ) -> None: + """Initialize callback handler.""" + self.file = cast(TextIO, open(filename, mode)) + self.color = color + + def __del__(self) -> None: + """Destructor to cleanup when done.""" + self.file.close() + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> None: + """Print out that we are entering a chain.""" + class_name = serialized["name"] + print_text( + f"\n\n\033[1m> Entering new {class_name} chain...\033[0m", + end="\n", + file=self.file, + ) + + def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None: + """Print out that we finished a chain.""" + print_text("\n\033[1m> Finished chain.\033[0m", end="\n", file=self.file) + + def on_agent_action( + self, action: AgentAction, color: Optional[str] = None, **kwargs: Any + ) -> Any: + """Run on agent action.""" + print_text(action.log, color=color if color else self.color, file=self.file) + + def on_tool_end( + self, + output: str, + color: Optional[str] = None, + observation_prefix: Optional[str] = None, + llm_prefix: Optional[str] = None, + **kwargs: Any, + ) -> None: + """If not the final action, print out observation.""" + if observation_prefix is not None: + print_text(f"\n{observation_prefix}", file=self.file) + print_text(output, color=color if color else self.color, file=self.file) + if llm_prefix is not None: + print_text(f"\n{llm_prefix}", file=self.file) + + def on_text( + self, + text: str, + color: Optional[str] = None, + end: str = "", + **kwargs: Any, + ) -> None: + """Run when agent ends.""" + print_text(text, color=color if color else self.color, end=end, 
file=self.file) + + def on_agent_finish( + self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any + ) -> None: + """Run on agent end.""" + print_text( + finish.log, color=color if color else self.color, end="\n", file=self.file + ) diff --git a/langchain/input.py b/langchain/input.py index a0a8855d..8d5ae6cc 100644 --- a/langchain/input.py +++ b/langchain/input.py @@ -1,5 +1,5 @@ """Handle chained inputs.""" -from typing import Dict, List, Optional +from typing import Dict, List, Optional, TextIO _TEXT_COLOR_MAPPING = { "blue": "36;1", @@ -32,10 +32,11 @@ def get_bolded_text(text: str) -> str: return f"\033[1m{text}\033[0m" -def print_text(text: str, color: Optional[str] = None, end: str = "") -> None: +def print_text( + text: str, color: Optional[str] = None, end: str = "", file: Optional[TextIO] = None +) -> None: """Print text with highlighting and no end characters.""" - if color is None: - text_to_print = text - else: - text_to_print = get_colored_text(text, color) - print(text_to_print, end=end) + text_to_print = get_colored_text(text, color) if color else text + print(text_to_print, end=end, file=file) + if file: + file.flush() # ensure all printed content is written to file