From 2719e49718c26218c30afe707e04b10e35be6598 Mon Sep 17 00:00:00 2001
From: Nuno Campos
Date: Sun, 22 Oct 2023 19:02:17 +0100
Subject: [PATCH] Add how-to guide on runnable generators (#12135)

---
 .../how_to/generators.ipynb | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 docs/docs/expression_language/how_to/generators.ipynb

diff --git a/docs/docs/expression_language/how_to/generators.ipynb b/docs/docs/expression_language/how_to/generators.ipynb
new file mode 100644
index 0000000000..6dd76709ac
--- /dev/null
+++ b/docs/docs/expression_language/how_to/generators.ipynb
@@ -0,0 +1,119 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Custom generator functions\n",
+    "\n",
+    "You can use generator functions (i.e. functions that use the `yield` keyword and behave like iterators) in an LCEL pipeline.\n",
+    "\n",
+    "The signature of these generators should be `Iterator[Input] -> Iterator[Output]`, or `AsyncIterator[Input] -> AsyncIterator[Output]` for async generators.\n",
+    "\n",
+    "These are useful for:\n",
+    "- implementing a custom output parser\n",
+    "- modifying the output of a previous step, while preserving streaming capabilities\n",
+    "\n",
+    "Let's implement a custom output parser for comma-separated lists."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "lion, tiger, wolf, gorilla, panda\n"
+     ]
+    }
+   ],
+   "source": [
+    "from typing import Iterator, List\n",
+    "\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.prompts.chat import ChatPromptTemplate\n",
+    "from langchain.schema.output_parser import StrOutputParser\n",
+    "\n",
+    "\n",
+    "prompt = ChatPromptTemplate.from_template(\n",
+    "    \"Write a comma-separated list of 5 animals similar to: {animal}\"\n",
+    ")\n",
+    "model = ChatOpenAI(temperature=0.0)\n",
+    "\n",
+    "\n",
+    "str_chain = prompt | model | StrOutputParser()\n",
+    "\n",
+    "print(str_chain.invoke({\"animal\": \"bear\"}))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is a custom parser that splits an iterator of LLM tokens\n",
+    "# into a list of strings separated by commas\n",
+    "def split_into_list(input: Iterator[str]) -> Iterator[List[str]]:\n",
+    "    # hold partial input until we get a comma\n",
+    "    buffer = \"\"\n",
+    "    for chunk in input:\n",
+    "        # add current chunk to buffer\n",
+    "        buffer += chunk\n",
+    "        # while there are commas in the buffer\n",
+    "        while \",\" in buffer:\n",
+    "            # split buffer on comma\n",
+    "            comma_index = buffer.index(\",\")\n",
+    "            # yield everything before the comma\n",
+    "            yield [buffer[:comma_index].strip()]\n",
+    "            # save the rest for the next iteration\n",
+    "            buffer = buffer[comma_index + 1 :]\n",
+    "    # yield the last chunk\n",
+    "    yield [buffer.strip()]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['lion', 'tiger', 'wolf', 'gorilla', 'panda']\n"
+     ]
+    }
+   ],
+   "source": [
+    "list_chain = str_chain | split_into_list\n",
+    "\n",
+    "print(list_chain.invoke({\"animal\": \"bear\"}))\n"
+   ]
+  },
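+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Because the parser is itself a generator, the chain keeps streaming: each list is yielded as soon as its comma arrives instead of after the whole response is finished. The next cell is a minimal, unexecuted sketch of that behaviour using the chain's `stream` method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Stream the parsed lists as the model produces tokens\n",
+    "# (output not shown here; each chunk is a single-element list such as ['lion'])\n",
+    "for chunk in list_chain.stream({\"animal\": \"bear\"}):\n",
+    "    print(chunk, flush=True)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Async version\n",
+    "\n",
+    "The same parser can be written as an async generator with the `AsyncIterator[Input] -> AsyncIterator[Output]` signature mentioned above and piped into the chain in the same way. The cell below is an unexecuted sketch: the name `asplit_into_list` is our own, and it assumes the chain is awaited from an async context (such as a notebook) via `ainvoke`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import AsyncIterator\n",
+    "\n",
+    "\n",
+    "# Async flavour of the same parser: consumes an async iterator of tokens\n",
+    "# and yields lists of comma-separated values\n",
+    "# NOTE: `asplit_into_list` is an illustrative name, not a LangChain API\n",
+    "async def asplit_into_list(\n",
+    "    input: AsyncIterator[str],\n",
+    ") -> AsyncIterator[List[str]]:\n",
+    "    buffer = \"\"\n",
+    "    async for chunk in input:\n",
+    "        buffer += chunk\n",
+    "        while \",\" in buffer:\n",
+    "            comma_index = buffer.index(\",\")\n",
+    "            yield [buffer[:comma_index].strip()]\n",
+    "            buffer = buffer[comma_index + 1 :]\n",
+    "    yield [buffer.strip()]\n",
+    "\n",
+    "\n",
+    "alist_chain = str_chain | asplit_into_list\n",
+    "\n",
+    "# In an async context this can be awaited directly, e.g.:\n",
+    "# await alist_chain.ainvoke({\"animal\": \"bear\"})\n"
+   ]
+  }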
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}