From db2e9c2b0d1192e589b5f74a17e30b516fe70637 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 28 Feb 2023 08:40:35 -0800 Subject: [PATCH] partial variables (#1308) --- docs/modules/prompts/examples/partial.ipynb | 184 +++++++++++++++++++ docs/modules/prompts/how_to_guides.rst | 2 + langchain/prompts/base.py | 41 ++++- langchain/prompts/few_shot.py | 4 +- langchain/prompts/few_shot_with_templates.py | 23 ++- langchain/prompts/prompt.py | 4 +- tests/unit_tests/prompts/test_few_shot.py | 89 +++++++++ tests/unit_tests/prompts/test_prompt.py | 37 ++++ 8 files changed, 370 insertions(+), 14 deletions(-) create mode 100644 docs/modules/prompts/examples/partial.ipynb diff --git a/docs/modules/prompts/examples/partial.ipynb b/docs/modules/prompts/examples/partial.ipynb new file mode 100644 index 00000000..f7a15b35 --- /dev/null +++ b/docs/modules/prompts/examples/partial.ipynb @@ -0,0 +1,184 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9355a547", + "metadata": {}, + "source": [ + "# Partial Prompt Templates\n", + "\n", + "A prompt template is a class with a `.format` method which takes in a key-value map and returns a string (a prompt) to pass to the language model. Like other methods, it can make sense to \"partial\" a prompt template - e.g. pass in a subset of the required values, so as to create a new prompt template which expects only the remaining subset of values.\n", + "\n", + "LangChain supports this in two ways: we allow for partially formatted prompts (1) with string values, (2) with functions that return string values. These two different ways support different use cases. In the documentation below we go over the motivations for both use cases as well as how to do it in LangChain.\n", + "\n", + "## Partial With Strings\n", + "\n", + "One common use case for wanting to partial a prompt template is if you get some of the variables before others. For example, suppose you have a prompt template that requires two variables, `foo` and `bar`. 
If you get the `foo` value early on in the chain, but the `bar` value later, it can be annoying to wait until you have both variables in the same place to pass them to the prompt template. Instead, you can partial the prompt template with the `foo` value, and then pass the partialed prompt template along and just use that. Below is an example of doing this:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "643af5da", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4080d8d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "foobaz\n" + ] + } + ], + "source": [ + "prompt = PromptTemplate(template=\"{foo}{bar}\", input_variables=[\"foo\", \"bar\"])\n", + "partial_prompt = prompt.partial(foo=\"foo\");\n", + "print(partial_prompt.format(bar=\"baz\"))" + ] + }, + { + "cell_type": "markdown", + "id": "9986766e", + "metadata": {}, + "source": [ + "You can also just initialize the prompt with the partialed variables." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e2ce95b3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "foobaz\n" + ] + } + ], + "source": [ + "prompt = PromptTemplate(template=\"{foo}{bar}\", input_variables=[\"bar\"], partial_variables={\"foo\": \"foo\"})\n", + "print(prompt.format(bar=\"baz\"))" + ] + }, + { + "cell_type": "markdown", + "id": "a9c66f83", + "metadata": {}, + "source": [ + "## Partial With Functions\n", + "\n", + "The other common use is to partial with a function. The use case for this is when you have a variable you know that you always want to fetch in a common way. A prime example of this is with date or time. Imagine you have a prompt which you always want to have the current date. 
You can't hard code it in the prompt, and passing it along with the other input variables is a bit annoying. In this case, it's very handy to be able to partial the prompt with a function that always returns the current date." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d0712d8a", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "def _get_datetime():\n", + " now = datetime.now()\n", + " return now.strftime(\"%m/%d/%Y, %H:%M:%S\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4cbcb666", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tell me a funny joke about the day 02/27/2023, 22:15:16\n" + ] + } + ], + "source": [ + "prompt = PromptTemplate(\n", + " template=\"Tell me a {adjective} joke about the day {date}\", \n", + " input_variables=[\"adjective\", \"date\"]\n", + ");\n", + "partial_prompt = prompt.partial(date=_get_datetime)\n", + "print(partial_prompt.format(adjective=\"funny\"))" + ] + }, + { + "cell_type": "markdown", + "id": "ffed6811", + "metadata": {}, + "source": [ + "You can also just initialize the prompt with the partialed variables, which often makes more sense in this workflow." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "96285b25", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tell me a funny joke about the day 02/27/2023, 22:15:16\n" + ] + } + ], + "source": [ + "prompt = PromptTemplate(\n", + " template=\"Tell me a {adjective} joke about the day {date}\", \n", + " input_variables=[\"adjective\"],\n", + " partial_variables={\"date\": _get_datetime}\n", + ");\n", + "print(prompt.format(adjective=\"funny\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4bff16f7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/prompts/how_to_guides.rst b/docs/modules/prompts/how_to_guides.rst index 6b50c3b9..762c02d2 100644 --- a/docs/modules/prompts/how_to_guides.rst +++ b/docs/modules/prompts/how_to_guides.rst @@ -17,6 +17,8 @@ The user guide here shows more advanced workflows and how to use the library in `Few Shot Prompt Examples <./examples/few_shot_examples.html>`_: Examples of Few Shot Prompt Templates. +`Partial Prompt Template <./examples/partial.html>`_: How to partial Prompt Templates. + .. 
toctree:: diff --git a/langchain/prompts/base.py b/langchain/prompts/base.py index bb93ca9b..a22e2ef6 100644 --- a/langchain/prompts/base.py +++ b/langchain/prompts/base.py @@ -1,12 +1,14 @@ """BasePrompt schema definition.""" +from __future__ import annotations + import json import re from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Mapping, Optional, Union import yaml -from pydantic import BaseModel, Extra, root_validator +from pydantic import BaseModel, Extra, Field, root_validator from langchain.formatting import formatter @@ -117,6 +119,9 @@ class BasePromptTemplate(BaseModel, ABC): """A list of the names of the variables the prompt template expects.""" output_parser: Optional[BaseOutputParser] = None """How to parse the output of calling an LLM on this formatted prompt.""" + partial_variables: Mapping[str, Union[str, Callable[[], str]]] = Field( + default_factory=dict + ) class Config: """Configuration for this pydantic object.""" @@ -132,8 +137,38 @@ class BasePromptTemplate(BaseModel, ABC): "Cannot have an input variable named 'stop', as it is used internally," " please rename." ) + if "stop" in values["partial_variables"]: + raise ValueError( + "Cannot have an partial variable named 'stop', as it is used " + "internally, please rename." 
+ ) + + overall = set(values["input_variables"]).intersection( + values["partial_variables"] + ) + if overall: + raise ValueError( + f"Found overlapping input and partial variables: {overall}" + ) return values + def partial(self, **kwargs: Union[str, Callable[[], str]]) -> BasePromptTemplate: + """Return a partial of the prompt template.""" + prompt_dict = self.__dict__.copy() + prompt_dict["input_variables"] = list( + set(self.input_variables).difference(kwargs) + ) + prompt_dict["partial_variables"] = {**self.partial_variables, **kwargs} + return type(self)(**prompt_dict) + + def _merge_partial_and_user_variables(self, **kwargs: Any) -> Dict[str, Any]: + # Get partial params: + partial_kwargs = { + k: v if isinstance(v, str) else v() + for k, v in self.partial_variables.items() + } + return {**partial_kwargs, **kwargs} + @abstractmethod def format(self, **kwargs: Any) -> str: """Format the prompt with the inputs. @@ -173,6 +208,8 @@ class BasePromptTemplate(BaseModel, ABC): prompt.save(file_path="path/prompt.yaml") """ + if self.partial_variables: + raise ValueError("Cannot save prompt with partial variables.") # Convert file to Path object. if isinstance(file_path, str): save_path = Path(file_path) diff --git a/langchain/prompts/few_shot.py b/langchain/prompts/few_shot.py index 5a7b0c54..184d2f88 100644 --- a/langchain/prompts/few_shot.py +++ b/langchain/prompts/few_shot.py @@ -68,7 +68,7 @@ class FewShotPromptTemplate(BasePromptTemplate, BaseModel): check_valid_template( values["prefix"] + values["suffix"], values["template_format"], - values["input_variables"], + values["input_variables"] + list(values["partial_variables"]), ) return values @@ -101,6 +101,7 @@ class FewShotPromptTemplate(BasePromptTemplate, BaseModel): prompt.format(variable1="foo") """ + kwargs = self._merge_partial_and_user_variables(**kwargs) # Get the examples to use. examples = self._get_examples(**kwargs) # Format the examples. 
@@ -110,6 +111,7 @@ class FewShotPromptTemplate(BasePromptTemplate, BaseModel): # Create the overall template. pieces = [self.prefix, *example_strings, self.suffix] template = self.example_separator.join([piece for piece in pieces if piece]) + # Format the template with the input variables. return DEFAULT_FORMATTER_MAPPING[self.template_format](template, **kwargs) diff --git a/langchain/prompts/few_shot_with_templates.py b/langchain/prompts/few_shot_with_templates.py index cba4f6d0..56a1d874 100644 --- a/langchain/prompts/few_shot_with_templates.py +++ b/langchain/prompts/few_shot_with_templates.py @@ -60,16 +60,18 @@ class FewShotPromptWithTemplates(BasePromptTemplate, BaseModel): @root_validator() def template_is_valid(cls, values: Dict) -> Dict: """Check that prefix, suffix and input variables are consistent.""" - input_variables = values["input_variables"] - expected_input_variables = set(values["suffix"].input_variables) - if values["prefix"] is not None: - expected_input_variables |= set(values["prefix"].input_variables) - missing_vars = expected_input_variables.difference(input_variables) - if missing_vars: - raise ValueError( - f"Got input_variables={input_variables}, but based on prefix/suffix " - f"expected {expected_input_variables}" - ) + if values["validate_template"]: + input_variables = values["input_variables"] + expected_input_variables = set(values["suffix"].input_variables) + expected_input_variables |= set(values["partial_variables"]) + if values["prefix"] is not None: + expected_input_variables |= set(values["prefix"].input_variables) + missing_vars = expected_input_variables.difference(input_variables) + if missing_vars: + raise ValueError( + f"Got input_variables={input_variables}, but based on " + f"prefix/suffix expected {expected_input_variables}" + ) return values class Config: @@ -101,6 +103,7 @@ class FewShotPromptWithTemplates(BasePromptTemplate, BaseModel): prompt.format(variable1="foo") """ + kwargs = 
self._merge_partial_and_user_variables(**kwargs) # Get the examples to use. examples = self._get_examples(**kwargs) # Format the examples. diff --git a/langchain/prompts/prompt.py b/langchain/prompts/prompt.py index 0b2dcfdd..777c1cbe 100644 --- a/langchain/prompts/prompt.py +++ b/langchain/prompts/prompt.py @@ -60,14 +60,16 @@ class PromptTemplate(BasePromptTemplate, BaseModel): prompt.format(variable1="foo") """ + kwargs = self._merge_partial_and_user_variables(**kwargs) return DEFAULT_FORMATTER_MAPPING[self.template_format](self.template, **kwargs) @root_validator() def template_is_valid(cls, values: Dict) -> Dict: """Check that template and input variables are consistent.""" if values["validate_template"]: + all_inputs = values["input_variables"] + list(values["partial_variables"]) check_valid_template( - values["template"], values["template_format"], values["input_variables"] + values["template"], values["template_format"], all_inputs ) return values diff --git a/tests/unit_tests/prompts/test_few_shot.py b/tests/unit_tests/prompts/test_few_shot.py index 59a0ac7a..22ba8e51 100644 --- a/tests/unit_tests/prompts/test_few_shot.py +++ b/tests/unit_tests/prompts/test_few_shot.py @@ -85,3 +85,92 @@ def test_few_shot_functionality() -> None: "Now you try to talk about party." ) assert output == expected_output + + +def test_partial_init_string() -> None: + """Test prompt can be initialized with partial variables.""" + prefix = "This is a test about {content}." + suffix = "Now you try to talk about {new_content}." 
+ examples = [ + {"question": "foo", "answer": "bar"}, + {"question": "baz", "answer": "foo"}, + ] + prompt = FewShotPromptTemplate( + suffix=suffix, + prefix=prefix, + input_variables=["new_content"], + partial_variables={"content": "animals"}, + examples=examples, + example_prompt=EXAMPLE_PROMPT, + example_separator="\n", + ) + output = prompt.format(new_content="party") + expected_output = ( + "This is a test about animals.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." + ) + assert output == expected_output + + +def test_partial_init_func() -> None: + """Test prompt can be initialized with partial variables.""" + prefix = "This is a test about {content}." + suffix = "Now you try to talk about {new_content}." + examples = [ + {"question": "foo", "answer": "bar"}, + {"question": "baz", "answer": "foo"}, + ] + prompt = FewShotPromptTemplate( + suffix=suffix, + prefix=prefix, + input_variables=["new_content"], + partial_variables={"content": lambda: "animals"}, + examples=examples, + example_prompt=EXAMPLE_PROMPT, + example_separator="\n", + ) + output = prompt.format(new_content="party") + expected_output = ( + "This is a test about animals.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." + ) + assert output == expected_output + + +def test_partial() -> None: + """Test prompt can be partialed.""" + prefix = "This is a test about {content}." + suffix = "Now you try to talk about {new_content}." + examples = [ + {"question": "foo", "answer": "bar"}, + {"question": "baz", "answer": "foo"}, + ] + prompt = FewShotPromptTemplate( + suffix=suffix, + prefix=prefix, + input_variables=["content", "new_content"], + examples=examples, + example_prompt=EXAMPLE_PROMPT, + example_separator="\n", + ) + new_prompt = prompt.partial(content="foo") + new_output = new_prompt.format(new_content="party") + expected_output = ( + "This is a test about foo.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." 
+ ) + assert new_output == expected_output + output = prompt.format(new_content="party", content="bar") + expected_output = ( + "This is a test about bar.\n" + "foo: bar\n" + "baz: foo\n" + "Now you try to talk about party." + ) + assert output == expected_output diff --git a/tests/unit_tests/prompts/test_prompt.py b/tests/unit_tests/prompts/test_prompt.py index 1789963e..d7f38874 100644 --- a/tests/unit_tests/prompts/test_prompt.py +++ b/tests/unit_tests/prompts/test_prompt.py @@ -108,3 +108,40 @@ def test_prompt_from_file() -> None: input_variables = ["question"] prompt = PromptTemplate.from_file(template_file, input_variables) assert prompt.template == "Question: {question}\nAnswer:" + + +def test_partial_init_string() -> None: + """Test prompt can be initialized with partial variables.""" + template = "This is a {foo} test." + prompt = PromptTemplate( + input_variables=[], template=template, partial_variables={"foo": 1} + ) + assert prompt.template == template + assert prompt.input_variables == [] + result = prompt.format() + assert result == "This is a 1 test." + + +def test_partial_init_func() -> None: + """Test prompt can be initialized with partial variables.""" + template = "This is a {foo} test." + prompt = PromptTemplate( + input_variables=[], template=template, partial_variables={"foo": lambda: 2} + ) + assert prompt.template == template + assert prompt.input_variables == [] + result = prompt.format() + assert result == "This is a 2 test." + + +def test_partial() -> None: + """Test prompt can be partialed.""" + template = "This is a {foo} test." + prompt = PromptTemplate(input_variables=["foo"], template=template) + assert prompt.template == template + assert prompt.input_variables == ["foo"] + new_prompt = prompt.partial(foo="3") + new_result = new_prompt.format() + assert new_result == "This is a 3 test." + result = prompt.format(foo="foo") + assert result == "This is a foo test."