You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/langchain/prompts/optimized.py

225 lines
8.5 KiB
Python

"""Optimized prompt schema definition."""
import re
from typing import Any, Callable, Dict, List, Optional, Type

from pydantic import BaseModel, Extra, root_validator

from langchain.embeddings.base import Embeddings
from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING
from langchain.prompts.prompt import Prompt
from langchain.vectorstores.base import VectorStore
class OptimizedPrompt(BaseModel):
    r"""Schema to represent an optimized prompt for an LLM.

    Examples are kept in a vector store. When the prompt is formatted, the
    examples most similar to the inputs are retrieved and then dropped one by
    one (from the end of the result list) until the rendered prompt no longer
    exceeds ``max_length`` as measured by ``get_text_length``.

    Example:
        .. code-block:: python

            from langchain.prompts.optimized import OptimizedPrompt

            vectorstore = FAISS.from_texts(examples, OpenAIEmbeddings())
            optimized_prompt = OptimizedPrompt(
                example_separator="\n\n",
                prefix="",
                suffix="\n\nSay {foo}",
                input_variables=["foo"],
                max_length=200,
                get_text_length=word_count,
                vectorstore=vectorstore,
            )
    """

    vectorstore: VectorStore
    """Vectorstore to use for storing the embeddings."""

    example_separator: str = "\n\n"
    """Example separator (two newlines by default) for the dynamic prompt."""

    input_variables: List[str] = []
    """A list of the names of the variables the prompt template expects."""

    prefix: str = ""
    """Prefix for the prompt."""

    suffix: str = ""
    """Suffix for the prompt."""

    template_format: str = "f-string"
    """The format of the prompt template. Options are: 'f-string'."""

    get_text_length: Callable[[str], int] = lambda x: len(re.split("\n| ", x))
    """Function to measure prompt length. Defaults to word count."""

    max_length: int = 2048
    """Max length for the prompt, beyond which examples are cut."""

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True
        extra = Extra.forbid

    def template(self, example_list: List[str], **kwargs: Any) -> str:
        """Return the rendered prompt for the given example list.

        The prefix, every example and the suffix are joined with
        ``example_separator`` and the result is passed through the formatter
        registered for ``template_format``.

        Args:
            example_list: Example strings to place between prefix and suffix.
            kwargs: Values for the template variables.

        Returns:
            The formatted prompt string.
        """
        # NOTE: an empty prefix is still joined, so the result then starts
        # with the separator. Kept as-is to preserve existing output.
        template = self.example_separator.join(
            [self.prefix, *example_list, self.suffix]
        )
        return DEFAULT_FORMATTER_MAPPING[self.template_format](template, **kwargs)

    def format(self, k: int = 4, **kwargs: Any) -> str:
        """Optimize the examples in the prompt for the given inputs.

        Args:
            k: Number of examples to aim for (may be trimmed by optimizer
                afterwards). Because ``k`` is a named parameter, a template
                variable called ``k`` cannot be supplied through ``kwargs``.
            kwargs: Any arguments to be passed to the prompt template.

        Returns:
            A formatted string.

        Example:
            .. code-block:: python

                prompt.format(variable1="foo")
        """
        # The similarity query is all input values joined by spaces. Values
        # are coerced to str so non-string inputs do not break the join.
        query = " ".join(str(value) for value in kwargs.values())
        example_docs = self.vectorstore.similarity_search(query, k=k)
        curr_examples = [str(doc.page_content) for doc in example_docs]
        template = self.template(curr_examples, **kwargs)
        # Drop examples from the end of the result list until the prompt fits
        # or there are no examples left to drop.
        while self.get_text_length(template) > self.max_length and curr_examples:
            curr_examples = curr_examples[:-1]
            template = self.template(curr_examples, **kwargs)
        return template

    @root_validator()
    def template_is_valid(cls, values: Dict) -> Dict:
        """Check that prefix, suffix and input variables are consistent.

        Args:
            values: Field values collected by pydantic for this model.

        Returns:
            The unchanged ``values`` dict when every check passes.

        Raises:
            ValueError: If more than one input variable is declared, the
                template format is unknown, the length callable does not
                return an int, or prefix + suffix cannot be formatted with
                the declared input variables.
        """
        input_variables = values["input_variables"]
        if len(input_variables) > 1:
            raise ValueError("Only one input variable allowed for optimized prompt;")

        prefix = values["prefix"]
        suffix = values["suffix"]
        template_format = values["template_format"]
        if template_format not in DEFAULT_FORMATTER_MAPPING:
            valid_formats = list(DEFAULT_FORMATTER_MAPPING)
            raise ValueError(
                f"Invalid template format. Got `{template_format}`;"
                f" should be one of {valid_formats}"
            )

        # Explicit check instead of `assert`, which is removed under
        # `python -O` and would silently disable this validation.
        result = values["get_text_length"]("foo")
        if not isinstance(result, int):
            raise ValueError(
                "Invalid text length callable, must take string & return int;"
            )

        # Dry-run the formatter with placeholder inputs to catch template
        # variables that are not listed in input_variables.
        dummy_inputs = {input_variable: "foo" for input_variable in input_variables}
        formatter_func = DEFAULT_FORMATTER_MAPPING[template_format]
        try:
            formatter_func(prefix + suffix, **dummy_inputs)
        except KeyError as err:
            raise ValueError("Invalid prompt schema.") from err
        return values

    @classmethod
    def from_examples(
        cls,
        examples: List[str],
        suffix: str,
        input_variables: List[str],
        embeddings: Embeddings,
        vectorstore_cls: Type[VectorStore],
        example_separator: str = "\n\n",
        prefix: str = "",
        **vectorstore_cls_kwargs: Any,
    ) -> "OptimizedPrompt":
        """Create k-shot prompt optimizer using example list and embeddings.

        Reshuffles examples for the prompt dynamically based on query
        similarity. Each plain-string example is wrapped as ``{"text": ...}``
        and handed to ``from_structured_examples`` with a pass-through
        ``"{text}"`` example prompt.

        Args:
            examples: List of examples to use in the prompt.
            suffix: String to go after the list of examples. Should generally
                set up the user's input.
            input_variables: A list of variable names the final prompt
                template will expect.
            embeddings: An initialized embedding API interface, e.g.
                OpenAIEmbeddings().
            vectorstore_cls: A vector store DB interface class, e.g. FAISS.
            example_separator: The separator to use in between examples.
                Defaults to two new line characters.
            prefix: String that should go before any examples. Defaults to an
                empty string.
            vectorstore_cls_kwargs: Optional keyword arguments forwarded to
                ``vectorstore_cls.from_texts`` (e.g. a URL for the store).

        Returns:
            The OptimizedPrompt instantiated, backed by a vector store.
        """
        dict_examples = [{"text": example} for example in examples]
        example_prompt = Prompt(input_variables=["text"], template="{text}")
        return cls.from_structured_examples(
            dict_examples,
            example_prompt,
            suffix,
            input_variables,
            embeddings,
            vectorstore_cls=vectorstore_cls,
            example_separator=example_separator,
            prefix=prefix,
            **vectorstore_cls_kwargs,
        )

    @classmethod
    def from_structured_examples(
        cls,
        examples: List[dict],
        example_prompt: Prompt,
        suffix: str,
        input_variables: List[str],
        embeddings: Embeddings,
        vectorstore_cls: Type[VectorStore],
        example_separator: str = "\n\n",
        prefix: str = "",
        example_key: Optional[str] = None,
        **vectorstore_cls_kwargs: Any,
    ) -> "OptimizedPrompt":
        """Create k-shot prompt optimizer using example list and embeddings.

        Reshuffles examples for the prompt dynamically based on query
        similarity.

        Args:
            examples: List of structured examples to use in the prompt.
            example_prompt: Prompt used to format the examples.
            suffix: String to go after the list of examples. Should generally
                set up the user's input.
            input_variables: A list of variable names the final prompt
                template will expect.
            embeddings: An initialized embedding API interface, e.g.
                OpenAIEmbeddings().
            vectorstore_cls: A vector store DB interface class, e.g. FAISS.
            example_separator: The separator to use in between examples.
                Defaults to two new line characters.
            prefix: String that should go before any examples. Defaults to an
                empty string.
            example_key: Optional key in each example whose value is
                vectorized. If None, each example is formatted with
                ``example_prompt`` and that whole string is vectorized.
            vectorstore_cls_kwargs: Optional keyword arguments forwarded to
                ``vectorstore_cls.from_texts`` (e.g. a URL for the store).

        Returns:
            The OptimizedPrompt instantiated, backed by a vector store.
        """
        if example_key is None:
            string_examples = [example_prompt.format(**example) for example in examples]
        else:
            # Only the selected field is stored, so it is also the text that
            # later appears in the prompt for this example.
            string_examples = [example[example_key] for example in examples]
        vectorstore = vectorstore_cls.from_texts(
            string_examples, embeddings, **vectorstore_cls_kwargs
        )
        return cls(
            suffix=suffix,
            input_variables=input_variables,
            example_separator=example_separator,
            prefix=prefix,
            vectorstore=vectorstore,
        )