mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
281 lines
8.0 KiB
Plaintext
281 lines
8.0 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "4aaeed2f",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# NGram Overlap ExampleSelector\n",
|
||
|
"\n",
|
||
|
"The NGramOverlapExampleSelector selects and orders examples based on which examples are most similar to the input, according to an ngram overlap score. The ngram overlap score is a float between 0.0 and 1.0, inclusive. \n",
|
||
|
"\n",
|
||
|
"The selector allows a threshold score to be set. Examples with an ngram overlap score less than or equal to the threshold are excluded. By default, the threshold is set to -1.0, so the selector does not exclude any examples and only reorders them. Setting the threshold to 0.0 excludes examples that have no ngram overlap with the input.\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "9cbc0acc",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from langchain.prompts import PromptTemplate\n",
|
||
|
"from langchain.prompts.example_selector.ngram_overlap import NGramOverlapExampleSelector\n",
|
||
|
"from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n",
|
||
|
"\n",
|
||
|
"example_prompt = PromptTemplate(\n",
|
||
|
" input_variables=[\"input\", \"output\"],\n",
|
||
|
" template=\"Input: {input}\\nOutput: {output}\",\n",
|
||
|
")\n",
|
||
|
"\n",
|
||
|
"# These are examples of a pretend task of creating antonyms.\n",
|
||
|
"examples = [\n",
|
||
|
" {\"input\": \"happy\", \"output\": \"sad\"},\n",
|
||
|
" {\"input\": \"tall\", \"output\": \"short\"},\n",
|
||
|
" {\"input\": \"energetic\", \"output\": \"lethargic\"},\n",
|
||
|
" {\"input\": \"sunny\", \"output\": \"gloomy\"},\n",
|
||
|
" {\"input\": \"windy\", \"output\": \"calm\"},\n",
|
||
|
"]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"id": "4f318f4b",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# These are examples of a fictional translation task.\n",
|
||
|
"examples = [\n",
|
||
|
" {\"input\": \"See Spot run.\", \"output\": \"Ver correr a Spot.\"},\n",
|
||
|
" {\"input\": \"My dog barks.\", \"output\": \"Mi perro ladra.\"},\n",
|
||
|
" {\"input\": \"Spot can run.\", \"output\": \"Spot puede correr.\"},\n",
|
||
|
"]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"id": "bf75e0fe",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"example_prompt = PromptTemplate(\n",
|
||
|
" input_variables=[\"input\", \"output\"],\n",
|
||
|
" template=\"Input: {input}\\nOutput: {output}\",\n",
|
||
|
")\n",
|
||
|
"example_selector = NGramOverlapExampleSelector(\n",
|
||
|
" # These are the examples it has available to choose from.\n",
|
||
|
" examples=examples, \n",
|
||
|
" # This is the PromptTemplate being used to format the examples.\n",
|
||
|
" example_prompt=example_prompt, \n",
|
||
|
" # This is the threshold at which the selector stops.\n",
|
||
|
" # It is set to -1.0 by default.\n",
|
||
|
" threshold=-1.0,\n",
|
||
|
" # For negative threshold:\n",
|
||
|
" # Selector sorts examples by ngram overlap score, and excludes none.\n",
|
||
|
" # For threshold greater than 1.0:\n",
|
||
|
" # Selector excludes all examples, and returns an empty list.\n",
|
||
|
" # For threshold equal to 0.0:\n",
|
||
|
" # Selector sorts examples by ngram overlap score,\n",
|
||
|
" # and excludes those with no ngram overlap with input.\n",
|
||
|
")\n",
|
||
|
"dynamic_prompt = FewShotPromptTemplate(\n",
|
||
|
" # We provide an ExampleSelector instead of examples.\n",
|
||
|
" example_selector=example_selector,\n",
|
||
|
" example_prompt=example_prompt,\n",
|
||
|
" prefix=\"Give the Spanish translation of every input\",\n",
|
||
|
" suffix=\"Input: {sentence}\\nOutput:\", \n",
|
||
|
" input_variables=[\"sentence\"],\n",
|
||
|
")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"id": "83fb218a",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Give the Spanish translation of every input\n",
|
||
|
"\n",
|
||
|
"Input: Spot can run.\n",
|
||
|
"Output: Spot puede correr.\n",
|
||
|
"\n",
|
||
|
"Input: See Spot run.\n",
|
||
|
"Output: Ver correr a Spot.\n",
|
||
|
"\n",
|
||
|
"Input: My dog barks.\n",
|
||
|
"Output: Mi perro ladra.\n",
|
||
|
"\n",
|
||
|
"Input: Spot can run fast.\n",
|
||
|
"Output:\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# An example input with large ngram overlap with \"Spot can run.\"\n",
|
||
|
"# and no overlap with \"My dog barks.\"\n",
|
||
|
"print(dynamic_prompt.format(sentence=\"Spot can run fast.\"))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "485f5307",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Give the Spanish translation of every input\n",
|
||
|
"\n",
|
||
|
"Input: Spot can run.\n",
|
||
|
"Output: Spot puede correr.\n",
|
||
|
"\n",
|
||
|
"Input: See Spot run.\n",
|
||
|
"Output: Ver correr a Spot.\n",
|
||
|
"\n",
|
||
|
"Input: Spot plays fetch.\n",
|
||
|
"Output: Spot juega a buscar.\n",
|
||
|
"\n",
|
||
|
"Input: My dog barks.\n",
|
||
|
"Output: Mi perro ladra.\n",
|
||
|
"\n",
|
||
|
"Input: Spot can run fast.\n",
|
||
|
"Output:\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# You can add examples to NGramOverlapExampleSelector as well.\n",
|
||
|
"new_example = {\"input\": \"Spot plays fetch.\", \"output\": \"Spot juega a buscar.\"}\n",
|
||
|
"\n",
|
||
|
"example_selector.add_example(new_example)\n",
|
||
|
"print(dynamic_prompt.format(sentence=\"Spot can run fast.\"))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"id": "606ce697",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Give the Spanish translation of every input\n",
|
||
|
"\n",
|
||
|
"Input: Spot can run.\n",
|
||
|
"Output: Spot puede correr.\n",
|
||
|
"\n",
|
||
|
"Input: See Spot run.\n",
|
||
|
"Output: Ver correr a Spot.\n",
|
||
|
"\n",
|
||
|
"Input: Spot plays fetch.\n",
|
||
|
"Output: Spot juega a buscar.\n",
|
||
|
"\n",
|
||
|
"Input: Spot can run fast.\n",
|
||
|
"Output:\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# You can set a threshold at which examples are excluded.\n",
|
||
|
"# For example, setting threshold equal to 0.0\n",
|
||
|
"# excludes examples with no ngram overlaps with input.\n",
|
||
|
"# Since \"My dog barks.\" has no ngram overlaps with \"Spot can run fast.\"\n",
|
||
|
"# it is excluded.\n",
|
||
|
"example_selector.threshold=0.0\n",
|
||
|
"print(dynamic_prompt.format(sentence=\"Spot can run fast.\"))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"id": "7f8d72f7",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Give the Spanish translation of every input\n",
|
||
|
"\n",
|
||
|
"Input: Spot can run.\n",
|
||
|
"Output: Spot puede correr.\n",
|
||
|
"\n",
|
||
|
"Input: Spot plays fetch.\n",
|
||
|
"Output: Spot juega a buscar.\n",
|
||
|
"\n",
|
||
|
"Input: Spot can play fetch.\n",
|
||
|
"Output:\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Setting small nonzero threshold\n",
|
||
|
"example_selector.threshold=0.09\n",
|
||
|
"print(dynamic_prompt.format(sentence=\"Spot can play fetch.\"))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"id": "09633aa8",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Give the Spanish translation of every input\n",
|
||
|
"\n",
|
||
|
"Input: Spot can play fetch.\n",
|
||
|
"Output:\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Setting threshold greater than 1.0\n",
|
||
|
"example_selector.threshold=1.0+1e-9\n",
|
||
|
"print(dynamic_prompt.format(sentence=\"Spot can play fetch.\"))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "39f30097",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.9.1"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|