forked from Archives/langchain
8d07ba0d51
# Fix wrong class instantiation in docs MMR example <!-- Thank you for contributing to LangChain! Your PR will appear in our release under the title you set. Please make sure it highlights your valuable contribution. Replace this with a description of the change, the issue it fixes (if applicable), and relevant context. List any dependencies required for this change. After you're done, someone will review your PR. They may suggest improvements. If no one reviews your PR within a few days, feel free to @-mention the same people again, as notifications can get lost. Finally, we'd love to show appreciation for your contribution - if you'd like us to shout you out on Twitter, please also include your handle! --> When looking at the Maximal Marginal Relevance ExampleSelector example at https://python.langchain.com/en/latest/modules/prompts/example_selectors/examples/mmr.html, I noticed that there seems to be an error. Initially, the `MaxMarginalRelevanceExampleSelector` class is used as an `example_selector` argument to the `FewShotPromptTemplate` class. Then, according to the text, a comparison is made to regular similarity search. However, the `FewShotPromptTemplate` still uses the `MaxMarginalRelevanceExampleSelector` class, so the output is the same. To fix it, I added an instantiation of the `SemanticSimilarityExampleSelector` class, because this seems to be what is intended. ## Who can review? @hwchase17
173 lines
5.3 KiB
Plaintext
173 lines
5.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "bc35afd0",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Maximal Marginal Relevance ExampleSelector\n",
|
|
"\n",
|
|
"The MaxMarginalRelevanceExampleSelector selects examples based on a combination of similarity to the inputs and diversity. It does this by finding the examples whose embeddings have the greatest cosine similarity with the inputs, and then iteratively adding them while penalizing them for closeness to already selected examples.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "ac95c968",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.prompts.example_selector import MaxMarginalRelevanceExampleSelector, SemanticSimilarityExampleSelector\n",
|
|
"from langchain.vectorstores import FAISS\n",
|
|
"from langchain.embeddings import OpenAIEmbeddings\n",
|
|
"from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n",
|
|
"\n",
|
|
"example_prompt = PromptTemplate(\n",
|
|
" input_variables=[\"input\", \"output\"],\n",
|
|
" template=\"Input: {input}\\nOutput: {output}\",\n",
|
|
")\n",
|
|
"\n",
|
|
"# These are a lot of examples of a pretend task of creating antonyms.\n",
|
|
"examples = [\n",
|
|
" {\"input\": \"happy\", \"output\": \"sad\"},\n",
|
|
" {\"input\": \"tall\", \"output\": \"short\"},\n",
|
|
" {\"input\": \"energetic\", \"output\": \"lethargic\"},\n",
|
|
" {\"input\": \"sunny\", \"output\": \"gloomy\"},\n",
|
|
" {\"input\": \"windy\", \"output\": \"calm\"},\n",
|
|
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "db579bea",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"example_selector = MaxMarginalRelevanceExampleSelector.from_examples(\n",
|
|
" # This is the list of examples available to select from.\n",
|
|
" examples, \n",
|
|
" # This is the embedding class used to produce embeddings which are used to measure semantic similarity.\n",
|
|
" OpenAIEmbeddings(), \n",
|
|
" # This is the VectorStore class that is used to store the embeddings and do a similarity search over.\n",
|
|
" FAISS, \n",
|
|
" # This is the number of examples to produce.\n",
|
|
" k=2\n",
|
|
")\n",
|
|
"mmr_prompt = FewShotPromptTemplate(\n",
|
|
" # We provide an ExampleSelector instead of examples.\n",
|
|
" example_selector=example_selector,\n",
|
|
" example_prompt=example_prompt,\n",
|
|
" prefix=\"Give the antonym of every input\",\n",
|
|
" suffix=\"Input: {adjective}\\nOutput:\", \n",
|
|
" input_variables=[\"adjective\"],\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "cd76e344",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Give the antonym of every input\n",
|
|
"\n",
|
|
"Input: happy\n",
|
|
"Output: sad\n",
|
|
"\n",
|
|
"Input: windy\n",
|
|
"Output: calm\n",
|
|
"\n",
|
|
"Input: worried\n",
|
|
"Output:\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Input is a feeling, so should select the happy/sad example as the first one\n",
|
|
"print(mmr_prompt.format(adjective=\"worried\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "cf82956b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Give the antonym of every input\n",
|
|
"\n",
|
|
"Input: happy\n",
|
|
"Output: sad\n",
|
|
"\n",
|
|
"Input: sunny\n",
|
|
"Output: gloomy\n",
|
|
"\n",
|
|
"Input: worried\n",
|
|
"Output:\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Let's compare this to what we would just get if we went solely off of similarity,\n",
|
|
"# by using SemanticSimilarityExampleSelector instead of MaxMarginalRelevanceExampleSelector.\n",
|
|
"example_selector = SemanticSimilarityExampleSelector.from_examples(\n",
|
|
" # This is the list of examples available to select from.\n",
|
|
" examples, \n",
|
|
" # This is the embedding class used to produce embeddings which are used to measure semantic similarity.\n",
|
|
" OpenAIEmbeddings(), \n",
|
|
" # This is the VectorStore class that is used to store the embeddings and do a similarity search over.\n",
|
|
" FAISS, \n",
|
|
" # This is the number of examples to produce.\n",
|
|
" k=2\n",
|
|
")\n",
|
|
"similar_prompt = FewShotPromptTemplate(\n",
|
|
" # We provide an ExampleSelector instead of examples.\n",
|
|
" example_selector=example_selector,\n",
|
|
" example_prompt=example_prompt,\n",
|
|
" prefix=\"Give the antonym of every input\",\n",
|
|
" suffix=\"Input: {adjective}\\nOutput:\", \n",
|
|
" input_variables=[\"adjective\"],\n",
|
|
")\n",
|
|
"print(similar_prompt.format(adjective=\"worried\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "39f30097",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.16"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|