From 279605b4d33c22cae014bfa8dde41980f4ae4e3a Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sat, 13 May 2023 21:45:05 -0700 Subject: [PATCH] Harrison/metaphor search (#4657) Co-authored-by: Jeffrey Wang --- .../tools/examples/metaphor_search.ipynb | 246 ++++++++++++++++++ langchain/agents/load_tools.py | 7 + langchain/tools/__init__.py | 2 + langchain/tools/metaphor_search/__init__.py | 5 + langchain/tools/metaphor_search/tool.py | 46 ++++ langchain/utilities/__init__.py | 2 + langchain/utilities/metaphor_search.py | 105 ++++++++ tests/unit_tests/tools/test_public_api.py | 1 + 8 files changed, 414 insertions(+) create mode 100644 docs/modules/agents/tools/examples/metaphor_search.ipynb create mode 100644 langchain/tools/metaphor_search/__init__.py create mode 100644 langchain/tools/metaphor_search/tool.py create mode 100644 langchain/utilities/metaphor_search.py diff --git a/docs/modules/agents/tools/examples/metaphor_search.ipynb b/docs/modules/agents/tools/examples/metaphor_search.ipynb new file mode 100644 index 00000000..e3f76de8 --- /dev/null +++ b/docs/modules/agents/tools/examples/metaphor_search.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Metaphor Search" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook goes over how to use Metaphor search.\n", + "\n", + "First, you need to set up the proper API keys and environment variables. To request an API key, sign up for early access.\n", + "\n", + "Then enter your API key as an environment variable."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"METAPHOR_API_KEY\"] = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.utilities import MetaphorSearchAPIWrapper" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "search = MetaphorSearchAPIWrapper()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Call the API\n", + "`results` takes in a Metaphor-optimized search query and a number of results (up to 500). It returns a list of results with title, url, author, and creation date." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'results': [{'url': 'https://www.anthropic.com/index/core-views-on-ai-safety', 'title': 'Core Views on AI Safety: When, Why, What, and How', 'dateCreated': '2023-03-08', 'author': None, 'score': 0.1998831331729889}, {'url': 'https://aisafety.wordpress.com/', 'title': 'Extinction Risk from Artificial Intelligence', 'dateCreated': '2013-10-08', 'author': None, 'score': 0.19801370799541473}, {'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety', 'title': 'The simple picture on AI safety - LessWrong', 'dateCreated': '2018-05-27', 'author': 'Alex Flint', 'score': 0.19735534489154816}, {'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/', 'title': 'No Time Like The Present For AI Safety Work', 'dateCreated': '2015-05-29', 'author': None, 'score': 0.19408763945102692}, {'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world', 'title': 'So You Want to Save the World - LessWrong', 'dateCreated': '2012-01-01', 'author': 'Lukeprog', 'score': 
0.18853715062141418}, {'url': 'https://openai.com/blog/planning-for-agi-and-beyond', 'title': 'Planning for AGI and beyond', 'dateCreated': '2023-02-24', 'author': 'Authors', 'score': 0.18665121495723724}, {'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html', 'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why', 'dateCreated': '2015-01-22', 'author': 'Tim Urban', 'score': 0.18604731559753418}, {'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how', 'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum', 'dateCreated': '2023-03-09', 'author': 'Jonmenaster', 'score': 0.18415069580078125}, {'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom', 'title': 'The Proof of Doom - LessWrong', 'dateCreated': '2022-03-09', 'author': 'Johnlawrenceaspden', 'score': 0.18159329891204834}, {'url': 'https://intelligence.org/why-ai-safety/', 'title': 'Why AI Safety? 
- Machine Intelligence Research Institute', 'dateCreated': '2017-03-01', 'author': None, 'score': 0.1814115345478058}]}\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'title': 'Core Views on AI Safety: When, Why, What, and How',\n", + " 'url': 'https://www.anthropic.com/index/core-views-on-ai-safety',\n", + " 'author': None,\n", + " 'date_created': '2023-03-08'},\n", + " {'title': 'Extinction Risk from Artificial Intelligence',\n", + " 'url': 'https://aisafety.wordpress.com/',\n", + " 'author': None,\n", + " 'date_created': '2013-10-08'},\n", + " {'title': 'The simple picture on AI safety - LessWrong',\n", + " 'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety',\n", + " 'author': 'Alex Flint',\n", + " 'date_created': '2018-05-27'},\n", + " {'title': 'No Time Like The Present For AI Safety Work',\n", + " 'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/',\n", + " 'author': None,\n", + " 'date_created': '2015-05-29'},\n", + " {'title': 'So You Want to Save the World - LessWrong',\n", + " 'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world',\n", + " 'author': 'Lukeprog',\n", + " 'date_created': '2012-01-01'},\n", + " {'title': 'Planning for AGI and beyond',\n", + " 'url': 'https://openai.com/blog/planning-for-agi-and-beyond',\n", + " 'author': 'Authors',\n", + " 'date_created': '2023-02-24'},\n", + " {'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why',\n", + " 'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html',\n", + " 'author': 'Tim Urban',\n", + " 'date_created': '2015-01-22'},\n", + " {'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum',\n", + " 'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how',\n", + " 'author': 'Jonmenaster',\n", + " 'date_created': '2023-03-09'},\n", + " {'title': 'The 
Proof of Doom - LessWrong',\n", + " 'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom',\n", + " 'author': 'Johnlawrenceaspden',\n", + " 'date_created': '2022-03-09'},\n", + " {'title': 'Why AI Safety? - Machine Intelligence Research Institute',\n", + " 'url': 'https://intelligence.org/why-ai-safety/',\n", + " 'author': None,\n", + " 'date_created': '2017-03-01'}]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.results(\"The best blog post about AI safety is definitely this: \", 10)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use Metaphor as a tool\n", + "Metaphor can be used as a tool that gets URLs that other tools, such as browsing tools, can then use." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n", + "from langchain.tools.playwright.utils import (\n", + " create_async_playwright_browser,# A synchronous browser is available, though it isn't compatible with jupyter.\n", + ")\n", + "\n", + "async_browser = create_async_playwright_browser()\n", + "toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=async_browser)\n", + "tools = toolkit.get_tools()\n", + "\n", + "tools_by_name = {tool.name: tool for tool in tools}\n", + "print(tools_by_name.keys())\n", + "navigate_tool = tools_by_name[\"navigate_browser\"]\n", + "extract_text = tools_by_name[\"extract_text\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to find a tweet about AI safety using Metaphor Search.\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"Metaphor Search Results JSON\",\n", + "
\"action_input\": {\n", + " \"query\": \"interesting tweet AI safety\",\n", + " \"num_results\": 1\n", + " }\n", + "}\n", + "```\n", + "\u001b[0m{'results': [{'url': 'https://safe.ai/', 'title': 'Center for AI Safety', 'dateCreated': '2022-01-01', 'author': None, 'score': 0.18083244562149048}]}\n", + "\n", + "Observation: \u001b[36;1m\u001b[1;3m[{'title': 'Center for AI Safety', 'url': 'https://safe.ai/', 'author': None, 'date_created': '2022-01-01'}]\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3mI need to navigate to the URL provided in the search results to find the tweet.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'I need to navigate to the URL provided in the search results to find the tweet.'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.agents import initialize_agent, AgentType\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.tools import MetaphorSearchResults\n", + "\n", + "llm = ChatOpenAI(model_name=\"gpt-4\", temperature=0.7)\n", + "\n", + "metaphor_tool = MetaphorSearchResults(api_wrapper=search)\n", + "\n", + "agent_chain = initialize_agent([metaphor_tool, extract_text, navigate_tool], llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)\n", + "\n", + "agent_chain.run(\"find me an interesting tweet about AI safety using Metaphor, then tell me the first sentence in the post. 
Do not finish until able to retrieve the first sentence.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "vscode": { + "interpreter": { + "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py index e22db4c6..38865b08 100644 --- a/langchain/agents/load_tools.py +++ b/langchain/agents/load_tools.py @@ -18,6 +18,7 @@ from langchain.tools.base import BaseTool from langchain.tools.bing_search.tool import BingSearchRun from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun +from langchain.tools.metaphor_search.tool import MetaphorSearchResults from langchain.tools.google_serper.tool import GoogleSerperResults, GoogleSerperRun from langchain.tools.human.tool import HumanInputRun from langchain.tools.python.tool import PythonREPLTool @@ -38,6 +39,7 @@ from langchain.utilities.bing_search import BingSearchAPIWrapper from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper from langchain.utilities.google_search import GoogleSearchAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper +from langchain.utilities.metaphor_search import MetaphorSearchAPIWrapper from langchain.utilities.awslambda import LambdaWrapper from langchain.utilities.searx_search import SearxSearchWrapper from langchain.utilities.serpapi import SerpAPIWrapper @@ -225,6 
def _get_metaphor_search(**kwargs: Any) -> BaseTool:
    """Build a Metaphor search tool.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to
            ``MetaphorSearchAPIWrapper``.

    Returns:
        A ``MetaphorSearchResults`` tool backed by the newly built wrapper.
    """
    metaphor_wrapper = MetaphorSearchAPIWrapper(**kwargs)
    return MetaphorSearchResults(api_wrapper=metaphor_wrapper)
class MetaphorSearchResults(BaseTool):
    """Tool that queries the Metaphor Search API and returns the result list.

    Both entry points are best-effort: any exception raised by the API
    wrapper is caught and its ``repr`` is returned as the tool output
    instead of being propagated to the caller.
    """

    name = "Metaphor Search Results JSON"
    description = (
        "A wrapper around Metaphor Search. "
        "Input should be a Metaphor-optimized query. "
        "Output is a JSON array of the query results"
    )
    api_wrapper: MetaphorSearchAPIWrapper

    def _run(
        self,
        query: str,
        num_results: int,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Union[List[Dict], str]:
        """Run the search synchronously.

        Args:
            query: The search query handed to the API wrapper.
            num_results: How many results to request.
            run_manager: Accepted for the tool interface; not used here.

        Returns:
            Whatever ``api_wrapper.results`` returns, or ``repr(error)`` if
            the wrapper raised.
        """
        try:
            search_hits = self.api_wrapper.results(query, num_results)
        except Exception as error:
            # Surface the failure as tool output rather than raising.
            return repr(error)
        return search_hits

    async def _arun(
        self,
        query: str,
        num_results: int,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> Union[List[Dict], str]:
        """Run the search asynchronously.

        Args:
            query: The search query handed to the API wrapper.
            num_results: How many results to request.
            run_manager: Accepted for the tool interface; not used here.

        Returns:
            Whatever ``api_wrapper.results_async`` resolves to, or
            ``repr(error)`` if the wrapper raised.
        """
        try:
            search_hits = await self.api_wrapper.results_async(query, num_results)
        except Exception as error:
            # Surface the failure as tool output rather than raising.
            return repr(error)
        return search_hits
class MetaphorSearchAPIWrapper(BaseModel):
    """Wrapper for the Metaphor Search API.

    The API key is resolved by ``get_from_dict_or_env`` from the
    ``metaphor_api_key`` value or the ``METAPHOR_API_KEY`` environment
    variable before field validation runs.
    """

    metaphor_api_key: str
    # NOTE(review): ``k`` is not read anywhere in this class; every method
    # takes ``num_results`` explicitly. Kept so existing callers that pass
    # ``k`` keep working (``extra`` is forbidden, so removing it would break
    # them).
    k: int = 10

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def _metaphor_search_results(self, query: str, num_results: int) -> List[dict]:
        """POST ``query`` to the ``/search`` endpoint and return the raw results.

        Args:
            query: The query to search for.
            num_results: The number of results to request.

        Returns:
            The list stored under the ``"results"`` key of the JSON response.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        headers = {"X-Api-Key": self.metaphor_api_key}
        params = {"numResults": num_results, "query": query}
        response = requests.post(  # type: ignore
            f"{METAPHOR_API_URL}/search",
            headers=headers,
            json=params,
        )
        response.raise_for_status()
        # The payload is returned only; library code must not echo raw API
        # responses to stdout.
        return response.json()["results"]

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key is present in ``values`` or the environment."""
        values["metaphor_api_key"] = get_from_dict_or_env(
            values, "metaphor_api_key", "METAPHOR_API_KEY"
        )
        return values

    def results(self, query: str, num_results: int) -> List[Dict]:
        """Run query through Metaphor Search and return metadata.

        Args:
            query: The query to search for.
            num_results: The number of results to return.

        Returns:
            A list of dictionaries with the following keys:
                title - The title of the result.
                url - The url of the result.
                author - Author of the content, if applicable. Otherwise, None.
                date_created - Estimated date created,
                    in YYYY-MM-DD format. Otherwise, None.
        """
        raw_search_results = self._metaphor_search_results(
            query, num_results=num_results
        )
        return self._clean_results(raw_search_results)

    async def results_async(self, query: str, num_results: int) -> List[Dict]:
        """Get results from the Metaphor Search API asynchronously.

        Args:
            query: The query to search for.
            num_results: The number of results to return.

        Returns:
            The same cleaned list of dictionaries that ``results`` returns.

        Raises:
            Exception: If the API answers with any status other than 200.
        """
        headers = {"X-Api-Key": self.metaphor_api_key}
        params = {"numResults": num_results, "query": query}
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{METAPHOR_API_URL}/search", json=params, headers=headers
            ) as res:
                if res.status != 200:
                    raise Exception(f"Error {res.status}: {res.reason}")
                # Read the body as text and decode it ourselves, exactly as
                # the synchronous path's payload is interpreted.
                payload = await res.text()

        return self._clean_results(json.loads(payload)["results"])

    def _clean_results(self, raw_search_results: List[Dict]) -> List[Dict]:
        """Reduce raw API results to the documented, snake_case metadata keys.

        ``title`` and ``url`` are required. ``author`` and ``dateCreated`` are
        documented as optional, so a missing key yields ``None`` instead of a
        ``KeyError``.
        """
        return [
            {
                "title": result["title"],
                "url": result["url"],
                "author": result.get("author"),
                "date_created": result.get("dateCreated"),
            }
            for result in raw_search_results
        ]
"NavigateBackTool", "NavigateTool",