Harrison/metaphor search (#4657)

Co-authored-by: Jeffrey Wang <jeffreyzhiyuanwang@gmail.com>
textloader_autodetect_encodings
Harrison Chase 1 year ago committed by GitHub
parent 9aa9fe7021
commit 279605b4d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,246 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Metaphor Search"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook goes over how to use Metaphor search.\n",
"\n",
"First, you need to set up the proper API keys and environment variables. Request an API key from Metaphor (sign up for early access).\n",
"\n",
"Then enter your API key as an environment variable."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"METAPHOR_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from langchain.utilities import MetaphorSearchAPIWrapper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"search = MetaphorSearchAPIWrapper()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Call the API\n",
"`results` takes in a Metaphor-optimized search query and a number of results (up to 500). It returns a list of results with title, url, author, and creation date."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'results': [{'url': 'https://www.anthropic.com/index/core-views-on-ai-safety', 'title': 'Core Views on AI Safety: When, Why, What, and How', 'dateCreated': '2023-03-08', 'author': None, 'score': 0.1998831331729889}, {'url': 'https://aisafety.wordpress.com/', 'title': 'Extinction Risk from Artificial Intelligence', 'dateCreated': '2013-10-08', 'author': None, 'score': 0.19801370799541473}, {'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety', 'title': 'The simple picture on AI safety - LessWrong', 'dateCreated': '2018-05-27', 'author': 'Alex Flint', 'score': 0.19735534489154816}, {'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/', 'title': 'No Time Like The Present For AI Safety Work', 'dateCreated': '2015-05-29', 'author': None, 'score': 0.19408763945102692}, {'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world', 'title': 'So You Want to Save the World - LessWrong', 'dateCreated': '2012-01-01', 'author': 'Lukeprog', 'score': 0.18853715062141418}, {'url': 'https://openai.com/blog/planning-for-agi-and-beyond', 'title': 'Planning for AGI and beyond', 'dateCreated': '2023-02-24', 'author': 'Authors', 'score': 0.18665121495723724}, {'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html', 'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why', 'dateCreated': '2015-01-22', 'author': 'Tim Urban', 'score': 0.18604731559753418}, {'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how', 'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum', 'dateCreated': '2023-03-09', 'author': 'Jonmenaster', 'score': 0.18415069580078125}, {'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom', 'title': 'The Proof of Doom - LessWrong', 'dateCreated': '2022-03-09', 'author': 'Johnlawrenceaspden', 'score': 
0.18159329891204834}, {'url': 'https://intelligence.org/why-ai-safety/', 'title': 'Why AI Safety? - Machine Intelligence Research Institute', 'dateCreated': '2017-03-01', 'author': None, 'score': 0.1814115345478058}]}\n"
]
},
{
"data": {
"text/plain": [
"[{'title': 'Core Views on AI Safety: When, Why, What, and How',\n",
" 'url': 'https://www.anthropic.com/index/core-views-on-ai-safety',\n",
" 'author': None,\n",
" 'date_created': '2023-03-08'},\n",
" {'title': 'Extinction Risk from Artificial Intelligence',\n",
" 'url': 'https://aisafety.wordpress.com/',\n",
" 'author': None,\n",
" 'date_created': '2013-10-08'},\n",
" {'title': 'The simple picture on AI safety - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety',\n",
" 'author': 'Alex Flint',\n",
" 'date_created': '2018-05-27'},\n",
" {'title': 'No Time Like The Present For AI Safety Work',\n",
" 'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/',\n",
" 'author': None,\n",
" 'date_created': '2015-05-29'},\n",
" {'title': 'So You Want to Save the World - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world',\n",
" 'author': 'Lukeprog',\n",
" 'date_created': '2012-01-01'},\n",
" {'title': 'Planning for AGI and beyond',\n",
" 'url': 'https://openai.com/blog/planning-for-agi-and-beyond',\n",
" 'author': 'Authors',\n",
" 'date_created': '2023-02-24'},\n",
" {'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why',\n",
" 'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html',\n",
" 'author': 'Tim Urban',\n",
" 'date_created': '2015-01-22'},\n",
" {'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum',\n",
" 'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how',\n",
" 'author': 'Jonmenaster',\n",
" 'date_created': '2023-03-09'},\n",
" {'title': 'The Proof of Doom - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom',\n",
" 'author': 'Johnlawrenceaspden',\n",
" 'date_created': '2022-03-09'},\n",
" {'title': 'Why AI Safety? - Machine Intelligence Research Institute',\n",
" 'url': 'https://intelligence.org/why-ai-safety/',\n",
" 'author': None,\n",
" 'date_created': '2017-03-01'}]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"search.results(\"The best blog post about AI safety is definitely this: \", 10)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Use Metaphor as a tool\n",
"Metaphor can be used as a tool that gets URLs that other tools, such as browsing tools, can then use."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n",
"from langchain.tools.playwright.utils import (\n",
" create_async_playwright_browser,# A synchronous browser is available, though it isn't compatible with jupyter.\n",
")\n",
"\n",
"async_browser = create_async_playwright_browser()\n",
"toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=async_browser)\n",
"tools = toolkit.get_tools()\n",
"\n",
"tools_by_name = {tool.name: tool for tool in tools}\n",
"print(tools_by_name.keys())\n",
"navigate_tool = tools_by_name[\"navigate_browser\"]\n",
"extract_text = tools_by_name[\"extract_text\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find a tweet about AI safety using Metaphor Search.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Metaphor Search Results JSON\",\n",
" \"action_input\": {\n",
" \"query\": \"interesting tweet AI safety\",\n",
" \"num_results\": 1\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m{'results': [{'url': 'https://safe.ai/', 'title': 'Center for AI Safety', 'dateCreated': '2022-01-01', 'author': None, 'score': 0.18083244562149048}]}\n",
"\n",
"Observation: \u001b[36;1m\u001b[1;3m[{'title': 'Center for AI Safety', 'url': 'https://safe.ai/', 'author': None, 'date_created': '2022-01-01'}]\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI need to navigate to the URL provided in the search results to find the tweet.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'I need to navigate to the URL provided in the search results to find the tweet.'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.agents import initialize_agent, AgentType\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.tools import MetaphorSearchResults\n",
"\n",
"llm = ChatOpenAI(model_name=\"gpt-4\", temperature=0.7)\n",
"\n",
"metaphor_tool = MetaphorSearchResults(api_wrapper=search)\n",
"\n",
"agent_chain = initialize_agent([metaphor_tool, extract_text, navigate_tool], llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)\n",
"\n",
"agent_chain.run(\"find me an interesting tweet about AI safety using Metaphor, then tell me the first sentence in the post. Do not finish until able to retrieve the first sentence.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"vscode": {
"interpreter": {
"hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -18,6 +18,7 @@ from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
from langchain.tools.metaphor_search.tool import MetaphorSearchResults
from langchain.tools.google_serper.tool import GoogleSerperResults, GoogleSerperRun
from langchain.tools.human.tool import HumanInputRun
from langchain.tools.python.tool import PythonREPLTool
@ -38,6 +39,7 @@ from langchain.utilities.bing_search import BingSearchAPIWrapper
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.metaphor_search import MetaphorSearchAPIWrapper
from langchain.utilities.awslambda import LambdaWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
@ -225,6 +227,10 @@ def _get_bing_search(**kwargs: Any) -> BaseTool:
return BingSearchRun(api_wrapper=BingSearchAPIWrapper(**kwargs))
def _get_metaphor_search(**kwargs: Any) -> BaseTool:
    """Build the Metaphor search tool, forwarding ``kwargs`` to its API wrapper."""
    wrapper = MetaphorSearchAPIWrapper(**kwargs)
    return MetaphorSearchResults(api_wrapper=wrapper)
def _get_ddg_search(**kwargs: Any) -> BaseTool:
    """Build the DuckDuckGo search tool, forwarding ``kwargs`` to its API wrapper."""
    wrapper = DuckDuckGoSearchAPIWrapper(**kwargs)
    return DuckDuckGoSearchRun(api_wrapper=wrapper)
@ -258,6 +264,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
["searx_host", "engines", "num_results", "aiosession"],
),
"bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]),
"metaphor-search": (_get_metaphor_search, ["metaphor_api_key"]),
"ddg-search": (_get_ddg_search, []),
"google-serper": (_get_google_serper, ["serper_api_key", "aiosession"]),
"google-serper-results-json": (

@ -22,6 +22,7 @@ from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearch
from langchain.tools.google_serper.tool import GoogleSerperResults, GoogleSerperRun
from langchain.tools.human.tool import HumanInputRun
from langchain.tools.ifttt import IFTTTWebhook
from langchain.tools.metaphor_search import MetaphorSearchResults
from langchain.tools.openapi.utils.api_models import APIOperation
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
from langchain.tools.playwright import (
@ -78,6 +79,7 @@ __all__ = [
"HumanInputRun",
"IFTTTWebhook",
"ListDirectoryTool",
"MetaphorSearchResults",
"MoveFileTool",
"NavigateBackTool",
"NavigateTool",

@ -0,0 +1,5 @@
"""Metaphor Search API toolkit."""
from langchain.tools.metaphor_search.tool import MetaphorSearchResults
__all__ = ["MetaphorSearchResults"]

@ -0,0 +1,46 @@
"""Tool for the Metaphor search API."""
from typing import Dict, List, Optional, Union
from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.tools.base import BaseTool
from langchain.utilities.metaphor_search import MetaphorSearchAPIWrapper
class MetaphorSearchResults(BaseTool):
    """Tool that sends a query to the Metaphor Search API and returns the results.

    Any exception raised by the API wrapper is caught and its ``repr`` is
    returned in place of the result list.
    """

    name = "Metaphor Search Results JSON"
    description = (
        "A wrapper around Metaphor Search. "
        "Input should be a Metaphor-optimized query. "
        "Output is a JSON array of the query results"
    )
    api_wrapper: MetaphorSearchAPIWrapper

    def _run(
        self,
        query: str,
        num_results: int,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Union[List[Dict], str]:
        """Run the search synchronously.

        Args:
            query: The query passed to the API wrapper.
            num_results: The number of results requested.
            run_manager: Optional callback manager; not used by this method.

        Returns:
            The wrapper's result list, or ``repr`` of the exception on failure.
        """
        try:
            hits = self.api_wrapper.results(query, num_results)
        except Exception as err:
            return repr(err)
        return hits

    async def _arun(
        self,
        query: str,
        num_results: int,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> Union[List[Dict], str]:
        """Run the search asynchronously.

        Args:
            query: The query passed to the API wrapper.
            num_results: The number of results requested.
            run_manager: Optional callback manager; not used by this method.

        Returns:
            The wrapper's result list, or ``repr`` of the exception on failure.
        """
        try:
            hits = await self.api_wrapper.results_async(query, num_results)
        except Exception as err:
            return repr(err)
        return hits

@ -9,6 +9,7 @@ from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain.utilities.google_places_api import GooglePlacesAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.metaphor_search import MetaphorSearchAPIWrapper
from langchain.utilities.openweathermap import OpenWeatherMapAPIWrapper
from langchain.utilities.powerbi import PowerBIDataset
from langchain.utilities.python import PythonREPL
@ -35,4 +36,5 @@ __all__ = [
"PythonREPL",
"LambdaWrapper",
"PowerBIDataset",
"MetaphorSearchAPIWrapper",
]

@ -0,0 +1,105 @@
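"""Util that calls Metaphor Search API.

In order to set this up, request a Metaphor API key and provide it either as the
``metaphor_api_key`` argument or via the ``METAPHOR_API_KEY`` environment variable.
"""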
import json
from typing import Dict, List
import aiohttp
import requests
from pydantic import BaseModel, Extra, root_validator
from langchain.utils import get_from_dict_or_env
METAPHOR_API_URL = "https://api.metaphor.systems"
class MetaphorSearchAPIWrapper(BaseModel):
    """Wrapper for Metaphor Search API.

    The API key is read from the ``metaphor_api_key`` argument or, when that is
    not supplied, from the ``METAPHOR_API_KEY`` environment variable.
    """

    metaphor_api_key: str
    # Not read by any method of this class; kept so the model's fields stay
    # backward-compatible.
    k: int = 10

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def _metaphor_search_results(self, query: str, num_results: int) -> List[dict]:
        """POST the query to the ``/search`` endpoint and return the raw results.

        Args:
            query: The query to search for.
            num_results: The number of results to request.

        Returns:
            The list stored under the ``"results"`` key of the JSON response.

        Raises:
            requests.HTTPError: If the response carries an error status code.
        """
        headers = {"X-Api-Key": self.metaphor_api_key}
        params = {"numResults": num_results, "query": query}
        response = requests.post(
            # type: ignore
            f"{METAPHOR_API_URL}/search",
            headers=headers,
            json=params,
        )
        response.raise_for_status()
        # The payload is deliberately not printed: library code should not
        # write raw API responses to stdout on every search.
        search_results = response.json()
        return search_results["results"]

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key exists in the arguments or the environment."""
        metaphor_api_key = get_from_dict_or_env(
            values, "metaphor_api_key", "METAPHOR_API_KEY"
        )
        values["metaphor_api_key"] = metaphor_api_key

        return values

    def results(self, query: str, num_results: int) -> List[Dict]:
        """Run query through Metaphor Search and return metadata.

        Args:
            query: The query to search for.
            num_results: The number of results to return.

        Returns:
            A list of dictionaries with the following keys:
                title - The title of the result
                url - The url
                author - Author of the content, if applicable. Otherwise, None.
                date_created - Estimated date created,
                    in YYYY-MM-DD format. Otherwise, None.
        """
        raw_search_results = self._metaphor_search_results(
            query, num_results=num_results
        )
        return self._clean_results(raw_search_results)

    async def results_async(self, query: str, num_results: int) -> List[Dict]:
        """Get results from the Metaphor Search API asynchronously.

        Args:
            query: The query to search for.
            num_results: The number of results to return.

        Returns:
            A list of dictionaries with the keys ``title``, ``url``, ``author``
            and ``date_created``.

        Raises:
            Exception: If the API responds with a status other than 200.
        """

        # Function to perform the API call
        async def fetch() -> str:
            headers = {"X-Api-Key": self.metaphor_api_key}
            params = {"numResults": num_results, "query": query}
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{METAPHOR_API_URL}/search", json=params, headers=headers
                ) as res:
                    if res.status == 200:
                        data = await res.text()
                        return data
                    else:
                        raise Exception(f"Error {res.status}: {res.reason}")

        results_json_str = await fetch()
        results_json = json.loads(results_json_str)
        return self._clean_results(results_json["results"])

    def _clean_results(self, raw_search_results: List[Dict]) -> List[Dict]:
        """Reduce raw API results to title, url, author and date_created.

        ``author`` and ``dateCreated`` are read with ``.get`` so that a result
        lacking either field maps to ``None`` instead of raising ``KeyError``.
        ``title`` and ``url`` are still required.
        """
        return [
            {
                "title": result["title"],
                "url": result["url"],
                "author": result.get("author"),
                "date_created": result.get("dateCreated"),
            }
            for result in raw_search_results
        ]

@ -32,6 +32,7 @@ _EXPECTED = [
"HumanInputRun",
"IFTTTWebhook",
"ListDirectoryTool",
"MetaphorSearchResults",
"MoveFileTool",
"NavigateBackTool",
"NavigateTool",

Loading…
Cancel
Save