Harrison/metaphor search fix (#6387)

Co-authored-by: jeffzwang <jeffreyzhiyuanwang@gmail.com>
This commit is contained in:
Harrison Chase 2023-06-18 16:53:24 -07:00 committed by GitHub
parent 9bf5b0defa
commit b7159c15cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 130 additions and 112 deletions

View File

@ -22,7 +22,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -33,7 +33,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -42,7 +42,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -60,70 +60,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'results': [{'url': 'https://www.anthropic.com/index/core-views-on-ai-safety', 'title': 'Core Views on AI Safety: When, Why, What, and How', 'dateCreated': '2023-03-08', 'author': None, 'score': 0.1998831331729889}, {'url': 'https://aisafety.wordpress.com/', 'title': 'Extinction Risk from Artificial Intelligence', 'dateCreated': '2013-10-08', 'author': None, 'score': 0.19801370799541473}, {'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety', 'title': 'The simple picture on AI safety - LessWrong', 'dateCreated': '2018-05-27', 'author': 'Alex Flint', 'score': 0.19735534489154816}, {'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/', 'title': 'No Time Like The Present For AI Safety Work', 'dateCreated': '2015-05-29', 'author': None, 'score': 0.19408763945102692}, {'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world', 'title': 'So You Want to Save the World - LessWrong', 'dateCreated': '2012-01-01', 'author': 'Lukeprog', 'score': 0.18853715062141418}, {'url': 'https://openai.com/blog/planning-for-agi-and-beyond', 'title': 'Planning for AGI and beyond', 'dateCreated': '2023-02-24', 'author': 'Authors', 'score': 0.18665121495723724}, {'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html', 'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why', 'dateCreated': '2015-01-22', 'author': 'Tim Urban', 'score': 0.18604731559753418}, {'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how', 'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum', 'dateCreated': '2023-03-09', 'author': 'Jonmenaster', 'score': 0.18415069580078125}, {'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom', 'title': 'The Proof of Doom - LessWrong', 'dateCreated': '2022-03-09', 'author': 'Johnlawrenceaspden', 'score': 0.18159329891204834}, {'url': 'https://intelligence.org/why-ai-safety/', 'title': 'Why AI Safety? - Machine Intelligence Research Institute', 'dateCreated': '2017-03-01', 'author': None, 'score': 0.1814115345478058}]}\n"
]
},
{
"data": {
"text/plain": [
"[{'title': 'Core Views on AI Safety: When, Why, What, and How',\n",
" 'url': 'https://www.anthropic.com/index/core-views-on-ai-safety',\n",
" 'author': None,\n",
" 'date_created': '2023-03-08'},\n",
" {'title': 'Extinction Risk from Artificial Intelligence',\n",
" 'url': 'https://aisafety.wordpress.com/',\n",
" 'author': None,\n",
" 'date_created': '2013-10-08'},\n",
" {'title': 'The simple picture on AI safety - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety',\n",
" 'author': 'Alex Flint',\n",
" 'date_created': '2018-05-27'},\n",
" {'title': 'No Time Like The Present For AI Safety Work',\n",
" 'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/',\n",
" 'author': None,\n",
" 'date_created': '2015-05-29'},\n",
" {'title': 'So You Want to Save the World - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world',\n",
" 'author': 'Lukeprog',\n",
" 'date_created': '2012-01-01'},\n",
" {'title': 'Planning for AGI and beyond',\n",
" 'url': 'https://openai.com/blog/planning-for-agi-and-beyond',\n",
" 'author': 'Authors',\n",
" 'date_created': '2023-02-24'},\n",
" {'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why',\n",
" 'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html',\n",
" 'author': 'Tim Urban',\n",
" 'date_created': '2015-01-22'},\n",
" {'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum',\n",
" 'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how',\n",
" 'author': 'Jonmenaster',\n",
" 'date_created': '2023-03-09'},\n",
" {'title': 'The Proof of Doom - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom',\n",
" 'author': 'Johnlawrenceaspden',\n",
" 'date_created': '2022-03-09'},\n",
" {'title': 'Why AI Safety? - Machine Intelligence Research Institute',\n",
" 'url': 'https://intelligence.org/why-ai-safety/',\n",
" 'author': None,\n",
" 'date_created': '2017-03-01'}]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"search.results(\"The best blog post about AI safety is definitely this: \", 10)" "search.results(\"The best blog post about AI safety is definitely this: \", 10)"
] ]
}, },
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding filters\n",
"We can also add filters to our search. \n",
"include_domains: Optional[List[str]] - List of domains to include in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n",
"exclude_domains: Optional[List[str]] - List of domains to exclude in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n",
"start_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If start_crawl_date is specified, results will only include links that were crawled after start_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n",
"end_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If endCrawlDate is specified, results will only include links that were crawled before end_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n",
"start_published_date: Optional[str] - If specified, only links with a published date after start_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if start_published_date is specified.\n",
"end_published_date: Optional[str] - If specified, only links with a published date before end_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if end_published_date is specified.\n",
"\n",
"See full docs [here](https://metaphorapi.readme.io/)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"search.results(\"The best blog post about AI safety is definitely this: \", 10, include_domains=[\"lesswrong.com\"], start_published_date=\"2019-01-01\")"
]
},
{ {
"attachments": {}, "attachments": {},
"cell_type": "markdown", "cell_type": "markdown",
@ -139,6 +108,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%pip install playwright\n",
"from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n", "from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n",
"from langchain.tools.playwright.utils import (\n", "from langchain.tools.playwright.utils import (\n",
" create_async_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n", " create_async_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n",
@ -158,44 +128,7 @@
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find a tweet about AI safety using Metaphor Search.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Metaphor Search Results JSON\",\n",
" \"action_input\": {\n",
" \"query\": \"interesting tweet AI safety\",\n",
" \"num_results\": 1\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m{'results': [{'url': 'https://safe.ai/', 'title': 'Center for AI Safety', 'dateCreated': '2022-01-01', 'author': None, 'score': 0.18083244562149048}]}\n",
"\n",
"Observation: \u001b[36;1m\u001b[1;3m[{'title': 'Center for AI Safety', 'url': 'https://safe.ai/', 'author': None, 'date_created': '2022-01-01'}]\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI need to navigate to the URL provided in the search results to find the tweet.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'I need to navigate to the URL provided in the search results to find the tweet.'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"from langchain.agents import initialize_agent, AgentType\n", "from langchain.agents import initialize_agent, AgentType\n",
"from langchain.chat_models import ChatOpenAI\n", "from langchain.chat_models import ChatOpenAI\n",
@ -241,7 +174,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.3" "version": "3.10.11"
}, },
"vscode": { "vscode": {
"interpreter": { "interpreter": {

View File

@ -25,11 +25,26 @@ class MetaphorSearchResults(BaseTool):
self, self,
query: str, query: str,
num_results: int, num_results: int,
include_domains: Optional[List[str]] = None,
exclude_domains: Optional[List[str]] = None,
start_crawl_date: Optional[str] = None,
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
run_manager: Optional[CallbackManagerForToolRun] = None, run_manager: Optional[CallbackManagerForToolRun] = None,
) -> Union[List[Dict], str]: ) -> Union[List[Dict], str]:
"""Use the tool.""" """Use the tool."""
try: try:
return self.api_wrapper.results(query, num_results) return self.api_wrapper.results(
query,
num_results,
include_domains,
exclude_domains,
start_crawl_date,
end_crawl_date,
start_published_date,
end_published_date,
)
except Exception as e: except Exception as e:
return repr(e) return repr(e)
@ -37,10 +52,25 @@ class MetaphorSearchResults(BaseTool):
self, self,
query: str, query: str,
num_results: int, num_results: int,
include_domains: Optional[List[str]] = None,
exclude_domains: Optional[List[str]] = None,
start_crawl_date: Optional[str] = None,
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
run_manager: Optional[AsyncCallbackManagerForToolRun] = None, run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
) -> Union[List[Dict], str]: ) -> Union[List[Dict], str]:
"""Use the tool asynchronously.""" """Use the tool asynchronously."""
try: try:
return await self.api_wrapper.results_async(query, num_results) return await self.api_wrapper.results_async(
query,
num_results,
include_domains,
exclude_domains,
start_crawl_date,
end_crawl_date,
start_published_date,
end_published_date,
)
except Exception as e: except Exception as e:
return repr(e) return repr(e)

View File

@ -3,7 +3,7 @@
In order to set this up, follow instructions at: In order to set this up, follow instructions at:
""" """
import json import json
from typing import Dict, List from typing import Dict, List, Optional
import aiohttp import aiohttp
import requests import requests
@ -25,9 +25,28 @@ class MetaphorSearchAPIWrapper(BaseModel):
extra = Extra.forbid extra = Extra.forbid
def _metaphor_search_results(self, query: str, num_results: int) -> List[dict]: def _metaphor_search_results(
self,
query: str,
num_results: int,
include_domains: Optional[List[str]] = None,
exclude_domains: Optional[List[str]] = None,
start_crawl_date: Optional[str] = None,
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
) -> List[dict]:
headers = {"X-Api-Key": self.metaphor_api_key} headers = {"X-Api-Key": self.metaphor_api_key}
params = {"numResults": num_results, "query": query} params = {
"numResults": num_results,
"query": query,
"includeDomains": include_domains,
"excludeDomains": exclude_domains,
"startCrawlDate": start_crawl_date,
"endCrawlDate": end_crawl_date,
"startPublishedDate": start_published_date,
"endPublishedDate": end_published_date,
}
response = requests.post( response = requests.post(
# type: ignore # type: ignore
f"{METAPHOR_API_URL}/search", f"{METAPHOR_API_URL}/search",
@ -50,7 +69,17 @@ class MetaphorSearchAPIWrapper(BaseModel):
return values return values
def results(self, query: str, num_results: int) -> List[Dict]: def results(
self,
query: str,
num_results: int,
include_domains: Optional[List[str]] = None,
exclude_domains: Optional[List[str]] = None,
start_crawl_date: Optional[str] = None,
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
) -> List[Dict]:
"""Run query through Metaphor Search and return metadata. """Run query through Metaphor Search and return metadata.
Args: Args:
@ -62,21 +91,47 @@ class MetaphorSearchAPIWrapper(BaseModel):
title - The title of the title - The title of the
url - The url url - The url
author - Author of the content, if applicable. Otherwise, None. author - Author of the content, if applicable. Otherwise, None.
date_created - Estimated date created, published_date - Estimated date published
in YYYY-MM-DD format. Otherwise, None. in YYYY-MM-DD format. Otherwise, None.
""" """
raw_search_results = self._metaphor_search_results( raw_search_results = self._metaphor_search_results(
query, num_results=num_results query,
num_results=num_results,
include_domains=include_domains,
exclude_domains=exclude_domains,
start_crawl_date=start_crawl_date,
end_crawl_date=end_crawl_date,
start_published_date=start_published_date,
end_published_date=end_published_date,
) )
return self._clean_results(raw_search_results) return self._clean_results(raw_search_results)
async def results_async(self, query: str, num_results: int) -> List[Dict]: async def results_async(
self,
query: str,
num_results: int,
include_domains: Optional[List[str]] = None,
exclude_domains: Optional[List[str]] = None,
start_crawl_date: Optional[str] = None,
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
) -> List[Dict]:
"""Get results from the Metaphor Search API asynchronously.""" """Get results from the Metaphor Search API asynchronously."""
# Function to perform the API call # Function to perform the API call
async def fetch() -> str: async def fetch() -> str:
headers = {"X-Api-Key": self.metaphor_api_key} headers = {"X-Api-Key": self.metaphor_api_key}
params = {"numResults": num_results, "query": query} params = {
"numResults": num_results,
"query": query,
"includeDomains": include_domains,
"excludeDomains": exclude_domains,
"startCrawlDate": start_crawl_date,
"endCrawlDate": end_crawl_date,
"startPublishedDate": start_published_date,
"endPublishedDate": end_published_date,
}
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.post( async with session.post(
f"{METAPHOR_API_URL}/search", json=params, headers=headers f"{METAPHOR_API_URL}/search", json=params, headers=headers
@ -99,7 +154,7 @@ class MetaphorSearchAPIWrapper(BaseModel):
"title": result["title"], "title": result["title"],
"url": result["url"], "url": result["url"],
"author": result["author"], "author": result["author"],
"date_created": result["dateCreated"], "published_date": result["publishedDate"],
} }
) )
return cleaned_results return cleaned_results