From b7159c15cce0f19c6f7e12a59b419a242c131d30 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sun, 18 Jun 2023 16:53:24 -0700 Subject: [PATCH] Harrison/metaphor search fix (#6387) Co-authored-by: jeffzwang --- .../tools/integrations/metaphor_search.ipynb | 135 +++++------------- langchain/tools/metaphor_search/tool.py | 34 ++++- langchain/utilities/metaphor_search.py | 73 ++++++++-- 3 files changed, 130 insertions(+), 112 deletions(-) diff --git a/docs/extras/modules/agents/tools/integrations/metaphor_search.ipynb b/docs/extras/modules/agents/tools/integrations/metaphor_search.ipynb index eec8f949..ec52910c 100644 --- a/docs/extras/modules/agents/tools/integrations/metaphor_search.ipynb +++ b/docs/extras/modules/agents/tools/integrations/metaphor_search.ipynb @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -60,70 +60,39 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'results': [{'url': 'https://www.anthropic.com/index/core-views-on-ai-safety', 'title': 'Core Views on AI Safety: When, Why, What, and How', 'dateCreated': '2023-03-08', 'author': None, 'score': 0.1998831331729889}, {'url': 'https://aisafety.wordpress.com/', 'title': 'Extinction Risk from Artificial Intelligence', 'dateCreated': '2013-10-08', 'author': None, 'score': 0.19801370799541473}, {'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety', 'title': 'The simple picture on AI safety - LessWrong', 'dateCreated': '2018-05-27', 'author': 'Alex Flint', 'score': 
0.19735534489154816}, {'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/', 'title': 'No Time Like The Present For AI Safety Work', 'dateCreated': '2015-05-29', 'author': None, 'score': 0.19408763945102692}, {'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world', 'title': 'So You Want to Save the World - LessWrong', 'dateCreated': '2012-01-01', 'author': 'Lukeprog', 'score': 0.18853715062141418}, {'url': 'https://openai.com/blog/planning-for-agi-and-beyond', 'title': 'Planning for AGI and beyond', 'dateCreated': '2023-02-24', 'author': 'Authors', 'score': 0.18665121495723724}, {'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html', 'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why', 'dateCreated': '2015-01-22', 'author': 'Tim Urban', 'score': 0.18604731559753418}, {'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how', 'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum', 'dateCreated': '2023-03-09', 'author': 'Jonmenaster', 'score': 0.18415069580078125}, {'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom', 'title': 'The Proof of Doom - LessWrong', 'dateCreated': '2022-03-09', 'author': 'Johnlawrenceaspden', 'score': 0.18159329891204834}, {'url': 'https://intelligence.org/why-ai-safety/', 'title': 'Why AI Safety? 
- Machine Intelligence Research Institute', 'dateCreated': '2017-03-01', 'author': None, 'score': 0.1814115345478058}]}\n" - ] - }, - { - "data": { - "text/plain": [ - "[{'title': 'Core Views on AI Safety: When, Why, What, and How',\n", - " 'url': 'https://www.anthropic.com/index/core-views-on-ai-safety',\n", - " 'author': None,\n", - " 'date_created': '2023-03-08'},\n", - " {'title': 'Extinction Risk from Artificial Intelligence',\n", - " 'url': 'https://aisafety.wordpress.com/',\n", - " 'author': None,\n", - " 'date_created': '2013-10-08'},\n", - " {'title': 'The simple picture on AI safety - LessWrong',\n", - " 'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety',\n", - " 'author': 'Alex Flint',\n", - " 'date_created': '2018-05-27'},\n", - " {'title': 'No Time Like The Present For AI Safety Work',\n", - " 'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/',\n", - " 'author': None,\n", - " 'date_created': '2015-05-29'},\n", - " {'title': 'So You Want to Save the World - LessWrong',\n", - " 'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world',\n", - " 'author': 'Lukeprog',\n", - " 'date_created': '2012-01-01'},\n", - " {'title': 'Planning for AGI and beyond',\n", - " 'url': 'https://openai.com/blog/planning-for-agi-and-beyond',\n", - " 'author': 'Authors',\n", - " 'date_created': '2023-02-24'},\n", - " {'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why',\n", - " 'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html',\n", - " 'author': 'Tim Urban',\n", - " 'date_created': '2015-01-22'},\n", - " {'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum',\n", - " 'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how',\n", - " 'author': 'Jonmenaster',\n", - " 'date_created': '2023-03-09'},\n", - " {'title': 'The 
Proof of Doom - LessWrong',\n", - " 'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom',\n", - " 'author': 'Johnlawrenceaspden',\n", - " 'date_created': '2022-03-09'},\n", - " {'title': 'Why AI Safety? - Machine Intelligence Research Institute',\n", - " 'url': 'https://intelligence.org/why-ai-safety/',\n", - " 'author': None,\n", - " 'date_created': '2017-03-01'}]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "search.results(\"The best blog post about AI safety is definitely this: \", 10)" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Adding filters\n", + "We can also add filters to our search. \n", + "include_domains: Optional[List[str]] - List of domains to include in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n", + "exclude_domains: Optional[List[str]] - List of domains to exclude in the search. If specified, results will not come from these domains. Only one of include_domains and exclude_domains should be specified.\n", + "start_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If start_crawl_date is specified, results will only include links that were crawled after start_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n", + "end_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If end_crawl_date is specified, results will only include links that were crawled before end_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n", + "start_published_date: Optional[str] - If specified, only links with a published date after start_published_date will be returned. 
Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if start_published_date is specified.\n", + "end_published_date: Optional[str] - If specified, only links with a published date before end_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if end_published_date is specified.\n", + "\n", + "See full docs [here](https://metaphorapi.readme.io/)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search.results(\"The best blog post about AI safety is definitely this: \", 10, include_domains=[\"lesswrong.com\"], start_published_date=\"2019-01-01\")" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -139,6 +108,7 @@ "metadata": {}, "outputs": [], "source": [ + "%pip install playwright\n", "from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n", "from langchain.tools.playwright.utils import (\n", " create_async_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n", @@ -158,44 +128,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3mThought: I need to find a tweet about AI safety using Metaphor Search.\n", - "Action:\n", - "```\n", - "{\n", - " \"action\": \"Metaphor Search Results JSON\",\n", - " \"action_input\": {\n", - " \"query\": \"interesting tweet AI safety\",\n", - " \"num_results\": 1\n", - " }\n", - "}\n", - "```\n", - "\u001b[0m{'results': [{'url': 'https://safe.ai/', 'title': 'Center for AI Safety', 'dateCreated': '2022-01-01', 'author': None, 'score': 
0.18083244562149048}]}\n", - "\n", - "Observation: \u001b[36;1m\u001b[1;3m[{'title': 'Center for AI Safety', 'url': 'https://safe.ai/', 'author': None, 'date_created': '2022-01-01'}]\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mI need to navigate to the URL provided in the search results to find the tweet.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'I need to navigate to the URL provided in the search results to find the tweet.'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from langchain.agents import initialize_agent, AgentType\n", "from langchain.chat_models import ChatOpenAI\n", @@ -241,7 +174,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.10.11" }, "vscode": { "interpreter": { diff --git a/langchain/tools/metaphor_search/tool.py b/langchain/tools/metaphor_search/tool.py index cf4ea427..04f37e80 100644 --- a/langchain/tools/metaphor_search/tool.py +++ b/langchain/tools/metaphor_search/tool.py @@ -25,11 +25,26 @@ class MetaphorSearchResults(BaseTool): self, query: str, num_results: int, + include_domains: Optional[List[str]] = None, + exclude_domains: Optional[List[str]] = None, + start_crawl_date: Optional[str] = None, + end_crawl_date: Optional[str] = None, + start_published_date: Optional[str] = None, + end_published_date: Optional[str] = None, run_manager: Optional[CallbackManagerForToolRun] = None, ) -> Union[List[Dict], str]: """Use the tool.""" try: - return self.api_wrapper.results(query, num_results) + return self.api_wrapper.results( + query, + num_results, + include_domains, + exclude_domains, + start_crawl_date, + end_crawl_date, + start_published_date, + end_published_date, + ) except Exception as e: return repr(e) @@ -37,10 +52,25 @@ class MetaphorSearchResults(BaseTool): self, query: str, num_results: int, + include_domains: 
Optional[List[str]] = None, + exclude_domains: Optional[List[str]] = None, + start_crawl_date: Optional[str] = None, + end_crawl_date: Optional[str] = None, + start_published_date: Optional[str] = None, + end_published_date: Optional[str] = None, run_manager: Optional[AsyncCallbackManagerForToolRun] = None, ) -> Union[List[Dict], str]: """Use the tool asynchronously.""" try: - return await self.api_wrapper.results_async(query, num_results) + return await self.api_wrapper.results_async( + query, + num_results, + include_domains, + exclude_domains, + start_crawl_date, + end_crawl_date, + start_published_date, + end_published_date, + ) except Exception as e: return repr(e) diff --git a/langchain/utilities/metaphor_search.py b/langchain/utilities/metaphor_search.py index cbc7cecf..8c5340ec 100644 --- a/langchain/utilities/metaphor_search.py +++ b/langchain/utilities/metaphor_search.py @@ -3,7 +3,7 @@ In order to set this up, follow instructions at: """ import json -from typing import Dict, List +from typing import Dict, List, Optional import aiohttp import requests @@ -25,9 +25,28 @@ class MetaphorSearchAPIWrapper(BaseModel): extra = Extra.forbid - def _metaphor_search_results(self, query: str, num_results: int) -> List[dict]: + def _metaphor_search_results( + self, + query: str, + num_results: int, + include_domains: Optional[List[str]] = None, + exclude_domains: Optional[List[str]] = None, + start_crawl_date: Optional[str] = None, + end_crawl_date: Optional[str] = None, + start_published_date: Optional[str] = None, + end_published_date: Optional[str] = None, + ) -> List[dict]: headers = {"X-Api-Key": self.metaphor_api_key} - params = {"numResults": num_results, "query": query} + params = { + "numResults": num_results, + "query": query, + "includeDomains": include_domains, + "excludeDomains": exclude_domains, + "startCrawlDate": start_crawl_date, + "endCrawlDate": end_crawl_date, + "startPublishedDate": start_published_date, + "endPublishedDate": end_published_date, + 
} response = requests.post( # type: ignore f"{METAPHOR_API_URL}/search", @@ -50,7 +69,17 @@ class MetaphorSearchAPIWrapper(BaseModel): return values - def results(self, query: str, num_results: int) -> List[Dict]: + def results( + self, + query: str, + num_results: int, + include_domains: Optional[List[str]] = None, + exclude_domains: Optional[List[str]] = None, + start_crawl_date: Optional[str] = None, + end_crawl_date: Optional[str] = None, + start_published_date: Optional[str] = None, + end_published_date: Optional[str] = None, + ) -> List[Dict]: """Run query through Metaphor Search and return metadata. Args: @@ -62,21 +91,47 @@ class MetaphorSearchAPIWrapper(BaseModel): title - The title of the url - The url author - Author of the content, if applicable. Otherwise, None. - date_created - Estimated date created, + published_date - Estimated date published in YYYY-MM-DD format. Otherwise, None. """ raw_search_results = self._metaphor_search_results( - query, num_results=num_results + query, + num_results=num_results, + include_domains=include_domains, + exclude_domains=exclude_domains, + start_crawl_date=start_crawl_date, + end_crawl_date=end_crawl_date, + start_published_date=start_published_date, + end_published_date=end_published_date, ) return self._clean_results(raw_search_results) - async def results_async(self, query: str, num_results: int) -> List[Dict]: + async def results_async( + self, + query: str, + num_results: int, + include_domains: Optional[List[str]] = None, + exclude_domains: Optional[List[str]] = None, + start_crawl_date: Optional[str] = None, + end_crawl_date: Optional[str] = None, + start_published_date: Optional[str] = None, + end_published_date: Optional[str] = None, + ) -> List[Dict]: """Get results from the Metaphor Search API asynchronously.""" # Function to perform the API call async def fetch() -> str: headers = {"X-Api-Key": self.metaphor_api_key} - params = {"numResults": num_results, "query": query} + params = { + "numResults": 
num_results, + "query": query, + "includeDomains": include_domains, + "excludeDomains": exclude_domains, + "startCrawlDate": start_crawl_date, + "endCrawlDate": end_crawl_date, + "startPublishedDate": start_published_date, + "endPublishedDate": end_published_date, + } async with aiohttp.ClientSession() as session: async with session.post( f"{METAPHOR_API_URL}/search", json=params, headers=headers @@ -99,7 +154,7 @@ class MetaphorSearchAPIWrapper(BaseModel): "title": result["title"], "url": result["url"], "author": result["author"], - "date_created": result["dateCreated"], + "published_date": result["publishedDate"], } ) return cleaned_results