mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Harrison/metaphor search fix (#6387)
Co-authored-by: jeffzwang <jeffreyzhiyuanwang@gmail.com>
This commit is contained in:
parent
9bf5b0defa
commit
b7159c15cc
@ -22,7 +22,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -33,7 +33,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -42,7 +42,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -60,70 +60,39 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'results': [{'url': 'https://www.anthropic.com/index/core-views-on-ai-safety', 'title': 'Core Views on AI Safety: When, Why, What, and How', 'dateCreated': '2023-03-08', 'author': None, 'score': 0.1998831331729889}, {'url': 'https://aisafety.wordpress.com/', 'title': 'Extinction Risk from Artificial Intelligence', 'dateCreated': '2013-10-08', 'author': None, 'score': 0.19801370799541473}, {'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety', 'title': 'The simple picture on AI safety - LessWrong', 'dateCreated': '2018-05-27', 'author': 'Alex Flint', 'score': 0.19735534489154816}, {'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/', 'title': 'No Time Like The Present For AI Safety Work', 'dateCreated': '2015-05-29', 'author': None, 'score': 0.19408763945102692}, {'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world', 'title': 'So You Want to Save the World - LessWrong', 'dateCreated': '2012-01-01', 'author': 'Lukeprog', 'score': 0.18853715062141418}, {'url': 'https://openai.com/blog/planning-for-agi-and-beyond', 'title': 'Planning for AGI and beyond', 'dateCreated': '2023-02-24', 'author': 'Authors', 'score': 0.18665121495723724}, {'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html', 'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why', 'dateCreated': '2015-01-22', 'author': 'Tim Urban', 'score': 0.18604731559753418}, {'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how', 'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum', 'dateCreated': '2023-03-09', 'author': 'Jonmenaster', 'score': 0.18415069580078125}, {'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom', 'title': 'The Proof of Doom - LessWrong', 'dateCreated': '2022-03-09', 'author': 'Johnlawrenceaspden', 'score': 
0.18159329891204834}, {'url': 'https://intelligence.org/why-ai-safety/', 'title': 'Why AI Safety? - Machine Intelligence Research Institute', 'dateCreated': '2017-03-01', 'author': None, 'score': 0.1814115345478058}]}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'title': 'Core Views on AI Safety: When, Why, What, and How',\n",
|
||||
" 'url': 'https://www.anthropic.com/index/core-views-on-ai-safety',\n",
|
||||
" 'author': None,\n",
|
||||
" 'date_created': '2023-03-08'},\n",
|
||||
" {'title': 'Extinction Risk from Artificial Intelligence',\n",
|
||||
" 'url': 'https://aisafety.wordpress.com/',\n",
|
||||
" 'author': None,\n",
|
||||
" 'date_created': '2013-10-08'},\n",
|
||||
" {'title': 'The simple picture on AI safety - LessWrong',\n",
|
||||
" 'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety',\n",
|
||||
" 'author': 'Alex Flint',\n",
|
||||
" 'date_created': '2018-05-27'},\n",
|
||||
" {'title': 'No Time Like The Present For AI Safety Work',\n",
|
||||
" 'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/',\n",
|
||||
" 'author': None,\n",
|
||||
" 'date_created': '2015-05-29'},\n",
|
||||
" {'title': 'So You Want to Save the World - LessWrong',\n",
|
||||
" 'url': 'https://www.lesswrong.com/posts/5BJvusxdwNXYQ4L9L/so-you-want-to-save-the-world',\n",
|
||||
" 'author': 'Lukeprog',\n",
|
||||
" 'date_created': '2012-01-01'},\n",
|
||||
" {'title': 'Planning for AGI and beyond',\n",
|
||||
" 'url': 'https://openai.com/blog/planning-for-agi-and-beyond',\n",
|
||||
" 'author': 'Authors',\n",
|
||||
" 'date_created': '2023-02-24'},\n",
|
||||
" {'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why',\n",
|
||||
" 'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html',\n",
|
||||
" 'author': 'Tim Urban',\n",
|
||||
" 'date_created': '2015-01-22'},\n",
|
||||
" {'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum',\n",
|
||||
" 'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how',\n",
|
||||
" 'author': 'Jonmenaster',\n",
|
||||
" 'date_created': '2023-03-09'},\n",
|
||||
" {'title': 'The Proof of Doom - LessWrong',\n",
|
||||
" 'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom',\n",
|
||||
" 'author': 'Johnlawrenceaspden',\n",
|
||||
" 'date_created': '2022-03-09'},\n",
|
||||
" {'title': 'Why AI Safety? - Machine Intelligence Research Institute',\n",
|
||||
" 'url': 'https://intelligence.org/why-ai-safety/',\n",
|
||||
" 'author': None,\n",
|
||||
" 'date_created': '2017-03-01'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search.results(\"The best blog post about AI safety is definitely this: \", 10)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Adding filters\n",
|
||||
"We can also add filters to our search. \n",
|
||||
"include_domains: Optional[List[str]] - List of domains to include in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n",
|
||||
"exclude_domains: Optional[List[str]] - List of domains to exclude in the search. If specified, results will not include links from these domains. Only one of include_domains and exclude_domains should be specified.\n",
|
||||
"start_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If start_crawl_date is specified, results will only include links that were crawled after start_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n",
|
||||
"end_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If end_crawl_date is specified, results will only include links that were crawled before end_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n",
|
||||
"start_published_date: Optional[str] - If specified, only links with a published date after start_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if start_published_date is specified.\n",
|
||||
"end_published_date: Optional[str] - If specified, only links with a published date before end_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if end_published_date is specified.\n",
|
||||
"\n",
|
||||
"See full docs [here](https://metaphorapi.readme.io/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search.results(\"The best blog post about AI safety is definitely this: \", 10, include_domains=[\"lesswrong.com\"], start_published_date=\"2019-01-01\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@ -139,6 +108,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install playwright\n",
|
||||
"from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n",
|
||||
"from langchain.tools.playwright.utils import (\n",
|
||||
" create_async_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n",
|
||||
@ -158,44 +128,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find a tweet about AI safety using Metaphor Search.\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Metaphor Search Results JSON\",\n",
|
||||
" \"action_input\": {\n",
|
||||
" \"query\": \"interesting tweet AI safety\",\n",
|
||||
" \"num_results\": 1\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m{'results': [{'url': 'https://safe.ai/', 'title': 'Center for AI Safety', 'dateCreated': '2022-01-01', 'author': None, 'score': 0.18083244562149048}]}\n",
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[{'title': 'Center for AI Safety', 'url': 'https://safe.ai/', 'author': None, 'date_created': '2022-01-01'}]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI need to navigate to the URL provided in the search results to find the tweet.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'I need to navigate to the URL provided in the search results to find the tweet.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
@ -241,7 +174,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.11"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
@ -25,11 +25,26 @@ class MetaphorSearchResults(BaseTool):
|
||||
self,
|
||||
query: str,
|
||||
num_results: int,
|
||||
include_domains: Optional[List[str]] = None,
|
||||
exclude_domains: Optional[List[str]] = None,
|
||||
start_crawl_date: Optional[str] = None,
|
||||
end_crawl_date: Optional[str] = None,
|
||||
start_published_date: Optional[str] = None,
|
||||
end_published_date: Optional[str] = None,
|
||||
run_manager: Optional[CallbackManagerForToolRun] = None,
|
||||
) -> Union[List[Dict], str]:
|
||||
"""Use the tool."""
|
||||
try:
|
||||
return self.api_wrapper.results(query, num_results)
|
||||
return self.api_wrapper.results(
|
||||
query,
|
||||
num_results,
|
||||
include_domains,
|
||||
exclude_domains,
|
||||
start_crawl_date,
|
||||
end_crawl_date,
|
||||
start_published_date,
|
||||
end_published_date,
|
||||
)
|
||||
except Exception as e:
|
||||
return repr(e)
|
||||
|
||||
@ -37,10 +52,25 @@ class MetaphorSearchResults(BaseTool):
|
||||
self,
|
||||
query: str,
|
||||
num_results: int,
|
||||
include_domains: Optional[List[str]] = None,
|
||||
exclude_domains: Optional[List[str]] = None,
|
||||
start_crawl_date: Optional[str] = None,
|
||||
end_crawl_date: Optional[str] = None,
|
||||
start_published_date: Optional[str] = None,
|
||||
end_published_date: Optional[str] = None,
|
||||
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
|
||||
) -> Union[List[Dict], str]:
|
||||
"""Use the tool asynchronously."""
|
||||
try:
|
||||
return await self.api_wrapper.results_async(query, num_results)
|
||||
return await self.api_wrapper.results_async(
|
||||
query,
|
||||
num_results,
|
||||
include_domains,
|
||||
exclude_domains,
|
||||
start_crawl_date,
|
||||
end_crawl_date,
|
||||
start_published_date,
|
||||
end_published_date,
|
||||
)
|
||||
except Exception as e:
|
||||
return repr(e)
|
||||
|
@ -3,7 +3,7 @@
|
||||
In order to set this up, follow instructions at:
|
||||
"""
|
||||
import json
|
||||
from typing import Dict, List
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
@ -25,9 +25,28 @@ class MetaphorSearchAPIWrapper(BaseModel):
|
||||
|
||||
extra = Extra.forbid
|
||||
|
||||
def _metaphor_search_results(self, query: str, num_results: int) -> List[dict]:
|
||||
def _metaphor_search_results(
|
||||
self,
|
||||
query: str,
|
||||
num_results: int,
|
||||
include_domains: Optional[List[str]] = None,
|
||||
exclude_domains: Optional[List[str]] = None,
|
||||
start_crawl_date: Optional[str] = None,
|
||||
end_crawl_date: Optional[str] = None,
|
||||
start_published_date: Optional[str] = None,
|
||||
end_published_date: Optional[str] = None,
|
||||
) -> List[dict]:
|
||||
headers = {"X-Api-Key": self.metaphor_api_key}
|
||||
params = {"numResults": num_results, "query": query}
|
||||
params = {
|
||||
"numResults": num_results,
|
||||
"query": query,
|
||||
"includeDomains": include_domains,
|
||||
"excludeDomains": exclude_domains,
|
||||
"startCrawlDate": start_crawl_date,
|
||||
"endCrawlDate": end_crawl_date,
|
||||
"startPublishedDate": start_published_date,
|
||||
"endPublishedDate": end_published_date,
|
||||
}
|
||||
response = requests.post(
|
||||
# type: ignore
|
||||
f"{METAPHOR_API_URL}/search",
|
||||
@ -50,7 +69,17 @@ class MetaphorSearchAPIWrapper(BaseModel):
|
||||
|
||||
return values
|
||||
|
||||
def results(self, query: str, num_results: int) -> List[Dict]:
|
||||
def results(
|
||||
self,
|
||||
query: str,
|
||||
num_results: int,
|
||||
include_domains: Optional[List[str]] = None,
|
||||
exclude_domains: Optional[List[str]] = None,
|
||||
start_crawl_date: Optional[str] = None,
|
||||
end_crawl_date: Optional[str] = None,
|
||||
start_published_date: Optional[str] = None,
|
||||
end_published_date: Optional[str] = None,
|
||||
) -> List[Dict]:
|
||||
"""Run query through Metaphor Search and return metadata.
|
||||
|
||||
Args:
|
||||
@ -62,21 +91,47 @@ class MetaphorSearchAPIWrapper(BaseModel):
|
||||
title - The title of the page
|
||||
url - The url
|
||||
author - Author of the content, if applicable. Otherwise, None.
|
||||
date_created - Estimated date created,
|
||||
published_date - Estimated date published
|
||||
in YYYY-MM-DD format. Otherwise, None.
|
||||
"""
|
||||
raw_search_results = self._metaphor_search_results(
|
||||
query, num_results=num_results
|
||||
query,
|
||||
num_results=num_results,
|
||||
include_domains=include_domains,
|
||||
exclude_domains=exclude_domains,
|
||||
start_crawl_date=start_crawl_date,
|
||||
end_crawl_date=end_crawl_date,
|
||||
start_published_date=start_published_date,
|
||||
end_published_date=end_published_date,
|
||||
)
|
||||
return self._clean_results(raw_search_results)
|
||||
|
||||
async def results_async(self, query: str, num_results: int) -> List[Dict]:
|
||||
async def results_async(
|
||||
self,
|
||||
query: str,
|
||||
num_results: int,
|
||||
include_domains: Optional[List[str]] = None,
|
||||
exclude_domains: Optional[List[str]] = None,
|
||||
start_crawl_date: Optional[str] = None,
|
||||
end_crawl_date: Optional[str] = None,
|
||||
start_published_date: Optional[str] = None,
|
||||
end_published_date: Optional[str] = None,
|
||||
) -> List[Dict]:
|
||||
"""Get results from the Metaphor Search API asynchronously."""
|
||||
|
||||
# Function to perform the API call
|
||||
async def fetch() -> str:
|
||||
headers = {"X-Api-Key": self.metaphor_api_key}
|
||||
params = {"numResults": num_results, "query": query}
|
||||
params = {
|
||||
"numResults": num_results,
|
||||
"query": query,
|
||||
"includeDomains": include_domains,
|
||||
"excludeDomains": exclude_domains,
|
||||
"startCrawlDate": start_crawl_date,
|
||||
"endCrawlDate": end_crawl_date,
|
||||
"startPublishedDate": start_published_date,
|
||||
"endPublishedDate": end_published_date,
|
||||
}
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{METAPHOR_API_URL}/search", json=params, headers=headers
|
||||
@ -99,7 +154,7 @@ class MetaphorSearchAPIWrapper(BaseModel):
|
||||
"title": result["title"],
|
||||
"url": result["url"],
|
||||
"author": result["author"],
|
||||
"date_created": result["dateCreated"],
|
||||
"published_date": result["publishedDate"],
|
||||
}
|
||||
)
|
||||
return cleaned_results
|
||||
|
Loading…
Reference in New Issue
Block a user