Add Metaphor new field and reformat docs (#8022)

This PR reformats our python notebook example and also adds a new field
we have.

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Jeffrey Wang 2023-07-20 15:50:54 -07:00 committed by GitHub
parent e2a99bd169
commit 62d0475c29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 12 deletions

View File

@ -13,9 +13,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Metaphor is a search engine fully designed to be used by LLMs. You can search and then get the contents for any page.\n",
"\n",
"This notebook goes over how to use Metaphor search.\n",
"\n",
"First, you need to set up the proper API keys and environment variables. Request an API key [here](Sign up for early access here).\n",
"First, you need to set up the proper API keys and environment variables. Get 1000 free searches/month [here](https://platform.metaphor.systems/).\n",
"\n",
"Then enter your API key as an environment variable."
]
@ -74,14 +76,20 @@
"source": [
"# Adding filters\n",
"We can also add filters to our search. \n",
"\n",
"include_domains: Optional[List[str]] - List of domains to include in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n",
"\n",
"exclude_domains: Optional[List[str]] - List of domains to exclude in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n",
"\n",
"start_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If start_crawl_date is specified, results will only include links that were crawled after start_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n",
"\n",
"end_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If endCrawlDate is specified, results will only include links that were crawled before end_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n",
"\n",
"start_published_date: Optional[str] - If specified, only links with a published date after start_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if start_published_date is specified.\n",
"\n",
"end_published_date: Optional[str] - If specified, only links with a published date before end_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if end_published_date is specified.\n",
"\n",
"See full docs [here](https://metaphorapi.readme.io/)"
"See full docs [here](https://metaphorapi.readme.io/)."
]
},
{
@ -154,13 +162,6 @@
" \"find me an interesting tweet about AI safety using Metaphor, then tell me the first sentence in the post. Do not finish until able to retrieve the first sentence.\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

View File

@ -31,6 +31,7 @@ class MetaphorSearchResults(BaseTool):
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
use_autoprompt: Optional[bool] = None,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> Union[List[Dict], str]:
"""Use the tool."""
@ -44,6 +45,7 @@ class MetaphorSearchResults(BaseTool):
end_crawl_date,
start_published_date,
end_published_date,
use_autoprompt,
)
except Exception as e:
return repr(e)
@ -58,6 +60,7 @@ class MetaphorSearchResults(BaseTool):
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
use_autoprompt: Optional[bool] = None,
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
) -> Union[List[Dict], str]:
"""Use the tool asynchronously."""
@ -71,6 +74,7 @@ class MetaphorSearchResults(BaseTool):
end_crawl_date,
start_published_date,
end_published_date,
use_autoprompt,
)
except Exception as e:
return repr(e)

View File

@ -35,6 +35,7 @@ class MetaphorSearchAPIWrapper(BaseModel):
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
use_autoprompt: Optional[bool] = None,
) -> List[dict]:
headers = {"X-Api-Key": self.metaphor_api_key}
params = {
@ -46,6 +47,7 @@ class MetaphorSearchAPIWrapper(BaseModel):
"endCrawlDate": end_crawl_date,
"startPublishedDate": start_published_date,
"endPublishedDate": end_published_date,
"useAutoprompt": use_autoprompt,
}
response = requests.post(
# type: ignore
@ -56,7 +58,6 @@ class MetaphorSearchAPIWrapper(BaseModel):
response.raise_for_status()
search_results = response.json()
print(search_results)
return search_results["results"]
@root_validator(pre=True)
@ -79,21 +80,29 @@ class MetaphorSearchAPIWrapper(BaseModel):
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
use_autoprompt: Optional[bool] = None,
) -> List[Dict]:
"""Run query through Metaphor Search and return metadata.
Args:
query: The query to search for.
num_results: The number of results to return.
include_domains: A list of domains to include in the search. Only one of include_domains and exclude_domains should be defined.
exclude_domains: A list of domains to exclude from the search. Only one of include_domains and exclude_domains should be defined.
start_crawl_date: If specified, only pages we crawled after start_crawl_date will be returned.
end_crawl_date: If specified, only pages we crawled before end_crawl_date will be returned.
start_published_date: If specified, only pages published after start_published_date will be returned.
end_published_date: If specified, only pages published before end_published_date will be returned.
use_autoprompt: If true, we turn your query into a more Metaphor-friendly query. Adds latency.
Returns:
A list of dictionaries with the following keys:
title - The title of the
title - The title of the page
url - The url
author - Author of the content, if applicable. Otherwise, None.
published_date - Estimated date published
in YYYY-MM-DD format. Otherwise, None.
"""
""" # noqa: E501
raw_search_results = self._metaphor_search_results(
query,
num_results=num_results,
@ -103,6 +112,7 @@ class MetaphorSearchAPIWrapper(BaseModel):
end_crawl_date=end_crawl_date,
start_published_date=start_published_date,
end_published_date=end_published_date,
use_autoprompt=use_autoprompt,
)
return self._clean_results(raw_search_results)
@ -116,6 +126,7 @@ class MetaphorSearchAPIWrapper(BaseModel):
end_crawl_date: Optional[str] = None,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
use_autoprompt: Optional[bool] = None,
) -> List[Dict]:
"""Get results from the Metaphor Search API asynchronously."""
@ -131,6 +142,7 @@ class MetaphorSearchAPIWrapper(BaseModel):
"endCrawlDate": end_crawl_date,
"startPublishedDate": start_published_date,
"endPublishedDate": end_published_date,
"useAutoprompt": use_autoprompt,
}
async with aiohttp.ClientSession() as session:
async with session.post(