langchain/docs/extras/integrations/tools/metaphor_search.ipynb

420 lines
28 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Metaphor Search"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Metaphor is a search engine fully designed to be used by LLMs. You can search and then get the contents for any page.\n",
"\n",
"This notebook goes over how to use Metaphor search.\n",
"\n",
"First, you need to set up the proper API keys and environment variables. Get 1000 free searches/month [here](https://platform.metaphor.systems/).\n",
"\n",
"Then enter your API key as an environment variable."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"METAPHOR_API_KEY\"] = \"...\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using their SDK\n",
"\n",
"This is the newer and more supported way to use the Metaphor API - via their SDK"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# !pip install metaphor-python"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from metaphor_python import Metaphor\n",
"\n",
"client = Metaphor(api_key=os.environ[\"METAPHOR_API_KEY\"])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import tool\n",
"from typing import List"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"@tool\n",
"def search(query: str):\n",
" \"\"\"Call search engine with a query.\"\"\"\n",
" return client.search(query)\n",
"\n",
"@tool\n",
"def get_contents(ids: List[str]):\n",
" \"\"\"Get contents of a webpage.\n",
" \n",
" The ids passed in should be a list of ids as fetched from `search`.\n",
" \"\"\"\n",
" return client.get_contents(ids)\n",
"\n",
"@tool\n",
"def find_similar(url: str):\n",
" \"\"\"Get search results similar to a given URL.\n",
" \n",
" The url passed in should be a URL returned from `search`\n",
" \"\"\"\n",
" return client.find_similar(url)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"tools = [search, get_contents, find_similar]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Use in an agent"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"llm = ChatOpenAI(temperature=0)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import OpenAIFunctionsAgent\n",
"from langchain.schema import SystemMessage\n",
"system_message = SystemMessage(content=\"You are a web researcher who uses search engines to look up information.\")\n",
"prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)\n",
"agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import AgentExecutor\n",
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `search` with `{'query': 'hottest AI agent startups'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3mSearchResponse(results=[Result(title='The AI Hot 75', url='https://www.nfx.com/post/generative-ai-hot-75-list', id='pb0vypILvIakocJicoqm4w', score=0.19051167368888855, published_date='2023-04-27', author='NFX', extract=None), Result(title='10 new AI unicorns flying high in 2022', url='https://venturebeat.com/ai/10-new-ai-unicorns-flying-high-in-2022/', id='i4k-4QKod9mMo2bi8m19bw', score=0.18090465664863586, published_date='2022-06-27', author='Sharon Goldman', extract=None), Result(title='The 13 most promising early-stage artificial-intelligence startups of 2022, according to VCs investing in companies building technical infrastructure and developing tools', url='https://www.businessinsider.com/13-most-promising-earlier-stage-ai-startups-this-year-2022-11', id='ZA52Zk-yVy9skxWZtxOSgg', score=0.1796971708536148, published_date='2022-11-17', author='Sindhu Sundar', extract=None), Result(title='What to Watch in AI | The Generalist', url='https://www.generalist.com/briefing/what-to-watch-in-ai-2', id='cmKJmULl_vK4V-gth6yzWg', score=0.17643707990646362, published_date='2023-03-26', author='Artwork byÂ', extract=None), Result(title='Hot AI startups in San Francisco (2022) | Best', url='https://www.scoop.it/topic/best/p/4137745212/2022/11/08/hot-ai-startups-in-san-francisco-2022', id='wjo6lznHL-VI7jBOsvuwzA', score=0.17351359128952026, published_date='2022-11-08', author='Dr Stefan Gruenwald', extract=None), Result(title='New and recently funded AI Startups', url='https://www.ai-startups.org/', id='M0Cgw6hYIbLBvU6y-9oEBg', score=0.17336121201515198, published_date=None, author=None, extract=None), Result(title='A Search Engine for Machine Intelligence', url='https://bellow.ai/', id='bdYc6hvHww_JvLv9k8NhPA', score=0.17326533794403076, published_date='2023-01-01', author=None, extract=None), Result(title='26 AI experts who left Google to start new companies in 2022 as machine learning ushers in a new era of lifelike AI', 
url='https://www.businessinsider.com/26-ai-machine-learning-experts-left-google-for-startups-2022-12', id='MKxSDXGxC155eyQLva1Ksg', score=0.16997841000556946, published_date='2022-12-26', author='Matthew Lynley', extract=None), Result(title='Discover AI Startups', url='https://appliedai.com/discover', id='e_SK0_agdSNfvbf0Xn_wPQ', score=0.1694592982530594, published_date='2023-01-01', author=None, extract=None), Result(title='Careers at Top AI Startups', url='https://aijobnetwork.com/', id='jEBgBMmwMnOsEDZkxOWBWA', score=0.1688486635684967, published_date='2023-01-01', author=None, extract=None)], api=<metaphor_python.api.Metaphor object at 0x10f092d10>)\u001b[0m\u001b[32;1m\u001b[1;3m\n",
"Invoking: `get_contents` with `{'ids': ['pb0vypILvIakocJicoqm4w', 'i4k-4QKod9mMo2bi8m19bw', 'ZA52Zk-yVy9skxWZtxOSgg']}`\n",
"\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0m\u001b[33;1m\u001b[1;3mGetContentsResponse(contents=[DocumentContent(id='pb0vypILvIakocJicoqm4w', url='https://www.nfx.com/post/generative-ai-hot-75-list', title='The AI Hot 75', extract='<div><div><p><em>The hottest up &amp; comers in generative AI. Early-stage companies showing leading indicators of future greatness. Curated by NFX.</em></p>\\n<p></p>\\n<p>No doubt generative AI is extremely <a href=\"https://www.nfx.com/post/find-the-fast-moving-water\">fast-moving water</a>. But while media hype focuses on incumbents and household names, were doubling down on the “AI underground.”</p>\\n<p>The generative AI groundswell is disproportionately fueled by a dense network of exceptional, early-stage companies. Weve seen it growing throughout San Francisco, in “Cerebral Valley” at NFX HQ, inside hacker houses, and via 100s of meetings with up-and-coming AI founders.</p>\\n<p>After we open-sourced our <a href=\"https://www.nfx.com/post/generative-ai-tech-market-map\">Generative AI market map</a> a few months ago, we went to work analyzing this network for early indicators of future greatness. It is going to produce the next generation of unicorns, at a faster rate and at lower cost than any other startups before them.</p>\\n<p>Today we are making public our findings. Meet the <a href=\"https://www.nfx.com/post/generative-ai-hot-75-list?#AI-Hot-75\">AI Hot 75</a>.</p>\\n<h2><strong>Identifying the AI Hot 75 </strong></h2>\\n<p>Broadly speaking, we searched for leading indicators that an early-stage generative AI company has broken into the right networks and has found a “white hot center” (a highway to massive growth).</p>\\n<p>With those guiding principles in place, we gave preference to early-stage companies generally at the Seed and Series A stage. 
We narrowed the list according to: total amount raised, status of key investors, proximity to a geographical AI center of excellence, and “buzz” which we imputed from various virality, traction, and WOM factors.</p>\\n<p>We finalized our selections based on in-network reputation \\xa0i.e. if a company has been recommended or regarded by peers or experts in our network.</p>\\n<p>This list will look different from many that youve already seen. While some of the companies are household names already, most arent the biggest companies out there yet.</p>\\n<p>Instead, these are the companies indicative of the next generation. The ones with notable momentum. The hot ones worth following.</p>\\n<h2><strong>AI Hot 75</strong></h2>\\n<p><em>In addition to using proprietary NFX data for this list, we also referred to Pitchbook, Crunchbase, our network of friends and founders, </em><a href=\"https://twitter.com/NFX/status/1624103283236425729\"><em>The NFX AI Social Club</em></a><em>, and used ChatGPT to stress test analysis and language as well. </em></p>\\n</div></div>'), DocumentContent(id='i4k-4QKod9mMo2bi8m19bw', url='https://venturebeat.com/ai/10-new-ai-unicorns-flying-high-in-2022/', title='10 new AI unicorns flying high in 2022', extract='<div><div>\\n\\t<article>\\n\\t<div>\\n\\t<div>\\n<p><em>Check out all the on-demand sessions from the Intelligent Security Summit <a href=\"https://avolio.swapcard.com/intelligentsecuritysummit2022/registrations/Start?utm_source=vb&amp;utm_medium=boiler&amp;utm_content=ondemand&amp;utm_campaign=IS22_BoilerPlates\">here</a></em>.</p>\\n<hr />\\n</div><p>Technology venture capital deals may be <a href=\"https://www.wsj.com/articles/corporate-investors-fill-startup-funding-gap-as-vc-firms-pull-back-11655851713\">slowing down</a>, but investment in artificial intelligence (AI) companies continues to boom. 
Investments in AI research and applications are set to hit <a href=\"https://www.idc.com/getdoc.jsp?containerId=prUS47482321\">$500 billion</a> by 2024, according to research firm IDC, while PwC predicts AI will contribute <a href=\"https://www.pwc.com/gx/en/issues/data-and-analytics/publications/artificial-intelligence-study.html\">$15.7 trillion</a> to the global economy by 2030.\\xa0</p>\\n
"\n",
"1. The AI Hot 75: This list, curated by NFX, showcases early-stage companies in generative AI that show leading indicators of future greatness. These companies are at the forefront of the generative AI groundswell. [Read more](https://www.nfx.com/post/generative-ai-hot-75-list)\n",
"\n",
"2. 10 new AI unicorns flying high in 2022: This article highlights ten AI companies from the newest unicorn cohort that are soaring in 2022. One example is Abnormal Security, which uses behavioral AI to fight email attacks. [Read more](https://venturebeat.com/ai/10-new-ai-unicorns-flying-high-in-2022/)\n",
"\n",
"3. The 13 most promising early-stage artificial-intelligence startups of 2022: According to VCs investing in companies building technical infrastructure and developing tools, these 13 AI startups demonstrate the most potential. One example is OtterTune, a machine-learning-based platform to manage databases. [Read more](https://www.businessinsider.com/13-most-promising-earlier-stage-ai-startups-this-year-2022-11)\n",
"\n",
"Please note that these are just a few examples, and there are many more AI agent startups out there.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'Here are some of the hottest AI agent startups and what they do:\\n\\n1. The AI Hot 75: This list, curated by NFX, showcases early-stage companies in generative AI that show leading indicators of future greatness. These companies are at the forefront of the generative AI groundswell. [Read more](https://www.nfx.com/post/generative-ai-hot-75-list)\\n\\n2. 10 new AI unicorns flying high in 2022: This article highlights ten AI companies from the newest unicorn cohort that are soaring in 2022. One example is Abnormal Security, which uses behavioral AI to fight email attacks. [Read more](https://venturebeat.com/ai/10-new-ai-unicorns-flying-high-in-2022/)\\n\\n3. The 13 most promising early-stage artificial-intelligence startups of 2022: According to VCs investing in companies building technical infrastructure and developing tools, these 13 AI startups demonstrate the most potential. One example is OtterTune, a machine-learning-based platform to manage databases. [Read more](https://www.businessinsider.com/13-most-promising-earlier-stage-ai-startups-this-year-2022-11)\\n\\nPlease note that these are just a few examples, and there are many more AI agent startups out there.'"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_executor.run(\"Find the hottest AI agent startups and what they do\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using the tool wrapper\n",
"\n",
"This is the old way of using Metaphor - through our own in-house integration."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from langchain.utilities import MetaphorSearchAPIWrapper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"search = MetaphorSearchAPIWrapper()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Call the API\n",
"`results` takes in a Metaphor-optimized search query and a number of results (up to 500). It returns a list of results, each with a title, URL, author, and published date."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'title': 'Core Views on AI Safety: When, Why, What, and How',\n",
" 'url': 'https://www.anthropic.com/index/core-views-on-ai-safety',\n",
" 'author': None,\n",
" 'published_date': '2023-03-08'},\n",
" {'title': 'Extinction Risk from Artificial Intelligence',\n",
" 'url': 'https://aisafety.wordpress.com/',\n",
" 'author': None,\n",
" 'published_date': '2013-10-08'},\n",
" {'title': 'The simple picture on AI safety - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety',\n",
" 'author': 'Alex Flint',\n",
" 'published_date': '2018-05-27'},\n",
" {'title': 'No Time Like The Present For AI Safety Work',\n",
" 'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/',\n",
" 'author': None,\n",
" 'published_date': '2015-05-29'},\n",
" {'title': 'A plea for solutionism on AI safety - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/ASMX9ss3J5G3GZdok/a-plea-for-solutionism-on-ai-safety',\n",
" 'author': 'Jasoncrawford',\n",
" 'published_date': '2023-06-09'},\n",
" {'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why',\n",
" 'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html',\n",
" 'author': 'Tim Urban',\n",
" 'published_date': '2015-01-22'},\n",
" {'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum',\n",
" 'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how',\n",
" 'author': 'Jonmenaster',\n",
" 'published_date': '2023-03-09'},\n",
" {'title': \"[Linkpost] Sam Altman's 2015 Blog Posts Machine Intelligence Parts 1 & 2 - LessWrong\",\n",
" 'url': 'https://www.lesswrong.com/posts/QnBZkNJNbJK9k5Xi7/linkpost-sam-altman-s-2015-blog-posts-machine-intelligence',\n",
" 'author': 'Olivia Jimenez',\n",
" 'published_date': '2023-04-28'},\n",
" {'title': 'The Proof of Doom - LessWrong',\n",
" 'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom',\n",
" 'author': 'Johnlawrenceaspden',\n",
" 'published_date': '2022-03-09'},\n",
" {'title': \"Anthropic's Core Views on AI Safety - LessWrong\",\n",
" 'url': 'https://www.lesswrong.com/posts/xhKr5KtvdJRssMeJ3/anthropic-s-core-views-on-ai-safety',\n",
" 'author': 'Zac Hatfield-Dodds',\n",
" 'published_date': '2023-03-09'}]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"search.results(\"The best blog post about AI safety is definitely this: \", 10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Adding filters\n",
"We can also add filters to our search. \n",
"\n",
"include_domains: Optional[List[str]] - List of domains to include in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n",
"\n",
"exclude_domains: Optional[List[str]] - List of domains to exclude from the search. If specified, results will not come from these domains. Only one of include_domains and exclude_domains should be specified.\n",
"\n",
"start_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If start_crawl_date is specified, results will only include links that were crawled after start_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n",
"\n",
"end_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If end_crawl_date is specified, results will only include links that were crawled before end_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n",
"\n",
"start_published_date: Optional[str] - If specified, only links with a published date after start_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if start_published_date is specified.\n",
"\n",
"end_published_date: Optional[str] - If specified, only links with a published date before end_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if end_published_date is specified.\n",
"\n",
"See full docs [here](https://metaphorapi.readme.io/)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"search.results(\n",
" \"The best blog post about AI safety is definitely this: \",\n",
" 10,\n",
" include_domains=[\"lesswrong.com\"],\n",
" start_published_date=\"2019-01-01\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Use Metaphor as a tool\n",
"Metaphor can be used as a tool that gets URLs that other tools, such as browsing tools, can then use."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install playwright\n",
"from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n",
"from langchain.tools.playwright.utils import (\n",
" create_async_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n",
")\n",
"\n",
"async_browser = create_async_playwright_browser()\n",
"toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=async_browser)\n",
"tools = toolkit.get_tools()\n",
"\n",
"tools_by_name = {tool.name: tool for tool in tools}\n",
"print(tools_by_name.keys())\n",
"navigate_tool = tools_by_name[\"navigate_browser\"]\n",
"extract_text = tools_by_name[\"extract_text\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import initialize_agent, AgentType\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.tools import MetaphorSearchResults\n",
"\n",
"llm = ChatOpenAI(model_name=\"gpt-4\", temperature=0.7)\n",
"\n",
"metaphor_tool = MetaphorSearchResults(api_wrapper=search)\n",
"\n",
"agent_chain = initialize_agent(\n",
" [metaphor_tool, extract_text, navigate_tool],\n",
" llm,\n",
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
" verbose=True,\n",
")\n",
"\n",
"agent_chain.run(\n",
" \"find me an interesting tweet about AI safety using Metaphor, then tell me the first sentence in the post. Do not finish until able to retrieve the first sentence.\"\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
},
"vscode": {
"interpreter": {
"hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}