{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Metaphor Search" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Metaphor is a search engine fully designed to be used by LLMs. You can search and then get the contents for any page.\n", "\n", "This notebook goes over how to use Metaphor search.\n", "\n", "First, you need to set up the proper API keys and environment variables. Get 1000 free searches/month [here](https://platform.metaphor.systems/).\n", "\n", "Then enter your API key as an environment variable." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ[\"METAPHOR_API_KEY\"] = \"...\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using their SDK\n", "\n", "This is the newer and more supported way to use the Metaphor API - via their SDK" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# !pip install metaphor-python" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from metaphor_python import Metaphor\n", "\n", "client = Metaphor(api_key=os.environ[\"METAPHOR_API_KEY\"])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from langchain.agents import tool\n", "from typing import List" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "@tool\n", "def search(query: str):\n", " \"\"\"Call search engine with a query.\"\"\"\n", " return client.search(query)\n", "\n", "@tool\n", "def get_contents(ids: List[str]):\n", " \"\"\"Get contents of a webpage.\n", " \n", " The ids passed in should be a list of ids as fetched from `search`.\n", " \"\"\"\n", " return client.get_contents(ids)\n", "\n", "@tool\n", "def find_similar(url: str):\n", " \"\"\"Get search results similar to a given URL.\n", " \n", " The url passed in should be a URL returned from `search`\n", " \"\"\"\n", " return client.find_similar(url)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "tools = [search, get_contents, find_similar]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Use in an agent" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "from langchain.chat_models import ChatOpenAI\n", "llm = ChatOpenAI(temperature=0)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "from langchain.agents import OpenAIFunctionsAgent\n", "from langchain.schema import SystemMessage\n", "system_message = SystemMessage(content=\"You are a web researcher who uses search engines to look up information.\")\n", "prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)\n", "agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "from langchain.agents import AgentExecutor\n", "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", "\u001b[32;1m\u001b[1;3m\n", "Invoking: `search` with `{'query': 'hottest AI agent startups'}`\n", "\n", "\n", "\u001b[0m\u001b[36;1m\u001b[1;3mSearchResponse(results=[Result(title='The AI Hot 75', url='https://www.nfx.com/post/generative-ai-hot-75-list', id='pb0vypILvIakocJicoqm4w', score=0.19051167368888855, published_date='2023-04-27', author='NFX', extract=None), Result(title='10 new AI unicorns flying high in 2022', url='https://venturebeat.com/ai/10-new-ai-unicorns-flying-high-in-2022/', id='i4k-4QKod9mMo2bi8m19bw', score=0.18090465664863586, published_date='2022-06-27', author='Sharon Goldman', extract=None), Result(title='The 13 most promising early-stage artificial-intelligence startups of 2022, according to VCs investing in companies building technical infrastructure and developing tools', url='https://www.businessinsider.com/13-most-promising-earlier-stage-ai-startups-this-year-2022-11', id='ZA52Zk-yVy9skxWZtxOSgg', score=0.1796971708536148, published_date='2022-11-17', author='Sindhu Sundar', extract=None), Result(title='What to Watch in AI | The Generalist', url='https://www.generalist.com/briefing/what-to-watch-in-ai-2', id='cmKJmULl_vK4V-gth6yzWg', score=0.17643707990646362, published_date='2023-03-26', author='Artwork byÂ', extract=None), Result(title='Hot AI startups in San Francisco (2022) | Best', url='https://www.scoop.it/topic/best/p/4137745212/2022/11/08/hot-ai-startups-in-san-francisco-2022', id='wjo6lznHL-VI7jBOsvuwzA', score=0.17351359128952026, published_date='2022-11-08', author='Dr Stefan Gruenwald', extract=None), Result(title='New and recently funded AI Startups', url='https://www.ai-startups.org/', id='M0Cgw6hYIbLBvU6y-9oEBg', score=0.17336121201515198, published_date=None, author=None, extract=None), Result(title='A Search Engine for Machine Intelligence', url='https://bellow.ai/', id='bdYc6hvHww_JvLv9k8NhPA', score=0.17326533794403076, published_date='2023-01-01', author=None, extract=None), Result(title='26 AI experts who left Google to start new companies in 2022 as machine learning ushers in a new era of lifelike AI', url='https://www.businessinsider.com/26-ai-machine-learning-experts-left-google-for-startups-2022-12', id='MKxSDXGxC155eyQLva1Ksg', score=0.16997841000556946, published_date='2022-12-26', author='Matthew Lynley', extract=None), Result(title='Discover AI Startups', url='https://appliedai.com/discover', id='e_SK0_agdSNfvbf0Xn_wPQ', score=0.1694592982530594, published_date='2023-01-01', author=None, extract=None), Result(title='Careers at Top AI Startups', url='https://aijobnetwork.com/', id='jEBgBMmwMnOsEDZkxOWBWA', score=0.1688486635684967, published_date='2023-01-01', author=None, extract=None)], api=)\u001b[0m\u001b[32;1m\u001b[1;3m\n", "Invoking: `get_contents` with `{'ids': ['pb0vypILvIakocJicoqm4w', 'i4k-4QKod9mMo2bi8m19bw', 'ZA52Zk-yVy9skxWZtxOSgg']}`\n", "\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[0m\u001b[33;1m\u001b[1;3mGetContentsResponse(contents=[DocumentContent(id='pb0vypILvIakocJicoqm4w', url='https://www.nfx.com/post/generative-ai-hot-75-list', title='The AI Hot 75', extract='

The hottest up & comers in generative AI. Early-stage companies showing leading indicators of future greatness. Curated by NFX.

\\n

\\n

No doubt generative AI is extremely fast-moving water. But while media hype focuses on incumbents and household names, we’re doubling down on the “AI underground.”

\\n

The generative AI groundswell is disproportionately fueled by a dense network of exceptional, early-stage companies. We’ve seen it growing throughout San Francisco, in “Cerebral Valley” at NFX HQ, inside hacker houses, and via 100’s of meetings with up-and-coming AI founders.

\\n

After we open-sourced our Generative AI market map a few months ago, we went to work analyzing this network for early indicators of future greatness. It is going to produce the next generation of unicorns, at a faster rate and at lower cost than any other startups before them.

\\n

Today we are making public our findings. Meet the AI Hot 75.

\\n

Identifying the AI Hot 75

\\n

Broadly speaking, we searched for leading indicators that an early-stage generative AI company has broken into the right networks and has found a “white hot center” (a highway to massive growth).

\\n

With those guiding principles in place, we gave preference to early-stage companies generally at the Seed and Series A stage. We narrowed the list according to: total amount raised, status of key investors, proximity to a geographical AI center of excellence, and “buzz” – which we imputed from various virality, traction, and WOM factors.

\\n

We finalized our selections based on in-network reputation –\\xa0i.e. if a company has been recommended or regarded by peers or experts in our network.

\\n

This list will look different from many that you’ve already seen. While some of the companies are household names already, most aren’t the biggest companies out there – yet.

\\n

Instead, these are the companies indicative of the next generation. The ones with notable momentum. The hot ones worth following.

\\n

AI Hot 75

\\n

In addition to using proprietary NFX data for this list, we also referred to Pitchbook, Crunchbase, our network of friends and founders, The NFX AI Social Club, and used ChatGPT to stress test analysis and language as well.

\\n
'), DocumentContent(id='i4k-4QKod9mMo2bi8m19bw', url='https://venturebeat.com/ai/10-new-ai-unicorns-flying-high-in-2022/', title='10 new AI unicorns flying high in 2022', extract='
\\n\\t
\\n\\t
\\n\\t
\\n

Check out all the on-demand sessions from the Intelligent Security Summit here.

\\n
\\n

Technology venture capital deals may be slowing down, but investment in artificial intelligence (AI) companies continues to boom. Investments in AI research and applications are set to hit $500 billion by 2024, according to research firm IDC, while PwC predicts AI will contribute $15.7 trillion to the global economy by 2030.\\xa0

\\n

So, it’s no surprise that among the 206 new 2022 “unicorns” – that is, privately held startups with a value of over $1 billion – a boatload are in artificial intelligence and machine learning.\\xa0

\\n

Here is a look at ten AI companies from the newest unicorn cohort that are soaring in 2022:\\xa0

\\n

Abnormal Security

\\n

Business: Uses behavioral AI to fight email attacks

\\n
\\n

Event

\\n

Intelligent Security Summit On-Demand

\\n

Learn the critical role of AI & ML in cybersecurity and industry specific case studies. Watch on-demand sessions today.

\\n
\\n

\\n Watch Here \\n

Based in: San Francisco, California

\\n

Founded: 2018

\\n

Became a unicorn: May 2022

\\n

Cloud-native email security platform Abnormal Security uses behavioral AI to analyze identity, content and context. It leverages more than 45,000 signals from cloud email platforms to stop all types of email attacks including business email compromise, supply chain fraud, ransomware, phishing, spam and graymail.

\\n

“Modern attacks, including business email compromise, supply chain compromise, account takeovers and more, are difficult to detect,” Abnormal Security’s CEO Evan Reiser told VentureBeat in May. “Because they typically contain no traditional indicators of compromise, they evade legacy email security solutions like the secure email gateway.”\\xa0

\\n

Abnormal Security uses AI to identify anomalous email messages, spot malicious actors and block them before they can cause any damage. It means organizations don’t need their employees to have the security awareness to detect phishing threats independently.\\xa0

\\n

As of May, Abnormal Security claims its customer base had grown by 270% and included companies such as Xerox, Hitachi Vantara and Urban Outfit'), DocumentContent(id='ZA52Zk-yVy9skxWZtxOSgg', url='https://www.businessinsider.com/13-most-promising-earlier-stage-ai-startups-this-year-2022-11', title='The 13 most promising early-stage artificial-intelligence startups of 2022, according to VCs investing in companies building technical infrastructure and developing tools', extract='

\\n \\n
\\n

US Markets Loading...\\n \\n H\\n M\\n S\\n \\n \\n

\\n
\\n \\n \\n \\n \\n
\\n \\n \\n \\n \\n \\n
\\n \\n
\\n

Premium\\n \\n \\n

\\n \\n \\n \\n
\\n \\n \\n
\\n
\\n \\n \\n \\n \\n \\n
\\n

\\n

\\n \\n \\n
\\n Nick Washburn of Intel Capital, Ashu Garg of Foundation Capital, Rudina Seseri of Glasswing Ventures, and Jordan Jacobs of Radical Ventures.\\n
\\n \\n \\n Intel Capital; Foundation; Glasswing Ventures; Radical Ventures; Anna Kim/Insider\\n \\n \\n \\n \\n
\\n

\\n This story is available exclusively to Insider subscribers.\\n Become an Insider and start reading now.\\n

\\n \\n
\\n \\n
\\n
    \\n
  • Venture capitalists investing in AI identified the promising startups at earlier fundraising stages.
  • \\n
  • Investors said they\\'re interested in areas like cloud infrastructure and generative AI.
  • \\n
  • These 13 AI startups are making tools that try to help companies make better use of data.
  • \\n

Even amid a downturn that\\'s affecting tech companies, venture capitalists say they\\'re paying attention to certain early-stage startups.

Early-stage VCs at firms like Intel Capital, Glasswing Ventures, and others have targeted their artificial-intelligence investments in areas like cloud and software infrastructure, cybersecurity, and machine learning. Many of those startups are developing tools to boost the capacity of AI programs and to support daily business needs like writing marketing materials or managing supply-chain logistics.

Kim-Mai Cutler, a partner at Initialized Capital, said VCs are seeing potential in the startups able to develop new applications.

\"We\\'re seeing a new wave of creative tools that are emerging that allow all kinds of workers to do more with what they have today,\" she said.

VCs told Insider that startups that have raised A, B, and C rounds over the past year have been able to do so also because of the background and technical expertise of their founding teams. Founders generating interest include academics and engineers who\\'ve worked at tech giants like Uber, Lyft, Google, and Meta.

Nick Washburn of Intel said that \"impeccable teams that are tackling big problems\" are especially compelling to VCs.

Top VCs shared with Insider their picks for startups in their earlier stages that they see as demonstrating the most potential. Estimates for total funding are from PitchBook unless noted otherwise.

Here are the 13 most promising AI startups of 2022, according to VCs, listed from least to most capital raised:

\\n
\\n \\n
\\n \\n \\n \\n \\n \\n
\\n

\\n

OtterTune: A machine-learning-based platform to manage databases

\\n

\\n \\n \\n \\n
\\n

\\n \\n \\n \\n

\\n \\n \\n
\\n Nick Washburn of Intel Capital.\\n
\\n \\n \\n Intel Capital.\\n \\n \\n \\n \\n
\\n \\n

Startup: OtterTune

Recommended by: Nick Washburn, Intel Capital

Relationship: Investor and board member

Total funding: $14.5 million, according to the company Finished chain.\u001b[0m\n" ] }, { "data": { "text/plain": [ "'Here are some of the hottest AI agent startups and what they do:\\n\\n1. The AI Hot 75: This list, curated by NFX, showcases early-stage companies in generative AI that show leading indicators of future greatness. These companies are at the forefront of the generative AI groundswell. [Read more](https://www.nfx.com/post/generative-ai-hot-75-list)\\n\\n2. 10 new AI unicorns flying high in 2022: This article highlights ten AI companies from the newest unicorn cohort that are soaring in 2022. One example is Abnormal Security, which uses behavioral AI to fight email attacks. [Read more](https://venturebeat.com/ai/10-new-ai-unicorns-flying-high-in-2022/)\\n\\n3. The 13 most promising early-stage artificial-intelligence startups of 2022: According to VCs investing in companies building technical infrastructure and developing tools, these 13 AI startups demonstrate the most potential. One example is OtterTune, a machine-learning-based platform to manage databases. [Read more](https://www.businessinsider.com/13-most-promising-earlier-stage-ai-startups-this-year-2022-11)\\n\\nPlease note that these are just a few examples, and there are many more AI agent startups out there.'" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "agent_executor.run(\"Find the hottest AI agent startups and what they do\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Using the tool wrapper\n", "\n", "This is the old way of using Metaphor - through our own in-house integration." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from langchain.utilities import MetaphorSearchAPIWrapper" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "search = MetaphorSearchAPIWrapper()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Call the API\n", "`results` takes in a Metaphor-optimized search query and a number of results (up to 500). It returns a list of results with title, url, author, and creation date." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'title': 'Core Views on AI Safety: When, Why, What, and How',\n", " 'url': 'https://www.anthropic.com/index/core-views-on-ai-safety',\n", " 'author': None,\n", " 'published_date': '2023-03-08'},\n", " {'title': 'Extinction Risk from Artificial Intelligence',\n", " 'url': 'https://aisafety.wordpress.com/',\n", " 'author': None,\n", " 'published_date': '2013-10-08'},\n", " {'title': 'The simple picture on AI safety - LessWrong',\n", " 'url': 'https://www.lesswrong.com/posts/WhNxG4r774bK32GcH/the-simple-picture-on-ai-safety',\n", " 'author': 'Alex Flint',\n", " 'published_date': '2018-05-27'},\n", " {'title': 'No Time Like The Present For AI Safety Work',\n", " 'url': 'https://slatestarcodex.com/2015/05/29/no-time-like-the-present-for-ai-safety-work/',\n", " 'author': None,\n", " 'published_date': '2015-05-29'},\n", " {'title': 'A plea for solutionism on AI safety - LessWrong',\n", " 'url': 'https://www.lesswrong.com/posts/ASMX9ss3J5G3GZdok/a-plea-for-solutionism-on-ai-safety',\n", " 'author': 'Jasoncrawford',\n", " 'published_date': '2023-06-09'},\n", " {'title': 'The Artificial Intelligence Revolution: Part 1 - Wait But Why',\n", " 'url': 'https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html',\n", " 'author': 'Tim Urban',\n", " 'published_date': '2015-01-22'},\n", " {'title': 'Anthropic: Core Views on AI Safety: When, Why, What, and How - EA Forum',\n", " 'url': 'https://forum.effectivealtruism.org/posts/uGDCaPFaPkuxAowmH/anthropic-core-views-on-ai-safety-when-why-what-and-how',\n", " 'author': 'Jonmenaster',\n", " 'published_date': '2023-03-09'},\n", " {'title': \"[Linkpost] Sam Altman's 2015 Blog Posts Machine Intelligence Parts 1 & 2 - LessWrong\",\n", " 'url': 'https://www.lesswrong.com/posts/QnBZkNJNbJK9k5Xi7/linkpost-sam-altman-s-2015-blog-posts-machine-intelligence',\n", " 'author': 'Olivia Jimenez',\n", " 'published_date': '2023-04-28'},\n", " {'title': 'The Proof of Doom - LessWrong',\n", " 'url': 'https://www.lesswrong.com/posts/xBrpph9knzWdtMWeQ/the-proof-of-doom',\n", " 'author': 'Johnlawrenceaspden',\n", " 'published_date': '2022-03-09'},\n", " {'title': \"Anthropic's Core Views on AI Safety - LessWrong\",\n", " 'url': 'https://www.lesswrong.com/posts/xhKr5KtvdJRssMeJ3/anthropic-s-core-views-on-ai-safety',\n", " 'author': 'Zac Hatfield-Dodds',\n", " 'published_date': '2023-03-09'}]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search.results(\"The best blog post about AI safety is definitely this: \", 10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Adding filters\n", "We can also add filters to our search. \n", "\n", "include_domains: Optional[List[str]] - List of domains to include in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n", "\n", "exclude_domains: Optional[List[str]] - List of domains to exclude in the search. If specified, results will only come from these domains. Only one of include_domains and exclude_domains should be specified.\n", "\n", "start_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If start_crawl_date is specified, results will only include links that were crawled after start_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n", "\n", "end_crawl_date: Optional[str] - \"Crawl date\" refers to the date that Metaphor discovered a link, which is more granular and can be more useful than published date. If endCrawlDate is specified, results will only include links that were crawled before end_crawl_date. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ)\n", "\n", "start_published_date: Optional[str] - If specified, only links with a published date after start_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if start_published_date is specified.\n", "\n", "end_published_date: Optional[str] - If specified, only links with a published date before end_published_date will be returned. Must be specified in ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). Note that for some links, we have no published date, and these links will be excluded from the results if end_published_date is specified.\n", "\n", "See full docs [here](https://metaphorapi.readme.io/)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "search.results(\n", " \"The best blog post about AI safety is definitely this: \",\n", " 10,\n", " include_domains=[\"lesswrong.com\"],\n", " start_published_date=\"2019-01-01\",\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Use Metaphor as a tool\n", "Metaphor can be used as a tool that gets URLs that other tools such as browsing tools." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%pip install playwright\n", "from langchain.agents.agent_toolkits import PlayWrightBrowserToolkit\n", "from langchain.tools.playwright.utils import (\n", " create_async_playwright_browser, # A synchronous browser is available, though it isn't compatible with jupyter.\n", ")\n", "\n", "async_browser = create_async_playwright_browser()\n", "toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=async_browser)\n", "tools = toolkit.get_tools()\n", "\n", "tools_by_name = {tool.name: tool for tool in tools}\n", "print(tools_by_name.keys())\n", "navigate_tool = tools_by_name[\"navigate_browser\"]\n", "extract_text = tools_by_name[\"extract_text\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from langchain.agents import initialize_agent, AgentType\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.tools import MetaphorSearchResults\n", "\n", "llm = ChatOpenAI(model_name=\"gpt-4\", temperature=0.7)\n", "\n", "metaphor_tool = MetaphorSearchResults(api_wrapper=search)\n", "\n", "agent_chain = initialize_agent(\n", " [metaphor_tool, extract_text, navigate_tool],\n", " llm,\n", " agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n", " verbose=True,\n", ")\n", "\n", "agent_chain.run(\n", " \"find me an interesting tweet about AI safety using Metaphor, then tell me the first sentence in the post. Do not finish until able to retrieve the first sentence.\"\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.1" }, "vscode": { "interpreter": { "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" } } }, "nbformat": 4, "nbformat_minor": 2 }