Adds Tavily Search API retriever (#11314)

@baskaryan @efriis
pull/11317/head^2
Jacob Lee 1 year ago committed by GitHub
parent 3ec970cc11
commit 933655b4ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,79 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tavily Search API\n",
"\n",
"[Tavily's Search API](https://tavily.com) is a search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed.\n",
"\n",
"## Usage\n",
"\n",
"For a full list of allowed arguments, see [the official documentation](https://app.tavily.com/documentation/python). You can also pass any param to the SDK via a `kwargs` dictionary."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %pip install tavily-python"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Nintendo Designer (s) Hidemaro Fujibayashi (director) Eiji Aonuma (producer/group manager) Release date (s) United States of America: • March 3, 2017 Japan: • March 3, 2017 Australia / New Zealand: • March 2, 2017 Belgium: • March 3, 2017 Hong Kong: • Feburary 1, 2018 South Korea: • February 1, 2018 The UK / Ireland: • March 3, 2017 Content ratings', metadata={'title': 'The Legend of Zelda: Breath of the Wild - Zelda Wiki', 'source': 'https://zelda.fandom.com/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild', 'score': 0.96994, 'images': None}),\n",
" Document(page_content='02/01/23 Nintendo Switch Online member exclusive: Save on two digital games Read more 09/13/22 Out of the Shadows … the Legend of Zelda: Tears of the Kingdom Launches for Nintendo Switch on May...', metadata={'title': 'The Legend of Zelda™: Breath of the Wild - Nintendo', 'source': 'https://www.nintendo.com/store/products/the-legend-of-zelda-breath-of-the-wild-switch/', 'score': 0.94346, 'images': None}),\n",
" Document(page_content='Now we finally have a concrete release date of May 12, 2023. The date was announced alongside this brief (and mysterious) new trailer that also confirmed its title: The Legend of Zelda: Tears...', metadata={'title': 'The Legend of Zelda: Tears of the Kingdom: Release Date, Gameplay ... - IGN', 'source': 'https://www.ign.com/articles/the-legend-of-zelda-breath-of-the-wild-2-release-date-gameplay-news-rumors', 'score': 0.94145, 'images': None}),\n",
" Document(page_content='It was eventually released on March 3, 2017, as a launch game for the Switch and the final Nintendo game for the Wii U. It received widespread acclaim and won numerous Game of the Year accolades. Critics praised its open-ended gameplay, open-world design, and attention to detail, though some criticized its technical performance.', metadata={'title': 'The Legend of Zelda: Breath of the Wild - Wikipedia', 'source': 'https://en.wikipedia.org/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild', 'score': 0.92102, 'images': None})]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever\n",
"\n",
"os.environ[\"TAVILY_API_KEY\"] = \"YOUR_API_KEY\"\n",
"\n",
"retriever = TavilySearchAPIRetriever(k=4)\n",
"\n",
"retriever.invoke(\"what year was breath of the wild released?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -49,6 +49,7 @@ from langchain.retrievers.re_phraser import RePhraseQueryRetriever
from langchain.retrievers.remote_retriever import RemoteLangChainRetriever from langchain.retrievers.remote_retriever import RemoteLangChainRetriever
from langchain.retrievers.self_query.base import SelfQueryRetriever from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.retrievers.svm import SVMRetriever from langchain.retrievers.svm import SVMRetriever
from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
from langchain.retrievers.tfidf import TFIDFRetriever from langchain.retrievers.tfidf import TFIDFRetriever
from langchain.retrievers.time_weighted_retriever import ( from langchain.retrievers.time_weighted_retriever import (
TimeWeightedVectorStoreRetriever, TimeWeightedVectorStoreRetriever,
@ -82,6 +83,7 @@ __all__ = [
"RemoteLangChainRetriever", "RemoteLangChainRetriever",
"SVMRetriever", "SVMRetriever",
"SelfQueryRetriever", "SelfQueryRetriever",
"TavilySearchAPIRetriever",
"TFIDFRetriever", "TFIDFRetriever",
"BM25Retriever", "BM25Retriever",
"TimeWeightedVectorStoreRetriever", "TimeWeightedVectorStoreRetriever",

@ -0,0 +1,82 @@
import os
from enum import Enum
from typing import Any, Dict, List, Optional
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema import Document
from langchain.schema.retriever import BaseRetriever
class SearchDepth(Enum):
    """Search depth options accepted by the retriever.

    The member's ``value`` is the string forwarded to the Tavily client
    as the ``search_depth`` argument.
    """

    # Default depth used by the retriever.
    BASIC = "basic"
    # Alternative depth; its exact semantics are defined by the Tavily API.
    ADVANCED = "advanced"
class TavilySearchAPIRetriever(BaseRetriever):
    """Tavily Search API retriever.

    Runs a query through the ``tavily`` client's ``search`` method and
    converts each entry of the response's ``"results"`` list into a
    ``Document``. When ``include_generated_answer`` is set, the response's
    ``"answer"`` is prepended as an extra document and one fewer search
    result is requested, so the total stays within ``k``.
    """

    # Upper bound on the number of documents returned (answer included).
    k: int = 10
    # Prepend the response's "answer" field as a "Suggested Answer" document.
    include_generated_answer: bool = False
    # Use each result's "raw_content" instead of "content" as page content.
    include_raw_content: bool = False
    # Forwarded to the client as ``include_images``; the response's "images"
    # value is copied into every result document's metadata.
    include_images: bool = False
    # Forwarded to the client as the enum member's string value.
    search_depth: SearchDepth = SearchDepth.BASIC
    # Forwarded to the client unchanged.
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    # Extra keyword arguments passed straight through to ``Client.search``.
    # May be None, which is treated as "no extra arguments".
    kwargs: Optional[Dict[str, Any]] = {}
    # Falls back to the TAVILY_API_KEY environment variable when unset.
    api_key: Optional[str] = None

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search Tavily for ``query`` and return the hits as documents.

        Args:
            query: The search query string.
            run_manager: Callback manager for this retriever run (unused
                here).

        Returns:
            One ``Document`` per search result, preceded by a
            "Suggested Answer" document when ``include_generated_answer``
            is enabled.

        Raises:
            ValueError: If the ``tavily`` package is not installed.
            KeyError: If ``api_key`` is unset and the ``TAVILY_API_KEY``
                environment variable is missing.
        """
        try:
            from tavily import Client
        except ImportError as err:
            raise ValueError(
                "Tavily python package not found. "
                "Please install it with `pip install tavily-python`."
            ) from err

        tavily = Client(api_key=self.api_key or os.environ["TAVILY_API_KEY"])

        # The generated answer occupies one of the k slots. Clamp at zero so
        # a negative count is never sent to the API.
        max_results = self.k - 1 if self.include_generated_answer else self.k
        max_results = max(max_results, 0)

        response = tavily.search(
            query=query,
            max_results=max_results,
            search_depth=self.search_depth.value,
            include_answer=self.include_generated_answer,
            include_domains=self.include_domains,
            exclude_domains=self.exclude_domains,
            include_raw_content=self.include_raw_content,
            include_images=self.include_images,
            # ``kwargs`` is Optional, so guard against an explicit None.
            **(self.kwargs or {})
        )

        content_key = "raw_content" if self.include_raw_content else "content"
        # Fields that are mapped explicitly and must not be duplicated in the
        # pass-through metadata.
        mapped_keys = ("content", "title", "url", "raw_content")
        images = response.get("images")

        docs = []
        # A missing or null "results" entry yields no documents rather than
        # a TypeError.
        for result in response.get("results") or []:
            extra_metadata = {
                key: value
                for key, value in result.items()
                if key not in mapped_keys
            }
            docs.append(
                Document(
                    # The content field may be present but null; Document
                    # requires a string.
                    page_content=result.get(content_key) or "",
                    metadata={
                        "title": result.get("title", ""),
                        "source": result.get("url", ""),
                        **extra_metadata,
                        # Placed last so the response-level images override
                        # any result-level "images" key.
                        "images": images,
                    },
                )
            )

        if self.include_generated_answer:
            answer_doc = Document(
                page_content=response.get("answer") or "",
                metadata={
                    "title": "Suggested Answer",
                    "source": "https://tavily.com/",
                },
            )
            docs = [answer_doc, *docs]
        return docs
Loading…
Cancel
Save