langchain/libs/community/langchain_community/retrievers/tavily_search_api.py

import os
from enum import Enum
from typing import Any, Dict, List, Optional

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever


class SearchDepth(Enum):
    """Enumeration of the search-depth settings a retriever can request.

    Each member's ``value`` is the lowercase string form of its name.
    """

    # Member order is significant for iteration; keep BASIC first.
    BASIC = "basic"
    ADVANCED = "advanced"


class TavilySearchAPIRetriever(BaseRetriever):
    """Retriever that converts Tavily Search API results into documents.

    Every entry of the response's ``results`` list becomes one ``Document``.
    When ``include_generated_answer`` is enabled, the response's ``answer``
    is prepended as an extra document and one fewer search result is
    requested to make room for it.
    """

    # Target number of documents; forwarded to the API as ``max_results``
    # (minus one when the generated answer is included).
    k: int = 10
    # Ask the API for a generated answer and prepend it as a document.
    include_generated_answer: bool = False
    # Use each result's ``raw_content`` instead of ``content`` as page text.
    include_raw_content: bool = False
    # Forwarded to the API as ``include_images``; the response's ``images``
    # value is attached to every result document's metadata.
    include_images: bool = False
    # Forwarded to the API as ``search_depth`` (the enum's string value).
    search_depth: SearchDepth = SearchDepth.BASIC
    # Forwarded to the API unchanged.
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    # Extra keyword arguments forwarded verbatim to the client's ``search``.
    kwargs: Optional[Dict[str, Any]] = {}
    # API key; when unset, the ``TAVILY_API_KEY`` environment variable is used.
    api_key: Optional[str] = None

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Run a Tavily search for ``query`` and wrap the results as documents.

        Args:
            query: The search query string.
            run_manager: Callback manager for this retriever run (not used
                by this implementation).

        Returns:
            One document per search result, preceded by a "Suggested Answer"
            document when ``include_generated_answer`` is set.

        Raises:
            ImportError: If the ``tavily`` package cannot be imported.
            KeyError: If ``api_key`` is unset and ``TAVILY_API_KEY`` is not
                present in the environment.
        """
        try:
            try:
                from tavily import TavilyClient
            except ImportError:
                # Older tavily-python releases exposed the client as `Client`.
                from tavily import Client as TavilyClient
        except ImportError as e:
            raise ImportError(
                "Tavily python package not found. "
                "Please install it with `pip install tavily-python`."
            ) from e

        tavily = TavilyClient(api_key=self.api_key or os.environ["TAVILY_API_KEY"])

        # Reserve one slot for the generated answer, never going negative.
        if self.include_generated_answer:
            max_results = max(self.k - 1, 0)
        else:
            max_results = self.k

        response = tavily.search(
            query=query,
            max_results=max_results,
            search_depth=self.search_depth.value,
            include_answer=self.include_generated_answer,
            include_domains=self.include_domains,
            exclude_domains=self.exclude_domains,
            include_raw_content=self.include_raw_content,
            include_images=self.include_images,
            # ``kwargs`` is Optional, so guard against an explicit ``None``.
            **(self.kwargs or {}),
        )

        content_key = "raw_content" if self.include_raw_content else "content"
        # Keys that are mapped explicitly and must not be repeated in metadata.
        consumed_keys = ("content", "title", "url", "raw_content")
        images = response.get("images")

        docs = [
            Document(
                # A key that is present but null would otherwise yield ``None``.
                page_content=result.get(content_key) or "",
                metadata={
                    "title": result.get("title", ""),
                    "source": result.get("url", ""),
                    **{k: v for k, v in result.items() if k not in consumed_keys},
                    # Placed last so it wins over any per-result "images" key.
                    "images": images,
                },
            )
            # Treat a missing or null ``results`` entry as "no results".
            for result in (response.get("results") or [])
        ]

        if self.include_generated_answer:
            answer_doc = Document(
                page_content=response.get("answer") or "",
                metadata={
                    "title": "Suggested Answer",
                    "source": "https://tavily.com/",
                },
            )
            docs = [answer_doc, *docs]

        return docs
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv 
langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes 2023-12-11 21:53:30 +00:00			`import os`
			`from enum import Enum`
			`from typing import Any, Dict, List, Optional`

			`from langchain_core.callbacks import CallbackManagerForRetrieverRun`
			`from langchain_core.documents import Document`
			`from langchain_core.retrievers import BaseRetriever`


			`class SearchDepth(Enum):`
			`"""Search depth as enumerator."""`

			`BASIC = "basic"`
			`ADVANCED = "advanced"`


			`class TavilySearchAPIRetriever(BaseRetriever):`
			`"""Tavily Search API retriever."""`

			`k: int = 10`
			`include_generated_answer: bool = False`
			`include_raw_content: bool = False`
			`include_images: bool = False`
			`search_depth: SearchDepth = SearchDepth.BASIC`
			`include_domains: Optional[List[str]] = None`
			`exclude_domains: Optional[List[str]] = None`
			`kwargs: Optional[Dict[str, Any]] = {}`
			`api_key: Optional[str] = None`

			`def _get_relevant_documents(`
			`self, query: str, *, run_manager: CallbackManagerForRetrieverRun`
			`) -> List[Document]:`
			`try:`
			`from tavily import Client`
			`except ImportError:`
			`raise ImportError(`
			`"Tavily python package not found. "`
			"Please install it with `pip install tavily-python`."
			`)`

			`tavily = Client(api_key=self.api_key or os.environ["TAVILY_API_KEY"])`
			`max_results = self.k if not self.include_generated_answer else self.k - 1`
			`response = tavily.search(`
			`query=query,`
			`max_results=max_results,`
			`search_depth=self.search_depth.value,`
			`include_answer=self.include_generated_answer,`
			`include_domains=self.include_domains,`
			`exclude_domains=self.exclude_domains,`
			`include_raw_content=self.include_raw_content,`
			`include_images=self.include_images,`
			`**self.kwargs,`
			`)`
			`docs = [`
			`Document(`
			`page_content=result.get("content", "")`
			`if not self.include_raw_content`
			`else result.get("raw_content", ""),`
			`metadata={`
			`"title": result.get("title", ""),`
			`"source": result.get("url", ""),`
			`**{`
			`k: v`
			`for k, v in result.items()`
			`if k not in ("content", "title", "url", "raw_content")`
			`},`
			`"images": response.get("images"),`
			`},`
			`)`
			`for result in response.get("results")`
			`]`
			`if self.include_generated_answer:`
			`docs = [`
			`Document(`
			`page_content=response.get("answer", ""),`
			`metadata={`
			`"title": "Suggested Answer",`
			`"source": "https://tavily.com/",`
			`},`
			`),`
			`*docs,`
			`]`

			`return docs`