diff --git a/docs/modules/agents/tools/examples/ddg.ipynb b/docs/modules/agents/tools/examples/ddg.ipynb new file mode 100644 index 00000000..ea7a66ea --- /dev/null +++ b/docs/modules/agents/tools/examples/ddg.ipynb @@ -0,0 +1,91 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "245a954a", + "metadata": {}, + "source": [ + "# DuckDuckGo Search\n", + "\n", + "This notebook goes over how to use the duck-duck-go search component." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "21e46d4d", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install duckduckgo-search" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ac4910f8", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.tools import DuckDuckGoSearchTool" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "84b8f773", + "metadata": {}, + "outputs": [], + "source": [ + "search = DuckDuckGoSearchTool()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "068991a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Barack Obama, in full Barack Hussein Obama II, (born August 4, 1961, Honolulu, Hawaii, U.S.), 44th president of the United States (2009-17) and the first African American to hold the office. Before winning the presidency, Obama represented Illinois in the U.S. Senate (2005-08). Barack Hussein Obama II (/ b ə ˈ r ɑː k h uː ˈ s eɪ n oʊ ˈ b ɑː m ə / bə-RAHK hoo-SAYN oh-BAH-mə; born August 4, 1961) is an American former politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president of the United States. Obama previously served as a U.S. senator representing ... Barack Obama was the first African American president of the United States (2009-17). He oversaw the recovery of the U.S. 
economy (from the Great Recession of 2008-09) and the enactment of landmark health care reform (the Patient Protection and Affordable Care Act ). In 2009 he was awarded the Nobel Peace Prize. His birth certificate lists his first name as Barack: That\\'s how Obama has spelled his name throughout his life. His name derives from a Hebrew name which means \"lightning.\". The Hebrew word has been transliterated into English in various spellings, including Barak, Buraq, Burack, and Barack. Most common names of U.S. presidents 1789-2021. Published by. Aaron O\\'Neill , Jun 21, 2022. The most common first name for a U.S. president is James, followed by John and then William. Six U.S ...'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.run(\"Obama's first name?\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + }, + "vscode": { + "interpreter": { + "hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/tools/__init__.py b/langchain/tools/__init__.py index 8cf2f77e..44f225e1 100644 --- a/langchain/tools/__init__.py +++ b/langchain/tools/__init__.py @@ -1,9 +1,17 @@ """Core toolkit implementations.""" from langchain.tools.base import BaseTool +from langchain.tools.ddg_search.tool import DuckDuckGoSearchTool from langchain.tools.ifttt import IFTTTWebhook from langchain.tools.openapi.utils.api_models import APIOperation from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec from langchain.tools.plugin import AIPluginTool -__all__ = ["BaseTool", "IFTTTWebhook", 
class DuckDuckGoSearchTool(BaseTool):
    """Agent tool that answers a search query via DuckDuckGo.

    All of the work is delegated to ``api_wrapper``; this class only supplies
    the ``name`` / ``description`` metadata and the ``_run`` / ``_arun``
    entry points.
    """

    name = "DuckDuckGo Search"
    description = (
        "A wrapper around DuckDuckGo Search. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query."
    )
    # A fresh wrapper is created per tool instance unless the caller
    # supplies one explicitly.
    api_wrapper: DuckDuckGoSearchAPIWrapper = Field(
        default_factory=DuckDuckGoSearchAPIWrapper
    )

    def _run(self, query: str) -> str:
        """Run ``query`` through the wrapper and return its text answer."""
        answer = self.api_wrapper.run(query)
        return answer

    async def _arun(self, query: str) -> str:
        """Async entry point; always raises because it is not implemented.

        Raises:
            NotImplementedError: on every call.
        """
        raise NotImplementedError("DuckDuckGoSearch does not support async")
class DuckDuckGoSearchAPIWrapper(BaseModel):
    """Wrapper for DuckDuckGo Search API.

    Free and does not require any setup beyond installing the
    ``duckduckgo-search`` package (checked when the model is validated).
    """

    # NOTE(review): ``k`` is not read by any method of this class; it is kept
    # so existing callers that pass it keep working.
    k: int = 10
    # Forwarded unchanged as the ``region`` keyword of ``ddg``.
    region: Optional[str] = "wt-wt"
    # Forwarded unchanged as the ``safesearch`` keyword of ``ddg``.
    safesearch: str = "moderate"
    # Forwarded unchanged as the ``time`` keyword of ``ddg``.
    # NOTE(review): "y" presumably restricts hits to the past year - confirm
    # against the duckduckgo-search documentation.
    time: Optional[str] = "y"
    # Result cap used by ``run``; ``results`` takes its own ``num_results``.
    max_results: int = 5

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that python package exists in environment.

        Args:
            values: The field values collected by pydantic.

        Returns:
            ``values`` unchanged.

        Raises:
            ValueError: If ``duckduckgo_search`` cannot be imported.
        """
        try:
            from duckduckgo_search import ddg  # noqa: F401
        except ImportError:
            raise ValueError(
                "Could not import duckduckgo-search python package. "
                "Please install it with `pip install duckduckgo-search`."
            )
        return values

    def _search(self, query: str, max_results: int) -> List[Dict]:
        """Call ``ddg`` with this wrapper's settings and return its hits.

        A ``None`` response is normalised to an empty list so callers can
        use a plain truthiness check instead of ``len()``, which would raise
        ``TypeError`` on ``None``.
        """
        # Imported lazily so that importing this module does not require the
        # optional dependency.
        from duckduckgo_search import ddg

        hits = ddg(
            query,
            region=self.region,
            safesearch=self.safesearch,
            time=self.time,
            max_results=max_results,
        )
        return hits or []

    def run(self, query: str) -> str:
        """Run query through DuckDuckGo and return results.

        Args:
            query: The query to search for.

        Returns:
            The ``body`` text of every hit joined with single spaces, or a
            fixed "no result" message when there are no hits.
        """
        hits = self._search(query, self.max_results)
        if not hits:
            return "No good DuckDuckGo Search Result was found"
        return " ".join(hit["body"] for hit in hits)

    def results(self, query: str, num_results: int) -> List[Dict]:
        """Run query through DuckDuckGo and return metadata.

        Args:
            query: The query to search for.
            num_results: The number of results to return.

        Returns:
            A list of dictionaries with the following keys:
                snippet - The description of the result.
                title - The title of the result.
                link - The link to the result.
            When there are no hits, a single-element list holding
            ``{"Result": "No good DuckDuckGo Search Result was found"}``
            is returned instead.
        """
        hits = self._search(query, num_results)
        if not hits:
            return [{"Result": "No good DuckDuckGo Search Result was found"}]

        return [
            {
                "snippet": hit["body"],
                "title": hit["title"],
                "link": hit["href"],
            }
            for hit in hits
        ]
"atlassian-python-api", "gptcache"] +all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search"] cohere = ["cohere"] llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"] openai = ["openai"] @@ -9160,4 +9176,4 @@ qdrant = ["qdrant-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "568b190c884e62df4e7bd897f402e3b6e61b24134af7f189f3d44b2ba5f00082" +content-hash = "19a145090188b0b446c68ca33599f4d4943bf9fb1312bcfa98a23268101e1323" diff --git a/pyproject.toml b/pyproject.toml index cdff1971..34252b19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,7 @@ atlassian-python-api = {version = "^3.36.0", optional=true} pytesseract = {version = "^0.3.10", optional=true} html2text = {version="^2020.1.16", optional=true} numexpr = "^2.8.4" +duckduckgo-search = {version="^2.8.6", optional=true} [tool.poetry.group.docs.dependencies] autodoc_pydantic = "^1.8.0" @@ -139,7 +140,7 @@ llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifes qdrant = ["qdrant-client"] openai = ["openai"] cohere = ["cohere"] -all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence_transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", 
def ddg_installed() -> bool:
    """Report whether ``duckduckgo_search`` can be imported.

    Any exception raised by the import is printed and treated as
    "not installed".
    """
    try:
        from duckduckgo_search import ddg  # noqa: F401
    except Exception as e:
        print(f"duckduckgo not installed, skipping test {e}")
        return False
    else:
        return True


@pytest.mark.skipif(not ddg_installed(), reason="requires duckduckgo-search package")
def test_ddg_search_tool() -> None:
    """Query the tool once and expect more than 20 whitespace-separated words."""
    search_tool = DuckDuckGoSearchTool()
    output = search_tool("Bella Ciao")
    print(output)
    word_count = len(output.split())
    assert word_count > 20