langchain/libs/community/langchain_community/utilities/duckduckgo_search.py
Isaac Francisco 148088a588
docs: duckduckgosearch options listed (#22568)
Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-06-05 23:29:47 +00:00

146 lines
4.3 KiB
Python

"""Util that calls DuckDuckGo Search.
No setup required. Free.
https://pypi.org/project/duckduckgo-search/
"""
from typing import Dict, List, Optional
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
class DuckDuckGoSearchAPIWrapper(BaseModel):
    """Wrapper for DuckDuckGo Search API.

    Free and does not require any setup. The ``duckduckgo-search`` package is
    imported lazily inside each method, so it only needs to be installed when
    the wrapper is actually instantiated or used.
    """

    region: Optional[str] = "wt-wt"
    """
    See https://pypi.org/project/duckduckgo-search/#regions
    """
    safesearch: str = "moderate"
    """
    Options: strict, moderate, off
    """
    time: Optional[str] = "y"
    """
    Options: d, w, m, y
    """
    max_results: int = 5
    """
    Number of results requested when a call does not pass its own limit.
    """
    backend: str = "api"
    """
    Options: api, html, lite
    """
    source: str = "text"
    """
    Options: text, news
    """

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that python package exists in environment.

        Args:
            values: The field values collected by pydantic.

        Returns:
            ``values`` unchanged.

        Raises:
            ImportError: If ``duckduckgo_search`` cannot be imported.
        """
        try:
            from duckduckgo_search import DDGS  # noqa: F401
        except ImportError as e:
            # Chain the original error so the underlying import failure
            # stays visible in the traceback.
            raise ImportError(
                "Could not import duckduckgo-search python package. "
                "Please install it with `pip install -U duckduckgo-search`."
            ) from e
        return values

    def _ddgs_text(
        self, query: str, max_results: Optional[int] = None
    ) -> List[Dict[str, str]]:
        """Run query through DuckDuckGo text search and return results.

        Args:
            query: The query to search for.
            max_results: Per-call result limit; a falsy value (``None`` or
                ``0``) falls back to ``self.max_results``.

        Returns:
            The raw result dictionaries, or an empty list when the search
            returned nothing.
        """
        from duckduckgo_search import DDGS

        with DDGS() as ddgs:
            ddgs_gen = ddgs.text(
                query,
                region=self.region,
                safesearch=self.safesearch,
                timelimit=self.time,
                max_results=max_results or self.max_results,
                backend=self.backend,
            )
            # Materialise while the DDGS session is still open.
            if ddgs_gen:
                return list(ddgs_gen)
        return []

    def _ddgs_news(
        self, query: str, max_results: Optional[int] = None
    ) -> List[Dict[str, str]]:
        """Run query through DuckDuckGo news search and return results.

        Args:
            query: The query to search for.
            max_results: Per-call result limit; a falsy value (``None`` or
                ``0``) falls back to ``self.max_results``.

        Returns:
            The raw result dictionaries, or an empty list when the search
            returned nothing.
        """
        from duckduckgo_search import DDGS

        with DDGS() as ddgs:
            ddgs_gen = ddgs.news(
                query,
                region=self.region,
                safesearch=self.safesearch,
                timelimit=self.time,
                max_results=max_results or self.max_results,
            )
            # Materialise while the DDGS session is still open.
            if ddgs_gen:
                return list(ddgs_gen)
        return []

    def run(self, query: str) -> str:
        """Run query through DuckDuckGo and return concatenated results.

        Args:
            query: The query to search for.

        Returns:
            The ``body`` of every result joined by single spaces, or a fixed
            "no result" message when nothing was found or ``self.source`` is
            neither ``"text"`` nor ``"news"``.
        """
        if self.source == "text":
            results = self._ddgs_text(query)
        elif self.source == "news":
            results = self._ddgs_news(query)
        else:
            results = []

        if not results:
            return "No good DuckDuckGo Search Result was found"
        return " ".join(r["body"] for r in results)

    def results(
        self, query: str, max_results: int, source: Optional[str] = None
    ) -> List[Dict[str, str]]:
        """Run query through DuckDuckGo and return metadata.

        Args:
            query: The query to search for.
            max_results: The number of results to return.
            source: The source to look from; falls back to ``self.source``
                when not given.

        Returns:
            A list of dictionaries with the following keys:
                snippet - The description of the result.
                title - The title of the result.
                link - The link to the result.
            News results additionally carry:
                date - The date of the result.
                source - The source of the result.
            When nothing is found (or the source is not recognised) the list
            holds a single ``{"Result": ...}`` placeholder entry instead.
        """
        source = source or self.source
        if source == "text":
            results = [
                {"snippet": r["body"], "title": r["title"], "link": r["href"]}
                for r in self._ddgs_text(query, max_results=max_results)
            ]
        elif source == "news":
            results = [
                {
                    "snippet": r["body"],
                    "title": r["title"],
                    "link": r["url"],
                    "date": r["date"],
                    "source": r["source"],
                }
                for r in self._ddgs_news(query, max_results=max_results)
            ]
        else:
            results = []

        # Every branch above yields a list, never None, so the placeholder
        # must be triggered by emptiness to be reachable at all.
        if not results:
            results = [{"Result": "No good DuckDuckGo Search Result was found"}]

        return results