From cdb97f3dfb7231de51d769bf88cc6894666e676a Mon Sep 17 00:00:00 2001 From: Zach Schillaci <40636930+zachschillaci27@users.noreply.github.com> Date: Fri, 10 Mar 2023 01:34:39 +0100 Subject: [PATCH] Add Wikipedia search utility and tool (#1561) The Python `wikipedia` package gives easy access for searching and fetching pages from Wikipedia, see https://pypi.org/project/wikipedia/. It can serve as an additional search and retrieval tool, like the existing Google and SerpAPI helpers, for both chains and agents. --- langchain/__init__.py | 2 + langchain/agents/load_tools.py | 9 ++++- langchain/tools/wikipedia/__init__.py | 1 + langchain/tools/wikipedia/tool.py | 25 ++++++++++++ langchain/utilities/__init__.py | 2 + langchain/utilities/wikipedia.py | 56 +++++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 langchain/tools/wikipedia/__init__.py create mode 100644 langchain/tools/wikipedia/tool.py create mode 100644 langchain/utilities/wikipedia.py diff --git a/langchain/__init__.py b/langchain/__init__.py index ae7ddbef..3b8e0ac0 100644 --- a/langchain/__init__.py +++ b/langchain/__init__.py @@ -48,6 +48,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper from langchain.utilities.searx_search import SearxSearchWrapper from langchain.utilities.serpapi import SerpAPIWrapper +from langchain.utilities.wikipedia import WikipediaAPIWrapper from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper from langchain.vectorstores import FAISS, ElasticVectorSearch @@ -70,6 +71,7 @@ __all__ = [ "GoogleSearchAPIWrapper", "GoogleSerperAPIWrapper", "WolframAlphaAPIWrapper", + "WikipediaAPIWrapper", "Anthropic", "Banana", "CerebriumAI", diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py index 5633680d..fadbf4c6 100644 --- a/langchain/agents/load_tools.py +++ b/langchain/agents/load_tools.py @@ -9,12 +9,13 @@ from 
langchain.chains.api.base import APIChain from langchain.chains.llm_math.base import LLMMathChain from langchain.chains.pal.base import PALChain from langchain.llms.base import BaseLLM -from langchain.tools.python.tool import PythonREPLTool from langchain.requests import RequestsWrapper from langchain.tools.base import BaseTool from langchain.tools.bing_search.tool import BingSearchRun from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun +from langchain.tools.python.tool import PythonREPLTool from langchain.tools.requests.tool import RequestsGetTool +from langchain.tools.wikipedia.tool import WikipediaQueryRun from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun from langchain.utilities.bash import BashProcess from langchain.utilities.bing_search import BingSearchAPIWrapper @@ -22,6 +23,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper from langchain.utilities.searx_search import SearxSearchWrapper from langchain.utilities.serpapi import SerpAPIWrapper +from langchain.utilities.wikipedia import WikipediaAPIWrapper from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper @@ -124,6 +126,10 @@ def _get_google_search(**kwargs: Any) -> BaseTool: return GoogleSearchRun(api_wrapper=GoogleSearchAPIWrapper(**kwargs)) +def _get_wikipedia(**kwargs: Any) -> BaseTool: + return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(**kwargs)) + + def _get_google_serper(**kwargs: Any) -> BaseTool: return Tool( name="Serper Search", @@ -173,6 +179,7 @@ _EXTRA_OPTIONAL_TOOLS = { "google-serper": (_get_google_serper, ["serper_api_key"]), "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]), "searx-search": (_get_searx_search, ["searx_host"]), + "wikipedia": (_get_wikipedia, ["top_k_results"]), } diff --git a/langchain/tools/wikipedia/__init__.py b/langchain/tools/wikipedia/__init__.py new file mode 100644 index 00000000..0b3edd08 
--- /dev/null +++ b/langchain/tools/wikipedia/__init__.py @@ -0,0 +1 @@ +"""Wikipedia API toolkit.""" diff --git a/langchain/tools/wikipedia/tool.py b/langchain/tools/wikipedia/tool.py new file mode 100644 index 00000000..b138eacd --- /dev/null +++ b/langchain/tools/wikipedia/tool.py @@ -0,0 +1,25 @@ +"""Tool for the Wikipedia API.""" + +from langchain.tools.base import BaseTool +from langchain.utilities.wikipedia import WikipediaAPIWrapper + + +class WikipediaQueryRun(BaseTool): + """Tool that adds the capability to search using the Wikipedia API.""" + + name = "Wikipedia" + description = ( + "A wrapper around Wikipedia. " + "Useful for when you need to answer general questions about " + "people, places, companies, historical events, or other subjects. " + "Input should be a search query." + ) + api_wrapper: WikipediaAPIWrapper + + def _run(self, query: str) -> str: + """Use the Wikipedia tool.""" + return self.api_wrapper.run(query) + + async def _arun(self, query: str) -> str: + """Use the Wikipedia tool asynchronously.""" + raise NotImplementedError("WikipediaQueryRun does not support async") diff --git a/langchain/utilities/__init__.py b/langchain/utilities/__init__.py index b61fa9fd..058ab5f7 100644 --- a/langchain/utilities/__init__.py +++ b/langchain/utilities/__init__.py @@ -7,6 +7,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper from langchain.utilities.searx_search import SearxSearchWrapper from langchain.utilities.serpapi import SerpAPIWrapper +from langchain.utilities.wikipedia import WikipediaAPIWrapper from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper __all__ = [ @@ -19,4 +20,5 @@ __all__ = [ "SerpAPIWrapper", "SearxSearchWrapper", "BingSearchAPIWrapper", + "WikipediaAPIWrapper", ] diff --git a/langchain/utilities/wikipedia.py b/langchain/utilities/wikipedia.py new file mode 100644 index 00000000..2e2bd21a --- /dev/null +++ 
b/langchain/utilities/wikipedia.py @@ -0,0 +1,56 @@ +"""Util that calls Wikipedia.""" +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Extra, root_validator + + +class WikipediaAPIWrapper(BaseModel): + """Wrapper around WikipediaAPI. + + To use, you should have the ``wikipedia`` python package installed. + This wrapper will use the Wikipedia API to conduct searches and + fetch page summaries. By default, it will return the page summaries + of the top-k results of an input search. + """ + + wiki_client: Any #: :meta private: + top_k_results: int = 3 + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that the python package exists in environment.""" + try: + import wikipedia + + values["wiki_client"] = wikipedia + except ImportError: + raise ValueError( + "Could not import wikipedia python package. " + "Please install it with `pip install wikipedia`." + ) + return values + + def run(self, query: str) -> str: + """Run Wikipedia search and get page summaries.""" + search_results = self.wiki_client.search(query) + summaries = [] + for i in range(min(self.top_k_results, len(search_results))): + summary = self.fetch_formatted_page_summary(search_results[i]) + if summary is not None: + summaries.append(summary) + return "\n\n".join(summaries) + + def fetch_formatted_page_summary(self, page: str) -> Optional[str]: + try: + wiki_page = self.wiki_client.page(title=page) + return f"Page: {page}\nSummary: {wiki_page.summary}" + except ( + self.wiki_client.exceptions.PageError, + self.wiki_client.exceptions.DisambiguationError, + ): + return None