Add Wikipedia search utility and tool (#1561)

The Python `wikipedia` package gives easy access for searching and
fetching pages from Wikipedia, see https://pypi.org/project/wikipedia/.
It can serve as an additional search and retrieval tool, like the
existing Google and SerpAPI helpers, for both chains and agents.
tool-patch
Zach Schillaci 1 year ago committed by GitHub
parent b44c8bd969
commit cdb97f3dfb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -48,6 +48,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.wikipedia import WikipediaAPIWrapper
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
from langchain.vectorstores import FAISS, ElasticVectorSearch from langchain.vectorstores import FAISS, ElasticVectorSearch
@ -70,6 +71,7 @@ __all__ = [
"GoogleSearchAPIWrapper", "GoogleSearchAPIWrapper",
"GoogleSerperAPIWrapper", "GoogleSerperAPIWrapper",
"WolframAlphaAPIWrapper", "WolframAlphaAPIWrapper",
"WikipediaAPIWrapper",
"Anthropic", "Anthropic",
"Banana", "Banana",
"CerebriumAI", "CerebriumAI",

@ -9,12 +9,13 @@ from langchain.chains.api.base import APIChain
from langchain.chains.llm_math.base import LLMMathChain from langchain.chains.llm_math.base import LLMMathChain
from langchain.chains.pal.base import PALChain from langchain.chains.pal.base import PALChain
from langchain.llms.base import BaseLLM from langchain.llms.base import BaseLLM
from langchain.tools.python.tool import PythonREPLTool
from langchain.requests import RequestsWrapper from langchain.requests import RequestsWrapper
from langchain.tools.base import BaseTool from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
from langchain.tools.python.tool import PythonREPLTool
from langchain.tools.requests.tool import RequestsGetTool from langchain.tools.requests.tool import RequestsGetTool
from langchain.tools.wikipedia.tool import WikipediaQueryRun
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
from langchain.utilities.bash import BashProcess from langchain.utilities.bash import BashProcess
from langchain.utilities.bing_search import BingSearchAPIWrapper from langchain.utilities.bing_search import BingSearchAPIWrapper
@ -22,6 +23,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.wikipedia import WikipediaAPIWrapper
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
@ -124,6 +126,10 @@ def _get_google_search(**kwargs: Any) -> BaseTool:
return GoogleSearchRun(api_wrapper=GoogleSearchAPIWrapper(**kwargs)) return GoogleSearchRun(api_wrapper=GoogleSearchAPIWrapper(**kwargs))
def _get_wikipedia(**kwargs: Any) -> BaseTool:
return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(**kwargs))
def _get_google_serper(**kwargs: Any) -> BaseTool: def _get_google_serper(**kwargs: Any) -> BaseTool:
return Tool( return Tool(
name="Serper Search", name="Serper Search",
@ -173,6 +179,7 @@ _EXTRA_OPTIONAL_TOOLS = {
"google-serper": (_get_google_serper, ["serper_api_key"]), "google-serper": (_get_google_serper, ["serper_api_key"]),
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]), "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
"searx-search": (_get_searx_search, ["searx_host"]), "searx-search": (_get_searx_search, ["searx_host"]),
"wikipedia": (_get_wikipedia, ["top_k_results"]),
} }

@ -0,0 +1 @@
"""Wikipedia API toolkit."""

@ -0,0 +1,25 @@
"""Tool for the Wolfram Alpha API."""
from langchain.tools.base import BaseTool
from langchain.utilities.wikipedia import WikipediaAPIWrapper
class WikipediaQueryRun(BaseTool):
"""Tool that adds the capability to search using the Wikipedia API."""
name = "Wikipedia"
description = (
"A wrapper around Wikipedia. "
"Useful for when you need to answer general questions about "
"people, places, companies, historical events, or other subjects. "
"Input should be a search query."
)
api_wrapper: WikipediaAPIWrapper
def _run(self, query: str) -> str:
"""Use the Wikipedia tool."""
return self.api_wrapper.run(query)
async def _arun(self, query: str) -> str:
"""Use the Wikipedia tool asynchronously."""
raise NotImplementedError("WikipediaQueryRun does not support async")

@ -7,6 +7,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.wikipedia import WikipediaAPIWrapper
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
__all__ = [ __all__ = [
@ -19,4 +20,5 @@ __all__ = [
"SerpAPIWrapper", "SerpAPIWrapper",
"SearxSearchWrapper", "SearxSearchWrapper",
"BingSearchAPIWrapper", "BingSearchAPIWrapper",
"WikipediaAPIWrapper",
] ]

@ -0,0 +1,56 @@
"""Util that calls Wikipedia."""
from typing import Any, Dict, Optional
from pydantic import BaseModel, Extra, root_validator
class WikipediaAPIWrapper(BaseModel):
"""Wrapper around WikipediaAPI.
To use, you should have the ``wikipedia`` python package installed.
This wrapper will use the Wikipedia API to conduct searches and
fetch page summaries. By default, it will return the page summaries
of the top-k results of an input search.
"""
wiki_client: Any #: :meta private:
top_k_results: int = 3
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that the python package exists in environment."""
try:
import wikipedia
values["wiki_client"] = wikipedia
except ImportError:
raise ValueError(
"Could not import wikipedia python package. "
"Please it install it with `pip install wikipedia`."
)
return values
def run(self, query: str) -> str:
"""Run Wikipedia search and get page summaries."""
search_results = self.wiki_client.search(query)
summaries = []
for i in range(min(self.top_k_results, len(search_results))):
summary = self.fetch_formatted_page_summary(search_results[i])
if summary is not None:
summaries.append(summary)
return "\n\n".join(summaries)
def fetch_formatted_page_summary(self, page: str) -> Optional[str]:
try:
wiki_page = self.wiki_client.page(title=page)
return f"Page: {page}\nSummary: {wiki_page.summary}"
except (
self.wiki_client.exceptions.PageError,
self.wiki_client.exceptions.DisambiguationError,
):
return None
Loading…
Cancel
Save