Add Wikipedia search utility and tool (#1561)

The Python `wikipedia` package provides easy access to searching and
fetching pages from Wikipedia; see https://pypi.org/project/wikipedia/.
It can serve as an additional search and retrieval tool, like the
existing Google and SerpAPI helpers, for both chains and agents.
tool-patch
Zach Schillaci 1 year ago committed by GitHub
parent b44c8bd969
commit cdb97f3dfb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -48,6 +48,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.wikipedia import WikipediaAPIWrapper
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
from langchain.vectorstores import FAISS, ElasticVectorSearch
@ -70,6 +71,7 @@ __all__ = [
"GoogleSearchAPIWrapper",
"GoogleSerperAPIWrapper",
"WolframAlphaAPIWrapper",
"WikipediaAPIWrapper",
"Anthropic",
"Banana",
"CerebriumAI",

@ -9,12 +9,13 @@ from langchain.chains.api.base import APIChain
from langchain.chains.llm_math.base import LLMMathChain
from langchain.chains.pal.base import PALChain
from langchain.llms.base import BaseLLM
from langchain.tools.python.tool import PythonREPLTool
from langchain.requests import RequestsWrapper
from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
from langchain.tools.python.tool import PythonREPLTool
from langchain.tools.requests.tool import RequestsGetTool
from langchain.tools.wikipedia.tool import WikipediaQueryRun
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
from langchain.utilities.bash import BashProcess
from langchain.utilities.bing_search import BingSearchAPIWrapper
@ -22,6 +23,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.wikipedia import WikipediaAPIWrapper
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
@ -124,6 +126,10 @@ def _get_google_search(**kwargs: Any) -> BaseTool:
return GoogleSearchRun(api_wrapper=GoogleSearchAPIWrapper(**kwargs))
def _get_wikipedia(**kwargs: Any) -> BaseTool:
    """Build the Wikipedia query tool.

    All keyword arguments are forwarded unchanged to
    ``WikipediaAPIWrapper``; the resulting wrapper is handed to
    ``WikipediaQueryRun`` as its ``api_wrapper``.
    """
    wikipedia_wrapper = WikipediaAPIWrapper(**kwargs)
    return WikipediaQueryRun(api_wrapper=wikipedia_wrapper)
def _get_google_serper(**kwargs: Any) -> BaseTool:
return Tool(
name="Serper Search",
@ -173,6 +179,7 @@ _EXTRA_OPTIONAL_TOOLS = {
"google-serper": (_get_google_serper, ["serper_api_key"]),
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
"searx-search": (_get_searx_search, ["searx_host"]),
"wikipedia": (_get_wikipedia, ["top_k_results"]),
}

@ -0,0 +1 @@
"""Wikipedia API toolkit."""

@ -0,0 +1,25 @@
"""Tool for the Wikipedia API."""
from langchain.tools.base import BaseTool
from langchain.utilities.wikipedia import WikipediaAPIWrapper
class WikipediaQueryRun(BaseTool):
    """Tool that runs a search query through a ``WikipediaAPIWrapper``."""

    name = "Wikipedia"
    description = (
        "A wrapper around Wikipedia. "
        "Useful for when you need to answer general questions about "
        "people, places, companies, historical events, or other subjects. "
        "Input should be a search query."
    )
    # Wrapper that performs the actual Wikipedia lookup.
    api_wrapper: WikipediaAPIWrapper

    def _run(self, query: str) -> str:
        """Delegate the query to the API wrapper and return its text result."""
        wrapper = self.api_wrapper
        result = wrapper.run(query)
        return result

    async def _arun(self, query: str) -> str:
        """Async entry point; not implemented, always raises.

        Raises:
            NotImplementedError: on every call.
        """
        raise NotImplementedError("WikipediaQueryRun does not support async")

@ -7,6 +7,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.wikipedia import WikipediaAPIWrapper
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
__all__ = [
@ -19,4 +20,5 @@ __all__ = [
"SerpAPIWrapper",
"SearxSearchWrapper",
"BingSearchAPIWrapper",
"WikipediaAPIWrapper",
]

@ -0,0 +1,56 @@
"""Util that calls Wikipedia."""
from typing import Any, Dict, Optional
from pydantic import BaseModel, Extra, root_validator
class WikipediaAPIWrapper(BaseModel):
    """Wrapper around WikipediaAPI.

    To use, you should have the ``wikipedia`` python package installed.
    This wrapper will use the Wikipedia API to conduct searches and
    fetch page summaries. By default, it will return the page summaries
    of the top-k results of an input search.
    """

    wiki_client: Any  #: :meta private:
    # Maximum number of search hits whose summaries are fetched by ``run``.
    top_k_results: int = 3

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the python package exists in environment.

        Imports the ``wikipedia`` module and stores it under
        ``values["wiki_client"]``.

        Raises:
            ValueError: if the ``wikipedia`` package cannot be imported.
        """
        # Keep the try body to the one statement that can fail.
        try:
            import wikipedia
        except ImportError as exc:
            # Chain the original error so the import failure stays visible.
            raise ValueError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            ) from exc
        values["wiki_client"] = wikipedia
        return values

    def run(self, query: str) -> str:
        """Run Wikipedia search and get page summaries.

        Args:
            query: Search string passed to the client's ``search``.

        Returns:
            The formatted summaries of at most ``top_k_results`` search
            hits, separated by blank lines. Hits whose page could not be
            fetched are skipped; the result is an empty string when
            nothing could be fetched.
        """
        search_results = self.wiki_client.search(query)
        # Clamp at zero: a negative limit must select nothing rather than
        # be interpreted as a "drop from the end" slice bound.
        limit = max(self.top_k_results, 0)
        fetched = (
            self.fetch_formatted_page_summary(title)
            for title in search_results[:limit]
        )
        return "\n\n".join(text for text in fetched if text is not None)

    def fetch_formatted_page_summary(self, page: str) -> Optional[str]:
        """Fetch one page by title and format its summary.

        Args:
            page: Page title to look up.

        Returns:
            ``"Page: <title>\\nSummary: <summary>"``, or ``None`` when the
            client raises ``PageError`` or ``DisambiguationError``.
        """
        try:
            wiki_page = self.wiki_client.page(title=page)
            return f"Page: {page}\nSummary: {wiki_page.summary}"
        except (
            self.wiki_client.exceptions.PageError,
            self.wiki_client.exceptions.DisambiguationError,
        ):
            # Best-effort: an unresolvable title is skipped, not fatal.
            return None
Loading…
Cancel
Save