mirror of https://github.com/hwchase17/langchain
Add Wikipedia search utility and tool (#1561)
The Python `wikipedia` package gives easy access to searching and fetching pages from Wikipedia; see https://pypi.org/project/wikipedia/. It can serve as an additional search and retrieval tool, like the existing Google and SerpAPI helpers, for both chains and agents.
pull/1579/head
parent
b44c8bd969
commit
cdb97f3dfb
@ -0,0 +1 @@
|
||||
"""Wikipedia API toolkit."""
|
@ -0,0 +1,25 @@
|
||||
"""Tool for the Wolfram Alpha API."""
|
||||
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities.wikipedia import WikipediaAPIWrapper
|
||||
|
||||
|
||||
class WikipediaQueryRun(BaseTool):
    """Tool that exposes Wikipedia search through a ``WikipediaAPIWrapper``."""

    name = "Wikipedia"
    description = (
        "A wrapper around Wikipedia. "
        "Useful for when you need to answer general questions about "
        "people, places, companies, historical events, or other subjects. "
        "Input should be a search query."
    )
    # Wrapper that ``_run`` delegates every query to.
    api_wrapper: WikipediaAPIWrapper

    def _run(self, query: str) -> str:
        """Forward ``query`` to the API wrapper and return its result."""
        result = self.api_wrapper.run(query)
        return result

    async def _arun(self, query: str) -> str:
        """Async entry point; always raises ``NotImplementedError``."""
        raise NotImplementedError("WikipediaQueryRun does not support async")
|
@ -0,0 +1,56 @@
|
||||
"""Util that calls Wikipedia."""
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
|
||||
|
||||
class WikipediaAPIWrapper(BaseModel):
    """Wrapper around WikipediaAPI.

    To use, you should have the ``wikipedia`` python package installed.
    This wrapper will use the Wikipedia API to conduct searches and
    fetch page summaries. By default, it will return the page summaries
    of the top-k results of an input search.
    """

    wiki_client: Any  #: :meta private:
    top_k_results: int = 3

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the python package exists in environment.

        Args:
            values: Field values being validated.

        Returns:
            ``values`` with ``wiki_client`` set to the imported
            ``wikipedia`` module.

        Raises:
            ValueError: If the ``wikipedia`` package cannot be imported.
        """
        # Keep the ``try`` body limited to the statement that can fail.
        try:
            import wikipedia
        except ImportError as err:
            raise ValueError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            ) from err
        values["wiki_client"] = wikipedia
        return values

    def run(self, query: str) -> str:
        """Run Wikipedia search and get page summaries.

        Args:
            query: Search string passed to the client's ``search``.

        Returns:
            Formatted summaries of at most ``top_k_results`` search hits,
            joined by blank lines. Hits whose summary could not be fetched
            are skipped, so the result is an empty string when nothing
            usable was found.
        """
        search_results = self.wiki_client.search(query)
        # Clamp at zero so a negative ``top_k_results`` selects nothing
        # instead of slicing from the end of the result list.
        limit = max(self.top_k_results, 0)
        fetched = (
            self.fetch_formatted_page_summary(title)
            for title in search_results[:limit]
        )
        return "\n\n".join(summary for summary in fetched if summary is not None)

    def fetch_formatted_page_summary(self, page: str) -> Optional[str]:
        """Fetch one page and format its title and summary.

        Args:
            page: Page title, as returned by the client's ``search``.

        Returns:
            A ``"Page: ...\\nSummary: ..."`` string, or ``None`` when the
            client raises ``PageError`` or ``DisambiguationError``.
        """
        try:
            wiki_page = self.wiki_client.page(title=page)
            return f"Page: {page}\nSummary: {wiki_page.summary}"
        except (
            self.wiki_client.exceptions.PageError,
            self.wiki_client.exceptions.DisambiguationError,
        ):
            return None
|
Loading…
Reference in New Issue