langchain/langchain/utilities/wikipedia.py
Alex Iribarren 706ebd8f9c
Enforce maximum Wikipedia query length (#2969)
I got the following stacktrace when the agent was trying to search
Wikipedia with a huge query:

```
Thought:{
    "action": "Wikipedia",
    "action_input": "Outstanding is a song originally performed by the Gap Band and written by member Raymond Calhoun. The song originally appeared on the group's platinum-selling 1982 album Gap Band IV. It is one of their signature songs and biggest hits, reaching the number one spot on the U.S. R&B Singles Chart in February 1983.  \"Outstanding\" peaked at number 51 on the Billboard Hot 100."
}
Traceback (most recent call last):
  File "/usr/src/app/tests/chat.py", line 121, in <module>
    answer = agent_chain.run(input=question)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain/chains/base.py", line 216, in run
    return self(kwargs)[self.output_keys[0]]
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain/chains/base.py", line 116, in __call__
    raise e
  File "/usr/local/lib/python3.11/site-packages/langchain/chains/base.py", line 113, in __call__
    outputs = self._call(inputs)
              ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain/agents/agent.py", line 828, in _call
    next_step_output = self._take_next_step(
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain/agents/agent.py", line 725, in _take_next_step
    observation = tool.run(
                  ^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain/tools/base.py", line 73, in run
    raise e
  File "/usr/local/lib/python3.11/site-packages/langchain/tools/base.py", line 70, in run
    observation = self._run(tool_input)
                  ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain/agents/tools.py", line 17, in _run
    return self.func(tool_input)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/langchain/utilities/wikipedia.py", line 40, in run
    search_results = self.wiki_client.search(query)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/wikipedia/util.py", line 28, in __call__
    ret = self._cache[key] = self.fn(*args, **kwargs)
                             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/wikipedia/wikipedia.py", line 109, in search
    raise WikipediaException(raw_results['error']['info'])
wikipedia.exceptions.WikipediaException: An unknown error occured: "Search request is longer than the maximum allowed length. (Actual: 373; allowed: 300)". Please report it on GitHub!
```

This commit limits the maximum size of the query passed to Wikipedia to
avoid this issue.
2023-04-16 08:30:57 -07:00

62 lines
2.1 KiB
Python

"""Util that calls Wikipedia."""
from typing import Any, Dict, Optional
from pydantic import BaseModel, Extra, root_validator
WIKIPEDIA_MAX_QUERY_LENGTH = 300
class WikipediaAPIWrapper(BaseModel):
"""Wrapper around WikipediaAPI.
To use, you should have the ``wikipedia`` python package installed.
This wrapper will use the Wikipedia API to conduct searches and
fetch page summaries. By default, it will return the page summaries
of the top-k results of an input search.
"""
wiki_client: Any #: :meta private:
top_k_results: int = 3
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that the python package exists in environment."""
try:
import wikipedia
values["wiki_client"] = wikipedia
except ImportError:
raise ValueError(
"Could not import wikipedia python package. "
"Please install it with `pip install wikipedia`."
)
return values
def run(self, query: str) -> str:
"""Run Wikipedia search and get page summaries."""
search_results = self.wiki_client.search(query[:WIKIPEDIA_MAX_QUERY_LENGTH])
summaries = []
len_search_results = len(search_results)
if len_search_results == 0:
return "No good Wikipedia Search Result was found"
for i in range(min(self.top_k_results, len_search_results)):
summary = self.fetch_formatted_page_summary(search_results[i])
if summary is not None:
summaries.append(summary)
return "\n\n".join(summaries)
def fetch_formatted_page_summary(self, page: str) -> Optional[str]:
try:
wiki_page = self.wiki_client.page(title=page, auto_suggest=False)
return f"Page: {page}\nSummary: {wiki_page.summary}"
except (
self.wiki_client.exceptions.PageError,
self.wiki_client.exceptions.DisambiguationError,
):
return None