searx: implement async + helper tool providing json results (#2129)

- implemented `arun` and `aresults`. Reuses aiosession if available.
- helper tools `SearxSearchRun` and `SearxSearchResults`
- update doc

Co-authored-by: blob42 <spike@w530>
This commit is contained in:
blob42 2023-03-29 05:49:02 +00:00 committed by GitHub
parent ccee1aedd2
commit 031e32f331
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 193 additions and 14 deletions

View File

@ -47,12 +47,24 @@ s.run("what is a large language model?")
### Tool ### Tool
You can also easily load this wrapper as a Tool (to use with an Agent). You can also load this wrapper as a Tool (to use with an Agent).
You can do this with: You can do this with:
```python ```python
from langchain.agents import load_tools from langchain.agents import load_tools
tools = load_tools(["searx-search"], searx_host="http://localhost:8888") tools = load_tools(["searx-search"],
searx_host="http://localhost:8888",
engines=["github"])
```
Note that you can _optionally_ pass a custom list of engines to use.
If you want to obtain results with metadata as *json*, you can use:
```python
tools = load_tools(["searx-search-results-json"],
searx_host="http://localhost:8888",
num_results=5)
``` ```
For more information on tools, see [this page](../modules/agents/tools/getting_started.md) For more information on tools, see [this page](../modules/agents/tools/getting_started.md)

View File

@ -13,6 +13,7 @@ from langchain.requests import RequestsWrapper
from langchain.tools.base import BaseTool from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.human.tool import HumanInputRun from langchain.tools.human.tool import HumanInputRun
from langchain.tools.python.tool import PythonREPLTool from langchain.tools.python.tool import PythonREPLTool
from langchain.tools.requests.tool import RequestsGetTool from langchain.tools.requests.tool import RequestsGetTool
@ -167,11 +168,12 @@ def _get_serpapi(**kwargs: Any) -> BaseTool:
def _get_searx_search(**kwargs: Any) -> BaseTool: def _get_searx_search(**kwargs: Any) -> BaseTool:
return Tool( return SearxSearchRun(wrapper=SearxSearchWrapper(**kwargs))
name="SearX Search",
description="A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query.",
func=SearxSearchWrapper(**kwargs).run, def _get_searx_search_results_json(**kwargs: Any) -> BaseTool:
) wrapper_kwargs = {k: v for k, v in kwargs.items() if k != "num_results"}
return SearxSearchResults(wrapper=SearxSearchWrapper(**wrapper_kwargs), **kwargs)
def _get_bing_search(**kwargs: Any) -> BaseTool: def _get_bing_search(**kwargs: Any) -> BaseTool:
@ -195,10 +197,14 @@ _EXTRA_OPTIONAL_TOOLS = {
_get_google_search_results_json, _get_google_search_results_json,
["google_api_key", "google_cse_id", "num_results"], ["google_api_key", "google_cse_id", "num_results"],
), ),
"searx-search-results-json": (
_get_searx_search_results_json,
["searx_host", "engines", "num_results", "aiosession"],
),
"bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]), "bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]),
"google-serper": (_get_google_serper, ["serper_api_key"]), "google-serper": (_get_google_serper, ["serper_api_key"]),
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]), "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
"searx-search": (_get_searx_search, ["searx_host"]), "searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]),
"wikipedia": (_get_wikipedia, ["top_k_results"]), "wikipedia": (_get_wikipedia, ["top_k_results"]),
"human": (_get_human_tool, ["prompt_func", "input_func"]), "human": (_get_human_tool, ["prompt_func", "input_func"]),
} }

View File

View File

@ -0,0 +1,51 @@
"""Tool for the SearxNG search API."""
from pydantic import Extra
from langchain.tools.base import BaseTool
from langchain.utilities.searx_search import SearxSearchWrapper
class SearxSearchRun(BaseTool):
    """Tool that adds the capability to query a Searx instance.

    Both entry points delegate to the configured ``SearxSearchWrapper`` and
    hand its string result back unchanged.
    """

    name = "Searx Search"
    # Adjacent string literals are joined verbatim, so every sentence carries
    # its own trailing space; without it the text reads "engine.Useful ...".
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query."
    )
    # Wrapper that performs the actual requests against the Searx host.
    wrapper: SearxSearchWrapper

    def _run(self, query: str) -> str:
        """Use the tool.

        Args:
            query: The search query forwarded to ``wrapper.run``.

        Returns:
            Whatever ``wrapper.run`` returns for the query.
        """
        return self.wrapper.run(query)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously.

        Args:
            query: The search query forwarded to ``wrapper.arun``.

        Returns:
            Whatever ``wrapper.arun`` returns for the query.
        """
        return await self.wrapper.arun(query)
class SearxSearchResults(BaseTool):
    """Tool that has capability to query a Searx instance and get back json.

    Both entry points delegate to the configured ``SearxSearchWrapper`` and
    return the string form of the list it produces.
    """

    # Distinct from ``SearxSearchRun.name`` ("Searx Search") so the two tools
    # can be told apart when both are loaded side by side.
    name = "Searx Search Results"
    # Adjacent string literals are joined verbatim, so every sentence carries
    # its own trailing space; without it the text reads "engine.Useful ...".
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query. "
        "Output is a JSON array of the query results"
    )
    # Wrapper that performs the actual requests against the Searx host.
    wrapper: SearxSearchWrapper
    # Maximum number of results requested from the wrapper per query.
    num_results: int = 4

    class Config:
        """Pydantic config."""

        extra = Extra.allow

    def _run(self, query: str) -> str:
        """Use the tool.

        Args:
            query: The search query forwarded to ``wrapper.results``.

        Returns:
            ``str()`` of the value returned by ``wrapper.results``.
        """
        return str(self.wrapper.results(query, self.num_results))

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously.

        Args:
            query: The search query forwarded to ``wrapper.aresults``.

        Returns:
            ``str()`` of the value returned by ``wrapper.aresults``.
        """
        return str(await self.wrapper.aresults(query, self.num_results))

View File

@ -15,7 +15,7 @@ Quick Start
----------- -----------
In order to use this tool you need to provide the searx host. This can be done In order to use this utility you need to provide the searx host. This can be done
by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>` by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>`
or exporting the environment variable SEARX_HOST. or exporting the environment variable SEARX_HOST.
Note: this is the only required parameter. Note: this is the only required parameter.
@ -129,6 +129,7 @@ For a list of public SearxNG instances see https://searx.space/
import json import json
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import aiohttp
import requests import requests
from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator
@ -204,6 +205,13 @@ class SearxSearchWrapper(BaseModel):
engines: Optional[List[str]] = [] engines: Optional[List[str]] = []
query_suffix: Optional[str] = "" query_suffix: Optional[str] = ""
k: int = 10 k: int = 10
aiosession: Optional[Any] = None
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
arbitrary_types_allowed = True
@validator("unsecure") @validator("unsecure")
def disable_ssl_warnings(cls, v: bool) -> bool: def disable_ssl_warnings(cls, v: bool) -> bool:
@ -244,11 +252,6 @@ class SearxSearchWrapper(BaseModel):
return values return values
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
def _searx_api_query(self, params: dict) -> SearxResults: def _searx_api_query(self, params: dict) -> SearxResults:
"""Actual request to searx API.""" """Actual request to searx API."""
raw_result = requests.get( raw_result = requests.get(
@ -264,6 +267,33 @@ class SearxSearchWrapper(BaseModel):
self._result = res self._result = res
return res return res
async def _asearx_api_query(self, params: dict) -> SearxResults:
    """Actual asynchronous request to searx API.

    Uses ``self.aiosession`` when one is configured; otherwise a temporary
    ``aiohttp.ClientSession`` is opened for this single request and closed
    afterwards. The parsed response is also stored on ``self._result``.

    Args:
        params: Query-string parameters sent to ``self.searx_host``.

    Returns:
        The ``SearxResults`` built from the response body.

    Raises:
        ValueError: If the response status is not OK; the response body is
            attached as the second exception argument.
    """
    request_kwargs: dict = {"headers": self.headers, "params": params}
    if self.unsecure:
        # aiohttp takes ``ssl=False`` to skip certificate verification;
        # ``verify=`` is a ``requests`` keyword that aiohttp rejects.
        # The key is omitted otherwise so the library default applies.
        request_kwargs["ssl"] = False

    async def _fetch(session: Any) -> SearxResults:
        # One request path shared by the temporary and the injected session.
        async with session.get(self.searx_host, **request_kwargs) as response:
            # ``text`` is a coroutine method on aiohttp responses, so it has
            # to be awaited before it can be used in the error or parsed.
            body = await response.text()
            if not response.ok:
                raise ValueError("Searx API returned an error: ", body)
            return SearxResults(body)

    if not self.aiosession:
        async with aiohttp.ClientSession() as session:
            result = await _fetch(session)
    else:
        # A caller-provided session is reused and deliberately left open.
        result = await _fetch(self.aiosession)

    self._result = result
    return result
def run( def run(
self, self,
query: str, query: str,
@ -281,6 +311,13 @@ class SearxSearchWrapper(BaseModel):
engines: List of engines to use for the query. engines: List of engines to use for the query.
**kwargs: extra parameters to pass to the searx API. **kwargs: extra parameters to pass to the searx API.
Returns:
str: The result of the query.
Raises:
ValueError: If an error occurred with the query.
Example: Example:
This will make a query to the qwant engine: This will make a query to the qwant engine:
@ -321,6 +358,41 @@ class SearxSearchWrapper(BaseModel):
return toret return toret
async def arun(
    self,
    query: str,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> str:
    """Asynchronous version of `run`.

    Args:
        query: The query to search for.
        query_suffix: Extra suffix appended to the query, after the
            instance-level ``self.query_suffix`` if that is set.
        engines: List of engines to use for the query.
        **kwargs: extra parameters to pass to the searx API.

    Returns:
        str: The first answer if the response has any; otherwise the
        ``content`` of the first ``self.k`` results joined by blank lines;
        otherwise a fixed "no result" message.
    """
    # Later entries win: instance defaults < the query itself < caller kwargs.
    search_params = {**self.params, "q": query, **kwargs}

    if self.query_suffix:
        search_params["q"] = search_params["q"] + " " + self.query_suffix

    if isinstance(query_suffix, str) and query_suffix:
        search_params["q"] = search_params["q"] + " " + query_suffix

    if isinstance(engines, list) and engines:
        search_params["engines"] = ",".join(engines)

    response = await self._asearx_api_query(search_params)

    if len(response.answers) > 0:
        return response.answers[0]

    if len(response.results) > 0:
        # only return the content of the results list
        top_hits = response.results[: self.k]
        return "\n\n".join([hit.get("content", "") for hit in top_hits])

    return "No good search result found"
def results( def results(
self, self,
query: str, query: str,
@ -383,3 +455,41 @@ class SearxSearchWrapper(BaseModel):
} }
for result in results for result in results
] ]
async def aresults(
    self,
    query: str,
    num_results: int,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> List[Dict]:
    """Asynchronously query with json results.

    Uses aiohttp. See `results` for more info.

    Args:
        query: The query to search for.
        num_results: Upper bound on the number of results returned.
        engines: List of engines to use for the query.
        query_suffix: Extra suffix appended to the query, after the
            instance-level ``self.query_suffix`` if that is set.
        **kwargs: extra parameters to pass to the searx API.

    Returns:
        One dict per result with the keys ``snippet``, ``title``, ``link``,
        ``engines`` and ``category``, or a single-item list holding a
        "no result" message when the response has no results.
    """
    # Later entries win: instance defaults < the query itself < caller kwargs.
    search_params = {**self.params, "q": query, **kwargs}

    if self.query_suffix:
        search_params["q"] = search_params["q"] + " " + self.query_suffix

    if isinstance(query_suffix, str) and query_suffix:
        search_params["q"] = search_params["q"] + " " + query_suffix

    if isinstance(engines, list) and engines:
        search_params["engines"] = ",".join(engines)

    response = await self._asearx_api_query(search_params)
    hits = response.results[:num_results]
    if not hits:
        return [{"Result": "No good Search Result was found"}]

    formatted = []
    for hit in hits:
        formatted.append(
            {
                # "content" is the only field read with a fallback.
                "snippet": hit.get("content", ""),
                "title": hit["title"],
                "link": hit["url"],
                "engines": hit["engines"],
                "category": hit["category"],
            }
        )
    return formatted