searx: implement async and extra tool providing json results

- implemented `arun` and `aresults`. Reuses aiosession if available.
- helper tools `SearxSearchRun` and `SearxSearchResults`
- update doc
searx
blob42 1 year ago
parent e2c26909f2
commit 5f698928b2

@ -47,7 +47,8 @@ s.run("what is a large language model?")
### Tool
You can also easily load this wrapper as a Tool (to use with an Agent).
You can also load this wrapper as a Tool (to use with an Agent).
You can do this with:
```python
@ -55,4 +56,11 @@ from langchain.agents import load_tools
tools = load_tools(["searx-search"], searx_host="http://localhost:8888")
```
If you want to obtain results with metadata as *json* you can use:
```python
tools = load_tools(["searx-search-results-json"],
searx_host="http://localhost:8888",
num_results=5)
```
For more information on tools, see [this page](../modules/agents/tools/getting_started.md)

@ -13,6 +13,7 @@ from langchain.requests import RequestsWrapper
from langchain.tools.base import BaseTool
from langchain.tools.bing_search.tool import BingSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.human.tool import HumanInputRun
from langchain.tools.python.tool import PythonREPLTool
from langchain.tools.requests.tool import RequestsGetTool
@ -167,11 +168,13 @@ def _get_serpapi(**kwargs: Any) -> BaseTool:
def _get_searx_search(**kwargs: Any) -> BaseTool:
# Factory for the "searx-search" tool; every keyword argument is forwarded
# unchanged to the SearxSearchWrapper constructor.
# NOTE(review): this span comes from a rendered diff, so two bodies appear
# back to back. Presumably the inline `Tool(...)` construction below is the
# removed implementation and the `SearxSearchRun(...)` return after it is the
# replacement -- confirm against the real file; as written, the second
# `return` can never execute.
return Tool(
name="SearX Search",
description="A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query.",
func=SearxSearchWrapper(**kwargs).run,
)
# Class-based variant: wraps the same wrapper in a SearxSearchRun tool.
return SearxSearchRun(wrapper=SearxSearchWrapper(**kwargs))
def _get_searx_search_results_json(**kwargs: Any) -> BaseTool:
    """Build the JSON-results Searx tool from ``load_tools`` keyword arguments.

    ``num_results`` is a tool-level setting, so it is withheld from the
    ``SearxSearchWrapper`` constructor. The tool itself still receives the
    complete, unfiltered ``kwargs``.
    """
    # Copy first so the caller's mapping is never mutated.
    wrapper_options = dict(kwargs)
    wrapper_options.pop("num_results", None)
    searx = SearxSearchWrapper(**wrapper_options)
    return SearxSearchResults(wrapper=searx, **kwargs)
def _get_bing_search(**kwargs: Any) -> BaseTool:
@ -195,10 +198,14 @@ _EXTRA_OPTIONAL_TOOLS = {
_get_google_search_results_json,
["google_api_key", "google_cse_id", "num_results"],
),
"searx-search-results-json": (
_get_searx_search_results_json,
["searx_host", "engines", "num_results", "aiosession"],
),
"bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]),
"google-serper": (_get_google_serper, ["serper_api_key"]),
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
"searx-search": (_get_searx_search, ["searx_host"]),
"searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]),
"wikipedia": (_get_wikipedia, ["top_k_results"]),
"human": (_get_human_tool, ["prompt_func", "input_func"]),
}

@ -0,0 +1,49 @@
from langchain.tools.base import BaseTool
from langchain.utilities.searx_search import SearxSearchWrapper
from typing import Optional, List
from pydantic import Extra
class SearxSearchRun(BaseTool):
    """Tool that adds the capability to query a Searx instance.

    Both entry points delegate to the configured ``SearxSearchWrapper`` and
    return whatever text its ``run`` / ``arun`` method produces.
    """

    name = "Searx Search"
    # Adjacent string literals are concatenated verbatim, so every fragment
    # but the last ends with a space to keep the sentences apart.
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query."
    )
    # Search backend that every call is delegated to.
    wrapper: SearxSearchWrapper

    def _run(self, query: str) -> str:
        """Run ``query`` through the wrapper synchronously and return its text."""
        return self.wrapper.run(query)

    async def _arun(self, query: str) -> str:
        """Run ``query`` through the wrapper asynchronously and return its text."""
        return await self.wrapper.arun(query)
class SearxSearchResults(BaseTool):
    """Tool that queries a Searx instance and returns the results with metadata.

    The list of result dictionaries produced by the wrapper's ``results`` /
    ``aresults`` method is converted to text with ``str``.
    """

    # Distinct from SearxSearchRun's "Searx Search" so both tools can be
    # registered side by side without their names colliding.
    name = "Searx Search Results"
    # Adjacent string literals are concatenated verbatim, so every fragment
    # but the last ends with a space to keep the sentences apart.
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query. Output is a JSON array of the query results"
    )
    # Search backend that every call is delegated to.
    wrapper: SearxSearchWrapper
    # Maximum number of results requested from the wrapper per query.
    num_results: int = 4

    class Config:
        """Pydantic config."""

        # The `_get_searx_search_results_json` factory forwards its complete
        # kwargs (e.g. `searx_host`) to this constructor, so fields that are
        # not declared here have to be tolerated.
        extra = Extra.allow

    def _run(self, query: str) -> str:
        """Run ``query`` synchronously and return the stringified result list."""
        # NOTE(review): str() of a list of dicts is the Python repr, which is
        # not guaranteed to be strict JSON -- confirm whether json.dumps is
        # wanted here before changing the output format.
        return str(self.wrapper.results(query, self.num_results))

    async def _arun(self, query: str) -> str:
        """Run ``query`` asynchronously and return the stringified result list."""
        return str(await self.wrapper.aresults(query, self.num_results))

@ -15,7 +15,7 @@ Quick Start
-----------
In order to use this tool you need to provide the searx host. This can be done
In order to use this utility you need to provide the searx host. This can be done
by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>`
or exporting the environment variable SEARX_HOST.
Note: this is the only required parameter.
@ -129,6 +129,7 @@ For a list of public SearxNG instances see https://searx.space/
import json
from typing import Any, Dict, List, Optional
import aiohttp
import requests
from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator
@ -204,6 +205,13 @@ class SearxSearchWrapper(BaseModel):
engines: Optional[List[str]] = []
query_suffix: Optional[str] = ""
k: int = 10
aiosession: Optional[Any] = None
class Config:
    """Pydantic settings applied to this model."""

    # Field types need not be pydantic-aware.
    arbitrary_types_allowed = True
    # Unknown keyword arguments are rejected instead of stored.
    extra = Extra.forbid
@validator("unsecure")
def disable_ssl_warnings(cls, v: bool) -> bool:
@ -264,6 +272,36 @@ class SearxSearchWrapper(BaseModel):
self._result = res
return res
async def _asearx_api_query(self, params: dict) -> "SearxResults":
    """Asynchronously send ``params`` to the Searx host and parse the reply.

    Reuses ``self.aiosession`` when one is configured; otherwise a temporary
    ``aiohttp.ClientSession`` is opened for this single request and closed
    afterwards. The parsed result is also cached on ``self._result``.

    Args:
        params: Query-string parameters for the Searx API.

    Returns:
        The response body wrapped in ``SearxResults``.

    Raises:
        ValueError: If the server answers with a non-OK status. The second
            exception argument is the response body text.
    """
    # aiohttp disables certificate verification with ssl=False; None keeps
    # its default behaviour. (`verify=` is the `requests` spelling and is not
    # an aiohttp request keyword.)
    ssl_option = False if self.unsecure else None

    async def _fetch(session: Any) -> "SearxResults":
        # Single request path shared by the owned and the borrowed session.
        # `session` is assumed to be aiohttp.ClientSession-compatible.
        async with session.get(
            self.searx_host,
            headers=self.headers,
            params=params,
            ssl=ssl_option,
        ) as response:
            if not response.ok:
                # `response.text` is a coroutine method: await it so the
                # error carries the body rather than a bound-method repr.
                raise ValueError(
                    "Searx API returned an error: ", await response.text()
                )
            # Await the body first, then wrap it; the wrapper object itself
            # is not awaitable.
            return SearxResults(await response.text())

    if self.aiosession:
        result = await _fetch(self.aiosession)
    else:
        async with aiohttp.ClientSession() as session:
            result = await _fetch(session)

    self._result = result
    return result
def run(
self,
query: str,
@ -281,6 +319,13 @@ class SearxSearchWrapper(BaseModel):
engines: List of engines to use for the query.
**kwargs: extra parameters to pass to the searx API.
Returns:
str: The result of the query.
Raises:
ValueError: If an error occurred with the query.
Example:
This will make a query to the qwant engine:
@ -321,6 +366,47 @@ class SearxSearchWrapper(BaseModel):
return toret
async def arun(
    self,
    query: str,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> str:
    """Asynchronous counterpart of `run`.

    Builds the request parameters from the instance defaults, ``query`` and
    any extra ``kwargs``, appends the instance-level and per-call query
    suffixes, and hands the request to ``_asearx_api_query``.

    Returns:
        The first answer if the response has any; otherwise the ``content``
        of up to ``self.k`` results joined by blank lines; otherwise a fixed
        "no result" message.
    """
    # Later entries win: instance defaults < the query itself < call kwargs.
    search_params = {**self.params, "q": query, **kwargs}

    # Instance-wide suffix first, then the per-call one.
    if self.query_suffix and len(self.query_suffix) > 0:
        search_params["q"] += " " + self.query_suffix
    if isinstance(query_suffix, str) and len(query_suffix) > 0:
        search_params["q"] += " " + query_suffix

    if isinstance(engines, list) and len(engines) > 0:
        search_params["engines"] = ",".join(engines)

    response = await self._asearx_api_query(search_params)

    if len(response.answers) > 0:
        return response.answers[0]
    if len(response.results) > 0:
        # Only the textual content of each hit is returned.
        top_hits = response.results[: self.k]
        return "\n\n".join(hit.get("content", "") for hit in top_hits)
    return "No good search result found"
def results(
self,
query: str,
@ -383,3 +469,41 @@ class SearxSearchWrapper(BaseModel):
}
for result in results
]
async def aresults(
    self,
    query: str,
    num_results: int,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> List[Dict]:
    """Asynchronous counterpart of `results`.

    Builds the request parameters the same way as `arun`, sends them through
    ``_asearx_api_query`` and keeps at most ``num_results`` hits.

    Returns:
        One dictionary per hit with the keys ``snippet``, ``title``,
        ``link``, ``engines`` and ``category``, or a single-item list with a
        ``Result`` message when nothing was found.
    """

    def _with_metadata(hit: Dict) -> Dict:
        # Reshape one raw Searx hit into the dictionary handed to callers.
        return {
            "snippet": hit.get("content", ""),
            "title": hit["title"],
            "link": hit["url"],
            "engines": hit["engines"],
            "category": hit["category"],
        }

    # Later entries win: instance defaults < the query itself < call kwargs.
    search_params = {**self.params, "q": query, **kwargs}

    # Instance-wide suffix first, then the per-call one.
    if self.query_suffix and len(self.query_suffix) > 0:
        search_params["q"] += " " + self.query_suffix
    if isinstance(query_suffix, str) and len(query_suffix) > 0:
        search_params["q"] += " " + query_suffix

    if isinstance(engines, list) and len(engines) > 0:
        search_params["engines"] = ",".join(engines)

    response = await self._asearx_api_query(search_params)
    hits = response.results[:num_results]
    if len(hits) == 0:
        return [{"Result": "No good Search Result was found"}]
    return [_with_metadata(hit) for hit in hits]

Loading…
Cancel
Save