mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
searx: implement async + helper tool providing json results (#2129)
- implemented `arun` and `aresults`. Reuses aiosession if available. - helper tools `SearxSearchRun` and `SearxSearchResults` - update doc Co-authored-by: blob42 <spike@w530>
This commit is contained in:
parent
ccee1aedd2
commit
031e32f331
@ -47,12 +47,24 @@ s.run("what is a large language model?")
|
|||||||
|
|
||||||
### Tool
|
### Tool
|
||||||
|
|
||||||
You can also easily load this wrapper as a Tool (to use with an Agent).
|
You can also load this wrapper as a Tool (to use with an Agent).
|
||||||
|
|
||||||
You can do this with:
|
You can do this with:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from langchain.agents import load_tools
|
from langchain.agents import load_tools
|
||||||
tools = load_tools(["searx-search"], searx_host="http://localhost:8888")
|
tools = load_tools(["searx-search"],
|
||||||
|
searx_host="http://localhost:8888",
|
||||||
|
engines=["github"])
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that we could _optionally_ pass custom engines to use.
|
||||||
|
|
||||||
|
If you want to obtain results with metadata as *json* you can use:
|
||||||
|
```python
|
||||||
|
tools = load_tools(["searx-search-results-json"],
|
||||||
|
searx_host="http://localhost:8888",
|
||||||
|
num_results=5)
|
||||||
```
|
```
|
||||||
|
|
||||||
For more information on tools, see [this page](../modules/agents/tools/getting_started.md)
|
For more information on tools, see [this page](../modules/agents/tools/getting_started.md)
|
||||||
|
@ -13,6 +13,7 @@ from langchain.requests import RequestsWrapper
|
|||||||
from langchain.tools.base import BaseTool
|
from langchain.tools.base import BaseTool
|
||||||
from langchain.tools.bing_search.tool import BingSearchRun
|
from langchain.tools.bing_search.tool import BingSearchRun
|
||||||
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
||||||
|
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
|
||||||
from langchain.tools.human.tool import HumanInputRun
|
from langchain.tools.human.tool import HumanInputRun
|
||||||
from langchain.tools.python.tool import PythonREPLTool
|
from langchain.tools.python.tool import PythonREPLTool
|
||||||
from langchain.tools.requests.tool import RequestsGetTool
|
from langchain.tools.requests.tool import RequestsGetTool
|
||||||
@ -167,11 +168,12 @@ def _get_serpapi(**kwargs: Any) -> BaseTool:
|
|||||||
|
|
||||||
|
|
||||||
def _get_searx_search(**kwargs: Any) -> BaseTool:
|
def _get_searx_search(**kwargs: Any) -> BaseTool:
|
||||||
return Tool(
|
return SearxSearchRun(wrapper=SearxSearchWrapper(**kwargs))
|
||||||
name="SearX Search",
|
|
||||||
description="A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query.",
|
|
||||||
func=SearxSearchWrapper(**kwargs).run,
|
def _get_searx_search_results_json(**kwargs: Any) -> BaseTool:
|
||||||
)
|
wrapper_kwargs = {k: v for k, v in kwargs.items() if k != "num_results"}
|
||||||
|
return SearxSearchResults(wrapper=SearxSearchWrapper(**wrapper_kwargs), **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _get_bing_search(**kwargs: Any) -> BaseTool:
|
def _get_bing_search(**kwargs: Any) -> BaseTool:
|
||||||
@ -195,10 +197,14 @@ _EXTRA_OPTIONAL_TOOLS = {
|
|||||||
_get_google_search_results_json,
|
_get_google_search_results_json,
|
||||||
["google_api_key", "google_cse_id", "num_results"],
|
["google_api_key", "google_cse_id", "num_results"],
|
||||||
),
|
),
|
||||||
|
"searx-search-results-json": (
|
||||||
|
_get_searx_search_results_json,
|
||||||
|
["searx_host", "engines", "num_results", "aiosession"],
|
||||||
|
),
|
||||||
"bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]),
|
"bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]),
|
||||||
"google-serper": (_get_google_serper, ["serper_api_key"]),
|
"google-serper": (_get_google_serper, ["serper_api_key"]),
|
||||||
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
|
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
|
||||||
"searx-search": (_get_searx_search, ["searx_host"]),
|
"searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]),
|
||||||
"wikipedia": (_get_wikipedia, ["top_k_results"]),
|
"wikipedia": (_get_wikipedia, ["top_k_results"]),
|
||||||
"human": (_get_human_tool, ["prompt_func", "input_func"]),
|
"human": (_get_human_tool, ["prompt_func", "input_func"]),
|
||||||
}
|
}
|
||||||
|
0
langchain/tools/searx_search/__init__.py
Normal file
0
langchain/tools/searx_search/__init__.py
Normal file
51
langchain/tools/searx_search/tool.py
Normal file
51
langchain/tools/searx_search/tool.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
"""Tool for the SearxNG search API."""
|
||||||
|
from pydantic import Extra
|
||||||
|
|
||||||
|
from langchain.tools.base import BaseTool
|
||||||
|
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class SearxSearchRun(BaseTool):
    """Tool that adds the capability to query a Searx instance.

    Every call is delegated to the configured ``SearxSearchWrapper``; the
    tool returns whatever string the wrapper's ``run``/``arun`` produces.
    """

    name = "Searx Search"
    # The trailing spaces matter: adjacent string literals are concatenated
    # verbatim, so without them the sentences would run together.
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query."
    )
    wrapper: SearxSearchWrapper

    def _run(self, query: str) -> str:
        """Use the tool."""
        return self.wrapper.run(query)

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        return await self.wrapper.arun(query)
|
||||||
|
|
||||||
|
|
||||||
|
class SearxSearchResults(BaseTool):
    """Tool that has capability to query a Searx instance and get back json.

    The wrapper's ``results``/``aresults`` output is stringified with
    ``str`` before being returned.
    """

    # NOTE(review): this is the same name SearxSearchRun uses; if both tools
    # are handed to one agent they cannot be told apart by name - confirm
    # whether a distinct name is wanted.
    name = "Searx Search"
    # The trailing spaces matter: adjacent string literals are concatenated
    # verbatim, so without them the sentences would run together.
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query. Output is a JSON array of the query results"
    )
    wrapper: SearxSearchWrapper
    # Maximum number of results requested from the wrapper per query.
    num_results: int = 4

    class Config:
        """Pydantic config."""

        # Unknown keyword arguments are accepted rather than rejected.
        extra = Extra.allow

    def _run(self, query: str) -> str:
        """Use the tool."""
        return str(self.wrapper.results(query, self.num_results))

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        return str(await self.wrapper.aresults(query, self.num_results))
|
@ -15,7 +15,7 @@ Quick Start
|
|||||||
-----------
|
-----------
|
||||||
|
|
||||||
|
|
||||||
In order to use this tool you need to provide the searx host. This can be done
|
In order to use this utility you need to provide the searx host. This can be done
|
||||||
by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>`
|
by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>`
|
||||||
or exporting the environment variable SEARX_HOST.
|
or exporting the environment variable SEARX_HOST.
|
||||||
Note: this is the only required parameter.
|
Note: this is the only required parameter.
|
||||||
@ -129,6 +129,7 @@ For a list of public SearxNG instances see https://searx.space/
|
|||||||
import json
|
import json
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
import requests
|
import requests
|
||||||
from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator
|
from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator
|
||||||
|
|
||||||
@ -204,6 +205,13 @@ class SearxSearchWrapper(BaseModel):
|
|||||||
engines: Optional[List[str]] = []
|
engines: Optional[List[str]] = []
|
||||||
query_suffix: Optional[str] = ""
|
query_suffix: Optional[str] = ""
|
||||||
k: int = 10
|
k: int = 10
|
||||||
|
aiosession: Optional[Any] = None
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
"""Configuration for this pydantic object."""
|
||||||
|
|
||||||
|
extra = Extra.forbid
|
||||||
|
arbitrary_types_allowed = True
|
||||||
|
|
||||||
@validator("unsecure")
|
@validator("unsecure")
|
||||||
def disable_ssl_warnings(cls, v: bool) -> bool:
|
def disable_ssl_warnings(cls, v: bool) -> bool:
|
||||||
@ -244,11 +252,6 @@ class SearxSearchWrapper(BaseModel):
|
|||||||
|
|
||||||
return values
|
return values
|
||||||
|
|
||||||
class Config:
|
|
||||||
"""Configuration for this pydantic object."""
|
|
||||||
|
|
||||||
extra = Extra.forbid
|
|
||||||
|
|
||||||
def _searx_api_query(self, params: dict) -> SearxResults:
|
def _searx_api_query(self, params: dict) -> SearxResults:
|
||||||
"""Actual request to searx API."""
|
"""Actual request to searx API."""
|
||||||
raw_result = requests.get(
|
raw_result = requests.get(
|
||||||
@ -264,6 +267,33 @@ class SearxSearchWrapper(BaseModel):
|
|||||||
self._result = res
|
self._result = res
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
async def _asearx_api_query(self, params: dict) -> SearxResults:
    """Asynchronous request to the searx API.

    Reuses ``self.aiosession`` when one was provided; otherwise a temporary
    ``aiohttp.ClientSession`` is opened for this single request and closed
    afterwards.

    Args:
        params: Query-string parameters to send to the searx host.

    Returns:
        The parsed response, which is also stored on ``self._result``.

    Raises:
        ValueError: If the response status is not OK.
    """

    async def _fetch(session: Any) -> SearxResults:
        # One code path for both session kinds, so they cannot drift apart.
        async with session.get(
            self.searx_host,
            headers=self.headers,
            params=params,
            # aiohttp takes ``ssl`` (not requests' ``verify``): False turns
            # certificate checks off, None keeps the default behaviour.
            ssl=False if self.unsecure else None,
        ) as response:
            if not response.ok:
                # ``text`` is a coroutine method; await it so the error
                # carries the response body rather than a bound method.
                raise ValueError(
                    "Searx API returned an error: ", await response.text()
                )
            return SearxResults(await response.text())

    if self.aiosession:
        result = await _fetch(self.aiosession)
    else:
        async with aiohttp.ClientSession() as session:
            result = await _fetch(session)

    self._result = result
    return result
|
||||||
|
|
||||||
def run(
|
def run(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@ -281,6 +311,13 @@ class SearxSearchWrapper(BaseModel):
|
|||||||
engines: List of engines to use for the query.
|
engines: List of engines to use for the query.
|
||||||
**kwargs: extra parameters to pass to the searx API.
|
**kwargs: extra parameters to pass to the searx API.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The result of the query.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If an error occurred with the query.
|
||||||
|
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
This will make a query to the qwant engine:
|
This will make a query to the qwant engine:
|
||||||
|
|
||||||
@ -321,6 +358,41 @@ class SearxSearchWrapper(BaseModel):
|
|||||||
|
|
||||||
return toret
|
return toret
|
||||||
|
|
||||||
|
async def arun(
    self,
    query: str,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> str:
    """Asynchronous version of `run`.

    Args:
        query: The query to search for.
        engines: List of engines to use for the query.
        query_suffix: Extra suffix appended to the query.
        **kwargs: extra parameters to pass to the searx API.

    Returns:
        str: The first answer if the response has any, otherwise the
        ``content`` of up to ``self.k`` results joined by blank lines,
        otherwise a fixed "no result" message.
    """
    # Later entries win, so explicit kwargs may override the defaults.
    params = {**self.params, "q": query, **kwargs}

    # The instance-level suffix is appended before the per-call one.
    if self.query_suffix:
        params["q"] += " " + self.query_suffix
    if isinstance(query_suffix, str) and query_suffix:
        params["q"] += " " + query_suffix

    if isinstance(engines, list) and engines:
        params["engines"] = ",".join(engines)

    response = await self._asearx_api_query(params)

    if len(response.answers) > 0:
        return response.answers[0]

    if len(response.results) > 0:
        # only return the content of the results list
        snippets = [hit.get("content", "") for hit in response.results[: self.k]]
        return "\n\n".join(snippets)

    return "No good search result found"
|
||||||
|
|
||||||
def results(
|
def results(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@ -383,3 +455,41 @@ class SearxSearchWrapper(BaseModel):
|
|||||||
}
|
}
|
||||||
for result in results
|
for result in results
|
||||||
]
|
]
|
||||||
|
|
||||||
|
async def aresults(
    self,
    query: str,
    num_results: int,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> List[Dict]:
    """Asynchronously query with json results.

    Uses aiohttp. See `results` for more info.

    Args:
        query: The query to search for.
        num_results: Maximum number of results to return.
        engines: List of engines to use for the query.
        query_suffix: Extra suffix appended to the query.
        **kwargs: extra parameters to pass to the searx API.

    Returns:
        One dict per result with the keys ``snippet``, ``title``, ``link``,
        ``engines`` and ``category``; or a single-item list holding a
        "no result" message when the response has no results.
    """
    # Later entries win, so explicit kwargs may override the defaults.
    params = {**self.params, "q": query, **kwargs}

    # The instance-level suffix is appended before the per-call one.
    if self.query_suffix:
        params["q"] += " " + self.query_suffix
    if isinstance(query_suffix, str) and query_suffix:
        params["q"] += " " + query_suffix

    if isinstance(engines, list) and engines:
        params["engines"] = ",".join(engines)

    response = await self._asearx_api_query(params)
    hits = response.results[:num_results]
    if len(hits) == 0:
        return [{"Result": "No good Search Result was found"}]

    formatted = []
    for hit in hits:
        formatted.append(
            {
                # "content" may be absent; the other keys are required.
                "snippet": hit.get("content", ""),
                "title": hit["title"],
                "link": hit["url"],
                "engines": hit["engines"],
                "category": hit["category"],
            }
        )
    return formatted
|
||||||
|
Loading…
Reference in New Issue
Block a user