From cc486f94bf8851994fbf2ffffaa1bb4ef67b9512 Mon Sep 17 00:00:00 2001 From: blob42 Date: Wed, 29 Mar 2023 04:16:13 +0200 Subject: [PATCH] searx: implement async and extra tool providing json results - implemented `arun` and `aresults`. Reuses aiosession if available. - helper tools `SearxSearchRun` and `SearxSearchResults` - update doc --- docs/ecosystem/searx.md | 16 ++- langchain/agents/load_tools.py | 18 ++-- langchain/tools/searx_search/__init__.py | 0 langchain/tools/searx_search/tool.py | 51 ++++++++++ langchain/utilities/searx_search.py | 122 +++++++++++++++++++++-- 5 files changed, 193 insertions(+), 14 deletions(-) create mode 100644 langchain/tools/searx_search/__init__.py create mode 100644 langchain/tools/searx_search/tool.py diff --git a/docs/ecosystem/searx.md b/docs/ecosystem/searx.md index 54939d06..73910565 100644 --- a/docs/ecosystem/searx.md +++ b/docs/ecosystem/searx.md @@ -47,12 +47,24 @@ s.run("what is a large language model?") ### Tool -You can also easily load this wrapper as a Tool (to use with an Agent). +You can also load this wrapper as a Tool (to use with an Agent). + You can do this with: ```python from langchain.agents import load_tools -tools = load_tools(["searx-search"], searx_host="http://localhost:8888") +tools = load_tools(["searx-search"], + searx_host="http://localhost:8888", + engines=["github"]) +``` + +Note that we could _optionally_ pass custom engines to use. 
+ +If you want to obtain results with metadata as *json* you can use: +```python +tools = load_tools(["searx-search-results-json"], + searx_host="http://localhost:8888", + num_results=5) ``` For more information on tools, see [this page](../modules/agents/tools/getting_started.md) diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py index 1ca5a86e..32b1463d 100644 --- a/langchain/agents/load_tools.py +++ b/langchain/agents/load_tools.py @@ -13,6 +13,7 @@ from langchain.requests import RequestsWrapper from langchain.tools.base import BaseTool from langchain.tools.bing_search.tool import BingSearchRun from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun +from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun from langchain.tools.human.tool import HumanInputRun from langchain.tools.python.tool import PythonREPLTool from langchain.tools.requests.tool import RequestsGetTool @@ -167,11 +168,12 @@ def _get_serpapi(**kwargs: Any) -> BaseTool: def _get_searx_search(**kwargs: Any) -> BaseTool: - return Tool( - name="SearX Search", - description="A meta search engine. Useful for when you need to answer questions about current events. 
Input should be a search query.", - func=SearxSearchWrapper(**kwargs).run, - ) + return SearxSearchRun(wrapper=SearxSearchWrapper(**kwargs)) + + +def _get_searx_search_results_json(**kwargs: Any) -> BaseTool: + wrapper_kwargs = {k: v for k, v in kwargs.items() if k != "num_results"} + return SearxSearchResults(wrapper=SearxSearchWrapper(**wrapper_kwargs), **kwargs) def _get_bing_search(**kwargs: Any) -> BaseTool: @@ -195,10 +197,14 @@ _EXTRA_OPTIONAL_TOOLS = { _get_google_search_results_json, ["google_api_key", "google_cse_id", "num_results"], ), + "searx-search-results-json": ( + _get_searx_search_results_json, + ["searx_host", "engines", "num_results", "aiosession"], + ), "bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]), "google-serper": (_get_google_serper, ["serper_api_key"]), "serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]), - "searx-search": (_get_searx_search, ["searx_host"]), + "searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]), "wikipedia": (_get_wikipedia, ["top_k_results"]), "human": (_get_human_tool, ["prompt_func", "input_func"]), } diff --git a/langchain/tools/searx_search/__init__.py b/langchain/tools/searx_search/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/langchain/tools/searx_search/tool.py b/langchain/tools/searx_search/tool.py new file mode 100644 index 00000000..a91f7e27 --- /dev/null +++ b/langchain/tools/searx_search/tool.py @@ -0,0 +1,51 @@ +"""Tool for the SearxNG search API.""" +from pydantic import Extra + +from langchain.tools.base import BaseTool +from langchain.utilities.searx_search import SearxSearchWrapper + + +class SearxSearchRun(BaseTool): + """Tool that adds the capability to query a Searx instance.""" + + name = "Searx Search" + description = ( + "A meta search engine." + "Useful for when you need to answer questions about current events." + "Input should be a search query." 
+ ) + wrapper: SearxSearchWrapper + + def _run(self, query: str) -> str: + """Use the tool.""" + return self.wrapper.run(query) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + return await self.wrapper.arun(query) + + +class SearxSearchResults(BaseTool): + """Tool that has capability to query a Searx instance and get back json.""" + + name = "Searx Search" + description = ( + "A meta search engine." + "Useful for when you need to answer questions about current events." + "Input should be a search query. Output is a JSON array of the query results" + ) + wrapper: SearxSearchWrapper + num_results: int = 4 + + class Config: + """Pydantic config.""" + + extra = Extra.allow + + def _run(self, query: str) -> str: + """Use the tool.""" + return str(self.wrapper.results(query, self.num_results)) + + async def _arun(self, query: str) -> str: + """Use the tool asynchronously.""" + return (await self.wrapper.aresults(query, self.num_results)).__str__() diff --git a/langchain/utilities/searx_search.py b/langchain/utilities/searx_search.py index b171a0d7..da0bbd83 100644 --- a/langchain/utilities/searx_search.py +++ b/langchain/utilities/searx_search.py @@ -15,7 +15,7 @@ Quick Start ----------- -In order to use this tool you need to provide the searx host. This can be done +In order to use this utility you need to provide the searx host. This can be done by passing the named parameter :attr:`searx_host ` or exporting the environment variable SEARX_HOST. Note: this is the only required parameter. 
@@ -129,6 +129,7 @@ For a list of public SearxNG instances see https://searx.space/ import json from typing import Any, Dict, List, Optional +import aiohttp import requests from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator @@ -204,6 +205,13 @@ class SearxSearchWrapper(BaseModel): engines: Optional[List[str]] = [] query_suffix: Optional[str] = "" k: int = 10 + aiosession: Optional[Any] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True @validator("unsecure") def disable_ssl_warnings(cls, v: bool) -> bool: @@ -244,11 +252,6 @@ class SearxSearchWrapper(BaseModel): return values - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - def _searx_api_query(self, params: dict) -> SearxResults: """Actual request to searx API.""" raw_result = requests.get( @@ -264,6 +267,33 @@ class SearxSearchWrapper(BaseModel): self._result = res return res + async def _asearx_api_query(self, params: dict) -> SearxResults: + if not self.aiosession: + async with aiohttp.ClientSession() as session: + async with session.get( + self.searx_host, + headers=self.headers, + params=params, + ssl=(lambda: False if self.unsecure else None)(), + ) as response: + if not response.ok: + raise ValueError("Searx API returned an error: ", response.text) + result = SearxResults(await response.text()) + self._result = result + else: + async with self.aiosession.get( + self.searx_host, + headers=self.headers, + params=params, + verify=not self.unsecure, + ) as response: + if not response.ok: + raise ValueError("Searx API returned an error: ", response.text) + result = SearxResults(await response.text()) + self._result = result + + return result + def run( self, query: str, @@ -281,6 +311,13 @@ class SearxSearchWrapper(BaseModel): engines: List of engines to use for the query. **kwargs: extra parameters to pass to the searx API. 
+ Returns: + str: The result of the query. + + Raises: + ValueError: If an error occurred with the query. + + Example: This will make a query to the qwant engine: @@ -321,6 +358,41 @@ class SearxSearchWrapper(BaseModel): return toret + async def arun( + self, + query: str, + engines: Optional[List[str]] = None, + query_suffix: Optional[str] = "", + **kwargs: Any, + ) -> str: + """Asynchronous version of `run`.""" + _params = { + "q": query, + } + params = {**self.params, **_params, **kwargs} + + if self.query_suffix and len(self.query_suffix) > 0: + params["q"] += " " + self.query_suffix + + if isinstance(query_suffix, str) and len(query_suffix) > 0: + params["q"] += " " + query_suffix + + if isinstance(engines, list) and len(engines) > 0: + params["engines"] = ",".join(engines) + + res = await self._asearx_api_query(params) + + if len(res.answers) > 0: + toret = res.answers[0] + + # only return the content of the results list + elif len(res.results) > 0: + toret = "\n\n".join([r.get("content", "") for r in res.results[: self.k]]) + else: + toret = "No good search result found" + + return toret + def results( self, query: str, @@ -383,3 +455,41 @@ class SearxSearchWrapper(BaseModel): } for result in results ] + + async def aresults( + self, + query: str, + num_results: int, + engines: Optional[List[str]] = None, + query_suffix: Optional[str] = "", + **kwargs: Any, + ) -> List[Dict]: + """Asynchronously query with json results. + + Uses aiohttp. See `results` for more info. 
+ """ + _params = { + "q": query, + } + params = {**self.params, **_params, **kwargs} + + if self.query_suffix and len(self.query_suffix) > 0: + params["q"] += " " + self.query_suffix + if isinstance(query_suffix, str) and len(query_suffix) > 0: + params["q"] += " " + query_suffix + if isinstance(engines, list) and len(engines) > 0: + params["engines"] = ",".join(engines) + results = (await self._asearx_api_query(params)).results[:num_results] + if len(results) == 0: + return [{"Result": "No good Search Result was found"}] + + return [ + { + "snippet": result.get("content", ""), + "title": result["title"], + "link": result["url"], + "engines": result["engines"], + "category": result["category"], + } + for result in results + ]