[searx-search] fix docs, format, clean tests

searx-api-pre
blob42 1 year ago
parent a62b134e99
commit c19fe2b678

@ -10,11 +10,11 @@ from langchain.chains.pal.base import PALChain
from langchain.llms.base import BaseLLM
from langchain.python import PythonREPL
from langchain.requests import RequestsWrapper
from langchain.searx_search import SearxSearchWrapper
from langchain.serpapi import SerpAPIWrapper
from langchain.utilities.bash import BashProcess
from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
from langchain.searx_search import SearxSearchWrapper
def _get_python_repl() -> Tool:
@ -140,12 +140,14 @@ def _get_serpapi(**kwargs: Any) -> Tool:
coroutine=SerpAPIWrapper(**kwargs).arun,
)
def _get_searx_search(**kwargs: Any) -> Tool:
    """Build the ``Search`` tool backed by a Searx meta search wrapper.

    Args:
        **kwargs: Forwarded unchanged to ``SearxSearchWrapper``.

    Returns:
        A ``Tool`` constructed from the name ``"Search"``, the wrapper's
        ``run`` method and a description of when to use it.
    """
    return Tool(
        "Search",
        SearxSearchWrapper(**kwargs).run,
        "A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query.",
    )
_EXTRA_LLM_TOOLS = {
"news-api": (_get_news_api, ["news_api_key"]),
@ -156,7 +158,7 @@ _EXTRA_OPTIONAL_TOOLS = {
"wolfram-alpha": (_get_wolfram_alpha, ["wolfram_alpha_appid"]),
"google-search": (_get_google_search, ["google_api_key", "google_cse_id"]),
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
"searx-search": (_get_searx_search, ["searx_host", "searx_host"])
"searx-search": (_get_searx_search, ["searx_host", "searx_host"]),
}

@ -1,7 +1,7 @@
"""Chain that calls Searx meta search API.
SearxNG is a privacy-friendly free metasearch engine that aggregates results from multiple search engines
and databases.
SearxNG is a privacy-friendly free metasearch engine that aggregates results from
multiple search engines and databases.
For Searx search API refer to https://docs.searxng.org/dev/search_api.html
@ -10,58 +10,60 @@ better maintained than the original Searx project and offers more features.
For a list of public SearxNG instances see https://searx.space/
NOTE: SearxNG instances often have a rate limit, so you might want to use a
self hosted instance and disable the rate limiter or use this PR: https://github.com/searxng/searxng/pull/2129 that adds whitelisting to the rate limiter.
NOTE: SearxNG instances often have a rate limit, so you might want to use a self hosted
instance and disable the rate limiter.
You can use this PR: https://github.com/searxng/searxng/pull/2129 that adds whitelisting
to the rate limiter.
"""
import requests
from pydantic import BaseModel, PrivateAttr, Extra, Field, validator, root_validator
from typing import Optional, List, Dict, Any
import json
from typing import Any, Dict, List, Optional
import requests
from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator
from langchain.utils import get_from_dict_or_env
def _get_default_params() -> dict:
return {
"lang": "en",
"format": "json"
}
return {"lang": "en", "format": "json"}
class SearxResults(dict):
    """Dict like wrapper around search api results."""

    # Kept for backward compatibility; ``__init__`` never populates it.
    _data = ""

    def __init__(self, data: str):
        """Take a raw result from Searx and make it into a dict like object.

        Args:
            data: JSON text returned by the search API; it is expected to
                decode to a JSON object.
        """
        json_data = json.loads(data)
        super().__init__(json_data)
        # Alias the attribute namespace to the mapping itself so every JSON
        # key can also be read as an attribute.
        self.__dict__ = self

    def __str__(self) -> str:
        """Text representation of searx result (the payload as JSON)."""
        # ``_data`` is never filled in, so returning it always produced an
        # empty string; serialise the actual content instead.
        return json.dumps(self)

    # The following are fields from the json result of Searx; explicit
    # getters are provided to silence mypy errors.
    @property
    def results(self) -> Any:
        """Return the ``results`` field, or ``None`` when it is absent."""
        return self.get("results")

    @property
    def answers(self) -> Any:
        """Return the ``answers`` field, or ``None`` when it is absent."""
        return self.get("answers")
class SearxSearchWrapper(BaseModel):
"""Wrapper for Searx API.
To use you need to provide the searx host by passing the named parameter
``searx_host`` or exporting the environment variable ``SEARX_HOST``.
In some situations you might want to disable SSL verification, for example
if you are running searx locally. You can do this by passing the named parameter
``unsecure``.
``unsecure``.
You can also pass the host url scheme as ``http`` to disable SSL.
@ -75,11 +77,14 @@ class SearxSearchWrapper(BaseModel):
.. code-block:: python
from langchain.searx_search import SearxSearchWrapper
# note the unsecure parameter is not needed if you pass the url scheme as http
searx = SearxSearchWrapper(searx_host="http://searx.example.com", unsecure=True)
# note the unsecure parameter is not needed if you pass the url scheme as
# http
searx = SearxSearchWrapper(searx_host="http://searx.example.com",
unsecure=True)
"""
_result: SearxResults = PrivateAttr()
searx_host = ""
unsecure: bool = False
@ -87,14 +92,14 @@ class SearxSearchWrapper(BaseModel):
headers: Optional[dict] = None
k: int = 10
@validator("unsecure")
def disable_ssl_warnings(cls, v: bool) -> bool:
"""Disable SSL warnings."""
if v:
# requests.urllib3.disable_warnings()
try:
import urllib3
urllib3.disable_warnings()
except ImportError as e:
print(e)
@ -103,14 +108,17 @@ class SearxSearchWrapper(BaseModel):
@root_validator()
def validate_params(cls, values: Dict) -> Dict:
"""Validate that custom searx params are merged with default ones"""
"""Validate that custom searx params are merged with default ones."""
user_params = values["params"]
default = _get_default_params()
values["params"] = {**default, **user_params}
searx_host = get_from_dict_or_env(values, "searx_host", "SEARX_HOST")
if not searx_host.startswith("http"):
print(f"Warning: `searx_host` is missing the url scheme, assuming secure https://{searx_host} ")
print(
f"Warning: missing the url scheme on host \
! assuming secure https://{searx_host} "
)
searx_host = "https://" + searx_host
elif searx_host.startswith("http://"):
values["unsecure"] = True
@ -121,20 +129,25 @@ class SearxSearchWrapper(BaseModel):
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
def _searx_api_query(self, params: dict) -> SearxResults:
    """Send a GET request to the Searx host and wrap the response.

    Args:
        params: Query-string parameters sent with the request.

    Returns:
        The parsed response; it is also stored on ``self._result``.

    Raises:
        ValueError: If the server answers with an HTTP error status, or
            (as ``json.JSONDecodeError``) if the body is not valid JSON.
    """
    response = requests.get(
        self.searx_host,
        headers=self.headers,
        params=params,
        verify=not self.unsecure,
    )
    # Fail early with the server's own message instead of feeding an error
    # page (e.g. from a rate limited instance) into the JSON parser.
    if not response.ok:
        raise ValueError(
            f"Searx API returned HTTP {response.status_code}: {response.text}"
        )
    res = SearxResults(response.text)
    self._result = res
    return res
def run(self, query: str, **kwargs: Any) -> str:
"""Run query through Searx API and parse results.
You can pass any other params to the searx query API.
You can pass any other params to the searx query API.
Args:
query: The query to search for.
@ -153,7 +166,7 @@ class SearxSearchWrapper(BaseModel):
"""
_params = {
"q": query,
}
}
params = {**self.params, **_params, **kwargs}
res = self._searx_api_query(params)
@ -162,7 +175,9 @@ class SearxSearchWrapper(BaseModel):
# only return the content of the results list
elif len(res.results) > 0:
toret = "\n\n".join([r.get('content', 'no result found') for r in res.results[:self.k]])
toret = "\n\n".join(
[r.get("content", "no result found") for r in res.results[: self.k]]
)
else:
toret = "No good search result found"
@ -171,19 +186,19 @@ class SearxSearchWrapper(BaseModel):
def results(self, query: str, num_results: int, **kwargs: Any) -> List[Dict]:
"""Run query through Searx API and returns the results with metadata.
Args:
query: The query to search for.
num_results: Limit the number of results to return.
Args:
query: The query to search for.
num_results: Limit the number of results to return.
Returns:
A list of dictionaries with the following keys:
snippet - The description of the result.
title - The title of the result.
link - The link to the result.
Returns:
A list of dictionaries with the following keys:
snippet - The description of the result.
title - The title of the result.
link - The link to the result.
"""
metadata_results = []
_params = {
"q": query,
"q": query,
}
params = {**self.params, **_params, **kwargs}
results = self._searx_api_query(params).results[:num_results]

@ -1,8 +1,8 @@
"""General utilities."""
from langchain.python import PythonREPL
from langchain.requests import RequestsWrapper
from langchain.serpapi import SerpAPIWrapper
from langchain.searx_search import SearxSearchWrapper
from langchain.serpapi import SerpAPIWrapper
from langchain.utilities.bash import BashProcess
from langchain.utilities.bing_search import BingSearchAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper

@ -1,8 +0,0 @@
"""Integration test for Searx API."""
from langchain.searx import SearxAPIWrapper
def test_call() -> None:
"""Test that call gives the correct answer."""
chain = SearxAPIWrapper()
output = chain.run("who is the current president of Bengladesh ?")
Loading…
Cancel
Save