|
|
|
@ -1,7 +1,7 @@
|
|
|
|
|
"""Chain that calls Searx meta search API.
|
|
|
|
|
|
|
|
|
|
SearxNG is a privacy-friendly free metasearch engine that aggregates results from
multiple search engines and databases.
|
|
|
|
|
|
|
|
|
|
For Searx search API refer to https://docs.searxng.org/dev/search_api.html
|
|
|
|
|
|
|
|
|
@ -10,58 +10,60 @@ better maintained than the original Searx project and offers more features.
|
|
|
|
|
|
|
|
|
|
For a list of public SearxNG instances see https://searx.space/
|
|
|
|
|
|
|
|
|
|
NOTE: SearxNG instances often have a rate limit, so you might want to use a
self-hosted instance and disable the rate limiter.

Alternatively, you can apply this PR: https://github.com/searxng/searxng/pull/2129,
which adds whitelisting to the rate limiter.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
from pydantic import BaseModel, PrivateAttr, Extra, Field, validator, root_validator
|
|
|
|
|
from typing import Optional, List, Dict, Any
|
|
|
|
|
import json
|
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator
|
|
|
|
|
|
|
|
|
|
from langchain.utils import get_from_dict_or_env
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_default_params() -> dict:
|
|
|
|
|
return {
|
|
|
|
|
"lang": "en",
|
|
|
|
|
"format": "json"
|
|
|
|
|
}
|
|
|
|
|
return {"lang": "en", "format": "json"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SearxResults(dict):
    """Dict-like wrapper around the JSON payload returned by the Searx API.

    Top-level keys of the payload are also reachable as attributes, because
    the instance ``__dict__`` is aliased to the mapping itself.
    """

    # Legacy placeholder attribute; kept so existing ``_data`` lookups on
    # instances still resolve to an empty string.
    _data = ""

    def __init__(self, data: str):
        """Parse a raw Searx JSON response into a dict-like object.

        Args:
            data: JSON text of the response; its top level must be something
                ``dict`` can be built from (normally a JSON object).

        Raises:
            json.JSONDecodeError: If ``data`` is not valid JSON.
        """
        json_data = json.loads(data)
        super().__init__(json_data)
        # Alias attribute storage to the mapping so ``res.key`` and
        # ``res["key"]`` refer to the same entry.
        self.__dict__ = self

    def __str__(self) -> str:
        """Return the payload serialized as JSON text.

        The class-level ``_data`` placeholder is never filled in, so returning
        it would always yield an empty string; the mapping itself is
        serialized instead.
        """
        return json.dumps(self)

    # The accessors below are explicit properties (rather than relying on the
    # ``__dict__`` aliasing) so that static type checkers such as mypy accept
    # attribute access to these fields of the Searx JSON result.
    @property
    def results(self) -> Any:
        """Return the ``results`` entry of the payload, or ``None`` if absent."""
        return self.get("results")

    @property
    def answers(self) -> Any:
        """Return the ``answers`` entry of the payload, or ``None`` if absent."""
        return self.get("answers")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SearxSearchWrapper(BaseModel):
|
|
|
|
|
"""Wrapper for Searx API.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
To use you need to provide the searx host by passing the named parameter
|
|
|
|
|
``searx_host`` or exporting the environment variable ``SEARX_HOST``.
|
|
|
|
|
|
|
|
|
|
In some situations you might want to disable SSL verification, for example
|
|
|
|
|
if you are running searx locally. You can do this by passing the named parameter
|
|
|
|
|
``unsecure``.
|
|
|
|
|
``unsecure``.
|
|
|
|
|
|
|
|
|
|
You can also pass the host url scheme as ``http`` to disable SSL.
|
|
|
|
|
|
|
|
|
@ -75,11 +77,14 @@ class SearxSearchWrapper(BaseModel):
|
|
|
|
|
.. code-block:: python
|
|
|
|
|
|
|
|
|
|
from langchain.searx_search import SearxSearchWrapper
|
|
|
|
|
# note the unsecure parameter is not needed if you pass the url scheme as http
|
|
|
|
|
searx = SearxSearchWrapper(searx_host="http://searx.example.com", unsecure=True)
|
|
|
|
|
# note the unsecure parameter is not needed if you pass the url scheme as
|
|
|
|
|
# http
|
|
|
|
|
searx = SearxSearchWrapper(searx_host="http://searx.example.com",
|
|
|
|
|
unsecure=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
_result: SearxResults = PrivateAttr()
|
|
|
|
|
searx_host = ""
|
|
|
|
|
unsecure: bool = False
|
|
|
|
@ -87,14 +92,14 @@ class SearxSearchWrapper(BaseModel):
|
|
|
|
|
headers: Optional[dict] = None
|
|
|
|
|
k: int = 10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@validator("unsecure")
|
|
|
|
|
def disable_ssl_warnings(cls, v: bool) -> bool:
|
|
|
|
|
"""Disable SSL warnings."""
|
|
|
|
|
if v:
|
|
|
|
|
# requests.urllib3.disable_warnings()
|
|
|
|
|
try:
|
|
|
|
|
import urllib3
|
|
|
|
|
|
|
|
|
|
urllib3.disable_warnings()
|
|
|
|
|
except ImportError as e:
|
|
|
|
|
print(e)
|
|
|
|
@ -103,14 +108,17 @@ class SearxSearchWrapper(BaseModel):
|
|
|
|
|
|
|
|
|
|
@root_validator()
|
|
|
|
|
def validate_params(cls, values: Dict) -> Dict:
|
|
|
|
|
"""Validate that custom searx params are merged with default ones"""
|
|
|
|
|
"""Validate that custom searx params are merged with default ones."""
|
|
|
|
|
user_params = values["params"]
|
|
|
|
|
default = _get_default_params()
|
|
|
|
|
values["params"] = {**default, **user_params}
|
|
|
|
|
|
|
|
|
|
searx_host = get_from_dict_or_env(values, "searx_host", "SEARX_HOST")
|
|
|
|
|
if not searx_host.startswith("http"):
|
|
|
|
|
print(f"Warning: `searx_host` is missing the url scheme, assuming secure https://{searx_host} ")
|
|
|
|
|
print(
|
|
|
|
|
f"Warning: missing the url scheme on host \
|
|
|
|
|
! assuming secure https://{searx_host} "
|
|
|
|
|
)
|
|
|
|
|
searx_host = "https://" + searx_host
|
|
|
|
|
elif searx_host.startswith("http://"):
|
|
|
|
|
values["unsecure"] = True
|
|
|
|
@ -121,20 +129,25 @@ class SearxSearchWrapper(BaseModel):
|
|
|
|
|
|
|
|
|
|
class Config:
    """Configuration for this pydantic object."""

    # ``Extra.forbid`` asks pydantic to reject fields that are not declared
    # on the model instead of silently accepting them.
    extra = Extra.forbid
|
|
|
|
|
|
|
|
|
|
def _searx_api_query(self, params: dict) -> SearxResults:
    """Send one GET request to the configured Searx host and wrap the reply.

    The parsed response is also stored on ``self._result``.

    Args:
        params: Query-string parameters forwarded to ``requests.get``.

    Returns:
        The response body parsed into a ``SearxResults``.
    """
    # Certificate verification is switched off when ``unsecure`` is set.
    response = requests.get(
        self.searx_host,
        headers=self.headers,
        params=params,
        verify=not self.unsecure,
    )
    res = SearxResults(response.text)
    self._result = res
    return res
|
|
|
|
|
|
|
|
|
|
def run(self, query: str, **kwargs: Any) -> str:
|
|
|
|
|
"""Run query through Searx API and parse results.
|
|
|
|
|
You can pass any other params to the searx query API.
|
|
|
|
|
|
|
|
|
|
You can pass any other params to the searx query API.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
query: The query to search for.
|
|
|
|
@ -153,7 +166,7 @@ class SearxSearchWrapper(BaseModel):
|
|
|
|
|
"""
|
|
|
|
|
_params = {
|
|
|
|
|
"q": query,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
params = {**self.params, **_params, **kwargs}
|
|
|
|
|
res = self._searx_api_query(params)
|
|
|
|
|
|
|
|
|
@ -162,7 +175,9 @@ class SearxSearchWrapper(BaseModel):
|
|
|
|
|
|
|
|
|
|
# only return the content of the results list
|
|
|
|
|
elif len(res.results) > 0:
|
|
|
|
|
toret = "\n\n".join([r.get('content', 'no result found') for r in res.results[:self.k]])
|
|
|
|
|
toret = "\n\n".join(
|
|
|
|
|
[r.get("content", "no result found") for r in res.results[: self.k]]
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
toret = "No good search result found"
|
|
|
|
|
|
|
|
|
@ -171,19 +186,19 @@ class SearxSearchWrapper(BaseModel):
|
|
|
|
|
def results(self, query: str, num_results: int, **kwargs: Any) -> List[Dict]:
|
|
|
|
|
"""Run query through Searx API and returns the results with metadata.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
query: The query to search for.
|
|
|
|
|
num_results: Limit the number of results to return.
|
|
|
|
|
Args:
|
|
|
|
|
query: The query to search for.
|
|
|
|
|
num_results: Limit the number of results to return.
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
A list of dictionaries with the following keys:
|
|
|
|
|
snippet - The description of the result.
|
|
|
|
|
title - The title of the result.
|
|
|
|
|
link - The link to the result.
|
|
|
|
|
Returns:
|
|
|
|
|
A list of dictionaries with the following keys:
|
|
|
|
|
snippet - The description of the result.
|
|
|
|
|
title - The title of the result.
|
|
|
|
|
link - The link to the result.
|
|
|
|
|
"""
|
|
|
|
|
metadata_results = []
|
|
|
|
|
_params = {
|
|
|
|
|
"q": query,
|
|
|
|
|
"q": query,
|
|
|
|
|
}
|
|
|
|
|
params = {**self.params, **_params, **kwargs}
|
|
|
|
|
results = self._searx_api_query(params).results[:num_results]
|
|
|
|
|