From aa887eb375224da7253f3d98d20b922705e28df6 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Tue, 19 Jan 2021 21:26:04 +0100
Subject: [PATCH] [mod] checker : replace pycld3 by langdetect

pycld3 requires the native library cld3
langdetect is a pure python package
---
 requirements.txt             |  2 +-
 searx/search/checker/impl.py | 15 ++++++++++-----
 utils/searx.sh               |  6 ++----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 776bbc20b..a8d9b3f20 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,4 @@ pygments==2.1.3
 python-dateutil==2.8.1
 pyyaml==5.3.1
 requests[socks]==2.25.1
-pycld3==0.20
+langdetect==1.0.8
diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py
index 244536f1b..25887b0f4 100644
--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py
@@ -9,7 +9,8 @@ from time import time
 from urllib.parse import urlparse
 
 import re
-import cld3
+from langdetect import detect_langs
+from langdetect.lang_detect_exception import LangDetectException
 import requests.exceptions
 
 from searx import poolrequests, logger
@@ -181,10 +182,14 @@ class ResultContainerTests:
         self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')')
 
     def _add_language(self, text: str) -> typing.Optional[str]:
-        r = cld3.get_language(str(text))  # pylint: disable=E1101
-        if r is not None and r.probability >= 0.98 and r.is_reliable:
-            self.languages.add(r.language)
-            self.test_results.add_language(r.language)
+        try:
+            r = detect_langs(str(text))  # pylint: disable=E1101
+        except LangDetectException:
+            return None
+
+        if len(r) > 0 and r[0].prob > 0.95:
+            self.languages.add(r[0].lang)
+            self.test_results.add_language(r[0].lang)
         return None
 
     def _check_result(self, result):
diff --git a/utils/searx.sh b/utils/searx.sh
index f85935fa2..a33642ee4 100755
--- a/utils/searx.sh
+++ b/utils/searx.sh
@@ -46,7 +46,6 @@ SEARX_PACKAGES_debian="\
 python3-dev python3-babel python3-venv
 uwsgi uwsgi-plugin-python3
 git build-essential libxslt-dev zlib1g-dev libffi-dev libssl-dev
-libprotobuf-dev protobuf-compiler
 shellcheck"
 
 BUILD_PACKAGES_debian="\
@@ -59,7 +58,6 @@ SEARX_PACKAGES_arch="\
 python python-pip python-lxml python-babel
 uwsgi uwsgi-plugin-python
 git base-devel libxml2
-protobuf
 shellcheck"
 
 BUILD_PACKAGES_arch="\
@@ -71,7 +69,7 @@ SEARX_PACKAGES_fedora="\
 python python-pip python-lxml python-babel
 uwsgi uwsgi-plugin-python3
 git @development-tools libxml2
-ShellCheck protobuf-compiler protobuf-devel"
+ShellCheck"
 
 BUILD_PACKAGES_fedora="\
 firefox graphviz graphviz-gd ImageMagick librsvg2-tools
@@ -84,7 +82,7 @@ SEARX_PACKAGES_centos="\
 python36 python36-pip python36-lxml python-babel
 uwsgi uwsgi-plugin-python3
 git @development-tools libxml2
-ShellCheck protobuf-compiler protobuf-devel"
+ShellCheck"
 
 BUILD_PACKAGES_centos="\
 firefox graphviz graphviz-gd ImageMagick librsvg2-tools