Merge pull request #2047 from dalf/fasttext-predict

Use fasttext-predict instead of fasttext(-wheel)
This commit is contained in:
Alexandre Flament 2022-12-26 20:50:09 +01:00 committed by GitHub
commit 0d4896170f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 7 additions and 10 deletions

View File

@ -36,7 +36,6 @@ RUN apk add --no-cache -t build-dependencies \
su-exec \
python3 \
py3-pip \
py3-numpy \
libxml2 \
libxslt \
openssl \
@ -44,8 +43,6 @@ RUN apk add --no-cache -t build-dependencies \
uwsgi \
uwsgi-python3 \
brotli \
&& pip3 install --no-cache setuptools wheel \
&& sed -i s/fasttext-wheel/fasttext/ requirements.txt \
&& pip3 install --no-cache -r requirements.txt \
&& apk del build-dependencies \
&& rm -rf /root/.cache

View File

@ -15,4 +15,4 @@ setproctitle==1.3.2
redis==4.4.0
markdown-it-py==2.1.0
typing_extensions==4.4.0
fasttext-wheel==0.9.2
fasttext-predict==0.9.2.1

View File

@ -15,7 +15,6 @@ from os.path import splitext, join
from random import choice
from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse
import fasttext
from lxml import html
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
@ -51,12 +50,9 @@ _STORAGE_UNIT_VALUE: Dict[str, int] = {
_XPATH_CACHE: Dict[str, XPath] = {}
_LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
_FASTTEXT_MODEL: Optional[fasttext.FastText._FastText] = None
_FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
"""fasttext model to predict language of a search term"""
# Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.
fasttext.FastText.eprint = lambda x: None
class _NotSetClass: # pylint: disable=too-few-public-methods
"""Internal class for this module, do not create instance of this class.
@ -630,9 +626,13 @@ def eval_xpath_getindex(elements: ElementBase, xpath_spec: XPathSpecType, index:
return default
def _get_fasttext_model() -> fasttext.FastText._FastText:
def _get_fasttext_model() -> "fasttext.FastText._FastText":
    """Return the shared fasttext model, loading it on first use.

    ``fasttext`` is imported inside this function rather than at module
    level, so the package is only imported when a model is first requested.
    The loaded model is stored in the module-level ``_FASTTEXT_MODEL`` and
    returned as-is on every later call.
    """
    global _FASTTEXT_MODEL  # pylint: disable=global-statement
    if _FASTTEXT_MODEL is None:
        import fasttext  # pylint: disable=import-outside-toplevel

        # Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.
        # Must happen before load_model() is called.
        fasttext.FastText.eprint = lambda x: None
        _FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz'))
    return _FASTTEXT_MODEL