Merge pull request #2047 from dalf/fasttext-predict

Use fasttext-predict instead of fasttext(-wheel)
This commit is contained in:
Alexandre Flament 2022-12-26 20:50:09 +01:00 committed by GitHub
commit 0d4896170f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 7 additions and 10 deletions

View File

@ -36,7 +36,6 @@ RUN apk add --no-cache -t build-dependencies \
su-exec \ su-exec \
python3 \ python3 \
py3-pip \ py3-pip \
py3-numpy \
libxml2 \ libxml2 \
libxslt \ libxslt \
openssl \ openssl \
@ -44,8 +43,6 @@ RUN apk add --no-cache -t build-dependencies \
uwsgi \ uwsgi \
uwsgi-python3 \ uwsgi-python3 \
brotli \ brotli \
&& pip3 install --no-cache setuptools wheel \
&& sed -i s/fasttext-wheel/fasttext/ requirements.txt \
&& pip3 install --no-cache -r requirements.txt \ && pip3 install --no-cache -r requirements.txt \
&& apk del build-dependencies \ && apk del build-dependencies \
&& rm -rf /root/.cache && rm -rf /root/.cache

View File

@ -15,4 +15,4 @@ setproctitle==1.3.2
redis==4.4.0 redis==4.4.0
markdown-it-py==2.1.0 markdown-it-py==2.1.0
typing_extensions==4.4.0 typing_extensions==4.4.0
fasttext-wheel==0.9.2 fasttext-predict==0.9.2.1

View File

@ -15,7 +15,6 @@ from os.path import splitext, join
from random import choice from random import choice
from html.parser import HTMLParser from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse from urllib.parse import urljoin, urlparse
import fasttext
from lxml import html from lxml import html
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
@ -51,12 +50,9 @@ _STORAGE_UNIT_VALUE: Dict[str, int] = {
_XPATH_CACHE: Dict[str, XPath] = {} _XPATH_CACHE: Dict[str, XPath] = {}
_LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {} _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
_FASTTEXT_MODEL: Optional[fasttext.FastText._FastText] = None _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
"""fasttext model to predict language of a search term""" """fasttext model to predict language of a search term"""
# Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.
fasttext.FastText.eprint = lambda x: None
class _NotSetClass: # pylint: disable=too-few-public-methods class _NotSetClass: # pylint: disable=too-few-public-methods
"""Internal class for this module, do not create instance of this class. """Internal class for this module, do not create instance of this class.
@ -630,9 +626,13 @@ def eval_xpath_getindex(elements: ElementBase, xpath_spec: XPathSpecType, index:
return default return default
def _get_fasttext_model() -> fasttext.FastText._FastText: def _get_fasttext_model() -> "fasttext.FastText._FastText":
global _FASTTEXT_MODEL # pylint: disable=global-statement global _FASTTEXT_MODEL # pylint: disable=global-statement
if _FASTTEXT_MODEL is None: if _FASTTEXT_MODEL is None:
import fasttext # pylint: disable=import-outside-toplevel
# Monkey patch: prevent fasttext from showing a (useless) warning when loading a model.
fasttext.FastText.eprint = lambda x: None
_FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz')) _FASTTEXT_MODEL = fasttext.load_model(str(data_dir / 'lid.176.ftz'))
return _FASTTEXT_MODEL return _FASTTEXT_MODEL