From 921caf67168402960a18ea83624ecd809b989cfb Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Fri, 10 May 2024 09:05:31 +0200 Subject: [PATCH 1/4] Fix for #3050 (metadata extraction for cb7 files not working) --- cps/comic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cps/comic.py b/cps/comic.py index 4242bb2f..27c86c9a 100644 --- a/cps/comic.py +++ b/cps/comic.py @@ -102,7 +102,7 @@ def _extract_cover_from_archive(original_file_extension, tmp_file_name, rar_exec extension = ext[1].lower() if extension in cover.COVER_EXTENSIONS: try: - cover_data = cf.read(name)[name].read() + cover_data = cf.read([name])[name].read() except (py7zr.Bad7zFile, OSError) as ex: log.error('7Zip file failed with error: {}'.format(ex)) break From 25a875b6280d361b4d8f874cab1d5bdacfaba09f Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Fri, 10 May 2024 09:42:44 +0200 Subject: [PATCH 2/4] Fix for goodreads blocking "requests" --- cps/__init__.py | 1 - cps/admin.py | 3 -- cps/config_sql.py | 10 +------ cps/services/goodreads_support.py | 47 +++++++++++++++++++++++++------ cps/templates/config_edit.html | 4 --- 5 files changed, 40 insertions(+), 25 deletions(-) diff --git a/cps/__init__.py b/cps/__init__.py index 621705fd..9601c63b 100755 --- a/cps/__init__.py +++ b/cps/__init__.py @@ -186,7 +186,6 @@ def create_app(): services.ldap.init_app(app, config) if services.goodreads_support: services.goodreads_support.connect(config.config_goodreads_api_key, - config.config_goodreads_api_secret_e, config.config_use_goodreads) config.store_calibre_uuid(calibre_db, db.Library_Id) # Configure rate limiter diff --git a/cps/admin.py b/cps/admin.py index 86e59317..b709764b 100755 --- a/cps/admin.py +++ b/cps/admin.py @@ -1806,11 +1806,8 @@ def _configuration_update_helper(): # Goodreads configuration _config_checkbox(to_save, "config_use_goodreads") _config_string(to_save, "config_goodreads_api_key") - if to_save.get("config_goodreads_api_secret_e", ""): - _config_string(to_save, "config_goodreads_api_secret_e") if services.goodreads_support: services.goodreads_support.connect(config.config_goodreads_api_key, - config.config_goodreads_api_secret_e, config.config_use_goodreads) _config_int(to_save, "config_updatechannel") diff --git a/cps/config_sql.py b/cps/config_sql.py index a781f2c5..4ea4a9e0 100644 --- a/cps/config_sql.py +++ b/cps/config_sql.py @@ -114,8 +114,6 @@ class _Settings(_Base): config_use_goodreads = Column(Boolean, default=False) config_goodreads_api_key = Column(String) - config_goodreads_api_secret_e = Column(String) - config_goodreads_api_secret = Column(String) config_register_email = Column(Boolean, default=False) config_login_type = Column(Integer, default=0) @@ -422,19 +420,13 @@ def _encrypt_fields(session, secret_key): except OperationalError: with session.bind.connect() as conn: conn.execute(text("ALTER TABLE settings ADD column 'mail_password_e' String")) - conn.execute(text("ALTER TABLE settings ADD column 'config_goodreads_api_secret_e' String")) conn.execute(text("ALTER TABLE settings ADD column 'config_ldap_serv_password_e' String")) session.commit() crypter = Fernet(secret_key) - settings = session.query(_Settings.mail_password, _Settings.config_goodreads_api_secret, - _Settings.config_ldap_serv_password).first() + settings = session.query(_Settings.mail_password, _Settings.config_ldap_serv_password).first() if settings.mail_password: session.query(_Settings).update( {_Settings.mail_password_e: crypter.encrypt(settings.mail_password.encode())}) - if settings.config_goodreads_api_secret: - session.query(_Settings).update( - {_Settings.config_goodreads_api_secret_e: - crypter.encrypt(settings.config_goodreads_api_secret.encode())}) if settings.config_ldap_serv_password: session.query(_Settings).update( {_Settings.config_ldap_serv_password_e: diff --git a/cps/services/goodreads_support.py b/cps/services/goodreads_support.py index 74e6eba9..a4425ee4 100644 --- a/cps/services/goodreads_support.py +++ b/cps/services/goodreads_support.py @@ -18,11 +18,11 @@ import time from functools import reduce +import requests +import xmltodict -try: - from goodreads.client import GoodreadsClient -except ImportError: - from betterreads.client import GoodreadsClient +from goodreads.client import GoodreadsClient +from goodreads.request import GoodreadsRequest try: import Levenshtein except ImportError: Levenshtein = False @@ -30,6 +30,37 @@ except ImportError: Levenshtein = False from .. import logger +class my_GoodreadsClient(GoodreadsClient): + + def request(self, *args, **kwargs): + """Create a GoodreadsRequest object and make that request""" + req = my_GoodreadsRequest(self, *args, **kwargs) + return req.request() + +class GoodreadsRequestException(Exception): + def __init__(self, error_msg, url): + self.error_msg = error_msg + self.url = url + + def __str__(self): + return self.url, ':', self.error_msg + + +class my_GoodreadsRequest(GoodreadsRequest): + + def request(self): + resp = requests.get(self.host+self.path, params=self.params, + headers={"User-agent":"Mozilla/5.0 (X11; Linux x86_64; rv:125.0) " + "Gecko/20100101 Firefox/125.0"}) + if resp.status_code != 200: + raise GoodreadsRequestException(resp.reason, self.path) + if self.req_format == 'xml': + data_dict = xmltodict.parse(resp.content) + return data_dict['GoodreadsResponse'] + else: + raise Exception("Invalid format") + + log = logger.create() _client = None # type: GoodreadsClient @@ -38,20 +69,20 @@ _CACHE_TIMEOUT = 23 * 60 * 60 # 23 hours (in seconds) _AUTHORS_CACHE = {} -def connect(key=None, secret=None, enabled=True): +def connect(key=None, enabled=True): global _client - if not enabled or not key or not secret: + if not enabled or not key: _client = None return if _client: # make sure the configuration has not changed since last we used the client - if _client.client_key != key or _client.client_secret != secret: + if _client.client_key != key: _client = None if not _client: - _client = GoodreadsClient(key, secret) + _client = GoodreadsClient(key, None) def get_author_info(author_name): diff --git a/cps/templates/config_edit.html b/cps/templates/config_edit.html index 8035d03f..b03b7d6d 100755 --- a/cps/templates/config_edit.html +++ b/cps/templates/config_edit.html @@ -162,10 +162,6 @@ -
- - -
{% endif %}
From 5c49c8cdd7d582f4396563c346516e8476928063 Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Fri, 10 May 2024 20:23:41 +0200 Subject: [PATCH 3/4] Fix for Flask-SimpleLDAP 2.0.0 --- cps/admin.py | 5 ++++- optional-requirements.txt | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cps/admin.py b/cps/admin.py index b709764b..3382e566 100755 --- a/cps/admin.py +++ b/cps/admin.py @@ -1631,7 +1631,10 @@ def import_ldap_users(): imported = 0 for username in new_users: - user = username.decode('utf-8') + if isinstance(username, bytes): + user = username.decode('utf-8') + else: + user = username if '=' in user: # if member object field is empty take user object as filter if config.config_ldap_member_user_object: diff --git a/optional-requirements.txt b/optional-requirements.txt index f4f0d169..7b27a63e 100644 --- a/optional-requirements.txt +++ b/optional-requirements.txt @@ -21,7 +21,7 @@ python-Levenshtein>=0.12.0,<0.26.0 # ldap login python-ldap>=3.0.0,<3.5.0 -Flask-SimpleLDAP>=1.4.0,<1.5.0 +Flask-SimpleLDAP>=1.4.0,<2.1.0 # oauth Flask-Dance>=2.0.0,<7.1.0 From 7e85894b3af8d1394390b34d8871abd54bed72e7 Mon Sep 17 00:00:00 2001 From: Ozzie Isaacs Date: Sat, 11 May 2024 07:10:41 +0200 Subject: [PATCH 4/4] Bugfix for goodreads (html formated info for authors now visible) --- cps/clean_html.py | 53 ++++++++ cps/editbooks.py | 81 ++++++------ cps/services/goodreads_support.py | 7 +- cps/templates/author.html | 4 +- cps/templates/config_edit.html | 3 +- setup.cfg | 3 +- test/Calibre-Web TestSummary_Linux.html | 169 +++++++++++++++--------- 7 files changed, 213 insertions(+), 107 deletions(-) create mode 100644 cps/clean_html.py diff --git a/cps/clean_html.py b/cps/clean_html.py new file mode 100644 index 00000000..19e87599 --- /dev/null +++ b/cps/clean_html.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- + +# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web) +# Copyright (C) 2018-2019 OzzieIsaacs +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from . import logger +from lxml.etree import ParserError + +try: + # at least bleach 6.0 is needed -> incomplatible change from list arguments to set arguments + from bleach import clean_text as clean_html + BLEACH = True +except ImportError: + try: + BLEACH = False + from nh3 import clean as clean_html + except ImportError: + try: + BLEACH = False + from lxml.html.clean import clean_html + except ImportError: + clean_html = None + + +log = logger.create() + + +def clean_string(unsafe_text, book_id=0): + try: + if BLEACH: + safe_text = clean_html(unsafe_text, tags=set(), attributes=set()) + else: + safe_text = clean_html(unsafe_text) + except ParserError as e: + log.error("Comments of book {} are corrupted: {}".format(book_id, e)) + safe_text = "" + except TypeError as e: + log.error("Comments can't be parsed, maybe 'lxml' is too new, try installing 'bleach': {}".format(e)) + safe_text = "" + return safe_text diff --git a/cps/editbooks.py b/cps/editbooks.py index 030fbf90..43309a14 100644 --- a/cps/editbooks.py +++ b/cps/editbooks.py @@ -27,22 +27,22 @@ from shutil import copyfile from uuid import uuid4 from markupsafe import escape, Markup # dependency of flask from functools import wraps -from lxml.etree import ParserError - -try: - # at least bleach 6.0 is needed -> incomplatible change from list arguments to set arguments - from bleach import clean_text as clean_html - BLEACH = True -except ImportError: - try: - BLEACH = False - from nh3 import clean as clean_html - except ImportError: - try: - BLEACH = False - from lxml.html.clean import clean_html - except ImportError: - clean_html = None +# from lxml.etree import ParserError + +#try: +# # at least bleach 6.0 is needed -> incomplatible change from list arguments to set arguments +# from bleach import clean_text as clean_html +# BLEACH = True +#except ImportError: +# try: +# BLEACH = False +# from nh3 import clean as clean_html +# except ImportError: +# try: +# BLEACH = False +# from lxml.html.clean import clean_html +# except ImportError: +# clean_html = None from flask import Blueprint, request, flash, redirect, url_for, abort, Response from flask_babel import gettext as _ @@ -54,6 +54,7 @@ from sqlalchemy.orm.exc import StaleDataError from sqlalchemy.sql.expression import func from . import constants, logger, isoLanguages, gdriveutils, uploader, helper, kobo_sync_status +from .clean_html import clean_string from . import config, ub, db, calibre_db from .services.worker import WorkerThread from .tasks.upload import TaskUpload @@ -1004,17 +1005,18 @@ def edit_book_series_index(series_index, book): def edit_book_comments(comments, book): modify_date = False if comments: - try: - if BLEACH: - comments = clean_html(comments, tags=set(), attributes=set()) - else: - comments = clean_html(comments) - except ParserError as e: - log.error("Comments of book {} are corrupted: {}".format(book.id, e)) - comments = "" - except TypeError as e: - log.error("Comments can't be parsed, maybe 'lxml' is too new, try installing 'bleach': {}".format(e)) - comments = "" + comments = clean_string(comments, book.id) + #try: + # if BLEACH: + # comments = clean_html(comments, tags=set(), attributes=set()) + # else: + # comments = clean_html(comments) + #except ParserError as e: + # log.error("Comments of book {} are corrupted: {}".format(book.id, e)) + # comments = "" + #except TypeError as e: + # log.error("Comments can't be parsed, maybe 'lxml' is too new, try installing 'bleach': {}".format(e)) + # comments = "" if len(book.comments): if book.comments[0].text != comments: book.comments[0].text = comments @@ -1072,18 +1074,19 @@ def edit_cc_data_value(book_id, book, c, to_save, cc_db_value, cc_string): elif c.datatype == 'comments': to_save[cc_string] = Markup(to_save[cc_string]).unescape() if to_save[cc_string]: - try: - if BLEACH: - to_save[cc_string] = clean_html(to_save[cc_string], tags=set(), attributes=set()) - else: - to_save[cc_string] = clean_html(to_save[cc_string]) - except ParserError as e: - log.error("Customs Comments of book {} are corrupted: {}".format(book_id, e)) - to_save[cc_string] = "" - except TypeError as e: - to_save[cc_string] = "" - log.error("Customs Comments can't be parsed, maybe 'lxml' is too new, " - "try installing 'bleach': {}".format(e)) + to_save[cc_string] = clean_string(to_save[cc_string], book_id) + #try: + # if BLEACH: + # to_save[cc_string] = clean_html(to_save[cc_string], tags=set(), attributes=set()) + # else: + # to_save[cc_string] = clean_html(to_save[cc_string]) + #except ParserError as e: + # log.error("Customs Comments of book {} are corrupted: {}".format(book_id, e)) + # to_save[cc_string] = "" + #except TypeError as e: + # to_save[cc_string] = "" + # log.error("Customs Comments can't be parsed, maybe 'lxml' is too new, " + # "try installing 'bleach': {}".format(e)) elif c.datatype == 'datetime': try: to_save[cc_string] = datetime.strptime(to_save[cc_string], "%Y-%m-%d") diff --git a/cps/services/goodreads_support.py b/cps/services/goodreads_support.py index 514cceea..29c5c9f9 100644 --- a/cps/services/goodreads_support.py +++ b/cps/services/goodreads_support.py @@ -30,7 +30,7 @@ except ImportError: Levenshtein = False from .. import logger - +from ..clean_html import clean_string class my_GoodreadsClient(GoodreadsClient): @@ -52,7 +52,7 @@ class my_GoodreadsRequest(GoodreadsRequest): def request(self): resp = requests.get(self.host+self.path, params=self.params, - headers={"User-agent":"Mozilla/5.0 (X11; Linux x86_64; rv:125.0) " + headers={"User-Agent":"Mozilla/5.0 (X11; Linux x86_64; rv:125.0) " "Gecko/20100101 Firefox/125.0"}) if resp.status_code != 200: raise GoodreadsRequestException(resp.reason, self.path) @@ -84,7 +84,7 @@ def connect(key=None, enabled=True): _client = None if not _client: - _client = GoodreadsClient(key, None) + _client = my_GoodreadsClient(key, None) def get_author_info(author_name): @@ -109,6 +109,7 @@ def get_author_info(author_name): if author_info: author_info._timestamp = now + author_info.safe_about = clean_string(author_info.about) _AUTHORS_CACHE[author_name] = author_info return author_info diff --git a/cps/templates/author.html b/cps/templates/author.html index 3e82161c..f7314586 100644 --- a/cps/templates/author.html +++ b/cps/templates/author.html @@ -8,8 +8,8 @@ {{author.name}} {% endif %} - {%if author.about is not none %} -

{{author.about}}

+ {%if author.safe_about is not none %} +

{{author.safe_about|safe}}

{% endif %} - {{_("via")}} Goodreads diff --git a/cps/templates/config_edit.html b/cps/templates/config_edit.html index b03b7d6d..0d0a695f 100755 --- a/cps/templates/config_edit.html +++ b/cps/templates/config_edit.html @@ -9,7 +9,7 @@

{{title}}

-
+

@@ -155,7 +155,6 @@
- {{_('Create an API Key')}}
diff --git a/setup.cfg b/setup.cfg index 615a44da..89edcbde 100644 --- a/setup.cfg +++ b/setup.cfg @@ -58,6 +58,7 @@ install_requires = chardet>=3.0.0,<4.1.0 advocate>=1.0.0,<1.1.0 Flask-Limiter>=2.3.0,<3.6.0 + regex>=2022.3.2,<2024.2.25 [options.packages.find] @@ -85,7 +86,7 @@ goodreads = python-Levenshtein>=0.12.0,<0.26.0 ldap = python-ldap>=3.0.0,<3.5.0 - Flask-SimpleLDAP>=1.4.0,<1.5.0 + Flask-SimpleLDAP>=1.4.0,<2.1.0 oauth = Flask-Dance>=2.0.0,<7.1.0 SQLAlchemy-Utils>=0.33.5,<0.42.0 diff --git a/test/Calibre-Web TestSummary_Linux.html b/test/Calibre-Web TestSummary_Linux.html index 196ea131..b1a91154 100644 --- a/test/Calibre-Web TestSummary_Linux.html +++ b/test/Calibre-Web TestSummary_Linux.html @@ -37,20 +37,20 @@
-

Start Time: 2024-02-26 20:07:24

+

Start Time: 2024-05-10 20:24:40

-

Stop Time: 2024-02-27 03:19:17

+

Stop Time: 2024-05-11 03:33:47

-

Duration: 6h 0 min

+

Duration: 5h 58 min

@@ -320,38 +320,30 @@ - +
TestBackupMetadata - test_backup_change_book_series_index
- PASS - - - - - - -
TestBackupMetadata - test_backup_change_book_tags
-
- FAIL + FAIL
-
Traceback (most recent call last):
-  File "/home/ozzie/Development/calibre-web-test/test/test_login.py", line 532, in test_proxy_login_multi_user
+  File "/home/ozzie/Development/calibre-web-test/test/test_login.py", line 575, in test_proxy_login_multi_user
     self.assertTrue('<input type="text" class="form-control" name="name" id="name" value="new_user1" autocomplete="off">' in resp.text)
 AssertionError: False is not true
@@ -5569,8 +5612,8 @@ AssertionError: False is not true Total 492 - 479 - 2 + 477 + 4 1 10   @@ -5600,7 +5643,7 @@ AssertionError: False is not true Platform - Linux 6.5.0-21-generic #21~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Feb 9 13:32:52 UTC 2 x86_64 x86_64 + Linux 6.5.0-28-generic #29~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Apr 4 14:39:20 UTC 2 x86_64 x86_64 Basic @@ -5624,7 +5667,7 @@ AssertionError: False is not true Babel - 2.14.0 + 2.15.0 Basic @@ -5684,19 +5727,19 @@ AssertionError: False is not true Jinja2 - 3.1.3 + 3.1.4 Basic lxml - 5.1.0 + 5.1.1 Basic pyasn1 - 0.5.1 + 0.6.0 Basic @@ -5712,6 +5755,12 @@ AssertionError: False is not true Basic + + regex + 2023.12.25 + Basic + + requests 2.31.0 @@ -5720,7 +5769,7 @@ AssertionError: False is not true SQLAlchemy - 2.0.27 + 2.0.30 Basic @@ -5750,7 +5799,7 @@ AssertionError: False is not true google-api-python-client - 2.119.0 + 2.128.0 TestBackupMetadataGdrive @@ -5780,7 +5829,7 @@ AssertionError: False is not true google-api-python-client - 2.119.0 + 2.128.0 TestCliGdrivedb @@ -5810,7 +5859,7 @@ AssertionError: False is not true google-api-python-client - 2.119.0 + 2.128.0 TestEbookConvertCalibreGDrive @@ -5840,7 +5889,7 @@ AssertionError: False is not true google-api-python-client - 2.119.0 + 2.129.0 TestEbookConvertGDriveKepubify @@ -5876,25 +5925,25 @@ AssertionError: False is not true py7zr - 0.20.8 + 0.21.0 TestEditAdditionalBooks rarfile - 4.1 + 4.2 TestEditAdditionalBooks py7zr - 0.20.8 + 0.21.0 TestEditBooks google-api-python-client - 2.119.0 + 2.129.0 TestEditAuthorsGdrive @@ -5930,7 +5979,7 @@ AssertionError: False is not true google-api-python-client - 2.119.0 + 2.129.0 TestEditBooksOnGdrive @@ -5972,7 +6021,7 @@ AssertionError: False is not true google-api-python-client - 2.119.0 + 2.129.0 TestEmbedMetadataGdrive @@ -6002,7 +6051,7 @@ AssertionError: False is not true google-api-python-client - 2.119.0 + 2.129.0 TestSetupGdrive @@ -6038,31 +6087,31 @@ AssertionError: False is not true python-Levenshtein - 0.25.0 + 0.25.1 TestGoodreads jsonschema - 4.21.1 + 4.22.0 TestKoboSync jsonschema - 4.21.1 + 4.22.0 TestKoboSyncBig Flask-SimpleLDAP - 1.4.0 + 2.0.0 TestLdapLogin jsonschema - 4.21.1 + 4.22.0 TestLdapLogin @@ -6074,13 +6123,13 @@ AssertionError: False is not true Flask-Dance - 7.0.1 + 7.1.0 TestOAuthLogin SQLAlchemy-Utils - 0.41.1 + 0.41.2 TestOAuthLogin @@ -6092,7 +6141,7 @@ AssertionError: False is not true