From ec8844c7d4440a8338372f6cff352e1f0c038033 Mon Sep 17 00:00:00 2001
From: Ozzie Isaacs
Date: Sat, 27 Aug 2022 15:44:21 +0200
Subject: [PATCH] Make pyPDF2 again to the favorite pdf metadata extractor

---
Reviewer notes (text in this section is ignored by `git am` and is not part
of the commit message):

* Import hunk: the fallback log.debug() keeps both arguments (ex, e). Its
  format string has two %s placeholders, so passing only `e` would make the
  logging call fail to format the record whenever debug logging is active.
  The line is therefore unchanged context in this revision of the patch.
* pdf_meta hunk: the pre-existing unguarded getDocumentInfo() call is now
  removed, so the call inside the new try/except is the only one. Left in
  place, the unguarded call would raise before the guarded one could run.
  TODO confirm doc_info is initialised earlier in pdf_meta, because the
  except branch does not assign it.
* Line breaks and indentation were re-created from the hunk line counts and
  the Python block structure, because the received copy had its whitespace
  collapsed. Verify against the tree, or apply with --ignore-whitespace.
  The post-image blob id on the cps/uploader.py index line predates the two
  corrections above.

 cps/uploader.py  | 14 ++++++++------
 requirements.txt |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/cps/uploader.py b/cps/uploader.py
index 92035356..ecf4868c 100644
--- a/cps/uploader.py
+++ b/cps/uploader.py
@@ -37,16 +37,15 @@ except (ImportError, RuntimeError) as e:
     use_generic_pdf_cover = True
 
 try:
-    from PyPDF3 import PdfFileReader
-    from PyPDF3 import __version__ as PyPdfVersion
+    from PyPDF2 import PdfFileReader
     use_pdf_meta = True
 except ImportError as ex:
+    log.debug('PyPDF2 is recommended for best performance in metadata extracting from pdf files: %s', ex)
     try:
-        from PyPDF2 import PdfFileReader
-        from PyPDF2 import __version__ as PyPdfVersion
+        from PyPDF3 import PdfFileReader
         use_pdf_meta = True
     except ImportError as e:
         log.debug('Cannot import PyPDF3/PyPDF2, extracting pdf metadata will not work: %s / %s', ex, e)
         use_pdf_meta = False
 
 try:
@@ -114,7 +113,7 @@ def parse_xmp(pdf_file):
     try:
         xmp_info = pdf_file.getXmpMetadata()
     except Exception as ex:
-        log.debug('Can not read XMP metadata {}'.format(ex))
+        log.debug('Can not read PDF XMP metadata {}'.format(ex))
         return None
 
     if xmp_info:
@@ -160,5 +159,8 @@ def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
         with open(tmp_file_path, 'rb') as f:
             pdf_file = PdfFileReader(f)
-            doc_info = pdf_file.getDocumentInfo()
+            try:
+                doc_info = pdf_file.getDocumentInfo()
+            except Exception as exc:
+                log.debug('Can not read PDF DocumentInfo {}'.format(exc))
             xmp_info = parse_xmp(pdf_file)
 
diff --git a/requirements.txt b/requirements.txt
index acaf4a40..0c20c71d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ Flask-Principal>=0.3.2,<0.5.1
 backports_abc>=0.4
 Flask>=1.0.2,<2.1.0
 iso-639>=0.4.5,<0.5.0
-PyPDF3>=1.0.0,<1.0.7
+PyPDF2>=1.20,<2.11.0
 pytz>=2016.10
 requests>=2.11.1,<2.28.0
 SQLAlchemy>=1.3.0,<1.5.0