calibre-web/cps/book_formats.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import uploader
import os
from flask_babel import gettext as _
import comic

# lxml is optional: lxmlversion stays None when it cannot be imported, and
# get_versions() then reports it as not installed.
try:
    from lxml.etree import LXML_VERSION as lxmlversion
except ImportError:
    lxmlversion = None

__author__ = 'lemmsh'

logger = logging.getLogger("book_formats")

# Wand (ImageMagick binding) is optional. When it is unavailable,
# use_generic_pdf_cover is set and pdf_preview() returns no cover.
# NOTE(review): RuntimeError is presumably raised by Wand when the underlying
# ImageMagick library is missing -- confirm against the Wand version in use.
try:
    from wand.image import Image
    from wand import version as ImageVersion
    use_generic_pdf_cover = False
except (ImportError, RuntimeError) as e:
    logger.warning('cannot import Image, generating pdf covers for pdf uploads will not work: %s', e)
    use_generic_pdf_cover = True
# PyPDF2 is optional. Without it pdf_meta() falls back to the file name and
# an unknown author instead of reading the document info.
try:
    from PyPDF2 import PdfFileReader
    from PyPDF2 import __version__ as PyPdfVersion
    use_pdf_meta = True
except ImportError as e:
    logger.warning('cannot import PyPDF2, extracting pdf metadata will not work: %s', e)
    use_pdf_meta = False

# The epub module is only consulted by process() when use_epub_meta is True.
try:
    import epub
    use_epub_meta = True
except ImportError as e:
    logger.warning('cannot import epub, extracting epub metadata will not work: %s', e)
    use_epub_meta = False

# The fb2 module is only consulted by process() when use_fb2_meta is True.
try:
    import fb2
    use_fb2_meta = True
except ImportError as e:
    logger.warning('cannot import fb2, extracting fb2 metadata will not work: %s', e)
    use_fb2_meta = False


def process(tmp_file_path, original_file_name, original_file_extension):
    """Extract metadata for an uploaded book, falling back to defaults.

    The extension is compared case-insensitively and routed to the matching
    extractor (PDF, EPUB, FB2, CBZ/CBT). EPUB and FB2 are only parsed when
    their helper modules were imported successfully. Any exception raised
    while parsing is logged and swallowed.

    :param tmp_file_path: path of the uploaded file on disk
    :param original_file_name: file name as supplied by the uploader
    :param original_file_extension: extension including the leading dot
    :return: the extracted metadata when it carries a non-blank title and
             author, otherwise the result of default_meta()
    """
    parsed = None
    try:
        # Kept inside the try so a non-string extension is logged, not raised.
        ext = original_file_extension.upper()
        if ext == ".PDF":
            parsed = pdf_meta(tmp_file_path, original_file_name, original_file_extension)
        elif ext == ".EPUB":
            if use_epub_meta is True:
                parsed = epub.get_epub_info(tmp_file_path, original_file_name, original_file_extension)
        elif ext == ".FB2":
            if use_fb2_meta is True:
                parsed = fb2.get_fb2_info(tmp_file_path, original_file_extension)
        elif ext in ('.CBZ', '.CBT'):
            parsed = comic.get_comic_info(tmp_file_path, original_file_name, original_file_extension)
    except Exception as ex:
        logger.warning('cannot parse metadata, using default: %s', ex)

    # Nothing parsed, or title/author consisting only of whitespace: use defaults.
    if not parsed or not parsed.title.strip() or not parsed.author.strip():
        return default_meta(tmp_file_path, original_file_name, original_file_extension)
    return parsed


def default_meta(tmp_file_path, original_file_name, original_file_extension):
    """Build placeholder metadata for a file whose metadata is unavailable.

    The original file name is used as the title and the author is set to
    "Unknown"; the cover is None and all remaining fields are empty strings.

    :param tmp_file_path: path of the uploaded file on disk
    :param original_file_name: file name as supplied by the uploader
    :param original_file_extension: extension of the uploaded file
    :return: an uploader.BookMeta instance
    """
    fields = {
        'file_path': tmp_file_path,
        'extension': original_file_extension,
        'title': original_file_name,
        'author': u"Unknown",
        'cover': None,
    }
    # Every remaining descriptive field is left blank.
    for blank_field in ('description', 'tags', 'series', 'series_id', 'languages'):
        fields[blank_field] = ""
    return uploader.BookMeta(**fields)


def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
    """Read title, author and subject from a PDF and build its BookMeta.

    When PyPDF2 is unavailable, or the document carries no info dictionary,
    the title falls back to the original file name, the author to "Unknown"
    and the description to an empty string. A missing title or author inside
    an existing info dictionary falls back the same way.

    :param tmp_file_path: path of the uploaded PDF on disk
    :param original_file_name: file name as supplied by the uploader
    :param original_file_extension: extension of the uploaded file
    :return: an uploader.BookMeta instance; its cover comes from pdf_preview()
    """
    author = u"Unknown"
    title = original_file_name
    subject = ""

    if use_pdf_meta:
        # Use a context manager so the file handle is always closed; the
        # previous code left the handle from open() dangling.
        with open(tmp_file_path, 'rb') as pdf_file:
            doc_info = PdfFileReader(pdf_file).getDocumentInfo()
            if doc_info is not None:
                # Read the fields while the stream is still open: the reader
                # may resolve indirect objects lazily from the file.
                author = doc_info.author if doc_info.author else u"Unknown"
                title = doc_info.title if doc_info.title else original_file_name
                subject = doc_info.subject

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title,
        author=author,
        cover=pdf_preview(tmp_file_path, original_file_name),
        description=subject,
        tags="",
        series="",
        series_id="",
        languages="")


def pdf_preview(tmp_file_path, tmp_dir):
    """Render the first page of a PDF to a JPEG cover image.

    :param tmp_file_path: path of the uploaded PDF on disk
    :param tmp_dir: directory joined in front of the cover file name; it is
                    discarded by os.path.join when tmp_file_path is absolute
    :return: path of the written cover image, or None when Wand/ImageMagick
             is unavailable (use_generic_pdf_cover is set)
    """
    if use_generic_pdf_cover:
        return None

    cover_file_name = os.path.splitext(tmp_file_path)[0] + ".cover.jpg"
    # Compute the destination once and return exactly what was written.
    # The old code returned cover_file_name while saving to the joined path,
    # and those differ whenever tmp_file_path is relative.
    cover_path = os.path.join(tmp_dir, cover_file_name)
    # "[0]" selects only the first page of the PDF.
    with Image(filename=tmp_file_path + "[0]", resolution=150) as img:
        img.compression_quality = 88
        img.save(filename=cover_path)
    return cover_path


def get_versions():
    """Report the versions of the optional libraries used by this module.

    :return: dict with the keys 'Image Magick', 'PyPdf', 'lxml' and
             'Wand Version'; each value is a version string, or the
             translated text 'not installed' when the library is missing
    """
    if use_generic_pdf_cover:
        # Wand could not be imported, so neither version is known.
        magick_version = _(u'not installed')
        wand_version = _(u'not installed')
    else:
        magick_version = ImageVersion.MAGICK_VERSION
        wand_version = ImageVersion.VERSION

    pypdf_version = ('v' + PyPdfVersion) if use_pdf_meta else _(u'not installed')

    if lxmlversion:
        # lxmlversion is a sequence of version components; join them with dots.
        lxml_version = 'v' + '.'.join(str(part) for part in lxmlversion)
    else:
        lxml_version = _(u'not installed')

    return {
        'Image Magick': magick_version,
        'PyPdf': pypdf_version,
        'lxml': lxml_version,
        'Wand Version': wand_version,
    }
Code cosmetics Bugfix download opds added changable title to opds feed removed unused search.xml file 2017-01-29 20:06:08 +00:00			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`

changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`import logging`
			`import uploader`
			`import os`
			`from flask_babel import gettext as _`
Import covers from comic files during upload 2017-12-02 16:33:55 +00:00			`import comic`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00
Fix #544 (missed some config options) Added lxml to stats sceen 2018-07-15 14:27:16 +00:00			`try:`
			`from lxml.etree import LXML_VERSION as lxmlversion`
			`except ImportError:`
			`lxmlversion = None`

refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`__author__ = 'lemmsh'`

logging, tmp cleanup 2016-06-05 16:42:18 +00:00			`logger = logging.getLogger("book_formats")`

refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`try:`
			`from wand.image import Image`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`from wand import version as ImageVersion`
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`use_generic_pdf_cover = False`
Bugfix Upload Message Improved errorhandling at import imagemagick 2018-03-31 06:00:51 +00:00			`except (ImportError, RuntimeError) as e:`
epub uploading 2016-06-05 19:28:30 +00:00			`logger.warning('cannot import Image, generating pdf covers for pdf uploads will not work: %s', e)`
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`use_generic_pdf_cover = True`
logging, tmp cleanup 2016-06-05 16:42:18 +00:00			`try:`
			`from PyPDF2 import PdfFileReader`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`from PyPDF2 import __version__ as PyPdfVersion`
logging, tmp cleanup 2016-06-05 16:42:18 +00:00			`use_pdf_meta = True`
Least change to adapt with python3 change some syntax - except clause - unicode -> bulitins.str - sqllite uri - fix import local path - 01 to 1 (0 is meaningless) add module - future - builtins (from future) - imp (python3 ) - past (from future) - sqlalchemy (update one) refer to http://python-future.org/compatible_idioms.html 2017-03-05 09:40:39 +00:00			`except ImportError as e:`
epub uploading 2016-06-05 19:28:30 +00:00			`logger.warning('cannot import PyPDF2, extracting pdf metadata will not work: %s', e)`
logging, tmp cleanup 2016-06-05 16:42:18 +00:00			`use_pdf_meta = False`
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00
epub uploading 2016-06-05 19:28:30 +00:00			`try:`
			`import epub`
			`use_epub_meta = True`
Least change to adapt with python3 change some syntax - except clause - unicode -> bulitins.str - sqllite uri - fix import local path - 01 to 1 (0 is meaningless) add module - future - builtins (from future) - imp (python3 ) - past (from future) - sqlalchemy (update one) refer to http://python-future.org/compatible_idioms.html 2017-03-05 09:40:39 +00:00			`except ImportError as e:`
resolve conflicts with PR 30 2016-08-07 16:46:38 +00:00			`logger.warning('cannot import epub, extracting epub metadata will not work: %s', e)`
epub uploading 2016-06-05 19:28:30 +00:00			`use_epub_meta = False`

fb2 uploading 2016-06-18 13:50:32 +00:00			`try:`
			`import fb2`
			`use_fb2_meta = True`
Least change to adapt with python3 change some syntax - except clause - unicode -> bulitins.str - sqllite uri - fix import local path - 01 to 1 (0 is meaningless) add module - future - builtins (from future) - imp (python3 ) - past (from future) - sqlalchemy (update one) refer to http://python-future.org/compatible_idioms.html 2017-03-05 09:40:39 +00:00			`except ImportError as e:`
resolve conflicts with PR 30 2016-08-07 16:46:38 +00:00			`logger.warning('cannot import fb2, extracting fb2 metadata will not work: %s', e)`
fb2 uploading 2016-06-18 13:50:32 +00:00			`use_fb2_meta = False`

epub uploading 2016-06-05 19:28:30 +00:00
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`def process(tmp_file_path, original_file_name, original_file_extension):`
Ensure file paths in uploads are non-empty, fix #183 (#184) * Ensure file paths in uploads are non-empty, fix #183 * Fix AttributeError in process(), handle strings of spaces 2017-04-23 06:22:10 +00:00			`meta = None`
epub uploading 2016-06-05 19:28:30 +00:00			`try:`
			`if ".PDF" == original_file_extension.upper():`
Ensure file paths in uploads are non-empty, fix #183 (#184) * Ensure file paths in uploads are non-empty, fix #183 * Fix AttributeError in process(), handle strings of spaces 2017-04-23 06:22:10 +00:00			`meta = pdf_meta(tmp_file_path, original_file_name, original_file_extension)`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`if ".EPUB" == original_file_extension.upper() and use_epub_meta is True:`
Ensure file paths in uploads are non-empty, fix #183 (#184) * Ensure file paths in uploads are non-empty, fix #183 * Fix AttributeError in process(), handle strings of spaces 2017-04-23 06:22:10 +00:00			`meta = epub.get_epub_info(tmp_file_path, original_file_name, original_file_extension)`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`if ".FB2" == original_file_extension.upper() and use_fb2_meta is True:`
Ensure file paths in uploads are non-empty, fix #183 (#184) * Ensure file paths in uploads are non-empty, fix #183 * Fix AttributeError in process(), handle strings of spaces 2017-04-23 06:22:10 +00:00			`meta = fb2.get_fb2_info(tmp_file_path, original_file_extension)`
Import covers from comic files during upload 2017-12-02 16:33:55 +00:00			`if original_file_extension.upper() in ['.CBZ', '.CBT']:`
			`meta = comic.get_comic_info(tmp_file_path, original_file_name, original_file_extension)`

Improved error handling for renaming authors and titles (changes related to filesystem and not only to database) 2017-11-30 15:49:46 +00:00			`except Exception as ex:`
			`logger.warning('cannot parse metadata, using default: %s', ex)`
Ensure file paths in uploads are non-empty, fix #183 (#184) * Ensure file paths in uploads are non-empty, fix #183 * Fix AttributeError in process(), handle strings of spaces 2017-04-23 06:22:10 +00:00
			`if meta and meta.title.strip() and meta.author.strip():`
			`return meta`
			`else:`
			`return default_meta(tmp_file_path, original_file_name, original_file_extension)`
default upload logic 2016-06-05 16:52:28 +00:00

			`def default_meta(tmp_file_path, original_file_name, original_file_extension):`
			`return uploader.BookMeta(`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`file_path=tmp_file_path,`
			`extension=original_file_extension,`
			`title=original_file_name,`
- added best rated section in normal view - added most downloaded section in opds view - imporved fb2 upload, correct handling of missing elements - author sort is set on editing and uploading files - Encoding stuff on uploading files 2017-02-04 13:28:18 +00:00			`author=u"Unknown",`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`cover=None,`
			`description="",`
			`tags="",`
			`series="",`
add languages field for BookMeta 2017-03-02 11:59:35 +00:00			`series_id="",`
Upload support detection of language 2017-03-02 14:57:02 +00:00			`languages="")`
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00

			`def pdf_meta(tmp_file_path, original_file_name, original_file_extension):`
logging, tmp cleanup 2016-06-05 16:42:18 +00:00
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`if use_pdf_meta:`
logging, tmp cleanup 2016-06-05 16:42:18 +00:00			`pdf = PdfFileReader(open(tmp_file_path, 'rb'))`
			`doc_info = pdf.getDocumentInfo()`
			`else:`
			`doc_info = None`

changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`if doc_info is not None:`
Ensure file paths in uploads are non-empty, fix #183 (#184) * Ensure file paths in uploads are non-empty, fix #183 * Fix AttributeError in process(), handle strings of spaces 2017-04-23 06:22:10 +00:00			`author = doc_info.author if doc_info.author else u"Unknown"`
			`title = doc_info.title if doc_info.title else original_file_name`
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`subject = doc_info.subject`
			`else:`
- added best rated section in normal view - added most downloaded section in opds view - imporved fb2 upload, correct handling of missing elements - author sort is set on editing and uploading files - Encoding stuff on uploading files 2017-02-04 13:28:18 +00:00			`author = u"Unknown"`
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`title = original_file_name`
			`subject = ""`
			`return uploader.BookMeta(`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`file_path=tmp_file_path,`
			`extension=original_file_extension,`
			`title=title,`
			`author=author,`
			`cover=pdf_preview(tmp_file_path, original_file_name),`
			`description=subject,`
			`tags="",`
			`series="",`
add languages field for BookMeta 2017-03-02 11:59:35 +00:00			`series_id="",`
Upload support detection of language 2017-03-02 14:57:02 +00:00			`languages="")`
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`def pdf_preview(tmp_file_path, tmp_dir):`
			`if use_generic_pdf_cover:`
			`return None`
			`else:`
			`cover_file_name = os.path.splitext(tmp_file_path)[0] + ".cover.jpg"`
Added polish in readme to supported UI languages Handling of missing tags in fb import naming of path is more imitating calibre (replacement of special characters, "pinyining" of author names if unidecode is available ) Sorting of authors (similar to calibre for jr./sr./I..IV endings) bugfix pathseparator on windows and linux during upload bugfix os.rename for authordir publishing date on detailview is formated according to slected locale filename on downloading from web ui is now correct displayed added ids to html for testing 2017-02-15 17:09:17 +00:00			`with Image(filename=tmp_file_path + "[0]", resolution=150) as img:`
refactoring to make adding new formats possible 2016-06-05 15:41:47 +00:00			`img.compression_quality = 88`
			`img.save(filename=os.path.join(tmp_dir, cover_file_name))`
			`return cover_file_name`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00
Improved error handling for renaming authors and titles (changes related to filesystem and not only to database) 2017-11-30 15:49:46 +00:00
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`def get_versions():`
			`if not use_generic_pdf_cover:`
Improved message for non configured ebook converter Gdrive cleared interface (drive variable) refactored web -> reverseproxy outsourced, edit_books split, removed unused md5 function rearanged imports added Wand and pytz Version info to stats Page 2018-09-30 07:43:20 +00:00			`IVersion = ImageVersion.MAGICK_VERSION`
			`WVersion = ImageVersion.VERSION`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`else:`
Fix #544 (missed some config options) Added lxml to stats sceen 2018-07-15 14:27:16 +00:00			`IVersion = _(u'not installed')`
Improved message for non configured ebook converter Gdrive cleared interface (drive variable) refactored web -> reverseproxy outsourced, edit_books split, removed unused md5 function rearanged imports added Wand and pytz Version info to stats Page 2018-09-30 07:43:20 +00:00			`WVersion = _(u'not installed')`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`if use_pdf_meta:`
Code cleaning Stats page Enable calibre's ebook-convert as converter for mobi files (#411, #533) 2018-07-18 18:21:44 +00:00			`PVersion='v'+PyPdfVersion`
changes for #77 Code cosmetics #75: - More debug infos for kindlegen and sending e-mail. - Button for sending test e-mail. - timeout of 5min for sending e-mail 2016-12-23 08:53:39 +00:00			`else:`
- added best rated section in normal view - added most downloaded section in opds view - imporved fb2 upload, correct handling of missing elements - author sort is set on editing and uploading files - Encoding stuff on uploading files 2017-02-04 13:28:18 +00:00			`PVersion=_(u'not installed')`
Fix #544 (missed some config options) Added lxml to stats sceen 2018-07-15 14:27:16 +00:00			`if lxmlversion:`
Code cleaning Stats page Enable calibre's ebook-convert as converter for mobi files (#411, #533) 2018-07-18 18:21:44 +00:00			`XVersion = 'v'+'.'.join(map(str, lxmlversion))`
Fix #544 (missed some config options) Added lxml to stats sceen 2018-07-15 14:27:16 +00:00			`else:`
			`XVersion = _(u'not installed')`
Improved message for non configured ebook converter Gdrive cleared interface (drive variable) refactored web -> reverseproxy outsourced, edit_books split, removed unused md5 function rearanged imports added Wand and pytz Version info to stats Page 2018-09-30 07:43:20 +00:00			`return {'Image Magick': IVersion, 'PyPdf': PVersion, 'lxml':XVersion, 'Wand Version': WVersion}`