# Review summary of this patch. Everything above the first "diff --git" header
# is commentary and is not part of any hunk.
#
# Purpose: move upload metadata extraction out of cps/web.py into new modules
# so uploads are no longer limited to PDF files.
#
#   cps/book_formats.py (new)
#     - Optional imports (wand Image, PyPDF2 PdfFileReader, epub, fb2) are each
#       wrapped in try/except ImportError; a failure logs a warning on the
#       "book_formats" logger and sets the matching use_* flag.
#     - process() compares the upper-cased extension against ".PDF", ".EPUB"
#       and ".FB2" and calls pdf_meta / epub.get_epub_info / fb2.get_fb2_info.
#       Any Exception raised by a parser is logged as a warning.
#       NOTE(review): the trailing "return default_meta(...)" presumably sits
#       at function level (fallback for unknown extensions and parser
#       failures) - indentation is not recoverable from this copy; confirm.
#     - default_meta() returns a BookMeta with author "Unknown", cover None and
#       the original file name as title.
#     - pdf_meta() reads author/title/subject via PdfFileReader when
#       use_pdf_meta is set, and asks pdf_preview() for a cover.
#     - pdf_preview() renders page "[0]" at resolution 150 to
#       "<tmp path without extension>.cover.jpg" (quality 88), or returns None
#       when wand could not be imported.
#
#   cps/epub.py (new)
#     - get_epub_info() reads META-INF/container.xml from the zip, follows the
#       rootfile full-path to the package file, and takes dc:title,
#       dc:description and dc:creator (each defaulting to "Unknown").
#     - extractCover() reads "OPS/" + href of the manifest item with id 'cover'
#       and writes it next to the temp file.
#
#   cps/fb2.py (new)
#     - get_fb2_info() collects title-info authors (first/middle/last name),
#       book-title, and publish-info/book-name (used as the description).
#
#   cps/uploader.py (new)
#     - Defines the BookMeta namedtuple and upload(), which saves the incoming
#       file under /tmp/calibre-web/<md5 hexdigest of the file name> and
#       returns book_formats.process(...).
#
#   cps/web.py (modified)
#     - Removes the wand import block and the "PDF only" check.
#     - Attaches file_handler to the "book_formats" logger at INFO level.
#     - upload() now calls uploader.upload(file), moves meta.file_path to the
#       library path, and either moves meta.cover to cover.jpg (has_cover = 1)
#       or copies static/generic_cover.jpg (has_cover = 0).
#
# NOTE(review) - points to confirm before merging:
#   * This copy of the patch has its line breaks and indentation collapsed;
#     apply from the original commit rather than from this text.
#   * The code is Python 2 only ("except ImportError, e", unicode(...)).
#   * pdf_meta passes open(tmp_file_path, 'rb') to PdfFileReader and
#     get_fb2_info calls open(tmp_file_path); neither handle is closed in the
#     visible code, and the ZipFile in get_epub_info is not closed either.
#   * pdf_preview's second parameter is named tmp_dir, but pdf_meta passes
#     original_file_name. cover_file_name is derived from tmp_file_path, which
#     uploader.upload() builds under "/tmp/calibre-web"; os.path.join drops
#     earlier components when a later one is absolute, so the argument looks
#     unused in practice - verify.
#   * get_epub_info fills missing fields with "Unknown", so the
#     "epub_metadata['title'] is None" branch appears unreachable and the
#     original file name is presumably never used as the title.
#   * extractCover hard-codes the "OPS/" prefix; archives that keep their
#     content elsewhere would presumably fail in zip.read and drop back to the
#     default metadata through process()'s exception handler - confirm.
#   * fb2 get_author indexes [0] on first-name, middle-name and last-name; a
#     missing element raises IndexError, which process() catches.
#   * The temp path depends only on the file name, so two uploads with the
#     same name map to the same temp file.
diff --git a/cps/book_formats.py b/cps/book_formats.py new file mode 100644 index 00000000..f88faf52 --- /dev/null +++ b/cps/book_formats.py @@ -0,0 +1,99 @@ +__author__ = 'lemmsh' + +import logging +logger = logging.getLogger("book_formats") + +import uploader +import os +try: + from wand.image import Image + use_generic_pdf_cover = False +except ImportError, e: + logger.warning('cannot import Image, generating pdf covers for pdf uploads will not work: %s', e) + use_generic_pdf_cover = True +try: + from PyPDF2 import PdfFileReader + use_pdf_meta = True +except ImportError, e: + logger.warning('cannot import PyPDF2, extracting pdf metadata will not work: %s', e) + use_pdf_meta = False + +try: + import epub + use_epub_meta = True +except ImportError, e: + logger.warning('cannot import epub, extracting epub metadata will not work: %s', e) + use_epub_meta = False + +try: + import fb2 + use_fb2_meta = True +except ImportError, e: + logger.warning('cannot import fb2, extracting fb2 metadata will not work: %s', e) + use_fb2_meta = False + + +def process(tmp_file_path, original_file_name, original_file_extension): + try: + if ".PDF" == original_file_extension.upper(): + return pdf_meta(tmp_file_path, original_file_name, original_file_extension) + if ".EPUB" == original_file_extension.upper() and use_epub_meta == True: + return epub.get_epub_info(tmp_file_path, original_file_name, original_file_extension) + if ".FB2" == original_file_extension.upper() and use_fb2_meta == True: + return fb2.get_fb2_info(tmp_file_path, original_file_name, original_file_extension) + except Exception, e: + logger.warning('cannot parse metadata, using default: %s', e) + + return default_meta(tmp_file_path, original_file_name, original_file_extension) + + + +def default_meta(tmp_file_path, original_file_name, original_file_extension): + return uploader.BookMeta( + file_path = tmp_file_path, + extension = original_file_extension, + title = original_file_name, + author = "Unknown", + cover = None, 
+ description = "", + tags = "", + series = "", + series_id="") + + +def pdf_meta(tmp_file_path, original_file_name, original_file_extension): + + if (use_pdf_meta): + pdf = PdfFileReader(open(tmp_file_path, 'rb')) + doc_info = pdf.getDocumentInfo() + else: + doc_info = None + + if (doc_info is not None): + author = doc_info.author + title = doc_info.title + subject = doc_info.subject + else: + author = "Unknown" + title = original_file_name + subject = "" + return uploader.BookMeta( + file_path = tmp_file_path, + extension = original_file_extension, + title = title, + author = author, + cover = pdf_preview(tmp_file_path, original_file_name), + description = subject, + tags = "", + series = "", + series_id="") + +def pdf_preview(tmp_file_path, tmp_dir): + if use_generic_pdf_cover: + return None + else: + cover_file_name = os.path.splitext(tmp_file_path)[0] + ".cover.jpg" + with Image(filename=tmp_file_path + "[0]", resolution=150) as img: + img.compression_quality = 88 + img.save(filename=os.path.join(tmp_dir, cover_file_name)) + return cover_file_name diff --git a/cps/epub.py b/cps/epub.py new file mode 100644 index 00000000..03fb30bb --- /dev/null +++ b/cps/epub.py @@ -0,0 +1,67 @@ +import zipfile +from lxml import etree +import os +import uploader + +def extractCover(zip, coverFile, tmp_file_name): + if (coverFile is None): + return None + else: + cf = zip.read("OPS/" + coverFile) + prefix = os.path.splitext(tmp_file_name)[0] + tmp_cover_name = prefix + "." 
+ coverFile + image = open(tmp_cover_name, 'wb') + image.write(cf) + image.close() + return tmp_cover_name + + + +def get_epub_info(tmp_file_path, original_file_name, original_file_extension): + ns = { + 'n':'urn:oasis:names:tc:opendocument:xmlns:container', + 'pkg':'http://www.idpf.org/2007/opf', + 'dc':'http://purl.org/dc/elements/1.1/' + } + + zip = zipfile.ZipFile(tmp_file_path) + + txt = zip.read('META-INF/container.xml') + tree = etree.fromstring(txt) + cfname = tree.xpath('n:rootfiles/n:rootfile/@full-path',namespaces=ns)[0] + + cf = zip.read(cfname) + tree = etree.fromstring(cf) + + p = tree.xpath('/pkg:package/pkg:metadata',namespaces=ns)[0] + + epub_metadata = {} + for s in ['title', 'description', 'creator']: + tmp = p.xpath('dc:%s/text()'%(s),namespaces=ns) + if (len(tmp) > 0): + epub_metadata[s] = p.xpath('dc:%s/text()'%(s),namespaces=ns)[0] + else: + epub_metadata[s] = "Unknown" + + coversection = tree.xpath("/pkg:package/pkg:manifest/pkg:item[@id='cover']/@href",namespaces=ns) + if (len(coversection) > 0): + coverfile = extractCover(zip, coversection[0], tmp_file_path) + else: + coverfile = None + if epub_metadata['title'] is None: + title = original_file_name + else: + title = epub_metadata['title'] + + + return uploader.BookMeta( + file_path = tmp_file_path, + extension = original_file_extension, + title = title, + author = epub_metadata['creator'], + cover = coverfile, + description = epub_metadata['description'], + tags = "", + series = "", + series_id="") + diff --git a/cps/fb2.py b/cps/fb2.py new file mode 100644 index 00000000..79449c4d --- /dev/null +++ b/cps/fb2.py @@ -0,0 +1,35 @@ + +from lxml import etree +import os +import uploader + + +def get_fb2_info(tmp_file_path, original_file_name, original_file_extension): + + ns = { + 'fb':'http://www.gribuser.ru/xml/fictionbook/2.0', + 'l':'http://www.w3.org/1999/xlink', + } + + fb2_file = open(tmp_file_path) + tree = etree.fromstring(fb2_file.read()) + + authors = 
tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=ns) + def get_author(element): + return element.xpath('fb:first-name/text()', namespaces=ns)[0] + ' ' + element.xpath('fb:middle-name/text()', namespaces=ns)[0] + ' ' + element.xpath('fb:last-name/text()', namespaces=ns)[0] + author = ", ".join(map(get_author, authors)) + + title = unicode(tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()', namespaces=ns)[0]) + description = unicode(tree.xpath('/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()', namespaces=ns)[0]) + + return uploader.BookMeta( + file_path = tmp_file_path, + extension = original_file_extension, + title = title, + author = author, + cover = None, + description = description, + tags = "", + series = "", + series_id="") + diff --git a/cps/uploader.py b/cps/uploader.py new file mode 100644 index 00000000..73d7f538 --- /dev/null +++ b/cps/uploader.py @@ -0,0 +1,30 @@ +import os +import hashlib +from collections import namedtuple +import book_formats + + +tmp_dir = "/tmp/calibre-web" + +BookMeta = namedtuple('BookMeta', 'file_path, extension, title, author, cover, description, tags, series, series_id') + + +""" + :rtype: BookMeta +""" +def upload(file): + if not os.path.isdir(tmp_dir): + os.mkdir(tmp_dir) + + filename = file.filename + filename_root, file_extension = os.path.splitext(filename) + md5 = hashlib.md5() + md5.update(filename) + tmp_file_path = os.path.join(tmp_dir, md5.hexdigest()) + file.save(tmp_file_path) + meta = book_formats.process(tmp_file_path, filename_root, file_extension) + return meta + + + + diff --git a/cps/web.py b/cps/web.py index b21cc486..1ee62b5d 100755 --- a/cps/web.py +++ b/cps/web.py @@ -25,11 +25,6 @@ from sqlalchemy.sql import * import json import datetime from uuid import uuid4 -try: - from wand.image import Image - use_generic_pdf_cover = False -except ImportError, e: - use_generic_pdf_cover = True from shutil import copyfile class 
ReverseProxied(object): @@ -78,6 +73,9 @@ file_handler.setLevel(logging.INFO) file_handler.setFormatter(formatter) app.logger.addHandler(file_handler) app.logger.info('Starting Calibre Web...') +logging.getLogger("book_formats").addHandler(file_handler) +logging.getLogger("book_formats").setLevel(logging.INFO) + Principal(app) @@ -1123,6 +1121,9 @@ def edit_book(book_id): else: return render_template('edit_book.html', book=book, authors=author_names, cc=cc) +import uploader +from shutil import move + @app.route("/upload", methods = ["GET", "POST"]) @login_required @upload_required @@ -1134,20 +1135,16 @@ def upload(): db.session.connection().connection.connection.create_function('uuid4', 0, lambda : str(uuid4())) if request.method == 'POST' and 'btn-upload' in request.files: file = request.files['btn-upload'] - filename = file.filename - filename_root, fileextension = os.path.splitext(filename) - if fileextension.upper() == ".PDF": - title = filename_root - author = "Unknown" - else: - flash("Upload is only available for PDF files", category="error") - return redirect(url_for('index', _external=True)) - + meta = uploader.upload(file) + + title = meta.title + author = meta.author + title_dir = helper.get_valid_filename(title, False) author_dir = helper.get_valid_filename(author.decode('utf-8'), False) data_name = title_dir filepath = config.DB_ROOT + "/" + author_dir + "/" + title_dir - saved_filename = filepath + "/" + data_name + fileextension + saved_filename = filepath + "/" + data_name + meta.extension if not os.path.exists(filepath): try: os.makedirs(filepath) @@ -1155,21 +1152,20 @@ def upload(): flash("Failed to create path %s (Permission denied)." % filepath, category="error") return redirect(url_for('index', _external=True)) try: - file.save(saved_filename) + move(meta.file_path, saved_filename) except OSError: flash("Failed to store file %s (Permission denied)." 
% saved_filename, category="error") return redirect(url_for('index', _external=True)) + file_size = os.path.getsize(saved_filename) - has_cover = 0 - if fileextension.upper() == ".PDF": - if use_generic_pdf_cover: - basedir = os.path.dirname(__file__) - copyfile(os.path.join(basedir, "static/generic_cover.jpg"), os.path.join(filepath, "cover.jpg")) - else: - with Image(filename=saved_filename + "[0]", resolution=150) as img: - img.compression_quality = 88 - img.save(filename=os.path.join(filepath, "cover.jpg")) - has_cover = 1 + if meta.cover is None: + has_cover = 0 + basedir = os.path.dirname(__file__) + copyfile(os.path.join(basedir, "static/generic_cover.jpg"), os.path.join(filepath, "cover.jpg")) + else: + has_cover = 1 + move(meta.cover, os.path.join(filepath, "cover.jpg")) + is_author = db.session.query(db.Authors).filter(db.Authors.name == author).first() if is_author: db_author = is_author @@ -1179,7 +1175,7 @@ def upload(): path = os.path.join(author_dir, title_dir) db_book = db.Books(title, "", "", datetime.datetime.now(), datetime.datetime(101, 01,01), 1, datetime.datetime.now(), path, has_cover, db_author, []) db_book.authors.append(db_author) - db_data = db.Data(db_book, fileextension.upper()[1:], file_size, data_name) + db_data = db.Data(db_book, meta.extension.upper()[1:], file_size, data_name) db_book.data.append(db_data) db.session.add(db_book)