diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Info.plist b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Info.plist index 9a030d4..cfd9fcc 100644 --- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Info.plist +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Info.plist @@ -24,19 +24,19 @@ CFBundleExecutable droplet CFBundleGetInfoString - DeDRM AppleScript 6.0.7. Written 2010–2013 by Apprentice Alf and others. + DeDRM AppleScript 6.0.8. Written 2010–2013 by Apprentice Alf and others. CFBundleIconFile DeDRM CFBundleIdentifier com.apple.ScriptEditor.id.707CCCD5-0C6C-4BEB-B67C-B6E866ADE85A CFBundleInfoDictionaryVersion - 6.0.7 + 6.0.8 CFBundleName DeDRM CFBundlePackageType APPL CFBundleShortVersionString - 6.0.7 + 6.0.8 CFBundleSignature dplt LSRequiresCarbon diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/__init__.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/__init__.py index caed6e8..37d4cb1 100644 --- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/__init__.py +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/__init__.py @@ -31,14 +31,17 @@ __docformat__ = 'restructuredtext en' # 6.0.3 - Fixes for Kindle for Mac and Windows non-ascii user names # 6.0.4 - Fixes for stand-alone scripts and applications # and pdb files in plugin and initial conversion of prefs. +# 6.0.5 - Fix a key issue # 6.0.6 - Fix up an incorrect function call +# 6.0.7 - Error handling for incomplete PDF metadata +# 6.0.8 - Fixes a Wine key issue and topaz support """ Decrypt DRMed ebooks. """ PLUGIN_NAME = u"DeDRM" -PLUGIN_VERSION_TUPLE = (6, 0, 7) +PLUGIN_VERSION_TUPLE = (6, 0, 8) PLUGIN_VERSION = u".".join([unicode(str(x)) for x in PLUGIN_VERSION_TUPLE]) # Include an html helpfile in the plugin's zipfile with the following name. 
RESOURCE_NAME = PLUGIN_NAME + '_Help.htm' @@ -313,7 +316,7 @@ class DeDRM(FileTypePlugin): from wineutils import WineGetKeys scriptpath = os.path.join(self.alfdir,u"adobekey.py") - defaultkeys = self.WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix']) + defaultkeys = WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix']) except: pass @@ -391,7 +394,7 @@ class DeDRM(FileTypePlugin): from wineutils import WineGetKeys scriptpath = os.path.join(self.alfdir,u"kindlekey.py") - defaultkeys = self.WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix']) + defaultkeys = WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix']) except: pass diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android.py new file mode 100644 index 0000000..ddb94f5 --- /dev/null +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python +#fileencoding: utf-8 + +import os +import sys +import zlib +import tarfile +from hashlib import md5 +from cStringIO import StringIO +from binascii import a2b_hex, b2a_hex + +STORAGE = 'AmazonSecureStorage.xml' + +class AndroidObfuscation(object): + '''AndroidObfuscation + For the key, it's written in java, and run in android dalvikvm + ''' + + key = a2b_hex('0176e04c9408b1702d90be333fd53523') + + def encrypt(self, plaintext): + cipher = self._get_cipher() + padding = len(self.key) - len(plaintext) % len(self.key) + plaintext += chr(padding) * padding + return b2a_hex(cipher.encrypt(plaintext)) + + def decrypt(self, ciphertext): + cipher = self._get_cipher() + plaintext = cipher.decrypt(a2b_hex(ciphertext)) + return plaintext[:-ord(plaintext[-1])] + + def _get_cipher(self): + try: + from Crypto.Cipher import AES + return AES.new(self.key) + except ImportError: + from aescbc import AES, noPadding + return AES(self.key, padding=noPadding()) + +class AndroidObfuscationV2(AndroidObfuscation): 
+ '''AndroidObfuscationV2 + ''' + + count = 503 + password = 'Thomsun was here!' + + def __init__(self, salt): + key = self.password + salt + for _ in range(self.count): + key = md5(key).digest() + self.key = key[:8] + self.iv = key[8:16] + + def _get_cipher(self): + try : + from Crypto.Cipher import DES + return DES.new(self.key, DES.MODE_CBC, self.iv) + except ImportError: + from python_des import Des, CBC + return Des(self.key, CBC, self.iv) + +def parse_preference(path): + ''' parse android's shared preference xml ''' + storage = {} + read = open(path) + for line in read: + line = line.strip() + # value + if line.startswith(' adb backup com.amazon.kindle + ''' + output = None + read = open(path, 'rb') + head = read.read(24) + if head == 'ANDROID BACKUP\n1\n1\nnone\n': + output = StringIO(zlib.decompress(read.read())) + read.close() + + if not output: + return False + + tar = tarfile.open(fileobj=output) + for member in tar.getmembers(): + if member.name.strip().endswith(STORAGE): + write = open(STORAGE, 'w') + write.write(tar.extractfile(member).read()) + write.close() + break + + return True + +__all__ = [ 'get_storage', 'get_serials', 'parse_preference', + 'AndroidObfuscation', 'AndroidObfuscationV2', 'STORAGE'] + +if __name__ == '__main__': + print get_serials() \ No newline at end of file diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android_readme.txt b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android_readme.txt new file mode 100644 index 0000000..9e7d035 --- /dev/null +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android_readme.txt @@ -0,0 +1,6 @@ +1.1 get AmazonSecureStorage.xml from /data/data/com.amazon.kindle/shared_prefs/AmazonSecureStorage.xml + +1.2 on android 4.0+, run `adb backup com.amazon.kindle` from PC will get backup.ab + now android.py can convert backup.ab to AmazonSecureStorage.xml + +2. 
run `k4mobidedrm.py -a AmazonSecureStorage.xml ' diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py index 4d83368..991591b 100644 --- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py @@ -458,7 +458,11 @@ class DocParser(object): (wtype, num) = pdesc[j] if wtype == 'ocr' : - word = self.ocrtext[num] + try: + word = self.ocrtext[num] + except: + word = "" + sep = ' ' if handle_links: diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/k4mobidedrm.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/k4mobidedrm.py index 929ce57..504105b 100644 --- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/k4mobidedrm.py +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/k4mobidedrm.py @@ -80,10 +80,12 @@ if inCalibre: from calibre_plugins.dedrm import mobidedrm from calibre_plugins.dedrm import topazextract from calibre_plugins.dedrm import kgenpids + from calibre_plugins.dedrm import android else: import mobidedrm import topazextract import kgenpids + import android # Wrap a stream so that output gets flushed immediately # and also make sure that any unicode strings get @@ -273,7 +275,7 @@ def decryptBook(infile, outdir, kDatabaseFiles, serials, pids): def usage(progname): print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks" print u"Usage:" - print u" {0} [-k ] [-p ] [-s ] ".format(progname) + print u" {0} [-k ] [-p ] [-s ] [ -a ] ".format(progname) # # Main @@ -284,7 +286,7 @@ def cli_main(): print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__) try: - opts, args = getopt.getopt(argv[1:], "k:p:s:") + opts, args = getopt.getopt(argv[1:], "k:p:s:a:") except getopt.GetoptError, err: print u"Error in options or arguments: 
{0}".format(err.args[0]) usage(progname) @@ -312,6 +314,11 @@ def cli_main(): if a == None : raise DrmException("Invalid parameter for -s") serials = a.split(',') + if o == '-a': + if a == None: + continue + serials.extend(android.get_serials(a)) + serials.extend(android.get_serials()) # try with built in Kindle Info files if not on Linux k4 = not sys.platform.startswith('linux') diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/kindlekey.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/kindlekey.py index f58e973..8852769 100644 --- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/kindlekey.py +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/kindlekey.py @@ -19,6 +19,7 @@ from __future__ import with_statement # 1.6 - Fixed a problem getting the disk serial numbers # 1.7 - Work if TkInter is missing # 1.8 - Fixes for Kindle for Mac, and non-ascii in Windows user names +# 1.9 - Fixes for Unicode in Windows user names """ @@ -26,7 +27,7 @@ Retrieve Kindle for PC/Mac user key. 
""" __license__ = 'GPL v3' -__version__ = '1.8' +__version__ = '1.9' import sys, os, re from struct import pack, unpack, unpack_from @@ -907,18 +908,34 @@ if iswindows: return CryptUnprotectData CryptUnprotectData = CryptUnprotectData() + # Returns Environmental Variables that contain unicode + def getEnvironmentVariable(name): + import ctypes + name = unicode(name) # make sure string argument is unicode + n = ctypes.windll.kernel32.GetEnvironmentVariableW(name, None, 0) + if n == 0: + return None + buf = ctypes.create_unicode_buffer(u'\0'*n) + ctypes.windll.kernel32.GetEnvironmentVariableW(name, buf, n) + return buf.value # Locate all of the kindle-info style files and return as list def getKindleInfoFiles(): kInfoFiles = [] # some 64 bit machines do not have the proper registry key for some reason - # or the pythonn interface to the 32 vs 64 bit registry is broken + # or the python interface to the 32 vs 64 bit registry is broken path = "" if 'LOCALAPPDATA' in os.environ.keys(): - path = os.environ['LOCALAPPDATA'] + # Python 2.x does not return unicode env. Use Python 3.x + path = winreg.ExpandEnvironmentStrings(u"%LOCALAPPDATA%") + # this is just another alternative. + # path = getEnvironmentVariable('LOCALAPPDATA') + if not os.path.isdir(path): + path = "" else: # User Shell Folders show take precedent over Shell Folders if present try: + # this will still break regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders\\") path = winreg.QueryValueEx(regkey, 'Local AppData')[0] if not os.path.isdir(path): @@ -937,13 +954,14 @@ if iswindows: if path == "": print ('Could not find the folder in which to look for kinfoFiles.') else: - print('searching for kinfoFiles in ' + path) + # Probably not the best. 
To Fix (shouldn't ignore in encoding) or use utf-8 + print(u'searching for kinfoFiles in ' + path.encode('ascii', 'ignore')) # look for (K4PC 1.9.0 and later) .kinf2011 file kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011' if os.path.isfile(kinfopath): found = True - print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath) + print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath.encode('ascii','ignore')) kInfoFiles.append(kinfopath) # look for (K4PC 1.6.0 and later) rainier.2.1.1.kinf file @@ -1142,7 +1160,7 @@ if iswindows: cleartext = CryptUnprotectData(encryptedValue, entropy, 1) DB[keyname] = cleartext - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: + if 'kindle.account.tokens' in DB: print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(GetIDString(), GetUserName().decode("latin-1")) # store values used in decryption DB['IDString'] = GetIDString() @@ -1758,7 +1776,7 @@ elif isosx: break except: pass - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: + if 'kindle.account.tokens' in DB: # store values used in decryption print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(IDString, GetUserName()) DB['IDString'] = IDString diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/mobidedrm.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/mobidedrm.py index 7b69edc..89cc695 100644 --- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/mobidedrm.py +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/mobidedrm.py @@ -156,6 +156,8 @@ def PC1(key, src, decryption=True): return Pukall_Cipher().PC1(key,src,decryption) except NameError: pass + except TypeError: + pass # use slow python version, since Pukall_Cipher didn't load sum1 = 0; diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/stylexml2css.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/stylexml2css.py index c111850..daa108a 100644 --- 
a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/stylexml2css.py +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/stylexml2css.py @@ -178,7 +178,12 @@ class DocParser(object): if val == "": val = 0 - if not ((attr == 'hang') and (int(val) == 0)) : + if not ((attr == 'hang') and (int(val) == 0)): + try: + f = float(val) + except: + print "Warning: unrecognised val, ignoring" + val = 0 pv = float(val)/scale cssargs[attr] = (self.attr_val_map[attr], pv) keep = True diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/topazextract.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/topazextract.py index 97f6583..fb5eb7a 100644 --- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/topazextract.py +++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/topazextract.py @@ -356,7 +356,7 @@ class TopazBook: self.setBookKey(bookKey) self.createBookDirectory() - self.extractFiles() + self.extractFiles() print u"Successfully Extracted Topaz contents" if inCalibre: from calibre_plugins.dedrm import genbook diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/DeDRM_app.pyw b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/DeDRM_app.pyw index e73226b..7225b6d 100644 --- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/DeDRM_app.pyw +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/DeDRM_app.pyw @@ -12,7 +12,7 @@ # 6.0.4 - Fix for other potential unicode problems # 6.0.5 - Fix typo -__version__ = '6.0.7' +__version__ = '6.0.8' import sys import os, os.path diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/__init__.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/__init__.py index caed6e8..37d4cb1 100644 --- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/__init__.py +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/__init__.py @@ -31,14 +31,17 @@ __docformat__ = 'restructuredtext en' # 6.0.3 - Fixes for Kindle for Mac and Windows non-ascii user names # 6.0.4 - Fixes for stand-alone 
scripts and applications # and pdb files in plugin and initial conversion of prefs. +# 6.0.5 - Fix a key issue # 6.0.6 - Fix up an incorrect function call +# 6.0.7 - Error handling for incomplete PDF metadata +# 6.0.8 - Fixes a Wine key issue and topaz support """ Decrypt DRMed ebooks. """ PLUGIN_NAME = u"DeDRM" -PLUGIN_VERSION_TUPLE = (6, 0, 7) +PLUGIN_VERSION_TUPLE = (6, 0, 8) PLUGIN_VERSION = u".".join([unicode(str(x)) for x in PLUGIN_VERSION_TUPLE]) # Include an html helpfile in the plugin's zipfile with the following name. RESOURCE_NAME = PLUGIN_NAME + '_Help.htm' @@ -313,7 +316,7 @@ class DeDRM(FileTypePlugin): from wineutils import WineGetKeys scriptpath = os.path.join(self.alfdir,u"adobekey.py") - defaultkeys = self.WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix']) + defaultkeys = WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix']) except: pass @@ -391,7 +394,7 @@ class DeDRM(FileTypePlugin): from wineutils import WineGetKeys scriptpath = os.path.join(self.alfdir,u"kindlekey.py") - defaultkeys = self.WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix']) + defaultkeys = WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix']) except: pass diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android.py new file mode 100644 index 0000000..ddb94f5 --- /dev/null +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python +#fileencoding: utf-8 + +import os +import sys +import zlib +import tarfile +from hashlib import md5 +from cStringIO import StringIO +from binascii import a2b_hex, b2a_hex + +STORAGE = 'AmazonSecureStorage.xml' + +class AndroidObfuscation(object): + '''AndroidObfuscation + For the key, it's written in java, and run in android dalvikvm + ''' + + key = a2b_hex('0176e04c9408b1702d90be333fd53523') + + def encrypt(self, plaintext): + cipher = self._get_cipher() + padding = len(self.key) - 
len(plaintext) % len(self.key) + plaintext += chr(padding) * padding + return b2a_hex(cipher.encrypt(plaintext)) + + def decrypt(self, ciphertext): + cipher = self._get_cipher() + plaintext = cipher.decrypt(a2b_hex(ciphertext)) + return plaintext[:-ord(plaintext[-1])] + + def _get_cipher(self): + try: + from Crypto.Cipher import AES + return AES.new(self.key) + except ImportError: + from aescbc import AES, noPadding + return AES(self.key, padding=noPadding()) + +class AndroidObfuscationV2(AndroidObfuscation): + '''AndroidObfuscationV2 + ''' + + count = 503 + password = 'Thomsun was here!' + + def __init__(self, salt): + key = self.password + salt + for _ in range(self.count): + key = md5(key).digest() + self.key = key[:8] + self.iv = key[8:16] + + def _get_cipher(self): + try : + from Crypto.Cipher import DES + return DES.new(self.key, DES.MODE_CBC, self.iv) + except ImportError: + from python_des import Des, CBC + return Des(self.key, CBC, self.iv) + +def parse_preference(path): + ''' parse android's shared preference xml ''' + storage = {} + read = open(path) + for line in read: + line = line.strip() + # value + if line.startswith(' adb backup com.amazon.kindle + ''' + output = None + read = open(path, 'rb') + head = read.read(24) + if head == 'ANDROID BACKUP\n1\n1\nnone\n': + output = StringIO(zlib.decompress(read.read())) + read.close() + + if not output: + return False + + tar = tarfile.open(fileobj=output) + for member in tar.getmembers(): + if member.name.strip().endswith(STORAGE): + write = open(STORAGE, 'w') + write.write(tar.extractfile(member).read()) + write.close() + break + + return True + +__all__ = [ 'get_storage', 'get_serials', 'parse_preference', + 'AndroidObfuscation', 'AndroidObfuscationV2', 'STORAGE'] + +if __name__ == '__main__': + print get_serials() \ No newline at end of file diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android_readme.txt b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android_readme.txt new file mode 
100644 index 0000000..9e7d035 --- /dev/null +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android_readme.txt @@ -0,0 +1,6 @@ +1.1 get AmazonSecureStorage.xml from /data/data/com.amazon.kindle/shared_prefs/AmazonSecureStorage.xml + +1.2 on android 4.0+, run `adb backup com.amazon.kindle` from PC will get backup.ab + now android.py can convert backup.ab to AmazonSecureStorage.xml + +2. run `k4mobidedrm.py -a AmazonSecureStorage.xml ' diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/flatxml2html.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/flatxml2html.py index 4d83368..991591b 100644 --- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/flatxml2html.py +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/flatxml2html.py @@ -458,7 +458,11 @@ class DocParser(object): (wtype, num) = pdesc[j] if wtype == 'ocr' : - word = self.ocrtext[num] + try: + word = self.ocrtext[num] + except: + word = "" + sep = ' ' if handle_links: diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/k4mobidedrm.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/k4mobidedrm.py index 929ce57..504105b 100644 --- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/k4mobidedrm.py +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/k4mobidedrm.py @@ -80,10 +80,12 @@ if inCalibre: from calibre_plugins.dedrm import mobidedrm from calibre_plugins.dedrm import topazextract from calibre_plugins.dedrm import kgenpids + from calibre_plugins.dedrm import android else: import mobidedrm import topazextract import kgenpids + import android # Wrap a stream so that output gets flushed immediately # and also make sure that any unicode strings get @@ -273,7 +275,7 @@ def decryptBook(infile, outdir, kDatabaseFiles, serials, pids): def usage(progname): print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks" print u"Usage:" - print u" {0} [-k ] [-p ] [-s ] ".format(progname) + print u" {0} [-k ] [-p ] [-s ] [ -a ] 
".format(progname) # # Main @@ -284,7 +286,7 @@ def cli_main(): print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__) try: - opts, args = getopt.getopt(argv[1:], "k:p:s:") + opts, args = getopt.getopt(argv[1:], "k:p:s:a:") except getopt.GetoptError, err: print u"Error in options or arguments: {0}".format(err.args[0]) usage(progname) @@ -312,6 +314,11 @@ def cli_main(): if a == None : raise DrmException("Invalid parameter for -s") serials = a.split(',') + if o == '-a': + if a == None: + continue + serials.extend(android.get_serials(a)) + serials.extend(android.get_serials()) # try with built in Kindle Info files if not on Linux k4 = not sys.platform.startswith('linux') diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/kindlekey.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/kindlekey.py index f58e973..8852769 100644 --- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/kindlekey.py +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/kindlekey.py @@ -19,6 +19,7 @@ from __future__ import with_statement # 1.6 - Fixed a problem getting the disk serial numbers # 1.7 - Work if TkInter is missing # 1.8 - Fixes for Kindle for Mac, and non-ascii in Windows user names +# 1.9 - Fixes for Unicode in Windows user names """ @@ -26,7 +27,7 @@ Retrieve Kindle for PC/Mac user key. 
""" __license__ = 'GPL v3' -__version__ = '1.8' +__version__ = '1.9' import sys, os, re from struct import pack, unpack, unpack_from @@ -907,18 +908,34 @@ if iswindows: return CryptUnprotectData CryptUnprotectData = CryptUnprotectData() + # Returns Environmental Variables that contain unicode + def getEnvironmentVariable(name): + import ctypes + name = unicode(name) # make sure string argument is unicode + n = ctypes.windll.kernel32.GetEnvironmentVariableW(name, None, 0) + if n == 0: + return None + buf = ctypes.create_unicode_buffer(u'\0'*n) + ctypes.windll.kernel32.GetEnvironmentVariableW(name, buf, n) + return buf.value # Locate all of the kindle-info style files and return as list def getKindleInfoFiles(): kInfoFiles = [] # some 64 bit machines do not have the proper registry key for some reason - # or the pythonn interface to the 32 vs 64 bit registry is broken + # or the python interface to the 32 vs 64 bit registry is broken path = "" if 'LOCALAPPDATA' in os.environ.keys(): - path = os.environ['LOCALAPPDATA'] + # Python 2.x does not return unicode env. Use Python 3.x + path = winreg.ExpandEnvironmentStrings(u"%LOCALAPPDATA%") + # this is just another alternative. + # path = getEnvironmentVariable('LOCALAPPDATA') + if not os.path.isdir(path): + path = "" else: # User Shell Folders show take precedent over Shell Folders if present try: + # this will still break regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders\\") path = winreg.QueryValueEx(regkey, 'Local AppData')[0] if not os.path.isdir(path): @@ -937,13 +954,14 @@ if iswindows: if path == "": print ('Could not find the folder in which to look for kinfoFiles.') else: - print('searching for kinfoFiles in ' + path) + # Probably not the best. 
To Fix (shouldn't ignore in encoding) or use utf-8 + print(u'searching for kinfoFiles in ' + path.encode('ascii', 'ignore')) # look for (K4PC 1.9.0 and later) .kinf2011 file kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011' if os.path.isfile(kinfopath): found = True - print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath) + print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath.encode('ascii','ignore')) kInfoFiles.append(kinfopath) # look for (K4PC 1.6.0 and later) rainier.2.1.1.kinf file @@ -1142,7 +1160,7 @@ if iswindows: cleartext = CryptUnprotectData(encryptedValue, entropy, 1) DB[keyname] = cleartext - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: + if 'kindle.account.tokens' in DB: print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(GetIDString(), GetUserName().decode("latin-1")) # store values used in decryption DB['IDString'] = GetIDString() @@ -1758,7 +1776,7 @@ elif isosx: break except: pass - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: + if 'kindle.account.tokens' in DB: # store values used in decryption print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(IDString, GetUserName()) DB['IDString'] = IDString diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/mobidedrm.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/mobidedrm.py index 7b69edc..89cc695 100644 --- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/mobidedrm.py +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/mobidedrm.py @@ -156,6 +156,8 @@ def PC1(key, src, decryption=True): return Pukall_Cipher().PC1(key,src,decryption) except NameError: pass + except TypeError: + pass # use slow python version, since Pukall_Cipher didn't load sum1 = 0; diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py index c111850..daa108a 100644 --- 
a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py @@ -178,7 +178,12 @@ class DocParser(object): if val == "": val = 0 - if not ((attr == 'hang') and (int(val) == 0)) : + if not ((attr == 'hang') and (int(val) == 0)): + try: + f = float(val) + except: + print "Warning: unrecognised val, ignoring" + val = 0 pv = float(val)/scale cssargs[attr] = (self.attr_val_map[attr], pv) keep = True diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py index 97f6583..fb5eb7a 100644 --- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py +++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py @@ -356,7 +356,7 @@ class TopazBook: self.setBookKey(bookKey) self.createBookDirectory() - self.extractFiles() + self.extractFiles() print u"Successfully Extracted Topaz contents" if inCalibre: from calibre_plugins.dedrm import genbook diff --git a/DeDRM_calibre_plugin/DeDRM_plugin.zip b/DeDRM_calibre_plugin/DeDRM_plugin.zip index 7c4878a..58d8174 100644 Binary files a/DeDRM_calibre_plugin/DeDRM_plugin.zip and b/DeDRM_calibre_plugin/DeDRM_plugin.zip differ diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py b/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py index caed6e8..37d4cb1 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py @@ -31,14 +31,17 @@ __docformat__ = 'restructuredtext en' # 6.0.3 - Fixes for Kindle for Mac and Windows non-ascii user names # 6.0.4 - Fixes for stand-alone scripts and applications # and pdb files in plugin and initial conversion of prefs. +# 6.0.5 - Fix a key issue # 6.0.6 - Fix up an incorrect function call +# 6.0.7 - Error handling for incomplete PDF metadata +# 6.0.8 - Fixes a Wine key issue and topaz support """ Decrypt DRMed ebooks. 
""" PLUGIN_NAME = u"DeDRM" -PLUGIN_VERSION_TUPLE = (6, 0, 7) +PLUGIN_VERSION_TUPLE = (6, 0, 8) PLUGIN_VERSION = u".".join([unicode(str(x)) for x in PLUGIN_VERSION_TUPLE]) # Include an html helpfile in the plugin's zipfile with the following name. RESOURCE_NAME = PLUGIN_NAME + '_Help.htm' @@ -313,7 +316,7 @@ class DeDRM(FileTypePlugin): from wineutils import WineGetKeys scriptpath = os.path.join(self.alfdir,u"adobekey.py") - defaultkeys = self.WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix']) + defaultkeys = WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix']) except: pass @@ -391,7 +394,7 @@ class DeDRM(FileTypePlugin): from wineutils import WineGetKeys scriptpath = os.path.join(self.alfdir,u"kindlekey.py") - defaultkeys = self.WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix']) + defaultkeys = WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix']) except: pass diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/android.py b/DeDRM_calibre_plugin/DeDRM_plugin/android.py new file mode 100644 index 0000000..ddb94f5 --- /dev/null +++ b/DeDRM_calibre_plugin/DeDRM_plugin/android.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python +#fileencoding: utf-8 + +import os +import sys +import zlib +import tarfile +from hashlib import md5 +from cStringIO import StringIO +from binascii import a2b_hex, b2a_hex + +STORAGE = 'AmazonSecureStorage.xml' + +class AndroidObfuscation(object): + '''AndroidObfuscation + For the key, it's written in java, and run in android dalvikvm + ''' + + key = a2b_hex('0176e04c9408b1702d90be333fd53523') + + def encrypt(self, plaintext): + cipher = self._get_cipher() + padding = len(self.key) - len(plaintext) % len(self.key) + plaintext += chr(padding) * padding + return b2a_hex(cipher.encrypt(plaintext)) + + def decrypt(self, ciphertext): + cipher = self._get_cipher() + plaintext = cipher.decrypt(a2b_hex(ciphertext)) + return plaintext[:-ord(plaintext[-1])] + + def _get_cipher(self): + try: + from Crypto.Cipher import AES + 
return AES.new(self.key) + except ImportError: + from aescbc import AES, noPadding + return AES(self.key, padding=noPadding()) + +class AndroidObfuscationV2(AndroidObfuscation): + '''AndroidObfuscationV2 + ''' + + count = 503 + password = 'Thomsun was here!' + + def __init__(self, salt): + key = self.password + salt + for _ in range(self.count): + key = md5(key).digest() + self.key = key[:8] + self.iv = key[8:16] + + def _get_cipher(self): + try : + from Crypto.Cipher import DES + return DES.new(self.key, DES.MODE_CBC, self.iv) + except ImportError: + from python_des import Des, CBC + return Des(self.key, CBC, self.iv) + +def parse_preference(path): + ''' parse android's shared preference xml ''' + storage = {} + read = open(path) + for line in read: + line = line.strip() + # value + if line.startswith(' adb backup com.amazon.kindle + ''' + output = None + read = open(path, 'rb') + head = read.read(24) + if head == 'ANDROID BACKUP\n1\n1\nnone\n': + output = StringIO(zlib.decompress(read.read())) + read.close() + + if not output: + return False + + tar = tarfile.open(fileobj=output) + for member in tar.getmembers(): + if member.name.strip().endswith(STORAGE): + write = open(STORAGE, 'w') + write.write(tar.extractfile(member).read()) + write.close() + break + + return True + +__all__ = [ 'get_storage', 'get_serials', 'parse_preference', + 'AndroidObfuscation', 'AndroidObfuscationV2', 'STORAGE'] + +if __name__ == '__main__': + print get_serials() \ No newline at end of file diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt b/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt new file mode 100644 index 0000000..9e7d035 --- /dev/null +++ b/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt @@ -0,0 +1,6 @@ +1.1 get AmazonSecureStorage.xml from /data/data/com.amazon.kindle/shared_prefs/AmazonSecureStorage.xml + +1.2 on android 4.0+, run `adb backup com.amazon.kindle` from PC will get backup.ab + now android.py can convert backup.ab to 
AmazonSecureStorage.xml + +2. run `k4mobidedrm.py -a AmazonSecureStorage.xml ' diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py b/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py deleted file mode 100644 index 6bb8c37..0000000 --- a/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# base64.py, version 1.0 -# Copyright © 2010 Apprentice Alf - -# Released under the terms of the GNU General Public Licence, version 3 or -# later. - -# Revision history: -# 1 - Initial release. To allow Applescript to do base64 encoding - -""" -Provide base64 encoding. -""" - -from __future__ import with_statement - -__license__ = 'GPL v3' - -import sys -import os -import base64 - -def usage(progname): - print "Applies base64 encoding to the supplied file, sending to standard output" - print "Usage:" - print " %s " % progname - -def cli_main(argv=sys.argv): - progname = os.path.basename(argv[0]) - - if len(argv)<2: - usage(progname) - sys.exit(2) - - keypath = argv[1] - with open(keypath, 'rb') as f: - keyder = f.read() - print keyder.encode('base64') - return 0 - - -if __name__ == '__main__': - sys.exit(cli_main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py b/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py index 11f1427..6bb8c37 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py @@ -1,208 +1,45 @@ -#!/usr/bin/python -# -# This is a python script. You need a Python interpreter to run it. -# For example, ActiveState Python, which exists for windows. -# -# Changelog drmcheck -# 1.00 - Initial version, with code from various other scripts -# 1.01 - Moved authorship announcement to usage section. -# -# Changelog epubtest -# 1.00 - Cut to epubtest.py, testing ePub files only by Apprentice Alf -# 1.01 - Added routine for use by Windows DeDRM -# -# Written in 2011 by Paul Durrant -# Released with unlicense. 
See http://unlicense.org/ -# -############################################################################# -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -############################################################################# -# -# It's still polite to give attribution if you do reuse this code. -# +#!/usr/bin/env python +# -*- coding: utf-8 -*- -from __future__ import with_statement - -__version__ = '1.01' - -import sys, struct, os -import zlib -import zipfile -import xml.etree.ElementTree as etree - -NSMAP = {'adept': 'http://ns.adobe.com/adept', - 'enc': 'http://www.w3.org/2001/04/xmlenc#'} - -# Wrap a stream so that output gets flushed immediately -# and also make sure that any unicode strings get -# encoded using "replace" before writing them. 
-class SafeUnbuffered: - def __init__(self, stream): - self.stream = stream - self.encoding = stream.encoding - if self.encoding == None: - self.encoding = "utf-8" - def write(self, data): - if isinstance(data,unicode): - data = data.encode(self.encoding,"replace") - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) - -try: - from calibre.constants import iswindows, isosx -except: - iswindows = sys.platform.startswith('win') - isosx = sys.platform.startswith('darwin') - -def unicode_argv(): - if iswindows: - # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode - # strings. - - # Versions 2.x of Python don't support Unicode in sys.argv on - # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv - # as a list of Unicode strings and encode them as utf-8 +# base64.py, version 1.0 +# Copyright © 2010 Apprentice Alf - from ctypes import POINTER, byref, cdll, c_int, windll - from ctypes.wintypes import LPCWSTR, LPWSTR +# Released under the terms of the GNU General Public Licence, version 3 or +# later. - GetCommandLineW = cdll.kernel32.GetCommandLineW - GetCommandLineW.argtypes = [] - GetCommandLineW.restype = LPCWSTR +# Revision history: +# 1 - Initial release. To allow Applescript to do base64 encoding - CommandLineToArgvW = windll.shell32.CommandLineToArgvW - CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] - CommandLineToArgvW.restype = POINTER(LPWSTR) +""" +Provide base64 encoding. 
+""" - cmd = GetCommandLineW() - argc = c_int(0) - argv = CommandLineToArgvW(cmd, byref(argc)) - if argc.value > 0: - # Remove Python executable and commands if present - start = argc.value - len(sys.argv) - return [argv[i] for i in - xrange(start, argc.value)] - # if we don't have any arguments at all, just pass back script name - # this should never happen - return [u"epubtest.py"] - else: - argvencoding = sys.stdin.encoding - if argvencoding == None: - argvencoding = "utf-8" - return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] - -_FILENAME_LEN_OFFSET = 26 -_EXTRA_LEN_OFFSET = 28 -_FILENAME_OFFSET = 30 -_MAX_SIZE = 64 * 1024 - - -def uncompress(cmpdata): - dc = zlib.decompressobj(-15) - data = '' - while len(cmpdata) > 0: - if len(cmpdata) > _MAX_SIZE : - newdata = cmpdata[0:_MAX_SIZE] - cmpdata = cmpdata[_MAX_SIZE:] - else: - newdata = cmpdata - cmpdata = '' - newdata = dc.decompress(newdata) - unprocessed = dc.unconsumed_tail - if len(unprocessed) == 0: - newdata += dc.flush() - data += newdata - cmpdata += unprocessed - unprocessed = '' - return data - -def getfiledata(file, zi): - # get file name length and exta data length to find start of file data - local_header_offset = zi.header_offset - - file.seek(local_header_offset + _FILENAME_LEN_OFFSET) - leninfo = file.read(2) - local_name_length, = struct.unpack('" % progname - return data +def cli_main(argv=sys.argv): + progname = os.path.basename(argv[0]) -def encryption(infile): - # returns encryption: one of Unencrypted, Adobe, B&N and Unknown - encryption = "Unknown" - try: - with open(infile,'rb') as infileobject: - bookdata = infileobject.read(58) - # Check for Zip - if bookdata[0:0+2] == "PK": - foundrights = False - foundencryption = False - inzip = zipfile.ZipFile(infile,'r') - namelist = set(inzip.namelist()) - if 'META-INF/rights.xml' not in namelist or 'META-INF/encryption.xml' not in namelist: - encryption = "Unencrypted" - else: - rights = 
etree.fromstring(inzip.read('META-INF/rights.xml')) - adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) - expr = './/%s' % (adept('encryptedKey'),) - bookkey = ''.join(rights.findtext(expr)) - if len(bookkey) == 172: - encryption = "Adobe" - elif len(bookkey) == 64: - encryption = "B&N" - else: - encryption = "Unknown" - except: - traceback.print_exc() - return encryption + if len(argv)<2: + usage(progname) + sys.exit(2) -def main(): - argv=unicode_argv() - print encryption(argv[1]) + keypath = argv[1] + with open(keypath, 'rb') as f: + keyder = f.read() + print keyder.encode('base64') return 0 -if __name__ == "__main__": - sys.stdout=SafeUnbuffered(sys.stdout) - sys.stderr=SafeUnbuffered(sys.stderr) - sys.exit(main()) + +if __name__ == '__main__': + sys.exit(cli_main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py b/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py index 1dfef42..11f1427 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py @@ -1,82 +1,60 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# erdr2pml.py -# Copyright © 2008 The Dark Reverser +#!/usr/bin/python # -# Modified 2008–2012 by some_updates, DiapDealer and Apprentice Alf - # This is a python script. You need a Python interpreter to run it. # For example, ActiveState Python, which exists for windows. -# Changelog # -# Based on ereader2html version 0.08 plus some later small fixes +# Changelog drmcheck +# 1.00 - Initial version, with code from various other scripts +# 1.01 - Moved authorship announcement to usage section. +# +# Changelog epubtest +# 1.00 - Cut to epubtest.py, testing ePub files only by Apprentice Alf +# 1.01 - Added routine for use by Windows DeDRM +# +# Written in 2011 by Paul Durrant +# Released with unlicense. See http://unlicense.org/ +# +############################################################################# +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +############################################################################# +# +# It's still polite to give attribution if you do reuse this code. # -# 0.01 - Initial version -# 0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug. -# 0.03 - Fix incorrect variable usage at one place. 
-# 0.03b - enhancement by DeBockle (version 259 support) -# Custom version 0.03 - no change to eReader support, only usability changes -# - start of pep-8 indentation (spaces not tab), fix trailing blanks -# - version variable, only one place to change -# - added main routine, now callable as a library/module, -# means tools can add optional support for ereader2html -# - outdir is no longer a mandatory parameter (defaults based on input name if missing) -# - time taken output to stdout -# - Psyco support - reduces runtime by a factor of (over) 3! -# E.g. (~600Kb file) 90 secs down to 24 secs -# - newstyle classes -# - changed map call to list comprehension -# may not work with python 2.3 -# without Psyco this reduces runtime to 90% -# E.g. 90 secs down to 77 secs -# Psyco with map calls takes longer, do not run with map in Psyco JIT! -# - izip calls used instead of zip (if available), further reduction -# in run time (factor of 4.5). -# E.g. (~600Kb file) 90 secs down to 20 secs -# - Python 2.6+ support, avoid DeprecationWarning with sha/sha1 -# 0.04 - Footnote support, PML output, correct charset in html, support more PML tags -# - Feature change, dump out PML file -# - Added supprt for footnote tags. NOTE footnote ids appear to be bad (not usable) -# in some pdb files :-( due to the same id being used multiple times -# - Added correct charset encoding (pml is based on cp1252) -# - Added logging support. -# 0.05 - Improved type 272 support for sidebars, links, chapters, metainfo, etc -# 0.06 - Merge of 0.04 and 0.05. Improved HTML output -# Placed images in subfolder, so that it's possible to just -# drop the book.pml file onto DropBook to make an unencrypted -# copy of the eReader file. -# Using that with Calibre works a lot better than the HTML -# conversion in this code. 
-# 0.07 - Further Improved type 272 support for sidebars with all earlier fixes -# 0.08 - fixed typos, removed extraneous things -# 0.09 - fixed typos in first_pages to first_page to again support older formats -# 0.10 - minor cleanups -# 0.11 - fixups for using correct xml for footnotes and sidebars for use with Dropbook -# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name. -# 0.13 - change to unbuffered stdout for use with gui front ends -# 0.14 - contributed enhancement to support --make-pmlz switch -# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac. -# 0.16 - convert to use openssl DES (very very fast) or pure python DES if openssl's libcrypto is not available -# 0.17 - added support for pycrypto's DES as well -# 0.18 - on Windows try PyCrypto first and OpenSSL next -# 0.19 - Modify the interface to allow use of import -# 0.20 - modify to allow use inside new interface for calibre plugins -# 0.21 - Support eReader (drm) version 11. -# - Don't reject dictionary format. -# - Ignore sidebars for dictionaries (different format?) 
-# 0.22 - Unicode and plugin support, different image folders for PMLZ and source -# 0.23 - moved unicode_argv call inside main for Windows DeDRM compatibility -__version__='0.23' +from __future__ import with_statement -import sys, re -import struct, binascii, getopt, zlib, os, os.path, urllib, tempfile, traceback +__version__ = '1.01' -if 'calibre' in sys.modules: - inCalibre = True -else: - inCalibre = False +import sys, struct, os +import zlib +import zipfile +import xml.etree.ElementTree as etree + +NSMAP = {'adept': 'http://ns.adobe.com/adept', + 'enc': 'http://www.w3.org/2001/04/xmlenc#'} # Wrap a stream so that output gets flushed immediately # and also make sure that any unicode strings get @@ -95,8 +73,11 @@ class SafeUnbuffered: def __getattr__(self, attr): return getattr(self.stream, attr) -iswindows = sys.platform.startswith('win') -isosx = sys.platform.startswith('darwin') +try: + from calibre.constants import iswindows, isosx +except: + iswindows = sys.platform.startswith('win') + isosx = sys.platform.startswith('darwin') def unicode_argv(): if iswindows: @@ -105,8 +86,8 @@ def unicode_argv(): # Versions 2.x of Python don't support Unicode in sys.argv on # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. - + # characters with '?'. 
So use shell32.GetCommandLineArgvW to get sys.argv + # as a list of Unicode strings and encode them as utf-8 from ctypes import POINTER, byref, cdll, c_int, windll from ctypes.wintypes import LPCWSTR, LPWSTR @@ -129,469 +110,99 @@ def unicode_argv(): xrange(start, argc.value)] # if we don't have any arguments at all, just pass back script name # this should never happen - return [u"mobidedrm.py"] + return [u"epubtest.py"] else: argvencoding = sys.stdin.encoding if argvencoding == None: argvencoding = "utf-8" return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] -Des = None -if iswindows: - # first try with pycrypto - if inCalibre: - from calibre_plugins.dedrm import pycrypto_des - else: - import pycrypto_des - Des = pycrypto_des.load_pycrypto() - if Des == None: - # they try with openssl - if inCalibre: - from calibre_plugins.dedrm import openssl_des - else: - import openssl_des - Des = openssl_des.load_libcrypto() -else: - # first try with openssl - if inCalibre: - from calibre_plugins.dedrm import openssl_des - else: - import openssl_des - Des = openssl_des.load_libcrypto() - if Des == None: - # then try with pycrypto - if inCalibre: - from calibre_plugins.dedrm import pycrypto_des - else: - import pycrypto_des - Des = pycrypto_des.load_pycrypto() - -# if that did not work then use pure python implementation -# of DES and try to speed it up with Psycho -if Des == None: - if inCalibre: - from calibre_plugins.dedrm import python_des - else: - import python_des - Des = python_des.Des - # Import Psyco if available - try: - # http://psyco.sourceforge.net - import psyco - psyco.full() - except ImportError: - pass - -try: - from hashlib import sha1 -except ImportError: - # older Python release - import sha - sha1 = lambda s: sha.new(s) - -import cgi -import logging - -logging.basicConfig() -#logging.basicConfig(level=logging.DEBUG) - - -class Sectionizer(object): - bkType = "Book" - - def __init__(self, filename, ident): - 
self.contents = file(filename, 'rb').read() - self.header = self.contents[0:72] - self.num_sections, = struct.unpack('>H', self.contents[76:78]) - # Dictionary or normal content (TODO: Not hard-coded) - if self.header[0x3C:0x3C+8] != ident: - if self.header[0x3C:0x3C+8] == "PDctPPrs": - self.bkType = "Dict" - else: - raise ValueError('Invalid file format') - self.sections = [] - for i in xrange(self.num_sections): - offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8]) - flags, val = a1, a2<<16|a3<<8|a4 - self.sections.append( (offset, flags, val) ) - def loadSection(self, section): - if section + 1 == self.num_sections: - end_off = len(self.contents) - else: - end_off = self.sections[section + 1][0] - off = self.sections[section][0] - return self.contents[off:end_off] - -# cleanup unicode filenames -# borrowed from calibre from calibre/src/calibre/__init__.py -# added in removal of control (<32) chars -# and removal of . at start and end -# and with some (heavily edited) code from Paul Durrant's kindlenamer.py -def sanitizeFileName(name): - # substitute filename unfriendly characters - name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'") - # delete control characters - name = u"".join(char for char in name if ord(char)>=32) - # white space to single space, delete leading and trailing while space - name = re.sub(ur"\s", u" ", name).strip() - # remove leading dots - while len(name)>0 and name[0] == u".": - name = name[1:] - # remove trailing dots (Windows doesn't like them) - if name.endswith(u'.'): - name = name[:-1] - return name - -def fixKey(key): - def fixByte(b): - return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80) - return "".join([chr(fixByte(ord(a))) for a in key]) - -def deXOR(text, sp, table): - r='' - j = sp - for i in xrange(len(text)): - r += 
chr(ord(table[j]) ^ ord(text[i])) - j = j + 1 - if j == len(table): - j = 0 - return r - -class EreaderProcessor(object): - def __init__(self, sect, user_key): - self.section_reader = sect.loadSection - data = self.section_reader(0) - version, = struct.unpack('>H', data[0:2]) - self.version = version - logging.info('eReader file format version %s', version) - if version != 272 and version != 260 and version != 259: - raise ValueError('incorrect eReader version %d (error 1)' % version) - data = self.section_reader(1) - self.data = data - des = Des(fixKey(data[0:8])) - cookie_shuf, cookie_size = struct.unpack('>LL', des.decrypt(data[-8:])) - if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200: - raise ValueError('incorrect eReader version (error 2)') - input = des.decrypt(data[-cookie_size:]) - def unshuff(data, shuf): - r = [''] * len(data) - j = 0 - for i in xrange(len(data)): - j = (j + shuf) % len(data) - r[j] = data[i] - assert len("".join(r)) == len(data) - return "".join(r) - r = unshuff(input[0:-8], cookie_shuf) +_FILENAME_LEN_OFFSET = 26 +_EXTRA_LEN_OFFSET = 28 +_FILENAME_OFFSET = 30 +_MAX_SIZE = 64 * 1024 - drm_sub_version = struct.unpack('>H', r[0:2])[0] - self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1 - self.num_image_pages = struct.unpack('>H', r[26:26+2])[0] - self.first_image_page = struct.unpack('>H', r[24:24+2])[0] - # Default values - self.num_footnote_pages = 0 - self.num_sidebar_pages = 0 - self.first_footnote_page = -1 - self.first_sidebar_page = -1 - if self.version == 272: - self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0] - self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0] - if (sect.bkType == "Book"): - self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0] - self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0] - # self.num_bookinfo_pages = struct.unpack('>H', r[34:34+2])[0] - # self.first_bookinfo_page = struct.unpack('>H', r[32:32+2])[0] - # 
self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0] - # self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0] - # self.num_link_pages = struct.unpack('>H', r[30:30+2])[0] - # self.first_link_page = struct.unpack('>H', r[28:28+2])[0] - # self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0] - # self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0] - # **before** data record 1 was decrypted and unshuffled, it contained data - # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc - self.xortable_offset = struct.unpack('>H', r[40:40+2])[0] - self.xortable_size = struct.unpack('>H', r[42:42+2])[0] - self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size] +def uncompress(cmpdata): + dc = zlib.decompressobj(-15) + data = '' + while len(cmpdata) > 0: + if len(cmpdata) > _MAX_SIZE : + newdata = cmpdata[0:_MAX_SIZE] + cmpdata = cmpdata[_MAX_SIZE:] else: - # Nothing needs to be done - pass - # self.num_bookinfo_pages = 0 - # self.num_chapter_pages = 0 - # self.num_link_pages = 0 - # self.num_xtextsize_pages = 0 - # self.first_bookinfo_page = -1 - # self.first_chapter_page = -1 - # self.first_link_page = -1 - # self.first_xtextsize_page = -1 - - logging.debug('self.num_text_pages %d', self.num_text_pages) - logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page) - logging.debug('self.num_sidebar_pages %d, self.first_sidebar_page %d', self.num_sidebar_pages , self.first_sidebar_page) - self.flags = struct.unpack('>L', r[4:8])[0] - reqd_flags = (1<<9) | (1<<7) | (1<<10) - if (self.flags & reqd_flags) != reqd_flags: - print "Flags: 0x%X" % self.flags - raise ValueError('incompatible eReader file') - des = Des(fixKey(user_key)) - if version == 259: - if drm_sub_version != 7: - raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version) - encrypted_key_sha = r[44:44+20] - 
encrypted_key = r[64:64+8] - elif version == 260: - if drm_sub_version != 13 and drm_sub_version != 11: - raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version) - if drm_sub_version == 13: - encrypted_key = r[44:44+8] - encrypted_key_sha = r[52:52+20] - else: - encrypted_key = r[64:64+8] - encrypted_key_sha = r[44:44+20] - elif version == 272: - encrypted_key = r[172:172+8] - encrypted_key_sha = r[56:56+20] - self.content_key = des.decrypt(encrypted_key) - if sha1(self.content_key).digest() != encrypted_key_sha: - raise ValueError('Incorrect Name and/or Credit Card') - - def getNumImages(self): - return self.num_image_pages - - def getImage(self, i): - sect = self.section_reader(self.first_image_page + i) - name = sect[4:4+32].strip('\0') - data = sect[62:] - return sanitizeFileName(unicode(name,'windows-1252')), data - - - # def getChapterNamePMLOffsetData(self): - # cv = '' - # if self.num_chapter_pages > 0: - # for i in xrange(self.num_chapter_pages): - # chaps = self.section_reader(self.first_chapter_page + i) - # j = i % self.xortable_size - # offname = deXOR(chaps, j, self.xortable) - # offset = struct.unpack('>L', offname[0:4])[0] - # name = offname[4:].strip('\0') - # cv += '%d|%s\n' % (offset, name) - # return cv - - # def getLinkNamePMLOffsetData(self): - # lv = '' - # if self.num_link_pages > 0: - # for i in xrange(self.num_link_pages): - # links = self.section_reader(self.first_link_page + i) - # j = i % self.xortable_size - # offname = deXOR(links, j, self.xortable) - # offset = struct.unpack('>L', offname[0:4])[0] - # name = offname[4:].strip('\0') - # lv += '%d|%s\n' % (offset, name) - # return lv - - # def getExpandedTextSizesData(self): - # ts = '' - # if self.num_xtextsize_pages > 0: - # tsize = deXOR(self.section_reader(self.first_xtextsize_page), 0, self.xortable) - # for i in xrange(self.num_text_pages): - # xsize = struct.unpack('>H', tsize[0:2])[0] - # ts += "%d\n" % xsize - # tsize = tsize[2:] - # return ts - - # def 
getBookInfo(self): - # bkinfo = '' - # if self.num_bookinfo_pages > 0: - # info = self.section_reader(self.first_bookinfo_page) - # bkinfo = deXOR(info, 0, self.xortable) - # bkinfo = bkinfo.replace('\0','|') - # bkinfo += '\n' - # return bkinfo - - def getText(self): - des = Des(fixKey(self.content_key)) - r = '' - for i in xrange(self.num_text_pages): - logging.debug('get page %d', i) - r += zlib.decompress(des.decrypt(self.section_reader(1 + i))) - - # now handle footnotes pages - if self.num_footnote_pages > 0: - r += '\n' - # the record 0 of the footnote section must pass through the Xor Table to make it useful - sect = self.section_reader(self.first_footnote_page) - fnote_ids = deXOR(sect, 0, self.xortable) - # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated - des = Des(fixKey(self.content_key)) - for i in xrange(1,self.num_footnote_pages): - logging.debug('get footnotepage %d', i) - id_len = ord(fnote_ids[2]) - id = fnote_ids[3:3+id_len] - fmarker = '\n' % id - fmarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i))) - fmarker += '\n\n' - r += fmarker - fnote_ids = fnote_ids[id_len+4:] - - # TODO: Handle dictionary index (?) pages - which are also marked as - # sidebar_pages (?). For now dictionary sidebars are ignored - # For dictionaries - record 0 is null terminated strings, followed by - # blocks of around 62000 bytes and a final block. 
Not sure of the - # encoding - - # now handle sidebar pages - if self.num_sidebar_pages > 0: - r += '\n' - # the record 0 of the sidebar section must pass through the Xor Table to make it useful - sect = self.section_reader(self.first_sidebar_page) - sbar_ids = deXOR(sect, 0, self.xortable) - # the remaining records of the sidebar sections need to be decoded with the content_key and zlib inflated - des = Des(fixKey(self.content_key)) - for i in xrange(1,self.num_sidebar_pages): - id_len = ord(sbar_ids[2]) - id = sbar_ids[3:3+id_len] - smarker = '\n' % id - smarker += zlib.decompress(des.decrypt(self.section_reader(self.first_sidebar_page + i))) - smarker += '\n\n' - r += smarker - sbar_ids = sbar_ids[id_len+4:] - - return r - -def cleanPML(pml): - # Convert special characters to proper PML code. High ASCII start at (\x80, \a128) and go up to (\xff, \a255) - pml2 = pml - for k in xrange(128,256): - badChar = chr(k) - pml2 = pml2.replace(badChar, '\\a%03d' % k) - return pml2 - -def decryptBook(infile, outpath, make_pmlz, user_key): - bookname = os.path.splitext(os.path.basename(infile))[0] - if make_pmlz: - # outpath is actually pmlz name - pmlzname = outpath - outdir = tempfile.mkdtemp() - imagedirpath = os.path.join(outdir,u"images") - else: - pmlzname = None - outdir = outpath - imagedirpath = os.path.join(outdir,bookname + u"_img") - + newdata = cmpdata + cmpdata = '' + newdata = dc.decompress(newdata) + unprocessed = dc.unconsumed_tail + if len(unprocessed) == 0: + newdata += dc.flush() + data += newdata + cmpdata += unprocessed + unprocessed = '' + return data + +def getfiledata(file, zi): + # get file name length and exta data length to find start of file data + local_header_offset = zi.header_offset + + file.seek(local_header_offset + _FILENAME_LEN_OFFSET) + leninfo = file.read(2) + local_name_length, = struct.unpack(' 0: - print u"Extracting images" - if not os.path.exists(imagedirpath): - os.makedirs(imagedirpath) - for i in xrange(er.getNumImages()): - 
name, contents = er.getImage(i) - file(os.path.join(imagedirpath, name), 'wb').write(contents) - - print u"Extracting pml" - pml_string = er.getText() - pmlfilename = bookname + ".pml" - file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string)) - if pmlzname is not None: - import zipfile - import shutil - print u"Creating PMLZ file {0}".format(os.path.basename(pmlzname)) - myZipFile = zipfile.ZipFile(pmlzname,'w',zipfile.ZIP_STORED, False) - list = os.listdir(outdir) - for filename in list: - localname = filename - filePath = os.path.join(outdir,filename) - if os.path.isfile(filePath): - myZipFile.write(filePath, localname) - elif os.path.isdir(filePath): - imageList = os.listdir(filePath) - localimgdir = os.path.basename(filePath) - for image in imageList: - localname = os.path.join(localimgdir,image) - imagePath = os.path.join(filePath,image) - if os.path.isfile(imagePath): - myZipFile.write(imagePath, localname) - myZipFile.close() - # remove temporary directory - shutil.rmtree(outdir, True) - print u"Output is {0}".format(pmlzname) - else : - print u"Output is in {0}".format(outdir) - print "done" - except ValueError, e: - print u"Error: {0}".format(e) + with open(infile,'rb') as infileobject: + bookdata = infileobject.read(58) + # Check for Zip + if bookdata[0:0+2] == "PK": + foundrights = False + foundencryption = False + inzip = zipfile.ZipFile(infile,'r') + namelist = set(inzip.namelist()) + if 'META-INF/rights.xml' not in namelist or 'META-INF/encryption.xml' not in namelist: + encryption = "Unencrypted" + else: + rights = etree.fromstring(inzip.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('encryptedKey'),) + bookkey = ''.join(rights.findtext(expr)) + if len(bookkey) == 172: + encryption = "Adobe" + elif len(bookkey) == 64: + encryption = "B&N" + else: + encryption = "Unknown" + except: traceback.print_exc() - return 1 - return 0 - - -def usage(): - print u"Converts DRMed 
eReader books to PML Source" - print u"Usage:" - print u" erdr2pml [options] infile.pdb [outpath] \"your name\" credit_card_number" - print u" " - print u"Options: " - print u" -h prints this message" - print u" -p create PMLZ instead of source folder" - print u" --make-pmlz create PMLZ instead of source folder" - print u" " - print u"Note:" - print u" if outpath is ommitted, creates source in 'infile_Source' folder" - print u" if outpath is ommitted and pmlz option, creates PMLZ 'infile.pmlz'" - print u" if source folder created, images are in infile_img folder" - print u" if pmlz file created, images are in images folder" - print u" It's enough to enter the last 8 digits of the credit card number" - return - -def getuser_key(name,cc): - newname = "".join(c for c in name.lower() if c >= 'a' and c <= 'z' or c >= '0' and c <= '9') - cc = cc.replace(" ","") - return struct.pack('>LL', binascii.crc32(newname) & 0xffffffff,binascii.crc32(cc[-8:])& 0xffffffff) - -def cli_main(): - print u"eRdr2Pml v{0}. 
Copyright © 2009–2012 The Dark Reverser et al.".format(__version__) + return encryption +def main(): argv=unicode_argv() - try: - opts, args = getopt.getopt(argv[1:], "hp", ["make-pmlz"]) - except getopt.GetoptError, err: - print err.args[0] - usage() - return 1 - make_pmlz = False - for o, a in opts: - if o == "-h": - usage() - return 0 - elif o == "-p": - make_pmlz = True - elif o == "--make-pmlz": - make_pmlz = True - - if len(args)!=3 and len(args)!=4: - usage() - return 1 - - if len(args)==3: - infile, name, cc = args - if make_pmlz: - outpath = os.path.splitext(infile)[0] + u".pmlz" - else: - outpath = os.path.splitext(infile)[0] + u"_Source" - elif len(args)==4: - infile, outpath, name, cc = args - - print getuser_key(name,cc).encode('hex') - - return decryptBook(infile, outpath, make_pmlz, getuser_key(name,cc)) - + print encryption(argv[1]) + return 0 if __name__ == "__main__": sys.stdout=SafeUnbuffered(sys.stdout) sys.stderr=SafeUnbuffered(sys.stderr) - sys.exit(cli_main()) - + sys.exit(main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py b/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py index 4d83368..1dfef42 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py @@ -1,797 +1,597 @@ -#! 
/usr/bin/python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# For use with Topaz Scripts Version 2.6 - -import sys -import csv -import os -import math -import getopt -from struct import pack -from struct import unpack - - -class DocParser(object): - def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage): - self.id = os.path.basename(fileid).replace('.dat','') - self.svgcount = 0 - self.docList = flatxml.split('\n') - self.docSize = len(self.docList) - self.classList = {} - self.bookDir = bookDir - self.gdict = gdict - tmpList = classlst.split('\n') - for pclass in tmpList: - if pclass != '': - # remove the leading period from the css name - cname = pclass[1:] - self.classList[cname] = True - self.fixedimage = fixedimage - self.ocrtext = [] - self.link_id = [] - self.link_title = [] - self.link_page = [] - self.link_href = [] - self.link_type = [] - self.dehyphen_rootid = [] - self.paracont_stemid = [] - self.parastems_stemid = [] - - - def getGlyph(self, gid): - result = '' - id='id="gl%d"' % gid - return self.gdict.lookup(id) - - def glyphs_to_image(self, glyphList): - - def extract(path, key): - b = path.find(key) + len(key) - e = path.find(' ',b) - return int(path[b:e]) - - svgDir = os.path.join(self.bookDir,'svg') - - imgDir = os.path.join(self.bookDir,'img') - imgname = self.id + '_%04d.svg' % self.svgcount - imgfile = os.path.join(imgDir,imgname) - - # get glyph information - gxList = self.getData('info.glyph.x',0,-1) - gyList = self.getData('info.glyph.y',0,-1) - gidList = self.getData('info.glyph.glyphID',0,-1) - - gids = [] - maxws = [] - maxhs = [] - xs = [] - ys = [] - gdefs = [] - - # get path defintions, positions, dimensions for each glyph - # that makes up the image, and find min x and min y to reposition origin - minx = -1 - miny = -1 - for j in glyphList: - gid = gidList[j] - gids.append(gid) - - xs.append(gxList[j]) - if minx == -1: minx = gxList[j] - else : minx = min(minx, gxList[j]) - - ys.append(gyList[j]) - if miny 
== -1: miny = gyList[j] - else : miny = min(miny, gyList[j]) - - path = self.getGlyph(gid) - gdefs.append(path) - - maxws.append(extract(path,'width=')) - maxhs.append(extract(path,'height=')) - - - # change the origin to minx, miny and calc max height and width - maxw = maxws[0] + xs[0] - minx - maxh = maxhs[0] + ys[0] - miny - for j in xrange(0, len(xs)): - xs[j] = xs[j] - minx - ys[j] = ys[j] - miny - maxw = max( maxw, (maxws[j] + xs[j]) ) - maxh = max( maxh, (maxhs[j] + ys[j]) ) - - # open the image file for output - ifile = open(imgfile,'w') - ifile.write('\n') - ifile.write('\n') - ifile.write('\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh)) - ifile.write('\n') - for j in xrange(0,len(gdefs)): - ifile.write(gdefs[j]) - ifile.write('\n') - for j in xrange(0,len(gids)): - ifile.write('\n' % (gids[j], xs[j], ys[j])) - ifile.write('') - ifile.close() - - return 0 - - - - # return tag at line pos in document - def lineinDoc(self, pos) : - if (pos >= 0) and (pos < self.docSize) : - item = self.docList[pos] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : - name = item - argres = '' - return name, argres - - - # find tag in doc if within pos to end inclusive - def findinDoc(self, tagpath, pos, end) : - result = None - if end == -1 : - end = self.docSize +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# erdr2pml.py +# Copyright © 2008 The Dark Reverser +# +# Modified 2008–2012 by some_updates, DiapDealer and Apprentice Alf + +# This is a python script. You need a Python interpreter to run it. +# For example, ActiveState Python, which exists for windows. +# Changelog +# +# Based on ereader2html version 0.08 plus some later small fixes +# +# 0.01 - Initial version +# 0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug. +# 0.03 - Fix incorrect variable usage at one place. 
+# 0.03b - enhancement by DeBockle (version 259 support) +# Custom version 0.03 - no change to eReader support, only usability changes +# - start of pep-8 indentation (spaces not tab), fix trailing blanks +# - version variable, only one place to change +# - added main routine, now callable as a library/module, +# means tools can add optional support for ereader2html +# - outdir is no longer a mandatory parameter (defaults based on input name if missing) +# - time taken output to stdout +# - Psyco support - reduces runtime by a factor of (over) 3! +# E.g. (~600Kb file) 90 secs down to 24 secs +# - newstyle classes +# - changed map call to list comprehension +# may not work with python 2.3 +# without Psyco this reduces runtime to 90% +# E.g. 90 secs down to 77 secs +# Psyco with map calls takes longer, do not run with map in Psyco JIT! +# - izip calls used instead of zip (if available), further reduction +# in run time (factor of 4.5). +# E.g. (~600Kb file) 90 secs down to 20 secs +# - Python 2.6+ support, avoid DeprecationWarning with sha/sha1 +# 0.04 - Footnote support, PML output, correct charset in html, support more PML tags +# - Feature change, dump out PML file +# - Added supprt for footnote tags. NOTE footnote ids appear to be bad (not usable) +# in some pdb files :-( due to the same id being used multiple times +# - Added correct charset encoding (pml is based on cp1252) +# - Added logging support. +# 0.05 - Improved type 272 support for sidebars, links, chapters, metainfo, etc +# 0.06 - Merge of 0.04 and 0.05. Improved HTML output +# Placed images in subfolder, so that it's possible to just +# drop the book.pml file onto DropBook to make an unencrypted +# copy of the eReader file. +# Using that with Calibre works a lot better than the HTML +# conversion in this code. 
+# 0.07 - Further Improved type 272 support for sidebars with all earlier fixes +# 0.08 - fixed typos, removed extraneous things +# 0.09 - fixed typos in first_pages to first_page to again support older formats +# 0.10 - minor cleanups +# 0.11 - fixups for using correct xml for footnotes and sidebars for use with Dropbook +# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name. +# 0.13 - change to unbuffered stdout for use with gui front ends +# 0.14 - contributed enhancement to support --make-pmlz switch +# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac. +# 0.16 - convert to use openssl DES (very very fast) or pure python DES if openssl's libcrypto is not available +# 0.17 - added support for pycrypto's DES as well +# 0.18 - on Windows try PyCrypto first and OpenSSL next +# 0.19 - Modify the interface to allow use of import +# 0.20 - modify to allow use inside new interface for calibre plugins +# 0.21 - Support eReader (drm) version 11. +# - Don't reject dictionary format. +# - Ignore sidebars for dictionaries (different format?) +# 0.22 - Unicode and plugin support, different image folders for PMLZ and source +# 0.23 - moved unicode_argv call inside main for Windows DeDRM compatibility + +__version__='0.23' + +import sys, re +import struct, binascii, getopt, zlib, os, os.path, urllib, tempfile, traceback + +if 'calibre' in sys.modules: + inCalibre = True +else: + inCalibre = False + +# Wrap a stream so that output gets flushed immediately +# and also make sure that any unicode strings get +# encoded using "replace" before writing them. 
+class SafeUnbuffered: + def __init__(self, stream): + self.stream = stream + self.encoding = stream.encoding + if self.encoding == None: + self.encoding = "utf-8" + def write(self, data): + if isinstance(data,unicode): + data = data.encode(self.encoding,"replace") + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + +iswindows = sys.platform.startswith('win') +isosx = sys.platform.startswith('darwin') + +def unicode_argv(): + if iswindows: + # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode + # strings. + + # Versions 2.x of Python don't support Unicode in sys.argv on + # Windows, with the underlying Windows API instead replacing multi-byte + # characters with '?'. + + + from ctypes import POINTER, byref, cdll, c_int, windll + from ctypes.wintypes import LPCWSTR, LPWSTR + + GetCommandLineW = cdll.kernel32.GetCommandLineW + GetCommandLineW.argtypes = [] + GetCommandLineW.restype = LPCWSTR + + CommandLineToArgvW = windll.shell32.CommandLineToArgvW + CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] + CommandLineToArgvW.restype = POINTER(LPWSTR) + + cmd = GetCommandLineW() + argc = c_int(0) + argv = CommandLineToArgvW(cmd, byref(argc)) + if argc.value > 0: + # Remove Python executable and commands if present + start = argc.value - len(sys.argv) + return [argv[i] for i in + xrange(start, argc.value)] + # if we don't have any arguments at all, just pass back script name + # this should never happen + return [u"mobidedrm.py"] + else: + argvencoding = sys.stdin.encoding + if argvencoding == None: + argvencoding = "utf-8" + return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] + +Des = None +if iswindows: + # first try with pycrypto + if inCalibre: + from calibre_plugins.dedrm import pycrypto_des + else: + import pycrypto_des + Des = pycrypto_des.load_pycrypto() + if Des == None: + # they try with openssl + if inCalibre: + from 
calibre_plugins.dedrm import openssl_des else: - end = min(self.docSize, end) - foundat = -1 - for j in xrange(pos, end): - item = self.docList[j] - if item.find('=') >= 0: - (name, argres) = item.split('=',1) - else : - name = item - argres = '' - if name.endswith(tagpath) : - result = argres - foundat = j - break - return foundat, result - - - # return list of start positions for the tagpath - def posinDoc(self, tagpath): - startpos = [] - pos = 0 - res = "" - while res != None : - (foundpos, res) = self.findinDoc(tagpath, pos, -1) - if res != None : - startpos.append(foundpos) - pos = foundpos + 1 - return startpos - - - # returns a vector of integers for the tagpath - def getData(self, tagpath, pos, end): - argres=[] - (foundat, argt) = self.findinDoc(tagpath, pos, end) - if (argt != None) and (len(argt) > 0) : - argList = argt.split('|') - argres = [ int(strval) for strval in argList] - return argres - - - # get the class - def getClass(self, pclass): - nclass = pclass - - # class names are an issue given topaz may start them with numerals (not allowed), - # use a mix of cases (which cause some browsers problems), and actually - # attach numbers after "_reclustered*" to the end to deal classeses that inherit - # from a base class (but then not actually provide all of these _reclustereed - # classes in the stylesheet! 
- - # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass - # that exists in the stylesheet first, and then adding this specific class - # after - - # also some class names have spaces in them so need to convert to dashes - if nclass != None : - nclass = nclass.replace(' ','-') - classres = '' - nclass = nclass.lower() - nclass = 'cl-' + nclass - baseclass = '' - # graphic is the base class for captions - if nclass.find('cl-cap-') >=0 : - classres = 'graphic' + ' ' - else : - # strip to find baseclass - p = nclass.find('_') - if p > 0 : - baseclass = nclass[0:p] - if baseclass in self.classList: - classres += baseclass + ' ' - classres += nclass - nclass = classres - return nclass - - - # develop a sorted description of the starting positions of - # groups and regions on the page, as well as the page type - def PageDescription(self): - - def compare(x, y): - (xtype, xval) = x - (ytype, yval) = y - if xval > yval: - return 1 - if xval == yval: - return 0 - return -1 - - result = [] - (pos, pagetype) = self.findinDoc('page.type',0,-1) - - groupList = self.posinDoc('page.group') - groupregionList = self.posinDoc('page.group.region') - pageregionList = self.posinDoc('page.region') - # integrate into one list - for j in groupList: - result.append(('grpbeg',j)) - for j in groupregionList: - result.append(('gregion',j)) - for j in pageregionList: - result.append(('pregion',j)) - result.sort(compare) - - # insert group end and page end indicators - inGroup = False - j = 0 - while True: - if j == len(result): break - rtype = result[j][0] - rval = result[j][1] - if not inGroup and (rtype == 'grpbeg') : - inGroup = True - j = j + 1 - elif inGroup and (rtype in ('grpbeg', 'pregion')): - result.insert(j,('grpend',rval)) - inGroup = False + import openssl_des + Des = openssl_des.load_libcrypto() +else: + # first try with openssl + if inCalibre: + from calibre_plugins.dedrm import openssl_des + else: + import openssl_des + Des = 
openssl_des.load_libcrypto() + if Des == None: + # then try with pycrypto + if inCalibre: + from calibre_plugins.dedrm import pycrypto_des + else: + import pycrypto_des + Des = pycrypto_des.load_pycrypto() + +# if that did not work then use pure python implementation +# of DES and try to speed it up with Psycho +if Des == None: + if inCalibre: + from calibre_plugins.dedrm import python_des + else: + import python_des + Des = python_des.Des + # Import Psyco if available + try: + # http://psyco.sourceforge.net + import psyco + psyco.full() + except ImportError: + pass + +try: + from hashlib import sha1 +except ImportError: + # older Python release + import sha + sha1 = lambda s: sha.new(s) + +import cgi +import logging + +logging.basicConfig() +#logging.basicConfig(level=logging.DEBUG) + + +class Sectionizer(object): + bkType = "Book" + + def __init__(self, filename, ident): + self.contents = file(filename, 'rb').read() + self.header = self.contents[0:72] + self.num_sections, = struct.unpack('>H', self.contents[76:78]) + # Dictionary or normal content (TODO: Not hard-coded) + if self.header[0x3C:0x3C+8] != ident: + if self.header[0x3C:0x3C+8] == "PDctPPrs": + self.bkType = "Dict" else: - j = j + 1 - if inGroup: - result.append(('grpend',-1)) - result.append(('pageend', -1)) - return pagetype, result - - - - # build a description of the paragraph - def getParaDescription(self, start, end, regtype): - - result = [] - - # paragraph - (pos, pclass) = self.findinDoc('paragraph.class',start,end) - - pclass = self.getClass(pclass) - - # if paragraph uses extratokens (extra glyphs) then make it fixed - (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end) - - # build up a description of the paragraph in result and return it - # first check for the basic - all words paragraph - (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end) - (pos, slast) = self.findinDoc('paragraph.lastWord',start,end) - if (sfirst != None) and (slast != None) : - first = 
int(sfirst) - last = int(slast) - - makeImage = (regtype == 'vertical') or (regtype == 'table') - makeImage = makeImage or (extraglyphs != None) - if self.fixedimage: - makeImage = makeImage or (regtype == 'fixed') - - if (pclass != None): - makeImage = makeImage or (pclass.find('.inverted') >= 0) - if self.fixedimage : - makeImage = makeImage or (pclass.find('cl-f-') >= 0) - - # before creating an image make sure glyph info exists - gidList = self.getData('info.glyph.glyphID',0,-1) - - makeImage = makeImage & (len(gidList) > 0) - - if not makeImage : - # standard all word paragraph - for wordnum in xrange(first, last): - result.append(('ocr', wordnum)) - return pclass, result - - # convert paragraph to svg image - # translate first and last word into first and last glyphs - # and generate inline image and include it - glyphList = [] - firstglyphList = self.getData('word.firstGlyph',0,-1) - gidList = self.getData('info.glyph.glyphID',0,-1) - firstGlyph = firstglyphList[first] - if last < len(firstglyphList): - lastGlyph = firstglyphList[last] - else : - lastGlyph = len(gidList) - - # handle case of white sapce paragraphs with no actual glyphs in them - # by reverting to text based paragraph - if firstGlyph >= lastGlyph: - # revert to standard text based paragraph - for wordnum in xrange(first, last): - result.append(('ocr', wordnum)) - return pclass, result - - for glyphnum in xrange(firstGlyph, lastGlyph): - glyphList.append(glyphnum) - # include any extratokens if they exist - (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end) - (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end) - if (sfg != None) and (slg != None): - for glyphnum in xrange(int(sfg), int(slg)): - glyphList.append(glyphnum) - num = self.svgcount - self.glyphs_to_image(glyphList) - self.svgcount += 1 - result.append(('svg', num)) - return pclass, result - - # this type of paragraph may be made up of multiple spans, inline - # word monograms (images), and words with semantic 
meaning, - # plus glyphs used to form starting letter of first word - - # need to parse this type line by line - line = start + 1 - word_class = '' - - # if end is -1 then we must search to end of document - if end == -1 : - end = self.docSize - - # seems some xml has last* coming before first* so we have to - # handle any order - sp_first = -1 - sp_last = -1 - - gl_first = -1 - gl_last = -1 - - ws_first = -1 - ws_last = -1 - - word_class = '' - - word_semantic_type = '' - - while (line < end) : - - (name, argres) = self.lineinDoc(line) - - if name.endswith('span.firstWord') : - sp_first = int(argres) - - elif name.endswith('span.lastWord') : - sp_last = int(argres) - - elif name.endswith('word.firstGlyph') : - gl_first = int(argres) - - elif name.endswith('word.lastGlyph') : - gl_last = int(argres) - - elif name.endswith('word_semantic.firstWord'): - ws_first = int(argres) - - elif name.endswith('word_semantic.lastWord'): - ws_last = int(argres) - - elif name.endswith('word.class'): - # we only handle spaceafter word class - try: - (cname, space) = argres.split('-',1) - if space == '' : space = '0' - if (cname == 'spaceafter') and (int(space) > 0) : - word_class = 'sa' - except: - pass - - elif name.endswith('word.img.src'): - result.append(('img' + word_class, int(argres))) - word_class = '' - - elif name.endswith('region.img.src'): - result.append(('img' + word_class, int(argres))) - - if (sp_first != -1) and (sp_last != -1): - for wordnum in xrange(sp_first, sp_last): - result.append(('ocr', wordnum)) - sp_first = -1 - sp_last = -1 - - if (gl_first != -1) and (gl_last != -1): - glyphList = [] - for glyphnum in xrange(gl_first, gl_last): - glyphList.append(glyphnum) - num = self.svgcount - self.glyphs_to_image(glyphList) - self.svgcount += 1 - result.append(('svg', num)) - gl_first = -1 - gl_last = -1 - - if (ws_first != -1) and (ws_last != -1): - for wordnum in xrange(ws_first, ws_last): - result.append(('ocr', wordnum)) - ws_first = -1 - ws_last = -1 - - line 
+= 1 - - return pclass, result - - - def buildParagraph(self, pclass, pdesc, type, regtype) : - parares = '' - sep ='' - - classres = '' - if pclass : - classres = ' class="' + pclass + '"' - - br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical') - - handle_links = len(self.link_id) > 0 - - if (type == 'full') or (type == 'begin') : - parares += '' - - if (type == 'end'): - parares += ' ' - - lstart = len(parares) - - cnt = len(pdesc) - - for j in xrange( 0, cnt) : - - (wtype, num) = pdesc[j] - - if wtype == 'ocr' : - word = self.ocrtext[num] - sep = ' ' - - if handle_links: - link = self.link_id[num] - if (link > 0): - linktype = self.link_type[link-1] - title = self.link_title[link-1] - if (title == "") or (parares.rfind(title) < 0): - title=parares[lstart:] - if linktype == 'external' : - linkhref = self.link_href[link-1] - linkhtml = '' % linkhref - else : - if len(self.link_page) >= link : - ptarget = self.link_page[link-1] - 1 - linkhtml = '' % ptarget - else : - # just link to the current page - linkhtml = '' - linkhtml += title + '' - pos = parares.rfind(title) - if pos >= 0: - parares = parares[0:pos] + linkhtml + parares[pos+len(title):] - else : - parares += linkhtml - lstart = len(parares) - if word == '_link_' : word = '' - elif (link < 0) : - if word == '_link_' : word = '' - - if word == '_lb_': - if ((num-1) in self.dehyphen_rootid ) or handle_links: - word = '' - sep = '' - elif br_lb : - word = '
\n' - sep = '' - else : - word = '\n' - sep = '' - - if num in self.dehyphen_rootid : - word = word[0:-1] - sep = '' - - parares += word + sep - - elif wtype == 'img' : - sep = '' - parares += '' % num - parares += sep - - elif wtype == 'imgsa' : - sep = ' ' - parares += '' % num - parares += sep - - elif wtype == 'svg' : - sep = '' - parares += '' % num - parares += sep - - if len(sep) > 0 : parares = parares[0:-1] - if (type == 'full') or (type == 'end') : - parares += '

' - return parares - - - def buildTOCEntry(self, pdesc) : - parares = '' - sep ='' - tocentry = '' - handle_links = len(self.link_id) > 0 - - lstart = 0 - - cnt = len(pdesc) - for j in xrange( 0, cnt) : - - (wtype, num) = pdesc[j] - - if wtype == 'ocr' : - word = self.ocrtext[num] - sep = ' ' - - if handle_links: - link = self.link_id[num] - if (link > 0): - linktype = self.link_type[link-1] - title = self.link_title[link-1] - title = title.rstrip('. ') - alt_title = parares[lstart:] - alt_title = alt_title.strip() - # now strip off the actual printed page number - alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.') - alt_title = alt_title.rstrip('. ') - # skip over any external links - can't have them in a books toc - if linktype == 'external' : - title = '' - alt_title = '' - linkpage = '' - else : - if len(self.link_page) >= link : - ptarget = self.link_page[link-1] - 1 - linkpage = '%04d' % ptarget - else : - # just link to the current page - linkpage = self.id[4:] - if len(alt_title) >= len(title): - title = alt_title - if title != '' and linkpage != '': - tocentry += title + '|' + linkpage + '\n' - lstart = len(parares) - if word == '_link_' : word = '' - elif (link < 0) : - if word == '_link_' : word = '' - - if word == '_lb_': - word = '' - sep = '' - - if num in self.dehyphen_rootid : - word = word[0:-1] - sep = '' - - parares += word + sep - - else : - continue - - return tocentry - - - - - # walk the document tree collecting the information needed - # to build an html page using the ocrText - - def process(self): - - tocinfo = '' - hlst = [] - - # get the ocr text - (pos, argres) = self.findinDoc('info.word.ocrText',0,-1) - if argres : self.ocrtext = argres.split('|') - - # get information to dehyphenate the text - self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1) - - # determine if first paragraph is continued from previous page - (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1) - first_para_continued = 
(self.parastems_stemid != None) - - # determine if last paragraph is continued onto the next page - (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1) - last_para_continued = (self.paracont_stemid != None) - - # collect link ids - self.link_id = self.getData('info.word.link_id',0,-1) - - # collect link destination page numbers - self.link_page = self.getData('info.links.page',0,-1) - - # collect link types (container versus external) - (pos, argres) = self.findinDoc('info.links.type',0,-1) - if argres : self.link_type = argres.split('|') - - # collect link destinations - (pos, argres) = self.findinDoc('info.links.href',0,-1) - if argres : self.link_href = argres.split('|') - - # collect link titles - (pos, argres) = self.findinDoc('info.links.title',0,-1) - if argres : - self.link_title = argres.split('|') + raise ValueError('Invalid file format') + self.sections = [] + for i in xrange(self.num_sections): + offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8]) + flags, val = a1, a2<<16|a3<<8|a4 + self.sections.append( (offset, flags, val) ) + def loadSection(self, section): + if section + 1 == self.num_sections: + end_off = len(self.contents) else: - self.link_title.append('') - - # get a descriptions of the starting points of the regions - # and groups on the page - (pagetype, pageDesc) = self.PageDescription() - regcnt = len(pageDesc) - 1 - - anchorSet = False - breakSet = False - inGroup = False - - # process each region on the page and convert what you can to html - - for j in xrange(regcnt): - - (etype, start) = pageDesc[j] - (ntype, end) = pageDesc[j+1] + end_off = self.sections[section + 1][0] + off = self.sections[section][0] + return self.contents[off:end_off] + +# cleanup unicode filenames +# borrowed from calibre from calibre/src/calibre/__init__.py +# added in removal of control (<32) chars +# and removal of . 
at start and end +# and with some (heavily edited) code from Paul Durrant's kindlenamer.py +def sanitizeFileName(name): + # substitute filename unfriendly characters + name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'") + # delete control characters + name = u"".join(char for char in name if ord(char)>=32) + # white space to single space, delete leading and trailing while space + name = re.sub(ur"\s", u" ", name).strip() + # remove leading dots + while len(name)>0 and name[0] == u".": + name = name[1:] + # remove trailing dots (Windows doesn't like them) + if name.endswith(u'.'): + name = name[:-1] + return name + +def fixKey(key): + def fixByte(b): + return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80) + return "".join([chr(fixByte(ord(a))) for a in key]) + +def deXOR(text, sp, table): + r='' + j = sp + for i in xrange(len(text)): + r += chr(ord(table[j]) ^ ord(text[i])) + j = j + 1 + if j == len(table): + j = 0 + return r + +class EreaderProcessor(object): + def __init__(self, sect, user_key): + self.section_reader = sect.loadSection + data = self.section_reader(0) + version, = struct.unpack('>H', data[0:2]) + self.version = version + logging.info('eReader file format version %s', version) + if version != 272 and version != 260 and version != 259: + raise ValueError('incorrect eReader version %d (error 1)' % version) + data = self.section_reader(1) + self.data = data + des = Des(fixKey(data[0:8])) + cookie_shuf, cookie_size = struct.unpack('>LL', des.decrypt(data[-8:])) + if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200: + raise ValueError('incorrect eReader version (error 2)') + input = des.decrypt(data[-cookie_size:]) + def unshuff(data, shuf): + r = [''] * len(data) + j = 0 + for i in xrange(len(data)): + j = (j + shuf) % len(data) + r[j] = 
data[i] + assert len("".join(r)) == len(data) + return "".join(r) + r = unshuff(input[0:-8], cookie_shuf) + + drm_sub_version = struct.unpack('>H', r[0:2])[0] + self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1 + self.num_image_pages = struct.unpack('>H', r[26:26+2])[0] + self.first_image_page = struct.unpack('>H', r[24:24+2])[0] + # Default values + self.num_footnote_pages = 0 + self.num_sidebar_pages = 0 + self.first_footnote_page = -1 + self.first_sidebar_page = -1 + if self.version == 272: + self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0] + self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0] + if (sect.bkType == "Book"): + self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0] + self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0] + # self.num_bookinfo_pages = struct.unpack('>H', r[34:34+2])[0] + # self.first_bookinfo_page = struct.unpack('>H', r[32:32+2])[0] + # self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0] + # self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0] + # self.num_link_pages = struct.unpack('>H', r[30:30+2])[0] + # self.first_link_page = struct.unpack('>H', r[28:28+2])[0] + # self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0] + # self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0] + + # **before** data record 1 was decrypted and unshuffled, it contained data + # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc + self.xortable_offset = struct.unpack('>H', r[40:40+2])[0] + self.xortable_size = struct.unpack('>H', r[42:42+2])[0] + self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size] + else: + # Nothing needs to be done + pass + # self.num_bookinfo_pages = 0 + # self.num_chapter_pages = 0 + # self.num_link_pages = 0 + # self.num_xtextsize_pages = 0 + # self.first_bookinfo_page = -1 + # self.first_chapter_page = -1 + # self.first_link_page = -1 + # self.first_xtextsize_page = -1 
+ + logging.debug('self.num_text_pages %d', self.num_text_pages) + logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page) + logging.debug('self.num_sidebar_pages %d, self.first_sidebar_page %d', self.num_sidebar_pages , self.first_sidebar_page) + self.flags = struct.unpack('>L', r[4:8])[0] + reqd_flags = (1<<9) | (1<<7) | (1<<10) + if (self.flags & reqd_flags) != reqd_flags: + print "Flags: 0x%X" % self.flags + raise ValueError('incompatible eReader file') + des = Des(fixKey(user_key)) + if version == 259: + if drm_sub_version != 7: + raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version) + encrypted_key_sha = r[44:44+20] + encrypted_key = r[64:64+8] + elif version == 260: + if drm_sub_version != 13 and drm_sub_version != 11: + raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version) + if drm_sub_version == 13: + encrypted_key = r[44:44+8] + encrypted_key_sha = r[52:52+20] + else: + encrypted_key = r[64:64+8] + encrypted_key_sha = r[44:44+20] + elif version == 272: + encrypted_key = r[172:172+8] + encrypted_key_sha = r[56:56+20] + self.content_key = des.decrypt(encrypted_key) + if sha1(self.content_key).digest() != encrypted_key_sha: + raise ValueError('Incorrect Name and/or Credit Card') + + def getNumImages(self): + return self.num_image_pages + + def getImage(self, i): + sect = self.section_reader(self.first_image_page + i) + name = sect[4:4+32].strip('\0') + data = sect[62:] + return sanitizeFileName(unicode(name,'windows-1252')), data + + + # def getChapterNamePMLOffsetData(self): + # cv = '' + # if self.num_chapter_pages > 0: + # for i in xrange(self.num_chapter_pages): + # chaps = self.section_reader(self.first_chapter_page + i) + # j = i % self.xortable_size + # offname = deXOR(chaps, j, self.xortable) + # offset = struct.unpack('>L', offname[0:4])[0] + # name = offname[4:].strip('\0') + # cv += '%d|%s\n' % (offset, name) + # return cv + + # def 
getLinkNamePMLOffsetData(self): + # lv = '' + # if self.num_link_pages > 0: + # for i in xrange(self.num_link_pages): + # links = self.section_reader(self.first_link_page + i) + # j = i % self.xortable_size + # offname = deXOR(links, j, self.xortable) + # offset = struct.unpack('>L', offname[0:4])[0] + # name = offname[4:].strip('\0') + # lv += '%d|%s\n' % (offset, name) + # return lv + + # def getExpandedTextSizesData(self): + # ts = '' + # if self.num_xtextsize_pages > 0: + # tsize = deXOR(self.section_reader(self.first_xtextsize_page), 0, self.xortable) + # for i in xrange(self.num_text_pages): + # xsize = struct.unpack('>H', tsize[0:2])[0] + # ts += "%d\n" % xsize + # tsize = tsize[2:] + # return ts + + # def getBookInfo(self): + # bkinfo = '' + # if self.num_bookinfo_pages > 0: + # info = self.section_reader(self.first_bookinfo_page) + # bkinfo = deXOR(info, 0, self.xortable) + # bkinfo = bkinfo.replace('\0','|') + # bkinfo += '\n' + # return bkinfo + + def getText(self): + des = Des(fixKey(self.content_key)) + r = '' + for i in xrange(self.num_text_pages): + logging.debug('get page %d', i) + r += zlib.decompress(des.decrypt(self.section_reader(1 + i))) + + # now handle footnotes pages + if self.num_footnote_pages > 0: + r += '\n' + # the record 0 of the footnote section must pass through the Xor Table to make it useful + sect = self.section_reader(self.first_footnote_page) + fnote_ids = deXOR(sect, 0, self.xortable) + # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated + des = Des(fixKey(self.content_key)) + for i in xrange(1,self.num_footnote_pages): + logging.debug('get footnotepage %d', i) + id_len = ord(fnote_ids[2]) + id = fnote_ids[3:3+id_len] + fmarker = '\n' % id + fmarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i))) + fmarker += '\n\n' + r += fmarker + fnote_ids = fnote_ids[id_len+4:] + + # TODO: Handle dictionary index (?) 
pages - which are also marked as + # sidebar_pages (?). For now dictionary sidebars are ignored + # For dictionaries - record 0 is null terminated strings, followed by + # blocks of around 62000 bytes and a final block. Not sure of the + # encoding + + # now handle sidebar pages + if self.num_sidebar_pages > 0: + r += '\n' + # the record 0 of the sidebar section must pass through the Xor Table to make it useful + sect = self.section_reader(self.first_sidebar_page) + sbar_ids = deXOR(sect, 0, self.xortable) + # the remaining records of the sidebar sections need to be decoded with the content_key and zlib inflated + des = Des(fixKey(self.content_key)) + for i in xrange(1,self.num_sidebar_pages): + id_len = ord(sbar_ids[2]) + id = sbar_ids[3:3+id_len] + smarker = '\n' % id + smarker += zlib.decompress(des.decrypt(self.section_reader(self.first_sidebar_page + i))) + smarker += '\n\n' + r += smarker + sbar_ids = sbar_ids[id_len+4:] + + return r + +def cleanPML(pml): + # Convert special characters to proper PML code. 
High ASCII start at (\x80, \a128) and go up to (\xff, \a255) + pml2 = pml + for k in xrange(128,256): + badChar = chr(k) + pml2 = pml2.replace(badChar, '\\a%03d' % k) + return pml2 + +def decryptBook(infile, outpath, make_pmlz, user_key): + bookname = os.path.splitext(os.path.basename(infile))[0] + if make_pmlz: + # outpath is actually pmlz name + pmlzname = outpath + outdir = tempfile.mkdtemp() + imagedirpath = os.path.join(outdir,u"images") + else: + pmlzname = None + outdir = outpath + imagedirpath = os.path.join(outdir,bookname + u"_img") + + try: + if not os.path.exists(outdir): + os.makedirs(outdir) + print u"Decoding File" + sect = Sectionizer(infile, 'PNRdPPrs') + er = EreaderProcessor(sect, user_key) + + if er.getNumImages() > 0: + print u"Extracting images" + if not os.path.exists(imagedirpath): + os.makedirs(imagedirpath) + for i in xrange(er.getNumImages()): + name, contents = er.getImage(i) + file(os.path.join(imagedirpath, name), 'wb').write(contents) + + print u"Extracting pml" + pml_string = er.getText() + pmlfilename = bookname + ".pml" + file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string)) + if pmlzname is not None: + import zipfile + import shutil + print u"Creating PMLZ file {0}".format(os.path.basename(pmlzname)) + myZipFile = zipfile.ZipFile(pmlzname,'w',zipfile.ZIP_STORED, False) + list = os.listdir(outdir) + for filename in list: + localname = filename + filePath = os.path.join(outdir,filename) + if os.path.isfile(filePath): + myZipFile.write(filePath, localname) + elif os.path.isdir(filePath): + imageList = os.listdir(filePath) + localimgdir = os.path.basename(filePath) + for image in imageList: + localname = os.path.join(localimgdir,image) + imagePath = os.path.join(filePath,image) + if os.path.isfile(imagePath): + myZipFile.write(imagePath, localname) + myZipFile.close() + # remove temporary directory + shutil.rmtree(outdir, True) + print u"Output is {0}".format(pmlzname) + else : + print u"Output is in 
{0}".format(outdir) + print "done" + except ValueError, e: + print u"Error: {0}".format(e) + traceback.print_exc() + return 1 + return 0 + + +def usage(): + print u"Converts DRMed eReader books to PML Source" + print u"Usage:" + print u" erdr2pml [options] infile.pdb [outpath] \"your name\" credit_card_number" + print u" " + print u"Options: " + print u" -h prints this message" + print u" -p create PMLZ instead of source folder" + print u" --make-pmlz create PMLZ instead of source folder" + print u" " + print u"Note:" + print u" if outpath is ommitted, creates source in 'infile_Source' folder" + print u" if outpath is ommitted and pmlz option, creates PMLZ 'infile.pmlz'" + print u" if source folder created, images are in infile_img folder" + print u" if pmlz file created, images are in images folder" + print u" It's enough to enter the last 8 digits of the credit card number" + return + +def getuser_key(name,cc): + newname = "".join(c for c in name.lower() if c >= 'a' and c <= 'z' or c >= '0' and c <= '9') + cc = cc.replace(" ","") + return struct.pack('>LL', binascii.crc32(newname) & 0xffffffff,binascii.crc32(cc[-8:])& 0xffffffff) + +def cli_main(): + print u"eRdr2Pml v{0}. 
Copyright © 2009–2012 The Dark Reverser et al.".format(__version__) + + argv=unicode_argv() + try: + opts, args = getopt.getopt(argv[1:], "hp", ["make-pmlz"]) + except getopt.GetoptError, err: + print err.args[0] + usage() + return 1 + make_pmlz = False + for o, a in opts: + if o == "-h": + usage() + return 0 + elif o == "-p": + make_pmlz = True + elif o == "--make-pmlz": + make_pmlz = True + + if len(args)!=3 and len(args)!=4: + usage() + return 1 + + if len(args)==3: + infile, name, cc = args + if make_pmlz: + outpath = os.path.splitext(infile)[0] + u".pmlz" + else: + outpath = os.path.splitext(infile)[0] + u"_Source" + elif len(args)==4: + infile, outpath, name, cc = args + print getuser_key(name,cc).encode('hex') - # set anchor for link target on this page - if not anchorSet and not first_para_continued: - hlst.append('\n') - anchorSet = True + return decryptBook(infile, outpath, make_pmlz, getuser_key(name,cc)) - # handle groups of graphics with text captions - if (etype == 'grpbeg'): - (pos, grptype) = self.findinDoc('group.type', start, end) - if grptype != None: - if grptype == 'graphic': - gcstr = ' class="' + grptype + '"' - hlst.append('') - inGroup = True - elif (etype == 'grpend'): - if inGroup: - hlst.append('\n') - inGroup = False +if __name__ == "__main__": + sys.stdout=SafeUnbuffered(sys.stdout) + sys.stderr=SafeUnbuffered(sys.stderr) + sys.exit(cli_main()) - else: - (pos, regtype) = self.findinDoc('region.type',start,end) - - if regtype == 'graphic' : - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - if inGroup: - hlst.append('' % int(simgsrc)) - else: - hlst.append('
' % int(simgsrc)) - - elif regtype == 'chapterheading' : - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if not breakSet: - hlst.append('
 
\n') - breakSet = True - tag = 'h1' - if pclass and (len(pclass) >= 7): - if pclass[3:7] == 'ch1-' : tag = 'h1' - if pclass[3:7] == 'ch2-' : tag = 'h2' - if pclass[3:7] == 'ch3-' : tag = 'h3' - hlst.append('<' + tag + ' class="' + pclass + '">') - else: - hlst.append('<' + tag + '>') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - - elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'): - ptype = 'full' - # check to see if this is a continution from the previous page - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if pclass and (len(pclass) >= 6) and (ptype == 'full'): - tag = 'p' - if pclass[3:6] == 'h1-' : tag = 'h4' - if pclass[3:6] == 'h2-' : tag = 'h5' - if pclass[3:6] == 'h3-' : tag = 'h6' - hlst.append('<' + tag + ' class="' + pclass + '">') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - else : - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - elif (regtype == 'tocentry') : - ptype = 'full' - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - tocinfo += self.buildTOCEntry(pdesc) - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - elif (regtype == 'vertical') or (regtype == 'table') : - ptype = 'full' - if inGroup: - ptype = 'middle' - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start, end, regtype) - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - - - elif (regtype == 'synth_fcvr.center'): - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - hlst.append('
' % int(simgsrc)) - - else : - print ' Making region type', regtype, - (pos, temp) = self.findinDoc('paragraph',start,end) - (pos2, temp) = self.findinDoc('span',start,end) - if pos != -1 or pos2 != -1: - print ' a "text" region' - orig_regtype = regtype - regtype = 'fixed' - ptype = 'full' - # check to see if this is a continution from the previous page - if first_para_continued : - ptype = 'end' - first_para_continued = False - (pclass, pdesc) = self.getParaDescription(start,end, regtype) - if not pclass: - if orig_regtype.endswith('.right') : pclass = 'cl-right' - elif orig_regtype.endswith('.center') : pclass = 'cl-center' - elif orig_regtype.endswith('.left') : pclass = 'cl-left' - elif orig_regtype.endswith('.justify') : pclass = 'cl-justify' - if pclass and (ptype == 'full') and (len(pclass) >= 6): - tag = 'p' - if pclass[3:6] == 'h1-' : tag = 'h4' - if pclass[3:6] == 'h2-' : tag = 'h5' - if pclass[3:6] == 'h3-' : tag = 'h6' - hlst.append('<' + tag + ' class="' + pclass + '">') - hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) - hlst.append('') - else : - hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) - else : - print ' a "graphic" region' - (pos, simgsrc) = self.findinDoc('img.src',start,end) - if simgsrc: - hlst.append('
' % int(simgsrc)) - - - htmlpage = "".join(hlst) - if last_para_continued : - if htmlpage[-4:] == '

': - htmlpage = htmlpage[0:-4] - last_para_continued = False - - return htmlpage, tocinfo - - -def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage): - # create a document parser - dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage) - htmlpage, tocinfo = dp.process() - return htmlpage, tocinfo diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py index 4dfd6c7..991591b 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py @@ -1,63 +1,127 @@ #! /usr/bin/python # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +# For use with Topaz Scripts Version 2.6 import sys import csv import os +import math import getopt from struct import pack from struct import unpack -class PParser(object): - def __init__(self, gd, flatxml, meta_array): - self.gd = gd - self.flatdoc = flatxml.split('\n') - self.docSize = len(self.flatdoc) - self.temp = [] - - self.ph = -1 - self.pw = -1 - startpos = self.posinDoc('page.h') or self.posinDoc('book.h') - for p in startpos: - (name, argres) = self.lineinDoc(p) - self.ph = max(self.ph, int(argres)) - startpos = self.posinDoc('page.w') or self.posinDoc('book.w') - for p in startpos: - (name, argres) = self.lineinDoc(p) - self.pw = max(self.pw, int(argres)) - - if self.ph <= 0: - self.ph = int(meta_array.get('pageHeight', '11000')) - if self.pw <= 0: - self.pw = int(meta_array.get('pageWidth', '8500')) - - res = [] - startpos = self.posinDoc('info.glyph.x') - for p in startpos: - argres = self.getDataatPos('info.glyph.x', p) - res.extend(argres) - self.gx = res - - res = [] - startpos = self.posinDoc('info.glyph.y') - for p in startpos: - argres = self.getDataatPos('info.glyph.y', p) - res.extend(argres) - self.gy = res - - res = [] - startpos = self.posinDoc('info.glyph.glyphID') - for p in startpos: - argres = self.getDataatPos('info.glyph.glyphID', p) - res.extend(argres) - self.gid = res 
+class DocParser(object): + def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage): + self.id = os.path.basename(fileid).replace('.dat','') + self.svgcount = 0 + self.docList = flatxml.split('\n') + self.docSize = len(self.docList) + self.classList = {} + self.bookDir = bookDir + self.gdict = gdict + tmpList = classlst.split('\n') + for pclass in tmpList: + if pclass != '': + # remove the leading period from the css name + cname = pclass[1:] + self.classList[cname] = True + self.fixedimage = fixedimage + self.ocrtext = [] + self.link_id = [] + self.link_title = [] + self.link_page = [] + self.link_href = [] + self.link_type = [] + self.dehyphen_rootid = [] + self.paracont_stemid = [] + self.parastems_stemid = [] + + + def getGlyph(self, gid): + result = '' + id='id="gl%d"' % gid + return self.gdict.lookup(id) + + def glyphs_to_image(self, glyphList): + + def extract(path, key): + b = path.find(key) + len(key) + e = path.find(' ',b) + return int(path[b:e]) + + svgDir = os.path.join(self.bookDir,'svg') + + imgDir = os.path.join(self.bookDir,'img') + imgname = self.id + '_%04d.svg' % self.svgcount + imgfile = os.path.join(imgDir,imgname) + + # get glyph information + gxList = self.getData('info.glyph.x',0,-1) + gyList = self.getData('info.glyph.y',0,-1) + gidList = self.getData('info.glyph.glyphID',0,-1) + + gids = [] + maxws = [] + maxhs = [] + xs = [] + ys = [] + gdefs = [] + + # get path defintions, positions, dimensions for each glyph + # that makes up the image, and find min x and min y to reposition origin + minx = -1 + miny = -1 + for j in glyphList: + gid = gidList[j] + gids.append(gid) + + xs.append(gxList[j]) + if minx == -1: minx = gxList[j] + else : minx = min(minx, gxList[j]) + + ys.append(gyList[j]) + if miny == -1: miny = gyList[j] + else : miny = min(miny, gyList[j]) + + path = self.getGlyph(gid) + gdefs.append(path) + + maxws.append(extract(path,'width=')) + maxhs.append(extract(path,'height=')) + + + # change the origin to minx, 
miny and calc max height and width + maxw = maxws[0] + xs[0] - minx + maxh = maxhs[0] + ys[0] - miny + for j in xrange(0, len(xs)): + xs[j] = xs[j] - minx + ys[j] = ys[j] - miny + maxw = max( maxw, (maxws[j] + xs[j]) ) + maxh = max( maxh, (maxhs[j] + ys[j]) ) + + # open the image file for output + ifile = open(imgfile,'w') + ifile.write('\n') + ifile.write('\n') + ifile.write('\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh)) + ifile.write('\n') + for j in xrange(0,len(gdefs)): + ifile.write(gdefs[j]) + ifile.write('\n') + for j in xrange(0,len(gids)): + ifile.write('\n' % (gids[j], xs[j], ys[j])) + ifile.write('') + ifile.close() + + return 0 + # return tag at line pos in document def lineinDoc(self, pos) : if (pos >= 0) and (pos < self.docSize) : - item = self.flatdoc[pos] + item = self.docList[pos] if item.find('=') >= 0: (name, argres) = item.split('=',1) else : @@ -65,6 +129,7 @@ class PParser(object): argres = '' return name, argres + # find tag in doc if within pos to end inclusive def findinDoc(self, tagpath, pos, end) : result = None @@ -74,7 +139,7 @@ class PParser(object): end = min(self.docSize, end) foundat = -1 for j in xrange(pos, end): - item = self.flatdoc[j] + item = self.docList[j] if item.find('=') >= 0: (name, argres) = item.split('=',1) else : @@ -86,6 +151,7 @@ class PParser(object): break return foundat, result + # return list of start positions for the tagpath def posinDoc(self, tagpath): startpos = [] @@ -98,152 +164,638 @@ class PParser(object): pos = foundpos + 1 return startpos - def getData(self, path): - result = None - cnt = len(self.flatdoc) - for j in xrange(cnt): - item = self.flatdoc[j] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - else: - name = item - argres = [] - if (name.endswith(path)): - result = argres - break - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - return result - def getDataatPos(self, path, pos): - result = None - 
item = self.flatdoc[pos] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - else: - name = item - argres = [] - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - if (name.endswith(path)): - result = argres - return result - - def getDataTemp(self, path): - result = None - cnt = len(self.temp) - for j in xrange(cnt): - item = self.temp[j] - if item.find('=') >= 0: - (name, argt) = item.split('=') - argres = argt.split('|') - else: - name = item - argres = [] - if (name.endswith(path)): - result = argres - self.temp.pop(j) - break - if (len(argres) > 0) : - for j in xrange(0,len(argres)): - argres[j] = int(argres[j]) - return result + # returns a vector of integers for the tagpath + def getData(self, tagpath, pos, end): + argres=[] + (foundat, argt) = self.findinDoc(tagpath, pos, end) + if (argt != None) and (len(argt) > 0) : + argList = argt.split('|') + argres = [ int(strval) for strval in argList] + return argres + + + # get the class + def getClass(self, pclass): + nclass = pclass + + # class names are an issue given topaz may start them with numerals (not allowed), + # use a mix of cases (which cause some browsers problems), and actually + # attach numbers after "_reclustered*" to the end to deal classeses that inherit + # from a base class (but then not actually provide all of these _reclustereed + # classes in the stylesheet! 
+ + # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass + # that exists in the stylesheet first, and then adding this specific class + # after + + # also some class names have spaces in them so need to convert to dashes + if nclass != None : + nclass = nclass.replace(' ','-') + classres = '' + nclass = nclass.lower() + nclass = 'cl-' + nclass + baseclass = '' + # graphic is the base class for captions + if nclass.find('cl-cap-') >=0 : + classres = 'graphic' + ' ' + else : + # strip to find baseclass + p = nclass.find('_') + if p > 0 : + baseclass = nclass[0:p] + if baseclass in self.classList: + classres += baseclass + ' ' + classres += nclass + nclass = classres + return nclass + + + # develop a sorted description of the starting positions of + # groups and regions on the page, as well as the page type + def PageDescription(self): + + def compare(x, y): + (xtype, xval) = x + (ytype, yval) = y + if xval > yval: + return 1 + if xval == yval: + return 0 + return -1 - def getImages(self): result = [] - self.temp = self.flatdoc - while (self.getDataTemp('img') != None): - h = self.getDataTemp('img.h')[0] - w = self.getDataTemp('img.w')[0] - x = self.getDataTemp('img.x')[0] - y = self.getDataTemp('img.y')[0] - src = self.getDataTemp('img.src')[0] - result.append('\n' % (src, x, y, w, h)) - return result - - def getGlyphs(self): + (pos, pagetype) = self.findinDoc('page.type',0,-1) + + groupList = self.posinDoc('page.group') + groupregionList = self.posinDoc('page.group.region') + pageregionList = self.posinDoc('page.region') + # integrate into one list + for j in groupList: + result.append(('grpbeg',j)) + for j in groupregionList: + result.append(('gregion',j)) + for j in pageregionList: + result.append(('pregion',j)) + result.sort(compare) + + # insert group end and page end indicators + inGroup = False + j = 0 + while True: + if j == len(result): break + rtype = result[j][0] + rval = result[j][1] + if not inGroup and (rtype == 'grpbeg') : + 
inGroup = True + j = j + 1 + elif inGroup and (rtype in ('grpbeg', 'pregion')): + result.insert(j,('grpend',rval)) + inGroup = False + else: + j = j + 1 + if inGroup: + result.append(('grpend',-1)) + result.append(('pageend', -1)) + return pagetype, result + + + + # build a description of the paragraph + def getParaDescription(self, start, end, regtype): + result = [] - if (self.gid != None) and (len(self.gid) > 0): - glyphs = [] - for j in set(self.gid): - glyphs.append(j) - glyphs.sort() - for gid in glyphs: - id='id="gl%d"' % gid - path = self.gd.lookup(id) - if path: - result.append(id + ' ' + path) - return result - - -def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi): - mlst = [] - pp = PParser(gdict, flat_xml, meta_array) - mlst.append('\n') - if (raw): - mlst.append('\n') - mlst.append('\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)) - mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) - else: - mlst.append('\n') - mlst.append('\n') - mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) - mlst.append('\n') - mlst.append('\n') - mlst.append('\n') - mlst.append('
\n') - if previd == None: - mlst.append('\n') + + # paragraph + (pos, pclass) = self.findinDoc('paragraph.class',start,end) + + pclass = self.getClass(pclass) + + # if paragraph uses extratokens (extra glyphs) then make it fixed + (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end) + + # build up a description of the paragraph in result and return it + # first check for the basic - all words paragraph + (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end) + (pos, slast) = self.findinDoc('paragraph.lastWord',start,end) + if (sfirst != None) and (slast != None) : + first = int(sfirst) + last = int(slast) + + makeImage = (regtype == 'vertical') or (regtype == 'table') + makeImage = makeImage or (extraglyphs != None) + if self.fixedimage: + makeImage = makeImage or (regtype == 'fixed') + + if (pclass != None): + makeImage = makeImage or (pclass.find('.inverted') >= 0) + if self.fixedimage : + makeImage = makeImage or (pclass.find('cl-f-') >= 0) + + # before creating an image make sure glyph info exists + gidList = self.getData('info.glyph.glyphID',0,-1) + + makeImage = makeImage & (len(gidList) > 0) + + if not makeImage : + # standard all word paragraph + for wordnum in xrange(first, last): + result.append(('ocr', wordnum)) + return pclass, result + + # convert paragraph to svg image + # translate first and last word into first and last glyphs + # and generate inline image and include it + glyphList = [] + firstglyphList = self.getData('word.firstGlyph',0,-1) + gidList = self.getData('info.glyph.glyphID',0,-1) + firstGlyph = firstglyphList[first] + if last < len(firstglyphList): + lastGlyph = firstglyphList[last] + else : + lastGlyph = len(gidList) + + # handle case of white sapce paragraphs with no actual glyphs in them + # by reverting to text based paragraph + if firstGlyph >= lastGlyph: + # revert to standard text based paragraph + for wordnum in xrange(first, last): + result.append(('ocr', wordnum)) + return pclass, result + + for 
glyphnum in xrange(firstGlyph, lastGlyph): + glyphList.append(glyphnum) + # include any extratokens if they exist + (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end) + (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end) + if (sfg != None) and (slg != None): + for glyphnum in xrange(int(sfg), int(slg)): + glyphList.append(glyphnum) + num = self.svgcount + self.glyphs_to_image(glyphList) + self.svgcount += 1 + result.append(('svg', num)) + return pclass, result + + # this type of paragraph may be made up of multiple spans, inline + # word monograms (images), and words with semantic meaning, + # plus glyphs used to form starting letter of first word + + # need to parse this type line by line + line = start + 1 + word_class = '' + + # if end is -1 then we must search to end of document + if end == -1 : + end = self.docSize + + # seems some xml has last* coming before first* so we have to + # handle any order + sp_first = -1 + sp_last = -1 + + gl_first = -1 + gl_last = -1 + + ws_first = -1 + ws_last = -1 + + word_class = '' + + word_semantic_type = '' + + while (line < end) : + + (name, argres) = self.lineinDoc(line) + + if name.endswith('span.firstWord') : + sp_first = int(argres) + + elif name.endswith('span.lastWord') : + sp_last = int(argres) + + elif name.endswith('word.firstGlyph') : + gl_first = int(argres) + + elif name.endswith('word.lastGlyph') : + gl_last = int(argres) + + elif name.endswith('word_semantic.firstWord'): + ws_first = int(argres) + + elif name.endswith('word_semantic.lastWord'): + ws_last = int(argres) + + elif name.endswith('word.class'): + # we only handle spaceafter word class + try: + (cname, space) = argres.split('-',1) + if space == '' : space = '0' + if (cname == 'spaceafter') and (int(space) > 0) : + word_class = 'sa' + except: + pass + + elif name.endswith('word.img.src'): + result.append(('img' + word_class, int(argres))) + word_class = '' + + elif name.endswith('region.img.src'): + result.append(('img' + 
word_class, int(argres))) + + if (sp_first != -1) and (sp_last != -1): + for wordnum in xrange(sp_first, sp_last): + result.append(('ocr', wordnum)) + sp_first = -1 + sp_last = -1 + + if (gl_first != -1) and (gl_last != -1): + glyphList = [] + for glyphnum in xrange(gl_first, gl_last): + glyphList.append(glyphnum) + num = self.svgcount + self.glyphs_to_image(glyphList) + self.svgcount += 1 + result.append(('svg', num)) + gl_first = -1 + gl_last = -1 + + if (ws_first != -1) and (ws_last != -1): + for wordnum in xrange(ws_first, ws_last): + result.append(('ocr', wordnum)) + ws_first = -1 + ws_last = -1 + + line += 1 + + return pclass, result + + + def buildParagraph(self, pclass, pdesc, type, regtype) : + parares = '' + sep ='' + + classres = '' + if pclass : + classres = ' class="' + pclass + '"' + + br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical') + + handle_links = len(self.link_id) > 0 + + if (type == 'full') or (type == 'begin') : + parares += '' + + if (type == 'end'): + parares += ' ' + + lstart = len(parares) + + cnt = len(pdesc) + + for j in xrange( 0, cnt) : + + (wtype, num) = pdesc[j] + + if wtype == 'ocr' : + try: + word = self.ocrtext[num] + except: + word = "" + + sep = ' ' + + if handle_links: + link = self.link_id[num] + if (link > 0): + linktype = self.link_type[link-1] + title = self.link_title[link-1] + if (title == "") or (parares.rfind(title) < 0): + title=parares[lstart:] + if linktype == 'external' : + linkhref = self.link_href[link-1] + linkhtml = '' % linkhref + else : + if len(self.link_page) >= link : + ptarget = self.link_page[link-1] - 1 + linkhtml = '' % ptarget + else : + # just link to the current page + linkhtml = '' + linkhtml += title + '' + pos = parares.rfind(title) + if pos >= 0: + parares = parares[0:pos] + linkhtml + parares[pos+len(title):] + else : + parares += linkhtml + lstart = len(parares) + if word == '_link_' : word = '' + elif (link < 0) : + if word == '_link_' : word = '' + + if 
word == '_lb_': + if ((num-1) in self.dehyphen_rootid ) or handle_links: + word = '' + sep = '' + elif br_lb : + word = '
\n' + sep = '' + else : + word = '\n' + sep = '' + + if num in self.dehyphen_rootid : + word = word[0:-1] + sep = '' + + parares += word + sep + + elif wtype == 'img' : + sep = '' + parares += '' % num + parares += sep + + elif wtype == 'imgsa' : + sep = ' ' + parares += '' % num + parares += sep + + elif wtype == 'svg' : + sep = '' + parares += '' % num + parares += sep + + if len(sep) > 0 : parares = parares[0:-1] + if (type == 'full') or (type == 'end') : + parares += '

' + return parares + + + def buildTOCEntry(self, pdesc) : + parares = '' + sep ='' + tocentry = '' + handle_links = len(self.link_id) > 0 + + lstart = 0 + + cnt = len(pdesc) + for j in xrange( 0, cnt) : + + (wtype, num) = pdesc[j] + + if wtype == 'ocr' : + word = self.ocrtext[num] + sep = ' ' + + if handle_links: + link = self.link_id[num] + if (link > 0): + linktype = self.link_type[link-1] + title = self.link_title[link-1] + title = title.rstrip('. ') + alt_title = parares[lstart:] + alt_title = alt_title.strip() + # now strip off the actual printed page number + alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.') + alt_title = alt_title.rstrip('. ') + # skip over any external links - can't have them in a books toc + if linktype == 'external' : + title = '' + alt_title = '' + linkpage = '' + else : + if len(self.link_page) >= link : + ptarget = self.link_page[link-1] - 1 + linkpage = '%04d' % ptarget + else : + # just link to the current page + linkpage = self.id[4:] + if len(alt_title) >= len(title): + title = alt_title + if title != '' and linkpage != '': + tocentry += title + '|' + linkpage + '\n' + lstart = len(parares) + if word == '_link_' : word = '' + elif (link < 0) : + if word == '_link_' : word = '' + + if word == '_lb_': + word = '' + sep = '' + + if num in self.dehyphen_rootid : + word = word[0:-1] + sep = '' + + parares += word + sep + + else : + continue + + return tocentry + + + + + # walk the document tree collecting the information needed + # to build an html page using the ocrText + + def process(self): + + tocinfo = '' + hlst = [] + + # get the ocr text + (pos, argres) = self.findinDoc('info.word.ocrText',0,-1) + if argres : self.ocrtext = argres.split('|') + + # get information to dehyphenate the text + self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1) + + # determine if first paragraph is continued from previous page + (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1) + first_para_continued = 
(self.parastems_stemid != None) + + # determine if last paragraph is continued onto the next page + (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1) + last_para_continued = (self.paracont_stemid != None) + + # collect link ids + self.link_id = self.getData('info.word.link_id',0,-1) + + # collect link destination page numbers + self.link_page = self.getData('info.links.page',0,-1) + + # collect link types (container versus external) + (pos, argres) = self.findinDoc('info.links.type',0,-1) + if argres : self.link_type = argres.split('|') + + # collect link destinations + (pos, argres) = self.findinDoc('info.links.href',0,-1) + if argres : self.link_href = argres.split('|') + + # collect link titles + (pos, argres) = self.findinDoc('info.links.title',0,-1) + if argres : + self.link_title = argres.split('|') else: - mlst.append('\n') + self.link_title.append('') - mlst.append('' % (pp.pw, pp.ph)) - if (pp.gid != None): - mlst.append('\n') - gdefs = pp.getGlyphs() - for j in xrange(0,len(gdefs)): - mlst.append(gdefs[j]) - mlst.append('\n') - img = pp.getImages() - if (img != None): - for j in xrange(0,len(img)): - mlst.append(img[j]) - if (pp.gid != None): - for j in xrange(0,len(pp.gid)): - mlst.append('\n' % (pp.gid[j], pp.gx[j], pp.gy[j])) - if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0): - xpos = "%d" % (pp.pw // 3) - ypos = "%d" % (pp.ph // 3) - mlst.append('This page intentionally left blank.\n') - if (raw) : - mlst.append('') - else : - mlst.append('\n') - if nextid == None: - mlst.append('\n') - else : - mlst.append('\n') - mlst.append('
\n') - mlst.append('\n') - mlst.append('\n') - mlst.append('\n') - return "".join(mlst) + # get a descriptions of the starting points of the regions + # and groups on the page + (pagetype, pageDesc) = self.PageDescription() + regcnt = len(pageDesc) - 1 + + anchorSet = False + breakSet = False + inGroup = False + + # process each region on the page and convert what you can to html + + for j in xrange(regcnt): + + (etype, start) = pageDesc[j] + (ntype, end) = pageDesc[j+1] + + + # set anchor for link target on this page + if not anchorSet and not first_para_continued: + hlst.append('\n') + anchorSet = True + + # handle groups of graphics with text captions + if (etype == 'grpbeg'): + (pos, grptype) = self.findinDoc('group.type', start, end) + if grptype != None: + if grptype == 'graphic': + gcstr = ' class="' + grptype + '"' + hlst.append('') + inGroup = True + + elif (etype == 'grpend'): + if inGroup: + hlst.append('\n') + inGroup = False + + else: + (pos, regtype) = self.findinDoc('region.type',start,end) + + if regtype == 'graphic' : + (pos, simgsrc) = self.findinDoc('img.src',start,end) + if simgsrc: + if inGroup: + hlst.append('' % int(simgsrc)) + else: + hlst.append('
' % int(simgsrc)) + + elif regtype == 'chapterheading' : + (pclass, pdesc) = self.getParaDescription(start,end, regtype) + if not breakSet: + hlst.append('
 
\n') + breakSet = True + tag = 'h1' + if pclass and (len(pclass) >= 7): + if pclass[3:7] == 'ch1-' : tag = 'h1' + if pclass[3:7] == 'ch2-' : tag = 'h2' + if pclass[3:7] == 'ch3-' : tag = 'h3' + hlst.append('<' + tag + ' class="' + pclass + '">') + else: + hlst.append('<' + tag + '>') + hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) + hlst.append('') + + elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'): + ptype = 'full' + # check to see if this is a continution from the previous page + if first_para_continued : + ptype = 'end' + first_para_continued = False + (pclass, pdesc) = self.getParaDescription(start,end, regtype) + if pclass and (len(pclass) >= 6) and (ptype == 'full'): + tag = 'p' + if pclass[3:6] == 'h1-' : tag = 'h4' + if pclass[3:6] == 'h2-' : tag = 'h5' + if pclass[3:6] == 'h3-' : tag = 'h6' + hlst.append('<' + tag + ' class="' + pclass + '">') + hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) + hlst.append('') + else : + hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) + + elif (regtype == 'tocentry') : + ptype = 'full' + if first_para_continued : + ptype = 'end' + first_para_continued = False + (pclass, pdesc) = self.getParaDescription(start,end, regtype) + tocinfo += self.buildTOCEntry(pdesc) + hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) + + elif (regtype == 'vertical') or (regtype == 'table') : + ptype = 'full' + if inGroup: + ptype = 'middle' + if first_para_continued : + ptype = 'end' + first_para_continued = False + (pclass, pdesc) = self.getParaDescription(start, end, regtype) + hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) + + + elif (regtype == 'synth_fcvr.center'): + (pos, simgsrc) = self.findinDoc('img.src',start,end) + if simgsrc: + hlst.append('
' % int(simgsrc)) + + else : + print ' Making region type', regtype, + (pos, temp) = self.findinDoc('paragraph',start,end) + (pos2, temp) = self.findinDoc('span',start,end) + if pos != -1 or pos2 != -1: + print ' a "text" region' + orig_regtype = regtype + regtype = 'fixed' + ptype = 'full' + # check to see if this is a continution from the previous page + if first_para_continued : + ptype = 'end' + first_para_continued = False + (pclass, pdesc) = self.getParaDescription(start,end, regtype) + if not pclass: + if orig_regtype.endswith('.right') : pclass = 'cl-right' + elif orig_regtype.endswith('.center') : pclass = 'cl-center' + elif orig_regtype.endswith('.left') : pclass = 'cl-left' + elif orig_regtype.endswith('.justify') : pclass = 'cl-justify' + if pclass and (ptype == 'full') and (len(pclass) >= 6): + tag = 'p' + if pclass[3:6] == 'h1-' : tag = 'h4' + if pclass[3:6] == 'h2-' : tag = 'h5' + if pclass[3:6] == 'h3-' : tag = 'h6' + hlst.append('<' + tag + ' class="' + pclass + '">') + hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype)) + hlst.append('') + else : + hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype)) + else : + print ' a "graphic" region' + (pos, simgsrc) = self.findinDoc('img.src',start,end) + if simgsrc: + hlst.append('
' % int(simgsrc)) + + + htmlpage = "".join(hlst) + if last_para_continued : + if htmlpage[-4:] == '

': + htmlpage = htmlpage[0:-4] + last_para_continued = False + + return htmlpage, tocinfo + + +def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage): + # create a document parser + dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage) + htmlpage, tocinfo = dp.process() + return htmlpage, tocinfo diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py index 3ed925d..4dfd6c7 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py @@ -1,148 +1,82 @@ #! /usr/bin/python # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -class Unbuffered: - def __init__(self, stream): - self.stream = stream - def write(self, data): - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) - import sys -sys.stdout=Unbuffered(sys.stdout) - import csv import os import getopt from struct import pack from struct import unpack -class TpzDRMError(Exception): - pass - -# local support routines -if 'calibre' in sys.modules: - inCalibre = True -else: - inCalibre = False - -if inCalibre : - from calibre_plugins.dedrm import convert2xml - from calibre_plugins.dedrm import flatxml2html - from calibre_plugins.dedrm import flatxml2svg - from calibre_plugins.dedrm import stylexml2css -else : - import convert2xml - import flatxml2html - import flatxml2svg - import stylexml2css - -# global switch -buildXML = False - -# Get a 7 bit encoded number from a file -def readEncodedNumber(file): - flag = False - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - if data == 0xFF: - flag = True - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - if data >= 0x80: - datax = (data & 0x7F) - while data >= 0x80 : - c = file.read(1) - if (len(c) == 0): - return None - data = ord(c) - datax = (datax <<7) + (data & 0x7F) - data = datax - if flag: - data = -data - return data - -# Get 
a length prefixed string from the file -def lengthPrefixString(data): - return encodeNumber(len(data))+data - -def readString(file): - stringLength = readEncodedNumber(file) - if (stringLength == None): - return None - sv = file.read(stringLength) - if (len(sv) != stringLength): - return "" - return unpack(str(stringLength)+"s",sv)[0] - -def getMetaArray(metaFile): - # parse the meta file - result = {} - fo = file(metaFile,'rb') - size = readEncodedNumber(fo) - for i in xrange(size): - tag = readString(fo) - value = readString(fo) - result[tag] = value - # print tag, value - fo.close() - return result - - -# dictionary of all text strings by index value -class Dictionary(object): - def __init__(self, dictFile): - self.filename = dictFile - self.size = 0 - self.fo = file(dictFile,'rb') - self.stable = [] - self.size = readEncodedNumber(self.fo) - for i in xrange(self.size): - self.stable.append(self.escapestr(readString(self.fo))) - self.pos = 0 - def escapestr(self, str): - str = str.replace('&','&') - str = str.replace('<','<') - str = str.replace('>','>') - str = str.replace('=','=') - return str - def lookup(self,val): - if ((val >= 0) and (val < self.size)) : - self.pos = val - return self.stable[self.pos] - else: - print "Error: %d outside of string table limits" % val - raise TpzDRMError('outside or string table limits') - # sys.exit(-1) - def getSize(self): - return self.size - def getPos(self): - return self.pos - -class PageDimParser(object): - def __init__(self, flatxml): +class PParser(object): + def __init__(self, gd, flatxml, meta_array): + self.gd = gd self.flatdoc = flatxml.split('\n') - # find tag if within pos to end inclusive + self.docSize = len(self.flatdoc) + self.temp = [] + + self.ph = -1 + self.pw = -1 + startpos = self.posinDoc('page.h') or self.posinDoc('book.h') + for p in startpos: + (name, argres) = self.lineinDoc(p) + self.ph = max(self.ph, int(argres)) + startpos = self.posinDoc('page.w') or self.posinDoc('book.w') + for p in 
startpos: + (name, argres) = self.lineinDoc(p) + self.pw = max(self.pw, int(argres)) + + if self.ph <= 0: + self.ph = int(meta_array.get('pageHeight', '11000')) + if self.pw <= 0: + self.pw = int(meta_array.get('pageWidth', '8500')) + + res = [] + startpos = self.posinDoc('info.glyph.x') + for p in startpos: + argres = self.getDataatPos('info.glyph.x', p) + res.extend(argres) + self.gx = res + + res = [] + startpos = self.posinDoc('info.glyph.y') + for p in startpos: + argres = self.getDataatPos('info.glyph.y', p) + res.extend(argres) + self.gy = res + + res = [] + startpos = self.posinDoc('info.glyph.glyphID') + for p in startpos: + argres = self.getDataatPos('info.glyph.glyphID', p) + res.extend(argres) + self.gid = res + + + # return tag at line pos in document + def lineinDoc(self, pos) : + if (pos >= 0) and (pos < self.docSize) : + item = self.flatdoc[pos] + if item.find('=') >= 0: + (name, argres) = item.split('=',1) + else : + name = item + argres = '' + return name, argres + + # find tag in doc if within pos to end inclusive def findinDoc(self, tagpath, pos, end) : result = None - docList = self.flatdoc - cnt = len(docList) if end == -1 : - end = cnt + end = self.docSize else: - end = min(cnt,end) + end = min(self.docSize, end) foundat = -1 for j in xrange(pos, end): - item = docList[j] + item = self.flatdoc[j] if item.find('=') >= 0: - (name, argres) = item.split('=') + (name, argres) = item.split('=',1) else : name = item argres = '' @@ -151,44 +85,19 @@ class PageDimParser(object): foundat = j break return foundat, result - def process(self): - (pos, sph) = self.findinDoc('page.h',0,-1) - (pos, spw) = self.findinDoc('page.w',0,-1) - if (sph == None): sph = '-1' - if (spw == None): spw = '-1' - return sph, spw -def getPageDim(flatxml): - # create a document parser - dp = PageDimParser(flatxml) - (ph, pw) = dp.process() - return ph, pw + # return list of start positions for the tagpath + def posinDoc(self, tagpath): + startpos = [] + pos = 0 + res = "" + 
while res != None : + (foundpos, res) = self.findinDoc(tagpath, pos, -1) + if res != None : + startpos.append(foundpos) + pos = foundpos + 1 + return startpos -class GParser(object): - def __init__(self, flatxml): - self.flatdoc = flatxml.split('\n') - self.dpi = 1440 - self.gh = self.getData('info.glyph.h') - self.gw = self.getData('info.glyph.w') - self.guse = self.getData('info.glyph.use') - if self.guse : - self.count = len(self.guse) - else : - self.count = 0 - self.gvtx = self.getData('info.glyph.vtx') - self.glen = self.getData('info.glyph.len') - self.gdpi = self.getData('info.glyph.dpi') - self.vx = self.getData('info.vtx.x') - self.vy = self.getData('info.vtx.y') - self.vlen = self.getData('info.len.n') - if self.vlen : - self.glen.append(len(self.vlen)) - elif self.glen: - self.glen.append(0) - if self.vx : - self.gvtx.append(len(self.vx)) - elif self.gvtx : - self.gvtx.append(0) def getData(self, path): result = None cnt = len(self.flatdoc) @@ -200,522 +109,141 @@ class GParser(object): else: name = item argres = [] - if (name == path): + if (name.endswith(path)): result = argres break if (len(argres) > 0) : for j in xrange(0,len(argres)): argres[j] = int(argres[j]) return result - def getGlyphDim(self, gly): - if self.gdpi[gly] == 0: - return 0, 0 - maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly] - maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly] - return maxh, maxw - def getPath(self, gly): - path = '' - if (gly < 0) or (gly >= self.count): - return path - tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]] - ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]] - p = 0 - for k in xrange(self.glen[gly], self.glen[gly+1]): - if (p == 0): - zx = tx[0:self.vlen[k]+1] - zy = ty[0:self.vlen[k]+1] - else: - zx = tx[self.vlen[k-1]+1:self.vlen[k]+1] - zy = ty[self.vlen[k-1]+1:self.vlen[k]+1] - p += 1 - j = 0 - while ( j < len(zx) ): - if (j == 0): - # Start Position. 
- path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly]) - elif (j <= len(zx)-3): - # Cubic Bezier Curve - path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly]) - j += 2 - elif (j == len(zx)-2): - # Cubic Bezier Curve to Start Position - path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly]) - j += 1 - elif (j == len(zx)-1): - # Quadratic Bezier Curve to Start Position - path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly]) - - j += 1 - path += 'z' - return path - - - -# dictionary of all text strings by index value -class GlyphDict(object): - def __init__(self): - self.gdict = {} - def lookup(self, id): - # id='id="gl%d"' % val - if id in self.gdict: - return self.gdict[id] - return None - def addGlyph(self, val, path): - id='id="gl%d"' % val - self.gdict[id] = path - -def generateBook(bookDir, raw, fixedimage): - # sanity check Topaz file extraction - if not os.path.exists(bookDir) : - print "Can not find directory with unencrypted book" - return 1 - - dictFile = os.path.join(bookDir,'dict0000.dat') - if not os.path.exists(dictFile) : - print "Can not find dict0000.dat file" - return 1 - - pageDir = os.path.join(bookDir,'page') - if not os.path.exists(pageDir) : - print "Can not find page directory in unencrypted book" - return 1 - - imgDir = os.path.join(bookDir,'img') - if not os.path.exists(imgDir) : - print "Can not find image directory in unencrypted book" - return 1 - - glyphsDir = os.path.join(bookDir,'glyphs') - if not 
os.path.exists(glyphsDir) : - print "Can not find glyphs directory in unencrypted book" - return 1 - - metaFile = os.path.join(bookDir,'metadata0000.dat') - if not os.path.exists(metaFile) : - print "Can not find metadata0000.dat in unencrypted book" - return 1 - - svgDir = os.path.join(bookDir,'svg') - if not os.path.exists(svgDir) : - os.makedirs(svgDir) - - if buildXML: - xmlDir = os.path.join(bookDir,'xml') - if not os.path.exists(xmlDir) : - os.makedirs(xmlDir) - - otherFile = os.path.join(bookDir,'other0000.dat') - if not os.path.exists(otherFile) : - print "Can not find other0000.dat in unencrypted book" - return 1 - - print "Updating to color images if available" - spath = os.path.join(bookDir,'color_img') - dpath = os.path.join(bookDir,'img') - filenames = os.listdir(spath) - filenames = sorted(filenames) - for filename in filenames: - imgname = filename.replace('color','img') - sfile = os.path.join(spath,filename) - dfile = os.path.join(dpath,imgname) - imgdata = file(sfile,'rb').read() - file(dfile,'wb').write(imgdata) - - print "Creating cover.jpg" - isCover = False - cpath = os.path.join(bookDir,'img') - cpath = os.path.join(cpath,'img0000.jpg') - if os.path.isfile(cpath): - cover = file(cpath, 'rb').read() - cpath = os.path.join(bookDir,'cover.jpg') - file(cpath, 'wb').write(cover) - isCover = True - - - print 'Processing Dictionary' - dict = Dictionary(dictFile) - - print 'Processing Meta Data and creating OPF' - meta_array = getMetaArray(metaFile) - - # replace special chars in title and authors like & < > - title = meta_array.get('Title','No Title Provided') - title = title.replace('&','&') - title = title.replace('<','<') - title = title.replace('>','>') - meta_array['Title'] = title - authors = meta_array.get('Authors','No Authors Provided') - authors = authors.replace('&','&') - authors = authors.replace('<','<') - authors = authors.replace('>','>') - meta_array['Authors'] = authors - - if buildXML: - xname = os.path.join(xmlDir, 'metadata.xml') 
- mlst = [] - for key in meta_array: - mlst.append('\n') - metastr = "".join(mlst) - mlst = None - file(xname, 'wb').write(metastr) - - print 'Processing StyleSheet' - - # get some scaling info from metadata to use while processing styles - # and first page info - - fontsize = '135' - if 'fontSize' in meta_array: - fontsize = meta_array['fontSize'] - - # also get the size of a normal text page - # get the total number of pages unpacked as a safety check - filenames = os.listdir(pageDir) - numfiles = len(filenames) - - spage = '1' - if 'firstTextPage' in meta_array: - spage = meta_array['firstTextPage'] - pnum = int(spage) - if pnum >= numfiles or pnum < 0: - # metadata is wrong so just select a page near the front - # 10% of the book to get a normal text page - pnum = int(0.10 * numfiles) - # print "first normal text page is", spage - - # get page height and width from first text page for use in stylesheet scaling - pname = 'page%04d.dat' % (pnum + 1) - fname = os.path.join(pageDir,pname) - flat_xml = convert2xml.fromData(dict, fname) - - (ph, pw) = getPageDim(flat_xml) - if (ph == '-1') or (ph == '0') : ph = '11000' - if (pw == '-1') or (pw == '0') : pw = '8500' - meta_array['pageHeight'] = ph - meta_array['pageWidth'] = pw - if 'fontSize' not in meta_array.keys(): - meta_array['fontSize'] = fontsize - - # process other.dat for css info and for map of page files to svg images - # this map is needed because some pages actually are made up of multiple - # pageXXXX.xml files - xname = os.path.join(bookDir, 'style.css') - flat_xml = convert2xml.fromData(dict, otherFile) - - # extract info.original.pid to get original page information - pageIDMap = {} - pageidnums = stylexml2css.getpageIDMap(flat_xml) - if len(pageidnums) == 0: - filenames = os.listdir(pageDir) - numfiles = len(filenames) - for k in range(numfiles): - pageidnums.append(k) - # create a map from page ids to list of page file nums to process for that page - for i in range(len(pageidnums)): - id = 
pageidnums[i] - if id in pageIDMap.keys(): - pageIDMap[id].append(i) + def getDataatPos(self, path, pos): + result = None + item = self.flatdoc[pos] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') else: - pageIDMap[id] = [i] - - # now get the css info - cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw) - file(xname, 'wb').write(cssstr) - if buildXML: - xname = os.path.join(xmlDir, 'other0000.xml') - file(xname, 'wb').write(convert2xml.getXML(dict, otherFile)) - - print 'Processing Glyphs' - gd = GlyphDict() - filenames = os.listdir(glyphsDir) - filenames = sorted(filenames) - glyfname = os.path.join(svgDir,'glyphs.svg') - glyfile = open(glyfname, 'w') - glyfile.write('\n') - glyfile.write('\n') - glyfile.write('\n') - glyfile.write('Glyphs for %s\n' % meta_array['Title']) - glyfile.write('\n') - counter = 0 - for filename in filenames: - # print ' ', filename - print '.', - fname = os.path.join(glyphsDir,filename) - flat_xml = convert2xml.fromData(dict, fname) - - if buildXML: - xname = os.path.join(xmlDir, filename.replace('.dat','.xml')) - file(xname, 'wb').write(convert2xml.getXML(dict, fname)) - - gp = GParser(flat_xml) - for i in xrange(0, gp.count): - path = gp.getPath(i) - maxh, maxw = gp.getGlyphDim(i) - fullpath = '\n' % (counter * 256 + i, path, maxw, maxh) - glyfile.write(fullpath) - gd.addGlyph(counter * 256 + i, fullpath) - counter += 1 - glyfile.write('\n') - glyfile.write('\n') - glyfile.close() - print " " - - - # start up the html - # also build up tocentries while processing html - htmlFileName = "book.html" - hlst = [] - hlst.append('\n') - hlst.append('\n') - hlst.append('\n') - hlst.append('\n') - hlst.append('\n') - hlst.append('' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '\n') - hlst.append('\n') - hlst.append('\n') - if 'ASIN' in meta_array: - hlst.append('\n') - if 'GUID' in meta_array: - hlst.append('\n') - hlst.append('\n') - hlst.append('\n\n') - - print 
'Processing Pages' - # Books are at 1440 DPI. This is rendering at twice that size for - # readability when rendering to the screen. - scaledpi = 1440.0 - - filenames = os.listdir(pageDir) - filenames = sorted(filenames) - numfiles = len(filenames) - - xmllst = [] - elst = [] - - for filename in filenames: - # print ' ', filename - print ".", - fname = os.path.join(pageDir,filename) - flat_xml = convert2xml.fromData(dict, fname) - - # keep flat_xml for later svg processing - xmllst.append(flat_xml) - - if buildXML: - xname = os.path.join(xmlDir, filename.replace('.dat','.xml')) - file(xname, 'wb').write(convert2xml.getXML(dict, fname)) + name = item + argres = [] + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + if (name.endswith(path)): + result = argres + return result - # first get the html - pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage) - elst.append(tocinfo) - hlst.append(pagehtml) + def getDataTemp(self, path): + result = None + cnt = len(self.temp) + for j in xrange(cnt): + item = self.temp[j] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') + else: + name = item + argres = [] + if (name.endswith(path)): + result = argres + self.temp.pop(j) + break + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + return result - # finish up the html string and output it - hlst.append('\n\n') - htmlstr = "".join(hlst) - hlst = None - file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr) + def getImages(self): + result = [] + self.temp = self.flatdoc + while (self.getDataTemp('img') != None): + h = self.getDataTemp('img.h')[0] + w = self.getDataTemp('img.w')[0] + x = self.getDataTemp('img.x')[0] + y = self.getDataTemp('img.y')[0] + src = self.getDataTemp('img.src')[0] + result.append('\n' % (src, x, y, w, h)) + return result - print " " - print 'Extracting Table of Contents from Amazon OCR' + def 
getGlyphs(self): + result = [] + if (self.gid != None) and (len(self.gid) > 0): + glyphs = [] + for j in set(self.gid): + glyphs.append(j) + glyphs.sort() + for gid in glyphs: + id='id="gl%d"' % gid + path = self.gd.lookup(id) + if path: + result.append(id + ' ' + path) + return result - # first create a table of contents file for the svg images - tlst = [] - tlst.append('\n') - tlst.append('\n') - tlst.append('') - tlst.append('\n') - tlst.append('' + meta_array['Title'] + '\n') - tlst.append('\n') - tlst.append('\n') - if 'ASIN' in meta_array: - tlst.append('\n') - if 'GUID' in meta_array: - tlst.append('\n') - tlst.append('\n') - tlst.append('\n') - tlst.append('

Table of Contents

\n') - start = pageidnums[0] +def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi): + mlst = [] + pp = PParser(gdict, flat_xml, meta_array) + mlst.append('\n') if (raw): - startname = 'page%04d.svg' % start + mlst.append('\n') + mlst.append('\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1)) + mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) else: - startname = 'page%04d.xhtml' % start - - tlst.append('

Start of Book

\n') - # build up a table of contents for the svg xhtml output - tocentries = "".join(elst) - elst = None - toclst = tocentries.split('\n') - toclst.pop() - for entry in toclst: - print entry - title, pagenum = entry.split('|') - id = pageidnums[int(pagenum)] - if (raw): - fname = 'page%04d.svg' % id + mlst.append('\n') + mlst.append('\n') + mlst.append('Page %d - %s by %s\n' % (pageid, meta_array['Title'],meta_array['Authors'])) + mlst.append('\n') + mlst.append('\n') + mlst.append('\n') + mlst.append('
\n') + if previd == None: + mlst.append('\n') else: - fname = 'page%04d.xhtml' % id - tlst.append('

' + title + '

\n') - tlst.append('\n') - tlst.append('\n') - tochtml = "".join(tlst) - file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml) - - - # now create index_svg.xhtml that points to all required files - slst = [] - slst.append('\n') - slst.append('\n') - slst.append('') - slst.append('\n') - slst.append('' + meta_array['Title'] + '\n') - slst.append('\n') - slst.append('\n') - if 'ASIN' in meta_array: - slst.append('\n') - if 'GUID' in meta_array: - slst.append('\n') - slst.append('\n') - slst.append('\n') - - print "Building svg images of each book page" - slst.append('

List of Pages

\n') - slst.append('
\n') - idlst = sorted(pageIDMap.keys()) - numids = len(idlst) - cnt = len(idlst) - previd = None - for j in range(cnt): - pageid = idlst[j] - if j < cnt - 1: - nextid = idlst[j+1] - else: - nextid = None - print '.', - pagelst = pageIDMap[pageid] - flst = [] - for page in pagelst: - flst.append(xmllst[page]) - flat_svg = "".join(flst) - flst=None - svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi) - if (raw) : - pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w') - slst.append('Page %d\n' % (pageid, pageid)) + mlst.append('\n') + + mlst.append('' % (pp.pw, pp.ph)) + if (pp.gid != None): + mlst.append('\n') + gdefs = pp.getGlyphs() + for j in xrange(0,len(gdefs)): + mlst.append(gdefs[j]) + mlst.append('\n') + img = pp.getImages() + if (img != None): + for j in xrange(0,len(img)): + mlst.append(img[j]) + if (pp.gid != None): + for j in xrange(0,len(pp.gid)): + mlst.append('\n' % (pp.gid[j], pp.gx[j], pp.gy[j])) + if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0): + xpos = "%d" % (pp.pw // 3) + ypos = "%d" % (pp.ph // 3) + mlst.append('This page intentionally left blank.\n') + if (raw) : + mlst.append('') + else : + mlst.append('\n') + if nextid == None: + mlst.append('\n') else : - pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w') - slst.append('Page %d\n' % (pageid, pageid)) - previd = pageid - pfile.write(svgxml) - pfile.close() - counter += 1 - slst.append('
\n') - slst.append('

Table of Contents

\n') - slst.append('\n\n') - svgindex = "".join(slst) - slst = None - file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex) - - print " " - - # build the opf file - opfname = os.path.join(bookDir, 'book.opf') - olst = [] - olst.append('\n') - olst.append('\n') - # adding metadata - olst.append(' \n') - if 'GUID' in meta_array: - olst.append(' ' + meta_array['GUID'] + '\n') - if 'ASIN' in meta_array: - olst.append(' ' + meta_array['ASIN'] + '\n') - if 'oASIN' in meta_array: - olst.append(' ' + meta_array['oASIN'] + '\n') - olst.append(' ' + meta_array['Title'] + '\n') - olst.append(' ' + meta_array['Authors'] + '\n') - olst.append(' en\n') - olst.append(' ' + meta_array['UpdateTime'] + '\n') - if isCover: - olst.append(' \n') - olst.append(' \n') - olst.append('\n') - olst.append(' \n') - olst.append(' \n') - # adding image files to manifest - filenames = os.listdir(imgDir) - filenames = sorted(filenames) - for filename in filenames: - imgname, imgext = os.path.splitext(filename) - if imgext == '.jpg': - imgext = 'jpeg' - if imgext == '.svg': - imgext = 'svg+xml' - olst.append(' \n') - if isCover: - olst.append(' \n') - olst.append('\n') - # adding spine - olst.append('\n \n\n') - if isCover: - olst.append(' \n') - olst.append(' \n') - olst.append(' \n') - olst.append('\n') - opfstr = "".join(olst) - olst = None - file(opfname, 'wb').write(opfstr) - - print 'Processing Complete' - - return 0 - -def usage(): - print "genbook.py generates a book from the extract Topaz Files" - print "Usage:" - print " genbook.py [-r] [-h [--fixed-image] " - print " " - print "Options:" - print " -h : help - print this usage message" - print " -r : generate raw svg files (not wrapped in xhtml)" - print " --fixed-image : genearate any Fixed Area as an svg image in the html" - print " " - - -def main(argv): - bookDir = '' - if len(argv) == 0: - argv = sys.argv - - try: - opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"]) - - except getopt.GetoptError, err: - print 
str(err) - usage() - return 1 - - if len(opts) == 0 and len(args) == 0 : - usage() - return 1 - - raw = 0 - fixedimage = True - for o, a in opts: - if o =="-h": - usage() - return 0 - if o =="-r": - raw = 1 - if o =="--fixed-image": - fixedimage = True - - bookDir = args[0] - - rv = generateBook(bookDir, raw, fixedimage) - return rv - - -if __name__ == '__main__': - sys.exit(main('')) + mlst.append('\n') + mlst.append('
\n') + mlst.append('\n') + mlst.append('\n') + mlst.append('\n') + return "".join(mlst) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/genbook.py b/DeDRM_calibre_plugin/DeDRM_plugin/genbook.py index ac73d1e..3ed925d 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/genbook.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/genbook.py @@ -1,452 +1,721 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import with_statement - -# ignobleepub.pyw, version 3.8 -# Copyright © 2009-2010 by i♥cabbages - -# Released under the terms of the GNU General Public Licence, version 3 -# - -# Modified 2010–2013 by some_updates, DiapDealer and Apprentice Alf - -# Windows users: Before running this program, you must first install Python 2.6 -# from and PyCrypto from -# (make sure to -# install the version for Python 2.6). Save this script file as -# ineptepub.pyw and double-click on it to run it. -# -# Mac OS X users: Save this script file as ineptepub.pyw. You can run this -# program from the command line (pythonw ineptepub.pyw) or by double-clicking -# it when it has been associated with PythonLauncher. - -# Revision history: -# 1 - Initial release -# 2 - Added OS X support by using OpenSSL when available -# 3 - screen out improper key lengths to prevent segfaults on Linux -# 3.1 - Allow Windows versions of libcrypto to be found -# 3.2 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml -# 3.3 - On Windows try PyCrypto first, OpenSSL next -# 3.4 - Modify interface to allow use with import -# 3.5 - Fix for potential problem with PyCrypto -# 3.6 - Revised to allow use in calibre plugins to eliminate need for duplicate code -# 3.7 - Tweaked to match ineptepub more closely -# 3.8 - Fixed to retain zip file metadata (e.g. file modification date) -# 3.9 - moved unicode_argv call inside main for Windows DeDRM compatibility -# 4.0 - Work if TkInter is missing - -""" -Decrypt Barnes & Noble encrypted ePub books. 
-""" - -__license__ = 'GPL v3' -__version__ = "4.0" +#! /usr/bin/python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -import sys -import os -import traceback -import zlib -import zipfile -from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED -from contextlib import closing -import xml.etree.ElementTree as etree - -# Wrap a stream so that output gets flushed immediately -# and also make sure that any unicode strings get -# encoded using "replace" before writing them. -class SafeUnbuffered: +class Unbuffered: def __init__(self, stream): self.stream = stream - self.encoding = stream.encoding - if self.encoding == None: - self.encoding = "utf-8" def write(self, data): - if isinstance(data,unicode): - data = data.encode(self.encoding,"replace") self.stream.write(data) self.stream.flush() def __getattr__(self, attr): return getattr(self.stream, attr) -try: - from calibre.constants import iswindows, isosx -except: - iswindows = sys.platform.startswith('win') - isosx = sys.platform.startswith('darwin') - -def unicode_argv(): - if iswindows: - # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode - # strings. - - # Versions 2.x of Python don't support Unicode in sys.argv on - # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. 
- - - from ctypes import POINTER, byref, cdll, c_int, windll - from ctypes.wintypes import LPCWSTR, LPWSTR - - GetCommandLineW = cdll.kernel32.GetCommandLineW - GetCommandLineW.argtypes = [] - GetCommandLineW.restype = LPCWSTR - - CommandLineToArgvW = windll.shell32.CommandLineToArgvW - CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] - CommandLineToArgvW.restype = POINTER(LPWSTR) - - cmd = GetCommandLineW() - argc = c_int(0) - argv = CommandLineToArgvW(cmd, byref(argc)) - if argc.value > 0: - # Remove Python executable and commands if present - start = argc.value - len(sys.argv) - return [argv[i] for i in - xrange(start, argc.value)] - return [u"ineptepub.py"] - else: - argvencoding = sys.stdin.encoding - if argvencoding == None: - argvencoding = "utf-8" - return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] +import sys +sys.stdout=Unbuffered(sys.stdout) +import csv +import os +import getopt +from struct import pack +from struct import unpack -class IGNOBLEError(Exception): +class TpzDRMError(Exception): pass -def _load_crypto_libcrypto(): - from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \ - Structure, c_ulong, create_string_buffer, cast - from ctypes.util import find_library +# local support routines +if 'calibre' in sys.modules: + inCalibre = True +else: + inCalibre = False + +if inCalibre : + from calibre_plugins.dedrm import convert2xml + from calibre_plugins.dedrm import flatxml2html + from calibre_plugins.dedrm import flatxml2svg + from calibre_plugins.dedrm import stylexml2css +else : + import convert2xml + import flatxml2html + import flatxml2svg + import stylexml2css + +# global switch +buildXML = False + +# Get a 7 bit encoded number from a file +def readEncodedNumber(file): + flag = False + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + if data == 0xFF: + flag = True + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + if data >= 0x80: + datax = 
(data & 0x7F) + while data >= 0x80 : + c = file.read(1) + if (len(c) == 0): + return None + data = ord(c) + datax = (datax <<7) + (data & 0x7F) + data = datax + if flag: + data = -data + return data + +# Get a length prefixed string from the file +def lengthPrefixString(data): + return encodeNumber(len(data))+data + +def readString(file): + stringLength = readEncodedNumber(file) + if (stringLength == None): + return None + sv = file.read(stringLength) + if (len(sv) != stringLength): + return "" + return unpack(str(stringLength)+"s",sv)[0] + +def getMetaArray(metaFile): + # parse the meta file + result = {} + fo = file(metaFile,'rb') + size = readEncodedNumber(fo) + for i in xrange(size): + tag = readString(fo) + value = readString(fo) + result[tag] = value + # print tag, value + fo.close() + return result + + +# dictionary of all text strings by index value +class Dictionary(object): + def __init__(self, dictFile): + self.filename = dictFile + self.size = 0 + self.fo = file(dictFile,'rb') + self.stable = [] + self.size = readEncodedNumber(self.fo) + for i in xrange(self.size): + self.stable.append(self.escapestr(readString(self.fo))) + self.pos = 0 + def escapestr(self, str): + str = str.replace('&','&') + str = str.replace('<','<') + str = str.replace('>','>') + str = str.replace('=','=') + return str + def lookup(self,val): + if ((val >= 0) and (val < self.size)) : + self.pos = val + return self.stable[self.pos] + else: + print "Error: %d outside of string table limits" % val + raise TpzDRMError('outside or string table limits') + # sys.exit(-1) + def getSize(self): + return self.size + def getPos(self): + return self.pos + + +class PageDimParser(object): + def __init__(self, flatxml): + self.flatdoc = flatxml.split('\n') + # find tag if within pos to end inclusive + def findinDoc(self, tagpath, pos, end) : + result = None + docList = self.flatdoc + cnt = len(docList) + if end == -1 : + end = cnt + else: + end = min(cnt,end) + foundat = -1 + for j in xrange(pos, 
end): + item = docList[j] + if item.find('=') >= 0: + (name, argres) = item.split('=') + else : + name = item + argres = '' + if name.endswith(tagpath) : + result = argres + foundat = j + break + return foundat, result + def process(self): + (pos, sph) = self.findinDoc('page.h',0,-1) + (pos, spw) = self.findinDoc('page.w',0,-1) + if (sph == None): sph = '-1' + if (spw == None): spw = '-1' + return sph, spw + +def getPageDim(flatxml): + # create a document parser + dp = PageDimParser(flatxml) + (ph, pw) = dp.process() + return ph, pw + +class GParser(object): + def __init__(self, flatxml): + self.flatdoc = flatxml.split('\n') + self.dpi = 1440 + self.gh = self.getData('info.glyph.h') + self.gw = self.getData('info.glyph.w') + self.guse = self.getData('info.glyph.use') + if self.guse : + self.count = len(self.guse) + else : + self.count = 0 + self.gvtx = self.getData('info.glyph.vtx') + self.glen = self.getData('info.glyph.len') + self.gdpi = self.getData('info.glyph.dpi') + self.vx = self.getData('info.vtx.x') + self.vy = self.getData('info.vtx.y') + self.vlen = self.getData('info.len.n') + if self.vlen : + self.glen.append(len(self.vlen)) + elif self.glen: + self.glen.append(0) + if self.vx : + self.gvtx.append(len(self.vx)) + elif self.gvtx : + self.gvtx.append(0) + def getData(self, path): + result = None + cnt = len(self.flatdoc) + for j in xrange(cnt): + item = self.flatdoc[j] + if item.find('=') >= 0: + (name, argt) = item.split('=') + argres = argt.split('|') + else: + name = item + argres = [] + if (name == path): + result = argres + break + if (len(argres) > 0) : + for j in xrange(0,len(argres)): + argres[j] = int(argres[j]) + return result + def getGlyphDim(self, gly): + if self.gdpi[gly] == 0: + return 0, 0 + maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly] + maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly] + return maxh, maxw + def getPath(self, gly): + path = '' + if (gly < 0) or (gly >= self.count): + return path + tx = 
self.vx[self.gvtx[gly]:self.gvtx[gly+1]] + ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]] + p = 0 + for k in xrange(self.glen[gly], self.glen[gly+1]): + if (p == 0): + zx = tx[0:self.vlen[k]+1] + zy = ty[0:self.vlen[k]+1] + else: + zx = tx[self.vlen[k-1]+1:self.vlen[k]+1] + zy = ty[self.vlen[k-1]+1:self.vlen[k]+1] + p += 1 + j = 0 + while ( j < len(zx) ): + if (j == 0): + # Start Position. + path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly]) + elif (j <= len(zx)-3): + # Cubic Bezier Curve + path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly]) + j += 2 + elif (j == len(zx)-2): + # Cubic Bezier Curve to Start Position + path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly]) + j += 1 + elif (j == len(zx)-1): + # Quadratic Bezier Curve to Start Position + path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly]) + + j += 1 + path += 'z' + return path + + + +# dictionary of all text strings by index value +class GlyphDict(object): + def __init__(self): + self.gdict = {} + def lookup(self, id): + # id='id="gl%d"' % val + if id in self.gdict: + return self.gdict[id] + return None + def addGlyph(self, val, path): + id='id="gl%d"' % val + self.gdict[id] = path + + +def generateBook(bookDir, raw, fixedimage): + # sanity check Topaz file extraction + if not os.path.exists(bookDir) : + print "Can not find directory with unencrypted book" + return 1 + + dictFile = os.path.join(bookDir,'dict0000.dat') + if not os.path.exists(dictFile) : + print 
"Can not find dict0000.dat file" + return 1 + + pageDir = os.path.join(bookDir,'page') + if not os.path.exists(pageDir) : + print "Can not find page directory in unencrypted book" + return 1 + + imgDir = os.path.join(bookDir,'img') + if not os.path.exists(imgDir) : + print "Can not find image directory in unencrypted book" + return 1 - if iswindows: - libcrypto = find_library('libeay32') + glyphsDir = os.path.join(bookDir,'glyphs') + if not os.path.exists(glyphsDir) : + print "Can not find glyphs directory in unencrypted book" + return 1 + + metaFile = os.path.join(bookDir,'metadata0000.dat') + if not os.path.exists(metaFile) : + print "Can not find metadata0000.dat in unencrypted book" + return 1 + + svgDir = os.path.join(bookDir,'svg') + if not os.path.exists(svgDir) : + os.makedirs(svgDir) + + if buildXML: + xmlDir = os.path.join(bookDir,'xml') + if not os.path.exists(xmlDir) : + os.makedirs(xmlDir) + + otherFile = os.path.join(bookDir,'other0000.dat') + if not os.path.exists(otherFile) : + print "Can not find other0000.dat in unencrypted book" + return 1 + + print "Updating to color images if available" + spath = os.path.join(bookDir,'color_img') + dpath = os.path.join(bookDir,'img') + filenames = os.listdir(spath) + filenames = sorted(filenames) + for filename in filenames: + imgname = filename.replace('color','img') + sfile = os.path.join(spath,filename) + dfile = os.path.join(dpath,imgname) + imgdata = file(sfile,'rb').read() + file(dfile,'wb').write(imgdata) + + print "Creating cover.jpg" + isCover = False + cpath = os.path.join(bookDir,'img') + cpath = os.path.join(cpath,'img0000.jpg') + if os.path.isfile(cpath): + cover = file(cpath, 'rb').read() + cpath = os.path.join(bookDir,'cover.jpg') + file(cpath, 'wb').write(cover) + isCover = True + + + print 'Processing Dictionary' + dict = Dictionary(dictFile) + + print 'Processing Meta Data and creating OPF' + meta_array = getMetaArray(metaFile) + + # replace special chars in title and authors like & < > + 
title = meta_array.get('Title','No Title Provided') + title = title.replace('&','&') + title = title.replace('<','<') + title = title.replace('>','>') + meta_array['Title'] = title + authors = meta_array.get('Authors','No Authors Provided') + authors = authors.replace('&','&') + authors = authors.replace('<','<') + authors = authors.replace('>','>') + meta_array['Authors'] = authors + + if buildXML: + xname = os.path.join(xmlDir, 'metadata.xml') + mlst = [] + for key in meta_array: + mlst.append('\n') + metastr = "".join(mlst) + mlst = None + file(xname, 'wb').write(metastr) + + print 'Processing StyleSheet' + + # get some scaling info from metadata to use while processing styles + # and first page info + + fontsize = '135' + if 'fontSize' in meta_array: + fontsize = meta_array['fontSize'] + + # also get the size of a normal text page + # get the total number of pages unpacked as a safety check + filenames = os.listdir(pageDir) + numfiles = len(filenames) + + spage = '1' + if 'firstTextPage' in meta_array: + spage = meta_array['firstTextPage'] + pnum = int(spage) + if pnum >= numfiles or pnum < 0: + # metadata is wrong so just select a page near the front + # 10% of the book to get a normal text page + pnum = int(0.10 * numfiles) + # print "first normal text page is", spage + + # get page height and width from first text page for use in stylesheet scaling + pname = 'page%04d.dat' % (pnum + 1) + fname = os.path.join(pageDir,pname) + flat_xml = convert2xml.fromData(dict, fname) + + (ph, pw) = getPageDim(flat_xml) + if (ph == '-1') or (ph == '0') : ph = '11000' + if (pw == '-1') or (pw == '0') : pw = '8500' + meta_array['pageHeight'] = ph + meta_array['pageWidth'] = pw + if 'fontSize' not in meta_array.keys(): + meta_array['fontSize'] = fontsize + + # process other.dat for css info and for map of page files to svg images + # this map is needed because some pages actually are made up of multiple + # pageXXXX.xml files + xname = os.path.join(bookDir, 'style.css') + 
flat_xml = convert2xml.fromData(dict, otherFile) + + # extract info.original.pid to get original page information + pageIDMap = {} + pageidnums = stylexml2css.getpageIDMap(flat_xml) + if len(pageidnums) == 0: + filenames = os.listdir(pageDir) + numfiles = len(filenames) + for k in range(numfiles): + pageidnums.append(k) + # create a map from page ids to list of page file nums to process for that page + for i in range(len(pageidnums)): + id = pageidnums[i] + if id in pageIDMap.keys(): + pageIDMap[id].append(i) + else: + pageIDMap[id] = [i] + + # now get the css info + cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw) + file(xname, 'wb').write(cssstr) + if buildXML: + xname = os.path.join(xmlDir, 'other0000.xml') + file(xname, 'wb').write(convert2xml.getXML(dict, otherFile)) + + print 'Processing Glyphs' + gd = GlyphDict() + filenames = os.listdir(glyphsDir) + filenames = sorted(filenames) + glyfname = os.path.join(svgDir,'glyphs.svg') + glyfile = open(glyfname, 'w') + glyfile.write('\n') + glyfile.write('\n') + glyfile.write('\n') + glyfile.write('Glyphs for %s\n' % meta_array['Title']) + glyfile.write('\n') + counter = 0 + for filename in filenames: + # print ' ', filename + print '.', + fname = os.path.join(glyphsDir,filename) + flat_xml = convert2xml.fromData(dict, fname) + + if buildXML: + xname = os.path.join(xmlDir, filename.replace('.dat','.xml')) + file(xname, 'wb').write(convert2xml.getXML(dict, fname)) + + gp = GParser(flat_xml) + for i in xrange(0, gp.count): + path = gp.getPath(i) + maxh, maxw = gp.getGlyphDim(i) + fullpath = '\n' % (counter * 256 + i, path, maxw, maxh) + glyfile.write(fullpath) + gd.addGlyph(counter * 256 + i, fullpath) + counter += 1 + glyfile.write('\n') + glyfile.write('\n') + glyfile.close() + print " " + + + # start up the html + # also build up tocentries while processing html + htmlFileName = "book.html" + hlst = [] + hlst.append('\n') + hlst.append('\n') + hlst.append('\n') + hlst.append('\n') + 
hlst.append('\n') + hlst.append('' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '\n') + hlst.append('\n') + hlst.append('\n') + if 'ASIN' in meta_array: + hlst.append('\n') + if 'GUID' in meta_array: + hlst.append('\n') + hlst.append('\n') + hlst.append('\n\n') + + print 'Processing Pages' + # Books are at 1440 DPI. This is rendering at twice that size for + # readability when rendering to the screen. + scaledpi = 1440.0 + + filenames = os.listdir(pageDir) + filenames = sorted(filenames) + numfiles = len(filenames) + + xmllst = [] + elst = [] + + for filename in filenames: + # print ' ', filename + print ".", + fname = os.path.join(pageDir,filename) + flat_xml = convert2xml.fromData(dict, fname) + + # keep flat_xml for later svg processing + xmllst.append(flat_xml) + + if buildXML: + xname = os.path.join(xmlDir, filename.replace('.dat','.xml')) + file(xname, 'wb').write(convert2xml.getXML(dict, fname)) + + # first get the html + pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage) + elst.append(tocinfo) + hlst.append(pagehtml) + + # finish up the html string and output it + hlst.append('\n\n') + htmlstr = "".join(hlst) + hlst = None + file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr) + + print " " + print 'Extracting Table of Contents from Amazon OCR' + + # first create a table of contents file for the svg images + tlst = [] + tlst.append('\n') + tlst.append('\n') + tlst.append('') + tlst.append('\n') + tlst.append('' + meta_array['Title'] + '\n') + tlst.append('\n') + tlst.append('\n') + if 'ASIN' in meta_array: + tlst.append('\n') + if 'GUID' in meta_array: + tlst.append('\n') + tlst.append('\n') + tlst.append('\n') + + tlst.append('

Table of Contents

\n') + start = pageidnums[0] + if (raw): + startname = 'page%04d.svg' % start else: - libcrypto = find_library('crypto') - - if libcrypto is None: - raise IGNOBLEError('libcrypto not found') - libcrypto = CDLL(libcrypto) - - AES_MAXNR = 14 - - c_char_pp = POINTER(c_char_p) - c_int_p = POINTER(c_int) - - class AES_KEY(Structure): - _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), - ('rounds', c_int)] - AES_KEY_p = POINTER(AES_KEY) - - def F(restype, name, argtypes): - func = getattr(libcrypto, name) - func.restype = restype - func.argtypes = argtypes - return func - - AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key', - [c_char_p, c_int, AES_KEY_p]) - AES_cbc_encrypt = F(None, 'AES_cbc_encrypt', - [c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, - c_int]) - - class AES(object): - def __init__(self, userkey): - self._blocksize = len(userkey) - if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : - raise IGNOBLEError('AES improper key used') - return - key = self._key = AES_KEY() - rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key) - if rv < 0: - raise IGNOBLEError('Failed to initialize AES key') - - def decrypt(self, data): - out = create_string_buffer(len(data)) - iv = ("\x00" * self._blocksize) - rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0) - if rv == 0: - raise IGNOBLEError('AES decryption failed') - return out.raw - - return AES - -def _load_crypto_pycrypto(): - from Crypto.Cipher import AES as _AES - - class AES(object): - def __init__(self, key): - self._aes = _AES.new(key, _AES.MODE_CBC, '\x00'*16) - - def decrypt(self, data): - return self._aes.decrypt(data) - - return AES - -def _load_crypto(): - AES = None - cryptolist = (_load_crypto_libcrypto, _load_crypto_pycrypto) - if sys.platform.startswith('win'): - cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto) - for loader in cryptolist: - try: - AES = loader() - break - except (ImportError, IGNOBLEError): - pass - return AES - -AES = 
_load_crypto() - -META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml') -NSMAP = {'adept': 'http://ns.adobe.com/adept', - 'enc': 'http://www.w3.org/2001/04/xmlenc#'} - -class Decryptor(object): - def __init__(self, bookkey, encryption): - enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag) - self._aes = AES(bookkey) - encryption = etree.fromstring(encryption) - self._encrypted = encrypted = set() - expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'), - enc('CipherReference')) - for elem in encryption.findall(expr): - path = elem.get('URI', None) - if path is not None: - path = path.encode('utf-8') - encrypted.add(path) - - def decompress(self, bytes): - dc = zlib.decompressobj(-15) - bytes = dc.decompress(bytes) - ex = dc.decompress('Z') + dc.flush() - if ex: - bytes = bytes + ex - return bytes - - def decrypt(self, path, data): - if path in self._encrypted: - data = self._aes.decrypt(data)[16:] - data = data[:-ord(data[-1])] - data = self.decompress(data) - return data - -# check file to make check whether it's probably an Adobe Adept encrypted ePub -def ignobleBook(inpath): - with closing(ZipFile(open(inpath, 'rb'))) as inf: - namelist = set(inf.namelist()) - if 'META-INF/rights.xml' not in namelist or \ - 'META-INF/encryption.xml' not in namelist: - return False - try: - rights = etree.fromstring(inf.read('META-INF/rights.xml')) - adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) - expr = './/%s' % (adept('encryptedKey'),) - bookkey = ''.join(rights.findtext(expr)) - if len(bookkey) == 64: - return True - except: - # if we couldn't check, assume it is - return True - return False - -def decryptBook(keyb64, inpath, outpath): - if AES is None: - raise IGNOBLEError(u"PyCrypto or OpenSSL must be installed.") - key = keyb64.decode('base64')[:16] - aes = AES(key) - with closing(ZipFile(open(inpath, 'rb'))) as inf: - namelist = set(inf.namelist()) - if 'META-INF/rights.xml' not in namelist or \ - 'META-INF/encryption.xml' not in 
namelist: - print u"{0:s} is DRM-free.".format(os.path.basename(inpath)) - return 1 - for name in META_NAMES: - namelist.remove(name) - try: - rights = etree.fromstring(inf.read('META-INF/rights.xml')) - adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) - expr = './/%s' % (adept('encryptedKey'),) - bookkey = ''.join(rights.findtext(expr)) - if len(bookkey) != 64: - print u"{0:s} is not a secure Barnes & Noble ePub.".format(os.path.basename(inpath)) - return 1 - bookkey = aes.decrypt(bookkey.decode('base64')) - bookkey = bookkey[:-ord(bookkey[-1])] - encryption = inf.read('META-INF/encryption.xml') - decryptor = Decryptor(bookkey[-16:], encryption) - kwds = dict(compression=ZIP_DEFLATED, allowZip64=False) - with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf: - zi = ZipInfo('mimetype') - zi.compress_type=ZIP_STORED - try: - # if the mimetype is present, get its info, including time-stamp - oldzi = inf.getinfo('mimetype') - # copy across fields to be preserved - zi.date_time = oldzi.date_time - zi.comment = oldzi.comment - zi.extra = oldzi.extra - zi.internal_attr = oldzi.internal_attr - # external attributes are dependent on the create system, so copy both. - zi.external_attr = oldzi.external_attr - zi.create_system = oldzi.create_system - except: - pass - outf.writestr(zi, inf.read('mimetype')) - for path in namelist: - data = inf.read(path) - zi = ZipInfo(path) - zi.compress_type=ZIP_DEFLATED - try: - # get the file info, including time-stamp - oldzi = inf.getinfo(path) - # copy across useful fields - zi.date_time = oldzi.date_time - zi.comment = oldzi.comment - zi.extra = oldzi.extra - zi.internal_attr = oldzi.internal_attr - # external attributes are dependent on the create system, so copy both. 
- zi.external_attr = oldzi.external_attr - zi.create_system = oldzi.create_system - except: - pass - outf.writestr(zi, decryptor.decrypt(path, data)) - except: - print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()) - return 2 + startname = 'page%04d.xhtml' % start + + tlst.append('

Start of Book

\n') + # build up a table of contents for the svg xhtml output + tocentries = "".join(elst) + elst = None + toclst = tocentries.split('\n') + toclst.pop() + for entry in toclst: + print entry + title, pagenum = entry.split('|') + id = pageidnums[int(pagenum)] + if (raw): + fname = 'page%04d.svg' % id + else: + fname = 'page%04d.xhtml' % id + tlst.append('

' + title + '

\n') + tlst.append('\n') + tlst.append('\n') + tochtml = "".join(tlst) + file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml) + + + # now create index_svg.xhtml that points to all required files + slst = [] + slst.append('\n') + slst.append('\n') + slst.append('') + slst.append('\n') + slst.append('' + meta_array['Title'] + '\n') + slst.append('\n') + slst.append('\n') + if 'ASIN' in meta_array: + slst.append('\n') + if 'GUID' in meta_array: + slst.append('\n') + slst.append('\n') + slst.append('\n') + + print "Building svg images of each book page" + slst.append('

List of Pages

\n') + slst.append('
\n') + idlst = sorted(pageIDMap.keys()) + numids = len(idlst) + cnt = len(idlst) + previd = None + for j in range(cnt): + pageid = idlst[j] + if j < cnt - 1: + nextid = idlst[j+1] + else: + nextid = None + print '.', + pagelst = pageIDMap[pageid] + flst = [] + for page in pagelst: + flst.append(xmllst[page]) + flat_svg = "".join(flst) + flst=None + svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi) + if (raw) : + pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w') + slst.append('Page %d\n' % (pageid, pageid)) + else : + pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w') + slst.append('Page %d\n' % (pageid, pageid)) + previd = pageid + pfile.write(svgxml) + pfile.close() + counter += 1 + slst.append('
\n') + slst.append('

Table of Contents

\n') + slst.append('\n\n') + svgindex = "".join(slst) + slst = None + file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex) + + print " " + + # build the opf file + opfname = os.path.join(bookDir, 'book.opf') + olst = [] + olst.append('\n') + olst.append('\n') + # adding metadata + olst.append(' \n') + if 'GUID' in meta_array: + olst.append(' ' + meta_array['GUID'] + '\n') + if 'ASIN' in meta_array: + olst.append(' ' + meta_array['ASIN'] + '\n') + if 'oASIN' in meta_array: + olst.append(' ' + meta_array['oASIN'] + '\n') + olst.append(' ' + meta_array['Title'] + '\n') + olst.append(' ' + meta_array['Authors'] + '\n') + olst.append(' en\n') + olst.append(' ' + meta_array['UpdateTime'] + '\n') + if isCover: + olst.append(' \n') + olst.append(' \n') + olst.append('\n') + olst.append(' \n') + olst.append(' \n') + # adding image files to manifest + filenames = os.listdir(imgDir) + filenames = sorted(filenames) + for filename in filenames: + imgname, imgext = os.path.splitext(filename) + if imgext == '.jpg': + imgext = 'jpeg' + if imgext == '.svg': + imgext = 'svg+xml' + olst.append(' \n') + if isCover: + olst.append(' \n') + olst.append('\n') + # adding spine + olst.append('\n \n\n') + if isCover: + olst.append(' \n') + olst.append(' \n') + olst.append(' \n') + olst.append('\n') + opfstr = "".join(olst) + olst = None + file(opfname, 'wb').write(opfstr) + + print 'Processing Complete' + return 0 +def usage(): + print "genbook.py generates a book from the extract Topaz Files" + print "Usage:" + print " genbook.py [-r] [-h [--fixed-image] " + print " " + print "Options:" + print " -h : help - print this usage message" + print " -r : generate raw svg files (not wrapped in xhtml)" + print " --fixed-image : genearate any Fixed Area as an svg image in the html" + print " " -def cli_main(): - sys.stdout=SafeUnbuffered(sys.stdout) - sys.stderr=SafeUnbuffered(sys.stderr) - argv=unicode_argv() - progname = os.path.basename(argv[0]) - if len(argv) != 4: - print 
u"usage: {0} ".format(progname) - return 1 - keypath, inpath, outpath = argv[1:] - userkey = open(keypath,'rb').read() - result = decryptBook(userkey, inpath, outpath) - if result == 0: - print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath)) - return result -def gui_main(): +def main(argv): + bookDir = '' + if len(argv) == 0: + argv = sys.argv + try: - import Tkinter - import Tkconstants - import tkMessageBox - import traceback - except: - return cli_main() - - class DecryptionDialog(Tkinter.Frame): - def __init__(self, root): - Tkinter.Frame.__init__(self, root, border=5) - self.status = Tkinter.Label(self, text=u"Select files for decryption") - self.status.pack(fill=Tkconstants.X, expand=1) - body = Tkinter.Frame(self) - body.pack(fill=Tkconstants.X, expand=1) - sticky = Tkconstants.E + Tkconstants.W - body.grid_columnconfigure(1, weight=2) - Tkinter.Label(body, text=u"Key file").grid(row=0) - self.keypath = Tkinter.Entry(body, width=30) - self.keypath.grid(row=0, column=1, sticky=sticky) - if os.path.exists(u"bnepubkey.b64"): - self.keypath.insert(0, u"bnepubkey.b64") - button = Tkinter.Button(body, text=u"...", command=self.get_keypath) - button.grid(row=0, column=2) - Tkinter.Label(body, text=u"Input file").grid(row=1) - self.inpath = Tkinter.Entry(body, width=30) - self.inpath.grid(row=1, column=1, sticky=sticky) - button = Tkinter.Button(body, text=u"...", command=self.get_inpath) - button.grid(row=1, column=2) - Tkinter.Label(body, text=u"Output file").grid(row=2) - self.outpath = Tkinter.Entry(body, width=30) - self.outpath.grid(row=2, column=1, sticky=sticky) - button = Tkinter.Button(body, text=u"...", command=self.get_outpath) - button.grid(row=2, column=2) - buttons = Tkinter.Frame(self) - buttons.pack() - botton = Tkinter.Button( - buttons, text=u"Decrypt", width=10, command=self.decrypt) - botton.pack(side=Tkconstants.LEFT) - Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT) - button = 
Tkinter.Button( - buttons, text=u"Quit", width=10, command=self.quit) - button.pack(side=Tkconstants.RIGHT) - - def get_keypath(self): - keypath = tkFileDialog.askopenfilename( - parent=None, title=u"Select Barnes & Noble \'.b64\' key file", - defaultextension=u".b64", - filetypes=[('base64-encoded files', '.b64'), - ('All Files', '.*')]) - if keypath: - keypath = os.path.normpath(keypath) - self.keypath.delete(0, Tkconstants.END) - self.keypath.insert(0, keypath) - return - - def get_inpath(self): - inpath = tkFileDialog.askopenfilename( - parent=None, title=u"Select B&N-encrypted ePub file to decrypt", - defaultextension=u".epub", filetypes=[('ePub files', '.epub')]) - if inpath: - inpath = os.path.normpath(inpath) - self.inpath.delete(0, Tkconstants.END) - self.inpath.insert(0, inpath) - return - - def get_outpath(self): - outpath = tkFileDialog.asksaveasfilename( - parent=None, title=u"Select unencrypted ePub file to produce", - defaultextension=u".epub", filetypes=[('ePub files', '.epub')]) - if outpath: - outpath = os.path.normpath(outpath) - self.outpath.delete(0, Tkconstants.END) - self.outpath.insert(0, outpath) - return - - def decrypt(self): - keypath = self.keypath.get() - inpath = self.inpath.get() - outpath = self.outpath.get() - if not keypath or not os.path.exists(keypath): - self.status['text'] = u"Specified key file does not exist" - return - if not inpath or not os.path.exists(inpath): - self.status['text'] = u"Specified input file does not exist" - return - if not outpath: - self.status['text'] = u"Output file not specified" - return - if inpath == outpath: - self.status['text'] = u"Must have different input and output files" - return - userkey = open(keypath,'rb').read() - self.status['text'] = u"Decrypting..." 
- try: - decrypt_status = decryptBook(userkey, inpath, outpath) - except Exception, e: - self.status['text'] = u"Error: {0}".format(e.args[0]) - return - if decrypt_status == 0: - self.status['text'] = u"File successfully decrypted" - else: - self.status['text'] = u"The was an error decrypting the file." - - root = Tkinter.Tk() - root.title(u"Barnes & Noble ePub Decrypter v.{0}".format(__version__)) - root.resizable(True, False) - root.minsize(300, 0) - DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1) - root.mainloop() - return 0 + opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"]) + + except getopt.GetoptError, err: + print str(err) + usage() + return 1 + + if len(opts) == 0 and len(args) == 0 : + usage() + return 1 + + raw = 0 + fixedimage = True + for o, a in opts: + if o =="-h": + usage() + return 0 + if o =="-r": + raw = 1 + if o =="--fixed-image": + fixedimage = True + + bookDir = args[0] + + rv = generateBook(bookDir, raw, fixedimage) + return rv + if __name__ == '__main__': - if len(sys.argv) > 1: - sys.exit(cli_main()) - sys.exit(gui_main()) + sys.exit(main('')) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py b/DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py index 5118c87..ac73d1e 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py @@ -3,47 +3,54 @@ from __future__ import with_statement -# ignoblekeygen.pyw, version 2.5 -# Copyright © 2009-2010 i♥cabbages +# ignobleepub.pyw, version 3.8 +# Copyright © 2009-2010 by i♥cabbages # Released under the terms of the GNU General Public Licence, version 3 # # Modified 2010–2013 by some_updates, DiapDealer and Apprentice Alf -# Windows users: Before running this program, you must first install Python. -# We recommend ActiveState Python 2.7.X for Windows (x86) from -# http://www.activestate.com/activepython/downloads. 
-# You must also install PyCrypto from -# http://www.voidspace.org.uk/python/modules.shtml#pycrypto -# (make certain to install the version for Python 2.7). -# Then save this script file as ignoblekeygen.pyw and double-click on it to run it. +# Windows users: Before running this program, you must first install Python 2.6 +# from and PyCrypto from +# (make sure to +# install the version for Python 2.6). Save this script file as +# ineptepub.pyw and double-click on it to run it. # -# Mac OS X users: Save this script file as ignoblekeygen.pyw. You can run this -# program from the command line (python ignoblekeygen.pyw) or by double-clicking +# Mac OS X users: Save this script file as ineptepub.pyw. You can run this +# program from the command line (pythonw ineptepub.pyw) or by double-clicking # it when it has been associated with PythonLauncher. # Revision history: # 1 - Initial release -# 2 - Add OS X support by using OpenSSL when available (taken/modified from ineptepub v5) -# 2.1 - Allow Windows versions of libcrypto to be found -# 2.2 - On Windows try PyCrypto first and then OpenSSL next -# 2.3 - Modify interface to allow use of import -# 2.4 - Improvements to UI and now works in plugins -# 2.5 - Additional improvement for unicode and plugin support -# 2.6 - moved unicode_argv call inside main for Windows DeDRM compatibility -# 2.7 - Work if TkInter is missing +# 2 - Added OS X support by using OpenSSL when available +# 3 - screen out improper key lengths to prevent segfaults on Linux +# 3.1 - Allow Windows versions of libcrypto to be found +# 3.2 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml +# 3.3 - On Windows try PyCrypto first, OpenSSL next +# 3.4 - Modify interface to allow use with import +# 3.5 - Fix for potential problem with PyCrypto +# 3.6 - Revised to allow use in calibre plugins to eliminate need for duplicate code +# 3.7 - Tweaked to match ineptepub more closely +# 3.8 - Fixed to retain zip file 
metadata (e.g. file modification date) +# 3.9 - moved unicode_argv call inside main for Windows DeDRM compatibility +# 4.0 - Work if TkInter is missing """ -Generate Barnes & Noble EPUB user key from name and credit card number. +Decrypt Barnes & Noble encrypted ePub books. """ __license__ = 'GPL v3' -__version__ = "2.7" +__version__ = "4.0" import sys import os -import hashlib +import traceback +import zlib +import zipfile +from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED +from contextlib import closing +import xml.etree.ElementTree as etree # Wrap a stream so that output gets flushed immediately # and also make sure that any unicode strings get @@ -75,8 +82,8 @@ def unicode_argv(): # Versions 2.x of Python don't support Unicode in sys.argv on # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv - # as a list of Unicode strings and encode them as utf-8 + # characters with '?'. + from ctypes import POINTER, byref, cdll, c_int, windll from ctypes.wintypes import LPCWSTR, LPWSTR @@ -97,9 +104,7 @@ def unicode_argv(): start = argc.value - len(sys.argv) return [argv[i] for i in xrange(start, argc.value)] - # if we don't have any arguments at all, just pass back script name - # this should never happen - return [u"ignoblekeygen.py"] + return [u"ineptepub.py"] else: argvencoding = sys.stdin.encoding if argvencoding == None: @@ -140,26 +145,29 @@ def _load_crypto_libcrypto(): func.argtypes = argtypes return func - AES_set_encrypt_key = F(c_int, 'AES_set_encrypt_key', + AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key', [c_char_p, c_int, AES_KEY_p]) AES_cbc_encrypt = F(None, 'AES_cbc_encrypt', [c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, c_int]) class AES(object): - def __init__(self, userkey, iv): + def __init__(self, userkey): self._blocksize = len(userkey) - self._iv = iv + if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 
32) : + raise IGNOBLEError('AES improper key used') + return key = self._key = AES_KEY() - rv = AES_set_encrypt_key(userkey, len(userkey) * 8, key) + rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key) if rv < 0: - raise IGNOBLEError('Failed to initialize AES Encrypt key') + raise IGNOBLEError('Failed to initialize AES key') - def encrypt(self, data): + def decrypt(self, data): out = create_string_buffer(len(data)) - rv = AES_cbc_encrypt(data, out, len(data), self._key, self._iv, 1) + iv = ("\x00" * self._blocksize) + rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0) if rv == 0: - raise IGNOBLEError('AES encryption failed') + raise IGNOBLEError('AES decryption failed') return out.raw return AES @@ -168,11 +176,11 @@ def _load_crypto_pycrypto(): from Crypto.Cipher import AES as _AES class AES(object): - def __init__(self, key, iv): - self._aes = _AES.new(key, _AES.MODE_CBC, iv) + def __init__(self, key): + self._aes = _AES.new(key, _AES.MODE_CBC, '\x00'*16) - def encrypt(self, data): - return self._aes.encrypt(data) + def decrypt(self, data): + return self._aes.decrypt(data) return AES @@ -191,29 +199,123 @@ def _load_crypto(): AES = _load_crypto() -def normalize_name(name): - return ''.join(x for x in name.lower() if x != ' ') - - -def generate_key(name, ccn): - # remove spaces and case from name and CC numbers. 
- if type(name)==unicode: - name = name.encode('utf-8') - if type(ccn)==unicode: - ccn = ccn.encode('utf-8') - - name = normalize_name(name) + '\x00' - ccn = normalize_name(ccn) + '\x00' - - name_sha = hashlib.sha1(name).digest()[:16] - ccn_sha = hashlib.sha1(ccn).digest()[:16] - both_sha = hashlib.sha1(name + ccn).digest() - aes = AES(ccn_sha, name_sha) - crypt = aes.encrypt(both_sha + ('\x0c' * 0x0c)) - userkey = hashlib.sha1(crypt).digest() - return userkey.encode('base64') - - +META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml') +NSMAP = {'adept': 'http://ns.adobe.com/adept', + 'enc': 'http://www.w3.org/2001/04/xmlenc#'} + +class Decryptor(object): + def __init__(self, bookkey, encryption): + enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag) + self._aes = AES(bookkey) + encryption = etree.fromstring(encryption) + self._encrypted = encrypted = set() + expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'), + enc('CipherReference')) + for elem in encryption.findall(expr): + path = elem.get('URI', None) + if path is not None: + path = path.encode('utf-8') + encrypted.add(path) + + def decompress(self, bytes): + dc = zlib.decompressobj(-15) + bytes = dc.decompress(bytes) + ex = dc.decompress('Z') + dc.flush() + if ex: + bytes = bytes + ex + return bytes + + def decrypt(self, path, data): + if path in self._encrypted: + data = self._aes.decrypt(data)[16:] + data = data[:-ord(data[-1])] + data = self.decompress(data) + return data + +# check file to make check whether it's probably an Adobe Adept encrypted ePub +def ignobleBook(inpath): + with closing(ZipFile(open(inpath, 'rb'))) as inf: + namelist = set(inf.namelist()) + if 'META-INF/rights.xml' not in namelist or \ + 'META-INF/encryption.xml' not in namelist: + return False + try: + rights = etree.fromstring(inf.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('encryptedKey'),) + bookkey = ''.join(rights.findtext(expr)) + if 
len(bookkey) == 64: + return True + except: + # if we couldn't check, assume it is + return True + return False + +def decryptBook(keyb64, inpath, outpath): + if AES is None: + raise IGNOBLEError(u"PyCrypto or OpenSSL must be installed.") + key = keyb64.decode('base64')[:16] + aes = AES(key) + with closing(ZipFile(open(inpath, 'rb'))) as inf: + namelist = set(inf.namelist()) + if 'META-INF/rights.xml' not in namelist or \ + 'META-INF/encryption.xml' not in namelist: + print u"{0:s} is DRM-free.".format(os.path.basename(inpath)) + return 1 + for name in META_NAMES: + namelist.remove(name) + try: + rights = etree.fromstring(inf.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('encryptedKey'),) + bookkey = ''.join(rights.findtext(expr)) + if len(bookkey) != 64: + print u"{0:s} is not a secure Barnes & Noble ePub.".format(os.path.basename(inpath)) + return 1 + bookkey = aes.decrypt(bookkey.decode('base64')) + bookkey = bookkey[:-ord(bookkey[-1])] + encryption = inf.read('META-INF/encryption.xml') + decryptor = Decryptor(bookkey[-16:], encryption) + kwds = dict(compression=ZIP_DEFLATED, allowZip64=False) + with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf: + zi = ZipInfo('mimetype') + zi.compress_type=ZIP_STORED + try: + # if the mimetype is present, get its info, including time-stamp + oldzi = inf.getinfo('mimetype') + # copy across fields to be preserved + zi.date_time = oldzi.date_time + zi.comment = oldzi.comment + zi.extra = oldzi.extra + zi.internal_attr = oldzi.internal_attr + # external attributes are dependent on the create system, so copy both. 
+ zi.external_attr = oldzi.external_attr + zi.create_system = oldzi.create_system + except: + pass + outf.writestr(zi, inf.read('mimetype')) + for path in namelist: + data = inf.read(path) + zi = ZipInfo(path) + zi.compress_type=ZIP_DEFLATED + try: + # get the file info, including time-stamp + oldzi = inf.getinfo(path) + # copy across useful fields + zi.date_time = oldzi.date_time + zi.comment = oldzi.comment + zi.extra = oldzi.extra + zi.internal_attr = oldzi.internal_attr + # external attributes are dependent on the create system, so copy both. + zi.external_attr = oldzi.external_attr + zi.create_system = oldzi.create_system + except: + pass + outf.writestr(zi, decryptor.decrypt(path, data)) + except: + print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()) + return 2 + return 0 def cli_main(): @@ -221,19 +323,15 @@ def cli_main(): sys.stderr=SafeUnbuffered(sys.stderr) argv=unicode_argv() progname = os.path.basename(argv[0]) - if AES is None: - print "%s: This script requires OpenSSL or PyCrypto, which must be installed " \ - "separately. Read the top-of-script comment for details." 
% \ - (progname,) - return 1 if len(argv) != 4: - print u"usage: {0} ".format(progname) + print u"usage: {0} ".format(progname) return 1 - name, ccn, keypath = argv[1:] - userkey = generate_key(name, ccn) - open(keypath,'wb').write(userkey) - return 0 - + keypath, inpath, outpath = argv[1:] + userkey = open(keypath,'rb').read() + result = decryptBook(userkey, inpath, outpath) + if result == 0: + print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath)) + return result def gui_main(): try: @@ -247,28 +345,33 @@ def gui_main(): class DecryptionDialog(Tkinter.Frame): def __init__(self, root): Tkinter.Frame.__init__(self, root, border=5) - self.status = Tkinter.Label(self, text=u"Enter parameters") + self.status = Tkinter.Label(self, text=u"Select files for decryption") self.status.pack(fill=Tkconstants.X, expand=1) body = Tkinter.Frame(self) body.pack(fill=Tkconstants.X, expand=1) sticky = Tkconstants.E + Tkconstants.W body.grid_columnconfigure(1, weight=2) - Tkinter.Label(body, text=u"Account Name").grid(row=0) - self.name = Tkinter.Entry(body, width=40) - self.name.grid(row=0, column=1, sticky=sticky) - Tkinter.Label(body, text=u"CC#").grid(row=1) - self.ccn = Tkinter.Entry(body, width=40) - self.ccn.grid(row=1, column=1, sticky=sticky) - Tkinter.Label(body, text=u"Output file").grid(row=2) - self.keypath = Tkinter.Entry(body, width=40) - self.keypath.grid(row=2, column=1, sticky=sticky) - self.keypath.insert(2, u"bnepubkey.b64") + Tkinter.Label(body, text=u"Key file").grid(row=0) + self.keypath = Tkinter.Entry(body, width=30) + self.keypath.grid(row=0, column=1, sticky=sticky) + if os.path.exists(u"bnepubkey.b64"): + self.keypath.insert(0, u"bnepubkey.b64") button = Tkinter.Button(body, text=u"...", command=self.get_keypath) + button.grid(row=0, column=2) + Tkinter.Label(body, text=u"Input file").grid(row=1) + self.inpath = Tkinter.Entry(body, width=30) + self.inpath.grid(row=1, column=1, sticky=sticky) + button = 
Tkinter.Button(body, text=u"...", command=self.get_inpath) + button.grid(row=1, column=2) + Tkinter.Label(body, text=u"Output file").grid(row=2) + self.outpath = Tkinter.Entry(body, width=30) + self.outpath.grid(row=2, column=1, sticky=sticky) + button = Tkinter.Button(body, text=u"...", command=self.get_outpath) button.grid(row=2, column=2) buttons = Tkinter.Frame(self) buttons.pack() botton = Tkinter.Button( - buttons, text=u"Generate", width=10, command=self.generate) + buttons, text=u"Decrypt", width=10, command=self.decrypt) botton.pack(side=Tkconstants.LEFT) Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT) button = Tkinter.Button( @@ -276,8 +379,8 @@ def gui_main(): button.pack(side=Tkconstants.RIGHT) def get_keypath(self): - keypath = tkFileDialog.asksaveasfilename( - parent=None, title=u"Select B&N ePub key file to produce", + keypath = tkFileDialog.askopenfilename( + parent=None, title=u"Select Barnes & Noble \'.b64\' key file", defaultextension=u".b64", filetypes=[('base64-encoded files', '.b64'), ('All Files', '.*')]) @@ -287,37 +390,56 @@ def gui_main(): self.keypath.insert(0, keypath) return - def generate(self): - name = self.name.get() - ccn = self.ccn.get() + def get_inpath(self): + inpath = tkFileDialog.askopenfilename( + parent=None, title=u"Select B&N-encrypted ePub file to decrypt", + defaultextension=u".epub", filetypes=[('ePub files', '.epub')]) + if inpath: + inpath = os.path.normpath(inpath) + self.inpath.delete(0, Tkconstants.END) + self.inpath.insert(0, inpath) + return + + def get_outpath(self): + outpath = tkFileDialog.asksaveasfilename( + parent=None, title=u"Select unencrypted ePub file to produce", + defaultextension=u".epub", filetypes=[('ePub files', '.epub')]) + if outpath: + outpath = os.path.normpath(outpath) + self.outpath.delete(0, Tkconstants.END) + self.outpath.insert(0, outpath) + return + + def decrypt(self): keypath = self.keypath.get() - if not name: - self.status['text'] = u"Name not specified" + inpath = 
self.inpath.get() + outpath = self.outpath.get() + if not keypath or not os.path.exists(keypath): + self.status['text'] = u"Specified key file does not exist" return - if not ccn: - self.status['text'] = u"Credit card number not specified" + if not inpath or not os.path.exists(inpath): + self.status['text'] = u"Specified input file does not exist" return - if not keypath: - self.status['text'] = u"Output keyfile path not specified" + if not outpath: + self.status['text'] = u"Output file not specified" return - self.status['text'] = u"Generating..." + if inpath == outpath: + self.status['text'] = u"Must have different input and output files" + return + userkey = open(keypath,'rb').read() + self.status['text'] = u"Decrypting..." try: - userkey = generate_key(name, ccn) + decrypt_status = decryptBook(userkey, inpath, outpath) except Exception, e: - self.status['text'] = u"Error: (0}".format(e.args[0]) + self.status['text'] = u"Error: {0}".format(e.args[0]) return - open(keypath,'wb').write(userkey) - self.status['text'] = u"Keyfile successfully generated" + if decrypt_status == 0: + self.status['text'] = u"File successfully decrypted" + else: + self.status['text'] = u"There was an error decrypting the file." root = Tkinter.Tk() - if AES is None: - root.withdraw() - tkMessageBox.showerror( - "Ignoble EPUB Keyfile Generator", - "This script requires OpenSSL or PyCrypto, which must be installed " - "separately. 
Read the top-of-script comment for details.") - return 1 - root.title(u"Barnes & Noble ePub Keyfile Generator v.{0}".format(__version__)) + root.title(u"Barnes & Noble ePub Decrypter v.{0}".format(__version__)) root.resizable(True, False) root.minsize(300, 0) DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py b/DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py index f8181cb..5118c87 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py @@ -3,56 +3,47 @@ from __future__ import with_statement -# ineptepub.pyw, version 6.1 -# Copyright © 2009-2010 by i♥cabbages +# ignoblekeygen.pyw, version 2.5 +# Copyright © 2009-2010 i♥cabbages # Released under the terms of the GNU General Public Licence, version 3 # # Modified 2010–2013 by some_updates, DiapDealer and Apprentice Alf -# Windows users: Before running this program, you must first install Python 2.6 -# from and PyCrypto from -# (make sure to -# install the version for Python 2.6). Save this script file as -# ineptepub.pyw and double-click on it to run it. +# Windows users: Before running this program, you must first install Python. +# We recommend ActiveState Python 2.7.X for Windows (x86) from +# http://www.activestate.com/activepython/downloads. +# You must also install PyCrypto from +# http://www.voidspace.org.uk/python/modules.shtml#pycrypto +# (make certain to install the version for Python 2.7). +# Then save this script file as ignoblekeygen.pyw and double-click on it to run it. # -# Mac OS X users: Save this script file as ineptepub.pyw. You can run this -# program from the command line (pythonw ineptepub.pyw) or by double-clicking +# Mac OS X users: Save this script file as ignoblekeygen.pyw. You can run this +# program from the command line (python ignoblekeygen.pyw) or by double-clicking # it when it has been associated with PythonLauncher. 
# Revision history: # 1 - Initial release -# 2 - Rename to INEPT, fix exit code -# 5 - Version bump to avoid (?) confusion; -# Improve OS X support by using OpenSSL when available -# 5.1 - Improve OpenSSL error checking -# 5.2 - Fix ctypes error causing segfaults on some systems -# 5.3 - add support for OpenSSL on Windows, fix bug with some versions of libcrypto 0.9.8 prior to path level o -# 5.4 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml -# 5.5 - On Windows try PyCrypto first, OpenSSL next -# 5.6 - Modify interface to allow use with import -# 5.7 - Fix for potential problem with PyCrypto -# 5.8 - Revised to allow use in calibre plugins to eliminate need for duplicate code -# 5.9 - Fixed to retain zip file metadata (e.g. file modification date) -# 6.0 - moved unicode_argv call inside main for Windows DeDRM compatibility -# 6.1 - Work if TkInter is missing +# 2 - Add OS X support by using OpenSSL when available (taken/modified from ineptepub v5) +# 2.1 - Allow Windows versions of libcrypto to be found +# 2.2 - On Windows try PyCrypto first and then OpenSSL next +# 2.3 - Modify interface to allow use of import +# 2.4 - Improvements to UI and now works in plugins +# 2.5 - Additional improvement for unicode and plugin support +# 2.6 - moved unicode_argv call inside main for Windows DeDRM compatibility +# 2.7 - Work if TkInter is missing """ -Decrypt Adobe Digital Editions encrypted ePub books. +Generate Barnes & Noble EPUB user key from name and credit card number. 
""" __license__ = 'GPL v3' -__version__ = "6.1" +__version__ = "2.7" import sys import os -import traceback -import zlib -import zipfile -from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED -from contextlib import closing -import xml.etree.ElementTree as etree +import hashlib # Wrap a stream so that output gets flushed immediately # and also make sure that any unicode strings get @@ -84,8 +75,8 @@ def unicode_argv(): # Versions 2.x of Python don't support Unicode in sys.argv on # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. - + # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv + # as a list of Unicode strings and encode them as utf-8 from ctypes import POINTER, byref, cdll, c_int, windll from ctypes.wintypes import LPCWSTR, LPWSTR @@ -106,7 +97,9 @@ def unicode_argv(): start = argc.value - len(sys.argv) return [argv[i] for i in xrange(start, argc.value)] - return [u"ineptepub.py"] + # if we don't have any arguments at all, just pass back script name + # this should never happen + return [u"ignoblekeygen.py"] else: argvencoding = sys.stdin.encoding if argvencoding == None: @@ -114,7 +107,7 @@ def unicode_argv(): return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] -class ADEPTError(Exception): +class IGNOBLEError(Exception): pass def _load_crypto_libcrypto(): @@ -128,19 +121,14 @@ def _load_crypto_libcrypto(): libcrypto = find_library('crypto') if libcrypto is None: - raise ADEPTError('libcrypto not found') + raise IGNOBLEError('libcrypto not found') libcrypto = CDLL(libcrypto) - RSA_NO_PADDING = 3 AES_MAXNR = 14 c_char_pp = POINTER(c_char_p) c_int_p = POINTER(c_int) - class RSA(Structure): - pass - RSA_p = POINTER(RSA) - class AES_KEY(Structure): _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] @@ -152,312 +140,80 @@ def _load_crypto_libcrypto(): func.argtypes = argtypes return func - d2i_RSAPrivateKey = F(RSA_p, 
'd2i_RSAPrivateKey', - [RSA_p, c_char_pp, c_long]) - RSA_size = F(c_int, 'RSA_size', [RSA_p]) - RSA_private_decrypt = F(c_int, 'RSA_private_decrypt', - [c_int, c_char_p, c_char_p, RSA_p, c_int]) - RSA_free = F(None, 'RSA_free', [RSA_p]) - AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key', + AES_set_encrypt_key = F(c_int, 'AES_set_encrypt_key', [c_char_p, c_int, AES_KEY_p]) AES_cbc_encrypt = F(None, 'AES_cbc_encrypt', [c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, c_int]) - class RSA(object): - def __init__(self, der): - buf = create_string_buffer(der) - pp = c_char_pp(cast(buf, c_char_p)) - rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der)) - if rsa is None: - raise ADEPTError('Error parsing ADEPT user key DER') - - def decrypt(self, from_): - rsa = self._rsa - to = create_string_buffer(RSA_size(rsa)) - dlen = RSA_private_decrypt(len(from_), from_, to, rsa, - RSA_NO_PADDING) - if dlen < 0: - raise ADEPTError('RSA decryption failed') - return to[:dlen] - - def __del__(self): - if self._rsa is not None: - RSA_free(self._rsa) - self._rsa = None - class AES(object): - def __init__(self, userkey): + def __init__(self, userkey, iv): self._blocksize = len(userkey) - if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : - raise ADEPTError('AES improper key used') - return + self._iv = iv key = self._key = AES_KEY() - rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key) + rv = AES_set_encrypt_key(userkey, len(userkey) * 8, key) if rv < 0: - raise ADEPTError('Failed to initialize AES key') + raise IGNOBLEError('Failed to initialize AES Encrypt key') - def decrypt(self, data): + def encrypt(self, data): out = create_string_buffer(len(data)) - iv = ("\x00" * self._blocksize) - rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0) + rv = AES_cbc_encrypt(data, out, len(data), self._key, self._iv, 1) if rv == 0: - raise ADEPTError('AES decryption failed') + raise IGNOBLEError('AES encryption failed') return out.raw - return 
(AES, RSA) + return AES def _load_crypto_pycrypto(): from Crypto.Cipher import AES as _AES - from Crypto.PublicKey import RSA as _RSA - - # ASN.1 parsing code from tlslite - class ASN1Error(Exception): - pass - - class ASN1Parser(object): - class Parser(object): - def __init__(self, bytes): - self.bytes = bytes - self.index = 0 - - def get(self, length): - if self.index + length > len(self.bytes): - raise ASN1Error("Error decoding ASN.1") - x = 0 - for count in range(length): - x <<= 8 - x |= self.bytes[self.index] - self.index += 1 - return x - - def getFixBytes(self, lengthBytes): - bytes = self.bytes[self.index : self.index+lengthBytes] - self.index += lengthBytes - return bytes - - def getVarBytes(self, lengthLength): - lengthBytes = self.get(lengthLength) - return self.getFixBytes(lengthBytes) - - def getFixList(self, length, lengthList): - l = [0] * lengthList - for x in range(lengthList): - l[x] = self.get(length) - return l - - def getVarList(self, length, lengthLength): - lengthList = self.get(lengthLength) - if lengthList % length != 0: - raise ASN1Error("Error decoding ASN.1") - lengthList = int(lengthList/length) - l = [0] * lengthList - for x in range(lengthList): - l[x] = self.get(length) - return l - - def startLengthCheck(self, lengthLength): - self.lengthCheck = self.get(lengthLength) - self.indexCheck = self.index - - def setLengthCheck(self, length): - self.lengthCheck = length - self.indexCheck = self.index - - def stopLengthCheck(self): - if (self.index - self.indexCheck) != self.lengthCheck: - raise ASN1Error("Error decoding ASN.1") - - def atLengthCheck(self): - if (self.index - self.indexCheck) < self.lengthCheck: - return False - elif (self.index - self.indexCheck) == self.lengthCheck: - return True - else: - raise ASN1Error("Error decoding ASN.1") - - def __init__(self, bytes): - p = self.Parser(bytes) - p.get(1) - self.length = self._getASN1Length(p) - self.value = p.getFixBytes(self.length) - - def getChild(self, which): - p = 
self.Parser(self.value) - for x in range(which+1): - markIndex = p.index - p.get(1) - length = self._getASN1Length(p) - p.getFixBytes(length) - return ASN1Parser(p.bytes[markIndex:p.index]) - - def _getASN1Length(self, p): - firstLength = p.get(1) - if firstLength<=127: - return firstLength - else: - lengthLength = firstLength & 0x7F - return p.get(lengthLength) class AES(object): - def __init__(self, key): - self._aes = _AES.new(key, _AES.MODE_CBC, '\x00'*16) + def __init__(self, key, iv): + self._aes = _AES.new(key, _AES.MODE_CBC, iv) - def decrypt(self, data): - return self._aes.decrypt(data) + def encrypt(self, data): + return self._aes.encrypt(data) - class RSA(object): - def __init__(self, der): - key = ASN1Parser([ord(x) for x in der]) - key = [key.getChild(x).value for x in xrange(1, 4)] - key = [self.bytesToNumber(v) for v in key] - self._rsa = _RSA.construct(key) - - def bytesToNumber(self, bytes): - total = 0L - for byte in bytes: - total = (total << 8) + byte - return total - - def decrypt(self, data): - return self._rsa.decrypt(data) - - return (AES, RSA) + return AES def _load_crypto(): - AES = RSA = None + AES = None cryptolist = (_load_crypto_libcrypto, _load_crypto_pycrypto) if sys.platform.startswith('win'): cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto) for loader in cryptolist: try: - AES, RSA = loader() + AES = loader() break - except (ImportError, ADEPTError): + except (ImportError, IGNOBLEError): pass - return (AES, RSA) - -AES, RSA = _load_crypto() - -META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml') -NSMAP = {'adept': 'http://ns.adobe.com/adept', - 'enc': 'http://www.w3.org/2001/04/xmlenc#'} - -class Decryptor(object): - def __init__(self, bookkey, encryption): - enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag) - self._aes = AES(bookkey) - encryption = etree.fromstring(encryption) - self._encrypted = encrypted = set() - expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'), - 
enc('CipherReference')) - for elem in encryption.findall(expr): - path = elem.get('URI', None) - if path is not None: - path = path.encode('utf-8') - encrypted.add(path) - - def decompress(self, bytes): - dc = zlib.decompressobj(-15) - bytes = dc.decompress(bytes) - ex = dc.decompress('Z') + dc.flush() - if ex: - bytes = bytes + ex - return bytes - - def decrypt(self, path, data): - if path in self._encrypted: - data = self._aes.decrypt(data)[16:] - data = data[:-ord(data[-1])] - data = self.decompress(data) - return data - -# check file to make check whether it's probably an Adobe Adept encrypted ePub -def adeptBook(inpath): - with closing(ZipFile(open(inpath, 'rb'))) as inf: - namelist = set(inf.namelist()) - if 'META-INF/rights.xml' not in namelist or \ - 'META-INF/encryption.xml' not in namelist: - return False - try: - rights = etree.fromstring(inf.read('META-INF/rights.xml')) - adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) - expr = './/%s' % (adept('encryptedKey'),) - bookkey = ''.join(rights.findtext(expr)) - if len(bookkey) == 172: - return True - except: - # if we couldn't check, assume it is - return True - return False - -def decryptBook(userkey, inpath, outpath): - if AES is None: - raise ADEPTError(u"PyCrypto or OpenSSL must be installed.") - rsa = RSA(userkey) - with closing(ZipFile(open(inpath, 'rb'))) as inf: - namelist = set(inf.namelist()) - if 'META-INF/rights.xml' not in namelist or \ - 'META-INF/encryption.xml' not in namelist: - print u"{0:s} is DRM-free.".format(os.path.basename(inpath)) - return 1 - for name in META_NAMES: - namelist.remove(name) - try: - rights = etree.fromstring(inf.read('META-INF/rights.xml')) - adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) - expr = './/%s' % (adept('encryptedKey'),) - bookkey = ''.join(rights.findtext(expr)) - if len(bookkey) != 172: - print u"{0:s} is not a secure Adobe Adept ePub.".format(os.path.basename(inpath)) - return 1 - bookkey = rsa.decrypt(bookkey.decode('base64')) - # Padded as 
per RSAES-PKCS1-v1_5 - if bookkey[-17] != '\x00': - print u"Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath)) - return 2 - encryption = inf.read('META-INF/encryption.xml') - decryptor = Decryptor(bookkey[-16:], encryption) - kwds = dict(compression=ZIP_DEFLATED, allowZip64=False) - with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf: - zi = ZipInfo('mimetype') - zi.compress_type=ZIP_STORED - try: - # if the mimetype is present, get its info, including time-stamp - oldzi = inf.getinfo('mimetype') - # copy across fields to be preserved - zi.date_time = oldzi.date_time - zi.comment = oldzi.comment - zi.extra = oldzi.extra - zi.internal_attr = oldzi.internal_attr - # external attributes are dependent on the create system, so copy both. - zi.external_attr = oldzi.external_attr - zi.create_system = oldzi.create_system - except: - pass - outf.writestr(zi, inf.read('mimetype')) - for path in namelist: - data = inf.read(path) - zi = ZipInfo(path) - zi.compress_type=ZIP_DEFLATED - try: - # get the file info, including time-stamp - oldzi = inf.getinfo(path) - # copy across useful fields - zi.date_time = oldzi.date_time - zi.comment = oldzi.comment - zi.extra = oldzi.extra - zi.internal_attr = oldzi.internal_attr - # external attributes are dependent on the create system, so copy both. - zi.external_attr = oldzi.external_attr - zi.create_system = oldzi.create_system - except: - pass - outf.writestr(zi, decryptor.decrypt(path, data)) - except: - print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()) - return 2 - return 0 + return AES + +AES = _load_crypto() + +def normalize_name(name): + return ''.join(x for x in name.lower() if x != ' ') + + +def generate_key(name, ccn): + # remove spaces and case from name and CC numbers. 
+ if type(name)==unicode: + name = name.encode('utf-8') + if type(ccn)==unicode: + ccn = ccn.encode('utf-8') + + name = normalize_name(name) + '\x00' + ccn = normalize_name(ccn) + '\x00' + + name_sha = hashlib.sha1(name).digest()[:16] + ccn_sha = hashlib.sha1(ccn).digest()[:16] + both_sha = hashlib.sha1(name + ccn).digest() + aes = AES(ccn_sha, name_sha) + crypt = aes.encrypt(both_sha + ('\x0c' * 0x0c)) + userkey = hashlib.sha1(crypt).digest() + return userkey.encode('base64') + + def cli_main(): @@ -465,15 +221,19 @@ def cli_main(): sys.stderr=SafeUnbuffered(sys.stderr) argv=unicode_argv() progname = os.path.basename(argv[0]) + if AES is None: + print "%s: This script requires OpenSSL or PyCrypto, which must be installed " \ + "separately. Read the top-of-script comment for details." % \ + (progname,) + return 1 if len(argv) != 4: - print u"usage: {0} ".format(progname) + print u"usage: {0} ".format(progname) return 1 - keypath, inpath, outpath = argv[1:] - userkey = open(keypath,'rb').read() - result = decryptBook(userkey, inpath, outpath) - if result == 0: - print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath)) - return result + name, ccn, keypath = argv[1:] + userkey = generate_key(name, ccn) + open(keypath,'wb').write(userkey) + return 0 + def gui_main(): try: @@ -487,33 +247,28 @@ def gui_main(): class DecryptionDialog(Tkinter.Frame): def __init__(self, root): Tkinter.Frame.__init__(self, root, border=5) - self.status = Tkinter.Label(self, text=u"Select files for decryption") + self.status = Tkinter.Label(self, text=u"Enter parameters") self.status.pack(fill=Tkconstants.X, expand=1) body = Tkinter.Frame(self) body.pack(fill=Tkconstants.X, expand=1) sticky = Tkconstants.E + Tkconstants.W body.grid_columnconfigure(1, weight=2) - Tkinter.Label(body, text=u"Key file").grid(row=0) - self.keypath = Tkinter.Entry(body, width=30) - self.keypath.grid(row=0, column=1, sticky=sticky) - if 
os.path.exists(u"adeptkey.der"): - self.keypath.insert(0, u"adeptkey.der") - button = Tkinter.Button(body, text=u"...", command=self.get_keypath) - button.grid(row=0, column=2) - Tkinter.Label(body, text=u"Input file").grid(row=1) - self.inpath = Tkinter.Entry(body, width=30) - self.inpath.grid(row=1, column=1, sticky=sticky) - button = Tkinter.Button(body, text=u"...", command=self.get_inpath) - button.grid(row=1, column=2) + Tkinter.Label(body, text=u"Account Name").grid(row=0) + self.name = Tkinter.Entry(body, width=40) + self.name.grid(row=0, column=1, sticky=sticky) + Tkinter.Label(body, text=u"CC#").grid(row=1) + self.ccn = Tkinter.Entry(body, width=40) + self.ccn.grid(row=1, column=1, sticky=sticky) Tkinter.Label(body, text=u"Output file").grid(row=2) - self.outpath = Tkinter.Entry(body, width=30) - self.outpath.grid(row=2, column=1, sticky=sticky) - button = Tkinter.Button(body, text=u"...", command=self.get_outpath) + self.keypath = Tkinter.Entry(body, width=40) + self.keypath.grid(row=2, column=1, sticky=sticky) + self.keypath.insert(2, u"bnepubkey.b64") + button = Tkinter.Button(body, text=u"...", command=self.get_keypath) button.grid(row=2, column=2) buttons = Tkinter.Frame(self) buttons.pack() botton = Tkinter.Button( - buttons, text=u"Decrypt", width=10, command=self.decrypt) + buttons, text=u"Generate", width=10, command=self.generate) botton.pack(side=Tkconstants.LEFT) Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT) button = Tkinter.Button( @@ -521,10 +276,10 @@ def gui_main(): button.pack(side=Tkconstants.RIGHT) def get_keypath(self): - keypath = tkFileDialog.askopenfilename( - parent=None, title=u"Select Adobe Adept \'.der\' key file", - defaultextension=u".der", - filetypes=[('Adobe Adept DER-encoded files', '.der'), + keypath = tkFileDialog.asksaveasfilename( + parent=None, title=u"Select B&N ePub key file to produce", + defaultextension=u".b64", + filetypes=[('base64-encoded files', '.b64'), ('All Files', '.*')]) if keypath: 
keypath = os.path.normpath(keypath) @@ -532,56 +287,37 @@ def gui_main(): self.keypath.insert(0, keypath) return - def get_inpath(self): - inpath = tkFileDialog.askopenfilename( - parent=None, title=u"Select ADEPT-encrypted ePub file to decrypt", - defaultextension=u".epub", filetypes=[('ePub files', '.epub')]) - if inpath: - inpath = os.path.normpath(inpath) - self.inpath.delete(0, Tkconstants.END) - self.inpath.insert(0, inpath) - return - - def get_outpath(self): - outpath = tkFileDialog.asksaveasfilename( - parent=None, title=u"Select unencrypted ePub file to produce", - defaultextension=u".epub", filetypes=[('ePub files', '.epub')]) - if outpath: - outpath = os.path.normpath(outpath) - self.outpath.delete(0, Tkconstants.END) - self.outpath.insert(0, outpath) - return - - def decrypt(self): + def generate(self): + name = self.name.get() + ccn = self.ccn.get() keypath = self.keypath.get() - inpath = self.inpath.get() - outpath = self.outpath.get() - if not keypath or not os.path.exists(keypath): - self.status['text'] = u"Specified key file does not exist" - return - if not inpath or not os.path.exists(inpath): - self.status['text'] = u"Specified input file does not exist" + if not name: + self.status['text'] = u"Name not specified" return - if not outpath: - self.status['text'] = u"Output file not specified" + if not ccn: + self.status['text'] = u"Credit card number not specified" return - if inpath == outpath: - self.status['text'] = u"Must have different input and output files" + if not keypath: + self.status['text'] = u"Output keyfile path not specified" return - userkey = open(keypath,'rb').read() - self.status['text'] = u"Decrypting..." + self.status['text'] = u"Generating..." 
try: - decrypt_status = decryptBook(userkey, inpath, outpath) + userkey = generate_key(name, ccn) except Exception, e: - self.status['text'] = u"Error: {0}".format(e.args[0]) + self.status['text'] = u"Error: {0}".format(e.args[0]) return - if decrypt_status == 0: - self.status['text'] = u"File successfully decrypted" - else: - self.status['text'] = u"The was an error decrypting the file." + open(keypath,'wb').write(userkey) + self.status['text'] = u"Keyfile successfully generated" root = Tkinter.Tk() - root.title(u"Adobe Adept ePub Decrypter v.{0}".format(__version__)) + if AES is None: + root.withdraw() + tkMessageBox.showerror( + "Ignoble EPUB Keyfile Generator", + "This script requires OpenSSL or PyCrypto, which must be installed " + "separately. Read the top-of-script comment for details.") + return 1 + root.title(u"Barnes & Noble ePub Keyfile Generator v.{0}".format(__version__)) root.resizable(True, False) root.minsize(300, 0) DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py b/DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py index 1986e20..f8181cb 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py @@ -1,73 +1,57 @@ -#! /usr/bin/python +#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import with_statement -# ineptpdf.pyw, version 7.11 +# ineptepub.pyw, version 6.1 # Copyright © 2009-2010 by i♥cabbages # Released under the terms of the GNU General Public Licence, version 3 # -# Modified 2010–2012 by some_updates, DiapDealer and Apprentice Alf +# Modified 2010–2013 by some_updates, DiapDealer and Apprentice Alf # Windows users: Before running this program, you must first install Python 2.6 # from and PyCrypto from # (make sure to # install the version for Python 2.6). Save this script file as -# ineptpdf.pyw and double-click on it to run it. +# ineptepub.pyw and double-click on it to run it. 
# -# Mac OS X users: Save this script file as ineptpdf.pyw. You can run this -# program from the command line (pythonw ineptpdf.pyw) or by double-clicking +# Mac OS X users: Save this script file as ineptepub.pyw. You can run this +# program from the command line (pythonw ineptepub.pyw) or by double-clicking # it when it has been associated with PythonLauncher. # Revision history: # 1 - Initial release -# 2 - Improved determination of key-generation algorithm -# 3 - Correctly handle PDF >=1.5 cross-reference streams -# 4 - Removal of ciando's personal ID -# 5 - Automated decryption of a complete directory -# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der -# 7 - Get cross reference streams and object streams working for input. -# Not yet supported on output but this only effects file size, -# not functionality. (anon2) -# 7.1 - Correct a problem when an old trailer is not followed by startxref -# 7.2 - Correct malformed Mac OS resource forks for Stanza (anon2) -# - Support for cross ref streams on output (decreases file size) -# 7.3 - Correct bug in trailer with cross ref stream that caused the error -# "The root object is missing or invalid" in Adobe Reader. (anon2) -# 7.4 - Force all generation numbers in output file to be 0, like in v6. -# Fallback code for wrong xref improved (search till last trailer -# instead of first) (anon2) -# 7.5 - allow support for OpenSSL to replace pycrypto on all platforms -# implemented ARC4 interface to OpenSSL -# fixed minor typos -# 7.6 - backported AES and other fixes from version 8.4.48 -# 7.7 - On Windows try PyCrypto first and OpenSSL next -# 7.8 - Modify interface to allow use of import -# 7.9 - Bug fix for some session key errors when len(bookkey) > length required -# 7.10 - Various tweaks to fix minor problems. -# 7.11 - More tweaks to fix minor problems. 
-# 7.12 - Revised to allow use in calibre plugins to eliminate need for duplicate code -# 7.13 - Fixed erroneous mentions of ineptepub -# 7.14 - moved unicode_argv call inside main for Windows DeDRM compatibility -# 8.0 - Work if TkInter is missing -# 8.0.1 - Broken Metadata fix. +# 2 - Rename to INEPT, fix exit code +# 5 - Version bump to avoid (?) confusion; +# Improve OS X support by using OpenSSL when available +# 5.1 - Improve OpenSSL error checking +# 5.2 - Fix ctypes error causing segfaults on some systems +# 5.3 - add support for OpenSSL on Windows, fix bug with some versions of libcrypto 0.9.8 prior to path level o +# 5.4 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml +# 5.5 - On Windows try PyCrypto first, OpenSSL next +# 5.6 - Modify interface to allow use with import +# 5.7 - Fix for potential problem with PyCrypto +# 5.8 - Revised to allow use in calibre plugins to eliminate need for duplicate code +# 5.9 - Fixed to retain zip file metadata (e.g. file modification date) +# 6.0 - moved unicode_argv call inside main for Windows DeDRM compatibility +# 6.1 - Work if TkInter is missing """ -Decrypts Adobe ADEPT-encrypted PDF files. +Decrypt Adobe Digital Editions encrypted ePub books. 
""" __license__ = 'GPL v3' -__version__ = "8.0.1" +__version__ = "6.1" import sys import os -import re +import traceback import zlib -import struct -import hashlib -from itertools import chain, islice +import zipfile +from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED +from contextlib import closing import xml.etree.ElementTree as etree # Wrap a stream so that output gets flushed immediately @@ -87,8 +71,11 @@ class SafeUnbuffered: def __getattr__(self, attr): return getattr(self.stream, attr) -iswindows = sys.platform.startswith('win') -isosx = sys.platform.startswith('darwin') +try: + from calibre.constants import iswindows, isosx +except: + iswindows = sys.platform.startswith('win') + isosx = sys.platform.startswith('darwin') def unicode_argv(): if iswindows: @@ -119,7 +106,7 @@ def unicode_argv(): start = argc.value - len(sys.argv) return [argv[i] for i in xrange(start, argc.value)] - return [u"ineptpdf.py"] + return [u"ineptepub.py"] else: argvencoding = sys.stdin.encoding if argvencoding == None: @@ -130,21 +117,12 @@ def unicode_argv(): class ADEPTError(Exception): pass - -import hashlib - -def SHA256(message): - ctx = hashlib.sha256() - ctx.update(message) - return ctx.digest() - - def _load_crypto_libcrypto(): from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \ Structure, c_ulong, create_string_buffer, cast from ctypes.util import find_library - if sys.platform.startswith('win'): + if iswindows: libcrypto = find_library('libeay32') else: libcrypto = find_library('crypto') @@ -153,43 +131,38 @@ def _load_crypto_libcrypto(): raise ADEPTError('libcrypto not found') libcrypto = CDLL(libcrypto) - AES_MAXNR = 14 - RSA_NO_PADDING = 3 + AES_MAXNR = 14 c_char_pp = POINTER(c_char_p) c_int_p = POINTER(c_int) - class AES_KEY(Structure): - _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] - AES_KEY_p = POINTER(AES_KEY) - - class RC4_KEY(Structure): - _fields_ = [('x', c_int), ('y', c_int), ('box', c_int * 256)] - 
RC4_KEY_p = POINTER(RC4_KEY) - class RSA(Structure): pass RSA_p = POINTER(RSA) + class AES_KEY(Structure): + _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), + ('rounds', c_int)] + AES_KEY_p = POINTER(AES_KEY) + def F(restype, name, argtypes): func = getattr(libcrypto, name) func.restype = restype func.argtypes = argtypes return func - AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int]) - AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) - - RC4_set_key = F(None,'RC4_set_key',[RC4_KEY_p, c_int, c_char_p]) - RC4_crypt = F(None,'RC4',[RC4_KEY_p, c_int, c_char_p, c_char_p]) - d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey', [RSA_p, c_char_pp, c_long]) RSA_size = F(c_int, 'RSA_size', [RSA_p]) RSA_private_decrypt = F(c_int, 'RSA_private_decrypt', [c_int, c_char_p, c_char_p, RSA_p, c_int]) RSA_free = F(None, 'RSA_free', [RSA_p]) + AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key', + [c_char_p, c_int, AES_KEY_p]) + AES_cbc_encrypt = F(None, 'AES_cbc_encrypt', + [c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p, + c_int]) class RSA(object): def __init__(self, der): @@ -206,65 +179,37 @@ def _load_crypto_libcrypto(): RSA_NO_PADDING) if dlen < 0: raise ADEPTError('RSA decryption failed') - return to[1:dlen] + return to[:dlen] def __del__(self): if self._rsa is not None: RSA_free(self._rsa) self._rsa = None - class ARC4(object): - @classmethod - def new(cls, userkey): - self = ARC4() - self._blocksize = len(userkey) - key = self._key = RC4_KEY() - RC4_set_key(key, self._blocksize, userkey) - return self - def __init__(self): - self._blocksize = 0 - self._key = None - def decrypt(self, data): - out = create_string_buffer(len(data)) - RC4_crypt(self._key, len(data), data, out) - return out.raw - class AES(object): - MODE_CBC = 0 - @classmethod - def new(cls, userkey, mode, iv): - self = AES() + def __init__(self, userkey): self._blocksize = len(userkey) - # mode is ignored since 
CBCMODE is only thing supported/used so far - self._mode = mode if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : raise ADEPTError('AES improper key used') return - keyctx = self._keyctx = AES_KEY() - self._iv = iv - rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) + key = self._key = AES_KEY() + rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key) if rv < 0: raise ADEPTError('Failed to initialize AES key') - return self - def __init__(self): - self._blocksize = 0 - self._keyctx = None - self._iv = 0 - self._mode = 0 + def decrypt(self, data): out = create_string_buffer(len(data)) - rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self._iv, 0) + iv = ("\x00" * self._blocksize) + rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0) if rv == 0: raise ADEPTError('AES decryption failed') return out.raw - return (ARC4, RSA, AES) - + return (AES, RSA) def _load_crypto_pycrypto(): - from Crypto.PublicKey import RSA as _RSA - from Crypto.Cipher import ARC4 as _ARC4 from Crypto.Cipher import AES as _AES + from Crypto.PublicKey import RSA as _RSA # ASN.1 parsing code from tlslite class ASN1Error(Exception): @@ -354,26 +299,10 @@ def _load_crypto_pycrypto(): lengthLength = firstLength & 0x7F return p.get(lengthLength) - class ARC4(object): - @classmethod - def new(cls, userkey): - self = ARC4() - self._arc4 = _ARC4.new(userkey) - return self - def __init__(self): - self._arc4 = None - def decrypt(self, data): - return self._arc4.decrypt(data) - class AES(object): - MODE_CBC = _AES.MODE_CBC - @classmethod - def new(cls, userkey, mode, iv): - self = AES() - self._aes = _AES.new(userkey, mode, iv) - return self - def __init__(self): - self._aes = None + def __init__(self, key): + self._aes = _AES.new(key, _AES.MODE_CBC, '\x00'*16) + def decrypt(self, data): return self._aes.decrypt(data) @@ -393,1801 +322,141 @@ def _load_crypto_pycrypto(): def decrypt(self, data): return self._rsa.decrypt(data) - return (ARC4, 
RSA, AES) + return (AES, RSA) def _load_crypto(): - ARC4 = RSA = AES = None + AES = RSA = None cryptolist = (_load_crypto_libcrypto, _load_crypto_pycrypto) if sys.platform.startswith('win'): cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto) for loader in cryptolist: try: - ARC4, RSA, AES = loader() + AES, RSA = loader() break except (ImportError, ADEPTError): pass - return (ARC4, RSA, AES) -ARC4, RSA, AES = _load_crypto() - - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - - -# Do we generate cross reference streams on output? -# 0 = never -# 1 = only if present in input -# 2 = always - -GEN_XREF_STM = 1 - -# This is the value for the current document -gen_xref_stm = False # will be set in PDFSerializer - -# PDF parsing routines from pdfminer, with changes for EBX_HANDLER - -# Utilities - -def choplist(n, seq): - '''Groups every n elements of the list.''' - r = [] - for x in seq: - r.append(x) - if len(r) == n: - yield tuple(r) - r = [] - return - -def nunpack(s, default=0): - '''Unpacks up to 4 bytes big endian.''' - l = len(s) - if not l: - return default - elif l == 1: - return ord(s) - elif l == 2: - return struct.unpack('>H', s)[0] - elif l == 3: - return struct.unpack('>L', '\x00'+s)[0] - elif l == 4: - return struct.unpack('>L', s)[0] - else: - return TypeError('invalid length: %d' % l) - - -STRICT = 0 - - -# PS Exceptions - -class PSException(Exception): pass -class PSEOF(PSException): pass -class PSSyntaxError(PSException): pass -class PSTypeError(PSException): pass -class PSValueError(PSException): pass - - -# Basic PostScript Types - - -# PSLiteral -class PSObject(object): pass - -class PSLiteral(PSObject): - ''' - PS literals (e.g. "/Name"). - Caution: Never create these objects directly. - Use PSLiteralTable.intern() instead. 
- ''' - def __init__(self, name): - self.name = name - return - - def __repr__(self): - name = [] - for char in self.name: - if not char.isalnum(): - char = '#%02x' % ord(char) - name.append(char) - return '/%s' % ''.join(name) - -# PSKeyword -class PSKeyword(PSObject): - ''' - PS keywords (e.g. "showpage"). - Caution: Never create these objects directly. - Use PSKeywordTable.intern() instead. - ''' - def __init__(self, name): - self.name = name - return - - def __repr__(self): - return self.name - -# PSSymbolTable -class PSSymbolTable(object): - - ''' - Symbol table that stores PSLiteral or PSKeyword. - ''' - - def __init__(self, classe): - self.dic = {} - self.classe = classe - return - - def intern(self, name): - if name in self.dic: - lit = self.dic[name] - else: - lit = self.classe(name) - self.dic[name] = lit - return lit - -PSLiteralTable = PSSymbolTable(PSLiteral) -PSKeywordTable = PSSymbolTable(PSKeyword) -LIT = PSLiteralTable.intern -KWD = PSKeywordTable.intern -KEYWORD_BRACE_BEGIN = KWD('{') -KEYWORD_BRACE_END = KWD('}') -KEYWORD_ARRAY_BEGIN = KWD('[') -KEYWORD_ARRAY_END = KWD(']') -KEYWORD_DICT_BEGIN = KWD('<<') -KEYWORD_DICT_END = KWD('>>') - - -def literal_name(x): - if not isinstance(x, PSLiteral): - if STRICT: - raise PSTypeError('Literal required: %r' % x) - else: - return str(x) - return x.name - -def keyword_name(x): - if not isinstance(x, PSKeyword): - if STRICT: - raise PSTypeError('Keyword required: %r' % x) - else: - return str(x) - return x.name - - -## PSBaseParser -## -EOL = re.compile(r'[\r\n]') -SPC = re.compile(r'\s') -NONSPC = re.compile(r'\S') -HEX = re.compile(r'[0-9a-fA-F]') -END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]') -END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]') -HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.') -END_NUMBER = re.compile(r'[^0-9]') -END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]') -END_STRING = re.compile(r'[()\134]') -OCT_STRING = re.compile(r'[0-7]') -ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, 
')':41, '\\':92 } - -class PSBaseParser(object): - - ''' - Most basic PostScript parser that performs only basic tokenization. - ''' - BUFSIZ = 4096 - - def __init__(self, fp): - self.fp = fp - self.seek(0) - return - - def __repr__(self): - return '' % (self.fp, self.bufpos) - - def flush(self): - return - - def close(self): - self.flush() - return - - def tell(self): - return self.bufpos+self.charpos - - def poll(self, pos=None, n=80): - pos0 = self.fp.tell() - if not pos: - pos = self.bufpos+self.charpos - self.fp.seek(pos) - ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n)) - self.fp.seek(pos0) - return - - def seek(self, pos): - ''' - Seeks the parser to the given position. - ''' - self.fp.seek(pos) - # reset the status for nextline() - self.bufpos = pos - self.buf = '' - self.charpos = 0 - # reset the status for nexttoken() - self.parse1 = self.parse_main - self.tokens = [] - return - - def fillbuf(self): - if self.charpos < len(self.buf): return - # fetch next chunk. - self.bufpos = self.fp.tell() - self.buf = self.fp.read(self.BUFSIZ) - if not self.buf: - raise PSEOF('Unexpected EOF') - self.charpos = 0 - return - - def parse_main(self, s, i): - m = NONSPC.search(s, i) - if not m: - return (self.parse_main, len(s)) - j = m.start(0) - c = s[j] - self.tokenstart = self.bufpos+j - if c == '%': - self.token = '%' - return (self.parse_comment, j+1) - if c == '/': - self.token = '' - return (self.parse_literal, j+1) - if c in '-+' or c.isdigit(): - self.token = c - return (self.parse_number, j+1) - if c == '.': - self.token = c - return (self.parse_float, j+1) - if c.isalpha(): - self.token = c - return (self.parse_keyword, j+1) - if c == '(': - self.token = '' - self.paren = 1 - return (self.parse_string, j+1) - if c == '<': - self.token = '' - return (self.parse_wopen, j+1) - if c == '>': - self.token = '' - return (self.parse_wclose, j+1) - self.add_token(KWD(c)) - return (self.parse_main, j+1) - - def add_token(self, obj): - 
self.tokens.append((self.tokenstart, obj)) - return - - def parse_comment(self, s, i): - m = EOL.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_comment, len(s)) - j = m.start(0) - self.token += s[i:j] - # We ignore comments. - #self.tokens.append(self.token) - return (self.parse_main, j) - - def parse_literal(self, s, i): - m = END_LITERAL.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_literal, len(s)) - j = m.start(0) - self.token += s[i:j] - c = s[j] - if c == '#': - self.hex = '' - return (self.parse_literal_hex, j+1) - self.add_token(LIT(self.token)) - return (self.parse_main, j) - - def parse_literal_hex(self, s, i): - c = s[i] - if HEX.match(c) and len(self.hex) < 2: - self.hex += c - return (self.parse_literal_hex, i+1) - if self.hex: - self.token += chr(int(self.hex, 16)) - return (self.parse_literal, i) - - def parse_number(self, s, i): - m = END_NUMBER.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_number, len(s)) - j = m.start(0) - self.token += s[i:j] - c = s[j] - if c == '.': - self.token += c - return (self.parse_float, j+1) - try: - self.add_token(int(self.token)) - except ValueError: - pass - return (self.parse_main, j) - def parse_float(self, s, i): - m = END_NUMBER.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_float, len(s)) - j = m.start(0) - self.token += s[i:j] - self.add_token(float(self.token)) - return (self.parse_main, j) - - def parse_keyword(self, s, i): - m = END_KEYWORD.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_keyword, len(s)) - j = m.start(0) - self.token += s[i:j] - if self.token == 'true': - token = True - elif self.token == 'false': - token = False - else: - token = KWD(self.token) - self.add_token(token) - return (self.parse_main, j) - - def parse_string(self, s, i): - m = END_STRING.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_string, len(s)) - j = m.start(0) - self.token += s[i:j] - c = s[j] - if 
c == '\\': - self.oct = '' - return (self.parse_string_1, j+1) - if c == '(': - self.paren += 1 - self.token += c - return (self.parse_string, j+1) - if c == ')': - self.paren -= 1 - if self.paren: - self.token += c - return (self.parse_string, j+1) - self.add_token(self.token) - return (self.parse_main, j+1) - def parse_string_1(self, s, i): - c = s[i] - if OCT_STRING.match(c) and len(self.oct) < 3: - self.oct += c - return (self.parse_string_1, i+1) - if self.oct: - self.token += chr(int(self.oct, 8)) - return (self.parse_string, i) - if c in ESC_STRING: - self.token += chr(ESC_STRING[c]) - return (self.parse_string, i+1) - - def parse_wopen(self, s, i): - c = s[i] - if c.isspace() or HEX.match(c): - return (self.parse_hexstring, i) - if c == '<': - self.add_token(KEYWORD_DICT_BEGIN) - i += 1 - return (self.parse_main, i) - - def parse_wclose(self, s, i): - c = s[i] - if c == '>': - self.add_token(KEYWORD_DICT_END) - i += 1 - return (self.parse_main, i) - - def parse_hexstring(self, s, i): - m = END_HEX_STRING.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_hexstring, len(s)) - j = m.start(0) - self.token += s[i:j] - token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)), - SPC.sub('', self.token)) - self.add_token(token) - return (self.parse_main, j) - - def nexttoken(self): - while not self.tokens: - self.fillbuf() - (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos) - token = self.tokens.pop(0) - return token - - def nextline(self): - ''' - Fetches a next line that ends either with \\r or \\n. 
- ''' - linebuf = '' - linepos = self.bufpos + self.charpos - eol = False - while 1: - self.fillbuf() - if eol: - c = self.buf[self.charpos] - # handle '\r\n' - if c == '\n': - linebuf += c - self.charpos += 1 - break - m = EOL.search(self.buf, self.charpos) - if m: - linebuf += self.buf[self.charpos:m.end(0)] - self.charpos = m.end(0) - if linebuf[-1] == '\r': - eol = True - else: - break - else: - linebuf += self.buf[self.charpos:] - self.charpos = len(self.buf) - return (linepos, linebuf) - - def revreadlines(self): - ''' - Fetches a next line backword. This is used to locate - the trailers at the end of a file. - ''' - self.fp.seek(0, 2) - pos = self.fp.tell() - buf = '' - while 0 < pos: - prevpos = pos - pos = max(0, pos-self.BUFSIZ) - self.fp.seek(pos) - s = self.fp.read(prevpos-pos) - if not s: break - while 1: - n = max(s.rfind('\r'), s.rfind('\n')) - if n == -1: - buf = s + buf - break - yield s[n:]+buf - s = s[:n] - buf = '' - return - - -## PSStackParser -## -class PSStackParser(PSBaseParser): - - def __init__(self, fp): - PSBaseParser.__init__(self, fp) - self.reset() - return - - def reset(self): - self.context = [] - self.curtype = None - self.curstack = [] - self.results = [] - return - - def seek(self, pos): - PSBaseParser.seek(self, pos) - self.reset() - return - - def push(self, *objs): - self.curstack.extend(objs) - return - def pop(self, n): - objs = self.curstack[-n:] - self.curstack[-n:] = [] - return objs - def popall(self): - objs = self.curstack - self.curstack = [] - return objs - def add_results(self, *objs): - self.results.extend(objs) - return - - def start_type(self, pos, type): - self.context.append((pos, self.curtype, self.curstack)) - (self.curtype, self.curstack) = (type, []) - return - def end_type(self, type): - if self.curtype != type: - raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) - objs = [ obj for (_,obj) in self.curstack ] - (pos, self.curtype, self.curstack) = self.context.pop() - return (pos, objs) - 
- def do_keyword(self, pos, token): - return - - def nextobject(self, direct=False): - ''' - Yields a list of objects: keywords, literals, strings, - numbers, arrays and dictionaries. Arrays and dictionaries - are represented as Python sequence and dictionaries. - ''' - while not self.results: - (pos, token) = self.nexttoken() - ##print (pos,token), (self.curtype, self.curstack) - if (isinstance(token, int) or - isinstance(token, float) or - isinstance(token, bool) or - isinstance(token, str) or - isinstance(token, PSLiteral)): - # normal token - self.push((pos, token)) - elif token == KEYWORD_ARRAY_BEGIN: - # begin array - self.start_type(pos, 'a') - elif token == KEYWORD_ARRAY_END: - # end array - try: - self.push(self.end_type('a')) - except PSTypeError: - if STRICT: raise - elif token == KEYWORD_DICT_BEGIN: - # begin dictionary - self.start_type(pos, 'd') - elif token == KEYWORD_DICT_END: - # end dictionary - try: - (pos, objs) = self.end_type('d') - if len(objs) % 2 != 0: - print "Incomplete dictionary construct" - objs.append("") # this isn't necessary. - # temporary fix. is this due to rental books? 
- # raise PSSyntaxError( - # 'Invalid dictionary construct: %r' % objs) - d = dict((literal_name(k), v) \ - for (k,v) in choplist(2, objs)) - self.push((pos, d)) - except PSTypeError: - if STRICT: raise - else: - self.do_keyword(pos, token) - if self.context: - continue - else: - if direct: - return self.pop(1)[0] - self.flush() - obj = self.results.pop(0) - return obj - - -LITERAL_CRYPT = PSLiteralTable.intern('Crypt') -LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl')) -LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW')) -LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85')) - - -## PDF Objects -## -class PDFObject(PSObject): pass - -class PDFException(PSException): pass -class PDFTypeError(PDFException): pass -class PDFValueError(PDFException): pass -class PDFNotImplementedError(PSException): pass - - -## PDFObjRef -## -class PDFObjRef(PDFObject): - - def __init__(self, doc, objid, genno): - if objid == 0: - if STRICT: - raise PDFValueError('PDF object id cannot be 0.') - self.doc = doc - self.objid = objid - self.genno = genno - return - - def __repr__(self): - return '' % (self.objid, self.genno) - - def resolve(self): - return self.doc.getobj(self.objid) - - -# resolve -def resolve1(x): - ''' - Resolve an object. If this is an array or dictionary, - it may still contains some indirect objects inside. - ''' - while isinstance(x, PDFObjRef): - x = x.resolve() - return x - -def resolve_all(x): - ''' - Recursively resolve X and all the internals. - Make sure there is no indirect reference within the nested object. - This procedure might be slow. - ''' - while isinstance(x, PDFObjRef): - x = x.resolve() - if isinstance(x, list): - x = [ resolve_all(v) for v in x ] - elif isinstance(x, dict): - for (k,v) in x.iteritems(): - x[k] = resolve_all(v) - return x - -def decipher_all(decipher, objid, genno, x): - ''' - Recursively decipher X. 
- ''' - if isinstance(x, str): - return decipher(objid, genno, x) - decf = lambda v: decipher_all(decipher, objid, genno, v) - if isinstance(x, list): - x = [decf(v) for v in x] - elif isinstance(x, dict): - x = dict((k, decf(v)) for (k, v) in x.iteritems()) - return x - - -# Type cheking -def int_value(x): - x = resolve1(x) - if not isinstance(x, int): - if STRICT: - raise PDFTypeError('Integer required: %r' % x) - return 0 - return x - -def float_value(x): - x = resolve1(x) - if not isinstance(x, float): - if STRICT: - raise PDFTypeError('Float required: %r' % x) - return 0.0 - return x - -def num_value(x): - x = resolve1(x) - if not (isinstance(x, int) or isinstance(x, float)): - if STRICT: - raise PDFTypeError('Int or Float required: %r' % x) - return 0 - return x - -def str_value(x): - x = resolve1(x) - if not isinstance(x, str): - if STRICT: - raise PDFTypeError('String required: %r' % x) - return '' - return x - -def list_value(x): - x = resolve1(x) - if not (isinstance(x, list) or isinstance(x, tuple)): - if STRICT: - raise PDFTypeError('List required: %r' % x) - return [] - return x - -def dict_value(x): - x = resolve1(x) - if not isinstance(x, dict): - if STRICT: - raise PDFTypeError('Dict required: %r' % x) - return {} - return x - -def stream_value(x): - x = resolve1(x) - if not isinstance(x, PDFStream): - if STRICT: - raise PDFTypeError('PDFStream required: %r' % x) - return PDFStream({}, '') - return x - -# ascii85decode(data) -def ascii85decode(data): - n = b = 0 - out = '' - for c in data: - if '!' 
<= c and c <= 'u': - n += 1 - b = b*85+(ord(c)-33) - if n == 5: - out += struct.pack('>L',b) - n = b = 0 - elif c == 'z': - assert n == 0 - out += '\0\0\0\0' - elif c == '~': - if n: - for _ in range(5-n): - b = b*85+84 - out += struct.pack('>L',b)[:n-1] - break - return out - - -## PDFStream type -class PDFStream(PDFObject): - def __init__(self, dic, rawdata, decipher=None): - length = int_value(dic.get('Length', 0)) - eol = rawdata[length:] - # quick and dirty fix for false length attribute, - # might not work if the pdf stream parser has a problem - if decipher != None and decipher.__name__ == 'decrypt_aes': - if (len(rawdata) % 16) != 0: - cutdiv = len(rawdata) // 16 - rawdata = rawdata[:16*cutdiv] - else: - if eol in ('\r', '\n', '\r\n'): - rawdata = rawdata[:length] - - self.dic = dic - self.rawdata = rawdata - self.decipher = decipher - self.data = None - self.decdata = None - self.objid = None - self.genno = None - return - - def set_objid(self, objid, genno): - self.objid = objid - self.genno = genno - return - - def __repr__(self): - if self.rawdata: - return '' % \ - (self.objid, len(self.rawdata), self.dic) - else: - return '' % \ - (self.objid, len(self.data), self.dic) - - def decode(self): - assert self.data is None and self.rawdata is not None - data = self.rawdata - if self.decipher: - # Handle encryption - data = self.decipher(self.objid, self.genno, data) - if gen_xref_stm: - self.decdata = data # keep decrypted data - if 'Filter' not in self.dic: - self.data = data - self.rawdata = None - ##print self.dict - return - filters = self.dic['Filter'] - if not isinstance(filters, list): - filters = [ filters ] - for f in filters: - if f in LITERALS_FLATE_DECODE: - # will get errors if the document is encrypted. 
- data = zlib.decompress(data) - elif f in LITERALS_LZW_DECODE: - data = ''.join(LZWDecoder(StringIO(data)).run()) - elif f in LITERALS_ASCII85_DECODE: - data = ascii85decode(data) - elif f == LITERAL_CRYPT: - raise PDFNotImplementedError('/Crypt filter is unsupported') - else: - raise PDFNotImplementedError('Unsupported filter: %r' % f) - # apply predictors - if 'DP' in self.dic: - params = self.dic['DP'] - else: - params = self.dic.get('DecodeParms', {}) - if 'Predictor' in params: - pred = int_value(params['Predictor']) - if pred: - if pred != 12: - raise PDFNotImplementedError( - 'Unsupported predictor: %r' % pred) - if 'Columns' not in params: - raise PDFValueError( - 'Columns undefined for predictor=12') - columns = int_value(params['Columns']) - buf = '' - ent0 = '\x00' * columns - for i in xrange(0, len(data), columns+1): - pred = data[i] - ent1 = data[i+1:i+1+columns] - if pred == '\x02': - ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \ - for (a,b) in zip(ent0,ent1)) - buf += ent1 - ent0 = ent1 - data = buf - self.data = data - self.rawdata = None - return - - def get_data(self): - if self.data is None: - self.decode() - return self.data - - def get_rawdata(self): - return self.rawdata - - def get_decdata(self): - if self.decdata is not None: - return self.decdata - data = self.rawdata - if self.decipher and data: - # Handle encryption - data = self.decipher(self.objid, self.genno, data) + return (AES, RSA) + +AES, RSA = _load_crypto() + +META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml') +NSMAP = {'adept': 'http://ns.adobe.com/adept', + 'enc': 'http://www.w3.org/2001/04/xmlenc#'} + +class Decryptor(object): + def __init__(self, bookkey, encryption): + enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag) + self._aes = AES(bookkey) + encryption = etree.fromstring(encryption) + self._encrypted = encrypted = set() + expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'), + enc('CipherReference')) + for elem in 
encryption.findall(expr): + path = elem.get('URI', None) + if path is not None: + path = path.encode('utf-8') + encrypted.add(path) + + def decompress(self, bytes): + dc = zlib.decompressobj(-15) + bytes = dc.decompress(bytes) + ex = dc.decompress('Z') + dc.flush() + if ex: + bytes = bytes + ex + return bytes + + def decrypt(self, path, data): + if path in self._encrypted: + data = self._aes.decrypt(data)[16:] + data = data[:-ord(data[-1])] + data = self.decompress(data) return data - -## PDF Exceptions -## -class PDFSyntaxError(PDFException): pass -class PDFNoValidXRef(PDFSyntaxError): pass -class PDFEncryptionError(PDFException): pass -class PDFPasswordIncorrect(PDFEncryptionError): pass - -# some predefined literals and keywords. -LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm') -LITERAL_XREF = PSLiteralTable.intern('XRef') -LITERAL_PAGE = PSLiteralTable.intern('Page') -LITERAL_PAGES = PSLiteralTable.intern('Pages') -LITERAL_CATALOG = PSLiteralTable.intern('Catalog') - - -## XRefs -## - -## PDFXRef -## -class PDFXRef(object): - - def __init__(self): - self.offsets = None - return - - def __repr__(self): - return '' % len(self.offsets) - - def objids(self): - return self.offsets.iterkeys() - - def load(self, parser): - self.offsets = {} - while 1: - try: - (pos, line) = parser.nextline() - except PSEOF: - raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - if not line: - raise PDFNoValidXRef('Premature eof: %r' % parser) - if line.startswith('trailer'): - parser.seek(pos) - break - f = line.strip().split(' ') - if len(f) != 2: - raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line)) - try: - (start, nobjs) = map(int, f) - except ValueError: - raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line)) - for objid in xrange(start, start+nobjs): - try: - (_, line) = parser.nextline() - except PSEOF: - raise PDFNoValidXRef('Unexpected EOF - file corrupted?') - f = line.strip().split(' ') - if len(f) != 3: - raise 
PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line)) - (pos, genno, use) = f - if use != 'n': continue - self.offsets[objid] = (int(genno), int(pos)) - self.load_trailer(parser) - return - - KEYWORD_TRAILER = PSKeywordTable.intern('trailer') - def load_trailer(self, parser): +# check file to make check whether it's probably an Adobe Adept encrypted ePub +def adeptBook(inpath): + with closing(ZipFile(open(inpath, 'rb'))) as inf: + namelist = set(inf.namelist()) + if 'META-INF/rights.xml' not in namelist or \ + 'META-INF/encryption.xml' not in namelist: + return False try: - (_,kwd) = parser.nexttoken() - assert kwd is self.KEYWORD_TRAILER - (_,dic) = parser.nextobject(direct=True) - except PSEOF: - x = parser.pop(1) - if not x: - raise PDFNoValidXRef('Unexpected EOF - file corrupted') - (_,dic) = x[0] - self.trailer = dict_value(dic) - return - - def getpos(self, objid): + rights = etree.fromstring(inf.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('encryptedKey'),) + bookkey = ''.join(rights.findtext(expr)) + if len(bookkey) == 172: + return True + except: + # if we couldn't check, assume it is + return True + return False + +def decryptBook(userkey, inpath, outpath): + if AES is None: + raise ADEPTError(u"PyCrypto or OpenSSL must be installed.") + rsa = RSA(userkey) + with closing(ZipFile(open(inpath, 'rb'))) as inf: + namelist = set(inf.namelist()) + if 'META-INF/rights.xml' not in namelist or \ + 'META-INF/encryption.xml' not in namelist: + print u"{0:s} is DRM-free.".format(os.path.basename(inpath)) + return 1 + for name in META_NAMES: + namelist.remove(name) try: - (genno, pos) = self.offsets[objid] - except KeyError: - raise - return (None, pos) - - -## PDFXRefStream -## -class PDFXRefStream(object): - - def __init__(self): - self.index = None - self.data = None - self.entlen = None - self.fl1 = self.fl2 = self.fl3 = None - return - - def __repr__(self): - return '' % self.index - - 
def objids(self): - for first, size in self.index: - for objid in xrange(first, first + size): - yield objid - - def load(self, parser, debug=0): - (_,objid) = parser.nexttoken() # ignored - (_,genno) = parser.nexttoken() # ignored - (_,kwd) = parser.nexttoken() - (_,stream) = parser.nextobject() - if not isinstance(stream, PDFStream) or \ - stream.dic['Type'] is not LITERAL_XREF: - raise PDFNoValidXRef('Invalid PDF stream spec.') - size = stream.dic['Size'] - index = stream.dic.get('Index', (0,size)) - self.index = zip(islice(index, 0, None, 2), - islice(index, 1, None, 2)) - (self.fl1, self.fl2, self.fl3) = stream.dic['W'] - self.data = stream.get_data() - self.entlen = self.fl1+self.fl2+self.fl3 - self.trailer = stream.dic - return - - def getpos(self, objid): - offset = 0 - for first, size in self.index: - if first <= objid and objid < (first + size): - break - offset += size - else: - raise KeyError(objid) - i = self.entlen * ((objid - first) + offset) - ent = self.data[i:i+self.entlen] - f1 = nunpack(ent[:self.fl1], 1) - if f1 == 1: - pos = nunpack(ent[self.fl1:self.fl1+self.fl2]) - genno = nunpack(ent[self.fl1+self.fl2:]) - return (None, pos) - elif f1 == 2: - objid = nunpack(ent[self.fl1:self.fl1+self.fl2]) - index = nunpack(ent[self.fl1+self.fl2:]) - return (objid, index) - # this is a free object - raise KeyError(objid) - - -## PDFDocument -## -## A PDFDocument object represents a PDF document. -## Since a PDF file is usually pretty big, normally it is not loaded -## at once. Rather it is parsed dynamically as processing goes. -## A PDF parser is associated with the document. -## -class PDFDocument(object): - - def __init__(self): - self.xrefs = [] - self.objs = {} - self.parsed_objs = {} - self.root = None - self.catalog = None - self.parser = None - self.encryption = None - self.decipher = None - return - - # set_parser(parser) - # Associates the document with an (already initialized) parser object. 
- def set_parser(self, parser): - if self.parser: return - self.parser = parser - # The document is set to be temporarily ready during collecting - # all the basic information about the document, e.g. - # the header, the encryption information, and the access rights - # for the document. - self.ready = True - # Retrieve the information of each header that was appended - # (maybe multiple times) at the end of the document. - self.xrefs = parser.read_xref() - for xref in self.xrefs: - trailer = xref.trailer - if not trailer: continue - - # If there's an encryption info, remember it. - if 'Encrypt' in trailer: - #assert not self.encryption + rights = etree.fromstring(inf.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('encryptedKey'),) + bookkey = ''.join(rights.findtext(expr)) + if len(bookkey) != 172: + print u"{0:s} is not a secure Adobe Adept ePub.".format(os.path.basename(inpath)) + return 1 + bookkey = rsa.decrypt(bookkey.decode('base64')) + # Padded as per RSAES-PKCS1-v1_5 + if bookkey[-17] != '\x00': + print u"Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath)) + return 2 + encryption = inf.read('META-INF/encryption.xml') + decryptor = Decryptor(bookkey[-16:], encryption) + kwds = dict(compression=ZIP_DEFLATED, allowZip64=False) + with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf: + zi = ZipInfo('mimetype') + zi.compress_type=ZIP_STORED try: - self.encryption = (list_value(trailer['ID']), - dict_value(trailer['Encrypt'])) - # fix for bad files + # if the mimetype is present, get its info, including time-stamp + oldzi = inf.getinfo('mimetype') + # copy across fields to be preserved + zi.date_time = oldzi.date_time + zi.comment = oldzi.comment + zi.extra = oldzi.extra + zi.internal_attr = oldzi.internal_attr + # external attributes are dependent on the create system, so copy both. 
+ zi.external_attr = oldzi.external_attr + zi.create_system = oldzi.create_system except: - self.encryption = ('ffffffffffffffffffffffffffffffffffff', - dict_value(trailer['Encrypt'])) - if 'Root' in trailer: - self.set_root(dict_value(trailer['Root'])) - break - else: - raise PDFSyntaxError('No /Root object! - Is this really a PDF?') - # The document is set to be non-ready again, until all the - # proper initialization (asking the password key and - # verifying the access permission, so on) is finished. - self.ready = False - return - - # set_root(root) - # Set the Root dictionary of the document. - # Each PDF file must have exactly one /Root dictionary. - def set_root(self, root): - self.root = root - self.catalog = dict_value(self.root) - if self.catalog.get('Type') is not LITERAL_CATALOG: - if STRICT: - raise PDFSyntaxError('Catalog not found!') - return - # initialize(password='') - # Perform the initialization with a given password. - # This step is mandatory even if there's no password associated - # with the document. 
- def initialize(self, password=''): - if not self.encryption: - self.is_printable = self.is_modifiable = self.is_extractable = True - self.ready = True - return - (docid, param) = self.encryption - type = literal_name(param['Filter']) - if type == 'Adobe.APS': - return self.initialize_adobe_ps(password, docid, param) - if type == 'Standard': - return self.initialize_standard(password, docid, param) - if type == 'EBX_HANDLER': - return self.initialize_ebx(password, docid, param) - raise PDFEncryptionError('Unknown filter: param=%r' % param) - - def initialize_adobe_ps(self, password, docid, param): - global KEYFILEPATH - self.decrypt_key = self.genkey_adobe_ps(param) - self.genkey = self.genkey_v4 - self.decipher = self.decrypt_aes - self.ready = True - return - - def genkey_adobe_ps(self, param): - # nice little offline principal keys dictionary - # global static principal key for German Onleihe / Bibliothek Digital - principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')} - self.is_printable = self.is_modifiable = self.is_extractable = True - length = int_value(param.get('Length', 0)) / 8 - edcdata = str_value(param.get('EDCData')).decode('base64') - pdrllic = str_value(param.get('PDRLLic')).decode('base64') - pdrlpol = str_value(param.get('PDRLPol')).decode('base64') - edclist = [] - for pair in edcdata.split('\n'): - edclist.append(pair) - # principal key request - for key in principalkeys: - if key in pdrllic: - principalkey = principalkeys[key] - else: - raise ADEPTError('Cannot find principal key for this pdf') - shakey = SHA256(principalkey) - ivector = 16 * chr(0) - plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64')) - if plaintext[-16:] != 16 * chr(16): - raise ADEPTError('Offlinekey cannot be decrypted, aborting ...') - pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol) - if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16: - 
raise ADEPTError('Could not decrypt PDRLPol, aborting ...') - else: - cutter = -1 * ord(pdrlpol[-1]) - pdrlpol = pdrlpol[:cutter] - return plaintext[:16] - - PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \ - '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' - # experimental aes pw support - def initialize_standard(self, password, docid, param): - # copy from a global variable - V = int_value(param.get('V', 0)) - if (V <=0 or V > 4): - raise PDFEncryptionError('Unknown algorithm: param=%r' % param) - length = int_value(param.get('Length', 40)) # Key length (bits) - O = str_value(param['O']) - R = int_value(param['R']) # Revision - if 5 <= R: - raise PDFEncryptionError('Unknown revision: %r' % R) - U = str_value(param['U']) - P = int_value(param['P']) - try: - EncMetadata = str_value(param['EncryptMetadata']) - except: - EncMetadata = 'True' - self.is_printable = bool(P & 4) - self.is_modifiable = bool(P & 8) - self.is_extractable = bool(P & 16) - self.is_annotationable = bool(P & 32) - self.is_formsenabled = bool(P & 256) - self.is_textextractable = bool(P & 512) - self.is_assemblable = bool(P & 1024) - self.is_formprintable = bool(P & 2048) - # Algorithm 3.2 - password = (password+self.PASSWORD_PADDING)[:32] # 1 - hash = hashlib.md5(password) # 2 - hash.update(O) # 3 - hash.update(struct.pack('= 3: - # Algorithm 3.5 - hash = hashlib.md5(self.PASSWORD_PADDING) # 2 - hash.update(docid[0]) # 3 - x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4 - for i in xrange(1,19+1): - k = ''.join( chr(ord(c) ^ i) for c in key ) - x = ARC4.new(k).decrypt(x) - u1 = x+x # 32bytes total - if R == 2: - is_authenticated = (u1 == U) - else: - is_authenticated = (u1[:16] == U[:16]) - if not is_authenticated: - raise ADEPTError('Password is not correct.') - self.decrypt_key = key - # genkey method - if V == 1 or V == 2: - self.genkey = self.genkey_v2 - elif V == 3: - self.genkey = self.genkey_v3 - elif V == 4: - self.genkey = self.genkey_v2 - #self.genkey = self.genkey_v3 if V == 3 
else self.genkey_v2 - # rc4 - if V != 4: - self.decipher = self.decipher_rc4 # XXX may be AES - # aes - elif V == 4 and Length == 128: - elf.decipher = self.decipher_aes - elif V == 4 and Length == 256: - raise PDFNotImplementedError('AES256 encryption is currently unsupported') - self.ready = True - return - - def initialize_ebx(self, password, docid, param): - self.is_printable = self.is_modifiable = self.is_extractable = True - rsa = RSA(password) - length = int_value(param.get('Length', 0)) / 8 - rights = str_value(param.get('ADEPT_LICENSE')).decode('base64') - rights = zlib.decompress(rights, -15) - rights = etree.fromstring(rights) - expr = './/{http://ns.adobe.com/adept}encryptedKey' - bookkey = ''.join(rights.findtext(expr)).decode('base64') - bookkey = rsa.decrypt(bookkey) - if bookkey[0] != '\x02': - raise ADEPTError('error decrypting book session key') - index = bookkey.index('\0') + 1 - bookkey = bookkey[index:] - ebx_V = int_value(param.get('V', 4)) - ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6)) - # added because of improper booktype / decryption book session key errors - if length > 0: - if len(bookkey) == length: - if ebx_V == 3: - V = 3 - else: - V = 2 - elif len(bookkey) == length + 1: - V = ord(bookkey[0]) - bookkey = bookkey[1:] - else: - print "ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type) - print "length is %d and len(bookkey) is %d" % (length, len(bookkey)) - print "bookkey[0] is %d" % ord(bookkey[0]) - raise ADEPTError('error decrypting book session key - mismatched length') - else: - # proper length unknown try with whatever you have - print "ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type) - print "length is %d and len(bookkey) is %d" % (length, len(bookkey)) - print "bookkey[0] is %d" % ord(bookkey[0]) - if ebx_V == 3: - V = 3 - else: - V = 2 - self.decrypt_key = bookkey - self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2 - self.decipher = self.decrypt_rc4 - self.ready = True - return - - # genkey functions - 
def genkey_v2(self, objid, genno): - objid = struct.pack(' PDFObjStmRef.maxindex: - PDFObjStmRef.maxindex = index - - -## PDFParser -## -class PDFParser(PSStackParser): - - def __init__(self, doc, fp): - PSStackParser.__init__(self, fp) - self.doc = doc - self.doc.set_parser(self) - return - - def __repr__(self): - return '' - - KEYWORD_R = PSKeywordTable.intern('R') - KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj') - KEYWORD_STREAM = PSKeywordTable.intern('stream') - KEYWORD_XREF = PSKeywordTable.intern('xref') - KEYWORD_STARTXREF = PSKeywordTable.intern('startxref') - def do_keyword(self, pos, token): - if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): - self.add_results(*self.pop(1)) - return - if token is self.KEYWORD_ENDOBJ: - self.add_results(*self.pop(4)) - return - - if token is self.KEYWORD_R: - # reference to indirect object - try: - ((_,objid), (_,genno)) = self.pop(2) - (objid, genno) = (int(objid), int(genno)) - obj = PDFObjRef(self.doc, objid, genno) - self.push((pos, obj)) - except PSSyntaxError: - pass - return - - if token is self.KEYWORD_STREAM: - # stream object - ((_,dic),) = self.pop(1) - dic = dict_value(dic) - try: - objlen = int_value(dic['Length']) - except KeyError: - if STRICT: - raise PDFSyntaxError('/Length is undefined: %r' % dic) - objlen = 0 - self.seek(pos) - try: - (_, line) = self.nextline() # 'stream' - except PSEOF: - if STRICT: - raise PDFSyntaxError('Unexpected EOF') - return - pos += len(line) - self.fp.seek(pos) - data = self.fp.read(objlen) - self.seek(pos+objlen) - while 1: - try: - (linepos, line) = self.nextline() - except PSEOF: - if STRICT: - raise PDFSyntaxError('Unexpected EOF') - break - if 'endstream' in line: - i = line.index('endstream') - objlen += i - data += line[:i] - break - objlen += len(line) - data += line - self.seek(pos+objlen) - obj = PDFStream(dic, data, self.doc.decipher) - self.push((pos, obj)) - return - - # others - self.push((pos, token)) - return - - def find_xref(self): - # search the 
last xref table by scanning the file backwards. - prev = None - for line in self.revreadlines(): - line = line.strip() - if line == 'startxref': break - if line: - prev = line - else: - raise PDFNoValidXRef('Unexpected EOF') - return int(prev) - - # read xref table - def read_xref_from(self, start, xrefs): - self.seek(start) - self.reset() - try: - (pos, token) = self.nexttoken() - except PSEOF: - raise PDFNoValidXRef('Unexpected EOF') - if isinstance(token, int): - # XRefStream: PDF-1.5 - if GEN_XREF_STM == 1: - global gen_xref_stm - gen_xref_stm = True - self.seek(pos) - self.reset() - xref = PDFXRefStream() - xref.load(self) - else: - if token is not self.KEYWORD_XREF: - raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % - (pos, token)) - self.nextline() - xref = PDFXRef() - xref.load(self) - xrefs.append(xref) - trailer = xref.trailer - if 'XRefStm' in trailer: - pos = int_value(trailer['XRefStm']) - self.read_xref_from(pos, xrefs) - if 'Prev' in trailer: - # find previous xref - pos = int_value(trailer['Prev']) - self.read_xref_from(pos, xrefs) - return - - # read xref tables and trailers - def read_xref(self): - xrefs = [] - trailerpos = None - try: - pos = self.find_xref() - self.read_xref_from(pos, xrefs) - except PDFNoValidXRef: - # fallback - self.seek(0) - pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b') - offsets = {} - xref = PDFXRef() - while 1: - try: - (pos, line) = self.nextline() - except PSEOF: - break - if line.startswith('trailer'): - trailerpos = pos # remember last trailer - m = pat.match(line) - if not m: continue - (objid, genno) = m.groups() - offsets[int(objid)] = (0, pos) - if not offsets: raise - xref.offsets = offsets - if trailerpos: - self.seek(trailerpos) - xref.load_trailer(self) - xrefs.append(xref) - return xrefs - -## PDFObjStrmParser -## -class PDFObjStrmParser(PDFParser): - - def __init__(self, data, doc): - PSStackParser.__init__(self, StringIO(data)) - self.doc = doc - return - - def flush(self): - 
self.add_results(*self.popall()) - return - - KEYWORD_R = KWD('R') - def do_keyword(self, pos, token): - if token is self.KEYWORD_R: - # reference to indirect object - try: - ((_,objid), (_,genno)) = self.pop(2) - (objid, genno) = (int(objid), int(genno)) - obj = PDFObjRef(self.doc, objid, genno) - self.push((pos, obj)) - except PSSyntaxError: - pass - return - # others - self.push((pos, token)) - return - -### -### My own code, for which there is none else to blame - -class PDFSerializer(object): - def __init__(self, inf, userkey): - global GEN_XREF_STM, gen_xref_stm - gen_xref_stm = GEN_XREF_STM > 1 - self.version = inf.read(8) - inf.seek(0) - self.doc = doc = PDFDocument() - parser = PDFParser(doc, inf) - doc.initialize(userkey) - self.objids = objids = set() - for xref in reversed(doc.xrefs): - trailer = xref.trailer - for objid in xref.objids(): - objids.add(objid) - trailer = dict(trailer) - trailer.pop('Prev', None) - trailer.pop('XRefStm', None) - if 'Encrypt' in trailer: - objids.remove(trailer.pop('Encrypt').objid) - self.trailer = trailer - - def dump(self, outf): - self.outf = outf - self.write(self.version) - self.write('\n%\xe2\xe3\xcf\xd3\n') - doc = self.doc - objids = self.objids - xrefs = {} - maxobj = max(objids) - trailer = dict(self.trailer) - trailer['Size'] = maxobj + 1 - for objid in objids: - obj = doc.getobj(objid) - if isinstance(obj, PDFObjStmRef): - xrefs[objid] = obj - continue - if obj is not None: - try: - genno = obj.genno - except AttributeError: - genno = 0 - xrefs[objid] = (self.tell(), genno) - self.serialize_indirect(objid, obj) - startxref = self.tell() - - if not gen_xref_stm: - self.write('xref\n') - self.write('0 %d\n' % (maxobj + 1,)) - for objid in xrange(0, maxobj + 1): - if objid in xrefs: - # force the genno to be 0 - self.write("%010d 00000 n \n" % xrefs[objid][0]) - else: - self.write("%010d %05d f \n" % (0, 65535)) - - self.write('trailer\n') - self.serialize_object(trailer) - self.write('\nstartxref\n%d\n%%%%EOF' % 
startxref) - - else: # Generate crossref stream. - - # Calculate size of entries - maxoffset = max(startxref, maxobj) - maxindex = PDFObjStmRef.maxindex - fl2 = 2 - power = 65536 - while maxoffset >= power: - fl2 += 1 - power *= 256 - fl3 = 1 - power = 256 - while maxindex >= power: - fl3 += 1 - power *= 256 - - index = [] - first = None - prev = None - data = [] - # Put the xrefstream's reference in itself - startxref = self.tell() - maxobj += 1 - xrefs[maxobj] = (startxref, 0) - for objid in sorted(xrefs): - if first is None: - first = objid - elif objid != prev + 1: - index.extend((first, prev - first + 1)) - first = objid - prev = objid - objref = xrefs[objid] - if isinstance(objref, PDFObjStmRef): - f1 = 2 - f2 = objref.stmid - f3 = objref.index - else: - f1 = 1 - f2 = objref[0] - # we force all generation numbers to be 0 - # f3 = objref[1] - f3 = 0 - - data.append(struct.pack('>B', f1)) - data.append(struct.pack('>L', f2)[-fl2:]) - data.append(struct.pack('>L', f3)[-fl3:]) - index.extend((first, prev - first + 1)) - data = zlib.compress(''.join(data)) - dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index, - 'W': [1, fl2, fl3], 'Length': len(data), - 'Filter': LITERALS_FLATE_DECODE[0], - 'Root': trailer['Root'],} - if 'Info' in trailer: - dic['Info'] = trailer['Info'] - xrefstm = PDFStream(dic, data) - self.serialize_indirect(maxobj, xrefstm) - self.write('startxref\n%d\n%%%%EOF' % startxref) - def write(self, data): - self.outf.write(data) - self.last = data[-1:] - - def tell(self): - return self.outf.tell() - - def escape_string(self, string): - string = string.replace('\\', '\\\\') - string = string.replace('\n', r'\n') - string = string.replace('(', r'\(') - string = string.replace(')', r'\)') - # get rid of ciando id - regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}') - if regularexp.match(string): return ('http://www.ciando.com') - return string - - def serialize_object(self, obj): - if isinstance(obj, dict): - # 
Correct malformed Mac OS resource forks for Stanza - if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \ - and isinstance(obj['Type'], int): - obj['Subtype'] = obj['Type'] - del obj['Type'] - # end - hope this doesn't have bad effects - self.write('<<') - for key, val in obj.items(): - self.write('/%s' % key) - self.serialize_object(val) - self.write('>>') - elif isinstance(obj, list): - self.write('[') - for val in obj: - self.serialize_object(val) - self.write(']') - elif isinstance(obj, str): - self.write('(%s)' % self.escape_string(obj)) - elif isinstance(obj, bool): - if self.last.isalnum(): - self.write(' ') - self.write(str(obj).lower()) - elif isinstance(obj, (int, long, float)): - if self.last.isalnum(): - self.write(' ') - self.write(str(obj)) - elif isinstance(obj, PDFObjRef): - if self.last.isalnum(): - self.write(' ') - self.write('%d %d R' % (obj.objid, 0)) - elif isinstance(obj, PDFStream): - ### If we don't generate cross ref streams the object streams - ### are no longer useful, as we have extracted all objects from - ### them. Therefore leave them out from the output. - if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm: - self.write('(deleted)') - else: - data = obj.get_decdata() - self.serialize_object(obj.dic) - self.write('stream\n') - self.write(data) - self.write('\nendstream') - else: - data = str(obj) - if data[0].isalnum() and self.last.isalnum(): - self.write(' ') - self.write(data) - - def serialize_indirect(self, objid, obj): - self.write('%d 0 obj' % (objid,)) - self.serialize_object(obj) - if self.last.isalnum(): - self.write('\n') - self.write('endobj\n') - - - - -def decryptBook(userkey, inpath, outpath): - if RSA is None: - raise ADEPTError(u"PyCrypto or OpenSSL must be installed.") - with open(inpath, 'rb') as inf: - try: - serializer = PDFSerializer(inf, userkey) + outf.writestr(zi, decryptor.decrypt(path, data)) except: - print u"Error serializing pdf {0}. 
Probably wrong key.".format(os.path.basename(inpath)) + print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()) return 2 - # hope this will fix the 'bad file descriptor' problem - with open(outpath, 'wb') as outf: - # help construct to make sure the method runs to the end - try: - serializer.dump(outf) - except Exception, e: - print u"error writing pdf: {0}".format(e.args[0]) - return 2 return 0 @@ -2197,7 +466,7 @@ def cli_main(): argv=unicode_argv() progname = os.path.basename(argv[0]) if len(argv) != 4: - print u"usage: {0} ".format(progname) + print u"usage: {0} ".format(progname) return 1 keypath, inpath, outpath = argv[1:] userkey = open(keypath,'rb').read() @@ -2206,7 +475,6 @@ def cli_main(): print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath)) return result - def gui_main(): try: import Tkinter @@ -2266,8 +534,8 @@ def gui_main(): def get_inpath(self): inpath = tkFileDialog.askopenfilename( - parent=None, title=u"Select ADEPT-encrypted PDF file to decrypt", - defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')]) + parent=None, title=u"Select ADEPT-encrypted ePub file to decrypt", + defaultextension=u".epub", filetypes=[('ePub files', '.epub')]) if inpath: inpath = os.path.normpath(inpath) self.inpath.delete(0, Tkconstants.END) @@ -2276,8 +544,8 @@ def gui_main(): def get_outpath(self): outpath = tkFileDialog.asksaveasfilename( - parent=None, title=u"Select unencrypted PDF file to produce", - defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')]) + parent=None, title=u"Select unencrypted ePub file to produce", + defaultextension=u".epub", filetypes=[('ePub files', '.epub')]) if outpath: outpath = os.path.normpath(outpath) self.outpath.delete(0, Tkconstants.END) @@ -2305,30 +573,21 @@ def gui_main(): try: decrypt_status = decryptBook(userkey, inpath, outpath) except Exception, e: - self.status['text'] = u"Error; 
{0}".format(e.args[0]) + self.status['text'] = u"Error: {0}".format(e.args[0]) return if decrypt_status == 0: self.status['text'] = u"File successfully decrypted" else: self.status['text'] = u"The was an error decrypting the file." - root = Tkinter.Tk() - if RSA is None: - root.withdraw() - tkMessageBox.showerror( - "INEPT PDF", - "This script requires OpenSSL or PyCrypto, which must be installed " - "separately. Read the top-of-script comment for details.") - return 1 - root.title(u"Adobe Adept PDF Decrypter v.{0}".format(__version__)) + root.title(u"Adobe Adept ePub Decrypter v.{0}".format(__version__)) root.resizable(True, False) - root.minsize(370, 0) + root.minsize(300, 0) DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1) root.mainloop() return 0 - if __name__ == '__main__': if len(sys.argv) > 1: sys.exit(cli_main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py b/DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py index 929ce57..1986e20 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py @@ -1,89 +1,74 @@ -#!/usr/bin/env python +#! /usr/bin/python # -*- coding: utf-8 -*- from __future__ import with_statement -# ignobleepub.pyw, version 3.6 -# Copyright © 2009-2012 by DiapDealer et al. - -# engine to remove drm from Kindle for Mac and Kindle for PC books -# for personal use for archiving and converting your ebooks - -# PLEASE DO NOT PIRATE EBOOKS! - -# We want all authors and publishers, and eBook stores to live -# long and prosperous lives but at the same time we just want to -# be able to read OUR books on whatever device we want and to keep -# readable for a long, long time - -# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle, -# unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates -# and many many others -# Special thanks to The Dark Reverser for MobiDeDrm and CMBDTC for cmbdtc_dump -# from which this script borrows most unashamedly. 
- - -# Changelog -# 1.0 - Name change to k4mobidedrm. Adds Mac support, Adds plugin code -# 1.1 - Adds support for additional kindle.info files -# 1.2 - Better error handling for older Mobipocket -# 1.3 - Don't try to decrypt Topaz books -# 1.7 - Add support for Topaz books and Kindle serial numbers. Split code. -# 1.9 - Tidy up after Topaz, minor exception changes -# 2.1 - Topaz fix and filename sanitizing -# 2.2 - Topaz Fix and minor Mac code fix -# 2.3 - More Topaz fixes -# 2.4 - K4PC/Mac key generation fix -# 2.6 - Better handling of non-K4PC/Mac ebooks -# 2.7 - Better trailing bytes handling in mobidedrm -# 2.8 - Moved parsing of kindle.info files to mac & pc util files. -# 3.1 - Updated for new calibre interface. Now __init__ in plugin. -# 3.5 - Now support Kindle for PC/Mac 1.6 -# 3.6 - Even better trailing bytes handling in mobidedrm -# 3.7 - Add support for Amazon Print Replica ebooks. -# 3.8 - Improved Topaz support -# 4.1 - Improved Topaz support and faster decryption with alfcrypto -# 4.2 - Added support for Amazon's KF8 format ebooks -# 4.4 - Linux calls to Wine added, and improved configuration dialog -# 4.5 - Linux works again without Wine. Some Mac key file search changes -# 4.6 - First attempt to handle unicode properly -# 4.7 - Added timing reports, and changed search for Mac key files -# 4.8 - Much better unicode handling, matching the updated inept and ignoble scripts -# - Moved back into plugin, __init__ in plugin now only contains plugin code. 
-# 4.9 - Missed some invalid characters in cleanup_name -# 5.0 - Extraction of info from Kindle for PC/Mac moved into kindlekey.py -# - tweaked GetDecryptedBook interface to leave passed parameters unchanged -# 5.1 - moved unicode_argv call inside main for Windows DeDRM compatibility -# 5.2 - Fixed error in command line processing of unicode arguments - -__version__ = '5.2' - - -import sys, os, re -import csv -import getopt -import re -import traceback -import time -import htmlentitydefs -import json +# ineptpdf.pyw, version 7.11 +# Copyright © 2009-2010 by i♥cabbages -class DrmException(Exception): - pass +# Released under the terms of the GNU General Public Licence, version 3 +# + +# Modified 2010–2012 by some_updates, DiapDealer and Apprentice Alf + +# Windows users: Before running this program, you must first install Python 2.6 +# from and PyCrypto from +# (make sure to +# install the version for Python 2.6). Save this script file as +# ineptpdf.pyw and double-click on it to run it. +# +# Mac OS X users: Save this script file as ineptpdf.pyw. You can run this +# program from the command line (pythonw ineptpdf.pyw) or by double-clicking +# it when it has been associated with PythonLauncher. -if 'calibre' in sys.modules: - inCalibre = True -else: - inCalibre = False +# Revision history: +# 1 - Initial release +# 2 - Improved determination of key-generation algorithm +# 3 - Correctly handle PDF >=1.5 cross-reference streams +# 4 - Removal of ciando's personal ID +# 5 - Automated decryption of a complete directory +# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der +# 7 - Get cross reference streams and object streams working for input. +# Not yet supported on output but this only effects file size, +# not functionality. 
(anon2) +# 7.1 - Correct a problem when an old trailer is not followed by startxref +# 7.2 - Correct malformed Mac OS resource forks for Stanza (anon2) +# - Support for cross ref streams on output (decreases file size) +# 7.3 - Correct bug in trailer with cross ref stream that caused the error +# "The root object is missing or invalid" in Adobe Reader. (anon2) +# 7.4 - Force all generation numbers in output file to be 0, like in v6. +# Fallback code for wrong xref improved (search till last trailer +# instead of first) (anon2) +# 7.5 - allow support for OpenSSL to replace pycrypto on all platforms +# implemented ARC4 interface to OpenSSL +# fixed minor typos +# 7.6 - backported AES and other fixes from version 8.4.48 +# 7.7 - On Windows try PyCrypto first and OpenSSL next +# 7.8 - Modify interface to allow use of import +# 7.9 - Bug fix for some session key errors when len(bookkey) > length required +# 7.10 - Various tweaks to fix minor problems. +# 7.11 - More tweaks to fix minor problems. +# 7.12 - Revised to allow use in calibre plugins to eliminate need for duplicate code +# 7.13 - Fixed erroneous mentions of ineptepub +# 7.14 - moved unicode_argv call inside main for Windows DeDRM compatibility +# 8.0 - Work if TkInter is missing +# 8.0.1 - Broken Metadata fix. -if inCalibre: - from calibre_plugins.dedrm import mobidedrm - from calibre_plugins.dedrm import topazextract - from calibre_plugins.dedrm import kgenpids -else: - import mobidedrm - import topazextract - import kgenpids +""" +Decrypts Adobe ADEPT-encrypted PDF files. 
+""" + +__license__ = 'GPL v3' +__version__ = "8.0.1" + +import sys +import os +import re +import zlib +import struct +import hashlib +from itertools import chain, islice +import xml.etree.ElementTree as etree # Wrap a stream so that output gets flushed immediately # and also make sure that any unicode strings get @@ -134,192 +119,2217 @@ def unicode_argv(): start = argc.value - len(sys.argv) return [argv[i] for i in xrange(start, argc.value)] - # if we don't have any arguments at all, just pass back script name - # this should never happen - return [u"mobidedrm.py"] + return [u"ineptpdf.py"] else: argvencoding = sys.stdin.encoding if argvencoding == None: argvencoding = "utf-8" return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] -# cleanup unicode filenames -# borrowed from calibre from calibre/src/calibre/__init__.py -# added in removal of control (<32) chars -# and removal of . at start and end -# and with some (heavily edited) code from Paul Durrant's kindlenamer.py -def cleanup_name(name): - # substitute filename unfriendly characters - name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'").replace(u"*",u"_").replace(u"?",u"") - # delete control characters - name = u"".join(char for char in name if ord(char)>=32) - # white space to single space, delete leading and trailing while space - name = re.sub(ur"\s", u" ", name).strip() - # remove leading dots - while len(name)>0 and name[0] == u".": - name = name[1:] - # remove trailing dots (Windows doesn't like them) - if name.endswith(u'.'): - name = name[:-1] - return name - -# must be passed unicode -def unescape(text): - def fixup(m): - text = m.group(0) - if text[:2] == u"&#": - # character reference - try: - if text[:3] == u"&#x": - return unichr(int(text[3:-1], 16)) + +class ADEPTError(Exception): + pass + + +import hashlib + +def 
SHA256(message): + ctx = hashlib.sha256() + ctx.update(message) + return ctx.digest() + + +def _load_crypto_libcrypto(): + from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \ + Structure, c_ulong, create_string_buffer, cast + from ctypes.util import find_library + + if sys.platform.startswith('win'): + libcrypto = find_library('libeay32') + else: + libcrypto = find_library('crypto') + + if libcrypto is None: + raise ADEPTError('libcrypto not found') + libcrypto = CDLL(libcrypto) + + AES_MAXNR = 14 + + RSA_NO_PADDING = 3 + + c_char_pp = POINTER(c_char_p) + c_int_p = POINTER(c_int) + + class AES_KEY(Structure): + _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] + AES_KEY_p = POINTER(AES_KEY) + + class RC4_KEY(Structure): + _fields_ = [('x', c_int), ('y', c_int), ('box', c_int * 256)] + RC4_KEY_p = POINTER(RC4_KEY) + + class RSA(Structure): + pass + RSA_p = POINTER(RSA) + + def F(restype, name, argtypes): + func = getattr(libcrypto, name) + func.restype = restype + func.argtypes = argtypes + return func + + AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int]) + AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) + + RC4_set_key = F(None,'RC4_set_key',[RC4_KEY_p, c_int, c_char_p]) + RC4_crypt = F(None,'RC4',[RC4_KEY_p, c_int, c_char_p, c_char_p]) + + d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey', + [RSA_p, c_char_pp, c_long]) + RSA_size = F(c_int, 'RSA_size', [RSA_p]) + RSA_private_decrypt = F(c_int, 'RSA_private_decrypt', + [c_int, c_char_p, c_char_p, RSA_p, c_int]) + RSA_free = F(None, 'RSA_free', [RSA_p]) + + class RSA(object): + def __init__(self, der): + buf = create_string_buffer(der) + pp = c_char_pp(cast(buf, c_char_p)) + rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der)) + if rsa is None: + raise ADEPTError('Error parsing ADEPT user key DER') + + def decrypt(self, from_): + rsa = self._rsa + to = 
create_string_buffer(RSA_size(rsa)) + dlen = RSA_private_decrypt(len(from_), from_, to, rsa, + RSA_NO_PADDING) + if dlen < 0: + raise ADEPTError('RSA decryption failed') + return to[1:dlen] + + def __del__(self): + if self._rsa is not None: + RSA_free(self._rsa) + self._rsa = None + + class ARC4(object): + @classmethod + def new(cls, userkey): + self = ARC4() + self._blocksize = len(userkey) + key = self._key = RC4_KEY() + RC4_set_key(key, self._blocksize, userkey) + return self + def __init__(self): + self._blocksize = 0 + self._key = None + def decrypt(self, data): + out = create_string_buffer(len(data)) + RC4_crypt(self._key, len(data), data, out) + return out.raw + + class AES(object): + MODE_CBC = 0 + @classmethod + def new(cls, userkey, mode, iv): + self = AES() + self._blocksize = len(userkey) + # mode is ignored since CBCMODE is only thing supported/used so far + self._mode = mode + if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : + raise ADEPTError('AES improper key used') + return + keyctx = self._keyctx = AES_KEY() + self._iv = iv + rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) + if rv < 0: + raise ADEPTError('Failed to initialize AES key') + return self + def __init__(self): + self._blocksize = 0 + self._keyctx = None + self._iv = 0 + self._mode = 0 + def decrypt(self, data): + out = create_string_buffer(len(data)) + rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self._iv, 0) + if rv == 0: + raise ADEPTError('AES decryption failed') + return out.raw + + return (ARC4, RSA, AES) + + +def _load_crypto_pycrypto(): + from Crypto.PublicKey import RSA as _RSA + from Crypto.Cipher import ARC4 as _ARC4 + from Crypto.Cipher import AES as _AES + + # ASN.1 parsing code from tlslite + class ASN1Error(Exception): + pass + + class ASN1Parser(object): + class Parser(object): + def __init__(self, bytes): + self.bytes = bytes + self.index = 0 + + def get(self, length): + if self.index + length > len(self.bytes): 
+ raise ASN1Error("Error decoding ASN.1") + x = 0 + for count in range(length): + x <<= 8 + x |= self.bytes[self.index] + self.index += 1 + return x + + def getFixBytes(self, lengthBytes): + bytes = self.bytes[self.index : self.index+lengthBytes] + self.index += lengthBytes + return bytes + + def getVarBytes(self, lengthLength): + lengthBytes = self.get(lengthLength) + return self.getFixBytes(lengthBytes) + + def getFixList(self, length, lengthList): + l = [0] * lengthList + for x in range(lengthList): + l[x] = self.get(length) + return l + + def getVarList(self, length, lengthLength): + lengthList = self.get(lengthLength) + if lengthList % length != 0: + raise ASN1Error("Error decoding ASN.1") + lengthList = int(lengthList/length) + l = [0] * lengthList + for x in range(lengthList): + l[x] = self.get(length) + return l + + def startLengthCheck(self, lengthLength): + self.lengthCheck = self.get(lengthLength) + self.indexCheck = self.index + + def setLengthCheck(self, length): + self.lengthCheck = length + self.indexCheck = self.index + + def stopLengthCheck(self): + if (self.index - self.indexCheck) != self.lengthCheck: + raise ASN1Error("Error decoding ASN.1") + + def atLengthCheck(self): + if (self.index - self.indexCheck) < self.lengthCheck: + return False + elif (self.index - self.indexCheck) == self.lengthCheck: + return True else: - return unichr(int(text[2:-1])) + raise ASN1Error("Error decoding ASN.1") + + def __init__(self, bytes): + p = self.Parser(bytes) + p.get(1) + self.length = self._getASN1Length(p) + self.value = p.getFixBytes(self.length) + + def getChild(self, which): + p = self.Parser(self.value) + for x in range(which+1): + markIndex = p.index + p.get(1) + length = self._getASN1Length(p) + p.getFixBytes(length) + return ASN1Parser(p.bytes[markIndex:p.index]) + + def _getASN1Length(self, p): + firstLength = p.get(1) + if firstLength<=127: + return firstLength + else: + lengthLength = firstLength & 0x7F + return p.get(lengthLength) + + class 
ARC4(object): + @classmethod + def new(cls, userkey): + self = ARC4() + self._arc4 = _ARC4.new(userkey) + return self + def __init__(self): + self._arc4 = None + def decrypt(self, data): + return self._arc4.decrypt(data) + + class AES(object): + MODE_CBC = _AES.MODE_CBC + @classmethod + def new(cls, userkey, mode, iv): + self = AES() + self._aes = _AES.new(userkey, mode, iv) + return self + def __init__(self): + self._aes = None + def decrypt(self, data): + return self._aes.decrypt(data) + + class RSA(object): + def __init__(self, der): + key = ASN1Parser([ord(x) for x in der]) + key = [key.getChild(x).value for x in xrange(1, 4)] + key = [self.bytesToNumber(v) for v in key] + self._rsa = _RSA.construct(key) + + def bytesToNumber(self, bytes): + total = 0L + for byte in bytes: + total = (total << 8) + byte + return total + + def decrypt(self, data): + return self._rsa.decrypt(data) + + return (ARC4, RSA, AES) + +def _load_crypto(): + ARC4 = RSA = AES = None + cryptolist = (_load_crypto_libcrypto, _load_crypto_pycrypto) + if sys.platform.startswith('win'): + cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto) + for loader in cryptolist: + try: + ARC4, RSA, AES = loader() + break + except (ImportError, ADEPTError): + pass + return (ARC4, RSA, AES) +ARC4, RSA, AES = _load_crypto() + + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + + +# Do we generate cross reference streams on output? 
+# 0 = never +# 1 = only if present in input +# 2 = always + +GEN_XREF_STM = 1 + +# This is the value for the current document +gen_xref_stm = False # will be set in PDFSerializer + +# PDF parsing routines from pdfminer, with changes for EBX_HANDLER + +# Utilities + +def choplist(n, seq): + '''Groups every n elements of the list.''' + r = [] + for x in seq: + r.append(x) + if len(r) == n: + yield tuple(r) + r = [] + return + +def nunpack(s, default=0): + '''Unpacks up to 4 bytes big endian.''' + l = len(s) + if not l: + return default + elif l == 1: + return ord(s) + elif l == 2: + return struct.unpack('>H', s)[0] + elif l == 3: + return struct.unpack('>L', '\x00'+s)[0] + elif l == 4: + return struct.unpack('>L', s)[0] + else: + return TypeError('invalid length: %d' % l) + + +STRICT = 0 + + +# PS Exceptions + +class PSException(Exception): pass +class PSEOF(PSException): pass +class PSSyntaxError(PSException): pass +class PSTypeError(PSException): pass +class PSValueError(PSException): pass + + +# Basic PostScript Types + + +# PSLiteral +class PSObject(object): pass + +class PSLiteral(PSObject): + ''' + PS literals (e.g. "/Name"). + Caution: Never create these objects directly. + Use PSLiteralTable.intern() instead. + ''' + def __init__(self, name): + self.name = name + return + + def __repr__(self): + name = [] + for char in self.name: + if not char.isalnum(): + char = '#%02x' % ord(char) + name.append(char) + return '/%s' % ''.join(name) + +# PSKeyword +class PSKeyword(PSObject): + ''' + PS keywords (e.g. "showpage"). + Caution: Never create these objects directly. + Use PSKeywordTable.intern() instead. + ''' + def __init__(self, name): + self.name = name + return + + def __repr__(self): + return self.name + +# PSSymbolTable +class PSSymbolTable(object): + + ''' + Symbol table that stores PSLiteral or PSKeyword. 
+ ''' + + def __init__(self, classe): + self.dic = {} + self.classe = classe + return + + def intern(self, name): + if name in self.dic: + lit = self.dic[name] + else: + lit = self.classe(name) + self.dic[name] = lit + return lit + +PSLiteralTable = PSSymbolTable(PSLiteral) +PSKeywordTable = PSSymbolTable(PSKeyword) +LIT = PSLiteralTable.intern +KWD = PSKeywordTable.intern +KEYWORD_BRACE_BEGIN = KWD('{') +KEYWORD_BRACE_END = KWD('}') +KEYWORD_ARRAY_BEGIN = KWD('[') +KEYWORD_ARRAY_END = KWD(']') +KEYWORD_DICT_BEGIN = KWD('<<') +KEYWORD_DICT_END = KWD('>>') + + +def literal_name(x): + if not isinstance(x, PSLiteral): + if STRICT: + raise PSTypeError('Literal required: %r' % x) + else: + return str(x) + return x.name + +def keyword_name(x): + if not isinstance(x, PSKeyword): + if STRICT: + raise PSTypeError('Keyword required: %r' % x) + else: + return str(x) + return x.name + + +## PSBaseParser +## +EOL = re.compile(r'[\r\n]') +SPC = re.compile(r'\s') +NONSPC = re.compile(r'\S') +HEX = re.compile(r'[0-9a-fA-F]') +END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]') +END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]') +HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.') +END_NUMBER = re.compile(r'[^0-9]') +END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]') +END_STRING = re.compile(r'[()\134]') +OCT_STRING = re.compile(r'[0-7]') +ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 } + +class PSBaseParser(object): + + ''' + Most basic PostScript parser that performs only basic tokenization. 
+ ''' + BUFSIZ = 4096 + + def __init__(self, fp): + self.fp = fp + self.seek(0) + return + + def __repr__(self): + return '' % (self.fp, self.bufpos) + + def flush(self): + return + + def close(self): + self.flush() + return + + def tell(self): + return self.bufpos+self.charpos + + def poll(self, pos=None, n=80): + pos0 = self.fp.tell() + if not pos: + pos = self.bufpos+self.charpos + self.fp.seek(pos) + ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n)) + self.fp.seek(pos0) + return + + def seek(self, pos): + ''' + Seeks the parser to the given position. + ''' + self.fp.seek(pos) + # reset the status for nextline() + self.bufpos = pos + self.buf = '' + self.charpos = 0 + # reset the status for nexttoken() + self.parse1 = self.parse_main + self.tokens = [] + return + + def fillbuf(self): + if self.charpos < len(self.buf): return + # fetch next chunk. + self.bufpos = self.fp.tell() + self.buf = self.fp.read(self.BUFSIZ) + if not self.buf: + raise PSEOF('Unexpected EOF') + self.charpos = 0 + return + + def parse_main(self, s, i): + m = NONSPC.search(s, i) + if not m: + return (self.parse_main, len(s)) + j = m.start(0) + c = s[j] + self.tokenstart = self.bufpos+j + if c == '%': + self.token = '%' + return (self.parse_comment, j+1) + if c == '/': + self.token = '' + return (self.parse_literal, j+1) + if c in '-+' or c.isdigit(): + self.token = c + return (self.parse_number, j+1) + if c == '.': + self.token = c + return (self.parse_float, j+1) + if c.isalpha(): + self.token = c + return (self.parse_keyword, j+1) + if c == '(': + self.token = '' + self.paren = 1 + return (self.parse_string, j+1) + if c == '<': + self.token = '' + return (self.parse_wopen, j+1) + if c == '>': + self.token = '' + return (self.parse_wclose, j+1) + self.add_token(KWD(c)) + return (self.parse_main, j+1) + + def add_token(self, obj): + self.tokens.append((self.tokenstart, obj)) + return + + def parse_comment(self, s, i): + m = EOL.search(s, i) + if not m: + self.token += s[i:] + 
return (self.parse_comment, len(s)) + j = m.start(0) + self.token += s[i:j] + # We ignore comments. + #self.tokens.append(self.token) + return (self.parse_main, j) + + def parse_literal(self, s, i): + m = END_LITERAL.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_literal, len(s)) + j = m.start(0) + self.token += s[i:j] + c = s[j] + if c == '#': + self.hex = '' + return (self.parse_literal_hex, j+1) + self.add_token(LIT(self.token)) + return (self.parse_main, j) + + def parse_literal_hex(self, s, i): + c = s[i] + if HEX.match(c) and len(self.hex) < 2: + self.hex += c + return (self.parse_literal_hex, i+1) + if self.hex: + self.token += chr(int(self.hex, 16)) + return (self.parse_literal, i) + + def parse_number(self, s, i): + m = END_NUMBER.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_number, len(s)) + j = m.start(0) + self.token += s[i:j] + c = s[j] + if c == '.': + self.token += c + return (self.parse_float, j+1) + try: + self.add_token(int(self.token)) + except ValueError: + pass + return (self.parse_main, j) + def parse_float(self, s, i): + m = END_NUMBER.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_float, len(s)) + j = m.start(0) + self.token += s[i:j] + self.add_token(float(self.token)) + return (self.parse_main, j) + + def parse_keyword(self, s, i): + m = END_KEYWORD.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_keyword, len(s)) + j = m.start(0) + self.token += s[i:j] + if self.token == 'true': + token = True + elif self.token == 'false': + token = False + else: + token = KWD(self.token) + self.add_token(token) + return (self.parse_main, j) + + def parse_string(self, s, i): + m = END_STRING.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_string, len(s)) + j = m.start(0) + self.token += s[i:j] + c = s[j] + if c == '\\': + self.oct = '' + return (self.parse_string_1, j+1) + if c == '(': + self.paren += 1 + self.token += c + return (self.parse_string, 
j+1) + if c == ')': + self.paren -= 1 + if self.paren: + self.token += c + return (self.parse_string, j+1) + self.add_token(self.token) + return (self.parse_main, j+1) + def parse_string_1(self, s, i): + c = s[i] + if OCT_STRING.match(c) and len(self.oct) < 3: + self.oct += c + return (self.parse_string_1, i+1) + if self.oct: + self.token += chr(int(self.oct, 8)) + return (self.parse_string, i) + if c in ESC_STRING: + self.token += chr(ESC_STRING[c]) + return (self.parse_string, i+1) + + def parse_wopen(self, s, i): + c = s[i] + if c.isspace() or HEX.match(c): + return (self.parse_hexstring, i) + if c == '<': + self.add_token(KEYWORD_DICT_BEGIN) + i += 1 + return (self.parse_main, i) + + def parse_wclose(self, s, i): + c = s[i] + if c == '>': + self.add_token(KEYWORD_DICT_END) + i += 1 + return (self.parse_main, i) + + def parse_hexstring(self, s, i): + m = END_HEX_STRING.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_hexstring, len(s)) + j = m.start(0) + self.token += s[i:j] + token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)), + SPC.sub('', self.token)) + self.add_token(token) + return (self.parse_main, j) + + def nexttoken(self): + while not self.tokens: + self.fillbuf() + (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos) + token = self.tokens.pop(0) + return token + + def nextline(self): + ''' + Fetches a next line that ends either with \\r or \\n. + ''' + linebuf = '' + linepos = self.bufpos + self.charpos + eol = False + while 1: + self.fillbuf() + if eol: + c = self.buf[self.charpos] + # handle '\r\n' + if c == '\n': + linebuf += c + self.charpos += 1 + break + m = EOL.search(self.buf, self.charpos) + if m: + linebuf += self.buf[self.charpos:m.end(0)] + self.charpos = m.end(0) + if linebuf[-1] == '\r': + eol = True + else: + break + else: + linebuf += self.buf[self.charpos:] + self.charpos = len(self.buf) + return (linepos, linebuf) + + def revreadlines(self): + ''' + Fetches a next line backword. 
This is used to locate + the trailers at the end of a file. + ''' + self.fp.seek(0, 2) + pos = self.fp.tell() + buf = '' + while 0 < pos: + prevpos = pos + pos = max(0, pos-self.BUFSIZ) + self.fp.seek(pos) + s = self.fp.read(prevpos-pos) + if not s: break + while 1: + n = max(s.rfind('\r'), s.rfind('\n')) + if n == -1: + buf = s + buf + break + yield s[n:]+buf + s = s[:n] + buf = '' + return + + +## PSStackParser +## +class PSStackParser(PSBaseParser): + + def __init__(self, fp): + PSBaseParser.__init__(self, fp) + self.reset() + return + + def reset(self): + self.context = [] + self.curtype = None + self.curstack = [] + self.results = [] + return + + def seek(self, pos): + PSBaseParser.seek(self, pos) + self.reset() + return + + def push(self, *objs): + self.curstack.extend(objs) + return + def pop(self, n): + objs = self.curstack[-n:] + self.curstack[-n:] = [] + return objs + def popall(self): + objs = self.curstack + self.curstack = [] + return objs + def add_results(self, *objs): + self.results.extend(objs) + return + + def start_type(self, pos, type): + self.context.append((pos, self.curtype, self.curstack)) + (self.curtype, self.curstack) = (type, []) + return + def end_type(self, type): + if self.curtype != type: + raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) + objs = [ obj for (_,obj) in self.curstack ] + (pos, self.curtype, self.curstack) = self.context.pop() + return (pos, objs) + + def do_keyword(self, pos, token): + return + + def nextobject(self, direct=False): + ''' + Yields a list of objects: keywords, literals, strings, + numbers, arrays and dictionaries. Arrays and dictionaries + are represented as Python sequence and dictionaries. 
+ ''' + while not self.results: + (pos, token) = self.nexttoken() + ##print (pos,token), (self.curtype, self.curstack) + if (isinstance(token, int) or + isinstance(token, float) or + isinstance(token, bool) or + isinstance(token, str) or + isinstance(token, PSLiteral)): + # normal token + self.push((pos, token)) + elif token == KEYWORD_ARRAY_BEGIN: + # begin array + self.start_type(pos, 'a') + elif token == KEYWORD_ARRAY_END: + # end array + try: + self.push(self.end_type('a')) + except PSTypeError: + if STRICT: raise + elif token == KEYWORD_DICT_BEGIN: + # begin dictionary + self.start_type(pos, 'd') + elif token == KEYWORD_DICT_END: + # end dictionary + try: + (pos, objs) = self.end_type('d') + if len(objs) % 2 != 0: + print "Incomplete dictionary construct" + objs.append("") # this isn't necessary. + # temporary fix. is this due to rental books? + # raise PSSyntaxError( + # 'Invalid dictionary construct: %r' % objs) + d = dict((literal_name(k), v) \ + for (k,v) in choplist(2, objs)) + self.push((pos, d)) + except PSTypeError: + if STRICT: raise + else: + self.do_keyword(pos, token) + if self.context: + continue + else: + if direct: + return self.pop(1)[0] + self.flush() + obj = self.results.pop(0) + return obj + + +LITERAL_CRYPT = PSLiteralTable.intern('Crypt') +LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl')) +LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW')) +LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85')) + + +## PDF Objects +## +class PDFObject(PSObject): pass + +class PDFException(PSException): pass +class PDFTypeError(PDFException): pass +class PDFValueError(PDFException): pass +class PDFNotImplementedError(PSException): pass + + +## PDFObjRef +## +class PDFObjRef(PDFObject): + + def __init__(self, doc, objid, genno): + if objid == 0: + if STRICT: + raise PDFValueError('PDF object id cannot be 0.') + self.doc = doc + self.objid 
= objid + self.genno = genno + return + + def __repr__(self): + return '' % (self.objid, self.genno) + + def resolve(self): + return self.doc.getobj(self.objid) + + +# resolve +def resolve1(x): + ''' + Resolve an object. If this is an array or dictionary, + it may still contains some indirect objects inside. + ''' + while isinstance(x, PDFObjRef): + x = x.resolve() + return x + +def resolve_all(x): + ''' + Recursively resolve X and all the internals. + Make sure there is no indirect reference within the nested object. + This procedure might be slow. + ''' + while isinstance(x, PDFObjRef): + x = x.resolve() + if isinstance(x, list): + x = [ resolve_all(v) for v in x ] + elif isinstance(x, dict): + for (k,v) in x.iteritems(): + x[k] = resolve_all(v) + return x + +def decipher_all(decipher, objid, genno, x): + ''' + Recursively decipher X. + ''' + if isinstance(x, str): + return decipher(objid, genno, x) + decf = lambda v: decipher_all(decipher, objid, genno, v) + if isinstance(x, list): + x = [decf(v) for v in x] + elif isinstance(x, dict): + x = dict((k, decf(v)) for (k, v) in x.iteritems()) + return x + + +# Type cheking +def int_value(x): + x = resolve1(x) + if not isinstance(x, int): + if STRICT: + raise PDFTypeError('Integer required: %r' % x) + return 0 + return x + +def float_value(x): + x = resolve1(x) + if not isinstance(x, float): + if STRICT: + raise PDFTypeError('Float required: %r' % x) + return 0.0 + return x + +def num_value(x): + x = resolve1(x) + if not (isinstance(x, int) or isinstance(x, float)): + if STRICT: + raise PDFTypeError('Int or Float required: %r' % x) + return 0 + return x + +def str_value(x): + x = resolve1(x) + if not isinstance(x, str): + if STRICT: + raise PDFTypeError('String required: %r' % x) + return '' + return x + +def list_value(x): + x = resolve1(x) + if not (isinstance(x, list) or isinstance(x, tuple)): + if STRICT: + raise PDFTypeError('List required: %r' % x) + return [] + return x + +def dict_value(x): + x = resolve1(x) + 
if not isinstance(x, dict): + if STRICT: + raise PDFTypeError('Dict required: %r' % x) + return {} + return x + +def stream_value(x): + x = resolve1(x) + if not isinstance(x, PDFStream): + if STRICT: + raise PDFTypeError('PDFStream required: %r' % x) + return PDFStream({}, '') + return x + +# ascii85decode(data) +def ascii85decode(data): + n = b = 0 + out = '' + for c in data: + if '!' <= c and c <= 'u': + n += 1 + b = b*85+(ord(c)-33) + if n == 5: + out += struct.pack('>L',b) + n = b = 0 + elif c == 'z': + assert n == 0 + out += '\0\0\0\0' + elif c == '~': + if n: + for _ in range(5-n): + b = b*85+84 + out += struct.pack('>L',b)[:n-1] + break + return out + + +## PDFStream type +class PDFStream(PDFObject): + def __init__(self, dic, rawdata, decipher=None): + length = int_value(dic.get('Length', 0)) + eol = rawdata[length:] + # quick and dirty fix for false length attribute, + # might not work if the pdf stream parser has a problem + if decipher != None and decipher.__name__ == 'decrypt_aes': + if (len(rawdata) % 16) != 0: + cutdiv = len(rawdata) // 16 + rawdata = rawdata[:16*cutdiv] + else: + if eol in ('\r', '\n', '\r\n'): + rawdata = rawdata[:length] + + self.dic = dic + self.rawdata = rawdata + self.decipher = decipher + self.data = None + self.decdata = None + self.objid = None + self.genno = None + return + + def set_objid(self, objid, genno): + self.objid = objid + self.genno = genno + return + + def __repr__(self): + if self.rawdata: + return '' % \ + (self.objid, len(self.rawdata), self.dic) + else: + return '' % \ + (self.objid, len(self.data), self.dic) + + def decode(self): + assert self.data is None and self.rawdata is not None + data = self.rawdata + if self.decipher: + # Handle encryption + data = self.decipher(self.objid, self.genno, data) + if gen_xref_stm: + self.decdata = data # keep decrypted data + if 'Filter' not in self.dic: + self.data = data + self.rawdata = None + ##print self.dict + return + filters = self.dic['Filter'] + if not 
isinstance(filters, list): + filters = [ filters ] + for f in filters: + if f in LITERALS_FLATE_DECODE: + # will get errors if the document is encrypted. + data = zlib.decompress(data) + elif f in LITERALS_LZW_DECODE: + data = ''.join(LZWDecoder(StringIO(data)).run()) + elif f in LITERALS_ASCII85_DECODE: + data = ascii85decode(data) + elif f == LITERAL_CRYPT: + raise PDFNotImplementedError('/Crypt filter is unsupported') + else: + raise PDFNotImplementedError('Unsupported filter: %r' % f) + # apply predictors + if 'DP' in self.dic: + params = self.dic['DP'] + else: + params = self.dic.get('DecodeParms', {}) + if 'Predictor' in params: + pred = int_value(params['Predictor']) + if pred: + if pred != 12: + raise PDFNotImplementedError( + 'Unsupported predictor: %r' % pred) + if 'Columns' not in params: + raise PDFValueError( + 'Columns undefined for predictor=12') + columns = int_value(params['Columns']) + buf = '' + ent0 = '\x00' * columns + for i in xrange(0, len(data), columns+1): + pred = data[i] + ent1 = data[i+1:i+1+columns] + if pred == '\x02': + ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \ + for (a,b) in zip(ent0,ent1)) + buf += ent1 + ent0 = ent1 + data = buf + self.data = data + self.rawdata = None + return + + def get_data(self): + if self.data is None: + self.decode() + return self.data + + def get_rawdata(self): + return self.rawdata + + def get_decdata(self): + if self.decdata is not None: + return self.decdata + data = self.rawdata + if self.decipher and data: + # Handle encryption + data = self.decipher(self.objid, self.genno, data) + return data + + +## PDF Exceptions +## +class PDFSyntaxError(PDFException): pass +class PDFNoValidXRef(PDFSyntaxError): pass +class PDFEncryptionError(PDFException): pass +class PDFPasswordIncorrect(PDFEncryptionError): pass + +# some predefined literals and keywords. 
+LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm') +LITERAL_XREF = PSLiteralTable.intern('XRef') +LITERAL_PAGE = PSLiteralTable.intern('Page') +LITERAL_PAGES = PSLiteralTable.intern('Pages') +LITERAL_CATALOG = PSLiteralTable.intern('Catalog') + + +## XRefs +## + +## PDFXRef +## +class PDFXRef(object): + + def __init__(self): + self.offsets = None + return + + def __repr__(self): + return '' % len(self.offsets) + + def objids(self): + return self.offsets.iterkeys() + + def load(self, parser): + self.offsets = {} + while 1: + try: + (pos, line) = parser.nextline() + except PSEOF: + raise PDFNoValidXRef('Unexpected EOF - file corrupted?') + if not line: + raise PDFNoValidXRef('Premature eof: %r' % parser) + if line.startswith('trailer'): + parser.seek(pos) + break + f = line.strip().split(' ') + if len(f) != 2: + raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line)) + try: + (start, nobjs) = map(int, f) except ValueError: - pass + raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line)) + for objid in xrange(start, start+nobjs): + try: + (_, line) = parser.nextline() + except PSEOF: + raise PDFNoValidXRef('Unexpected EOF - file corrupted?') + f = line.strip().split(' ') + if len(f) != 3: + raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line)) + (pos, genno, use) = f + if use != 'n': continue + self.offsets[objid] = (int(genno), int(pos)) + self.load_trailer(parser) + return + + KEYWORD_TRAILER = PSKeywordTable.intern('trailer') + def load_trailer(self, parser): + try: + (_,kwd) = parser.nexttoken() + assert kwd is self.KEYWORD_TRAILER + (_,dic) = parser.nextobject(direct=True) + except PSEOF: + x = parser.pop(1) + if not x: + raise PDFNoValidXRef('Unexpected EOF - file corrupted') + (_,dic) = x[0] + self.trailer = dict_value(dic) + return + + def getpos(self, objid): + try: + (genno, pos) = self.offsets[objid] + except KeyError: + raise + return (None, pos) + + +## PDFXRefStream +## +class PDFXRefStream(object): + + def 
__init__(self): + self.index = None + self.data = None + self.entlen = None + self.fl1 = self.fl2 = self.fl3 = None + return + + def __repr__(self): + return '' % self.index + + def objids(self): + for first, size in self.index: + for objid in xrange(first, first + size): + yield objid + + def load(self, parser, debug=0): + (_,objid) = parser.nexttoken() # ignored + (_,genno) = parser.nexttoken() # ignored + (_,kwd) = parser.nexttoken() + (_,stream) = parser.nextobject() + if not isinstance(stream, PDFStream) or \ + stream.dic['Type'] is not LITERAL_XREF: + raise PDFNoValidXRef('Invalid PDF stream spec.') + size = stream.dic['Size'] + index = stream.dic.get('Index', (0,size)) + self.index = zip(islice(index, 0, None, 2), + islice(index, 1, None, 2)) + (self.fl1, self.fl2, self.fl3) = stream.dic['W'] + self.data = stream.get_data() + self.entlen = self.fl1+self.fl2+self.fl3 + self.trailer = stream.dic + return + + def getpos(self, objid): + offset = 0 + for first, size in self.index: + if first <= objid and objid < (first + size): + break + offset += size + else: + raise KeyError(objid) + i = self.entlen * ((objid - first) + offset) + ent = self.data[i:i+self.entlen] + f1 = nunpack(ent[:self.fl1], 1) + if f1 == 1: + pos = nunpack(ent[self.fl1:self.fl1+self.fl2]) + genno = nunpack(ent[self.fl1+self.fl2:]) + return (None, pos) + elif f1 == 2: + objid = nunpack(ent[self.fl1:self.fl1+self.fl2]) + index = nunpack(ent[self.fl1+self.fl2:]) + return (objid, index) + # this is a free object + raise KeyError(objid) + + +## PDFDocument +## +## A PDFDocument object represents a PDF document. +## Since a PDF file is usually pretty big, normally it is not loaded +## at once. Rather it is parsed dynamically as processing goes. +## A PDF parser is associated with the document. 
+## +class PDFDocument(object): + + def __init__(self): + self.xrefs = [] + self.objs = {} + self.parsed_objs = {} + self.root = None + self.catalog = None + self.parser = None + self.encryption = None + self.decipher = None + return + + # set_parser(parser) + # Associates the document with an (already initialized) parser object. + def set_parser(self, parser): + if self.parser: return + self.parser = parser + # The document is set to be temporarily ready during collecting + # all the basic information about the document, e.g. + # the header, the encryption information, and the access rights + # for the document. + self.ready = True + # Retrieve the information of each header that was appended + # (maybe multiple times) at the end of the document. + self.xrefs = parser.read_xref() + for xref in self.xrefs: + trailer = xref.trailer + if not trailer: continue + + # If there's an encryption info, remember it. + if 'Encrypt' in trailer: + #assert not self.encryption + try: + self.encryption = (list_value(trailer['ID']), + dict_value(trailer['Encrypt'])) + # fix for bad files + except: + self.encryption = ('ffffffffffffffffffffffffffffffffffff', + dict_value(trailer['Encrypt'])) + if 'Root' in trailer: + self.set_root(dict_value(trailer['Root'])) + break + else: + raise PDFSyntaxError('No /Root object! - Is this really a PDF?') + # The document is set to be non-ready again, until all the + # proper initialization (asking the password key and + # verifying the access permission, so on) is finished. + self.ready = False + return + + # set_root(root) + # Set the Root dictionary of the document. + # Each PDF file must have exactly one /Root dictionary. + def set_root(self, root): + self.root = root + self.catalog = dict_value(self.root) + if self.catalog.get('Type') is not LITERAL_CATALOG: + if STRICT: + raise PDFSyntaxError('Catalog not found!') + return + # initialize(password='') + # Perform the initialization with a given password. 
+ # This step is mandatory even if there's no password associated + # with the document. + def initialize(self, password=''): + if not self.encryption: + self.is_printable = self.is_modifiable = self.is_extractable = True + self.ready = True + return + (docid, param) = self.encryption + type = literal_name(param['Filter']) + if type == 'Adobe.APS': + return self.initialize_adobe_ps(password, docid, param) + if type == 'Standard': + return self.initialize_standard(password, docid, param) + if type == 'EBX_HANDLER': + return self.initialize_ebx(password, docid, param) + raise PDFEncryptionError('Unknown filter: param=%r' % param) + + def initialize_adobe_ps(self, password, docid, param): + global KEYFILEPATH + self.decrypt_key = self.genkey_adobe_ps(param) + self.genkey = self.genkey_v4 + self.decipher = self.decrypt_aes + self.ready = True + return + + def genkey_adobe_ps(self, param): + # nice little offline principal keys dictionary + # global static principal key for German Onleihe / Bibliothek Digital + principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')} + self.is_printable = self.is_modifiable = self.is_extractable = True + length = int_value(param.get('Length', 0)) / 8 + edcdata = str_value(param.get('EDCData')).decode('base64') + pdrllic = str_value(param.get('PDRLLic')).decode('base64') + pdrlpol = str_value(param.get('PDRLPol')).decode('base64') + edclist = [] + for pair in edcdata.split('\n'): + edclist.append(pair) + # principal key request + for key in principalkeys: + if key in pdrllic: + principalkey = principalkeys[key] + else: + raise ADEPTError('Cannot find principal key for this pdf') + shakey = SHA256(principalkey) + ivector = 16 * chr(0) + plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64')) + if plaintext[-16:] != 16 * chr(16): + raise ADEPTError('Offlinekey cannot be decrypted, aborting ...') + pdrlpol = 
AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol) + if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16: + raise ADEPTError('Could not decrypt PDRLPol, aborting ...') + else: + cutter = -1 * ord(pdrlpol[-1]) + pdrlpol = pdrlpol[:cutter] + return plaintext[:16] + + PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \ + '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' + # experimental aes pw support + def initialize_standard(self, password, docid, param): + # copy from a global variable + V = int_value(param.get('V', 0)) + if (V <=0 or V > 4): + raise PDFEncryptionError('Unknown algorithm: param=%r' % param) + length = int_value(param.get('Length', 40)) # Key length (bits) + O = str_value(param['O']) + R = int_value(param['R']) # Revision + if 5 <= R: + raise PDFEncryptionError('Unknown revision: %r' % R) + U = str_value(param['U']) + P = int_value(param['P']) + try: + EncMetadata = str_value(param['EncryptMetadata']) + except: + EncMetadata = 'True' + self.is_printable = bool(P & 4) + self.is_modifiable = bool(P & 8) + self.is_extractable = bool(P & 16) + self.is_annotationable = bool(P & 32) + self.is_formsenabled = bool(P & 256) + self.is_textextractable = bool(P & 512) + self.is_assemblable = bool(P & 1024) + self.is_formprintable = bool(P & 2048) + # Algorithm 3.2 + password = (password+self.PASSWORD_PADDING)[:32] # 1 + hash = hashlib.md5(password) # 2 + hash.update(O) # 3 + hash.update(struct.pack('= 3: + # Algorithm 3.5 + hash = hashlib.md5(self.PASSWORD_PADDING) # 2 + hash.update(docid[0]) # 3 + x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4 + for i in xrange(1,19+1): + k = ''.join( chr(ord(c) ^ i) for c in key ) + x = ARC4.new(k).decrypt(x) + u1 = x+x # 32bytes total + if R == 2: + is_authenticated = (u1 == U) + else: + is_authenticated = (u1[:16] == U[:16]) + if not is_authenticated: + raise ADEPTError('Password is not correct.') + self.decrypt_key = key + # genkey method + if V == 1 or V == 2: + self.genkey = self.genkey_v2 
+ elif V == 3: + self.genkey = self.genkey_v3 + elif V == 4: + self.genkey = self.genkey_v2 + #self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2 + # rc4 + if V != 4: + self.decipher = self.decipher_rc4 # XXX may be AES + # aes + elif V == 4 and Length == 128: + elf.decipher = self.decipher_aes + elif V == 4 and Length == 256: + raise PDFNotImplementedError('AES256 encryption is currently unsupported') + self.ready = True + return + + def initialize_ebx(self, password, docid, param): + self.is_printable = self.is_modifiable = self.is_extractable = True + rsa = RSA(password) + length = int_value(param.get('Length', 0)) / 8 + rights = str_value(param.get('ADEPT_LICENSE')).decode('base64') + rights = zlib.decompress(rights, -15) + rights = etree.fromstring(rights) + expr = './/{http://ns.adobe.com/adept}encryptedKey' + bookkey = ''.join(rights.findtext(expr)).decode('base64') + bookkey = rsa.decrypt(bookkey) + if bookkey[0] != '\x02': + raise ADEPTError('error decrypting book session key') + index = bookkey.index('\0') + 1 + bookkey = bookkey[index:] + ebx_V = int_value(param.get('V', 4)) + ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6)) + # added because of improper booktype / decryption book session key errors + if length > 0: + if len(bookkey) == length: + if ebx_V == 3: + V = 3 + else: + V = 2 + elif len(bookkey) == length + 1: + V = ord(bookkey[0]) + bookkey = bookkey[1:] + else: + print "ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type) + print "length is %d and len(bookkey) is %d" % (length, len(bookkey)) + print "bookkey[0] is %d" % ord(bookkey[0]) + raise ADEPTError('error decrypting book session key - mismatched length') + else: + # proper length unknown try with whatever you have + print "ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type) + print "length is %d and len(bookkey) is %d" % (length, len(bookkey)) + print "bookkey[0] is %d" % ord(bookkey[0]) + if ebx_V == 3: + V = 3 + else: + V = 2 + self.decrypt_key = bookkey + self.genkey = 
self.genkey_v3 if V == 3 else self.genkey_v2 + self.decipher = self.decrypt_rc4 + self.ready = True + return + + # genkey functions + def genkey_v2(self, objid, genno): + objid = struct.pack(' PDFObjStmRef.maxindex: + PDFObjStmRef.maxindex = index + + +## PDFParser +## +class PDFParser(PSStackParser): + + def __init__(self, doc, fp): + PSStackParser.__init__(self, fp) + self.doc = doc + self.doc.set_parser(self) + return + + def __repr__(self): + return '' + + KEYWORD_R = PSKeywordTable.intern('R') + KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj') + KEYWORD_STREAM = PSKeywordTable.intern('stream') + KEYWORD_XREF = PSKeywordTable.intern('xref') + KEYWORD_STARTXREF = PSKeywordTable.intern('startxref') + def do_keyword(self, pos, token): + if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): + self.add_results(*self.pop(1)) + return + if token is self.KEYWORD_ENDOBJ: + self.add_results(*self.pop(4)) + return + + if token is self.KEYWORD_R: + # reference to indirect object try: - text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) - except KeyError: + ((_,objid), (_,genno)) = self.pop(2) + (objid, genno) = (int(objid), int(genno)) + obj = PDFObjRef(self.doc, objid, genno) + self.push((pos, obj)) + except PSSyntaxError: pass - return text # leave as is - return re.sub(u"&#?\w+;", fixup, text) + return -def GetDecryptedBook(infile, kDatabases, serials, pids, starttime = time.time()): - # handle the obvious cases at the beginning - if not os.path.isfile(infile): - raise DrmException(u"Input file does not exist.") + if token is self.KEYWORD_STREAM: + # stream object + ((_,dic),) = self.pop(1) + dic = dict_value(dic) + try: + objlen = int_value(dic['Length']) + except KeyError: + if STRICT: + raise PDFSyntaxError('/Length is undefined: %r' % dic) + objlen = 0 + self.seek(pos) + try: + (_, line) = self.nextline() # 'stream' + except PSEOF: + if STRICT: + raise PDFSyntaxError('Unexpected EOF') + return + pos += len(line) + self.fp.seek(pos) + data = 
self.fp.read(objlen) + self.seek(pos+objlen) + while 1: + try: + (linepos, line) = self.nextline() + except PSEOF: + if STRICT: + raise PDFSyntaxError('Unexpected EOF') + break + if 'endstream' in line: + i = line.index('endstream') + objlen += i + data += line[:i] + break + objlen += len(line) + data += line + self.seek(pos+objlen) + obj = PDFStream(dic, data, self.doc.decipher) + self.push((pos, obj)) + return - mobi = True - magic3 = open(infile,'rb').read(3) - if magic3 == 'TPZ': - mobi = False + # others + self.push((pos, token)) + return - if mobi: - mb = mobidedrm.MobiBook(infile) - else: - mb = topazextract.TopazBook(infile) + def find_xref(self): + # search the last xref table by scanning the file backwards. + prev = None + for line in self.revreadlines(): + line = line.strip() + if line == 'startxref': break + if line: + prev = line + else: + raise PDFNoValidXRef('Unexpected EOF') + return int(prev) - bookname = unescape(mb.getBookTitle()) - print u"Decrypting {1} ebook: {0}".format(bookname, mb.getBookType()) + # read xref table + def read_xref_from(self, start, xrefs): + self.seek(start) + self.reset() + try: + (pos, token) = self.nexttoken() + except PSEOF: + raise PDFNoValidXRef('Unexpected EOF') + if isinstance(token, int): + # XRefStream: PDF-1.5 + if GEN_XREF_STM == 1: + global gen_xref_stm + gen_xref_stm = True + self.seek(pos) + self.reset() + xref = PDFXRefStream() + xref.load(self) + else: + if token is not self.KEYWORD_XREF: + raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % + (pos, token)) + self.nextline() + xref = PDFXRef() + xref.load(self) + xrefs.append(xref) + trailer = xref.trailer + if 'XRefStm' in trailer: + pos = int_value(trailer['XRefStm']) + self.read_xref_from(pos, xrefs) + if 'Prev' in trailer: + # find previous xref + pos = int_value(trailer['Prev']) + self.read_xref_from(pos, xrefs) + return - # copy list of pids - totalpids = list(pids) - # extend PID list with book-specific PIDs - md1, md2 = mb.getPIDMetaInfo() - 
totalpids.extend(kgenpids.getPidList(md1, md2, serials, kDatabases)) - print u"Found {1:d} keys to try after {0:.1f} seconds".format(time.time()-starttime, len(totalpids)) + # read xref tables and trailers + def read_xref(self): + xrefs = [] + trailerpos = None + try: + pos = self.find_xref() + self.read_xref_from(pos, xrefs) + except PDFNoValidXRef: + # fallback + self.seek(0) + pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b') + offsets = {} + xref = PDFXRef() + while 1: + try: + (pos, line) = self.nextline() + except PSEOF: + break + if line.startswith('trailer'): + trailerpos = pos # remember last trailer + m = pat.match(line) + if not m: continue + (objid, genno) = m.groups() + offsets[int(objid)] = (0, pos) + if not offsets: raise + xref.offsets = offsets + if trailerpos: + self.seek(trailerpos) + xref.load_trailer(self) + xrefs.append(xref) + return xrefs - try: - mb.processBook(totalpids) - except: - mb.cleanup - raise +## PDFObjStrmParser +## +class PDFObjStrmParser(PDFParser): - print u"Decryption succeeded after {0:.1f} seconds".format(time.time()-starttime) - return mb + def __init__(self, data, doc): + PSStackParser.__init__(self, StringIO(data)) + self.doc = doc + return + def flush(self): + self.add_results(*self.popall()) + return -# kDatabaseFiles is a list of files created by kindlekey -def decryptBook(infile, outdir, kDatabaseFiles, serials, pids): - starttime = time.time() - kDatabases = [] - for dbfile in kDatabaseFiles: - kindleDatabase = {} - try: - with open(dbfile, 'r') as keyfilein: - kindleDatabase = json.loads(keyfilein.read()) - kDatabases.append([dbfile,kindleDatabase]) - except Exception, e: - print u"Error getting database from file {0:s}: {1:s}".format(dbfile,e) - traceback.print_exc() + KEYWORD_R = KWD('R') + def do_keyword(self, pos, token): + if token is self.KEYWORD_R: + # reference to indirect object + try: + ((_,objid), (_,genno)) = self.pop(2) + (objid, genno) = (int(objid), int(genno)) + obj = PDFObjRef(self.doc, objid, genno) + 
self.push((pos, obj)) + except PSSyntaxError: + pass + return + # others + self.push((pos, token)) + return +### +### My own code, for which there is none else to blame +class PDFSerializer(object): + def __init__(self, inf, userkey): + global GEN_XREF_STM, gen_xref_stm + gen_xref_stm = GEN_XREF_STM > 1 + self.version = inf.read(8) + inf.seek(0) + self.doc = doc = PDFDocument() + parser = PDFParser(doc, inf) + doc.initialize(userkey) + self.objids = objids = set() + for xref in reversed(doc.xrefs): + trailer = xref.trailer + for objid in xref.objids(): + objids.add(objid) + trailer = dict(trailer) + trailer.pop('Prev', None) + trailer.pop('XRefStm', None) + if 'Encrypt' in trailer: + objids.remove(trailer.pop('Encrypt').objid) + self.trailer = trailer - try: - book = GetDecryptedBook(infile, kDatabases, serials, pids, starttime) - except Exception, e: - print u"Error decrypting book after {1:.1f} seconds: {0}".format(e.args[0],time.time()-starttime) - traceback.print_exc() - return 1 + def dump(self, outf): + self.outf = outf + self.write(self.version) + self.write('\n%\xe2\xe3\xcf\xd3\n') + doc = self.doc + objids = self.objids + xrefs = {} + maxobj = max(objids) + trailer = dict(self.trailer) + trailer['Size'] = maxobj + 1 + for objid in objids: + obj = doc.getobj(objid) + if isinstance(obj, PDFObjStmRef): + xrefs[objid] = obj + continue + if obj is not None: + try: + genno = obj.genno + except AttributeError: + genno = 0 + xrefs[objid] = (self.tell(), genno) + self.serialize_indirect(objid, obj) + startxref = self.tell() - # if we're saving to the same folder as the original, use file name_ - # if to a different folder, use book name - if os.path.normcase(os.path.normpath(outdir)) == os.path.normcase(os.path.normpath(os.path.dirname(infile))): - outfilename = os.path.splitext(os.path.basename(infile))[0] - else: - outfilename = cleanup_name(book.getBookTitle()) + if not gen_xref_stm: + self.write('xref\n') + self.write('0 %d\n' % (maxobj + 1,)) + for objid in 
xrange(0, maxobj + 1): + if objid in xrefs: + # force the genno to be 0 + self.write("%010d 00000 n \n" % xrefs[objid][0]) + else: + self.write("%010d %05d f \n" % (0, 65535)) - # avoid excessively long file names - if len(outfilename)>150: - outfilename = outfilename[:150] + self.write('trailer\n') + self.serialize_object(trailer) + self.write('\nstartxref\n%d\n%%%%EOF' % startxref) - outfilename = outfilename+u"_nodrm" - outfile = os.path.join(outdir, outfilename + book.getBookExtension()) + else: # Generate crossref stream. - book.getFile(outfile) - print u"Saved decrypted book {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename) + # Calculate size of entries + maxoffset = max(startxref, maxobj) + maxindex = PDFObjStmRef.maxindex + fl2 = 2 + power = 65536 + while maxoffset >= power: + fl2 += 1 + power *= 256 + fl3 = 1 + power = 256 + while maxindex >= power: + fl3 += 1 + power *= 256 - if book.getBookType()==u"Topaz": - zipname = os.path.join(outdir, outfilename + u"_SVG.zip") - book.getSVGZip(zipname) - print u"Saved SVG ZIP Archive for {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename) + index = [] + first = None + prev = None + data = [] + # Put the xrefstream's reference in itself + startxref = self.tell() + maxobj += 1 + xrefs[maxobj] = (startxref, 0) + for objid in sorted(xrefs): + if first is None: + first = objid + elif objid != prev + 1: + index.extend((first, prev - first + 1)) + first = objid + prev = objid + objref = xrefs[objid] + if isinstance(objref, PDFObjStmRef): + f1 = 2 + f2 = objref.stmid + f3 = objref.index + else: + f1 = 1 + f2 = objref[0] + # we force all generation numbers to be 0 + # f3 = objref[1] + f3 = 0 - # remove internal temporary directory of Topaz pieces - book.cleanup() - return 0 + data.append(struct.pack('>B', f1)) + data.append(struct.pack('>L', f2)[-fl2:]) + data.append(struct.pack('>L', f3)[-fl3:]) + index.extend((first, prev - first + 1)) + data = zlib.compress(''.join(data)) + dic = 
{'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index, + 'W': [1, fl2, fl3], 'Length': len(data), + 'Filter': LITERALS_FLATE_DECODE[0], + 'Root': trailer['Root'],} + if 'Info' in trailer: + dic['Info'] = trailer['Info'] + xrefstm = PDFStream(dic, data) + self.serialize_indirect(maxobj, xrefstm) + self.write('startxref\n%d\n%%%%EOF' % startxref) + def write(self, data): + self.outf.write(data) + self.last = data[-1:] + def tell(self): + return self.outf.tell() + + def escape_string(self, string): + string = string.replace('\\', '\\\\') + string = string.replace('\n', r'\n') + string = string.replace('(', r'\(') + string = string.replace(')', r'\)') + # get rid of ciando id + regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}') + if regularexp.match(string): return ('http://www.ciando.com') + return string + + def serialize_object(self, obj): + if isinstance(obj, dict): + # Correct malformed Mac OS resource forks for Stanza + if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \ + and isinstance(obj['Type'], int): + obj['Subtype'] = obj['Type'] + del obj['Type'] + # end - hope this doesn't have bad effects + self.write('<<') + for key, val in obj.items(): + self.write('/%s' % key) + self.serialize_object(val) + self.write('>>') + elif isinstance(obj, list): + self.write('[') + for val in obj: + self.serialize_object(val) + self.write(']') + elif isinstance(obj, str): + self.write('(%s)' % self.escape_string(obj)) + elif isinstance(obj, bool): + if self.last.isalnum(): + self.write(' ') + self.write(str(obj).lower()) + elif isinstance(obj, (int, long, float)): + if self.last.isalnum(): + self.write(' ') + self.write(str(obj)) + elif isinstance(obj, PDFObjRef): + if self.last.isalnum(): + self.write(' ') + self.write('%d %d R' % (obj.objid, 0)) + elif isinstance(obj, PDFStream): + ### If we don't generate cross ref streams the object streams + ### are no longer useful, as we have extracted all objects from + ### them. 
Therefore leave them out from the output. + if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm: + self.write('(deleted)') + else: + data = obj.get_decdata() + self.serialize_object(obj.dic) + self.write('stream\n') + self.write(data) + self.write('\nendstream') + else: + data = str(obj) + if data[0].isalnum() and self.last.isalnum(): + self.write(' ') + self.write(data) + + def serialize_indirect(self, objid, obj): + self.write('%d 0 obj' % (objid,)) + self.serialize_object(obj) + if self.last.isalnum(): + self.write('\n') + self.write('endobj\n') + + + + +def decryptBook(userkey, inpath, outpath): + if RSA is None: + raise ADEPTError(u"PyCrypto or OpenSSL must be installed.") + with open(inpath, 'rb') as inf: + try: + serializer = PDFSerializer(inf, userkey) + except: + print u"Error serializing pdf {0}. Probably wrong key.".format(os.path.basename(inpath)) + return 2 + # hope this will fix the 'bad file descriptor' problem + with open(outpath, 'wb') as outf: + # help construct to make sure the method runs to the end + try: + serializer.dump(outf) + except Exception, e: + print u"error writing pdf: {0}".format(e.args[0]) + return 2 + return 0 -def usage(progname): - print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks" - print u"Usage:" - print u" {0} [-k ] [-p ] [-s ] ".format(progname) -# -# Main -# def cli_main(): + sys.stdout=SafeUnbuffered(sys.stdout) + sys.stderr=SafeUnbuffered(sys.stderr) argv=unicode_argv() progname = os.path.basename(argv[0]) - print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__) + if len(argv) != 4: + print u"usage: {0} ".format(progname) + return 1 + keypath, inpath, outpath = argv[1:] + userkey = open(keypath,'rb').read() + result = decryptBook(userkey, inpath, outpath) + if result == 0: + print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath)) + return result + +def gui_main(): try: 
- opts, args = getopt.getopt(argv[1:], "k:p:s:") - except getopt.GetoptError, err: - print u"Error in options or arguments: {0}".format(err.args[0]) - usage(progname) - sys.exit(2) - if len(args)<2: - usage(progname) - sys.exit(2) - - infile = args[0] - outdir = args[1] - kDatabaseFiles = [] - serials = [] - pids = [] - - for o, a in opts: - if o == "-k": - if a == None : - raise DrmException("Invalid parameter for -k") - kDatabaseFiles.append(a) - if o == "-p": - if a == None : - raise DrmException("Invalid parameter for -p") - pids = a.split(',') - if o == "-s": - if a == None : - raise DrmException("Invalid parameter for -s") - serials = a.split(',') - - # try with built in Kindle Info files if not on Linux - k4 = not sys.platform.startswith('linux') - - return decryptBook(infile, outdir, kDatabaseFiles, serials, pids) + import Tkinter + import Tkconstants + import tkMessageBox + import traceback + except: + return cli_main() + + class DecryptionDialog(Tkinter.Frame): + def __init__(self, root): + Tkinter.Frame.__init__(self, root, border=5) + self.status = Tkinter.Label(self, text=u"Select files for decryption") + self.status.pack(fill=Tkconstants.X, expand=1) + body = Tkinter.Frame(self) + body.pack(fill=Tkconstants.X, expand=1) + sticky = Tkconstants.E + Tkconstants.W + body.grid_columnconfigure(1, weight=2) + Tkinter.Label(body, text=u"Key file").grid(row=0) + self.keypath = Tkinter.Entry(body, width=30) + self.keypath.grid(row=0, column=1, sticky=sticky) + if os.path.exists(u"adeptkey.der"): + self.keypath.insert(0, u"adeptkey.der") + button = Tkinter.Button(body, text=u"...", command=self.get_keypath) + button.grid(row=0, column=2) + Tkinter.Label(body, text=u"Input file").grid(row=1) + self.inpath = Tkinter.Entry(body, width=30) + self.inpath.grid(row=1, column=1, sticky=sticky) + button = Tkinter.Button(body, text=u"...", command=self.get_inpath) + button.grid(row=1, column=2) + Tkinter.Label(body, text=u"Output file").grid(row=2) + self.outpath = 
Tkinter.Entry(body, width=30) + self.outpath.grid(row=2, column=1, sticky=sticky) + button = Tkinter.Button(body, text=u"...", command=self.get_outpath) + button.grid(row=2, column=2) + buttons = Tkinter.Frame(self) + buttons.pack() + botton = Tkinter.Button( + buttons, text=u"Decrypt", width=10, command=self.decrypt) + botton.pack(side=Tkconstants.LEFT) + Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT) + button = Tkinter.Button( + buttons, text=u"Quit", width=10, command=self.quit) + button.pack(side=Tkconstants.RIGHT) + + def get_keypath(self): + keypath = tkFileDialog.askopenfilename( + parent=None, title=u"Select Adobe Adept \'.der\' key file", + defaultextension=u".der", + filetypes=[('Adobe Adept DER-encoded files', '.der'), + ('All Files', '.*')]) + if keypath: + keypath = os.path.normpath(keypath) + self.keypath.delete(0, Tkconstants.END) + self.keypath.insert(0, keypath) + return + + def get_inpath(self): + inpath = tkFileDialog.askopenfilename( + parent=None, title=u"Select ADEPT-encrypted PDF file to decrypt", + defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')]) + if inpath: + inpath = os.path.normpath(inpath) + self.inpath.delete(0, Tkconstants.END) + self.inpath.insert(0, inpath) + return + + def get_outpath(self): + outpath = tkFileDialog.asksaveasfilename( + parent=None, title=u"Select unencrypted PDF file to produce", + defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')]) + if outpath: + outpath = os.path.normpath(outpath) + self.outpath.delete(0, Tkconstants.END) + self.outpath.insert(0, outpath) + return + + def decrypt(self): + keypath = self.keypath.get() + inpath = self.inpath.get() + outpath = self.outpath.get() + if not keypath or not os.path.exists(keypath): + self.status['text'] = u"Specified key file does not exist" + return + if not inpath or not os.path.exists(inpath): + self.status['text'] = u"Specified input file does not exist" + return + if not outpath: + self.status['text'] = u"Output file not 
specified" + return + if inpath == outpath: + self.status['text'] = u"Must have different input and output files" + return + userkey = open(keypath,'rb').read() + self.status['text'] = u"Decrypting..." + try: + decrypt_status = decryptBook(userkey, inpath, outpath) + except Exception, e: + self.status['text'] = u"Error; {0}".format(e.args[0]) + return + if decrypt_status == 0: + self.status['text'] = u"File successfully decrypted" + else: + self.status['text'] = u"The was an error decrypting the file." + + + root = Tkinter.Tk() + if RSA is None: + root.withdraw() + tkMessageBox.showerror( + "INEPT PDF", + "This script requires OpenSSL or PyCrypto, which must be installed " + "separately. Read the top-of-script comment for details.") + return 1 + root.title(u"Adobe Adept PDF Decrypter v.{0}".format(__version__)) + root.resizable(True, False) + root.minsize(370, 0) + DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1) + root.mainloop() + return 0 if __name__ == '__main__': - sys.stdout=SafeUnbuffered(sys.stdout) - sys.stderr=SafeUnbuffered(sys.stderr) - sys.exit(cli_main()) + if len(sys.argv) > 1: + sys.exit(cli_main()) + sys.exit(gui_main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py b/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py index dd88797..504105b 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py @@ -2,266 +2,331 @@ # -*- coding: utf-8 -*- from __future__ import with_statement -import sys -import os, csv -import binascii -import zlib + +# ignobleepub.pyw, version 3.6 +# Copyright © 2009-2012 by DiapDealer et al. + +# engine to remove drm from Kindle for Mac and Kindle for PC books +# for personal use for archiving and converting your ebooks + +# PLEASE DO NOT PIRATE EBOOKS! 
+ +# We want all authors and publishers, and eBook stores to live +# long and prosperous lives but at the same time we just want to +# be able to read OUR books on whatever device we want and to keep +# readable for a long, long time + +# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle, +# unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates +# and many many others +# Special thanks to The Dark Reverser for MobiDeDrm and CMBDTC for cmbdtc_dump +# from which this script borrows most unashamedly. + + +# Changelog +# 1.0 - Name change to k4mobidedrm. Adds Mac support, Adds plugin code +# 1.1 - Adds support for additional kindle.info files +# 1.2 - Better error handling for older Mobipocket +# 1.3 - Don't try to decrypt Topaz books +# 1.7 - Add support for Topaz books and Kindle serial numbers. Split code. +# 1.9 - Tidy up after Topaz, minor exception changes +# 2.1 - Topaz fix and filename sanitizing +# 2.2 - Topaz Fix and minor Mac code fix +# 2.3 - More Topaz fixes +# 2.4 - K4PC/Mac key generation fix +# 2.6 - Better handling of non-K4PC/Mac ebooks +# 2.7 - Better trailing bytes handling in mobidedrm +# 2.8 - Moved parsing of kindle.info files to mac & pc util files. +# 3.1 - Updated for new calibre interface. Now __init__ in plugin. +# 3.5 - Now support Kindle for PC/Mac 1.6 +# 3.6 - Even better trailing bytes handling in mobidedrm +# 3.7 - Add support for Amazon Print Replica ebooks. +# 3.8 - Improved Topaz support +# 4.1 - Improved Topaz support and faster decryption with alfcrypto +# 4.2 - Added support for Amazon's KF8 format ebooks +# 4.4 - Linux calls to Wine added, and improved configuration dialog +# 4.5 - Linux works again without Wine. 
Some Mac key file search changes +# 4.6 - First attempt to handle unicode properly +# 4.7 - Added timing reports, and changed search for Mac key files +# 4.8 - Much better unicode handling, matching the updated inept and ignoble scripts +# - Moved back into plugin, __init__ in plugin now only contains plugin code. +# 4.9 - Missed some invalid characters in cleanup_name +# 5.0 - Extraction of info from Kindle for PC/Mac moved into kindlekey.py +# - tweaked GetDecryptedBook interface to leave passed parameters unchanged +# 5.1 - moved unicode_argv call inside main for Windows DeDRM compatibility +# 5.2 - Fixed error in command line processing of unicode arguments + +__version__ = '5.2' + + +import sys, os, re +import csv +import getopt import re -from struct import pack, unpack, unpack_from import traceback +import time +import htmlentitydefs +import json class DrmException(Exception): pass -global charMap1 -global charMap3 -global charMap4 - - -charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M' -charMap3 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -charMap4 = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789' - -# crypto digestroutines -import hashlib - -def MD5(message): - ctx = hashlib.md5() - ctx.update(message) - return ctx.digest() - -def SHA1(message): - ctx = hashlib.sha1() - ctx.update(message) - return ctx.digest() - - -# Encode the bytes in data with the characters in map -def encode(data, map): - result = '' - for char in data: - value = ord(char) - Q = (value ^ 0x80) // len(map) - R = value % len(map) - result += map[Q] - result += map[R] - return result - -# Hash the bytes in data and then encode the digest with the characters in map -def encodeHash(data,map): - return encode(MD5(data),map) - -# Decode the string in data with the characters in map. 
Returns the decoded bytes -def decode(data,map): - result = '' - for i in range (0,len(data)-1,2): - high = map.find(data[i]) - low = map.find(data[i+1]) - if (high == -1) or (low == -1) : - break - value = (((high * len(map)) ^ 0x80) & 0xFF) + low - result += pack('B',value) - return result - -# -# PID generation routines -# - -# Returns two bit at offset from a bit field -def getTwoBitsFromBitField(bitField,offset): - byteNumber = offset // 4 - bitPosition = 6 - 2*(offset % 4) - return ord(bitField[byteNumber]) >> bitPosition & 3 - -# Returns the six bits at offset from a bit field -def getSixBitsFromBitField(bitField,offset): - offset *= 3 - value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2) - return value - -# 8 bits to six bits encoding from hash to generate PID string -def encodePID(hash): - global charMap3 - PID = '' - for position in range (0,8): - PID += charMap3[getSixBitsFromBitField(hash,position)] - return PID - -# Encryption table used to generate the device PID -def generatePidEncryptionTable() : - table = [] - for counter1 in range (0,0x100): - value = counter1 - for counter2 in range (0,8): - if (value & 1 == 0) : - value = value >> 1 - else : - value = value >> 1 - value = value ^ 0xEDB88320 - table.append(value) - return table - -# Seed value used to generate the device PID -def generatePidSeed(table,dsn) : - value = 0 - for counter in range (0,4) : - index = (ord(dsn[counter]) ^ value) &0xFF - value = (value >> 8) ^ table[index] - return value - -# Generate the device PID -def generateDevicePID(table,dsn,nbRoll): - global charMap4 - seed = generatePidSeed(table,dsn) - pidAscii = '' - pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF] - index = 0 - for counter in range (0,nbRoll): - pid[index] = pid[index] ^ ord(dsn[counter]) - index = (index+1) %8 - for 
counter in range (0,8): - index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7) - pidAscii += charMap4[index] - return pidAscii - -def crc32(s): - return (~binascii.crc32(s,-1))&0xFFFFFFFF - -# convert from 8 digit PID to 10 digit PID with checksum -def checksumPid(s): - global charMap4 - crc = crc32(s) - crc = crc ^ (crc >> 16) - res = s - l = len(charMap4) - for i in (0,1): - b = crc & 0xff - pos = (b // l) ^ (b % l) - res += charMap4[pos%l] - crc >>= 8 - return res - - -# old kindle serial number to fixed pid -def pidFromSerial(s, l): - global charMap4 - crc = crc32(s) - arr1 = [0]*l - for i in xrange(len(s)): - arr1[i%l] ^= ord(s[i]) - crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff] - for i in xrange(l): - arr1[i] ^= crc_bytes[i&3] - pid = "" - for i in xrange(l): - b = arr1[i] & 0xff - pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))] - return pid - - -# Parse the EXTH header records and use the Kindle serial number to calculate the book pid. -def getKindlePids(rec209, token, serialnum): - pids=[] - - if isinstance(serialnum,unicode): - serialnum = serialnum.encode('ascii') - - # Compute book PID - pidHash = SHA1(serialnum+rec209+token) - bookPID = encodePID(pidHash) - bookPID = checksumPid(bookPID) - pids.append(bookPID) - - # compute fixed pid for old pre 2.5 firmware update pid as well - kindlePID = pidFromSerial(serialnum, 7) + "*" - kindlePID = checksumPid(kindlePID) - pids.append(kindlePID) - - return pids - - -# parse the Kindleinfo file to calculate the book pid. 
- -keynames = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber'] - -def getK4Pids(rec209, token, kindleDatabase): - global charMap1 - pids = [] +if 'calibre' in sys.modules: + inCalibre = True +else: + inCalibre = False + +if inCalibre: + from calibre_plugins.dedrm import mobidedrm + from calibre_plugins.dedrm import topazextract + from calibre_plugins.dedrm import kgenpids + from calibre_plugins.dedrm import android +else: + import mobidedrm + import topazextract + import kgenpids + import android + +# Wrap a stream so that output gets flushed immediately +# and also make sure that any unicode strings get +# encoded using "replace" before writing them. +class SafeUnbuffered: + def __init__(self, stream): + self.stream = stream + self.encoding = stream.encoding + if self.encoding == None: + self.encoding = "utf-8" + def write(self, data): + if isinstance(data,unicode): + data = data.encode(self.encoding,"replace") + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + +iswindows = sys.platform.startswith('win') +isosx = sys.platform.startswith('darwin') + +def unicode_argv(): + if iswindows: + # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode + # strings. + + # Versions 2.x of Python don't support Unicode in sys.argv on + # Windows, with the underlying Windows API instead replacing multi-byte + # characters with '?'. 
+ + + from ctypes import POINTER, byref, cdll, c_int, windll + from ctypes.wintypes import LPCWSTR, LPWSTR + + GetCommandLineW = cdll.kernel32.GetCommandLineW + GetCommandLineW.argtypes = [] + GetCommandLineW.restype = LPCWSTR + + CommandLineToArgvW = windll.shell32.CommandLineToArgvW + CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] + CommandLineToArgvW.restype = POINTER(LPWSTR) + + cmd = GetCommandLineW() + argc = c_int(0) + argv = CommandLineToArgvW(cmd, byref(argc)) + if argc.value > 0: + # Remove Python executable and commands if present + start = argc.value - len(sys.argv) + return [argv[i] for i in + xrange(start, argc.value)] + # if we don't have any arguments at all, just pass back script name + # this should never happen + return [u"mobidedrm.py"] + else: + argvencoding = sys.stdin.encoding + if argvencoding == None: + argvencoding = "utf-8" + return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] + +# cleanup unicode filenames +# borrowed from calibre from calibre/src/calibre/__init__.py +# added in removal of control (<32) chars +# and removal of . 
at start and end +# and with some (heavily edited) code from Paul Durrant's kindlenamer.py +def cleanup_name(name): + # substitute filename unfriendly characters + name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'").replace(u"*",u"_").replace(u"?",u"") + # delete control characters + name = u"".join(char for char in name if ord(char)>=32) + # white space to single space, delete leading and trailing while space + name = re.sub(ur"\s", u" ", name).strip() + # remove leading dots + while len(name)>0 and name[0] == u".": + name = name[1:] + # remove trailing dots (Windows doesn't like them) + if name.endswith(u'.'): + name = name[:-1] + return name + +# must be passed unicode +def unescape(text): + def fixup(m): + text = m.group(0) + if text[:2] == u"&#": + # character reference + try: + if text[:3] == u"&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) + except KeyError: + pass + return text # leave as is + return re.sub(u"&#?\w+;", fixup, text) + +def GetDecryptedBook(infile, kDatabases, serials, pids, starttime = time.time()): + # handle the obvious cases at the beginning + if not os.path.isfile(infile): + raise DrmException(u"Input file does not exist.") + + mobi = True + magic3 = open(infile,'rb').read(3) + if magic3 == 'TPZ': + mobi = False + + if mobi: + mb = mobidedrm.MobiBook(infile) + else: + mb = topazextract.TopazBook(infile) + + bookname = unescape(mb.getBookTitle()) + print u"Decrypting {1} ebook: {0}".format(bookname, mb.getBookType()) + + # copy list of pids + totalpids = list(pids) + # extend PID list with book-specific PIDs + md1, md2 = mb.getPIDMetaInfo() + totalpids.extend(kgenpids.getPidList(md1, md2, serials, kDatabases)) + print u"Found {1:d} keys to try 
after {0:.1f} seconds".format(time.time()-starttime, len(totalpids)) try: - # Get the Mazama Random number - MazamaRandomNumber = (kindleDatabase[1])['MazamaRandomNumber'].decode('hex').encode('ascii') + mb.processBook(totalpids) + except: + mb.cleanup + raise - # Get the kindle account token - kindleAccountToken = (kindleDatabase[1])['kindle.account.tokens'].decode('hex').encode('ascii') + print u"Decryption succeeded after {0:.1f} seconds".format(time.time()-starttime) + return mb - # Get the IDString used to decode the Kindle Info file - IDString = (kindleDatabase[1])['IDString'].decode('hex').encode('ascii') - # Get the UserName stored when the Kindle Info file was decoded - UserName = (kindleDatabase[1])['UserName'].decode('hex').encode('ascii') - - except KeyError: - print u"Keys not found in the database {0}.".format(kindleDatabase[0]) - return pids +# kDatabaseFiles is a list of files created by kindlekey +def decryptBook(infile, outdir, kDatabaseFiles, serials, pids): + starttime = time.time() + kDatabases = [] + for dbfile in kDatabaseFiles: + kindleDatabase = {} + try: + with open(dbfile, 'r') as keyfilein: + kindleDatabase = json.loads(keyfilein.read()) + kDatabases.append([dbfile,kindleDatabase]) + except Exception, e: + print u"Error getting database from file {0:s}: {1:s}".format(dbfile,e) + traceback.print_exc() - # Get the ID string used - encodedIDString = encodeHash(IDString,charMap1) - # Get the current user name - encodedUsername = encodeHash(UserName,charMap1) - # concat, hash and encode to calculate the DSN - DSN = encode(SHA1(MazamaRandomNumber+encodedIDString+encodedUsername),charMap1) + try: + book = GetDecryptedBook(infile, kDatabases, serials, pids, starttime) + except Exception, e: + print u"Error decrypting book after {1:.1f} seconds: {0}".format(e.args[0],time.time()-starttime) + traceback.print_exc() + return 1 - # Compute the device PID (for which I can tell, is used for nothing). 
- table = generatePidEncryptionTable() - devicePID = generateDevicePID(table,DSN,4) - devicePID = checksumPid(devicePID) - pids.append(devicePID) + # if we're saving to the same folder as the original, use file name_ + # if to a different folder, use book name + if os.path.normcase(os.path.normpath(outdir)) == os.path.normcase(os.path.normpath(os.path.dirname(infile))): + outfilename = os.path.splitext(os.path.basename(infile))[0] + else: + outfilename = cleanup_name(book.getBookTitle()) - # Compute book PIDs + # avoid excessively long file names + if len(outfilename)>150: + outfilename = outfilename[:150] - # book pid - pidHash = SHA1(DSN+kindleAccountToken+rec209+token) - bookPID = encodePID(pidHash) - bookPID = checksumPid(bookPID) - pids.append(bookPID) + outfilename = outfilename+u"_nodrm" + outfile = os.path.join(outdir, outfilename + book.getBookExtension()) - # variant 1 - pidHash = SHA1(kindleAccountToken+rec209+token) - bookPID = encodePID(pidHash) - bookPID = checksumPid(bookPID) - pids.append(bookPID) + book.getFile(outfile) + print u"Saved decrypted book {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename) - # variant 2 - pidHash = SHA1(DSN+rec209+token) - bookPID = encodePID(pidHash) - bookPID = checksumPid(bookPID) - pids.append(bookPID) + if book.getBookType()==u"Topaz": + zipname = os.path.join(outdir, outfilename + u"_SVG.zip") + book.getSVGZip(zipname) + print u"Saved SVG ZIP Archive for {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename) - return pids + # remove internal temporary directory of Topaz pieces + book.cleanup() + return 0 -def getPidList(md1, md2, serials=[], kDatabases=[]): - pidlst = [] - if kDatabases is None: - kDatabases = [] - if serials is None: - serials = [] +def usage(progname): + print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks" + print u"Usage:" + print u" {0} [-k ] [-p ] [-s ] [ -a ] ".format(progname) - for kDatabase in 
kDatabases: - try: - pidlst.extend(getK4Pids(md1, md2, kDatabase)) - except Exception, e: - print u"Error getting PIDs from database {0}: {1}".format(kDatabase[0],e.args[0]) - traceback.print_exc() +# +# Main +# +def cli_main(): + argv=unicode_argv() + progname = os.path.basename(argv[0]) + print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__) - for serialnum in serials: - try: - pidlst.extend(getKindlePids(md1, md2, serialnum)) - except Exception, e: - print u"Error getting PIDs from serial number {0}: {1}".format(serialnum ,e.args[0]) - traceback.print_exc() + try: + opts, args = getopt.getopt(argv[1:], "k:p:s:a:") + except getopt.GetoptError, err: + print u"Error in options or arguments: {0}".format(err.args[0]) + usage(progname) + sys.exit(2) + if len(args)<2: + usage(progname) + sys.exit(2) + + infile = args[0] + outdir = args[1] + kDatabaseFiles = [] + serials = [] + pids = [] - return pidlst + for o, a in opts: + if o == "-k": + if a == None : + raise DrmException("Invalid parameter for -k") + kDatabaseFiles.append(a) + if o == "-p": + if a == None : + raise DrmException("Invalid parameter for -p") + pids = a.split(',') + if o == "-s": + if a == None : + raise DrmException("Invalid parameter for -s") + serials = a.split(',') + if o == '-a': + if a == None: + continue + serials.extend(android.get_serials(a)) + serials.extend(android.get_serials()) + + # try with built in Kindle Info files if not on Linux + k4 = not sys.platform.startswith('linux') + + return decryptBook(infile, outdir, kDatabaseFiles, serials, pids) + + +if __name__ == '__main__': + sys.stdout=SafeUnbuffered(sys.stdout) + sys.stderr=SafeUnbuffered(sys.stderr) + sys.exit(cli_main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py b/DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py index f58e973..dd88797 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py @@ -2,102 +2,25 @@ # -*- coding: 
utf-8 -*- from __future__ import with_statement - -# kindlekey.py -# Copyright © 2010-2013 by some_updates and Apprentice Alf -# -# Currently requires alfcrypto.py which requires the alfcrypto library - -# Revision history: -# 1.0 - Kindle info file decryption, extracted from k4mobidedrm, etc. -# 1.1 - Added Tkinter to match adobekey.py -# 1.2 - Fixed testing of successful retrieval on Mac -# 1.3 - Added getkey interface for Windows DeDRM application -# Simplified some of the Kindle for Mac code. -# 1.4 - Remove dependency on alfcrypto -# 1.5 - moved unicode_argv call inside main for Windows DeDRM compatibility -# 1.6 - Fixed a problem getting the disk serial numbers -# 1.7 - Work if TkInter is missing -# 1.8 - Fixes for Kindle for Mac, and non-ascii in Windows user names - - -""" -Retrieve Kindle for PC/Mac user key. -""" - -__license__ = 'GPL v3' -__version__ = '1.8' - -import sys, os, re +import sys +import os, csv +import binascii +import zlib +import re from struct import pack, unpack, unpack_from -import json -import getopt - -# Routines common to Mac and PC +import traceback -# Wrap a stream so that output gets flushed immediately -# and also make sure that any unicode strings get -# encoded using "replace" before writing them. -class SafeUnbuffered: - def __init__(self, stream): - self.stream = stream - self.encoding = stream.encoding - if self.encoding == None: - self.encoding = "utf-8" - def write(self, data): - if isinstance(data,unicode): - data = data.encode(self.encoding,"replace") - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) - -try: - from calibre.constants import iswindows, isosx -except: - iswindows = sys.platform.startswith('win') - isosx = sys.platform.startswith('darwin') - -def unicode_argv(): - if iswindows: - # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode - # strings. 
- - # Versions 2.x of Python don't support Unicode in sys.argv on - # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv - # as a list of Unicode strings and encode them as utf-8 - - from ctypes import POINTER, byref, cdll, c_int, windll - from ctypes.wintypes import LPCWSTR, LPWSTR +class DrmException(Exception): + pass - GetCommandLineW = cdll.kernel32.GetCommandLineW - GetCommandLineW.argtypes = [] - GetCommandLineW.restype = LPCWSTR +global charMap1 +global charMap3 +global charMap4 - CommandLineToArgvW = windll.shell32.CommandLineToArgvW - CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] - CommandLineToArgvW.restype = POINTER(LPWSTR) - cmd = GetCommandLineW() - argc = c_int(0) - argv = CommandLineToArgvW(cmd, byref(argc)) - if argc.value > 0: - # Remove Python executable and commands if present - start = argc.value - len(sys.argv) - return [argv[i] for i in - xrange(start, argc.value)] - # if we don't have any arguments at all, just pass back script name - # this should never happen - return [u"kindlekey.py"] - else: - argvencoding = sys.stdin.encoding - if argvencoding == None: - argvencoding = "utf-8" - return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] - -class DrmException(Exception): - pass +charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M' +charMap3 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' +charMap4 = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789' # crypto digestroutines import hashlib @@ -112,31 +35,6 @@ def SHA1(message): ctx.update(message) return ctx.digest() -def SHA256(message): - ctx = hashlib.sha256() - ctx.update(message) - return ctx.digest() - -# For K4M/PC 1.6.X and later -# generate table of prime number less than or equal to int n -def primes(n): - if n==2: return [2] - elif n<2: return [] - s=range(3,n+1,2) - mroot = n ** 0.5 - half=(n+1)/2-1 - i=0 - m=3 - while m <= mroot: - if s[i]: - 
j=(m*m-3)/2 - s[j]=0 - while j> bitPosition & 3 + +# Returns the six bits at offset from a bit field +def getSixBitsFromBitField(bitField,offset): + offset *= 3 + value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2) + return value + +# 8 bits to six bits encoding from hash to generate PID string +def encodePID(hash): + global charMap3 + PID = '' + for position in range (0,8): + PID += charMap3[getSixBitsFromBitField(hash,position)] + return PID + +# Encryption table used to generate the device PID +def generatePidEncryptionTable() : + table = [] + for counter1 in range (0,0x100): + value = counter1 + for counter2 in range (0,8): + if (value & 1 == 0) : + value = value >> 1 + else : + value = value >> 1 + value = value ^ 0xEDB88320 + table.append(value) + return table + +# Seed value used to generate the device PID +def generatePidSeed(table,dsn) : + value = 0 + for counter in range (0,4) : + index = (ord(dsn[counter]) ^ value) &0xFF + value = (value >> 8) ^ table[index] + return value + +# Generate the device PID +def generateDevicePID(table,dsn,nbRoll): + global charMap4 + seed = generatePidSeed(table,dsn) + pidAscii = '' + pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF] + index = 0 + for counter in range (0,nbRoll): + pid[index] = pid[index] ^ ord(dsn[counter]) + index = (index+1) %8 + for counter in range (0,8): + index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7) + pidAscii += charMap4[index] + return pidAscii + +def crc32(s): + return (~binascii.crc32(s,-1))&0xFFFFFFFF + +# convert from 8 digit PID to 10 digit PID with checksum +def checksumPid(s): + global charMap4 + crc = crc32(s) + crc = crc ^ (crc >> 16) + res = s + l = len(charMap4) + for i in (0,1): + b = crc & 0xff + pos = (b // l) ^ (b % l) + res += charMap4[pos%l] + crc >>= 8 + 
return res + + +# old kindle serial number to fixed pid +def pidFromSerial(s, l): + global charMap4 + crc = crc32(s) + arr1 = [0]*l + for i in xrange(len(s)): + arr1[i%l] ^= ord(s[i]) + crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff] + for i in xrange(l): + arr1[i] ^= crc_bytes[i&3] + pid = "" + for i in xrange(l): + b = arr1[i] & 0xff + pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))] + return pid + + +# Parse the EXTH header records and use the Kindle serial number to calculate the book pid. +def getKindlePids(rec209, token, serialnum): + pids=[] + + if isinstance(serialnum,unicode): + serialnum = serialnum.encode('ascii') + + # Compute book PID + pidHash = SHA1(serialnum+rec209+token) + bookPID = encodePID(pidHash) + bookPID = checksumPid(bookPID) + pids.append(bookPID) + + # compute fixed pid for old pre 2.5 firmware update pid as well + kindlePID = pidFromSerial(serialnum, 7) + "*" + kindlePID = checksumPid(kindlePID) + pids.append(kindlePID) + + return pids + + +# parse the Kindleinfo file to calculate the book pid. + +keynames = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber'] + +def getK4Pids(rec209, token, kindleDatabase): + global charMap1 + pids = [] try: - # try to get fast routines from alfcrypto - from alfcrypto import AES_CBC, KeyIVGen - except: - # alfcrypto not available, so use python implementations - """ - Routines for doing AES CBC in one file - - Modified by some_updates to extract - and combine only those parts needed for AES CBC - into one simple to add python file - - Original Version - Copyright (c) 2002 by Paul A. Lambert - Under: - CryptoPy Artisitic License Version 1.0 - See the wonderful pure python package cryptopy-1.2.5 - and read its LICENSE.txt for complete license details. 
- """ - - class CryptoError(Exception): - """ Base class for crypto exceptions """ - def __init__(self,errorMessage='Error!'): - self.message = errorMessage - def __str__(self): - return self.message - - class InitCryptoError(CryptoError): - """ Crypto errors during algorithm initialization """ - class BadKeySizeError(InitCryptoError): - """ Bad key size error """ - class EncryptError(CryptoError): - """ Error in encryption processing """ - class DecryptError(CryptoError): - """ Error in decryption processing """ - class DecryptNotBlockAlignedError(DecryptError): - """ Error in decryption processing """ - - def xorS(a,b): - """ XOR two strings """ - assert len(a)==len(b) - x = [] - for i in range(len(a)): - x.append( chr(ord(a[i])^ord(b[i]))) - return ''.join(x) - - def xor(a,b): - """ XOR two strings """ - x = [] - for i in range(min(len(a),len(b))): - x.append( chr(ord(a[i])^ord(b[i]))) - return ''.join(x) - - """ - Base 'BlockCipher' and Pad classes for cipher instances. - BlockCipher supports automatic padding and type conversion. The BlockCipher - class was written to make the actual algorithm code more readable and - not for performance. 
- """ - - class BlockCipher: - """ Block ciphers """ - def __init__(self): - self.reset() - - def reset(self): - self.resetEncrypt() - self.resetDecrypt() - def resetEncrypt(self): - self.encryptBlockCount = 0 - self.bytesToEncrypt = '' - def resetDecrypt(self): - self.decryptBlockCount = 0 - self.bytesToDecrypt = '' - - def encrypt(self, plainText, more = None): - """ Encrypt a string and return a binary string """ - self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt - numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize) - cipherText = '' - for i in range(numBlocks): - bStart = i*self.blockSize - ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize]) - self.encryptBlockCount += 1 - cipherText += ctBlock - if numExtraBytes > 0: # save any bytes that are not block aligned - self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] - else: - self.bytesToEncrypt = '' - - if more == None: # no more data expected from caller - finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize) - if len(finalBytes) > 0: - ctBlock = self.encryptBlock(finalBytes) - self.encryptBlockCount += 1 - cipherText += ctBlock - self.resetEncrypt() - return cipherText - - def decrypt(self, cipherText, more = None): - """ Decrypt a string and return a string """ - self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt - - numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize) - if more == None: # no more calls to decrypt, should have all the data - if numExtraBytes != 0: - raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt' - - # hold back some bytes in case last decrypt has zero len - if (more != None) and (numExtraBytes == 0) and (numBlocks >0) : - numBlocks -= 1 - numExtraBytes = self.blockSize - - plainText = '' - for i in range(numBlocks): - bStart = i*self.blockSize - ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : 
bStart+self.blockSize]) - self.decryptBlockCount += 1 - plainText += ptBlock - - if numExtraBytes > 0: # save any bytes that are not block aligned - self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] - else: - self.bytesToEncrypt = '' - - if more == None: # last decrypt remove padding - plainText = self.padding.removePad(plainText, self.blockSize) - self.resetDecrypt() - return plainText - - - class Pad: - def __init__(self): - pass # eventually could put in calculation of min and max size extension - - class padWithPadLen(Pad): - """ Pad a binary string with the length of the padding """ - - def addPad(self, extraBytes, blockSize): - """ Add padding to a binary string to make it an even multiple - of the block size """ - blocks, numExtraBytes = divmod(len(extraBytes), blockSize) - padLength = blockSize - numExtraBytes - return extraBytes + padLength*chr(padLength) - - def removePad(self, paddedBinaryString, blockSize): - """ Remove padding from a binary string """ - if not(0 6 and i%Nk == 4 : - temp = [ Sbox[byte] for byte in temp ] # SubWord(temp) - w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] ) - return w - - Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!! 
- 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6, - 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91) - - #------------------------------------- - def AddRoundKey(algInstance, keyBlock): - """ XOR the algorithm state with a block of key material """ - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] ^= keyBlock[column][row] - #------------------------------------- - - def SubBytes(algInstance): - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] = Sbox[algInstance.state[column][row]] - - def InvSubBytes(algInstance): - for column in range(algInstance.Nb): - for row in range(4): - algInstance.state[column][row] = InvSbox[algInstance.state[column][row]] - - Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, - 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, - 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, - 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, - 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, - 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, - 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, - 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, - 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, - 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, - 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, - 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, - 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, - 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, - 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, - 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, - 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, - 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, - 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, - 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, - 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, - 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, - 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, - 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, - 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, - 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, - 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, - 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, - 
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, - 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, - 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, - 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) - - InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, - 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, - 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, - 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, - 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, - 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, - 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, - 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, - 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, - 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, - 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, - 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, - 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, - 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, - 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, - 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, - 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, - 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, - 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, - 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, - 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, - 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, - 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, - 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, - 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, - 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, - 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, - 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, - 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, - 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, - 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, - 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) - - #------------------------------------- - """ For each block size (Nb), the ShiftRow operation shifts row i - by the amount Ci. Note that row 0 is not shifted. 
- Nb C1 C2 C3 - ------------------- """ - shiftOffset = { 4 : ( 0, 1, 2, 3), - 5 : ( 0, 1, 2, 3), - 6 : ( 0, 1, 2, 3), - 7 : ( 0, 1, 2, 4), - 8 : ( 0, 1, 3, 4) } - def ShiftRows(algInstance): - tmp = [0]*algInstance.Nb # list of size Nb - for r in range(1,4): # row 0 reamains unchanged and can be skipped - for c in range(algInstance.Nb): - tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] - for c in range(algInstance.Nb): - algInstance.state[c][r] = tmp[c] - def InvShiftRows(algInstance): - tmp = [0]*algInstance.Nb # list of size Nb - for r in range(1,4): # row 0 reamains unchanged and can be skipped - for c in range(algInstance.Nb): - tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] - for c in range(algInstance.Nb): - algInstance.state[c][r] = tmp[c] - #------------------------------------- - def MixColumns(a): - Sprime = [0,0,0,0] - for j in range(a.Nb): # for each column - Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3]) - Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3]) - Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3]) - Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3]) - for i in range(4): - a.state[j][i] = Sprime[i] - - def InvMixColumns(a): - """ Mix the four bytes of every column in a linear way - This is the opposite operation of Mixcolumn """ - Sprime = [0,0,0,0] - for j in range(a.Nb): # for each column - Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3]) - Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3]) - Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3]) - Sprime[3] = 
mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3]) - for i in range(4): - a.state[j][i] = Sprime[i] - - #------------------------------------- - def mul(a, b): - """ Multiply two elements of GF(2^m) - needed for MixColumn and InvMixColumn """ - if (a !=0 and b!=0): - return Alogtable[(Logtable[a] + Logtable[b])%255] - else: - return 0 - - Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, - 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, - 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, - 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, - 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, - 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, - 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, - 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, - 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, - 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, - 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, - 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, - 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, - 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, - 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165, - 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7) - - Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, - 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, - 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, - 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, - 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, - 131, 158, 185, 208, 
107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, - 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, - 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, - 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, - 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, - 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, - 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, - 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, - 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, - 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, - 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1) - - - - - """ - AES Encryption Algorithm - The AES algorithm is just Rijndael algorithm restricted to the default - blockSize of 128 bits. - """ - - class AES(Rijndael): - """ The AES algorithm is the Rijndael block cipher restricted to block - sizes of 128 bits and key sizes of 128, 192 or 256 bits - """ - def __init__(self, key = None, padding = padWithPadLen(), keySize=16): - """ Initialize AES, keySize is in bytes """ - if not (keySize == 16 or keySize == 24 or keySize == 32) : - raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes' - - Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 ) - - self.name = 'AES' - - - """ - CBC mode of encryption for block ciphers. - This algorithm mode wraps any BlockCipher to make a - Cipher Block Chaining mode. - """ - from random import Random # should change to crypto.random!!! - - - class CBC(BlockCipher): - """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode - algorithms. The initialization (IV) is automatic if set to None. 
Padding - is also automatic based on the Pad class used to initialize the algorithm - """ - def __init__(self, blockCipherInstance, padding = padWithPadLen()): - """ CBC algorithms are created by initializing with a BlockCipher instance """ - self.baseCipher = blockCipherInstance - self.name = self.baseCipher.name + '_CBC' - self.blockSize = self.baseCipher.blockSize - self.keySize = self.baseCipher.keySize - self.padding = padding - self.baseCipher.padding = noPadding() # baseCipher should NOT pad!! - self.r = Random() # for IV generation, currently uses - # mediocre standard distro version <---------------- - import time - newSeed = time.ctime()+str(self.r) # seed with instance location - self.r.seed(newSeed) # to make unique - self.reset() - - def setKey(self, key): - self.baseCipher.setKey(key) - - # Overload to reset both CBC state and the wrapped baseCipher - def resetEncrypt(self): - BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class) - self.baseCipher.resetEncrypt() # reset base cipher encrypt state - - def resetDecrypt(self): - BlockCipher.resetDecrypt(self) # reset CBC state (super class) - self.baseCipher.resetDecrypt() # reset base cipher decrypt state - - def encrypt(self, plainText, iv=None, more=None): - """ CBC encryption - overloads baseCipher to allow optional explicit IV - when iv=None, iv is auto generated! - """ - if self.encryptBlockCount == 0: - self.iv = iv - else: - assert(iv==None), 'IV used only on first call to encrypt' - - return BlockCipher.encrypt(self,plainText, more=more) - - def decrypt(self, cipherText, iv=None, more=None): - """ CBC decryption - overloads baseCipher to allow optional explicit IV - when iv=None, iv is auto generated! 
- """ - if self.decryptBlockCount == 0: - self.iv = iv - else: - assert(iv==None), 'IV used only on first call to decrypt' - - return BlockCipher.decrypt(self, cipherText, more=more) - - def encryptBlock(self, plainTextBlock): - """ CBC block encryption, IV is set with 'encrypt' """ - auto_IV = '' - if self.encryptBlockCount == 0: - if self.iv == None: - # generate IV and use - self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)]) - self.prior_encr_CT_block = self.iv - auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic - else: # application provided IV - assert(len(self.iv) == self.blockSize ),'IV must be same length as block' - self.prior_encr_CT_block = self.iv - """ encrypt the prior CT XORed with the PT """ - ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) ) - self.prior_encr_CT_block = ct - return auto_IV+ct - - def decryptBlock(self, encryptedBlock): - """ Decrypt a single block """ - - if self.decryptBlockCount == 0: # first call, process IV - if self.iv == None: # auto decrypt IV? 
- self.prior_CT_block = encryptedBlock - return '' - else: - assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption" - self.prior_CT_block = self.iv - - dct = self.baseCipher.decryptBlock(encryptedBlock) - """ XOR the prior decrypted CT with the prior CT """ - dct_XOR_priorCT = xor( self.prior_CT_block, dct ) - - self.prior_CT_block = encryptedBlock - - return dct_XOR_priorCT - - - """ - AES_CBC Encryption Algorithm - """ - - class aescbc_AES_CBC(CBC): - """ AES encryption in CBC feedback mode """ - def __init__(self, key=None, padding=padWithPadLen(), keySize=16): - CBC.__init__( self, AES(key, noPadding(), keySize), padding) - self.name = 'AES_CBC' - - class AES_CBC(object): - def __init__(self): - self._key = None - self._iv = None - self.aes = None - - def set_decrypt_key(self, userkey, iv): - self._key = userkey - self._iv = iv - self.aes = aescbc_AES_CBC(userkey, noPadding(), len(userkey)) - - def decrypt(self, data): - iv = self._iv - cleartext = self.aes.decrypt(iv + data) - return cleartext - - import hmac - - class KeyIVGen(object): - # this only exists in openssl so we will use pure python implementation instead - # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', - # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) - def pbkdf2(self, passwd, salt, iter, keylen): - - def xorstr( a, b ): - if len(a) != len(b): - raise Exception("xorstr(): lengths differ") - return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b))) - - def prf( h, data ): - hm = h.copy() - hm.update( data ) - return hm.digest() - - def pbkdf2_F( h, salt, itercount, blocknum ): - U = prf( h, salt + pack('>i',blocknum ) ) - T = U - for i in range(2, itercount+1): - U = prf( h, U ) - T = xorstr( T, U ) - return T - - sha = hashlib.sha1 - digest_size = sha().digest_size - # l - number of output blocks to produce - l = keylen / digest_size - if keylen % digest_size != 0: - l += 1 - h = hmac.new( passwd, None, sha ) - T = "" - for i in range(1, l+1): - 
T += pbkdf2_F( h, salt, iter, i ) - return T[0: keylen] - - def UnprotectHeaderData(encryptedData): - passwdData = 'header_key_data' - salt = 'HEADER.2011' - iter = 0x80 - keylen = 0x100 - key_iv = KeyIVGen().pbkdf2(passwdData, salt, iter, keylen) - key = key_iv[0:32] - iv = key_iv[32:48] - aes=AES_CBC() - aes.set_decrypt_key(key, iv) - cleartext = aes.decrypt(encryptedData) - return cleartext - - # Various character maps used to decrypt kindle info values. - # Probably supposed to act as obfuscation - charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_" - charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE" - # New maps in K4PC 1.9.0 - testMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M" - testMap6 = "9YzAb0Cd1Ef2n5Pr6St7Uvh3Jk4M8WxG" - testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD" - - # interface with Windows OS Routines - class DataBlob(Structure): - _fields_ = [('cbData', c_uint), - ('pbData', c_void_p)] - DataBlob_p = POINTER(DataBlob) - - - def GetSystemDirectory(): - GetSystemDirectoryW = kernel32.GetSystemDirectoryW - GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint] - GetSystemDirectoryW.restype = c_uint - def GetSystemDirectory(): - buffer = create_unicode_buffer(MAX_PATH + 1) - GetSystemDirectoryW(buffer, len(buffer)) - return buffer.value - return GetSystemDirectory - GetSystemDirectory = GetSystemDirectory() - - def GetVolumeSerialNumber(): - GetVolumeInformationW = kernel32.GetVolumeInformationW - GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint, - POINTER(c_uint), POINTER(c_uint), - POINTER(c_uint), c_wchar_p, c_uint] - GetVolumeInformationW.restype = c_uint - def GetVolumeSerialNumber(path = GetSystemDirectory().split('\\')[0] + '\\'): - vsn = c_uint(0) - GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0) - return str(vsn.value) - return GetVolumeSerialNumber - GetVolumeSerialNumber = GetVolumeSerialNumber() - - def GetIDString(): - vsn = 
GetVolumeSerialNumber() - #print('Using Volume Serial Number for ID: '+vsn) - return vsn - - def getLastError(): - GetLastError = kernel32.GetLastError - GetLastError.argtypes = None - GetLastError.restype = c_uint - def getLastError(): - return GetLastError() - return getLastError - getLastError = getLastError() + # Get the Mazama Random number + MazamaRandomNumber = (kindleDatabase[1])['MazamaRandomNumber'].decode('hex').encode('ascii') - def GetUserName(): - GetUserNameW = advapi32.GetUserNameW - GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)] - GetUserNameW.restype = c_uint - def GetUserName(): - buffer = create_unicode_buffer(2) - size = c_uint(len(buffer)) - while not GetUserNameW(buffer, byref(size)): - errcd = getLastError() - if errcd == 234: - # bad wine implementation up through wine 1.3.21 - return "AlternateUserName" - buffer = create_unicode_buffer(len(buffer) * 2) - size.value = len(buffer) - return buffer.value.encode('utf-16-le')[::2] - return GetUserName - GetUserName = GetUserName() + # Get the kindle account token + kindleAccountToken = (kindleDatabase[1])['kindle.account.tokens'].decode('hex').encode('ascii') - def CryptUnprotectData(): - _CryptUnprotectData = crypt32.CryptUnprotectData - _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p, - c_void_p, c_void_p, c_uint, DataBlob_p] - _CryptUnprotectData.restype = c_uint - def CryptUnprotectData(indata, entropy, flags): - indatab = create_string_buffer(indata) - indata = DataBlob(len(indata), cast(indatab, c_void_p)) - entropyb = create_string_buffer(entropy) - entropy = DataBlob(len(entropy), cast(entropyb, c_void_p)) - outdata = DataBlob() - if not _CryptUnprotectData(byref(indata), None, byref(entropy), - None, None, flags, byref(outdata)): - # raise DrmException("Failed to Unprotect Data") - return 'failed' - return string_at(outdata.pbData, outdata.cbData) - return CryptUnprotectData - CryptUnprotectData = CryptUnprotectData() + # Get the IDString used to decode the 
Kindle Info file + IDString = (kindleDatabase[1])['IDString'].decode('hex').encode('ascii') + # Get the UserName stored when the Kindle Info file was decoded + UserName = (kindleDatabase[1])['UserName'].decode('hex').encode('ascii') - # Locate all of the kindle-info style files and return as list - def getKindleInfoFiles(): - kInfoFiles = [] - # some 64 bit machines do not have the proper registry key for some reason - # or the pythonn interface to the 32 vs 64 bit registry is broken - path = "" - if 'LOCALAPPDATA' in os.environ.keys(): - path = os.environ['LOCALAPPDATA'] - else: - # User Shell Folders show take precedent over Shell Folders if present - try: - regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders\\") - path = winreg.QueryValueEx(regkey, 'Local AppData')[0] - if not os.path.isdir(path): - path = "" - try: - regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\") - path = winreg.QueryValueEx(regkey, 'Local AppData')[0] - if not os.path.isdir(path): - path = "" - except RegError: - pass - except RegError: - pass + except KeyError: + print u"Keys not found in the database {0}.".format(kindleDatabase[0]) + return pids - found = False - if path == "": - print ('Could not find the folder in which to look for kinfoFiles.') - else: - print('searching for kinfoFiles in ' + path) + # Get the ID string used + encodedIDString = encodeHash(IDString,charMap1) - # look for (K4PC 1.9.0 and later) .kinf2011 file - kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011' - if os.path.isfile(kinfopath): - found = True - print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath) - kInfoFiles.append(kinfopath) + # Get the current user name + encodedUsername = encodeHash(UserName,charMap1) - # look for (K4PC 1.6.0 and later) rainier.2.1.1.kinf file - kinfopath = path +'\\Amazon\\Kindle\\storage\\rainier.2.1.1.kinf' - if 
os.path.isfile(kinfopath): - found = True - print('Found K4PC 1.6-1.8 kinf file: ' + kinfopath) - kInfoFiles.append(kinfopath) + # concat, hash and encode to calculate the DSN + DSN = encode(SHA1(MazamaRandomNumber+encodedIDString+encodedUsername),charMap1) - # look for (K4PC 1.5.0 and later) rainier.2.1.1.kinf file - kinfopath = path +'\\Amazon\\Kindle For PC\\storage\\rainier.2.1.1.kinf' - if os.path.isfile(kinfopath): - found = True - print('Found K4PC 1.5 kinf file: ' + kinfopath) - kInfoFiles.append(kinfopath) + # Compute the device PID (for which I can tell, is used for nothing). + table = generatePidEncryptionTable() + devicePID = generateDevicePID(table,DSN,4) + devicePID = checksumPid(devicePID) + pids.append(devicePID) - # look for original (earlier than K4PC 1.5.0) kindle-info files - kinfopath = path +'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info' - if os.path.isfile(kinfopath): - found = True - print('Found K4PC kindle.info file: ' + kinfopath) - kInfoFiles.append(kinfopath) + # Compute book PIDs - if not found: - print('No K4PC kindle.info/kinf/kinf2011 files have been found.') - return kInfoFiles + # book pid + pidHash = SHA1(DSN+kindleAccountToken+rec209+token) + bookPID = encodePID(pidHash) + bookPID = checksumPid(bookPID) + pids.append(bookPID) + # variant 1 + pidHash = SHA1(kindleAccountToken+rec209+token) + bookPID = encodePID(pidHash) + bookPID = checksumPid(bookPID) + pids.append(bookPID) - # determine type of kindle info provided and return a - # database of keynames and values - def getDBfromFile(kInfoFile): - names = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber', 'max_date', 'SIGVERIF'] - DB = {} - with open(kInfoFile, 'rb') as infoReader: - hdr = infoReader.read(1) - data = infoReader.read() + # variant 2 + pidHash = SHA1(DSN+rec209+token) + bookPID = encodePID(pidHash) + 
bookPID = checksumPid(bookPID) + pids.append(bookPID) - if data.find('{') != -1 : - # older style kindle-info file - items = data.split('{') - for item in items: - if item != '': - keyhash, rawdata = item.split(':') - keyname = "unknown" - for name in names: - if encodeHash(name,charMap2) == keyhash: - keyname = name - break - if keyname == "unknown": - keyname = keyhash - encryptedValue = decode(rawdata,charMap2) - DB[keyname] = CryptUnprotectData(encryptedValue, "", 0) - elif hdr == '/': - # else rainier-2-1-1 .kinf file - # the .kinf file uses "/" to separate it into records - # so remove the trailing "/" to make it easy to use split - data = data[:-1] - items = data.split('/') + return pids - # loop through the item records until all are processed - while len(items) > 0: +def getPidList(md1, md2, serials=[], kDatabases=[]): + pidlst = [] - # get the first item record - item = items.pop(0) + if kDatabases is None: + kDatabases = [] + if serials is None: + serials = [] - # the first 32 chars of the first record of a group - # is the MD5 hash of the key name encoded by charMap5 - keyhash = item[0:32] - - # the raw keyhash string is used to create entropy for the actual - # CryptProtectData Blob that represents that keys contents - entropy = SHA1(keyhash) - - # the remainder of the first record when decoded with charMap5 - # has the ':' split char followed by the string representation - # of the number of records that follow - # and make up the contents - srcnt = decode(item[34:],charMap5) - rcnt = int(srcnt) - - # read and store in rcnt records of data - # that make up the contents value - edlst = [] - for i in xrange(rcnt): - item = items.pop(0) - edlst.append(item) - - keyname = "unknown" - for name in names: - if encodeHash(name,charMap5) == keyhash: - keyname = name - break - if keyname == "unknown": - keyname = keyhash - # the charMap5 encoded contents data has had a length - # of chars (always odd) cut off of the front and moved - # to the end to prevent 
decoding using charMap5 from - # working properly, and thereby preventing the ensuing - # CryptUnprotectData call from succeeding. - - # The offset into the charMap5 encoded contents seems to be: - # len(contents)-largest prime number <= int(len(content)/3) - # (in other words split "about" 2/3rds of the way through) - - # move first offsets chars to end to align for decode by charMap5 - encdata = "".join(edlst) - contlen = len(encdata) - noffset = contlen - primes(int(contlen/3))[-1] - - # now properly split and recombine - # by moving noffset chars from the start of the - # string to the end of the string - pfx = encdata[0:noffset] - encdata = encdata[noffset:] - encdata = encdata + pfx - - # decode using Map5 to get the CryptProtect Data - encryptedValue = decode(encdata,charMap5) - DB[keyname] = CryptUnprotectData(encryptedValue, entropy, 1) - else: - # else newest .kinf2011 style .kinf file - # the .kinf file uses "/" to separate it into records - # so remove the trailing "/" to make it easy to use split - # need to put back the first char read because it it part - # of the added entropy blob - data = hdr + data[:-1] - items = data.split('/') - - # starts with and encoded and encrypted header blob - headerblob = items.pop(0) - encryptedValue = decode(headerblob, testMap1) - cleartext = UnprotectHeaderData(encryptedValue) - # now extract the pieces that form the added entropy - pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE) - for m in re.finditer(pattern, cleartext): - added_entropy = m.group(2) + m.group(4) - - - # loop through the item records until all are processed - while len(items) > 0: - - # get the first item record - item = items.pop(0) - - # the first 32 chars of the first record of a group - # is the MD5 hash of the key name encoded by charMap5 - keyhash = item[0:32] - - # the sha1 of raw keyhash string is used to create entropy along - # with the added entropy provided above 
from the headerblob - entropy = SHA1(keyhash) + added_entropy - - # the remainder of the first record when decoded with charMap5 - # has the ':' split char followed by the string representation - # of the number of records that follow - # and make up the contents - srcnt = decode(item[34:],charMap5) - rcnt = int(srcnt) - - # read and store in rcnt records of data - # that make up the contents value - edlst = [] - for i in xrange(rcnt): - item = items.pop(0) - edlst.append(item) - - # key names now use the new testMap8 encoding - keyname = "unknown" - for name in names: - if encodeHash(name,testMap8) == keyhash: - keyname = name - break - - # the testMap8 encoded contents data has had a length - # of chars (always odd) cut off of the front and moved - # to the end to prevent decoding using testMap8 from - # working properly, and thereby preventing the ensuing - # CryptUnprotectData call from succeeding. - - # The offset into the testMap8 encoded contents seems to be: - # len(contents)-largest prime number <= int(len(content)/3) - # (in other words split "about" 2/3rds of the way through) - - # move first offsets chars to end to align for decode by testMap8 - # by moving noffset chars from the start of the - # string to the end of the string - encdata = "".join(edlst) - contlen = len(encdata) - noffset = contlen - primes(int(contlen/3))[-1] - pfx = encdata[0:noffset] - encdata = encdata[noffset:] - encdata = encdata + pfx - - # decode using new testMap8 to get the original CryptProtect Data - encryptedValue = decode(encdata,testMap8) - cleartext = CryptUnprotectData(encryptedValue, entropy, 1) - DB[keyname] = cleartext - - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: - print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(GetIDString(), GetUserName().decode("latin-1")) - # store values used in decryption - DB['IDString'] = GetIDString() - DB['UserName'] = GetUserName() - else: - DB = {} - return DB -elif isosx: - import 
copy - import subprocess - - # interface to needed routines in openssl's libcrypto - def _load_crypto_libcrypto(): - from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \ - Structure, c_ulong, create_string_buffer, addressof, string_at, cast - from ctypes.util import find_library - - libcrypto = find_library('crypto') - if libcrypto is None: - raise DrmException(u"libcrypto not found") - libcrypto = CDLL(libcrypto) - - # From OpenSSL's crypto aes header - # - # AES_ENCRYPT 1 - # AES_DECRYPT 0 - # AES_MAXNR 14 (in bytes) - # AES_BLOCK_SIZE 16 (in bytes) - # - # struct aes_key_st { - # unsigned long rd_key[4 *(AES_MAXNR + 1)]; - # int rounds; - # }; - # typedef struct aes_key_st AES_KEY; - # - # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); - # - # note: the ivec string, and output buffer are both mutable - # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, - # const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc); - - AES_MAXNR = 14 - c_char_pp = POINTER(c_char_p) - c_int_p = POINTER(c_int) - - class AES_KEY(Structure): - _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] - AES_KEY_p = POINTER(AES_KEY) - - def F(restype, name, argtypes): - func = getattr(libcrypto, name) - func.restype = restype - func.argtypes = argtypes - return func - - AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int]) - - AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) - - # From OpenSSL's Crypto evp/p5_crpt2.c - # - # int PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen, - # const unsigned char *salt, int saltlen, int iter, - # int keylen, unsigned char *out); - - PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', - [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) - - class LibCrypto(object): - def __init__(self): - self._blocksize = 0 - 
self._keyctx = None - self._iv = 0 - - def set_decrypt_key(self, userkey, iv): - self._blocksize = len(userkey) - if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : - raise DrmException(u"AES improper key used") - return - keyctx = self._keyctx = AES_KEY() - self._iv = iv - self._userkey = userkey - rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) - if rv < 0: - raise DrmException(u"Failed to initialize AES key") - - def decrypt(self, data): - out = create_string_buffer(len(data)) - mutable_iv = create_string_buffer(self._iv, len(self._iv)) - keyctx = self._keyctx - rv = AES_cbc_encrypt(data, out, len(data), keyctx, mutable_iv, 0) - if rv == 0: - raise DrmException(u"AES decryption failed") - return out.raw - - def keyivgen(self, passwd, salt, iter, keylen): - saltlen = len(salt) - passlen = len(passwd) - out = create_string_buffer(keylen) - rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iter, keylen, out) - return out.raw - return LibCrypto - - def _load_crypto(): - LibCrypto = None + for kDatabase in kDatabases: try: - LibCrypto = _load_crypto_libcrypto() - except (ImportError, DrmException): - pass - return LibCrypto - - LibCrypto = _load_crypto() - - # Various character maps used to decrypt books. 
Probably supposed to act as obfuscation - charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M' - charMap2 = 'ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM' - - # For kinf approach of K4Mac 1.6.X or later - # On K4PC charMap5 = 'AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE' - # For Mac they seem to re-use charMap2 here - charMap5 = charMap2 - - # new in K4M 1.9.X - testMap8 = 'YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD' - - # uses a sub process to get the Hard Drive Serial Number using ioreg - # returns serial numbers of all internal hard drive drives - def GetVolumesSerialNumbers(): - sernums = [] - sernum = os.getenv('MYSERIALNUMBER') - if sernum != None: - sernums.append(sernum.strip()) - cmdline = '/usr/sbin/ioreg -w 0 -r -c AppleAHCIDiskDriver' - cmdline = cmdline.encode(sys.getfilesystemencoding()) - p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) - out1, out2 = p.communicate() - reslst = out1.split('\n') - cnt = len(reslst) - bsdname = None - sernum = None - foundIt = False - for j in xrange(cnt): - resline = reslst[j] - pp = resline.find('\"Serial Number\" = \"') - if pp >= 0: - sernum = resline[pp+19:-1] - sernums.append(sernum.strip()) - return sernums - - def GetUserHomeAppSupKindleDirParitionName(): - home = os.getenv('HOME') - dpath = home + '/Library' - cmdline = '/sbin/mount' - cmdline = cmdline.encode(sys.getfilesystemencoding()) - p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) - out1, out2 = p.communicate() - reslst = out1.split('\n') - cnt = len(reslst) - disk = '' - foundIt = False - for j in xrange(cnt): - resline = reslst[j] - if resline.startswith('/dev'): - (devpart, mpath) = resline.split(' on ') - dpart = devpart[5:] - pp = mpath.find('(') - if pp >= 0: - mpath = mpath[:pp-1] - if dpath.startswith(mpath): - disk = dpart - return disk - - # uses a 
sub process to get the UUID of the specified disk partition using ioreg - def GetDiskPartitionUUIDs(diskpart): - uuids = [] - uuidnum = os.getenv('MYUUIDNUMBER') - if uuidnum != None: - uuids.append(strip(uuidnum)) - cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver' - cmdline = cmdline.encode(sys.getfilesystemencoding()) - p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) - out1, out2 = p.communicate() - reslst = out1.split('\n') - cnt = len(reslst) - bsdname = None - uuidnum = None - foundIt = False - nest = 0 - uuidnest = -1 - partnest = -2 - for j in xrange(cnt): - resline = reslst[j] - if resline.find('{') >= 0: - nest += 1 - if resline.find('}') >= 0: - nest -= 1 - pp = resline.find('\"UUID\" = \"') - if pp >= 0: - uuidnum = resline[pp+10:-1] - uuidnum = uuidnum.strip() - uuidnest = nest - if partnest == uuidnest and uuidnest > 0: - foundIt = True - break - bb = resline.find('\"BSD Name\" = \"') - if bb >= 0: - bsdname = resline[bb+14:-1] - bsdname = bsdname.strip() - if (bsdname == diskpart): - partnest = nest - else : - partnest = -2 - if partnest == uuidnest and partnest > 0: - foundIt = True - break - if nest == 0: - partnest = -2 - uuidnest = -1 - uuidnum = None - bsdname = None - if foundIt: - uuids.append(uuidnum) - return uuids - - def GetMACAddressesMunged(): - macnums = [] - macnum = os.getenv('MYMACNUM') - if macnum != None: - macnums.append(macnum) - cmdline = '/sbin/ifconfig en0' - cmdline = cmdline.encode(sys.getfilesystemencoding()) - p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) - out1, out2 = p.communicate() - reslst = out1.split('\n') - cnt = len(reslst) - macnum = None - foundIt = False - for j in xrange(cnt): - resline = reslst[j] - pp = resline.find('ether ') - if pp >= 0: - macnum = resline[pp+6:-1] - macnum = macnum.strip() - # print 'original mac', macnum - # now munge it up the way 
Kindle app does - # by xoring it with 0xa5 and swapping elements 3 and 4 - maclst = macnum.split(':') - n = len(maclst) - if n != 6: - fountIt = False - break - for i in range(6): - maclst[i] = int('0x' + maclst[i], 0) - mlst = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00] - mlst[5] = maclst[5] ^ 0xa5 - mlst[4] = maclst[3] ^ 0xa5 - mlst[3] = maclst[4] ^ 0xa5 - mlst[2] = maclst[2] ^ 0xa5 - mlst[1] = maclst[1] ^ 0xa5 - mlst[0] = maclst[0] ^ 0xa5 - macnum = '%0.2x%0.2x%0.2x%0.2x%0.2x%0.2x' % (mlst[0], mlst[1], mlst[2], mlst[3], mlst[4], mlst[5]) - foundIt = True - break - if foundIt: - macnums.append(macnum) - return macnums - - - # uses unix env to get username instead of using sysctlbyname - def GetUserName(): - username = os.getenv('USER') - return username - - def GetIDStrings(): - # Return all possible ID Strings - strings = [] - strings.extend(GetMACAddressesMunged()) - strings.extend(GetVolumesSerialNumbers()) - diskpart = GetUserHomeAppSupKindleDirParitionName() - strings.extend(GetDiskPartitionUUIDs(diskpart)) - strings.append('9999999999') - #print strings - return strings - - - # implements an Pseudo Mac Version of Windows built-in Crypto routine - # used by Kindle for Mac versions < 1.6.0 - class CryptUnprotectData(object): - def __init__(self, IDString): - sp = IDString + '!@#' + GetUserName() - passwdData = encode(SHA256(sp),charMap1) - salt = '16743' - self.crp = LibCrypto() - iter = 0x3e8 - keylen = 0x80 - key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) - self.key = key_iv[0:32] - self.iv = key_iv[32:48] - self.crp.set_decrypt_key(self.key, self.iv) - - def decrypt(self, encryptedData): - cleartext = self.crp.decrypt(encryptedData) - cleartext = decode(cleartext,charMap1) - return cleartext - - - # implements an Pseudo Mac Version of Windows built-in Crypto routine - # used for Kindle for Mac Versions >= 1.6.0 - class CryptUnprotectDataV2(object): - def __init__(self, IDString): - sp = GetUserName() + ':&%:' + IDString - passwdData = 
encode(SHA256(sp),charMap5) - # salt generation as per the code - salt = 0x0512981d * 2 * 1 * 1 - salt = str(salt) + GetUserName() - salt = encode(salt,charMap5) - self.crp = LibCrypto() - iter = 0x800 - keylen = 0x400 - key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) - self.key = key_iv[0:32] - self.iv = key_iv[32:48] - self.crp.set_decrypt_key(self.key, self.iv) - - def decrypt(self, encryptedData): - cleartext = self.crp.decrypt(encryptedData) - cleartext = decode(cleartext, charMap5) - return cleartext - - - # unprotect the new header blob in .kinf2011 - # used in Kindle for Mac Version >= 1.9.0 - def UnprotectHeaderData(encryptedData): - passwdData = 'header_key_data' - salt = 'HEADER.2011' - iter = 0x80 - keylen = 0x100 - crp = LibCrypto() - key_iv = crp.keyivgen(passwdData, salt, iter, keylen) - key = key_iv[0:32] - iv = key_iv[32:48] - crp.set_decrypt_key(key,iv) - cleartext = crp.decrypt(encryptedData) - return cleartext - + pidlst.extend(getK4Pids(md1, md2, kDatabase)) + except Exception, e: + print u"Error getting PIDs from database {0}: {1}".format(kDatabase[0],e.args[0]) + traceback.print_exc() - # implements an Pseudo Mac Version of Windows built-in Crypto routine - # used for Kindle for Mac Versions >= 1.9.0 - class CryptUnprotectDataV3(object): - def __init__(self, entropy, IDString): - sp = GetUserName() + '+@#$%+' + IDString - passwdData = encode(SHA256(sp),charMap2) - salt = entropy - self.crp = LibCrypto() - iter = 0x800 - keylen = 0x400 - key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) - self.key = key_iv[0:32] - self.iv = key_iv[32:48] - self.crp.set_decrypt_key(self.key, self.iv) - - def decrypt(self, encryptedData): - cleartext = self.crp.decrypt(encryptedData) - cleartext = decode(cleartext, charMap2) - return cleartext - - - # Locate the .kindle-info files - def getKindleInfoFiles(): - # file searches can take a long time on some systems, so just look in known specific places. 
- kInfoFiles=[] - found = False - home = os.getenv('HOME') - # check for .kinf2011 file in new location (App Store Kindle for Mac) - testpath = home + '/Library/Containers/com.amazon.Kindle/Data/Library/Application Support/Kindle/storage/.kinf2011' - if os.path.isfile(testpath): - kInfoFiles.append(testpath) - print('Found k4Mac kinf2011 file: ' + testpath) - found = True - # check for .kinf2011 files from 1.10 - testpath = home + '/Library/Application Support/Kindle/storage/.kinf2011' - if os.path.isfile(testpath): - kInfoFiles.append(testpath) - print('Found k4Mac kinf2011 file: ' + testpath) - found = True - # check for .rainier-2.1.1-kinf files from 1.6 - testpath = home + '/Library/Application Support/Kindle/storage/.rainier-2.1.1-kinf' - if os.path.isfile(testpath): - kInfoFiles.append(testpath) - print('Found k4Mac rainier file: ' + testpath) - found = True - # check for .kindle-info files from 1.4 - testpath = home + '/Library/Application Support/Kindle/storage/.kindle-info' - if os.path.isfile(testpath): - kInfoFiles.append(testpath) - print('Found k4Mac kindle-info file: ' + testpath) - found = True - # check for .kindle-info file from 1.2.2 - testpath = home + '/Library/Application Support/Amazon/Kindle/storage/.kindle-info' - if os.path.isfile(testpath): - kInfoFiles.append(testpath) - print('Found k4Mac kindle-info file: ' + testpath) - found = True - # check for .kindle-info file from 1.0 beta 1 (27214) - testpath = home + '/Library/Application Support/Amazon/Kindle for Mac/storage/.kindle-info' - if os.path.isfile(testpath): - kInfoFiles.append(testpath) - print('Found k4Mac kindle-info file: ' + testpath) - found = True - if not found: - print('No k4Mac kindle-info/rainier/kinf2011 files have been found.') - return kInfoFiles - - # determine type of kindle info provided and return a - # database of keynames and values - def getDBfromFile(kInfoFile): - names = 
['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber', 'max_date', 'SIGVERIF'] - with open(kInfoFile, 'rb') as infoReader: - filehdr = infoReader.read(1) - filedata = infoReader.read() - - IDStrings = GetIDStrings() - for IDString in IDStrings: - DB = {} - #print "trying IDString:",IDString - try: - hdr = filehdr - data = filedata - if data.find('[') != -1 : - # older style kindle-info file - cud = CryptUnprotectData(IDString) - items = data.split('[') - for item in items: - if item != '': - keyhash, rawdata = item.split(':') - keyname = 'unknown' - for name in names: - if encodeHash(name,charMap2) == keyhash: - keyname = name - break - if keyname == 'unknown': - keyname = keyhash - encryptedValue = decode(rawdata,charMap2) - cleartext = cud.decrypt(encryptedValue) - if len(cleartext) > 0: - DB[keyname] = cleartext - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: - break - elif hdr == '/': - # else newer style .kinf file used by K4Mac >= 1.6.0 - # the .kinf file uses '/' to separate it into records - # so remove the trailing '/' to make it easy to use split - data = data[:-1] - items = data.split('/') - cud = CryptUnprotectDataV2(IDString) - - # loop through the item records until all are processed - while len(items) > 0: - - # get the first item record - item = items.pop(0) - - # the first 32 chars of the first record of a group - # is the MD5 hash of the key name encoded by charMap5 - keyhash = item[0:32] - keyname = 'unknown' - - # the raw keyhash string is also used to create entropy for the actual - # CryptProtectData Blob that represents that keys contents - # 'entropy' not used for K4Mac only K4PC - # entropy = SHA1(keyhash) - - # the remainder of the first record when decoded with charMap5 - # has the ':' split char followed by the string representation - # of the number of records that follow - # and make 
up the contents - srcnt = decode(item[34:],charMap5) - rcnt = int(srcnt) - - # read and store in rcnt records of data - # that make up the contents value - edlst = [] - for i in xrange(rcnt): - item = items.pop(0) - edlst.append(item) - - keyname = 'unknown' - for name in names: - if encodeHash(name,charMap5) == keyhash: - keyname = name - break - if keyname == 'unknown': - keyname = keyhash - - # the charMap5 encoded contents data has had a length - # of chars (always odd) cut off of the front and moved - # to the end to prevent decoding using charMap5 from - # working properly, and thereby preventing the ensuing - # CryptUnprotectData call from succeeding. - - # The offset into the charMap5 encoded contents seems to be: - # len(contents) - largest prime number less than or equal to int(len(content)/3) - # (in other words split 'about' 2/3rds of the way through) - - # move first offsets chars to end to align for decode by charMap5 - encdata = ''.join(edlst) - contlen = len(encdata) - - # now properly split and recombine - # by moving noffset chars from the start of the - # string to the end of the string - noffset = contlen - primes(int(contlen/3))[-1] - pfx = encdata[0:noffset] - encdata = encdata[noffset:] - encdata = encdata + pfx - - # decode using charMap5 to get the CryptProtect Data - encryptedValue = decode(encdata,charMap5) - cleartext = cud.decrypt(encryptedValue) - if len(cleartext) > 0: - DB[keyname] = cleartext - - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: - break - else: - # the latest .kinf2011 version for K4M 1.9.1 - # put back the hdr char, it is needed - data = hdr + data - data = data[:-1] - items = data.split('/') - - # the headerblob is the encrypted information needed to build the entropy string - headerblob = items.pop(0) - encryptedValue = decode(headerblob, charMap1) - cleartext = UnprotectHeaderData(encryptedValue) - - # now extract the pieces in the same way - # this version is different from K4PC it scales the 
build number by multipying by 735 - pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE) - for m in re.finditer(pattern, cleartext): - entropy = str(int(m.group(2)) * 0x2df) + m.group(4) - - cud = CryptUnprotectDataV3(entropy,IDString) - - # loop through the item records until all are processed - while len(items) > 0: - - # get the first item record - item = items.pop(0) - - # the first 32 chars of the first record of a group - # is the MD5 hash of the key name encoded by charMap5 - keyhash = item[0:32] - keyname = 'unknown' - - # unlike K4PC the keyhash is not used in generating entropy - # entropy = SHA1(keyhash) + added_entropy - # entropy = added_entropy - - # the remainder of the first record when decoded with charMap5 - # has the ':' split char followed by the string representation - # of the number of records that follow - # and make up the contents - srcnt = decode(item[34:],charMap5) - rcnt = int(srcnt) - - # read and store in rcnt records of data - # that make up the contents value - edlst = [] - for i in xrange(rcnt): - item = items.pop(0) - edlst.append(item) - - keyname = 'unknown' - for name in names: - if encodeHash(name,testMap8) == keyhash: - keyname = name - break - if keyname == 'unknown': - keyname = keyhash - - # the testMap8 encoded contents data has had a length - # of chars (always odd) cut off of the front and moved - # to the end to prevent decoding using testMap8 from - # working properly, and thereby preventing the ensuing - # CryptUnprotectData call from succeeding. 
- - # The offset into the testMap8 encoded contents seems to be: - # len(contents) - largest prime number less than or equal to int(len(content)/3) - # (in other words split 'about' 2/3rds of the way through) - - # move first offsets chars to end to align for decode by testMap8 - encdata = ''.join(edlst) - contlen = len(encdata) - - # now properly split and recombine - # by moving noffset chars from the start of the - # string to the end of the string - noffset = contlen - primes(int(contlen/3))[-1] - pfx = encdata[0:noffset] - encdata = encdata[noffset:] - encdata = encdata + pfx - - # decode using testMap8 to get the CryptProtect Data - encryptedValue = decode(encdata,testMap8) - cleartext = cud.decrypt(encryptedValue) - # print keyname - # print cleartext - if len(cleartext) > 0: - DB[keyname] = cleartext - - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: - break - except: - pass - if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: - # store values used in decryption - print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(IDString, GetUserName()) - DB['IDString'] = IDString - DB['UserName'] = GetUserName() - else: - print u"Couldn't decrypt file." - DB = {} - return DB -else: - def getDBfromFile(kInfoFile): - raise DrmException(u"This script only runs under Windows or Mac OS X.") - return {} - -def kindlekeys(files = []): - keys = [] - if files == []: - files = getKindleInfoFiles() - for file in files: - key = getDBfromFile(file) - if key: - # convert all values to hex, just in case. 
- for keyname in key: - key[keyname]=key[keyname].encode('hex') - keys.append(key) - return keys - -# interface for Python DeDRM -# returns single key or multiple keys, depending on path or file passed in -def getkey(outpath, files=[]): - keys = kindlekeys(files) - if len(keys) > 0: - if not os.path.isdir(outpath): - outfile = outpath - with file(outfile, 'w') as keyfileout: - keyfileout.write(json.dumps(keys[0])) - print u"Saved a key to {0}".format(outfile) - else: - keycount = 0 - for key in keys: - while True: - keycount += 1 - outfile = os.path.join(outpath,u"kindlekey{0:d}.k4i".format(keycount)) - if not os.path.exists(outfile): - break - with file(outfile, 'w') as keyfileout: - keyfileout.write(json.dumps(key)) - print u"Saved a key to {0}".format(outfile) - return True - return False - -def usage(progname): - print u"Finds, decrypts and saves the default Kindle For Mac/PC encryption keys." - print u"Keys are saved to the current directory, or a specified output directory." - print u"If a file name is passed instead of a directory, only the first key is saved, in that file." 
- print u"Usage:" - print u" {0:s} [-h] [-k ] []".format(progname) - - -def cli_main(): - sys.stdout=SafeUnbuffered(sys.stdout) - sys.stderr=SafeUnbuffered(sys.stderr) - argv=unicode_argv() - progname = os.path.basename(argv[0]) - print u"{0} v{1}\nCopyright © 2010-2013 some_updates and Apprentice Alf".format(progname,__version__) - - try: - opts, args = getopt.getopt(argv[1:], "hk:") - except getopt.GetoptError, err: - print u"Error in options or arguments: {0}".format(err.args[0]) - usage(progname) - sys.exit(2) - - files = [] - for o, a in opts: - if o == "-h": - usage(progname) - sys.exit(0) - if o == "-k": - files = [a] - - if len(args) > 1: - usage(progname) - sys.exit(2) - - if len(args) == 1: - # save to the specified file or directory - outpath = args[0] - if not os.path.isabs(outpath): - outpath = os.path.abspath(outpath) - else: - # save to the same directory as the script - outpath = os.path.dirname(argv[0]) - - # make sure the outpath is the - outpath = os.path.realpath(os.path.normpath(outpath)) - - if not getkey(outpath, files): - print u"Could not retrieve Kindle for Mac/PC key." 
- return 0 - - -def gui_main(): - try: - import Tkinter - import Tkconstants - import tkMessageBox - import traceback - except: - return cli_main() - - class ExceptionDialog(Tkinter.Frame): - def __init__(self, root, text): - Tkinter.Frame.__init__(self, root, border=5) - label = Tkinter.Label(self, text=u"Unexpected error:", - anchor=Tkconstants.W, justify=Tkconstants.LEFT) - label.pack(fill=Tkconstants.X, expand=0) - self.text = Tkinter.Text(self) - self.text.pack(fill=Tkconstants.BOTH, expand=1) - - self.text.insert(Tkconstants.END, text) - - - argv=unicode_argv() - root = Tkinter.Tk() - root.withdraw() - progpath, progname = os.path.split(argv[0]) - success = False - try: - keys = kindlekeys() - keycount = 0 - for key in keys: - while True: - keycount += 1 - outfile = os.path.join(progpath,u"kindlekey{0:d}.k4i".format(keycount)) - if not os.path.exists(outfile): - break - - with file(outfile, 'w') as keyfileout: - keyfileout.write(json.dumps(key)) - success = True - tkMessageBox.showinfo(progname, u"Key successfully retrieved to {0}".format(outfile)) - except DrmException, e: - tkMessageBox.showerror(progname, u"Error: {0}".format(str(e))) - except Exception: - root.wm_state('normal') - root.title(progname) - text = traceback.format_exc() - ExceptionDialog(root, text).pack(fill=Tkconstants.BOTH, expand=1) - root.mainloop() - if not success: - return 1 - return 0 + for serialnum in serials: + try: + pidlst.extend(getKindlePids(md1, md2, serialnum)) + except Exception, e: + print u"Error getting PIDs from serial number {0}: {1}".format(serialnum ,e.args[0]) + traceback.print_exc() -if __name__ == '__main__': - if len(sys.argv) > 1: - sys.exit(cli_main()) - sys.exit(gui_main()) + return pidlst diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py b/DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py index 8bbcf69..8852769 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py @@ -1,18 +1,40 @@ 
-#!/usr/bin/python +#!/usr/bin/env python # -*- coding: utf-8 -*- -# Mobipocket PID calculator v0.4 for Amazon Kindle. -# Copyright (c) 2007, 2009 Igor Skochinsky -# History: -# 0.1 Initial release -# 0.2 Added support for generating PID for iPhone (thanks to mbp) -# 0.3 changed to autoflush stdout, fixed return code usage -# 0.3 updated for unicode -# 0.4 Added support for serial numbers starting with '9', fixed unicode bugs. -# 0.5 moved unicode_argv call inside main for Windows DeDRM compatibility +from __future__ import with_statement -import sys -import binascii +# kindlekey.py +# Copyright © 2010-2013 by some_updates and Apprentice Alf +# +# Currently requires alfcrypto.py which requires the alfcrypto library + +# Revision history: +# 1.0 - Kindle info file decryption, extracted from k4mobidedrm, etc. +# 1.1 - Added Tkinter to match adobekey.py +# 1.2 - Fixed testing of successful retrieval on Mac +# 1.3 - Added getkey interface for Windows DeDRM application +# Simplified some of the Kindle for Mac code. +# 1.4 - Remove dependency on alfcrypto +# 1.5 - moved unicode_argv call inside main for Windows DeDRM compatibility +# 1.6 - Fixed a problem getting the disk serial numbers +# 1.7 - Work if TkInter is missing +# 1.8 - Fixes for Kindle for Mac, and non-ascii in Windows user names +# 1.9 - Fixes for Unicode in Windows user names + + +""" +Retrieve Kindle for PC/Mac user key. 
+""" + +__license__ = 'GPL v3' +__version__ = '1.9' + +import sys, os, re +from struct import pack, unpack, unpack_from +import json +import getopt + +# Routines common to Mac and PC # Wrap a stream so that output gets flushed immediately # and also make sure that any unicode strings get @@ -31,8 +53,11 @@ class SafeUnbuffered: def __getattr__(self, attr): return getattr(self.stream, attr) -iswindows = sys.platform.startswith('win') -isosx = sys.platform.startswith('darwin') +try: + from calibre.constants import iswindows, isosx +except: + iswindows = sys.platform.startswith('win') + isosx = sys.platform.startswith('darwin') def unicode_argv(): if iswindows: @@ -41,8 +66,8 @@ def unicode_argv(): # Versions 2.x of Python don't support Unicode in sys.argv on # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. - + # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv + # as a list of Unicode strings and encode them as utf-8 from ctypes import POINTER, byref, cdll, c_int, windll from ctypes.wintypes import LPCWSTR, LPWSTR @@ -65,80 +90,1847 @@ def unicode_argv(): xrange(start, argc.value)] # if we don't have any arguments at all, just pass back script name # this should never happen - return [u"kindlepid.py"] + return [u"kindlekey.py"] else: argvencoding = sys.stdin.encoding if argvencoding == None: argvencoding = "utf-8" return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] -if sys.hexversion >= 0x3000000: - print 'This script is incompatible with Python 3.x. Please install Python 2.7.x.' 
- sys.exit(2) +class DrmException(Exception): + pass + +# crypto digestroutines +import hashlib + +def MD5(message): + ctx = hashlib.md5() + ctx.update(message) + return ctx.digest() + +def SHA1(message): + ctx = hashlib.sha1() + ctx.update(message) + return ctx.digest() + +def SHA256(message): + ctx = hashlib.sha256() + ctx.update(message) + return ctx.digest() + +# For K4M/PC 1.6.X and later +# generate table of prime number less than or equal to int n +def primes(n): + if n==2: return [2] + elif n<2: return [] + s=range(3,n+1,2) + mroot = n ** 0.5 + half=(n+1)/2-1 + i=0 + m=3 + while m <= mroot: + if s[i]: + j=(m*m-3)/2 + s[j]=0 + while j 0: # save any bytes that are not block aligned + self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:] + else: + self.bytesToEncrypt = '' + + if more == None: # no more data expected from caller + finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize) + if len(finalBytes) > 0: + ctBlock = self.encryptBlock(finalBytes) + self.encryptBlockCount += 1 + cipherText += ctBlock + self.resetEncrypt() + return cipherText + + def decrypt(self, cipherText, more = None): + """ Decrypt a string and return a string """ + self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt + + numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize) + if more == None: # no more calls to decrypt, should have all the data + if numExtraBytes != 0: + raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt' + + # hold back some bytes in case last decrypt has zero len + if (more != None) and (numExtraBytes == 0) and (numBlocks >0) : + numBlocks -= 1 + numExtraBytes = self.blockSize + + plainText = '' + for i in range(numBlocks): + bStart = i*self.blockSize + ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize]) + self.decryptBlockCount += 1 + plainText += ptBlock + + if numExtraBytes > 0: # save any bytes that are not block aligned + self.bytesToEncrypt = 
self.bytesToEncrypt[-numExtraBytes:] + else: + self.bytesToEncrypt = '' + + if more == None: # last decrypt remove padding + plainText = self.padding.removePad(plainText, self.blockSize) + self.resetDecrypt() + return plainText + + + class Pad: + def __init__(self): + pass # eventually could put in calculation of min and max size extension + + class padWithPadLen(Pad): + """ Pad a binary string with the length of the padding """ + + def addPad(self, extraBytes, blockSize): + """ Add padding to a binary string to make it an even multiple + of the block size """ + blocks, numExtraBytes = divmod(len(extraBytes), blockSize) + padLength = blockSize - numExtraBytes + return extraBytes + padLength*chr(padLength) + + def removePad(self, paddedBinaryString, blockSize): + """ Remove padding from a binary string """ + if not(0 6 and i%Nk == 4 : + temp = [ Sbox[byte] for byte in temp ] # SubWord(temp) + w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] ) + return w + + Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!! 
+ 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6, + 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91) + + #------------------------------------- + def AddRoundKey(algInstance, keyBlock): + """ XOR the algorithm state with a block of key material """ + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] ^= keyBlock[column][row] + #------------------------------------- + + def SubBytes(algInstance): + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] = Sbox[algInstance.state[column][row]] + + def InvSubBytes(algInstance): + for column in range(algInstance.Nb): + for row in range(4): + algInstance.state[column][row] = InvSbox[algInstance.state[column][row]] + + Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5, + 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, + 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0, + 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, + 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc, + 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, + 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a, + 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, + 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0, + 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, + 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b, + 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, + 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85, + 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, + 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5, + 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, + 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17, + 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, + 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88, + 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, + 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c, + 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, + 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9, + 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, + 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6, + 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, + 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e, + 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, + 
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94, + 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, + 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68, + 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) + + InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, + 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, + 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, + 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, + 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, + 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, + 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, + 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, + 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, + 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, + 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, + 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, + 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, + 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, + 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, + 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, + 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, + 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, + 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, + 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, + 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, + 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, + 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, + 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, + 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, + 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, + 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, + 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, + 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, + 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, + 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, + 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) + + #------------------------------------- + """ For each block size (Nb), the ShiftRow operation shifts row i + by the amount Ci. Note that row 0 is not shifted. 
+ Nb C1 C2 C3 + ------------------- """ + shiftOffset = { 4 : ( 0, 1, 2, 3), + 5 : ( 0, 1, 2, 3), + 6 : ( 0, 1, 2, 3), + 7 : ( 0, 1, 2, 4), + 8 : ( 0, 1, 3, 4) } + def ShiftRows(algInstance): + tmp = [0]*algInstance.Nb # list of size Nb + for r in range(1,4): # row 0 reamains unchanged and can be skipped + for c in range(algInstance.Nb): + tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] + for c in range(algInstance.Nb): + algInstance.state[c][r] = tmp[c] + def InvShiftRows(algInstance): + tmp = [0]*algInstance.Nb # list of size Nb + for r in range(1,4): # row 0 reamains unchanged and can be skipped + for c in range(algInstance.Nb): + tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r] + for c in range(algInstance.Nb): + algInstance.state[c][r] = tmp[c] + #------------------------------------- + def MixColumns(a): + Sprime = [0,0,0,0] + for j in range(a.Nb): # for each column + Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3]) + Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3]) + Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3]) + Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3]) + for i in range(4): + a.state[j][i] = Sprime[i] + + def InvMixColumns(a): + """ Mix the four bytes of every column in a linear way + This is the opposite operation of Mixcolumn """ + Sprime = [0,0,0,0] + for j in range(a.Nb): # for each column + Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3]) + Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3]) + Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3]) + Sprime[3] = 
mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3]) + for i in range(4): + a.state[j][i] = Sprime[i] + + #------------------------------------- + def mul(a, b): + """ Multiply two elements of GF(2^m) + needed for MixColumn and InvMixColumn """ + if (a !=0 and b!=0): + return Alogtable[(Logtable[a] + Logtable[b])%255] + else: + return 0 + + Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, + 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, + 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, + 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, + 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, + 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, + 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, + 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, + 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, + 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, + 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, + 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, + 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, + 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, + 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165, + 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7) + + Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, + 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, + 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, + 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, + 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, + 131, 158, 185, 208, 
107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, + 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, + 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, + 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, + 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, + 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, + 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, + 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, + 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, + 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, + 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1) + + + + + """ + AES Encryption Algorithm + The AES algorithm is just Rijndael algorithm restricted to the default + blockSize of 128 bits. + """ + + class AES(Rijndael): + """ The AES algorithm is the Rijndael block cipher restricted to block + sizes of 128 bits and key sizes of 128, 192 or 256 bits + """ + def __init__(self, key = None, padding = padWithPadLen(), keySize=16): + """ Initialize AES, keySize is in bytes """ + if not (keySize == 16 or keySize == 24 or keySize == 32) : + raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes' + + Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 ) + + self.name = 'AES' + + + """ + CBC mode of encryption for block ciphers. + This algorithm mode wraps any BlockCipher to make a + Cipher Block Chaining mode. + """ + from random import Random # should change to crypto.random!!! + + + class CBC(BlockCipher): + """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode + algorithms. The initialization (IV) is automatic if set to None. 
Padding + is also automatic based on the Pad class used to initialize the algorithm + """ + def __init__(self, blockCipherInstance, padding = padWithPadLen()): + """ CBC algorithms are created by initializing with a BlockCipher instance """ + self.baseCipher = blockCipherInstance + self.name = self.baseCipher.name + '_CBC' + self.blockSize = self.baseCipher.blockSize + self.keySize = self.baseCipher.keySize + self.padding = padding + self.baseCipher.padding = noPadding() # baseCipher should NOT pad!! + self.r = Random() # for IV generation, currently uses + # mediocre standard distro version <---------------- + import time + newSeed = time.ctime()+str(self.r) # seed with instance location + self.r.seed(newSeed) # to make unique + self.reset() + + def setKey(self, key): + self.baseCipher.setKey(key) + + # Overload to reset both CBC state and the wrapped baseCipher + def resetEncrypt(self): + BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class) + self.baseCipher.resetEncrypt() # reset base cipher encrypt state + + def resetDecrypt(self): + BlockCipher.resetDecrypt(self) # reset CBC state (super class) + self.baseCipher.resetDecrypt() # reset base cipher decrypt state + + def encrypt(self, plainText, iv=None, more=None): + """ CBC encryption - overloads baseCipher to allow optional explicit IV + when iv=None, iv is auto generated! + """ + if self.encryptBlockCount == 0: + self.iv = iv + else: + assert(iv==None), 'IV used only on first call to encrypt' + + return BlockCipher.encrypt(self,plainText, more=more) + + def decrypt(self, cipherText, iv=None, more=None): + """ CBC decryption - overloads baseCipher to allow optional explicit IV + when iv=None, iv is auto generated! 
+ """ + if self.decryptBlockCount == 0: + self.iv = iv + else: + assert(iv==None), 'IV used only on first call to decrypt' + + return BlockCipher.decrypt(self, cipherText, more=more) + + def encryptBlock(self, plainTextBlock): + """ CBC block encryption, IV is set with 'encrypt' """ + auto_IV = '' + if self.encryptBlockCount == 0: + if self.iv == None: + # generate IV and use + self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)]) + self.prior_encr_CT_block = self.iv + auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic + else: # application provided IV + assert(len(self.iv) == self.blockSize ),'IV must be same length as block' + self.prior_encr_CT_block = self.iv + """ encrypt the prior CT XORed with the PT """ + ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) ) + self.prior_encr_CT_block = ct + return auto_IV+ct + + def decryptBlock(self, encryptedBlock): + """ Decrypt a single block """ + + if self.decryptBlockCount == 0: # first call, process IV + if self.iv == None: # auto decrypt IV? 
+ self.prior_CT_block = encryptedBlock + return '' + else: + assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption" + self.prior_CT_block = self.iv + + dct = self.baseCipher.decryptBlock(encryptedBlock) + """ XOR the prior decrypted CT with the prior CT """ + dct_XOR_priorCT = xor( self.prior_CT_block, dct ) + + self.prior_CT_block = encryptedBlock + + return dct_XOR_priorCT + + + """ + AES_CBC Encryption Algorithm + """ + + class aescbc_AES_CBC(CBC): + """ AES encryption in CBC feedback mode """ + def __init__(self, key=None, padding=padWithPadLen(), keySize=16): + CBC.__init__( self, AES(key, noPadding(), keySize), padding) + self.name = 'AES_CBC' + + class AES_CBC(object): + def __init__(self): + self._key = None + self._iv = None + self.aes = None + + def set_decrypt_key(self, userkey, iv): + self._key = userkey + self._iv = iv + self.aes = aescbc_AES_CBC(userkey, noPadding(), len(userkey)) + + def decrypt(self, data): + iv = self._iv + cleartext = self.aes.decrypt(iv + data) + return cleartext + + import hmac + + class KeyIVGen(object): + # this only exists in openssl so we will use pure python implementation instead + # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', + # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) + def pbkdf2(self, passwd, salt, iter, keylen): + + def xorstr( a, b ): + if len(a) != len(b): + raise Exception("xorstr(): lengths differ") + return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b))) + + def prf( h, data ): + hm = h.copy() + hm.update( data ) + return hm.digest() + + def pbkdf2_F( h, salt, itercount, blocknum ): + U = prf( h, salt + pack('>i',blocknum ) ) + T = U + for i in range(2, itercount+1): + U = prf( h, U ) + T = xorstr( T, U ) + return T + + sha = hashlib.sha1 + digest_size = sha().digest_size + # l - number of output blocks to produce + l = keylen / digest_size + if keylen % digest_size != 0: + l += 1 + h = hmac.new( passwd, None, sha ) + T = "" + for i in range(1, l+1): + 
T += pbkdf2_F( h, salt, iter, i ) + return T[0: keylen] + + def UnprotectHeaderData(encryptedData): + passwdData = 'header_key_data' + salt = 'HEADER.2011' + iter = 0x80 + keylen = 0x100 + key_iv = KeyIVGen().pbkdf2(passwdData, salt, iter, keylen) + key = key_iv[0:32] + iv = key_iv[32:48] + aes=AES_CBC() + aes.set_decrypt_key(key, iv) + cleartext = aes.decrypt(encryptedData) + return cleartext + + # Various character maps used to decrypt kindle info values. + # Probably supposed to act as obfuscation + charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_" + charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE" + # New maps in K4PC 1.9.0 + testMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M" + testMap6 = "9YzAb0Cd1Ef2n5Pr6St7Uvh3Jk4M8WxG" + testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD" + + # interface with Windows OS Routines + class DataBlob(Structure): + _fields_ = [('cbData', c_uint), + ('pbData', c_void_p)] + DataBlob_p = POINTER(DataBlob) + + + def GetSystemDirectory(): + GetSystemDirectoryW = kernel32.GetSystemDirectoryW + GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint] + GetSystemDirectoryW.restype = c_uint + def GetSystemDirectory(): + buffer = create_unicode_buffer(MAX_PATH + 1) + GetSystemDirectoryW(buffer, len(buffer)) + return buffer.value + return GetSystemDirectory + GetSystemDirectory = GetSystemDirectory() + + def GetVolumeSerialNumber(): + GetVolumeInformationW = kernel32.GetVolumeInformationW + GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint, + POINTER(c_uint), POINTER(c_uint), + POINTER(c_uint), c_wchar_p, c_uint] + GetVolumeInformationW.restype = c_uint + def GetVolumeSerialNumber(path = GetSystemDirectory().split('\\')[0] + '\\'): + vsn = c_uint(0) + GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0) + return str(vsn.value) + return GetVolumeSerialNumber + GetVolumeSerialNumber = GetVolumeSerialNumber() + + def GetIDString(): + vsn = 
GetVolumeSerialNumber() + #print('Using Volume Serial Number for ID: '+vsn) + return vsn + + def getLastError(): + GetLastError = kernel32.GetLastError + GetLastError.argtypes = None + GetLastError.restype = c_uint + def getLastError(): + return GetLastError() + return getLastError + getLastError = getLastError() + + def GetUserName(): + GetUserNameW = advapi32.GetUserNameW + GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)] + GetUserNameW.restype = c_uint + def GetUserName(): + buffer = create_unicode_buffer(2) + size = c_uint(len(buffer)) + while not GetUserNameW(buffer, byref(size)): + errcd = getLastError() + if errcd == 234: + # bad wine implementation up through wine 1.3.21 + return "AlternateUserName" + buffer = create_unicode_buffer(len(buffer) * 2) + size.value = len(buffer) + return buffer.value.encode('utf-16-le')[::2] + return GetUserName + GetUserName = GetUserName() + + def CryptUnprotectData(): + _CryptUnprotectData = crypt32.CryptUnprotectData + _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p, + c_void_p, c_void_p, c_uint, DataBlob_p] + _CryptUnprotectData.restype = c_uint + def CryptUnprotectData(indata, entropy, flags): + indatab = create_string_buffer(indata) + indata = DataBlob(len(indata), cast(indatab, c_void_p)) + entropyb = create_string_buffer(entropy) + entropy = DataBlob(len(entropy), cast(entropyb, c_void_p)) + outdata = DataBlob() + if not _CryptUnprotectData(byref(indata), None, byref(entropy), + None, None, flags, byref(outdata)): + # raise DrmException("Failed to Unprotect Data") + return 'failed' + return string_at(outdata.pbData, outdata.cbData) + return CryptUnprotectData + CryptUnprotectData = CryptUnprotectData() + + # Returns Environmental Variables that contain unicode + def getEnvironmentVariable(name): + import ctypes + name = unicode(name) # make sure string argument is unicode + n = ctypes.windll.kernel32.GetEnvironmentVariableW(name, None, 0) + if n == 0: + return None + buf = 
ctypes.create_unicode_buffer(u'\0'*n) + ctypes.windll.kernel32.GetEnvironmentVariableW(name, buf, n) + return buf.value + + # Locate all of the kindle-info style files and return as list + def getKindleInfoFiles(): + kInfoFiles = [] + # some 64 bit machines do not have the proper registry key for some reason + # or the python interface to the 32 vs 64 bit registry is broken + path = "" + if 'LOCALAPPDATA' in os.environ.keys(): + # Python 2.x does not return unicode env. Use Python 3.x + path = winreg.ExpandEnvironmentStrings(u"%LOCALAPPDATA%") + # this is just another alternative. + # path = getEnvironmentVariable('LOCALAPPDATA') + if not os.path.isdir(path): + path = "" + else: + # User Shell Folders show take precedent over Shell Folders if present + try: + # this will still break + regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders\\") + path = winreg.QueryValueEx(regkey, 'Local AppData')[0] + if not os.path.isdir(path): + path = "" + try: + regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\") + path = winreg.QueryValueEx(regkey, 'Local AppData')[0] + if not os.path.isdir(path): + path = "" + except RegError: + pass + except RegError: + pass + + found = False + if path == "": + print ('Could not find the folder in which to look for kinfoFiles.') + else: + # Probably not the best. 
To Fix (shouldn't ignore in encoding) or use utf-8 + print(u'searching for kinfoFiles in ' + path.encode('ascii', 'ignore')) + + # look for (K4PC 1.9.0 and later) .kinf2011 file + kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011' + if os.path.isfile(kinfopath): + found = True + print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath.encode('ascii','ignore')) + kInfoFiles.append(kinfopath) + + # look for (K4PC 1.6.0 and later) rainier.2.1.1.kinf file + kinfopath = path +'\\Amazon\\Kindle\\storage\\rainier.2.1.1.kinf' + if os.path.isfile(kinfopath): + found = True + print('Found K4PC 1.6-1.8 kinf file: ' + kinfopath) + kInfoFiles.append(kinfopath) + + # look for (K4PC 1.5.0 and later) rainier.2.1.1.kinf file + kinfopath = path +'\\Amazon\\Kindle For PC\\storage\\rainier.2.1.1.kinf' + if os.path.isfile(kinfopath): + found = True + print('Found K4PC 1.5 kinf file: ' + kinfopath) + kInfoFiles.append(kinfopath) -letters = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789' + # look for original (earlier than K4PC 1.5.0) kindle-info files + kinfopath = path +'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info' + if os.path.isfile(kinfopath): + found = True + print('Found K4PC kindle.info file: ' + kinfopath) + kInfoFiles.append(kinfopath) -def crc32(s): - return (~binascii.crc32(s,-1))&0xFFFFFFFF + if not found: + print('No K4PC kindle.info/kinf/kinf2011 files have been found.') + return kInfoFiles -def checksumPid(s): - crc = crc32(s) - crc = crc ^ (crc >> 16) - res = s - l = len(letters) - for i in (0,1): - b = crc & 0xff - pos = (b // l) ^ (b % l) - res += letters[pos%l] - crc >>= 8 - return res + # determine type of kindle info provided and return a + # database of keynames and values + def getDBfromFile(kInfoFile): + names = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber', 'max_date', 'SIGVERIF'] + DB = {} + with 
open(kInfoFile, 'rb') as infoReader: + hdr = infoReader.read(1) + data = infoReader.read() -def pidFromSerial(s, l): - crc = crc32(s) + if data.find('{') != -1 : + # older style kindle-info file + items = data.split('{') + for item in items: + if item != '': + keyhash, rawdata = item.split(':') + keyname = "unknown" + for name in names: + if encodeHash(name,charMap2) == keyhash: + keyname = name + break + if keyname == "unknown": + keyname = keyhash + encryptedValue = decode(rawdata,charMap2) + DB[keyname] = CryptUnprotectData(encryptedValue, "", 0) + elif hdr == '/': + # else rainier-2-1-1 .kinf file + # the .kinf file uses "/" to separate it into records + # so remove the trailing "/" to make it easy to use split + data = data[:-1] + items = data.split('/') - arr1 = [0]*l - for i in xrange(len(s)): - arr1[i%l] ^= ord(s[i]) + # loop through the item records until all are processed + while len(items) > 0: - crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff] - for i in xrange(l): - arr1[i] ^= crc_bytes[i&3] + # get the first item record + item = items.pop(0) - pid = '' - for i in xrange(l): - b = arr1[i] & 0xff - pid+=letters[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))] + # the first 32 chars of the first record of a group + # is the MD5 hash of the key name encoded by charMap5 + keyhash = item[0:32] + + # the raw keyhash string is used to create entropy for the actual + # CryptProtectData Blob that represents that keys contents + entropy = SHA1(keyhash) + + # the remainder of the first record when decoded with charMap5 + # has the ':' split char followed by the string representation + # of the number of records that follow + # and make up the contents + srcnt = decode(item[34:],charMap5) + rcnt = int(srcnt) + + # read and store in rcnt records of data + # that make up the contents value + edlst = [] + for i in xrange(rcnt): + item = items.pop(0) + edlst.append(item) + + keyname = "unknown" + for name in names: + if encodeHash(name,charMap5) == 
keyhash: + keyname = name + break + if keyname == "unknown": + keyname = keyhash + # the charMap5 encoded contents data has had a length + # of chars (always odd) cut off of the front and moved + # to the end to prevent decoding using charMap5 from + # working properly, and thereby preventing the ensuing + # CryptUnprotectData call from succeeding. + + # The offset into the charMap5 encoded contents seems to be: + # len(contents)-largest prime number <= int(len(content)/3) + # (in other words split "about" 2/3rds of the way through) + + # move first offsets chars to end to align for decode by charMap5 + encdata = "".join(edlst) + contlen = len(encdata) + noffset = contlen - primes(int(contlen/3))[-1] + + # now properly split and recombine + # by moving noffset chars from the start of the + # string to the end of the string + pfx = encdata[0:noffset] + encdata = encdata[noffset:] + encdata = encdata + pfx + + # decode using Map5 to get the CryptProtect Data + encryptedValue = decode(encdata,charMap5) + DB[keyname] = CryptUnprotectData(encryptedValue, entropy, 1) + else: + # else newest .kinf2011 style .kinf file + # the .kinf file uses "/" to separate it into records + # so remove the trailing "/" to make it easy to use split + # need to put back the first char read because it it part + # of the added entropy blob + data = hdr + data[:-1] + items = data.split('/') + + # starts with and encoded and encrypted header blob + headerblob = items.pop(0) + encryptedValue = decode(headerblob, testMap1) + cleartext = UnprotectHeaderData(encryptedValue) + # now extract the pieces that form the added entropy + pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE) + for m in re.finditer(pattern, cleartext): + added_entropy = m.group(2) + m.group(4) + + + # loop through the item records until all are processed + while len(items) > 0: + + # get the first item record + item = items.pop(0) + + # the first 32 chars of 
the first record of a group + # is the MD5 hash of the key name encoded by charMap5 + keyhash = item[0:32] + + # the sha1 of raw keyhash string is used to create entropy along + # with the added entropy provided above from the headerblob + entropy = SHA1(keyhash) + added_entropy + + # the remainder of the first record when decoded with charMap5 + # has the ':' split char followed by the string representation + # of the number of records that follow + # and make up the contents + srcnt = decode(item[34:],charMap5) + rcnt = int(srcnt) + + # read and store in rcnt records of data + # that make up the contents value + edlst = [] + for i in xrange(rcnt): + item = items.pop(0) + edlst.append(item) + + # key names now use the new testMap8 encoding + keyname = "unknown" + for name in names: + if encodeHash(name,testMap8) == keyhash: + keyname = name + break + + # the testMap8 encoded contents data has had a length + # of chars (always odd) cut off of the front and moved + # to the end to prevent decoding using testMap8 from + # working properly, and thereby preventing the ensuing + # CryptUnprotectData call from succeeding. 
+ + # The offset into the testMap8 encoded contents seems to be: + # len(contents)-largest prime number <= int(len(content)/3) + # (in other words split "about" 2/3rds of the way through) + + # move first offsets chars to end to align for decode by testMap8 + # by moving noffset chars from the start of the + # string to the end of the string + encdata = "".join(edlst) + contlen = len(encdata) + noffset = contlen - primes(int(contlen/3))[-1] + pfx = encdata[0:noffset] + encdata = encdata[noffset:] + encdata = encdata + pfx + + # decode using new testMap8 to get the original CryptProtect Data + encryptedValue = decode(encdata,testMap8) + cleartext = CryptUnprotectData(encryptedValue, entropy, 1) + DB[keyname] = cleartext + + if 'kindle.account.tokens' in DB: + print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(GetIDString(), GetUserName().decode("latin-1")) + # store values used in decryption + DB['IDString'] = GetIDString() + DB['UserName'] = GetUserName() + else: + DB = {} + return DB +elif isosx: + import copy + import subprocess + + # interface to needed routines in openssl's libcrypto + def _load_crypto_libcrypto(): + from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \ + Structure, c_ulong, create_string_buffer, addressof, string_at, cast + from ctypes.util import find_library + + libcrypto = find_library('crypto') + if libcrypto is None: + raise DrmException(u"libcrypto not found") + libcrypto = CDLL(libcrypto) + + # From OpenSSL's crypto aes header + # + # AES_ENCRYPT 1 + # AES_DECRYPT 0 + # AES_MAXNR 14 (in bytes) + # AES_BLOCK_SIZE 16 (in bytes) + # + # struct aes_key_st { + # unsigned long rd_key[4 *(AES_MAXNR + 1)]; + # int rounds; + # }; + # typedef struct aes_key_st AES_KEY; + # + # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key); + # + # note: the ivec string, and output buffer are both mutable + # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + 
# const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc); + + AES_MAXNR = 14 + c_char_pp = POINTER(c_char_p) + c_int_p = POINTER(c_int) + + class AES_KEY(Structure): + _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)] + AES_KEY_p = POINTER(AES_KEY) + + def F(restype, name, argtypes): + func = getattr(libcrypto, name) + func.restype = restype + func.argtypes = argtypes + return func + + AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int]) + + AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p]) + + # From OpenSSL's Crypto evp/p5_crpt2.c + # + # int PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen, + # const unsigned char *salt, int saltlen, int iter, + # int keylen, unsigned char *out); + + PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1', + [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p]) + + class LibCrypto(object): + def __init__(self): + self._blocksize = 0 + self._keyctx = None + self._iv = 0 + + def set_decrypt_key(self, userkey, iv): + self._blocksize = len(userkey) + if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) : + raise DrmException(u"AES improper key used") + return + keyctx = self._keyctx = AES_KEY() + self._iv = iv + self._userkey = userkey + rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx) + if rv < 0: + raise DrmException(u"Failed to initialize AES key") + + def decrypt(self, data): + out = create_string_buffer(len(data)) + mutable_iv = create_string_buffer(self._iv, len(self._iv)) + keyctx = self._keyctx + rv = AES_cbc_encrypt(data, out, len(data), keyctx, mutable_iv, 0) + if rv == 0: + raise DrmException(u"AES decryption failed") + return out.raw + + def keyivgen(self, passwd, salt, iter, keylen): + saltlen = len(salt) + passlen = len(passwd) + out = create_string_buffer(keylen) + rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, 
iter, keylen, out) + return out.raw + return LibCrypto + + def _load_crypto(): + LibCrypto = None + try: + LibCrypto = _load_crypto_libcrypto() + except (ImportError, DrmException): + pass + return LibCrypto + + LibCrypto = _load_crypto() + + # Various character maps used to decrypt books. Probably supposed to act as obfuscation + charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M' + charMap2 = 'ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM' + + # For kinf approach of K4Mac 1.6.X or later + # On K4PC charMap5 = 'AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE' + # For Mac they seem to re-use charMap2 here + charMap5 = charMap2 + + # new in K4M 1.9.X + testMap8 = 'YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD' + + # uses a sub process to get the Hard Drive Serial Number using ioreg + # returns serial numbers of all internal hard drive drives + def GetVolumesSerialNumbers(): + sernums = [] + sernum = os.getenv('MYSERIALNUMBER') + if sernum != None: + sernums.append(sernum.strip()) + cmdline = '/usr/sbin/ioreg -w 0 -r -c AppleAHCIDiskDriver' + cmdline = cmdline.encode(sys.getfilesystemencoding()) + p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) + out1, out2 = p.communicate() + reslst = out1.split('\n') + cnt = len(reslst) + bsdname = None + sernum = None + foundIt = False + for j in xrange(cnt): + resline = reslst[j] + pp = resline.find('\"Serial Number\" = \"') + if pp >= 0: + sernum = resline[pp+19:-1] + sernums.append(sernum.strip()) + return sernums + + def GetUserHomeAppSupKindleDirParitionName(): + home = os.getenv('HOME') + dpath = home + '/Library' + cmdline = '/sbin/mount' + cmdline = cmdline.encode(sys.getfilesystemencoding()) + p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) + out1, out2 = p.communicate() + reslst = out1.split('\n') + cnt = len(reslst) + disk = '' + 
foundIt = False + for j in xrange(cnt): + resline = reslst[j] + if resline.startswith('/dev'): + (devpart, mpath) = resline.split(' on ') + dpart = devpart[5:] + pp = mpath.find('(') + if pp >= 0: + mpath = mpath[:pp-1] + if dpath.startswith(mpath): + disk = dpart + return disk + + # uses a sub process to get the UUID of the specified disk partition using ioreg + def GetDiskPartitionUUIDs(diskpart): + uuids = [] + uuidnum = os.getenv('MYUUIDNUMBER') + if uuidnum != None: + uuids.append(strip(uuidnum)) + cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver' + cmdline = cmdline.encode(sys.getfilesystemencoding()) + p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) + out1, out2 = p.communicate() + reslst = out1.split('\n') + cnt = len(reslst) + bsdname = None + uuidnum = None + foundIt = False + nest = 0 + uuidnest = -1 + partnest = -2 + for j in xrange(cnt): + resline = reslst[j] + if resline.find('{') >= 0: + nest += 1 + if resline.find('}') >= 0: + nest -= 1 + pp = resline.find('\"UUID\" = \"') + if pp >= 0: + uuidnum = resline[pp+10:-1] + uuidnum = uuidnum.strip() + uuidnest = nest + if partnest == uuidnest and uuidnest > 0: + foundIt = True + break + bb = resline.find('\"BSD Name\" = \"') + if bb >= 0: + bsdname = resline[bb+14:-1] + bsdname = bsdname.strip() + if (bsdname == diskpart): + partnest = nest + else : + partnest = -2 + if partnest == uuidnest and partnest > 0: + foundIt = True + break + if nest == 0: + partnest = -2 + uuidnest = -1 + uuidnum = None + bsdname = None + if foundIt: + uuids.append(uuidnum) + return uuids + + def GetMACAddressesMunged(): + macnums = [] + macnum = os.getenv('MYMACNUM') + if macnum != None: + macnums.append(macnum) + cmdline = '/sbin/ifconfig en0' + cmdline = cmdline.encode(sys.getfilesystemencoding()) + p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False) + out1, out2 = 
p.communicate() + reslst = out1.split('\n') + cnt = len(reslst) + macnum = None + foundIt = False + for j in xrange(cnt): + resline = reslst[j] + pp = resline.find('ether ') + if pp >= 0: + macnum = resline[pp+6:-1] + macnum = macnum.strip() + # print 'original mac', macnum + # now munge it up the way Kindle app does + # by xoring it with 0xa5 and swapping elements 3 and 4 + maclst = macnum.split(':') + n = len(maclst) + if n != 6: + fountIt = False + break + for i in range(6): + maclst[i] = int('0x' + maclst[i], 0) + mlst = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + mlst[5] = maclst[5] ^ 0xa5 + mlst[4] = maclst[3] ^ 0xa5 + mlst[3] = maclst[4] ^ 0xa5 + mlst[2] = maclst[2] ^ 0xa5 + mlst[1] = maclst[1] ^ 0xa5 + mlst[0] = maclst[0] ^ 0xa5 + macnum = '%0.2x%0.2x%0.2x%0.2x%0.2x%0.2x' % (mlst[0], mlst[1], mlst[2], mlst[3], mlst[4], mlst[5]) + foundIt = True + break + if foundIt: + macnums.append(macnum) + return macnums + + + # uses unix env to get username instead of using sysctlbyname + def GetUserName(): + username = os.getenv('USER') + return username + + def GetIDStrings(): + # Return all possible ID Strings + strings = [] + strings.extend(GetMACAddressesMunged()) + strings.extend(GetVolumesSerialNumbers()) + diskpart = GetUserHomeAppSupKindleDirParitionName() + strings.extend(GetDiskPartitionUUIDs(diskpart)) + strings.append('9999999999') + #print strings + return strings + + + # implements an Pseudo Mac Version of Windows built-in Crypto routine + # used by Kindle for Mac versions < 1.6.0 + class CryptUnprotectData(object): + def __init__(self, IDString): + sp = IDString + '!@#' + GetUserName() + passwdData = encode(SHA256(sp),charMap1) + salt = '16743' + self.crp = LibCrypto() + iter = 0x3e8 + keylen = 0x80 + key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) + self.key = key_iv[0:32] + self.iv = key_iv[32:48] + self.crp.set_decrypt_key(self.key, self.iv) + + def decrypt(self, encryptedData): + cleartext = self.crp.decrypt(encryptedData) + cleartext = 
decode(cleartext,charMap1) + return cleartext + + + # implements an Pseudo Mac Version of Windows built-in Crypto routine + # used for Kindle for Mac Versions >= 1.6.0 + class CryptUnprotectDataV2(object): + def __init__(self, IDString): + sp = GetUserName() + ':&%:' + IDString + passwdData = encode(SHA256(sp),charMap5) + # salt generation as per the code + salt = 0x0512981d * 2 * 1 * 1 + salt = str(salt) + GetUserName() + salt = encode(salt,charMap5) + self.crp = LibCrypto() + iter = 0x800 + keylen = 0x400 + key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) + self.key = key_iv[0:32] + self.iv = key_iv[32:48] + self.crp.set_decrypt_key(self.key, self.iv) + + def decrypt(self, encryptedData): + cleartext = self.crp.decrypt(encryptedData) + cleartext = decode(cleartext, charMap5) + return cleartext + + + # unprotect the new header blob in .kinf2011 + # used in Kindle for Mac Version >= 1.9.0 + def UnprotectHeaderData(encryptedData): + passwdData = 'header_key_data' + salt = 'HEADER.2011' + iter = 0x80 + keylen = 0x100 + crp = LibCrypto() + key_iv = crp.keyivgen(passwdData, salt, iter, keylen) + key = key_iv[0:32] + iv = key_iv[32:48] + crp.set_decrypt_key(key,iv) + cleartext = crp.decrypt(encryptedData) + return cleartext + + + # implements an Pseudo Mac Version of Windows built-in Crypto routine + # used for Kindle for Mac Versions >= 1.9.0 + class CryptUnprotectDataV3(object): + def __init__(self, entropy, IDString): + sp = GetUserName() + '+@#$%+' + IDString + passwdData = encode(SHA256(sp),charMap2) + salt = entropy + self.crp = LibCrypto() + iter = 0x800 + keylen = 0x400 + key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen) + self.key = key_iv[0:32] + self.iv = key_iv[32:48] + self.crp.set_decrypt_key(self.key, self.iv) + + def decrypt(self, encryptedData): + cleartext = self.crp.decrypt(encryptedData) + cleartext = decode(cleartext, charMap2) + return cleartext + + + # Locate the .kindle-info files + def getKindleInfoFiles(): + # file searches 
can take a long time on some systems, so just look in known specific places. + kInfoFiles=[] + found = False + home = os.getenv('HOME') + # check for .kinf2011 file in new location (App Store Kindle for Mac) + testpath = home + '/Library/Containers/com.amazon.Kindle/Data/Library/Application Support/Kindle/storage/.kinf2011' + if os.path.isfile(testpath): + kInfoFiles.append(testpath) + print('Found k4Mac kinf2011 file: ' + testpath) + found = True + # check for .kinf2011 files from 1.10 + testpath = home + '/Library/Application Support/Kindle/storage/.kinf2011' + if os.path.isfile(testpath): + kInfoFiles.append(testpath) + print('Found k4Mac kinf2011 file: ' + testpath) + found = True + # check for .rainier-2.1.1-kinf files from 1.6 + testpath = home + '/Library/Application Support/Kindle/storage/.rainier-2.1.1-kinf' + if os.path.isfile(testpath): + kInfoFiles.append(testpath) + print('Found k4Mac rainier file: ' + testpath) + found = True + # check for .kindle-info files from 1.4 + testpath = home + '/Library/Application Support/Kindle/storage/.kindle-info' + if os.path.isfile(testpath): + kInfoFiles.append(testpath) + print('Found k4Mac kindle-info file: ' + testpath) + found = True + # check for .kindle-info file from 1.2.2 + testpath = home + '/Library/Application Support/Amazon/Kindle/storage/.kindle-info' + if os.path.isfile(testpath): + kInfoFiles.append(testpath) + print('Found k4Mac kindle-info file: ' + testpath) + found = True + # check for .kindle-info file from 1.0 beta 1 (27214) + testpath = home + '/Library/Application Support/Amazon/Kindle for Mac/storage/.kindle-info' + if os.path.isfile(testpath): + kInfoFiles.append(testpath) + print('Found k4Mac kindle-info file: ' + testpath) + found = True + if not found: + print('No k4Mac kindle-info/rainier/kinf2011 files have been found.') + return kInfoFiles + + # determine type of kindle info provided and return a + # database of keynames and values + def getDBfromFile(kInfoFile): + names = 
['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber', 'max_date', 'SIGVERIF'] + with open(kInfoFile, 'rb') as infoReader: + filehdr = infoReader.read(1) + filedata = infoReader.read() + + IDStrings = GetIDStrings() + for IDString in IDStrings: + DB = {} + #print "trying IDString:",IDString + try: + hdr = filehdr + data = filedata + if data.find('[') != -1 : + # older style kindle-info file + cud = CryptUnprotectData(IDString) + items = data.split('[') + for item in items: + if item != '': + keyhash, rawdata = item.split(':') + keyname = 'unknown' + for name in names: + if encodeHash(name,charMap2) == keyhash: + keyname = name + break + if keyname == 'unknown': + keyname = keyhash + encryptedValue = decode(rawdata,charMap2) + cleartext = cud.decrypt(encryptedValue) + if len(cleartext) > 0: + DB[keyname] = cleartext + if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: + break + elif hdr == '/': + # else newer style .kinf file used by K4Mac >= 1.6.0 + # the .kinf file uses '/' to separate it into records + # so remove the trailing '/' to make it easy to use split + data = data[:-1] + items = data.split('/') + cud = CryptUnprotectDataV2(IDString) + + # loop through the item records until all are processed + while len(items) > 0: + + # get the first item record + item = items.pop(0) + + # the first 32 chars of the first record of a group + # is the MD5 hash of the key name encoded by charMap5 + keyhash = item[0:32] + keyname = 'unknown' + + # the raw keyhash string is also used to create entropy for the actual + # CryptProtectData Blob that represents that keys contents + # 'entropy' not used for K4Mac only K4PC + # entropy = SHA1(keyhash) + + # the remainder of the first record when decoded with charMap5 + # has the ':' split char followed by the string representation + # of the number of records that follow + # and make 
up the contents + srcnt = decode(item[34:],charMap5) + rcnt = int(srcnt) + + # read and store in rcnt records of data + # that make up the contents value + edlst = [] + for i in xrange(rcnt): + item = items.pop(0) + edlst.append(item) + + keyname = 'unknown' + for name in names: + if encodeHash(name,charMap5) == keyhash: + keyname = name + break + if keyname == 'unknown': + keyname = keyhash + + # the charMap5 encoded contents data has had a length + # of chars (always odd) cut off of the front and moved + # to the end to prevent decoding using charMap5 from + # working properly, and thereby preventing the ensuing + # CryptUnprotectData call from succeeding. + + # The offset into the charMap5 encoded contents seems to be: + # len(contents) - largest prime number less than or equal to int(len(content)/3) + # (in other words split 'about' 2/3rds of the way through) + + # move first offsets chars to end to align for decode by charMap5 + encdata = ''.join(edlst) + contlen = len(encdata) + + # now properly split and recombine + # by moving noffset chars from the start of the + # string to the end of the string + noffset = contlen - primes(int(contlen/3))[-1] + pfx = encdata[0:noffset] + encdata = encdata[noffset:] + encdata = encdata + pfx + + # decode using charMap5 to get the CryptProtect Data + encryptedValue = decode(encdata,charMap5) + cleartext = cud.decrypt(encryptedValue) + if len(cleartext) > 0: + DB[keyname] = cleartext + + if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: + break + else: + # the latest .kinf2011 version for K4M 1.9.1 + # put back the hdr char, it is needed + data = hdr + data + data = data[:-1] + items = data.split('/') + + # the headerblob is the encrypted information needed to build the entropy string + headerblob = items.pop(0) + encryptedValue = decode(headerblob, charMap1) + cleartext = UnprotectHeaderData(encryptedValue) + + # now extract the pieces in the same way + # this version is different from K4PC it scales the 
build number by multipying by 735 + pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE) + for m in re.finditer(pattern, cleartext): + entropy = str(int(m.group(2)) * 0x2df) + m.group(4) + + cud = CryptUnprotectDataV3(entropy,IDString) + + # loop through the item records until all are processed + while len(items) > 0: + + # get the first item record + item = items.pop(0) + + # the first 32 chars of the first record of a group + # is the MD5 hash of the key name encoded by charMap5 + keyhash = item[0:32] + keyname = 'unknown' + + # unlike K4PC the keyhash is not used in generating entropy + # entropy = SHA1(keyhash) + added_entropy + # entropy = added_entropy + + # the remainder of the first record when decoded with charMap5 + # has the ':' split char followed by the string representation + # of the number of records that follow + # and make up the contents + srcnt = decode(item[34:],charMap5) + rcnt = int(srcnt) + + # read and store in rcnt records of data + # that make up the contents value + edlst = [] + for i in xrange(rcnt): + item = items.pop(0) + edlst.append(item) + + keyname = 'unknown' + for name in names: + if encodeHash(name,testMap8) == keyhash: + keyname = name + break + if keyname == 'unknown': + keyname = keyhash + + # the testMap8 encoded contents data has had a length + # of chars (always odd) cut off of the front and moved + # to the end to prevent decoding using testMap8 from + # working properly, and thereby preventing the ensuing + # CryptUnprotectData call from succeeding. 
+ + # The offset into the testMap8 encoded contents seems to be: + # len(contents) - largest prime number less than or equal to int(len(content)/3) + # (in other words split 'about' 2/3rds of the way through) + + # move first offsets chars to end to align for decode by testMap8 + encdata = ''.join(edlst) + contlen = len(encdata) + + # now properly split and recombine + # by moving noffset chars from the start of the + # string to the end of the string + noffset = contlen - primes(int(contlen/3))[-1] + pfx = encdata[0:noffset] + encdata = encdata[noffset:] + encdata = encdata + pfx + + # decode using testMap8 to get the CryptProtect Data + encryptedValue = decode(encdata,testMap8) + cleartext = cud.decrypt(encryptedValue) + # print keyname + # print cleartext + if len(cleartext) > 0: + DB[keyname] = cleartext + + if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB: + break + except: + pass + if 'kindle.account.tokens' in DB: + # store values used in decryption + print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(IDString, GetUserName()) + DB['IDString'] = IDString + DB['UserName'] = GetUserName() + else: + print u"Couldn't decrypt file." + DB = {} + return DB +else: + def getDBfromFile(kInfoFile): + raise DrmException(u"This script only runs under Windows or Mac OS X.") + return {} + +def kindlekeys(files = []): + keys = [] + if files == []: + files = getKindleInfoFiles() + for file in files: + key = getDBfromFile(file) + if key: + # convert all values to hex, just in case. 
+ for keyname in key: + key[keyname]=key[keyname].encode('hex') + keys.append(key) + return keys + +# interface for Python DeDRM +# returns single key or multiple keys, depending on path or file passed in +def getkey(outpath, files=[]): + keys = kindlekeys(files) + if len(keys) > 0: + if not os.path.isdir(outpath): + outfile = outpath + with file(outfile, 'w') as keyfileout: + keyfileout.write(json.dumps(keys[0])) + print u"Saved a key to {0}".format(outfile) + else: + keycount = 0 + for key in keys: + while True: + keycount += 1 + outfile = os.path.join(outpath,u"kindlekey{0:d}.k4i".format(keycount)) + if not os.path.exists(outfile): + break + with file(outfile, 'w') as keyfileout: + keyfileout.write(json.dumps(key)) + print u"Saved a key to {0}".format(outfile) + return True + return False + +def usage(progname): + print u"Finds, decrypts and saves the default Kindle For Mac/PC encryption keys." + print u"Keys are saved to the current directory, or a specified output directory." + print u"If a file name is passed instead of a directory, only the first key is saved, in that file." + print u"Usage:" + print u" {0:s} [-h] [-k ] []".format(progname) - return pid def cli_main(): - print u"Mobipocket PID calculator for Amazon Kindle. 
Copyright © 2007, 2009 Igor Skochinsky" + sys.stdout=SafeUnbuffered(sys.stdout) + sys.stderr=SafeUnbuffered(sys.stderr) argv=unicode_argv() - if len(argv)==2: - serial = argv[1] + progname = os.path.basename(argv[0]) + print u"{0} v{1}\nCopyright © 2010-2013 some_updates and Apprentice Alf".format(progname,__version__) + + try: + opts, args = getopt.getopt(argv[1:], "hk:") + except getopt.GetoptError, err: + print u"Error in options or arguments: {0}".format(err.args[0]) + usage(progname) + sys.exit(2) + + files = [] + for o, a in opts: + if o == "-h": + usage(progname) + sys.exit(0) + if o == "-k": + files = [a] + + if len(args) > 1: + usage(progname) + sys.exit(2) + + if len(args) == 1: + # save to the specified file or directory + outpath = args[0] + if not os.path.isabs(outpath): + outpath = os.path.abspath(outpath) else: - print u"Usage: kindlepid.py /" + # save to the same directory as the script + outpath = os.path.dirname(argv[0]) + + # make sure the outpath is the + outpath = os.path.realpath(os.path.normpath(outpath)) + + if not getkey(outpath, files): + print u"Could not retrieve Kindle for Mac/PC key." 
+ return 0 + + +def gui_main(): + try: + import Tkinter + import Tkconstants + import tkMessageBox + import traceback + except: + return cli_main() + + class ExceptionDialog(Tkinter.Frame): + def __init__(self, root, text): + Tkinter.Frame.__init__(self, root, border=5) + label = Tkinter.Label(self, text=u"Unexpected error:", + anchor=Tkconstants.W, justify=Tkconstants.LEFT) + label.pack(fill=Tkconstants.X, expand=0) + self.text = Tkinter.Text(self) + self.text.pack(fill=Tkconstants.BOTH, expand=1) + + self.text.insert(Tkconstants.END, text) + + + argv=unicode_argv() + root = Tkinter.Tk() + root.withdraw() + progpath, progname = os.path.split(argv[0]) + success = False + try: + keys = kindlekeys() + keycount = 0 + for key in keys: + while True: + keycount += 1 + outfile = os.path.join(progpath,u"kindlekey{0:d}.k4i".format(keycount)) + if not os.path.exists(outfile): + break + + with file(outfile, 'w') as keyfileout: + keyfileout.write(json.dumps(key)) + success = True + tkMessageBox.showinfo(progname, u"Key successfully retrieved to {0}".format(outfile)) + except DrmException, e: + tkMessageBox.showerror(progname, u"Error: {0}".format(str(e))) + except Exception: + root.wm_state('normal') + root.title(progname) + text = traceback.format_exc() + ExceptionDialog(root, text).pack(fill=Tkconstants.BOTH, expand=1) + root.mainloop() + if not success: return 1 - if len(serial)==16: - if serial.startswith("B") or serial.startswith("9"): - print u"Kindle serial number detected" - else: - print u"Warning: unrecognized serial number. Please recheck input." - return 1 - pid = pidFromSerial(serial.encode("utf-8"),7)+'*' - print u"Mobipocket PID for Kindle serial#{0} is {1}".format(serial,checksumPid(pid)) - return 0 - elif len(serial)==40: - print u"iPhone serial number (UDID) detected" - pid = pidFromSerial(serial.encode("utf-8"),8) - print u"Mobipocket PID for iPhone serial#{0} is {1}".format(serial,checksumPid(pid)) - return 0 - print u"Warning: unrecognized serial number. 
Please recheck input." - return 1 - - -if __name__ == "__main__": - sys.stdout=SafeUnbuffered(sys.stdout) - sys.stderr=SafeUnbuffered(sys.stderr) - sys.exit(cli_main()) + return 0 + +if __name__ == '__main__': + if len(sys.argv) > 1: + sys.exit(cli_main()) + sys.exit(gui_main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.py b/DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.py index 01c348c..8bbcf69 100644 Binary files a/DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.py and b/DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.py differ diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylib b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylib index 9a5a442..01c348c 100644 Binary files a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylib and b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylib differ diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.so b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.so index a08ac28..9a5a442 100644 Binary files a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.so and b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.so differ diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.so b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.so index 7b69edc..a08ac28 100644 Binary files a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.so and b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.so differ diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py b/DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py index 9a84e58..89cc695 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py @@ -1,89 +1,541 @@ #!/usr/bin/env python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +# -*- coding: utf-8 -*- -# implement just enough of des from openssl to make erdr2pml.py happy +# mobidedrm.py, version 0.38 +# Copyright © 2008 The Dark Reverser +# +# Modified 2008–2012 by some_updates, DiapDealer and Apprentice Alf -def load_libcrypto(): - from ctypes 
import CDLL, POINTER, c_void_p, c_char_p, c_char, c_int, c_long, \ - Structure, c_ulong, create_string_buffer, cast - from ctypes.util import find_library - import sys +# This is a python script. You need a Python interpreter to run it. +# For example, ActiveState Python, which exists for windows. +# +# Changelog +# 0.01 - Initial version +# 0.02 - Huffdic compressed books were not properly decrypted +# 0.03 - Wasn't checking MOBI header length +# 0.04 - Wasn't sanity checking size of data record +# 0.05 - It seems that the extra data flags take two bytes not four +# 0.06 - And that low bit does mean something after all :-) +# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size +# 0.08 - ...and also not in Mobi header version < 6 +# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4! +# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre +# import filter it works when importing unencrypted files. +# Also now handles encrypted files that don't need a specific PID. +# 0.11 - use autoflushed stdout and proper return values +# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors +# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace +# and extra blank lines, converted CR/LF pairs at ends of each line, +# and other cosmetic fixes. +# 0.14 - Working out when the extra data flags are present has been problematic +# Versions 7 through 9 have tried to tweak the conditions, but have been +# only partially successful. Closer examination of lots of sample +# files reveals that a confusion has arisen because trailing data entries +# are not encrypted, but it turns out that the multibyte entries +# in utf8 file are encrypted. (Although neither kind gets compressed.) +# This knowledge leads to a simplification of the test for the +# trailing data byte flags - version 5 and higher AND header size >= 0xE4. +# 0.15 - Now outputs 'heartbeat', and is also quicker for long files. 
+# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility. +# 0.17 - added modifications to support its use as an imported python module +# both inside calibre and also in other places (ie K4DeDRM tools) +# 0.17a- disabled the standalone plugin feature since a plugin can not import +# a plugin +# 0.18 - It seems that multibyte entries aren't encrypted in a v7 file... +# Removed the disabled Calibre plug-in code +# Permit use of 8-digit PIDs +# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either. +# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file. +# 0.21 - Added support for multiple pids +# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface +# 0.23 - fixed problem with older files with no EXTH section +# 0.24 - add support for type 1 encryption and 'TEXtREAd' books as well +# 0.25 - Fixed support for 'BOOKMOBI' type 1 encryption +# 0.26 - Now enables Text-To-Speech flag and sets clipping limit to 100% +# 0.27 - Correct pid metadata token generation to match that used by skindle (Thank You Bart!) +# 0.28 - slight additional changes to metadata token generation (None -> '') +# 0.29 - It seems that the ideas about when multibyte trailing characters were +# included in the encryption were wrong. They are for DOC compressed +# files, but they are not for HUFF/CDIC compress files! +# 0.30 - Modified interface slightly to work better with new calibre plugin style +# 0.31 - The multibyte encrytion info is true for version 7 files too. 
+# 0.32 - Added support for "Print Replica" Kindle ebooks +# 0.33 - Performance improvements for large files (concatenation) +# 0.34 - Performance improvements in decryption (libalfcrypto) +# 0.35 - add interface to get mobi_version +# 0.36 - fixed problem with TEXtREAd and getBookTitle interface +# 0.37 - Fixed double announcement for stand-alone operation +# 0.38 - Unicode used wherever possible, cope with absent alfcrypto +# 0.39 - Fixed problem with TEXtREAd and getBookType interface +# 0.40 - moved unicode_argv call inside main for Windows DeDRM compatibility +# 0.41 - Fixed potential unicode problem in command line calls - if sys.platform.startswith('win'): - libcrypto = find_library('libeay32') + +__version__ = u"0.41" + +import sys +import os +import struct +import binascii +try: + from alfcrypto import Pukall_Cipher +except: + print u"AlfCrypto not found. Using python PC1 implementation." + +# Wrap a stream so that output gets flushed immediately +# and also make sure that any unicode strings get +# encoded using "replace" before writing them. +class SafeUnbuffered: + def __init__(self, stream): + self.stream = stream + self.encoding = stream.encoding + if self.encoding == None: + self.encoding = "utf-8" + def write(self, data): + if isinstance(data,unicode): + data = data.encode(self.encoding,"replace") + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + +iswindows = sys.platform.startswith('win') +isosx = sys.platform.startswith('darwin') + +def unicode_argv(): + if iswindows: + # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode + # strings. + + # Versions 2.x of Python don't support Unicode in sys.argv on + # Windows, with the underlying Windows API instead replacing multi-byte + # characters with '?'. 
+ + + from ctypes import POINTER, byref, cdll, c_int, windll + from ctypes.wintypes import LPCWSTR, LPWSTR + + GetCommandLineW = cdll.kernel32.GetCommandLineW + GetCommandLineW.argtypes = [] + GetCommandLineW.restype = LPCWSTR + + CommandLineToArgvW = windll.shell32.CommandLineToArgvW + CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] + CommandLineToArgvW.restype = POINTER(LPWSTR) + + cmd = GetCommandLineW() + argc = c_int(0) + argv = CommandLineToArgvW(cmd, byref(argc)) + if argc.value > 0: + # Remove Python executable and commands if present + start = argc.value - len(sys.argv) + return [argv[i] for i in + xrange(start, argc.value)] + # if we don't have any arguments at all, just pass back script name + # this should never happen + return [u"mobidedrm.py"] + else: + argvencoding = sys.stdin.encoding + if argvencoding == None: + argvencoding = 'utf-8' + return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] + + +class DrmException(Exception): + pass + + +# +# MobiBook Utility Routines +# + +# Implementation of Pukall Cipher 1 +def PC1(key, src, decryption=True): + # if we can get it from alfcrypto, use that + try: + return Pukall_Cipher().PC1(key,src,decryption) + except NameError: + pass + except TypeError: + pass + + # use slow python version, since Pukall_Cipher didn't load + sum1 = 0; + sum2 = 0; + keyXorVal = 0; + if len(key)!=16: + DrmException (u"PC1: Bad key length") + wkey = [] + for i in xrange(8): + wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1])) + dst = "" + for i in xrange(len(src)): + temp1 = 0; + byteXorVal = 0; + for j in xrange(8): + temp1 ^= wkey[j] + sum2 = (sum2+j)*20021 + sum1 + sum1 = (temp1*346)&0xFFFF + sum2 = (sum2+sum1)&0xFFFF + temp1 = (temp1*20021+1)&0xFFFF + byteXorVal ^= temp1 ^ sum2 + curByte = ord(src[i]) + if not decryption: + keyXorVal = curByte * 257; + curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF + if decryption: + keyXorVal = curByte * 257; + for j in xrange(8): + 
wkey[j] ^= keyXorVal; + dst+=chr(curByte) + return dst + +def checksumPid(s): + letters = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789' + crc = (~binascii.crc32(s,-1))&0xFFFFFFFF + crc = crc ^ (crc >> 16) + res = s + l = len(letters) + for i in (0,1): + b = crc & 0xff + pos = (b // l) ^ (b % l) + res += letters[pos%l] + crc >>= 8 + return res + +def getSizeOfTrailingDataEntries(ptr, size, flags): + def getSizeOfTrailingDataEntry(ptr, size): + bitpos, result = 0, 0 + if size <= 0: + return result + while True: + v = ord(ptr[size-1]) + result |= (v & 0x7F) << bitpos + bitpos += 7 + size -= 1 + if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0): + return result + num = 0 + testflags = flags >> 1 + while testflags: + if testflags & 1: + num += getSizeOfTrailingDataEntry(ptr, size - num) + testflags >>= 1 + # Check the low bit to see if there's multibyte data present. + # if multibyte data is included in the encryped data, we'll + # have already cleared this flag. + if flags & 1: + num += (ord(ptr[size - num - 1]) & 0x3) + 1 + return num + + + +class MobiBook: + def loadSection(self, section): + if (section + 1 == self.num_sections): + endoff = len(self.data_file) + else: + endoff = self.sections[section + 1][0] + off = self.sections[section][0] + return self.data_file[off:endoff] + + def cleanup(self): + # to match function in Topaz book + pass + + def __init__(self, infile): + print u"MobiDeDrm v{0:s}.\nCopyright © 2008-2012 The Dark Reverser et al.".format(__version__) + + try: + from alfcrypto import Pukall_Cipher + except: + print u"AlfCrypto not found. Using python PC1 implementation." 
+ + # initial sanity check on file + self.data_file = file(infile, 'rb').read() + self.mobi_data = '' + self.header = self.data_file[0:78] + if self.header[0x3C:0x3C+8] != 'BOOKMOBI' and self.header[0x3C:0x3C+8] != 'TEXtREAd': + raise DrmException(u"Invalid file format") + self.magic = self.header[0x3C:0x3C+8] + self.crypto_type = -1 + + # build up section offset and flag info + self.num_sections, = struct.unpack('>H', self.header[76:78]) + self.sections = [] + for i in xrange(self.num_sections): + offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8]) + flags, val = a1, a2<<16|a3<<8|a4 + self.sections.append( (offset, flags, val) ) + + # parse information from section 0 + self.sect = self.loadSection(0) + self.records, = struct.unpack('>H', self.sect[0x8:0x8+2]) + self.compression, = struct.unpack('>H', self.sect[0x0:0x0+2]) + + # det default values before PalmDoc test + self.print_replica = False + self.extra_data_flags = 0 + self.meta_array = {} + self.mobi_length = 0 + self.mobi_codepage = 1252 + self.mobi_version = -1 + + if self.magic == 'TEXtREAd': + print u"PalmDoc format book detected." + return + + self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18]) + self.mobi_codepage, = struct.unpack('>L',self.sect[0x1c:0x20]) + self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C]) + print u"MOBI header version {0:d}, header length {1:d}".format(self.mobi_version, self.mobi_length) + if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5): + self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4]) + print u"Extra Data Flags: {0:d}".format(self.extra_data_flags) + if (self.compression != 17480): + # multibyte utf8 data is included in the encryption for PalmDoc compression + # so clear that byte so that we leave it to be decrypted. 
+ self.extra_data_flags &= 0xFFFE + + # if exth region exists parse it for metadata array + try: + exth_flag, = struct.unpack('>L', self.sect[0x80:0x84]) + exth = '' + if exth_flag & 0x40: + exth = self.sect[16 + self.mobi_length:] + if (len(exth) >= 12) and (exth[:4] == 'EXTH'): + nitems, = struct.unpack('>I', exth[8:12]) + pos = 12 + for i in xrange(nitems): + type, size = struct.unpack('>II', exth[pos: pos + 8]) + content = exth[pos + 8: pos + size] + self.meta_array[type] = content + # reset the text to speech flag and clipping limit, if present + if type == 401 and size == 9: + # set clipping limit to 100% + self.patchSection(0, '\144', 16 + self.mobi_length + pos + 8) + elif type == 404 and size == 9: + # make sure text to speech is enabled + self.patchSection(0, '\0', 16 + self.mobi_length + pos + 8) + # print type, size, content, content.encode('hex') + pos += size + except: + pass + + def getBookTitle(self): + codec_map = { + 1252 : 'windows-1252', + 65001 : 'utf-8', + } + title = '' + codec = 'windows-1252' + if self.magic == 'BOOKMOBI': + if 503 in self.meta_array: + title = self.meta_array[503] + else: + toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c]) + tend = toff + tlen + title = self.sect[toff:tend] + if self.mobi_codepage in codec_map.keys(): + codec = codec_map[self.mobi_codepage] + if title == '': + title = self.header[:32] + title = title.split('\0')[0] + return unicode(title, codec) + + def getPIDMetaInfo(self): + rec209 = '' + token = '' + if 209 in self.meta_array: + rec209 = self.meta_array[209] + data = rec209 + # The 209 data comes in five byte groups. 
Interpret the last four bytes + # of each group as a big endian unsigned integer to get a key value + # if that key exists in the meta_array, append its contents to the token + for i in xrange(0,len(data),5): + val, = struct.unpack('>I',data[i+1:i+5]) + sval = self.meta_array.get(val,'') + token += sval + return rec209, token + + def patch(self, off, new): + self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):] + + def patchSection(self, section, new, in_off = 0): + if (section + 1 == self.num_sections): + endoff = len(self.data_file) + else: + endoff = self.sections[section + 1][0] + off = self.sections[section][0] + assert off + in_off + len(new) <= endoff + self.patch(off + in_off, new) + + def parseDRM(self, data, count, pidlist): + found_key = None + keyvec1 = '\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96' + for pid in pidlist: + bigpid = pid.ljust(16,'\0') + temp_key = PC1(keyvec1, bigpid, False) + temp_key_sum = sum(map(ord,temp_key)) & 0xff + found_key = None + for i in xrange(count): + verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30]) + if cksum == temp_key_sum: + cookie = PC1(temp_key, cookie) + ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie) + if verification == ver and (flags & 0x1F) == 1: + found_key = finalkey + break + if found_key != None: + break + if not found_key: + # Then try the default encoding that doesn't require a PID + pid = '00000000' + temp_key = keyvec1 + temp_key_sum = sum(map(ord,temp_key)) & 0xff + for i in xrange(count): + verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30]) + if cksum == temp_key_sum: + cookie = PC1(temp_key, cookie) + ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie) + if verification == ver: + found_key = finalkey + break + return [found_key,pid] + + def getFile(self, outpath): + file(outpath,'wb').write(self.mobi_data) + + def 
getBookType(self): + if self.print_replica: + return u"Print Replica" + if self.mobi_version >= 8: + return u"Kindle Format 8" + if self.mobi_version >= 0: + return u"Mobipocket {0:d}".format(self.mobi_version) + return u"PalmDoc" + + def getBookExtension(self): + if self.print_replica: + return u".azw4" + if self.mobi_version >= 8: + return u".azw3" + return u".mobi" + + def processBook(self, pidlist): + crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2]) + print u"Crypto Type is: {0:d}".format(crypto_type) + self.crypto_type = crypto_type + if crypto_type == 0: + print u"This book is not encrypted." + # we must still check for Print Replica + self.print_replica = (self.loadSection(1)[0:4] == '%MOP') + self.mobi_data = self.data_file + return + if crypto_type != 2 and crypto_type != 1: + raise DrmException(u"Cannot decode unknown Mobipocket encryption type {0:d}".format(crypto_type)) + if 406 in self.meta_array: + data406 = self.meta_array[406] + val406, = struct.unpack('>Q',data406) + if val406 != 0: + raise DrmException(u"Cannot decode library or rented ebooks.") + + goodpids = [] + for pid in pidlist: + if len(pid)==10: + if checksumPid(pid[0:-2]) != pid: + print u"Warning: PID {0} has incorrect checksum, should have been {1}".format(pid,checksumPid(pid[0:-2])) + goodpids.append(pid[0:-2]) + elif len(pid)==8: + goodpids.append(pid) + + if self.crypto_type == 1: + t1_keyvec = 'QDCVEPMU675RUBSZ' + if self.magic == 'TEXtREAd': + bookkey_data = self.sect[0x0E:0x0E+16] + elif self.mobi_version < 0: + bookkey_data = self.sect[0x90:0x90+16] + else: + bookkey_data = self.sect[self.mobi_length+16:self.mobi_length+32] + pid = '00000000' + found_key = PC1(t1_keyvec, bookkey_data) + else : + # calculate the keys + drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16]) + if drm_count == 0: + raise DrmException(u"Encryption not initialised. 
Must be opened with Mobipocket Reader first.") + found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids) + if not found_key: + raise DrmException(u"No key found in {0:d} keys tried.".format(len(goodpids))) + # kill the drm keys + self.patchSection(0, '\0' * drm_size, drm_ptr) + # kill the drm pointers + self.patchSection(0, '\xff' * 4 + '\0' * 12, 0xA8) + + if pid=='00000000': + print u"File has default encryption, no specific key needed." + else: + print u"File is encoded with PID {0}.".format(checksumPid(pid)) + + # clear the crypto type + self.patchSection(0, "\0" * 2, 0xC) + + # decrypt sections + print u"Decrypting. Please wait . . .", + mobidataList = [] + mobidataList.append(self.data_file[:self.sections[1][0]]) + for i in xrange(1, self.records+1): + data = self.loadSection(i) + extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags) + if i%100 == 0: + print u".", + # print "record %d, extra_size %d" %(i,extra_size) + decoded_data = PC1(found_key, data[0:len(data) - extra_size]) + if i==1: + self.print_replica = (decoded_data[0:4] == '%MOP') + mobidataList.append(decoded_data) + if extra_size > 0: + mobidataList.append(data[-extra_size:]) + if self.num_sections > self.records+1: + mobidataList.append(self.data_file[self.sections[self.records+1][0]:]) + self.mobi_data = "".join(mobidataList) + print u"done" + return + +def getUnencryptedBook(infile,pidlist): + if not os.path.isfile(infile): + raise DrmException(u"Input File Not Found.") + book = MobiBook(infile) + book.processBook(pidlist) + return book.mobi_data + + +def cli_main(): + argv=unicode_argv() + progname = os.path.basename(argv[0]) + if len(argv)<3 or len(argv)>4: + print u"MobiDeDrm v{0}.\nCopyright © 2008-2012 The Dark Reverser et al.".format(__version__) + print u"Removes protection from Kindle/Mobipocket, Kindle/KF8 and Kindle/Print Replica ebooks" + print u"Usage:" + print u" {0} []".format(progname) + return 1 else: - libcrypto = 
find_library('crypto') - - if libcrypto is None: - return None - - libcrypto = CDLL(libcrypto) - - # typedef struct DES_ks - # { - # union - # { - # DES_cblock cblock; - # /* make sure things are correct size on machines with - # * 8 byte longs */ - # DES_LONG deslong[2]; - # } ks[16]; - # } DES_key_schedule; - - # just create a big enough place to hold everything - # it will have alignment of structure so we should be okay (16 byte aligned?) - class DES_KEY_SCHEDULE(Structure): - _fields_ = [('DES_cblock1', c_char * 16), - ('DES_cblock2', c_char * 16), - ('DES_cblock3', c_char * 16), - ('DES_cblock4', c_char * 16), - ('DES_cblock5', c_char * 16), - ('DES_cblock6', c_char * 16), - ('DES_cblock7', c_char * 16), - ('DES_cblock8', c_char * 16), - ('DES_cblock9', c_char * 16), - ('DES_cblock10', c_char * 16), - ('DES_cblock11', c_char * 16), - ('DES_cblock12', c_char * 16), - ('DES_cblock13', c_char * 16), - ('DES_cblock14', c_char * 16), - ('DES_cblock15', c_char * 16), - ('DES_cblock16', c_char * 16)] - - DES_KEY_SCHEDULE_p = POINTER(DES_KEY_SCHEDULE) - - def F(restype, name, argtypes): - func = getattr(libcrypto, name) - func.restype = restype - func.argtypes = argtypes - return func - - DES_set_key = F(None, 'DES_set_key',[c_char_p, DES_KEY_SCHEDULE_p]) - DES_ecb_encrypt = F(None, 'DES_ecb_encrypt',[c_char_p, c_char_p, DES_KEY_SCHEDULE_p, c_int]) - - - class DES(object): - def __init__(self, key): - if len(key) != 8 : - raise Exception('DES improper key used') - return - self.key = key - self.keyschedule = DES_KEY_SCHEDULE() - DES_set_key(self.key, self.keyschedule) - def desdecrypt(self, data): - ob = create_string_buffer(len(data)) - DES_ecb_encrypt(data, ob, self.keyschedule, 0) - return ob.raw - def decrypt(self, data): - if not data: - return '' - i = 0 - result = [] - while i < len(data): - block = data[i:i+8] - processed_block = self.desdecrypt(block) - result.append(processed_block) - i += 8 - return ''.join(result) - - return DES + infile = argv[1] + 
outfile = argv[2] + if len(argv) is 4: + pidlist = argv[3].split(',') + else: + pidlist = [] + try: + stripped_file = getUnencryptedBook(infile, pidlist) + file(outfile, 'wb').write(stripped_file) + except DrmException, e: + print u"MobiDeDRM v{0} Error: {0:s}".format(__version__,e.args[0]) + return 1 + return 0 + + +if __name__ == '__main__': + sys.stdout=SafeUnbuffered(sys.stdout) + sys.stderr=SafeUnbuffered(sys.stderr) + sys.exit(cli_main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py b/DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py index e69de29..9a84e58 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +# implement just enough of des from openssl to make erdr2pml.py happy + +def load_libcrypto(): + from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_char, c_int, c_long, \ + Structure, c_ulong, create_string_buffer, cast + from ctypes.util import find_library + import sys + + if sys.platform.startswith('win'): + libcrypto = find_library('libeay32') + else: + libcrypto = find_library('crypto') + + if libcrypto is None: + return None + + libcrypto = CDLL(libcrypto) + + # typedef struct DES_ks + # { + # union + # { + # DES_cblock cblock; + # /* make sure things are correct size on machines with + # * 8 byte longs */ + # DES_LONG deslong[2]; + # } ks[16]; + # } DES_key_schedule; + + # just create a big enough place to hold everything + # it will have alignment of structure so we should be okay (16 byte aligned?) 
+ class DES_KEY_SCHEDULE(Structure): + _fields_ = [('DES_cblock1', c_char * 16), + ('DES_cblock2', c_char * 16), + ('DES_cblock3', c_char * 16), + ('DES_cblock4', c_char * 16), + ('DES_cblock5', c_char * 16), + ('DES_cblock6', c_char * 16), + ('DES_cblock7', c_char * 16), + ('DES_cblock8', c_char * 16), + ('DES_cblock9', c_char * 16), + ('DES_cblock10', c_char * 16), + ('DES_cblock11', c_char * 16), + ('DES_cblock12', c_char * 16), + ('DES_cblock13', c_char * 16), + ('DES_cblock14', c_char * 16), + ('DES_cblock15', c_char * 16), + ('DES_cblock16', c_char * 16)] + + DES_KEY_SCHEDULE_p = POINTER(DES_KEY_SCHEDULE) + + def F(restype, name, argtypes): + func = getattr(libcrypto, name) + func.restype = restype + func.argtypes = argtypes + return func + + DES_set_key = F(None, 'DES_set_key',[c_char_p, DES_KEY_SCHEDULE_p]) + DES_ecb_encrypt = F(None, 'DES_ecb_encrypt',[c_char_p, c_char_p, DES_KEY_SCHEDULE_p, c_int]) + + + class DES(object): + def __init__(self, key): + if len(key) != 8 : + raise Exception('DES improper key used') + return + self.key = key + self.keyschedule = DES_KEY_SCHEDULE() + DES_set_key(self.key, self.keyschedule) + def desdecrypt(self, data): + ob = create_string_buffer(len(data)) + DES_ecb_encrypt(data, ob, self.keyschedule, 0) + return ob.raw + def decrypt(self, data): + if not data: + return '' + i = 0 + result = [] + while i < len(data): + block = data[i:i+8] + processed_block = self.desdecrypt(block) + result.append(processed_block) + i += 8 + return ''.join(result) + + return DES diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt b/DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt index 05065ac..e69de29 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt +++ b/DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt @@ -1,292 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -from __future__ import with_statement -__license__ = 'GPL v3' - -# 
Standard Python modules. -import os, sys, re, hashlib -import json -import traceback - -from calibre.utils.config import dynamic, config_dir, JSONConfig -from calibre_plugins.dedrm.__init__ import PLUGIN_NAME, PLUGIN_VERSION -from calibre.constants import iswindows, isosx - -class DeDRM_Prefs(): - def __init__(self): - JSON_PATH = os.path.join(u"plugins", PLUGIN_NAME.strip().lower().replace(' ', '_') + '.json') - self.dedrmprefs = JSONConfig(JSON_PATH) - - self.dedrmprefs.defaults['configured'] = False - self.dedrmprefs.defaults['bandnkeys'] = {} - self.dedrmprefs.defaults['adeptkeys'] = {} - self.dedrmprefs.defaults['ereaderkeys'] = {} - self.dedrmprefs.defaults['kindlekeys'] = {} - self.dedrmprefs.defaults['pids'] = [] - self.dedrmprefs.defaults['serials'] = [] - self.dedrmprefs.defaults['adobewineprefix'] = "" - self.dedrmprefs.defaults['kindlewineprefix'] = "" - - # initialise - # we must actually set the prefs that are dictionaries and lists - # to empty dictionaries and lists, otherwise we are unable to add to them - # as then it just adds to the (memory only) dedrmprefs.defaults versions! 
- if self.dedrmprefs['bandnkeys'] == {}: - self.dedrmprefs['bandnkeys'] = {} - if self.dedrmprefs['adeptkeys'] == {}: - self.dedrmprefs['adeptkeys'] = {} - if self.dedrmprefs['ereaderkeys'] == {}: - self.dedrmprefs['ereaderkeys'] = {} - if self.dedrmprefs['kindlekeys'] == {}: - self.dedrmprefs['kindlekeys'] = {} - if self.dedrmprefs['pids'] == []: - self.dedrmprefs['pids'] = [] - if self.dedrmprefs['serials'] == []: - self.dedrmprefs['serials'] = [] - - def __getitem__(self,kind = None): - if kind is not None: - return self.dedrmprefs[kind] - return self.dedrmprefs - - def set(self, kind, value): - self.dedrmprefs[kind] = value - - def writeprefs(self,value = True): - self.dedrmprefs['configured'] = value - - def addnamedvaluetoprefs(self, prefkind, keyname, keyvalue): - try: - if keyvalue not in self.dedrmprefs[prefkind].values(): - # ensure that the keyname is unique - # by adding a number (starting with 2) to the name if it is not - namecount = 1 - newname = keyname - while newname in self.dedrmprefs[prefkind]: - namecount += 1 - newname = "{0:s}_{1:d}".format(keyname,namecount) - # add to the preferences - self.dedrmprefs[prefkind][newname] = keyvalue - return (True, newname) - except: - traceback.print_exc() - pass - return (False, keyname) - - def addvaluetoprefs(self, prefkind, prefsvalue): - # ensure the keyvalue isn't already in the preferences - try: - if prefsvalue not in self.dedrmprefs[prefkind]: - self.dedrmprefs[prefkind].append(prefsvalue) - return True - except: - traceback.print_exc() - return False - - -def convertprefs(always = False): - - def parseIgnobleString(keystuff): - from calibre_plugins.dedrm.ignoblekeygen import generate_key - userkeys = [] - ar = keystuff.split(':') - for keystring in ar: - try: - name, ccn = keystring.split(',') - # Generate Barnes & Noble EPUB user key from name and credit card number. 
- keyname = u"{0}_{1}".format(name.strip(),ccn.strip()[-4:]) - keyvalue = generate_key(name, ccn) - userkeys.append([keyname,keyvalue]) - except Exception, e: - traceback.print_exc() - print e.args[0] - pass - return userkeys - - def parseeReaderString(keystuff): - from calibre_plugins.dedrm.erdr2pml import getuser_key - userkeys = [] - ar = keystuff.split(':') - for keystring in ar: - try: - name, cc = keystring.split(',') - # Generate eReader user key from name and credit card number. - keyname = u"{0}_{1}".format(name.strip(),cc.strip()[-4:]) - keyvalue = getuser_key(name,cc).encode('hex') - userkeys.append([keyname,keyvalue]) - except Exception, e: - traceback.print_exc() - print e.args[0] - pass - return userkeys - - def parseKindleString(keystuff): - pids = [] - serials = [] - ar = keystuff.split(',') - for keystring in ar: - keystring = str(keystring).strip().replace(" ","") - if len(keystring) == 10 or len(keystring) == 8 and keystring not in pids: - pids.append(keystring) - elif len(keystring) == 16 and keystring[0] == 'B' and keystring not in serials: - serials.append(keystring) - return (pids,serials) - - def getConfigFiles(extension, encoding = None): - # get any files with extension 'extension' in the config dir - userkeys = [] - files = [f for f in os.listdir(config_dir) if f.endswith(extension)] - for filename in files: - try: - fpath = os.path.join(config_dir, filename) - key = os.path.splitext(filename)[0] - value = open(fpath, 'rb').read() - if encoding is not None: - value = value.encode(encoding) - userkeys.append([key,value]) - except: - traceback.print_exc() - pass - return userkeys - - dedrmprefs = DeDRM_Prefs() - - if (not always) and dedrmprefs['configured']: - # We've already converted old preferences, - # and we're not being forced to do it again, so just return - return - - - print u"{0} v{1}: Importing configuration data from old DeDRM plugins".format(PLUGIN_NAME, PLUGIN_VERSION) - - IGNOBLEPLUGINNAME = "Ignoble Epub DeDRM" - 
EREADERPLUGINNAME = "eReader PDB 2 PML" - OLDKINDLEPLUGINNAME = "K4PC, K4Mac, Kindle Mobi and Topaz DeDRM" - - # get prefs from older tools - kindleprefs = JSONConfig(os.path.join(u"plugins", u"K4MobiDeDRM")) - ignobleprefs = JSONConfig(os.path.join(u"plugins", u"ignoble_epub_dedrm")) - - # Handle the old ignoble plugin's customization string by converting the - # old string to stored keys... get that personal data out of plain sight. - from calibre.customize.ui import config - sc = config['plugin_customization'] - val = sc.pop(IGNOBLEPLUGINNAME, None) - if val is not None: - print u"{0} v{1}: Converting old Ignoble plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION) - priorkeycount = len(dedrmprefs['bandnkeys']) - userkeys = parseIgnobleString(str(val)) - for keypair in userkeys: - name = keypair[0] - value = keypair[1] - dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value) - addedkeycount = len(dedrmprefs['bandnkeys'])-priorkeycount - print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from old Ignoble plugin configuration string".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys") - # Make the json write all the prefs to disk - dedrmprefs.writeprefs(False) - - # Handle the old eReader plugin's customization string by converting the - # old string to stored keys... get that personal data out of plain sight. 
- val = sc.pop(EREADERPLUGINNAME, None) - if val is not None: - print u"{0} v{1}: Converting old eReader plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION) - priorkeycount = len(dedrmprefs['ereaderkeys']) - userkeys = parseeReaderString(str(val)) - for keypair in userkeys: - name = keypair[0] - value = keypair[1] - dedrmprefs.addnamedvaluetoprefs('ereaderkeys', name, value) - addedkeycount = len(dedrmprefs['ereaderkeys'])-priorkeycount - print u"{0} v{1}: {2:d} eReader {3} imported from old eReader plugin configuration string".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys") - # Make the json write all the prefs to disk - dedrmprefs.writeprefs(False) - - # get old Kindle plugin configuration string - val = sc.pop(OLDKINDLEPLUGINNAME, None) - if val is not None: - print u"{0} v{1}: Converting old Kindle plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION) - priorpidcount = len(dedrmprefs['pids']) - priorserialcount = len(dedrmprefs['serials']) - pids, serials = parseKindleString(val) - for pid in pids: - dedrmprefs.addvaluetoprefs('pids',pid) - for serial in serials: - dedrmprefs.addvaluetoprefs('serials',serial) - addedpidcount = len(dedrmprefs['pids']) - priorpidcount - addedserialcount = len(dedrmprefs['serials']) - priorserialcount - print u"{0} v{1}: {2:d} {3} and {4:d} {5} imported from old Kindle plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION, addedpidcount, u"PID" if addedpidcount==1 else u"PIDs", addedserialcount, u"serial number" if addedserialcount==1 else u"serial numbers") - # Make the json write all the prefs to disk - dedrmprefs.writeprefs(False) - - # copy the customisations back into calibre preferences, as we've now removed the nasty plaintext - config['plugin_customization'] = sc - - # get any .b64 files in the config dir - priorkeycount = len(dedrmprefs['bandnkeys']) - bandnfilekeys = getConfigFiles('.b64') - for keypair in bandnfilekeys: - name = keypair[0] 
- value = keypair[1] - dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value) - addedkeycount = len(dedrmprefs['bandnkeys'])-priorkeycount - if addedkeycount > 0: - print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from config folder.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key file" if addedkeycount==1 else u"key files") - # Make the json write all the prefs to disk - dedrmprefs.writeprefs(False) - - # get any .der files in the config dir - priorkeycount = len(dedrmprefs['adeptkeys']) - adeptfilekeys = getConfigFiles('.der','hex') - for keypair in adeptfilekeys: - name = keypair[0] - value = keypair[1] - dedrmprefs.addnamedvaluetoprefs('adeptkeys', name, value) - addedkeycount = len(dedrmprefs['adeptkeys'])-priorkeycount - if addedkeycount > 0: - print u"{0} v{1}: {2:d} Adobe Adept {3} imported from config folder.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"keyfile" if addedkeycount==1 else u"keyfiles") - # Make the json write all the prefs to disk - dedrmprefs.writeprefs(False) - - # get ignoble json prefs - if 'keys' in ignobleprefs: - priorkeycount = len(dedrmprefs['bandnkeys']) - for name in ignobleprefs['keys']: - value = ignobleprefs['keys'][name] - dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value) - addedkeycount = len(dedrmprefs['bandnkeys']) - priorkeycount - # no need to delete old prefs, since they contain no recoverable private data - if addedkeycount > 0: - print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from Ignoble plugin preferences.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys") - # Make the json write all the prefs to disk - dedrmprefs.writeprefs(False) - - # get kindle json prefs - priorpidcount = len(dedrmprefs['pids']) - priorserialcount = len(dedrmprefs['serials']) - if 'pids' in kindleprefs: - pids, serials = parseKindleString(kindleprefs['pids']) - for pid in pids: - dedrmprefs.addvaluetoprefs('pids',pid) - if 'serials' in kindleprefs: - pids, serials 
= parseKindleString(kindleprefs['serials']) - for serial in serials: - dedrmprefs.addvaluetoprefs('serials',serial) - addedpidcount = len(dedrmprefs['pids']) - priorpidcount - if addedpidcount > 0: - print u"{0} v{1}: {2:d} {3} imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, addedpidcount, u"PID" if addedpidcount==1 else u"PIDs") - addedserialcount = len(dedrmprefs['serials']) - priorserialcount - if addedserialcount > 0: - print u"{0} v{1}: {2:d} {3} imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, addedserialcount, u"serial number" if addedserialcount==1 else u"serial numbers") - try: - if 'wineprefix' in kindleprefs and kindleprefs['wineprefix'] != "": - dedrmprefs.set('adobewineprefix',kindleprefs['wineprefix']) - dedrmprefs.set('kindlewineprefix',kindleprefs['wineprefix']) - print u"{0} v{1}: WINEPREFIX ‘(2)’ imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, kindleprefs['wineprefix']) - except: - traceback.print_exc() - - - # Make the json write all the prefs to disk - dedrmprefs.writeprefs() - print u"{0} v{1}: Finished setting up configuration data.".format(PLUGIN_NAME, PLUGIN_VERSION) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/prefs.py b/DeDRM_calibre_plugin/DeDRM_plugin/prefs.py new file mode 100644 index 0000000..05065ac --- /dev/null +++ b/DeDRM_calibre_plugin/DeDRM_plugin/prefs.py @@ -0,0 +1,292 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +from __future__ import with_statement +__license__ = 'GPL v3' + +# Standard Python modules. 
+import os, sys, re, hashlib +import json +import traceback + +from calibre.utils.config import dynamic, config_dir, JSONConfig +from calibre_plugins.dedrm.__init__ import PLUGIN_NAME, PLUGIN_VERSION +from calibre.constants import iswindows, isosx + +class DeDRM_Prefs(): + def __init__(self): + JSON_PATH = os.path.join(u"plugins", PLUGIN_NAME.strip().lower().replace(' ', '_') + '.json') + self.dedrmprefs = JSONConfig(JSON_PATH) + + self.dedrmprefs.defaults['configured'] = False + self.dedrmprefs.defaults['bandnkeys'] = {} + self.dedrmprefs.defaults['adeptkeys'] = {} + self.dedrmprefs.defaults['ereaderkeys'] = {} + self.dedrmprefs.defaults['kindlekeys'] = {} + self.dedrmprefs.defaults['pids'] = [] + self.dedrmprefs.defaults['serials'] = [] + self.dedrmprefs.defaults['adobewineprefix'] = "" + self.dedrmprefs.defaults['kindlewineprefix'] = "" + + # initialise + # we must actually set the prefs that are dictionaries and lists + # to empty dictionaries and lists, otherwise we are unable to add to them + # as then it just adds to the (memory only) dedrmprefs.defaults versions! 
+ if self.dedrmprefs['bandnkeys'] == {}: + self.dedrmprefs['bandnkeys'] = {} + if self.dedrmprefs['adeptkeys'] == {}: + self.dedrmprefs['adeptkeys'] = {} + if self.dedrmprefs['ereaderkeys'] == {}: + self.dedrmprefs['ereaderkeys'] = {} + if self.dedrmprefs['kindlekeys'] == {}: + self.dedrmprefs['kindlekeys'] = {} + if self.dedrmprefs['pids'] == []: + self.dedrmprefs['pids'] = [] + if self.dedrmprefs['serials'] == []: + self.dedrmprefs['serials'] = [] + + def __getitem__(self,kind = None): + if kind is not None: + return self.dedrmprefs[kind] + return self.dedrmprefs + + def set(self, kind, value): + self.dedrmprefs[kind] = value + + def writeprefs(self,value = True): + self.dedrmprefs['configured'] = value + + def addnamedvaluetoprefs(self, prefkind, keyname, keyvalue): + try: + if keyvalue not in self.dedrmprefs[prefkind].values(): + # ensure that the keyname is unique + # by adding a number (starting with 2) to the name if it is not + namecount = 1 + newname = keyname + while newname in self.dedrmprefs[prefkind]: + namecount += 1 + newname = "{0:s}_{1:d}".format(keyname,namecount) + # add to the preferences + self.dedrmprefs[prefkind][newname] = keyvalue + return (True, newname) + except: + traceback.print_exc() + pass + return (False, keyname) + + def addvaluetoprefs(self, prefkind, prefsvalue): + # ensure the keyvalue isn't already in the preferences + try: + if prefsvalue not in self.dedrmprefs[prefkind]: + self.dedrmprefs[prefkind].append(prefsvalue) + return True + except: + traceback.print_exc() + return False + + +def convertprefs(always = False): + + def parseIgnobleString(keystuff): + from calibre_plugins.dedrm.ignoblekeygen import generate_key + userkeys = [] + ar = keystuff.split(':') + for keystring in ar: + try: + name, ccn = keystring.split(',') + # Generate Barnes & Noble EPUB user key from name and credit card number. 
+ keyname = u"{0}_{1}".format(name.strip(),ccn.strip()[-4:]) + keyvalue = generate_key(name, ccn) + userkeys.append([keyname,keyvalue]) + except Exception, e: + traceback.print_exc() + print e.args[0] + pass + return userkeys + + def parseeReaderString(keystuff): + from calibre_plugins.dedrm.erdr2pml import getuser_key + userkeys = [] + ar = keystuff.split(':') + for keystring in ar: + try: + name, cc = keystring.split(',') + # Generate eReader user key from name and credit card number. + keyname = u"{0}_{1}".format(name.strip(),cc.strip()[-4:]) + keyvalue = getuser_key(name,cc).encode('hex') + userkeys.append([keyname,keyvalue]) + except Exception, e: + traceback.print_exc() + print e.args[0] + pass + return userkeys + + def parseKindleString(keystuff): + pids = [] + serials = [] + ar = keystuff.split(',') + for keystring in ar: + keystring = str(keystring).strip().replace(" ","") + if len(keystring) == 10 or len(keystring) == 8 and keystring not in pids: + pids.append(keystring) + elif len(keystring) == 16 and keystring[0] == 'B' and keystring not in serials: + serials.append(keystring) + return (pids,serials) + + def getConfigFiles(extension, encoding = None): + # get any files with extension 'extension' in the config dir + userkeys = [] + files = [f for f in os.listdir(config_dir) if f.endswith(extension)] + for filename in files: + try: + fpath = os.path.join(config_dir, filename) + key = os.path.splitext(filename)[0] + value = open(fpath, 'rb').read() + if encoding is not None: + value = value.encode(encoding) + userkeys.append([key,value]) + except: + traceback.print_exc() + pass + return userkeys + + dedrmprefs = DeDRM_Prefs() + + if (not always) and dedrmprefs['configured']: + # We've already converted old preferences, + # and we're not being forced to do it again, so just return + return + + + print u"{0} v{1}: Importing configuration data from old DeDRM plugins".format(PLUGIN_NAME, PLUGIN_VERSION) + + IGNOBLEPLUGINNAME = "Ignoble Epub DeDRM" + 
EREADERPLUGINNAME = "eReader PDB 2 PML" + OLDKINDLEPLUGINNAME = "K4PC, K4Mac, Kindle Mobi and Topaz DeDRM" + + # get prefs from older tools + kindleprefs = JSONConfig(os.path.join(u"plugins", u"K4MobiDeDRM")) + ignobleprefs = JSONConfig(os.path.join(u"plugins", u"ignoble_epub_dedrm")) + + # Handle the old ignoble plugin's customization string by converting the + # old string to stored keys... get that personal data out of plain sight. + from calibre.customize.ui import config + sc = config['plugin_customization'] + val = sc.pop(IGNOBLEPLUGINNAME, None) + if val is not None: + print u"{0} v{1}: Converting old Ignoble plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION) + priorkeycount = len(dedrmprefs['bandnkeys']) + userkeys = parseIgnobleString(str(val)) + for keypair in userkeys: + name = keypair[0] + value = keypair[1] + dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value) + addedkeycount = len(dedrmprefs['bandnkeys'])-priorkeycount + print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from old Ignoble plugin configuration string".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys") + # Make the json write all the prefs to disk + dedrmprefs.writeprefs(False) + + # Handle the old eReader plugin's customization string by converting the + # old string to stored keys... get that personal data out of plain sight. 
+ val = sc.pop(EREADERPLUGINNAME, None) + if val is not None: + print u"{0} v{1}: Converting old eReader plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION) + priorkeycount = len(dedrmprefs['ereaderkeys']) + userkeys = parseeReaderString(str(val)) + for keypair in userkeys: + name = keypair[0] + value = keypair[1] + dedrmprefs.addnamedvaluetoprefs('ereaderkeys', name, value) + addedkeycount = len(dedrmprefs['ereaderkeys'])-priorkeycount + print u"{0} v{1}: {2:d} eReader {3} imported from old eReader plugin configuration string".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys") + # Make the json write all the prefs to disk + dedrmprefs.writeprefs(False) + + # get old Kindle plugin configuration string + val = sc.pop(OLDKINDLEPLUGINNAME, None) + if val is not None: + print u"{0} v{1}: Converting old Kindle plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION) + priorpidcount = len(dedrmprefs['pids']) + priorserialcount = len(dedrmprefs['serials']) + pids, serials = parseKindleString(val) + for pid in pids: + dedrmprefs.addvaluetoprefs('pids',pid) + for serial in serials: + dedrmprefs.addvaluetoprefs('serials',serial) + addedpidcount = len(dedrmprefs['pids']) - priorpidcount + addedserialcount = len(dedrmprefs['serials']) - priorserialcount + print u"{0} v{1}: {2:d} {3} and {4:d} {5} imported from old Kindle plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION, addedpidcount, u"PID" if addedpidcount==1 else u"PIDs", addedserialcount, u"serial number" if addedserialcount==1 else u"serial numbers") + # Make the json write all the prefs to disk + dedrmprefs.writeprefs(False) + + # copy the customisations back into calibre preferences, as we've now removed the nasty plaintext + config['plugin_customization'] = sc + + # get any .b64 files in the config dir + priorkeycount = len(dedrmprefs['bandnkeys']) + bandnfilekeys = getConfigFiles('.b64') + for keypair in bandnfilekeys: + name = keypair[0] 
+ value = keypair[1] + dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value) + addedkeycount = len(dedrmprefs['bandnkeys'])-priorkeycount + if addedkeycount > 0: + print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from config folder.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key file" if addedkeycount==1 else u"key files") + # Make the json write all the prefs to disk + dedrmprefs.writeprefs(False) + + # get any .der files in the config dir + priorkeycount = len(dedrmprefs['adeptkeys']) + adeptfilekeys = getConfigFiles('.der','hex') + for keypair in adeptfilekeys: + name = keypair[0] + value = keypair[1] + dedrmprefs.addnamedvaluetoprefs('adeptkeys', name, value) + addedkeycount = len(dedrmprefs['adeptkeys'])-priorkeycount + if addedkeycount > 0: + print u"{0} v{1}: {2:d} Adobe Adept {3} imported from config folder.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"keyfile" if addedkeycount==1 else u"keyfiles") + # Make the json write all the prefs to disk + dedrmprefs.writeprefs(False) + + # get ignoble json prefs + if 'keys' in ignobleprefs: + priorkeycount = len(dedrmprefs['bandnkeys']) + for name in ignobleprefs['keys']: + value = ignobleprefs['keys'][name] + dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value) + addedkeycount = len(dedrmprefs['bandnkeys']) - priorkeycount + # no need to delete old prefs, since they contain no recoverable private data + if addedkeycount > 0: + print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from Ignoble plugin preferences.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys") + # Make the json write all the prefs to disk + dedrmprefs.writeprefs(False) + + # get kindle json prefs + priorpidcount = len(dedrmprefs['pids']) + priorserialcount = len(dedrmprefs['serials']) + if 'pids' in kindleprefs: + pids, serials = parseKindleString(kindleprefs['pids']) + for pid in pids: + dedrmprefs.addvaluetoprefs('pids',pid) + if 'serials' in kindleprefs: + pids, serials 
= parseKindleString(kindleprefs['serials']) + for serial in serials: + dedrmprefs.addvaluetoprefs('serials',serial) + addedpidcount = len(dedrmprefs['pids']) - priorpidcount + if addedpidcount > 0: + print u"{0} v{1}: {2:d} {3} imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, addedpidcount, u"PID" if addedpidcount==1 else u"PIDs") + addedserialcount = len(dedrmprefs['serials']) - priorserialcount + if addedserialcount > 0: + print u"{0} v{1}: {2:d} {3} imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, addedserialcount, u"serial number" if addedserialcount==1 else u"serial numbers") + try: + if 'wineprefix' in kindleprefs and kindleprefs['wineprefix'] != "": + dedrmprefs.set('adobewineprefix',kindleprefs['wineprefix']) + dedrmprefs.set('kindlewineprefix',kindleprefs['wineprefix']) + print u"{0} v{1}: WINEPREFIX ‘{2}’ imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, kindleprefs['wineprefix']) + except: + traceback.print_exc() + + + # Make the json write all the prefs to disk + dedrmprefs.writeprefs() + print u"{0} v{1}: Finished setting up configuration data.".format(PLUGIN_NAME, PLUGIN_VERSION) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py b/DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py index c111850..daa108a 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py @@ -178,7 +178,12 @@ class DocParser(object): if val == "": val = 0 - if not ((attr == 'hang') and (int(val) == 0)) : + if not ((attr == 'hang') and (int(val) == 0)): + try: + f = float(val) + except: + print "Warning: unrecognised val, ignoring" + val = 0 pv = float(val)/scale cssargs[attr] = (self.attr_val_map[attr], pv) keep = True diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/subasyncio.py b/DeDRM_calibre_plugin/DeDRM_plugin/subasyncio.py new file mode 100644 index 0000000..de084d3 --- /dev/null +++ 
b/DeDRM_calibre_plugin/DeDRM_plugin/subasyncio.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +import os, sys +import signal +import threading +import subprocess +from subprocess import Popen, PIPE, STDOUT + +# **heavily** chopped up and modfied version of asyncproc.py +# to make it actually work on Windows as well as Mac/Linux +# For the original see: +# "http://www.lysator.liu.se/~bellman/download/" +# author is "Thomas Bellman " +# available under GPL version 3 or Later + +# create an asynchronous subprocess whose output can be collected in +# a non-blocking manner + +# What a mess! Have to use threads just to get non-blocking io +# in a cross-platform manner + +# luckily all thread use is hidden within this class + +class Process(object): + def __init__(self, *params, **kwparams): + if len(params) <= 3: + kwparams.setdefault('stdin', subprocess.PIPE) + if len(params) <= 4: + kwparams.setdefault('stdout', subprocess.PIPE) + if len(params) <= 5: + kwparams.setdefault('stderr', subprocess.PIPE) + self.__pending_input = [] + self.__collected_outdata = [] + self.__collected_errdata = [] + self.__exitstatus = None + self.__lock = threading.Lock() + self.__inputsem = threading.Semaphore(0) + self.__quit = False + + self.__process = subprocess.Popen(*params, **kwparams) + + if self.__process.stdin: + self.__stdin_thread = threading.Thread( + name="stdin-thread", + target=self.__feeder, args=(self.__pending_input, + self.__process.stdin)) + self.__stdin_thread.setDaemon(True) + self.__stdin_thread.start() + + if self.__process.stdout: + self.__stdout_thread = threading.Thread( + name="stdout-thread", + target=self.__reader, args=(self.__collected_outdata, + self.__process.stdout)) + self.__stdout_thread.setDaemon(True) + self.__stdout_thread.start() + + if self.__process.stderr: + self.__stderr_thread = threading.Thread( + name="stderr-thread", + target=self.__reader, args=(self.__collected_errdata, + 
self.__process.stderr)) + self.__stderr_thread.setDaemon(True) + self.__stderr_thread.start() + + def pid(self): + return self.__process.pid + + def kill(self, signal): + self.__process.send_signal(signal) + + # check on subprocess (pass in 'nowait') to act like poll + def wait(self, flag): + if flag.lower() == 'nowait': + rc = self.__process.poll() + else: + rc = self.__process.wait() + if rc != None: + if self.__process.stdin: + self.closeinput() + if self.__process.stdout: + self.__stdout_thread.join() + if self.__process.stderr: + self.__stderr_thread.join() + return self.__process.returncode + + def terminate(self): + if self.__process.stdin: + self.closeinput() + self.__process.terminate() + + # thread gets data from subprocess stdout + def __reader(self, collector, source): + while True: + data = os.read(source.fileno(), 65536) + self.__lock.acquire() + collector.append(data) + self.__lock.release() + if data == "": + source.close() + break + return + + # thread feeds data to subprocess stdin + def __feeder(self, pending, drain): + while True: + self.__inputsem.acquire() + self.__lock.acquire() + if not pending and self.__quit: + drain.close() + self.__lock.release() + break + data = pending.pop(0) + self.__lock.release() + drain.write(data) + + # non-blocking read of data from subprocess stdout + def read(self): + self.__lock.acquire() + outdata = "".join(self.__collected_outdata) + del self.__collected_outdata[:] + self.__lock.release() + return outdata + + # non-blocking read of data from subprocess stderr + def readerr(self): + self.__lock.acquire() + errdata = "".join(self.__collected_errdata) + del self.__collected_errdata[:] + self.__lock.release() + return errdata + + # non-blocking write to stdin of subprocess + def write(self, data): + if self.__process.stdin is None: + raise ValueError("Writing to process with stdin not a pipe") + self.__lock.acquire() + self.__pending_input.append(data) + self.__inputsem.release() + self.__lock.release() + + # 
close stdinput of subprocess + def closeinput(self): + self.__lock.acquire() + self.__quit = True + self.__inputsem.release() + self.__lock.release() diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py b/DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py index de084d3..fb5eb7a 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py @@ -1,148 +1,538 @@ #!/usr/bin/env python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -import os, sys -import signal -import threading -import subprocess -from subprocess import Popen, PIPE, STDOUT - -# **heavily** chopped up and modfied version of asyncproc.py -# to make it actually work on Windows as well as Mac/Linux -# For the original see: -# "http://www.lysator.liu.se/~bellman/download/" -# author is "Thomas Bellman " -# available under GPL version 3 or Later - -# create an asynchronous subprocess whose output can be collected in -# a non-blocking manner - -# What a mess! Have to use threads just to get non-blocking io -# in a cross-platform manner - -# luckily all thread use is hidden within this class - -class Process(object): - def __init__(self, *params, **kwparams): - if len(params) <= 3: - kwparams.setdefault('stdin', subprocess.PIPE) - if len(params) <= 4: - kwparams.setdefault('stdout', subprocess.PIPE) - if len(params) <= 5: - kwparams.setdefault('stderr', subprocess.PIPE) - self.__pending_input = [] - self.__collected_outdata = [] - self.__collected_errdata = [] - self.__exitstatus = None - self.__lock = threading.Lock() - self.__inputsem = threading.Semaphore(0) - self.__quit = False - - self.__process = subprocess.Popen(*params, **kwparams) - - if self.__process.stdin: - self.__stdin_thread = threading.Thread( - name="stdin-thread", - target=self.__feeder, args=(self.__pending_input, - self.__process.stdin)) - self.__stdin_thread.setDaemon(True) - self.__stdin_thread.start() - - if self.__process.stdout: - self.__stdout_thread = 
threading.Thread( - name="stdout-thread", - target=self.__reader, args=(self.__collected_outdata, - self.__process.stdout)) - self.__stdout_thread.setDaemon(True) - self.__stdout_thread.start() - - if self.__process.stderr: - self.__stderr_thread = threading.Thread( - name="stderr-thread", - target=self.__reader, args=(self.__collected_errdata, - self.__process.stderr)) - self.__stderr_thread.setDaemon(True) - self.__stderr_thread.start() - - def pid(self): - return self.__process.pid - - def kill(self, signal): - self.__process.send_signal(signal) - - # check on subprocess (pass in 'nowait') to act like poll - def wait(self, flag): - if flag.lower() == 'nowait': - rc = self.__process.poll() +# -*- coding: utf-8 -*- + +# topazextract.py +# Mostly written by some_updates based on code from many others + +# Changelog +# 4.9 - moved unicode_argv call inside main for Windows DeDRM compatibility +# 5.0 - Fixed potential unicode problem with command line interface + +__version__ = '5.0' + +import sys +import os, csv, getopt +import zlib, zipfile, tempfile, shutil +import traceback +from struct import pack +from struct import unpack +from alfcrypto import Topaz_Cipher + +class SafeUnbuffered: + def __init__(self, stream): + self.stream = stream + self.encoding = stream.encoding + if self.encoding == None: + self.encoding = "utf-8" + def write(self, data): + if isinstance(data,unicode): + data = data.encode(self.encoding,"replace") + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + +iswindows = sys.platform.startswith('win') +isosx = sys.platform.startswith('darwin') + +def unicode_argv(): + if iswindows: + # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode + # strings. + + # Versions 2.x of Python don't support Unicode in sys.argv on + # Windows, with the underlying Windows API instead replacing multi-byte + # characters with '?'. 
+ + + from ctypes import POINTER, byref, cdll, c_int, windll + from ctypes.wintypes import LPCWSTR, LPWSTR + + GetCommandLineW = cdll.kernel32.GetCommandLineW + GetCommandLineW.argtypes = [] + GetCommandLineW.restype = LPCWSTR + + CommandLineToArgvW = windll.shell32.CommandLineToArgvW + CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] + CommandLineToArgvW.restype = POINTER(LPWSTR) + + cmd = GetCommandLineW() + argc = c_int(0) + argv = CommandLineToArgvW(cmd, byref(argc)) + if argc.value > 0: + # Remove Python executable and commands if present + start = argc.value - len(sys.argv) + return [argv[i] for i in + xrange(start, argc.value)] + # if we don't have any arguments at all, just pass back script name + # this should never happen + return [u"mobidedrm.py"] + else: + argvencoding = sys.stdin.encoding + if argvencoding == None: + argvencoding = 'utf-8' + return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] + +#global switch +debug = False + +if 'calibre' in sys.modules: + inCalibre = True + from calibre_plugins.dedrm import kgenpids +else: + inCalibre = False + import kgenpids + + +class DrmException(Exception): + pass + + +# recursive zip creation support routine +def zipUpDir(myzip, tdir, localname): + currentdir = tdir + if localname != u"": + currentdir = os.path.join(currentdir,localname) + list = os.listdir(currentdir) + for file in list: + afilename = file + localfilePath = os.path.join(localname, afilename) + realfilePath = os.path.join(currentdir,file) + if os.path.isfile(realfilePath): + myzip.write(realfilePath, localfilePath) + elif os.path.isdir(realfilePath): + zipUpDir(myzip, tdir, localfilePath) + +# +# Utility routines +# + +# Get a 7 bit encoded number from file +def bookReadEncodedNumber(fo): + flag = False + data = ord(fo.read(1)) + if data == 0xFF: + flag = True + data = ord(fo.read(1)) + if data >= 0x80: + datax = (data & 0x7F) + while data >= 0x80 : + data = ord(fo.read(1)) + datax = (datax <<7) + 
(data & 0x7F) + data = datax + if flag: + data = -data + return data + +# Get a length prefixed string from file +def bookReadString(fo): + stringLength = bookReadEncodedNumber(fo) + return unpack(str(stringLength)+'s',fo.read(stringLength))[0] + +# +# crypto routines +# + +# Context initialisation for the Topaz Crypto +def topazCryptoInit(key): + return Topaz_Cipher().ctx_init(key) + +# ctx1 = 0x0CAFFE19E +# for keyChar in key: +# keyByte = ord(keyChar) +# ctx2 = ctx1 +# ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF ) +# return [ctx1,ctx2] + +# decrypt data with the context prepared by topazCryptoInit() +def topazCryptoDecrypt(data, ctx): + return Topaz_Cipher().decrypt(data, ctx) +# ctx1 = ctx[0] +# ctx2 = ctx[1] +# plainText = "" +# for dataChar in data: +# dataByte = ord(dataChar) +# m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF +# ctx2 = ctx1 +# ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF) +# plainText += chr(m) +# return plainText + +# Decrypt data with the PID +def decryptRecord(data,PID): + ctx = topazCryptoInit(PID) + return topazCryptoDecrypt(data, ctx) + +# Try to decrypt a dkey record (contains the bookPID) +def decryptDkeyRecord(data,PID): + record = decryptRecord(data,PID) + fields = unpack('3sB8sB8s3s',record) + if fields[0] != 'PID' or fields[5] != 'pid' : + raise DrmException(u"Didn't find PID magic numbers in record") + elif fields[1] != 8 or fields[3] != 8 : + raise DrmException(u"Record didn't contain correct length fields") + elif fields[2] != PID : + raise DrmException(u"Record didn't contain PID") + return fields[4] + +# Decrypt all dkey records (contain the book PID) +def decryptDkeyRecords(data,PID): + nbKeyRecords = ord(data[0]) + records = [] + data = data[1:] + for i in range (0,nbKeyRecords): + length = ord(data[0]) + try: + key = decryptDkeyRecord(data[1:length+1],PID) + records.append(key) + except DrmException: + pass + data = 
data[1+length:] + if len(records) == 0: + raise DrmException(u"BookKey Not Found") + return records + + +class TopazBook: + def __init__(self, filename): + self.fo = file(filename, 'rb') + self.outdir = tempfile.mkdtemp() + # self.outdir = 'rawdat' + self.bookPayloadOffset = 0 + self.bookHeaderRecords = {} + self.bookMetadata = {} + self.bookKey = None + magic = unpack('4s',self.fo.read(4))[0] + if magic != 'TPZ0': + raise DrmException(u"Parse Error : Invalid Header, not a Topaz file") + self.parseTopazHeaders() + self.parseMetadata() + + def parseTopazHeaders(self): + def bookReadHeaderRecordData(): + # Read and return the data of one header record at the current book file position + # [[offset,decompressedLength,compressedLength],...] + nbValues = bookReadEncodedNumber(self.fo) + if debug: print "%d records in header " % nbValues, + values = [] + for i in range (0,nbValues): + values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)]) + return values + def parseTopazHeaderRecord(): + # Read and parse one header record at the current book file position and return the associated data + # [[offset,decompressedLength,compressedLength],...] 
+ if ord(self.fo.read(1)) != 0x63: + raise DrmException(u"Parse Error : Invalid Header") + tag = bookReadString(self.fo) + record = bookReadHeaderRecordData() + return [tag,record] + nbRecords = bookReadEncodedNumber(self.fo) + if debug: print "Headers: %d" % nbRecords + for i in range (0,nbRecords): + result = parseTopazHeaderRecord() + if debug: print result[0], ": ", result[1] + self.bookHeaderRecords[result[0]] = result[1] + if ord(self.fo.read(1)) != 0x64 : + raise DrmException(u"Parse Error : Invalid Header") + self.bookPayloadOffset = self.fo.tell() + + def parseMetadata(self): + # Parse the metadata record from the book payload and return a list of [key,values] + self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords['metadata'][0][0]) + tag = bookReadString(self.fo) + if tag != 'metadata' : + raise DrmException(u"Parse Error : Record Names Don't Match") + flags = ord(self.fo.read(1)) + nbRecords = ord(self.fo.read(1)) + if debug: print "Metadata Records: %d" % nbRecords + for i in range (0,nbRecords) : + keyval = bookReadString(self.fo) + content = bookReadString(self.fo) + if debug: print keyval + if debug: print content + self.bookMetadata[keyval] = content + return self.bookMetadata + + def getPIDMetaInfo(self): + keysRecord = self.bookMetadata.get('keys','') + keysRecordRecord = '' + if keysRecord != '': + keylst = keysRecord.split(',') + for keyval in keylst: + keysRecordRecord += self.bookMetadata.get(keyval,'') + return keysRecord, keysRecordRecord + + def getBookTitle(self): + title = '' + if 'Title' in self.bookMetadata: + title = self.bookMetadata['Title'] + return title.decode('utf-8') + + def setBookKey(self, key): + self.bookKey = key + + def getBookPayloadRecord(self, name, index): + # Get a record in the book payload, given its name and index. 
+ # decrypted and decompressed if necessary + encrypted = False + compressed = False + try: + recordOffset = self.bookHeaderRecords[name][index][0] + except: + raise DrmException("Parse Error : Invalid Record, record not found") + + self.fo.seek(self.bookPayloadOffset + recordOffset) + + tag = bookReadString(self.fo) + if tag != name : + raise DrmException("Parse Error : Invalid Record, record name doesn't match") + + recordIndex = bookReadEncodedNumber(self.fo) + if recordIndex < 0 : + encrypted = True + recordIndex = -recordIndex -1 + + if recordIndex != index : + raise DrmException("Parse Error : Invalid Record, index doesn't match") + + if (self.bookHeaderRecords[name][index][2] > 0): + compressed = True + record = self.fo.read(self.bookHeaderRecords[name][index][2]) else: - rc = self.__process.wait() - if rc != None: - if self.__process.stdin: - self.closeinput() - if self.__process.stdout: - self.__stdout_thread.join() - if self.__process.stderr: - self.__stderr_thread.join() - return self.__process.returncode - - def terminate(self): - if self.__process.stdin: - self.closeinput() - self.__process.terminate() - - # thread gets data from subprocess stdout - def __reader(self, collector, source): - while True: - data = os.read(source.fileno(), 65536) - self.__lock.acquire() - collector.append(data) - self.__lock.release() - if data == "": - source.close() - break - return - - # thread feeds data to subprocess stdin - def __feeder(self, pending, drain): - while True: - self.__inputsem.acquire() - self.__lock.acquire() - if not pending and self.__quit: - drain.close() - self.__lock.release() + record = self.fo.read(self.bookHeaderRecords[name][index][1]) + + if encrypted: + if self.bookKey: + ctx = topazCryptoInit(self.bookKey) + record = topazCryptoDecrypt(record,ctx) + else : + raise DrmException("Error: Attempt to decrypt without bookKey") + + if compressed: + record = zlib.decompress(record) + + return record + + def processBook(self, pidlst): + raw = 0 + 
fixedimage=True + try: + keydata = self.getBookPayloadRecord('dkey', 0) + except DrmException, e: + print u"no dkey record found, book may not be encrypted" + print u"attempting to extrct files without a book key" + self.createBookDirectory() + self.extractFiles() + print u"Successfully Extracted Topaz contents" + if inCalibre: + from calibre_plugins.dedrm import genbook + else: + import genbook + + rv = genbook.generateBook(self.outdir, raw, fixedimage) + if rv == 0: + print u"Book Successfully generated." + return rv + + # try each pid to decode the file + bookKey = None + for pid in pidlst: + # use 8 digit pids here + pid = pid[0:8] + print u"Trying: {0}".format(pid) + bookKeys = [] + data = keydata + try: + bookKeys+=decryptDkeyRecords(data,pid) + except DrmException, e: + pass + else: + bookKey = bookKeys[0] + print u"Book Key Found! ({0})".format(bookKey.encode('hex')) break - data = pending.pop(0) - self.__lock.release() - drain.write(data) - - # non-blocking read of data from subprocess stdout - def read(self): - self.__lock.acquire() - outdata = "".join(self.__collected_outdata) - del self.__collected_outdata[:] - self.__lock.release() - return outdata - - # non-blocking read of data from subprocess stderr - def readerr(self): - self.__lock.acquire() - errdata = "".join(self.__collected_errdata) - del self.__collected_errdata[:] - self.__lock.release() - return errdata - - # non-blocking write to stdin of subprocess - def write(self, data): - if self.__process.stdin is None: - raise ValueError("Writing to process with stdin not a pipe") - self.__lock.acquire() - self.__pending_input.append(data) - self.__inputsem.release() - self.__lock.release() - - # close stdinput of subprocess - def closeinput(self): - self.__lock.acquire() - self.__quit = True - self.__inputsem.release() - self.__lock.release() + + if not bookKey: + raise DrmException(u"No key found in {0:d} keys tried. 
Read the FAQs at Alf's blog: http://apprenticealf.wordpress.com/".format(len(pidlst))) + + self.setBookKey(bookKey) + self.createBookDirectory() + self.extractFiles() + print u"Successfully Extracted Topaz contents" + if inCalibre: + from calibre_plugins.dedrm import genbook + else: + import genbook + + rv = genbook.generateBook(self.outdir, raw, fixedimage) + if rv == 0: + print u"Book Successfully generated" + return rv + + def createBookDirectory(self): + outdir = self.outdir + # create output directory structure + if not os.path.exists(outdir): + os.makedirs(outdir) + destdir = os.path.join(outdir,u"img") + if not os.path.exists(destdir): + os.makedirs(destdir) + destdir = os.path.join(outdir,u"color_img") + if not os.path.exists(destdir): + os.makedirs(destdir) + destdir = os.path.join(outdir,u"page") + if not os.path.exists(destdir): + os.makedirs(destdir) + destdir = os.path.join(outdir,u"glyphs") + if not os.path.exists(destdir): + os.makedirs(destdir) + + def extractFiles(self): + outdir = self.outdir + for headerRecord in self.bookHeaderRecords: + name = headerRecord + if name != 'dkey': + ext = u".dat" + if name == 'img': ext = u".jpg" + if name == 'color' : ext = u".jpg" + print u"Processing Section: {0}\n. . 
.".format(name), + for index in range (0,len(self.bookHeaderRecords[name])) : + fname = u"{0}{1:04d}{2}".format(name,index,ext) + destdir = outdir + if name == 'img': + destdir = os.path.join(outdir,u"img") + if name == 'color': + destdir = os.path.join(outdir,u"color_img") + if name == 'page': + destdir = os.path.join(outdir,u"page") + if name == 'glyphs': + destdir = os.path.join(outdir,u"glyphs") + outputFile = os.path.join(destdir,fname) + print u".", + record = self.getBookPayloadRecord(name,index) + if record != '': + file(outputFile, 'wb').write(record) + print u" " + + def getFile(self, zipname): + htmlzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False) + htmlzip.write(os.path.join(self.outdir,u"book.html"),u"book.html") + htmlzip.write(os.path.join(self.outdir,u"book.opf"),u"book.opf") + if os.path.isfile(os.path.join(self.outdir,u"cover.jpg")): + htmlzip.write(os.path.join(self.outdir,u"cover.jpg"),u"cover.jpg") + htmlzip.write(os.path.join(self.outdir,u"style.css"),u"style.css") + zipUpDir(htmlzip, self.outdir, u"img") + htmlzip.close() + + def getBookType(self): + return u"Topaz" + + def getBookExtension(self): + return u".htmlz" + + def getSVGZip(self, zipname): + svgzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False) + svgzip.write(os.path.join(self.outdir,u"index_svg.xhtml"),u"index_svg.xhtml") + zipUpDir(svgzip, self.outdir, u"svg") + zipUpDir(svgzip, self.outdir, u"img") + svgzip.close() + + def cleanup(self): + if os.path.isdir(self.outdir): + shutil.rmtree(self.outdir, True) + +def usage(progname): + print u"Removes DRM protection from Topaz ebooks and extracts the contents" + print u"Usage:" + print u" {0} [-k ] [-p ] [-s ] ".format(progname) + +# Main +def cli_main(): + argv=unicode_argv() + progname = os.path.basename(argv[0]) + print u"TopazExtract v{0}.".format(__version__) + + try: + opts, args = getopt.getopt(argv[1:], "k:p:s:x") + except getopt.GetoptError, err: + print u"Error in options or arguments: 
{0}".format(err.args[0]) + usage(progname) + return 1 + if len(args)<2: + usage(progname) + return 1 + + infile = args[0] + outdir = args[1] + if not os.path.isfile(infile): + print u"Input File {0} Does Not Exist.".format(infile) + return 1 + + if not os.path.exists(outdir): + print u"Output Directory {0} Does Not Exist.".format(outdir) + return 1 + + kDatabaseFiles = [] + serials = [] + pids = [] + + for o, a in opts: + if o == '-k': + if a == None : + raise DrmException("Invalid parameter for -k") + kDatabaseFiles.append(a) + if o == '-p': + if a == None : + raise DrmException("Invalid parameter for -p") + pids = a.split(',') + if o == '-s': + if a == None : + raise DrmException("Invalid parameter for -s") + serials = [serial.replace(" ","") for serial in a.split(',')] + + bookname = os.path.splitext(os.path.basename(infile))[0] + + tb = TopazBook(infile) + title = tb.getBookTitle() + print u"Processing Book: {0}".format(title) + md1, md2 = tb.getPIDMetaInfo() + pids.extend(kgenpids.getPidList(md1, md2, serials, kDatabaseFiles)) + + try: + print u"Decrypting Book" + tb.processBook(pids) + + print u" Creating HTML ZIP Archive" + zipname = os.path.join(outdir, bookname + u"_nodrm.htmlz") + tb.getFile(zipname) + + print u" Creating SVG ZIP Archive" + zipname = os.path.join(outdir, bookname + u"_SVG.zip") + tb.getSVGZip(zipname) + + # removing internal temporary directory of pieces + tb.cleanup() + + except DrmException, e: + print u"Decryption failed\n{0}".format(traceback.format_exc()) + + try: + tb.cleanup() + except: + pass + return 1 + + except Exception, e: + print u"Decryption failed\m{0}".format(traceback.format_exc()) + try: + tb.cleanup() + except: + pass + return 1 + + return 0 + + +if __name__ == '__main__': + sys.stdout=SafeUnbuffered(sys.stdout) + sys.stderr=SafeUnbuffered(sys.stderr) + sys.exit(cli_main()) diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/utilities.py b/DeDRM_calibre_plugin/DeDRM_plugin/utilities.py index 97f6583..c730607 100644 --- 
a/DeDRM_calibre_plugin/DeDRM_plugin/utilities.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/utilities.py @@ -1,538 +1,39 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# topazextract.py -# Mostly written by some_updates based on code from many others - -# Changelog -# 4.9 - moved unicode_argv call inside main for Windows DeDRM compatibility -# 5.0 - Fixed potential unicode problem with command line interface - -__version__ = '5.0' - -import sys -import os, csv, getopt -import zlib, zipfile, tempfile, shutil -import traceback -from struct import pack -from struct import unpack -from alfcrypto import Topaz_Cipher - -class SafeUnbuffered: - def __init__(self, stream): - self.stream = stream - self.encoding = stream.encoding - if self.encoding == None: - self.encoding = "utf-8" - def write(self, data): - if isinstance(data,unicode): - data = data.encode(self.encoding,"replace") - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) - -iswindows = sys.platform.startswith('win') -isosx = sys.platform.startswith('darwin') - -def unicode_argv(): - if iswindows: - # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode - # strings. - - # Versions 2.x of Python don't support Unicode in sys.argv on - # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. 
- - - from ctypes import POINTER, byref, cdll, c_int, windll - from ctypes.wintypes import LPCWSTR, LPWSTR - - GetCommandLineW = cdll.kernel32.GetCommandLineW - GetCommandLineW.argtypes = [] - GetCommandLineW.restype = LPCWSTR - - CommandLineToArgvW = windll.shell32.CommandLineToArgvW - CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] - CommandLineToArgvW.restype = POINTER(LPWSTR) - - cmd = GetCommandLineW() - argc = c_int(0) - argv = CommandLineToArgvW(cmd, byref(argc)) - if argc.value > 0: - # Remove Python executable and commands if present - start = argc.value - len(sys.argv) - return [argv[i] for i in - xrange(start, argc.value)] - # if we don't have any arguments at all, just pass back script name - # this should never happen - return [u"mobidedrm.py"] +from __future__ import with_statement + +__license__ = 'GPL v3' + +DETAILED_MESSAGE = \ +'You have personal information stored in this plugin\'s customization '+ \ +'string from a previous version of this plugin.\n\n'+ \ +'This new version of the plugin can convert that info '+ \ +'into key data that the new plugin can then use (which doesn\'t '+ \ +'require personal information to be stored/displayed in an insecure '+ \ +'manner like the old plugin did).\n\nIf you choose NOT to migrate this data at this time '+ \ +'you will be prompted to save that personal data to a file elsewhere; and you\'ll have '+ \ +'to manually re-configure this plugin with your information.\n\nEither way... ' + \ +'this new version of the plugin will not be responsible for storing that personal '+ \ +'info in plain sight any longer.' 
+ +def uStrCmp (s1, s2, caseless=False): + import unicodedata as ud + str1 = s1 if isinstance(s1, unicode) else unicode(s1) + str2 = s2 if isinstance(s2, unicode) else unicode(s2) + if caseless: + return ud.normalize('NFC', str1.lower()) == ud.normalize('NFC', str2.lower()) else: - argvencoding = sys.stdin.encoding - if argvencoding == None: - argvencoding = 'utf-8' - return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv] - -#global switch -debug = False - -if 'calibre' in sys.modules: - inCalibre = True - from calibre_plugins.dedrm import kgenpids -else: - inCalibre = False - import kgenpids - - -class DrmException(Exception): - pass - - -# recursive zip creation support routine -def zipUpDir(myzip, tdir, localname): - currentdir = tdir - if localname != u"": - currentdir = os.path.join(currentdir,localname) - list = os.listdir(currentdir) - for file in list: - afilename = file - localfilePath = os.path.join(localname, afilename) - realfilePath = os.path.join(currentdir,file) - if os.path.isfile(realfilePath): - myzip.write(realfilePath, localfilePath) - elif os.path.isdir(realfilePath): - zipUpDir(myzip, tdir, localfilePath) - -# -# Utility routines -# - -# Get a 7 bit encoded number from file -def bookReadEncodedNumber(fo): - flag = False - data = ord(fo.read(1)) - if data == 0xFF: - flag = True - data = ord(fo.read(1)) - if data >= 0x80: - datax = (data & 0x7F) - while data >= 0x80 : - data = ord(fo.read(1)) - datax = (datax <<7) + (data & 0x7F) - data = datax - if flag: - data = -data - return data - -# Get a length prefixed string from file -def bookReadString(fo): - stringLength = bookReadEncodedNumber(fo) - return unpack(str(stringLength)+'s',fo.read(stringLength))[0] + return ud.normalize('NFC', str1) == ud.normalize('NFC', str2) -# -# crypto routines -# - -# Context initialisation for the Topaz Crypto -def topazCryptoInit(key): - return Topaz_Cipher().ctx_init(key) - -# ctx1 = 0x0CAFFE19E -# for keyChar in key: -# 
keyByte = ord(keyChar) -# ctx2 = ctx1 -# ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF ) -# return [ctx1,ctx2] - -# decrypt data with the context prepared by topazCryptoInit() -def topazCryptoDecrypt(data, ctx): - return Topaz_Cipher().decrypt(data, ctx) -# ctx1 = ctx[0] -# ctx2 = ctx[1] -# plainText = "" -# for dataChar in data: -# dataByte = ord(dataChar) -# m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF -# ctx2 = ctx1 -# ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF) -# plainText += chr(m) -# return plainText - -# Decrypt data with the PID -def decryptRecord(data,PID): - ctx = topazCryptoInit(PID) - return topazCryptoDecrypt(data, ctx) - -# Try to decrypt a dkey record (contains the bookPID) -def decryptDkeyRecord(data,PID): - record = decryptRecord(data,PID) - fields = unpack('3sB8sB8s3s',record) - if fields[0] != 'PID' or fields[5] != 'pid' : - raise DrmException(u"Didn't find PID magic numbers in record") - elif fields[1] != 8 or fields[3] != 8 : - raise DrmException(u"Record didn't contain correct length fields") - elif fields[2] != PID : - raise DrmException(u"Record didn't contain PID") - return fields[4] - -# Decrypt all dkey records (contain the book PID) -def decryptDkeyRecords(data,PID): - nbKeyRecords = ord(data[0]) - records = [] - data = data[1:] - for i in range (0,nbKeyRecords): - length = ord(data[0]) +def parseCustString(keystuff): + userkeys = [] + ar = keystuff.split(':') + for i in ar: try: - key = decryptDkeyRecord(data[1:length+1],PID) - records.append(key) - except DrmException: - pass - data = data[1+length:] - if len(records) == 0: - raise DrmException(u"BookKey Not Found") - return records - - -class TopazBook: - def __init__(self, filename): - self.fo = file(filename, 'rb') - self.outdir = tempfile.mkdtemp() - # self.outdir = 'rawdat' - self.bookPayloadOffset = 0 - self.bookHeaderRecords = {} - self.bookMetadata = {} - self.bookKey = 
None - magic = unpack('4s',self.fo.read(4))[0] - if magic != 'TPZ0': - raise DrmException(u"Parse Error : Invalid Header, not a Topaz file") - self.parseTopazHeaders() - self.parseMetadata() - - def parseTopazHeaders(self): - def bookReadHeaderRecordData(): - # Read and return the data of one header record at the current book file position - # [[offset,decompressedLength,compressedLength],...] - nbValues = bookReadEncodedNumber(self.fo) - if debug: print "%d records in header " % nbValues, - values = [] - for i in range (0,nbValues): - values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)]) - return values - def parseTopazHeaderRecord(): - # Read and parse one header record at the current book file position and return the associated data - # [[offset,decompressedLength,compressedLength],...] - if ord(self.fo.read(1)) != 0x63: - raise DrmException(u"Parse Error : Invalid Header") - tag = bookReadString(self.fo) - record = bookReadHeaderRecordData() - return [tag,record] - nbRecords = bookReadEncodedNumber(self.fo) - if debug: print "Headers: %d" % nbRecords - for i in range (0,nbRecords): - result = parseTopazHeaderRecord() - if debug: print result[0], ": ", result[1] - self.bookHeaderRecords[result[0]] = result[1] - if ord(self.fo.read(1)) != 0x64 : - raise DrmException(u"Parse Error : Invalid Header") - self.bookPayloadOffset = self.fo.tell() - - def parseMetadata(self): - # Parse the metadata record from the book payload and return a list of [key,values] - self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords['metadata'][0][0]) - tag = bookReadString(self.fo) - if tag != 'metadata' : - raise DrmException(u"Parse Error : Record Names Don't Match") - flags = ord(self.fo.read(1)) - nbRecords = ord(self.fo.read(1)) - if debug: print "Metadata Records: %d" % nbRecords - for i in range (0,nbRecords) : - keyval = bookReadString(self.fo) - content = bookReadString(self.fo) - if debug: print keyval - if debug: 
print content - self.bookMetadata[keyval] = content - return self.bookMetadata - - def getPIDMetaInfo(self): - keysRecord = self.bookMetadata.get('keys','') - keysRecordRecord = '' - if keysRecord != '': - keylst = keysRecord.split(',') - for keyval in keylst: - keysRecordRecord += self.bookMetadata.get(keyval,'') - return keysRecord, keysRecordRecord - - def getBookTitle(self): - title = '' - if 'Title' in self.bookMetadata: - title = self.bookMetadata['Title'] - return title.decode('utf-8') - - def setBookKey(self, key): - self.bookKey = key - - def getBookPayloadRecord(self, name, index): - # Get a record in the book payload, given its name and index. - # decrypted and decompressed if necessary - encrypted = False - compressed = False - try: - recordOffset = self.bookHeaderRecords[name][index][0] - except: - raise DrmException("Parse Error : Invalid Record, record not found") - - self.fo.seek(self.bookPayloadOffset + recordOffset) - - tag = bookReadString(self.fo) - if tag != name : - raise DrmException("Parse Error : Invalid Record, record name doesn't match") - - recordIndex = bookReadEncodedNumber(self.fo) - if recordIndex < 0 : - encrypted = True - recordIndex = -recordIndex -1 - - if recordIndex != index : - raise DrmException("Parse Error : Invalid Record, index doesn't match") - - if (self.bookHeaderRecords[name][index][2] > 0): - compressed = True - record = self.fo.read(self.bookHeaderRecords[name][index][2]) - else: - record = self.fo.read(self.bookHeaderRecords[name][index][1]) - - if encrypted: - if self.bookKey: - ctx = topazCryptoInit(self.bookKey) - record = topazCryptoDecrypt(record,ctx) - else : - raise DrmException("Error: Attempt to decrypt without bookKey") - - if compressed: - record = zlib.decompress(record) - - return record - - def processBook(self, pidlst): - raw = 0 - fixedimage=True - try: - keydata = self.getBookPayloadRecord('dkey', 0) - except DrmException, e: - print u"no dkey record found, book may not be encrypted" - print 
u"attempting to extrct files without a book key" - self.createBookDirectory() - self.extractFiles() - print u"Successfully Extracted Topaz contents" - if inCalibre: - from calibre_plugins.dedrm import genbook - else: - import genbook - - rv = genbook.generateBook(self.outdir, raw, fixedimage) - if rv == 0: - print u"Book Successfully generated." - return rv - - # try each pid to decode the file - bookKey = None - for pid in pidlst: - # use 8 digit pids here - pid = pid[0:8] - print u"Trying: {0}".format(pid) - bookKeys = [] - data = keydata - try: - bookKeys+=decryptDkeyRecords(data,pid) - except DrmException, e: - pass - else: - bookKey = bookKeys[0] - print u"Book Key Found! ({0})".format(bookKey.encode('hex')) - break - - if not bookKey: - raise DrmException(u"No key found in {0:d} keys tried. Read the FAQs at Alf's blog: http://apprenticealf.wordpress.com/".format(len(pidlst))) - - self.setBookKey(bookKey) - self.createBookDirectory() - self.extractFiles() - print u"Successfully Extracted Topaz contents" - if inCalibre: - from calibre_plugins.dedrm import genbook - else: - import genbook - - rv = genbook.generateBook(self.outdir, raw, fixedimage) - if rv == 0: - print u"Book Successfully generated" - return rv - - def createBookDirectory(self): - outdir = self.outdir - # create output directory structure - if not os.path.exists(outdir): - os.makedirs(outdir) - destdir = os.path.join(outdir,u"img") - if not os.path.exists(destdir): - os.makedirs(destdir) - destdir = os.path.join(outdir,u"color_img") - if not os.path.exists(destdir): - os.makedirs(destdir) - destdir = os.path.join(outdir,u"page") - if not os.path.exists(destdir): - os.makedirs(destdir) - destdir = os.path.join(outdir,u"glyphs") - if not os.path.exists(destdir): - os.makedirs(destdir) - - def extractFiles(self): - outdir = self.outdir - for headerRecord in self.bookHeaderRecords: - name = headerRecord - if name != 'dkey': - ext = u".dat" - if name == 'img': ext = u".jpg" - if name == 'color' : ext 
= u".jpg" - print u"Processing Section: {0}\n. . .".format(name), - for index in range (0,len(self.bookHeaderRecords[name])) : - fname = u"{0}{1:04d}{2}".format(name,index,ext) - destdir = outdir - if name == 'img': - destdir = os.path.join(outdir,u"img") - if name == 'color': - destdir = os.path.join(outdir,u"color_img") - if name == 'page': - destdir = os.path.join(outdir,u"page") - if name == 'glyphs': - destdir = os.path.join(outdir,u"glyphs") - outputFile = os.path.join(destdir,fname) - print u".", - record = self.getBookPayloadRecord(name,index) - if record != '': - file(outputFile, 'wb').write(record) - print u" " - - def getFile(self, zipname): - htmlzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False) - htmlzip.write(os.path.join(self.outdir,u"book.html"),u"book.html") - htmlzip.write(os.path.join(self.outdir,u"book.opf"),u"book.opf") - if os.path.isfile(os.path.join(self.outdir,u"cover.jpg")): - htmlzip.write(os.path.join(self.outdir,u"cover.jpg"),u"cover.jpg") - htmlzip.write(os.path.join(self.outdir,u"style.css"),u"style.css") - zipUpDir(htmlzip, self.outdir, u"img") - htmlzip.close() - - def getBookType(self): - return u"Topaz" - - def getBookExtension(self): - return u".htmlz" - - def getSVGZip(self, zipname): - svgzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False) - svgzip.write(os.path.join(self.outdir,u"index_svg.xhtml"),u"index_svg.xhtml") - zipUpDir(svgzip, self.outdir, u"svg") - zipUpDir(svgzip, self.outdir, u"img") - svgzip.close() - - def cleanup(self): - if os.path.isdir(self.outdir): - shutil.rmtree(self.outdir, True) - -def usage(progname): - print u"Removes DRM protection from Topaz ebooks and extracts the contents" - print u"Usage:" - print u" {0} [-k ] [-p ] [-s ] ".format(progname) - -# Main -def cli_main(): - argv=unicode_argv() - progname = os.path.basename(argv[0]) - print u"TopazExtract v{0}.".format(__version__) - - try: - opts, args = getopt.getopt(argv[1:], "k:p:s:x") - except getopt.GetoptError, err: - 
print u"Error in options or arguments: {0}".format(err.args[0]) - usage(progname) - return 1 - if len(args)<2: - usage(progname) - return 1 - - infile = args[0] - outdir = args[1] - if not os.path.isfile(infile): - print u"Input File {0} Does Not Exist.".format(infile) - return 1 - - if not os.path.exists(outdir): - print u"Output Directory {0} Does Not Exist.".format(outdir) - return 1 - - kDatabaseFiles = [] - serials = [] - pids = [] - - for o, a in opts: - if o == '-k': - if a == None : - raise DrmException("Invalid parameter for -k") - kDatabaseFiles.append(a) - if o == '-p': - if a == None : - raise DrmException("Invalid parameter for -p") - pids = a.split(',') - if o == '-s': - if a == None : - raise DrmException("Invalid parameter for -s") - serials = [serial.replace(" ","") for serial in a.split(',')] - - bookname = os.path.splitext(os.path.basename(infile))[0] - - tb = TopazBook(infile) - title = tb.getBookTitle() - print u"Processing Book: {0}".format(title) - md1, md2 = tb.getPIDMetaInfo() - pids.extend(kgenpids.getPidList(md1, md2, serials, kDatabaseFiles)) - - try: - print u"Decrypting Book" - tb.processBook(pids) - - print u" Creating HTML ZIP Archive" - zipname = os.path.join(outdir, bookname + u"_nodrm.htmlz") - tb.getFile(zipname) - - print u" Creating SVG ZIP Archive" - zipname = os.path.join(outdir, bookname + u"_SVG.zip") - tb.getSVGZip(zipname) - - # removing internal temporary directory of pieces - tb.cleanup() - - except DrmException, e: - print u"Decryption failed\n{0}".format(traceback.format_exc()) - - try: - tb.cleanup() - except: - pass - return 1 - - except Exception, e: - print u"Decryption failed\m{0}".format(traceback.format_exc()) - try: - tb.cleanup() + name, ccn = i.split(',') + # Generate Barnes & Noble EPUB user key from name and credit card number. 
+ userkeys.append(generate_key(name, ccn)) except: pass - return 1 - - return 0 - - -if __name__ == '__main__': - sys.stdout=SafeUnbuffered(sys.stdout) - sys.stderr=SafeUnbuffered(sys.stderr) - sys.exit(cli_main()) + return userkeys diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/wineutils.py b/DeDRM_calibre_plugin/DeDRM_plugin/wineutils.py new file mode 100644 index 0000000..b54db80 --- /dev/null +++ b/DeDRM_calibre_plugin/DeDRM_plugin/wineutils.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import with_statement + +__license__ = 'GPL v3' + +# Standard Python modules. +import os, sys, re, hashlib +from calibre_plugins.dedrm.__init__ import PLUGIN_NAME, PLUGIN_VERSION + +def WineGetKeys(scriptpath, extension, wineprefix=""): + import subprocess + from subprocess import Popen, PIPE, STDOUT + + import subasyncio + from subasyncio import Process + + if extension == u".k4i": + import json + + basepath, script = os.path.split(scriptpath) + print u"{0} v{1}: Running {2} under Wine".format(PLUGIN_NAME, PLUGIN_VERSION, script) + + outdirpath = os.path.join(basepath, u"winekeysdir") + if not os.path.exists(outdirpath): + os.makedirs(outdirpath) + + if wineprefix != "" and os.path.exists(wineprefix): + cmdline = u"WINEPREFIX=\"{2}\" wine python.exe \"{0}\" \"{1}\"".format(scriptpath,outdirpath,wineprefix) + else: + cmdline = u"wine python.exe \"{0}\" \"{1}\"".format(scriptpath,outdirpath) + print u"{0} v{1}: Command line: “{2}”".format(PLUGIN_NAME, PLUGIN_VERSION, cmdline) + + try: + cmdline = cmdline.encode(sys.getfilesystemencoding()) + p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=sys.stdout, stderr=STDOUT, close_fds=False) + result = p2.wait("wait") + except Exception, e: + print u"{0} v{1}: Wine subprocess call error: {2}".format(PLUGIN_NAME, PLUGIN_VERSION, e.args[0]) + return [] + + winekeys = [] + # get any files with extension in the output dir + files = [f for f in os.listdir(outdirpath) if 
f.endswith(extension)] + for filename in files: + try: + fpath = os.path.join(outdirpath, filename) + with open(fpath, 'rb') as keyfile: + if extension == u".k4i": + new_key_value = json.loads(keyfile.read()) + else: + new_key_value = keyfile.read() + winekeys.append(new_key_value) + except: + print u"{0} v{1}: Error loading file {2}".format(PLUGIN_NAME, PLUGIN_VERSION, filename) + traceback.print_exc() + os.remove(fpath) + print u"{0} v{1}: Found and decrypted {2} {3}".format(PLUGIN_NAME, PLUGIN_VERSION, len(winekeys), u"key file" if len(winekeys) == 1 else u"key files") + return winekeys diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py b/DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py index c730607..4a55a69 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py @@ -1,39 +1,1400 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import with_statement - -__license__ = 'GPL v3' - -DETAILED_MESSAGE = \ -'You have personal information stored in this plugin\'s customization '+ \ -'string from a previous version of this plugin.\n\n'+ \ -'This new version of the plugin can convert that info '+ \ -'into key data that the new plugin can then use (which doesn\'t '+ \ -'require personal information to be stored/displayed in an insecure '+ \ -'manner like the old plugin did).\n\nIf you choose NOT to migrate this data at this time '+ \ -'you will be prompted to save that personal data to a file elsewhere; and you\'ll have '+ \ -'to manually re-configure this plugin with your information.\n\nEither way... ' + \ -'this new version of the plugin will not be responsible for storing that personal '+ \ -'info in plain sight any longer.' 
- -def uStrCmp (s1, s2, caseless=False): - import unicodedata as ud - str1 = s1 if isinstance(s1, unicode) else unicode(s1) - str2 = s2 if isinstance(s2, unicode) else unicode(s2) - if caseless: - return ud.normalize('NFC', str1.lower()) == ud.normalize('NFC', str2.lower()) - else: - return ud.normalize('NFC', str1) == ud.normalize('NFC', str2) - -def parseCustString(keystuff): - userkeys = [] - ar = keystuff.split(':') - for i in ar: - try: - name, ccn = i.split(',') - # Generate Barnes & Noble EPUB user key from name and credit card number. - userkeys.append(generate_key(name, ccn)) - except: +""" +Read and write ZIP files. +""" +import struct, os, time, sys, shutil +import binascii, cStringIO, stat +import io +import re + +try: + import zlib # We may need its compression method + crc32 = zlib.crc32 +except ImportError: + zlib = None + crc32 = binascii.crc32 + +__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", + "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] + +class BadZipfile(Exception): + pass + + +class LargeZipFile(Exception): + """ + Raised when writing a zipfile, the zipfile requires ZIP64 extensions + and those extensions are disabled. + """ + +error = BadZipfile # The exception raised by this module + +ZIP64_LIMIT = (1 << 31) - 1 +ZIP_FILECOUNT_LIMIT = 1 << 16 +ZIP_MAX_COMMENT = (1 << 16) - 1 + +# constants for Zip file compression methods +ZIP_STORED = 0 +ZIP_DEFLATED = 8 +# Other ZIP compression methods not supported + +# Below are some formats and associated data for reading/writing headers using +# the struct module. 
The names and structures of headers/records are those used +# in the PKWARE description of the ZIP file format: +# http://www.pkware.com/documents/casestudies/APPNOTE.TXT +# (URL valid as of January 2008) + +# The "end of central directory" structure, magic number, size, and indices +# (section V.I in the format document) +structEndArchive = "<4s4H2LH" +stringEndArchive = "PK\005\006" +sizeEndCentDir = struct.calcsize(structEndArchive) + +_ECD_SIGNATURE = 0 +_ECD_DISK_NUMBER = 1 +_ECD_DISK_START = 2 +_ECD_ENTRIES_THIS_DISK = 3 +_ECD_ENTRIES_TOTAL = 4 +_ECD_SIZE = 5 +_ECD_OFFSET = 6 +_ECD_COMMENT_SIZE = 7 +# These last two indices are not part of the structure as defined in the +# spec, but they are used internally by this module as a convenience +_ECD_COMMENT = 8 +_ECD_LOCATION = 9 + +# The "central directory" structure, magic number, size, and indices +# of entries in the structure (section V.F in the format document) +structCentralDir = "<4s4B4HL2L5H2L" +stringCentralDir = "PK\001\002" +sizeCentralDir = struct.calcsize(structCentralDir) + +# indexes of entries in the central directory structure +_CD_SIGNATURE = 0 +_CD_CREATE_VERSION = 1 +_CD_CREATE_SYSTEM = 2 +_CD_EXTRACT_VERSION = 3 +_CD_EXTRACT_SYSTEM = 4 +_CD_FLAG_BITS = 5 +_CD_COMPRESS_TYPE = 6 +_CD_TIME = 7 +_CD_DATE = 8 +_CD_CRC = 9 +_CD_COMPRESSED_SIZE = 10 +_CD_UNCOMPRESSED_SIZE = 11 +_CD_FILENAME_LENGTH = 12 +_CD_EXTRA_FIELD_LENGTH = 13 +_CD_COMMENT_LENGTH = 14 +_CD_DISK_NUMBER_START = 15 +_CD_INTERNAL_FILE_ATTRIBUTES = 16 +_CD_EXTERNAL_FILE_ATTRIBUTES = 17 +_CD_LOCAL_HEADER_OFFSET = 18 + +# The "local file header" structure, magic number, size, and indices +# (section V.A in the format document) +structFileHeader = "<4s2B4HL2L2H" +stringFileHeader = "PK\003\004" +sizeFileHeader = struct.calcsize(structFileHeader) + +_FH_SIGNATURE = 0 +_FH_EXTRACT_VERSION = 1 +_FH_EXTRACT_SYSTEM = 2 +_FH_GENERAL_PURPOSE_FLAG_BITS = 3 +_FH_COMPRESSION_METHOD = 4 +_FH_LAST_MOD_TIME = 5 +_FH_LAST_MOD_DATE = 6 +_FH_CRC = 7 
+_FH_COMPRESSED_SIZE = 8 +_FH_UNCOMPRESSED_SIZE = 9 +_FH_FILENAME_LENGTH = 10 +_FH_EXTRA_FIELD_LENGTH = 11 + +# The "Zip64 end of central directory locator" structure, magic number, and size +structEndArchive64Locator = "<4sLQL" +stringEndArchive64Locator = "PK\x06\x07" +sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) + +# The "Zip64 end of central directory" record, magic number, size, and indices +# (section V.G in the format document) +structEndArchive64 = "<4sQ2H2L4Q" +stringEndArchive64 = "PK\x06\x06" +sizeEndCentDir64 = struct.calcsize(structEndArchive64) + +_CD64_SIGNATURE = 0 +_CD64_DIRECTORY_RECSIZE = 1 +_CD64_CREATE_VERSION = 2 +_CD64_EXTRACT_VERSION = 3 +_CD64_DISK_NUMBER = 4 +_CD64_DISK_NUMBER_START = 5 +_CD64_NUMBER_ENTRIES_THIS_DISK = 6 +_CD64_NUMBER_ENTRIES_TOTAL = 7 +_CD64_DIRECTORY_SIZE = 8 +_CD64_OFFSET_START_CENTDIR = 9 + +def _check_zipfile(fp): + try: + if _EndRecData(fp): + return True # file has correct magic number + except IOError: + pass + return False + +def is_zipfile(filename): + """Quickly see if a file is a ZIP file by checking the magic number. + + The filename argument may be a file or file-like object too. 
+ """ + result = False + try: + if hasattr(filename, "read"): + result = _check_zipfile(fp=filename) + else: + with open(filename, "rb") as fp: + result = _check_zipfile(fp) + except IOError: + pass + return result + +def _EndRecData64(fpin, offset, endrec): + """ + Read the ZIP64 end-of-archive records and use that to update endrec + """ + fpin.seek(offset - sizeEndCentDir64Locator, 2) + data = fpin.read(sizeEndCentDir64Locator) + sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) + if sig != stringEndArchive64Locator: + return endrec + + if diskno != 0 or disks != 1: + raise BadZipfile("zipfiles that span multiple disks are not supported") + + # Assume no 'zip64 extensible data' + fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) + data = fpin.read(sizeEndCentDir64) + sig, sz, create_version, read_version, disk_num, disk_dir, \ + dircount, dircount2, dirsize, diroffset = \ + struct.unpack(structEndArchive64, data) + if sig != stringEndArchive64: + return endrec + + # Update the original endrec using data from the ZIP64 record + endrec[_ECD_SIGNATURE] = sig + endrec[_ECD_DISK_NUMBER] = disk_num + endrec[_ECD_DISK_START] = disk_dir + endrec[_ECD_ENTRIES_THIS_DISK] = dircount + endrec[_ECD_ENTRIES_TOTAL] = dircount2 + endrec[_ECD_SIZE] = dirsize + endrec[_ECD_OFFSET] = diroffset + return endrec + + +def _EndRecData(fpin): + """Return data from the "End of Central Directory" record, or None. + + The data is a list of the nine items in the ZIP "End of central dir" + record followed by a tenth item, the file seek offset of this record.""" + + # Determine file size + fpin.seek(0, 2) + filesize = fpin.tell() + + # Check to see if this is ZIP file with no archive comment (the + # "end of central directory" structure should be the last item in the + # file if this is the case). 
+ try: + fpin.seek(-sizeEndCentDir, 2) + except IOError: + return None + data = fpin.read() + if data[0:4] == stringEndArchive and data[-2:] == "\000\000": + # the signature is correct and there's no comment, unpack structure + endrec = struct.unpack(structEndArchive, data) + endrec=list(endrec) + + # Append a blank comment and record start offset + endrec.append("") + endrec.append(filesize - sizeEndCentDir) + + # Try to read the "Zip64 end of central directory" structure + return _EndRecData64(fpin, -sizeEndCentDir, endrec) + + # Either this is not a ZIP file, or it is a ZIP file with an archive + # comment. Search the end of the file for the "end of central directory" + # record signature. The comment is the last item in the ZIP file and may be + # up to 64K long. It is assumed that the "end of central directory" magic + # number does not appear in the comment. + maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) + fpin.seek(maxCommentStart, 0) + data = fpin.read() + start = data.rfind(stringEndArchive) + if start >= 0: + # found the magic number; attempt to unpack and interpret + recData = data[start:start+sizeEndCentDir] + endrec = list(struct.unpack(structEndArchive, recData)) + comment = data[start+sizeEndCentDir:] + # check that comment length is correct + if endrec[_ECD_COMMENT_SIZE] == len(comment): + # Append the archive comment and start offset + endrec.append(comment) + endrec.append(maxCommentStart + start) + + # Try to read the "Zip64 end of central directory" structure + return _EndRecData64(fpin, maxCommentStart + start - filesize, + endrec) + + # Unable to find a valid end of central directory structure + return + + +class ZipInfo (object): + """Class with attributes describing each file in the ZIP archive.""" + + __slots__ = ( + 'orig_filename', + 'filename', + 'date_time', + 'compress_type', + 'comment', + 'extra', + 'create_system', + 'create_version', + 'extract_version', + 'reserved', + 'flag_bits', + 'volume', + 'internal_attr', 
+ 'external_attr', + 'header_offset', + 'CRC', + 'compress_size', + 'file_size', + '_raw_time', + ) + + def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): + self.orig_filename = filename # Original file name in archive + + # Terminate the file name at the first null byte. Null bytes in file + # names are used as tricks by viruses in archives. + null_byte = filename.find(chr(0)) + if null_byte >= 0: + filename = filename[0:null_byte] + # This is used to ensure paths in generated ZIP files always use + # forward slashes as the directory separator, as required by the + # ZIP format specification. + if os.sep != "/" and os.sep in filename: + filename = filename.replace(os.sep, "/") + + self.filename = filename # Normalized file name + self.date_time = date_time # year, month, day, hour, min, sec + # Standard values: + self.compress_type = ZIP_STORED # Type of compression for the file + self.comment = "" # Comment for each file + self.extra = "" # ZIP extra data + if sys.platform == 'win32': + self.create_system = 0 # System which created ZIP archive + else: + # Assume everything else is unix-y + self.create_system = 3 # System which created ZIP archive + self.create_version = 20 # Version which created ZIP archive + self.extract_version = 20 # Version needed to extract archive + self.reserved = 0 # Must be zero + self.flag_bits = 0 # ZIP flag bits + self.volume = 0 # Volume number of file header + self.internal_attr = 0 # Internal attributes + self.external_attr = 0 # External file attributes + # Other attributes are set by class ZipFile: + # header_offset Byte offset to the file header + # CRC CRC-32 of the uncompressed file + # compress_size Size of the compressed file + # file_size Size of the uncompressed file + + def FileHeader(self): + """Return the per-file header as a string.""" + dt = self.date_time + dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] + dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + if self.flag_bits & 0x08: + # Set these 
to zero because we write them after the file data + CRC = compress_size = file_size = 0 + else: + CRC = self.CRC + compress_size = self.compress_size + file_size = self.file_size + + extra = self.extra + + if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: + # File is larger than what fits into a 4 byte integer, + # fall back to the ZIP64 extension + fmt = '= 24: + counts = unpack('> 1) & 0x7FFFFFFF) ^ poly + else: + crc = ((crc >> 1) & 0x7FFFFFFF) + table[i] = crc + return table + crctable = _GenerateCRCTable() + + def _crc32(self, ch, crc): + """Compute the CRC32 primitive on one byte.""" + return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] + + def __init__(self, pwd): + self.key0 = 305419896 + self.key1 = 591751049 + self.key2 = 878082192 + for p in pwd: + self._UpdateKeys(p) + + def _UpdateKeys(self, c): + self.key0 = self._crc32(c, self.key0) + self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 + self.key1 = (self.key1 * 134775813 + 1) & 4294967295 + self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) + + def __call__(self, c): + """Decrypt a single character.""" + c = ord(c) + k = self.key2 | 2 + c = c ^ (((k * (k^1)) >> 8) & 255) + c = chr(c) + self._UpdateKeys(c) + return c + +class ZipExtFile(io.BufferedIOBase): + """File-like object for reading an archive member. + Is returned by ZipFile.open(). + """ + + # Max size supported by decompressor. + MAX_N = 1 << 31 - 1 + + # Read from compressed files in 4k blocks. + MIN_READ_SIZE = 4096 + + # Search for universal newlines or line chunks. 
+ PATTERN = re.compile(r'^(?P[^\r\n]+)|(?P\n|\r\n?)') + + def __init__(self, fileobj, mode, zipinfo, decrypter=None): + self._fileobj = fileobj + self._decrypter = decrypter + + self._compress_type = zipinfo.compress_type + self._compress_size = zipinfo.compress_size + self._compress_left = zipinfo.compress_size + + if self._compress_type == ZIP_DEFLATED: + self._decompressor = zlib.decompressobj(-15) + self._unconsumed = '' + + self._readbuffer = '' + self._offset = 0 + + self._universal = 'U' in mode + self.newlines = None + + # Adjust read size for encrypted files since the first 12 bytes + # are for the encryption/password information. + if self._decrypter is not None: + self._compress_left -= 12 + + self.mode = mode + self.name = zipinfo.filename + + def readline(self, limit=-1): + """Read and return a line from the stream. + + If limit is specified, at most limit bytes will be read. + """ + + if not self._universal and limit < 0: + # Shortcut common case - newline found in buffer. + i = self._readbuffer.find('\n', self._offset) + 1 + if i > 0: + line = self._readbuffer[self._offset: i] + self._offset = i + return line + + if not self._universal: + return io.BufferedIOBase.readline(self, limit) + + line = '' + while limit < 0 or len(line) < limit: + readahead = self.peek(2) + if readahead == '': + return line + + # + # Search for universal newlines or line chunks. + # + # The pattern returns either a line chunk or a newline, but not + # both. Combined with peek(2), we are assured that the sequence + # '\r\n' is always retrieved completely and never split into + # separate newlines - '\r', '\n' due to coincidental readaheads. 
+ # + match = self.PATTERN.search(readahead) + newline = match.group('newline') + if newline is not None: + if self.newlines is None: + self.newlines = [] + if newline not in self.newlines: + self.newlines.append(newline) + self._offset += len(newline) + return line + '\n' + + chunk = match.group('chunk') + if limit >= 0: + chunk = chunk[: limit - len(line)] + + self._offset += len(chunk) + line += chunk + + return line + + def peek(self, n=1): + """Returns buffered bytes without advancing the position.""" + if n > len(self._readbuffer) - self._offset: + chunk = self.read(n) + self._offset -= len(chunk) + + # Return up to 512 bytes to reduce allocation overhead for tight loops. + return self._readbuffer[self._offset: self._offset + 512] + + def readable(self): + return True + + def read(self, n=-1): + """Read and return up to n bytes. + If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. + """ + + buf = '' + while n < 0 or n is None or n > len(buf): + data = self.read1(n) + if len(data) == 0: + return buf + + buf += data + + return buf + + def read1(self, n): + """Read up to n bytes with at most one read() system call.""" + + # Simplify algorithm (branching) by transforming negative n to large n. + if n < 0 or n is None: + n = self.MAX_N + + # Bytes available in read buffer. + len_readbuffer = len(self._readbuffer) - self._offset + + # Read from file. + if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed): + nbytes = n - len_readbuffer - len(self._unconsumed) + nbytes = max(nbytes, self.MIN_READ_SIZE) + nbytes = min(nbytes, self._compress_left) + + data = self._fileobj.read(nbytes) + self._compress_left -= len(data) + + if data and self._decrypter is not None: + data = ''.join(map(self._decrypter, data)) + + if self._compress_type == ZIP_STORED: + self._readbuffer = self._readbuffer[self._offset:] + data + self._offset = 0 + else: + # Prepare deflated bytes for decompression. 
+ self._unconsumed += data + + # Handle unconsumed data. + if (len(self._unconsumed) > 0 and n > len_readbuffer and + self._compress_type == ZIP_DEFLATED): + data = self._decompressor.decompress( + self._unconsumed, + max(n - len_readbuffer, self.MIN_READ_SIZE) + ) + + self._unconsumed = self._decompressor.unconsumed_tail + if len(self._unconsumed) == 0 and self._compress_left == 0: + data += self._decompressor.flush() + + self._readbuffer = self._readbuffer[self._offset:] + data + self._offset = 0 + + # Read from buffer. + data = self._readbuffer[self._offset: self._offset + n] + self._offset += len(data) + return data + + + +class ZipFile: + """ Class with methods to open, read, write, close, list zip files. + + z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False) + + file: Either the path to the file, or a file-like object. + If it is a path, the file will be opened and closed by ZipFile. + mode: The mode can be either read "r", write "w" or append "a". + compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). + allowZip64: if True ZipFile will create files with ZIP64 extensions when + needed, otherwise it will raise an exception when this would + be necessary. 
+ + """ + + fp = None # Set here since __del__ checks it + + def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): + """Open the ZIP file with mode read "r", write "w" or append "a".""" + if mode not in ("r", "w", "a"): + raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') + + if compression == ZIP_STORED: pass - return userkeys + elif compression == ZIP_DEFLATED: + if not zlib: + raise RuntimeError,\ + "Compression requires the (missing) zlib module" + else: + raise RuntimeError, "That compression method is not supported" + + self._allowZip64 = allowZip64 + self._didModify = False + self.debug = 0 # Level of printing: 0 through 3 + self.NameToInfo = {} # Find file info given name + self.filelist = [] # List of ZipInfo instances for archive + self.compression = compression # Method of compression + self.mode = key = mode.replace('b', '')[0] + self.pwd = None + self.comment = '' + + # Check if we were passed a file-like object + if isinstance(file, basestring): + self._filePassed = 0 + self.filename = file + modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} + try: + self.fp = open(file, modeDict[mode]) + except IOError: + if mode == 'a': + mode = key = 'w' + self.fp = open(file, modeDict[mode]) + else: + raise + else: + self._filePassed = 1 + self.fp = file + self.filename = getattr(file, 'name', None) + + if key == 'r': + self._GetContents() + elif key == 'w': + pass + elif key == 'a': + try: # See if file is a zip file + self._RealGetContents() + # seek to start of directory and overwrite + self.fp.seek(self.start_dir, 0) + except BadZipfile: # file is not a zip file, just append + self.fp.seek(0, 2) + else: + if not self._filePassed: + self.fp.close() + self.fp = None + raise RuntimeError, 'Mode must be "r", "w" or "a"' + + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() + + def _GetContents(self): + """Read the directory, making sure we close the file if the format + is bad.""" + 
try: + self._RealGetContents() + except BadZipfile: + if not self._filePassed: + self.fp.close() + self.fp = None + raise + + def _RealGetContents(self): + """Read in the table of contents for the ZIP file.""" + fp = self.fp + endrec = _EndRecData(fp) + if not endrec: + raise BadZipfile, "File is not a zip file" + if self.debug > 1: + print endrec + size_cd = endrec[_ECD_SIZE] # bytes in central directory + offset_cd = endrec[_ECD_OFFSET] # offset of central directory + self.comment = endrec[_ECD_COMMENT] # archive comment + + # "concat" is zero, unless zip was concatenated to another file + concat = endrec[_ECD_LOCATION] - size_cd - offset_cd + if endrec[_ECD_SIGNATURE] == stringEndArchive64: + # If Zip64 extension structures are present, account for them + concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + + if self.debug > 2: + inferred = concat + offset_cd + print "given, inferred, offset", offset_cd, inferred, concat + # self.start_dir: Position of start of central directory + self.start_dir = offset_cd + concat + fp.seek(self.start_dir, 0) + data = fp.read(size_cd) + fp = cStringIO.StringIO(data) + total = 0 + while total < size_cd: + centdir = fp.read(sizeCentralDir) + if centdir[0:4] != stringCentralDir: + raise BadZipfile, "Bad magic number for central directory" + centdir = struct.unpack(structCentralDir, centdir) + if self.debug > 2: + print centdir + filename = fp.read(centdir[_CD_FILENAME_LENGTH]) + # Create ZipInfo instance to store file information + x = ZipInfo(filename) + x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) + x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) + x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + (x.create_version, x.create_system, x.extract_version, x.reserved, + x.flag_bits, x.compress_type, t, d, + x.CRC, x.compress_size, x.file_size) = centdir[1:12] + x.volume, x.internal_attr, x.external_attr = centdir[15:18] + # Convert date/time code to (year, month, day, hour, min, sec) + x._raw_time = t + x.date_time = ( 
(d>>9)+1980, (d>>5)&0xF, d&0x1F, + t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) + + x._decodeExtra() + x.header_offset = x.header_offset + concat + x.filename = x._decodeFilename() + self.filelist.append(x) + self.NameToInfo[x.filename] = x + + # update total bytes read from central directory + total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] + + centdir[_CD_EXTRA_FIELD_LENGTH] + + centdir[_CD_COMMENT_LENGTH]) + + if self.debug > 2: + print "total", total + + + def namelist(self): + """Return a list of file names in the archive.""" + l = [] + for data in self.filelist: + l.append(data.filename) + return l + + def infolist(self): + """Return a list of class ZipInfo instances for files in the + archive.""" + return self.filelist + + def printdir(self): + """Print a table of contents for the zip file.""" + print "%-46s %19s %12s" % ("File Name", "Modified ", "Size") + for zinfo in self.filelist: + date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] + print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size) + + def testzip(self): + """Read all the files and check the CRC.""" + chunk_size = 2 ** 20 + for zinfo in self.filelist: + try: + # Read by chunks, to avoid an OverflowError or a + # MemoryError with very large embedded files. 
+ f = self.open(zinfo.filename, "r") + while f.read(chunk_size): # Check CRC-32 + pass + except BadZipfile: + return zinfo.filename + + def getinfo(self, name): + """Return the instance of ZipInfo given 'name'.""" + info = self.NameToInfo.get(name) + if info is None: + raise KeyError( + 'There is no item named %r in the archive' % name) + + return info + + def setpassword(self, pwd): + """Set default password for encrypted files.""" + self.pwd = pwd + + def read(self, name, pwd=None): + """Return file bytes (as a string) for name.""" + return self.open(name, "r", pwd).read() + + def open(self, name, mode="r", pwd=None): + """Return file-like object for 'name'.""" + if mode not in ("r", "U", "rU"): + raise RuntimeError, 'open() requires mode "r", "U", or "rU"' + if not self.fp: + raise RuntimeError, \ + "Attempt to read ZIP archive that was already closed" + + # Only open a new file for instances where we were not + # given a file object in the constructor + if self._filePassed: + zef_file = self.fp + else: + zef_file = open(self.filename, 'rb') + + # Make sure we have an info object + if isinstance(name, ZipInfo): + # 'name' is already an info object + zinfo = name + else: + # Get info object for name + zinfo = self.getinfo(name) + + zef_file.seek(zinfo.header_offset, 0) + + # Skip the file header: + fheader = zef_file.read(sizeFileHeader) + if fheader[0:4] != stringFileHeader: + raise BadZipfile, "Bad magic number for file header" + + fheader = struct.unpack(structFileHeader, fheader) + fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) + if fheader[_FH_EXTRA_FIELD_LENGTH]: + zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) + + if fname != zinfo.orig_filename: + raise BadZipfile, \ + 'File name in directory "%s" and header "%s" differ.' 
% ( + zinfo.orig_filename, fname) + + # check for encrypted flag & handle password + is_encrypted = zinfo.flag_bits & 0x1 + zd = None + if is_encrypted: + if not pwd: + pwd = self.pwd + if not pwd: + raise RuntimeError, "File %s is encrypted, " \ + "password required for extraction" % name + + zd = _ZipDecrypter(pwd) + # The first 12 bytes in the cypher stream is an encryption header + # used to strengthen the algorithm. The first 11 bytes are + # completely random, while the 12th contains the MSB of the CRC, + # or the MSB of the file time depending on the header type + # and is used to check the correctness of the password. + bytes = zef_file.read(12) + h = map(zd, bytes[0:12]) + if zinfo.flag_bits & 0x8: + # compare against the file type from extended local headers + check_byte = (zinfo._raw_time >> 8) & 0xff + else: + # compare against the CRC otherwise + check_byte = (zinfo.CRC >> 24) & 0xff + if ord(h[11]) != check_byte: + raise RuntimeError("Bad password for file", name) + + return ZipExtFile(zef_file, mode, zinfo, zd) + + def extract(self, member, path=None, pwd=None): + """Extract a member from the archive to the current working directory, + using its full name. Its file information is extracted as accurately + as possible. `member' may be a filename or a ZipInfo object. You can + specify a different directory using `path'. + """ + if not isinstance(member, ZipInfo): + member = self.getinfo(member) + + if path is None: + path = os.getcwd() + + return self._extract_member(member, path, pwd) + + def extractall(self, path=None, members=None, pwd=None): + """Extract all members from the archive to the current working + directory. `path' specifies a different directory to extract to. + `members' is optional and must be a subset of the list returned + by namelist(). 
+ """ + if members is None: + members = self.namelist() + + for zipinfo in members: + self.extract(zipinfo, path, pwd) + + def _extract_member(self, member, targetpath, pwd): + """Extract the ZipInfo object 'member' to a physical + file on the path targetpath. + """ + # build the destination pathname, replacing + # forward slashes to platform specific separators. + # Strip trailing path separator, unless it represents the root. + if (targetpath[-1:] in (os.path.sep, os.path.altsep) + and len(os.path.splitdrive(targetpath)[1]) > 1): + targetpath = targetpath[:-1] + + # don't include leading "/" from file name if present + if member.filename[0] == '/': + targetpath = os.path.join(targetpath, member.filename[1:]) + else: + targetpath = os.path.join(targetpath, member.filename) + + targetpath = os.path.normpath(targetpath) + + # Create all upper directories if necessary. + upperdirs = os.path.dirname(targetpath) + if upperdirs and not os.path.exists(upperdirs): + os.makedirs(upperdirs) + + if member.filename[-1] == '/': + if not os.path.isdir(targetpath): + os.mkdir(targetpath) + return targetpath + + source = self.open(member, pwd=pwd) + target = file(targetpath, "wb") + shutil.copyfileobj(source, target) + source.close() + target.close() + + return targetpath + + def _writecheck(self, zinfo): + """Check for errors before writing a file to the archive.""" + if zinfo.filename in self.NameToInfo: + if self.debug: # Warning for duplicate names + print "Duplicate name:", zinfo.filename + if self.mode not in ("w", "a"): + raise RuntimeError, 'write() requires mode "w" or "a"' + if not self.fp: + raise RuntimeError, \ + "Attempt to write ZIP archive that was already closed" + if zinfo.compress_type == ZIP_DEFLATED and not zlib: + raise RuntimeError, \ + "Compression requires the (missing) zlib module" + if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): + raise RuntimeError, \ + "That compression method is not supported" + if zinfo.file_size > ZIP64_LIMIT: + if not 
self._allowZip64: + raise LargeZipFile("Filesize would require ZIP64 extensions") + if zinfo.header_offset > ZIP64_LIMIT: + if not self._allowZip64: + raise LargeZipFile("Zipfile size would require ZIP64 extensions") + + def write(self, filename, arcname=None, compress_type=None): + """Put the bytes from filename into the archive under the name + arcname.""" + if not self.fp: + raise RuntimeError( + "Attempt to write to ZIP archive that was already closed") + + st = os.stat(filename) + isdir = stat.S_ISDIR(st.st_mode) + mtime = time.localtime(st.st_mtime) + date_time = mtime[0:6] + # Create ZipInfo instance to store file information + if arcname is None: + arcname = filename + arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) + while arcname[0] in (os.sep, os.altsep): + arcname = arcname[1:] + if isdir: + arcname += '/' + zinfo = ZipInfo(arcname, date_time) + zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes + if compress_type is None: + zinfo.compress_type = self.compression + else: + zinfo.compress_type = compress_type + + zinfo.file_size = st.st_size + zinfo.flag_bits = 0x00 + zinfo.header_offset = self.fp.tell() # Start of header bytes + + self._writecheck(zinfo) + self._didModify = True + + if isdir: + zinfo.file_size = 0 + zinfo.compress_size = 0 + zinfo.CRC = 0 + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + self.fp.write(zinfo.FileHeader()) + return + + with open(filename, "rb") as fp: + # Must overwrite CRC and sizes with correct data later + zinfo.CRC = CRC = 0 + zinfo.compress_size = compress_size = 0 + zinfo.file_size = file_size = 0 + self.fp.write(zinfo.FileHeader()) + if zinfo.compress_type == ZIP_DEFLATED: + cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, -15) + else: + cmpr = None + while 1: + buf = fp.read(1024 * 8) + if not buf: + break + file_size = file_size + len(buf) + CRC = crc32(buf, CRC) & 0xffffffff + if cmpr: + buf = cmpr.compress(buf) + compress_size = compress_size + 
len(buf) + self.fp.write(buf) + if cmpr: + buf = cmpr.flush() + compress_size = compress_size + len(buf) + self.fp.write(buf) + zinfo.compress_size = compress_size + else: + zinfo.compress_size = file_size + zinfo.CRC = CRC + zinfo.file_size = file_size + # Seek backwards and write CRC and file sizes + position = self.fp.tell() # Preserve current position in file + self.fp.seek(zinfo.header_offset + 14, 0) + self.fp.write(struct.pack(" ZIP64_LIMIT \ + or zinfo.compress_size > ZIP64_LIMIT: + extra.append(zinfo.file_size) + extra.append(zinfo.compress_size) + file_size = 0xffffffff + compress_size = 0xffffffff + else: + file_size = zinfo.file_size + compress_size = zinfo.compress_size + + if zinfo.header_offset > ZIP64_LIMIT: + extra.append(zinfo.header_offset) + header_offset = 0xffffffffL + else: + header_offset = zinfo.header_offset + + extra_data = zinfo.extra + if extra: + # Append a ZIP64 field to the extra's + extra_data = struct.pack( + '>sys.stderr, (structCentralDir, + stringCentralDir, create_version, + zinfo.create_system, extract_version, zinfo.reserved, + zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, + zinfo.CRC, compress_size, file_size, + len(zinfo.filename), len(extra_data), len(zinfo.comment), + 0, zinfo.internal_attr, zinfo.external_attr, + header_offset) + raise + self.fp.write(centdir) + self.fp.write(filename) + self.fp.write(extra_data) + self.fp.write(zinfo.comment) + + pos2 = self.fp.tell() + # Write end-of-zip-archive record + centDirCount = count + centDirSize = pos2 - pos1 + centDirOffset = pos1 + if (centDirCount >= ZIP_FILECOUNT_LIMIT or + centDirOffset > ZIP64_LIMIT or + centDirSize > ZIP64_LIMIT): + # Need to write the ZIP64 end-of-archive records + zip64endrec = struct.pack( + structEndArchive64, stringEndArchive64, + 44, 45, 45, 0, 0, centDirCount, centDirCount, + centDirSize, centDirOffset) + self.fp.write(zip64endrec) + + zip64locrec = struct.pack( + structEndArchive64Locator, + stringEndArchive64Locator, 0, pos2, 1) + 
self.fp.write(zip64locrec) + centDirCount = min(centDirCount, 0xFFFF) + centDirSize = min(centDirSize, 0xFFFFFFFF) + centDirOffset = min(centDirOffset, 0xFFFFFFFF) + + # check for valid comment length + if len(self.comment) >= ZIP_MAX_COMMENT: + if self.debug > 0: + msg = 'Archive comment is too long; truncating to %d bytes' \ + % ZIP_MAX_COMMENT + self.comment = self.comment[:ZIP_MAX_COMMENT] + + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, centDirCount, centDirCount, + centDirSize, centDirOffset, len(self.comment)) + self.fp.write(endrec) + self.fp.write(self.comment) + self.fp.flush() + + if not self._filePassed: + self.fp.close() + self.fp = None + + +class PyZipFile(ZipFile): + """Class to create ZIP archives with Python library files and packages.""" + + def writepy(self, pathname, basename = ""): + """Add all files from "pathname" to the ZIP archive. + + If pathname is a package directory, search the directory and + all package subdirectories recursively for all *.py and enter + the modules into the archive. If pathname is a plain + directory, listdir *.py and enter all modules. Else, pathname + must be a Python *.py file and the module will be put into the + archive. Added modules are always module.pyo or module.pyc. + This method will compile the module.py into module.pyc if + necessary. 
+ """ + dir, name = os.path.split(pathname) + if os.path.isdir(pathname): + initname = os.path.join(pathname, "__init__.py") + if os.path.isfile(initname): + # This is a package directory, add it + if basename: + basename = "%s/%s" % (basename, name) + else: + basename = name + if self.debug: + print "Adding package in", pathname, "as", basename + fname, arcname = self._get_codename(initname[0:-3], basename) + if self.debug: + print "Adding", arcname + self.write(fname, arcname) + dirlist = os.listdir(pathname) + dirlist.remove("__init__.py") + # Add all *.py files and package subdirectories + for filename in dirlist: + path = os.path.join(pathname, filename) + root, ext = os.path.splitext(filename) + if os.path.isdir(path): + if os.path.isfile(os.path.join(path, "__init__.py")): + # This is a package directory, add it + self.writepy(path, basename) # Recursive call + elif ext == ".py": + fname, arcname = self._get_codename(path[0:-3], + basename) + if self.debug: + print "Adding", arcname + self.write(fname, arcname) + else: + # This is NOT a package directory, add its files at top level + if self.debug: + print "Adding files from directory", pathname + for filename in os.listdir(pathname): + path = os.path.join(pathname, filename) + root, ext = os.path.splitext(filename) + if ext == ".py": + fname, arcname = self._get_codename(path[0:-3], + basename) + if self.debug: + print "Adding", arcname + self.write(fname, arcname) + else: + if pathname[-3:] != ".py": + raise RuntimeError, \ + 'Files added with writepy() must end with ".py"' + fname, arcname = self._get_codename(pathname[0:-3], basename) + if self.debug: + print "Adding file", arcname + self.write(fname, arcname) + + def _get_codename(self, pathname, basename): + """Return (filename, archivename) for the path. + + Given a module name path, return the correct file path and + archive name, compiling if necessary. For example, given + /python/lib/string, return (/python/lib/string.pyc, string). 
+ """ + file_py = pathname + ".py" + file_pyc = pathname + ".pyc" + file_pyo = pathname + ".pyo" + if os.path.isfile(file_pyo) and \ + os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime: + fname = file_pyo # Use .pyo file + elif not os.path.isfile(file_pyc) or \ + os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime: + import py_compile + if self.debug: + print "Compiling", file_py + try: + py_compile.compile(file_py, file_pyc, None, True) + except py_compile.PyCompileError,err: + print err.msg + fname = file_pyc + else: + fname = file_pyc + archivename = os.path.split(fname)[1] + if basename: + archivename = "%s/%s" % (basename, archivename) + return (fname, archivename) + + +def main(args = None): + import textwrap + USAGE=textwrap.dedent("""\ + Usage: + zipfile.py -l zipfile.zip # Show listing of a zipfile + zipfile.py -t zipfile.zip # Test if a zipfile is valid + zipfile.py -e zipfile.zip target # Extract zipfile into target dir + zipfile.py -c zipfile.zip src ... # Create zipfile from sources + """) + if args is None: + args = sys.argv[1:] + + if not args or args[0] not in ('-l', '-c', '-e', '-t'): + print USAGE + sys.exit(1) + + if args[0] == '-l': + if len(args) != 2: + print USAGE + sys.exit(1) + zf = ZipFile(args[1], 'r') + zf.printdir() + zf.close() + + elif args[0] == '-t': + if len(args) != 2: + print USAGE + sys.exit(1) + zf = ZipFile(args[1], 'r') + zf.testzip() + print "Done testing" + + elif args[0] == '-e': + if len(args) != 3: + print USAGE + sys.exit(1) + + zf = ZipFile(args[1], 'r') + out = args[2] + for path in zf.namelist(): + if path.startswith('./'): + tgt = os.path.join(out, path[2:]) + else: + tgt = os.path.join(out, path) + + tgtdir = os.path.dirname(tgt) + if not os.path.exists(tgtdir): + os.makedirs(tgtdir) + with open(tgt, 'wb') as fp: + fp.write(zf.read(path)) + zf.close() + + elif args[0] == '-c': + if len(args) < 3: + print USAGE + sys.exit(1) + + def addToZip(zf, path, zippath): + if os.path.isfile(path): + zf.write(path, 
zippath, ZIP_DEFLATED) + elif os.path.isdir(path): + for nm in os.listdir(path): + addToZip(zf, + os.path.join(path, nm), os.path.join(zippath, nm)) + # else: ignore + + zf = ZipFile(args[1], 'w', allowZip64=True) + for src in args[2:]: + addToZip(zf, src, os.path.basename(src)) + + zf.close() + +if __name__ == "__main__": + main() diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py b/DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py index b54db80..8ddfae3 100644 --- a/DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py +++ b/DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py @@ -1,60 +1,188 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import with_statement +# zipfix.py, version 1.1 +# Copyright © 2010-2013 by some_updates, DiapDealer and Apprentice Alf + +# Released under the terms of the GNU General Public Licence, version 3 +# + +# Revision history: +# 1.0 - Initial release +# 1.1 - Updated to handle zip file metadata correctly + +""" +Re-write zip (or ePub) fixing problems with file names (and mimetype entry). 
+""" __license__ = 'GPL v3' +__version__ = "1.1" + +import sys +import zlib +import zipfilerugged +import os +import os.path +import getopt +from struct import unpack + + +_FILENAME_LEN_OFFSET = 26 +_EXTRA_LEN_OFFSET = 28 +_FILENAME_OFFSET = 30 +_MAX_SIZE = 64 * 1024 +_MIMETYPE = 'application/epub+zip' + +class ZipInfo(zipfilerugged.ZipInfo): + def __init__(self, *args, **kwargs): + if 'compress_type' in kwargs: + compress_type = kwargs.pop('compress_type') + super(ZipInfo, self).__init__(*args, **kwargs) + self.compress_type = compress_type + +class fixZip: + def __init__(self, zinput, zoutput): + self.ztype = 'zip' + if zinput.lower().find('.epub') >= 0 : + self.ztype = 'epub' + self.inzip = zipfilerugged.ZipFile(zinput,'r') + self.outzip = zipfilerugged.ZipFile(zoutput,'w') + # open the input zip for reading only as a raw file + self.bzf = file(zinput,'rb') + + def getlocalname(self, zi): + local_header_offset = zi.header_offset + self.bzf.seek(local_header_offset + _FILENAME_LEN_OFFSET) + leninfo = self.bzf.read(2) + local_name_length, = unpack(' 0: + if len(cmpdata) > _MAX_SIZE : + newdata = cmpdata[0:_MAX_SIZE] + cmpdata = cmpdata[_MAX_SIZE:] + else: + newdata = cmpdata + cmpdata = '' + newdata = dc.decompress(newdata) + unprocessed = dc.unconsumed_tail + if len(unprocessed) == 0: + newdata += dc.flush() + data += newdata + cmpdata += unprocessed + unprocessed = '' + return data + + def getfiledata(self, zi): + # get file name length and exta data length to find start of file data + local_header_offset = zi.header_offset + + self.bzf.seek(local_header_offset + _FILENAME_LEN_OFFSET) + leninfo = self.bzf.read(2) + local_name_length, = unpack('