diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Info.plist b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Info.plist
index 9a030d4..cfd9fcc 100644
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Info.plist
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Info.plist
@@ -24,19 +24,19 @@
CFBundleExecutable
droplet
CFBundleGetInfoString
- DeDRM AppleScript 6.0.7. Written 2010–2013 by Apprentice Alf and others.
+ DeDRM AppleScript 6.0.8. Written 2010–2013 by Apprentice Alf and others.
CFBundleIconFile
DeDRM
CFBundleIdentifier
com.apple.ScriptEditor.id.707CCCD5-0C6C-4BEB-B67C-B6E866ADE85A
CFBundleInfoDictionaryVersion
- 6.0.7
+ 6.0.8
CFBundleName
DeDRM
CFBundlePackageType
APPL
CFBundleShortVersionString
- 6.0.7
+ 6.0.8
CFBundleSignature
dplt
LSRequiresCarbon
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/__init__.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/__init__.py
index caed6e8..37d4cb1 100644
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/__init__.py
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/__init__.py
@@ -31,14 +31,17 @@ __docformat__ = 'restructuredtext en'
# 6.0.3 - Fixes for Kindle for Mac and Windows non-ascii user names
# 6.0.4 - Fixes for stand-alone scripts and applications
# and pdb files in plugin and initial conversion of prefs.
+# 6.0.5 - Fix a key issue
# 6.0.6 - Fix up an incorrect function call
+# 6.0.7 - Error handling for incomplete PDF metadata
+# 6.0.8 - Fixes a Wine key issue and topaz support
"""
Decrypt DRMed ebooks.
"""
PLUGIN_NAME = u"DeDRM"
-PLUGIN_VERSION_TUPLE = (6, 0, 7)
+PLUGIN_VERSION_TUPLE = (6, 0, 8)
PLUGIN_VERSION = u".".join([unicode(str(x)) for x in PLUGIN_VERSION_TUPLE])
# Include an html helpfile in the plugin's zipfile with the following name.
RESOURCE_NAME = PLUGIN_NAME + '_Help.htm'
@@ -313,7 +316,7 @@ class DeDRM(FileTypePlugin):
from wineutils import WineGetKeys
scriptpath = os.path.join(self.alfdir,u"adobekey.py")
- defaultkeys = self.WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix'])
+ defaultkeys = WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix'])
except:
pass
@@ -391,7 +394,7 @@ class DeDRM(FileTypePlugin):
from wineutils import WineGetKeys
scriptpath = os.path.join(self.alfdir,u"kindlekey.py")
- defaultkeys = self.WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix'])
+ defaultkeys = WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix'])
except:
pass
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android.py
new file mode 100644
index 0000000..ddb94f5
--- /dev/null
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+#fileencoding: utf-8
+
+import os
+import sys
+import zlib
+import tarfile
+from hashlib import md5
+from cStringIO import StringIO
+from binascii import a2b_hex, b2a_hex
+
+STORAGE = 'AmazonSecureStorage.xml'
+
+class AndroidObfuscation(object):
+ '''AndroidObfuscation
+ For the key, it's written in java, and run in android dalvikvm
+ '''
+
+ key = a2b_hex('0176e04c9408b1702d90be333fd53523')
+
+ def encrypt(self, plaintext):
+ cipher = self._get_cipher()
+ padding = len(self.key) - len(plaintext) % len(self.key)
+ plaintext += chr(padding) * padding
+ return b2a_hex(cipher.encrypt(plaintext))
+
+ def decrypt(self, ciphertext):
+ cipher = self._get_cipher()
+ plaintext = cipher.decrypt(a2b_hex(ciphertext))
+ return plaintext[:-ord(plaintext[-1])]
+
+ def _get_cipher(self):
+ try:
+ from Crypto.Cipher import AES
+ return AES.new(self.key)
+ except ImportError:
+ from aescbc import AES, noPadding
+ return AES(self.key, padding=noPadding())
+
+class AndroidObfuscationV2(AndroidObfuscation):
+ '''AndroidObfuscationV2
+ '''
+
+ count = 503
+ password = 'Thomsun was here!'
+
+ def __init__(self, salt):
+ key = self.password + salt
+ for _ in range(self.count):
+ key = md5(key).digest()
+ self.key = key[:8]
+ self.iv = key[8:16]
+
+ def _get_cipher(self):
+ try :
+ from Crypto.Cipher import DES
+ return DES.new(self.key, DES.MODE_CBC, self.iv)
+ except ImportError:
+ from python_des import Des, CBC
+ return Des(self.key, CBC, self.iv)
+
+def parse_preference(path):
+ ''' parse android's shared preference xml '''
+ storage = {}
+ read = open(path)
+ for line in read:
+ line = line.strip()
+ # value
+ if line.startswith(' adb backup com.amazon.kindle
+ '''
+ output = None
+ read = open(path, 'rb')
+ head = read.read(24)
+ if head == 'ANDROID BACKUP\n1\n1\nnone\n':
+ output = StringIO(zlib.decompress(read.read()))
+ read.close()
+
+ if not output:
+ return False
+
+ tar = tarfile.open(fileobj=output)
+ for member in tar.getmembers():
+ if member.name.strip().endswith(STORAGE):
+ write = open(STORAGE, 'w')
+ write.write(tar.extractfile(member).read())
+ write.close()
+ break
+
+ return True
+
+__all__ = [ 'get_storage', 'get_serials', 'parse_preference',
+ 'AndroidObfuscation', 'AndroidObfuscationV2', 'STORAGE']
+
+if __name__ == '__main__':
+ print get_serials()
\ No newline at end of file
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android_readme.txt b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android_readme.txt
new file mode 100644
index 0000000..9e7d035
--- /dev/null
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/android_readme.txt
@@ -0,0 +1,6 @@
+1.1 get AmazonSecureStorage.xml from /data/data/com.amazon.kindle/shared_prefs/AmazonSecureStorage.xml
+
+1.2 on android 4.0+, run `adb backup com.amazon.kindle` from PC will get backup.ab
+ now android.py can convert backup.ab to AmazonSecureStorage.xml
+
+2. run `k4mobidedrm.py -a AmazonSecureStorage.xml '
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py
index 4d83368..991591b 100644
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/flatxml2html.py
@@ -458,7 +458,11 @@ class DocParser(object):
(wtype, num) = pdesc[j]
if wtype == 'ocr' :
- word = self.ocrtext[num]
+ try:
+ word = self.ocrtext[num]
+ except:
+ word = ""
+
sep = ' '
if handle_links:
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/k4mobidedrm.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/k4mobidedrm.py
index 929ce57..504105b 100644
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/k4mobidedrm.py
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/k4mobidedrm.py
@@ -80,10 +80,12 @@ if inCalibre:
from calibre_plugins.dedrm import mobidedrm
from calibre_plugins.dedrm import topazextract
from calibre_plugins.dedrm import kgenpids
+ from calibre_plugins.dedrm import android
else:
import mobidedrm
import topazextract
import kgenpids
+ import android
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
@@ -273,7 +275,7 @@ def decryptBook(infile, outdir, kDatabaseFiles, serials, pids):
def usage(progname):
print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks"
print u"Usage:"
- print u" {0} [-k ] [-p ] [-s ] ".format(progname)
+ print u" {0} [-k ] [-p ] [-s ] [ -a ] ".format(progname)
#
# Main
@@ -284,7 +286,7 @@ def cli_main():
print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__)
try:
- opts, args = getopt.getopt(argv[1:], "k:p:s:")
+ opts, args = getopt.getopt(argv[1:], "k:p:s:a:")
except getopt.GetoptError, err:
print u"Error in options or arguments: {0}".format(err.args[0])
usage(progname)
@@ -312,6 +314,11 @@ def cli_main():
if a == None :
raise DrmException("Invalid parameter for -s")
serials = a.split(',')
+ if o == '-a':
+ if a == None:
+ continue
+ serials.extend(android.get_serials(a))
+ serials.extend(android.get_serials())
# try with built in Kindle Info files if not on Linux
k4 = not sys.platform.startswith('linux')
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/kindlekey.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/kindlekey.py
index f58e973..8852769 100644
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/kindlekey.py
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/kindlekey.py
@@ -19,6 +19,7 @@ from __future__ import with_statement
# 1.6 - Fixed a problem getting the disk serial numbers
# 1.7 - Work if TkInter is missing
# 1.8 - Fixes for Kindle for Mac, and non-ascii in Windows user names
+# 1.9 - Fixes for Unicode in Windows user names
"""
@@ -26,7 +27,7 @@ Retrieve Kindle for PC/Mac user key.
"""
__license__ = 'GPL v3'
-__version__ = '1.8'
+__version__ = '1.9'
import sys, os, re
from struct import pack, unpack, unpack_from
@@ -907,18 +908,34 @@ if iswindows:
return CryptUnprotectData
CryptUnprotectData = CryptUnprotectData()
+ # Returns Environmental Variables that contain unicode
+ def getEnvironmentVariable(name):
+ import ctypes
+ name = unicode(name) # make sure string argument is unicode
+ n = ctypes.windll.kernel32.GetEnvironmentVariableW(name, None, 0)
+ if n == 0:
+ return None
+ buf = ctypes.create_unicode_buffer(u'\0'*n)
+ ctypes.windll.kernel32.GetEnvironmentVariableW(name, buf, n)
+ return buf.value
# Locate all of the kindle-info style files and return as list
def getKindleInfoFiles():
kInfoFiles = []
# some 64 bit machines do not have the proper registry key for some reason
- # or the pythonn interface to the 32 vs 64 bit registry is broken
+ # or the python interface to the 32 vs 64 bit registry is broken
path = ""
if 'LOCALAPPDATA' in os.environ.keys():
- path = os.environ['LOCALAPPDATA']
+ # Python 2.x does not return unicode env. Use Python 3.x
+ path = winreg.ExpandEnvironmentStrings(u"%LOCALAPPDATA%")
+ # this is just another alternative.
+ # path = getEnvironmentVariable('LOCALAPPDATA')
+ if not os.path.isdir(path):
+ path = ""
else:
# User Shell Folders show take precedent over Shell Folders if present
try:
+ # this will still break
regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders\\")
path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
if not os.path.isdir(path):
@@ -937,13 +954,14 @@ if iswindows:
if path == "":
print ('Could not find the folder in which to look for kinfoFiles.')
else:
- print('searching for kinfoFiles in ' + path)
+ # Probably not the best. To Fix (shouldn't ignore in encoding) or use utf-8
+ print(u'searching for kinfoFiles in ' + path.encode('ascii', 'ignore'))
# look for (K4PC 1.9.0 and later) .kinf2011 file
kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011'
if os.path.isfile(kinfopath):
found = True
- print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath)
+ print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath.encode('ascii','ignore'))
kInfoFiles.append(kinfopath)
# look for (K4PC 1.6.0 and later) rainier.2.1.1.kinf file
@@ -1142,7 +1160,7 @@ if iswindows:
cleartext = CryptUnprotectData(encryptedValue, entropy, 1)
DB[keyname] = cleartext
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
+ if 'kindle.account.tokens' in DB:
print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(GetIDString(), GetUserName().decode("latin-1"))
# store values used in decryption
DB['IDString'] = GetIDString()
@@ -1758,7 +1776,7 @@ elif isosx:
break
except:
pass
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
+ if 'kindle.account.tokens' in DB:
# store values used in decryption
print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(IDString, GetUserName())
DB['IDString'] = IDString
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/mobidedrm.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/mobidedrm.py
index 7b69edc..89cc695 100644
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/mobidedrm.py
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/mobidedrm.py
@@ -156,6 +156,8 @@ def PC1(key, src, decryption=True):
return Pukall_Cipher().PC1(key,src,decryption)
except NameError:
pass
+ except TypeError:
+ pass
# use slow python version, since Pukall_Cipher didn't load
sum1 = 0;
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/stylexml2css.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/stylexml2css.py
index c111850..daa108a 100644
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/stylexml2css.py
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/stylexml2css.py
@@ -178,7 +178,12 @@ class DocParser(object):
if val == "":
val = 0
- if not ((attr == 'hang') and (int(val) == 0)) :
+ if not ((attr == 'hang') and (int(val) == 0)):
+ try:
+ f = float(val)
+ except:
+ print "Warning: unrecognised val, ignoring"
+ val = 0
pv = float(val)/scale
cssargs[attr] = (self.attr_val_map[attr], pv)
keep = True
diff --git a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/topazextract.py b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/topazextract.py
index 97f6583..fb5eb7a 100644
--- a/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/topazextract.py
+++ b/DeDRM_Macintosh_Application/DeDRM.app/Contents/Resources/topazextract.py
@@ -356,7 +356,7 @@ class TopazBook:
self.setBookKey(bookKey)
self.createBookDirectory()
- self.extractFiles()
+ self.extractFiles()
print u"Successfully Extracted Topaz contents"
if inCalibre:
from calibre_plugins.dedrm import genbook
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/DeDRM_app.pyw b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/DeDRM_app.pyw
index e73226b..7225b6d 100644
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/DeDRM_app.pyw
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/DeDRM_app.pyw
@@ -12,7 +12,7 @@
# 6.0.4 - Fix for other potential unicode problems
# 6.0.5 - Fix typo
-__version__ = '6.0.7'
+__version__ = '6.0.8'
import sys
import os, os.path
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/__init__.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/__init__.py
index caed6e8..37d4cb1 100644
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/__init__.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/__init__.py
@@ -31,14 +31,17 @@ __docformat__ = 'restructuredtext en'
# 6.0.3 - Fixes for Kindle for Mac and Windows non-ascii user names
# 6.0.4 - Fixes for stand-alone scripts and applications
# and pdb files in plugin and initial conversion of prefs.
+# 6.0.5 - Fix a key issue
# 6.0.6 - Fix up an incorrect function call
+# 6.0.7 - Error handling for incomplete PDF metadata
+# 6.0.8 - Fixes a Wine key issue and topaz support
"""
Decrypt DRMed ebooks.
"""
PLUGIN_NAME = u"DeDRM"
-PLUGIN_VERSION_TUPLE = (6, 0, 7)
+PLUGIN_VERSION_TUPLE = (6, 0, 8)
PLUGIN_VERSION = u".".join([unicode(str(x)) for x in PLUGIN_VERSION_TUPLE])
# Include an html helpfile in the plugin's zipfile with the following name.
RESOURCE_NAME = PLUGIN_NAME + '_Help.htm'
@@ -313,7 +316,7 @@ class DeDRM(FileTypePlugin):
from wineutils import WineGetKeys
scriptpath = os.path.join(self.alfdir,u"adobekey.py")
- defaultkeys = self.WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix'])
+ defaultkeys = WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix'])
except:
pass
@@ -391,7 +394,7 @@ class DeDRM(FileTypePlugin):
from wineutils import WineGetKeys
scriptpath = os.path.join(self.alfdir,u"kindlekey.py")
- defaultkeys = self.WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix'])
+ defaultkeys = WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix'])
except:
pass
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android.py
new file mode 100644
index 0000000..ddb94f5
--- /dev/null
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+#fileencoding: utf-8
+
+import os
+import sys
+import zlib
+import tarfile
+from hashlib import md5
+from cStringIO import StringIO
+from binascii import a2b_hex, b2a_hex
+
+STORAGE = 'AmazonSecureStorage.xml'
+
+class AndroidObfuscation(object):
+ '''AndroidObfuscation
+ For the key, it's written in java, and run in android dalvikvm
+ '''
+
+ key = a2b_hex('0176e04c9408b1702d90be333fd53523')
+
+ def encrypt(self, plaintext):
+ cipher = self._get_cipher()
+ padding = len(self.key) - len(plaintext) % len(self.key)
+ plaintext += chr(padding) * padding
+ return b2a_hex(cipher.encrypt(plaintext))
+
+ def decrypt(self, ciphertext):
+ cipher = self._get_cipher()
+ plaintext = cipher.decrypt(a2b_hex(ciphertext))
+ return plaintext[:-ord(plaintext[-1])]
+
+ def _get_cipher(self):
+ try:
+ from Crypto.Cipher import AES
+ return AES.new(self.key)
+ except ImportError:
+ from aescbc import AES, noPadding
+ return AES(self.key, padding=noPadding())
+
+class AndroidObfuscationV2(AndroidObfuscation):
+ '''AndroidObfuscationV2
+ '''
+
+ count = 503
+ password = 'Thomsun was here!'
+
+ def __init__(self, salt):
+ key = self.password + salt
+ for _ in range(self.count):
+ key = md5(key).digest()
+ self.key = key[:8]
+ self.iv = key[8:16]
+
+ def _get_cipher(self):
+ try :
+ from Crypto.Cipher import DES
+ return DES.new(self.key, DES.MODE_CBC, self.iv)
+ except ImportError:
+ from python_des import Des, CBC
+ return Des(self.key, CBC, self.iv)
+
+def parse_preference(path):
+ ''' parse android's shared preference xml '''
+ storage = {}
+ read = open(path)
+ for line in read:
+ line = line.strip()
+ # value
+ if line.startswith(' adb backup com.amazon.kindle
+ '''
+ output = None
+ read = open(path, 'rb')
+ head = read.read(24)
+ if head == 'ANDROID BACKUP\n1\n1\nnone\n':
+ output = StringIO(zlib.decompress(read.read()))
+ read.close()
+
+ if not output:
+ return False
+
+ tar = tarfile.open(fileobj=output)
+ for member in tar.getmembers():
+ if member.name.strip().endswith(STORAGE):
+ write = open(STORAGE, 'w')
+ write.write(tar.extractfile(member).read())
+ write.close()
+ break
+
+ return True
+
+__all__ = [ 'get_storage', 'get_serials', 'parse_preference',
+ 'AndroidObfuscation', 'AndroidObfuscationV2', 'STORAGE']
+
+if __name__ == '__main__':
+ print get_serials()
\ No newline at end of file
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android_readme.txt b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android_readme.txt
new file mode 100644
index 0000000..9e7d035
--- /dev/null
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/android_readme.txt
@@ -0,0 +1,6 @@
+1.1 get AmazonSecureStorage.xml from /data/data/com.amazon.kindle/shared_prefs/AmazonSecureStorage.xml
+
+1.2 on android 4.0+, run `adb backup com.amazon.kindle` from PC will get backup.ab
+ now android.py can convert backup.ab to AmazonSecureStorage.xml
+
+2. run `k4mobidedrm.py -a AmazonSecureStorage.xml '
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/flatxml2html.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/flatxml2html.py
index 4d83368..991591b 100644
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/flatxml2html.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/flatxml2html.py
@@ -458,7 +458,11 @@ class DocParser(object):
(wtype, num) = pdesc[j]
if wtype == 'ocr' :
- word = self.ocrtext[num]
+ try:
+ word = self.ocrtext[num]
+ except:
+ word = ""
+
sep = ' '
if handle_links:
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/k4mobidedrm.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/k4mobidedrm.py
index 929ce57..504105b 100644
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/k4mobidedrm.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/k4mobidedrm.py
@@ -80,10 +80,12 @@ if inCalibre:
from calibre_plugins.dedrm import mobidedrm
from calibre_plugins.dedrm import topazextract
from calibre_plugins.dedrm import kgenpids
+ from calibre_plugins.dedrm import android
else:
import mobidedrm
import topazextract
import kgenpids
+ import android
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
@@ -273,7 +275,7 @@ def decryptBook(infile, outdir, kDatabaseFiles, serials, pids):
def usage(progname):
print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks"
print u"Usage:"
- print u" {0} [-k ] [-p ] [-s ] ".format(progname)
+ print u" {0} [-k ] [-p ] [-s ] [ -a ] ".format(progname)
#
# Main
@@ -284,7 +286,7 @@ def cli_main():
print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__)
try:
- opts, args = getopt.getopt(argv[1:], "k:p:s:")
+ opts, args = getopt.getopt(argv[1:], "k:p:s:a:")
except getopt.GetoptError, err:
print u"Error in options or arguments: {0}".format(err.args[0])
usage(progname)
@@ -312,6 +314,11 @@ def cli_main():
if a == None :
raise DrmException("Invalid parameter for -s")
serials = a.split(',')
+ if o == '-a':
+ if a == None:
+ continue
+ serials.extend(android.get_serials(a))
+ serials.extend(android.get_serials())
# try with built in Kindle Info files if not on Linux
k4 = not sys.platform.startswith('linux')
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/kindlekey.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/kindlekey.py
index f58e973..8852769 100644
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/kindlekey.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/kindlekey.py
@@ -19,6 +19,7 @@ from __future__ import with_statement
# 1.6 - Fixed a problem getting the disk serial numbers
# 1.7 - Work if TkInter is missing
# 1.8 - Fixes for Kindle for Mac, and non-ascii in Windows user names
+# 1.9 - Fixes for Unicode in Windows user names
"""
@@ -26,7 +27,7 @@ Retrieve Kindle for PC/Mac user key.
"""
__license__ = 'GPL v3'
-__version__ = '1.8'
+__version__ = '1.9'
import sys, os, re
from struct import pack, unpack, unpack_from
@@ -907,18 +908,34 @@ if iswindows:
return CryptUnprotectData
CryptUnprotectData = CryptUnprotectData()
+ # Returns Environmental Variables that contain unicode
+ def getEnvironmentVariable(name):
+ import ctypes
+ name = unicode(name) # make sure string argument is unicode
+ n = ctypes.windll.kernel32.GetEnvironmentVariableW(name, None, 0)
+ if n == 0:
+ return None
+ buf = ctypes.create_unicode_buffer(u'\0'*n)
+ ctypes.windll.kernel32.GetEnvironmentVariableW(name, buf, n)
+ return buf.value
# Locate all of the kindle-info style files and return as list
def getKindleInfoFiles():
kInfoFiles = []
# some 64 bit machines do not have the proper registry key for some reason
- # or the pythonn interface to the 32 vs 64 bit registry is broken
+ # or the python interface to the 32 vs 64 bit registry is broken
path = ""
if 'LOCALAPPDATA' in os.environ.keys():
- path = os.environ['LOCALAPPDATA']
+ # Python 2.x does not return unicode env. Use Python 3.x
+ path = winreg.ExpandEnvironmentStrings(u"%LOCALAPPDATA%")
+ # this is just another alternative.
+ # path = getEnvironmentVariable('LOCALAPPDATA')
+ if not os.path.isdir(path):
+ path = ""
else:
# User Shell Folders show take precedent over Shell Folders if present
try:
+ # this will still break
regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders\\")
path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
if not os.path.isdir(path):
@@ -937,13 +954,14 @@ if iswindows:
if path == "":
print ('Could not find the folder in which to look for kinfoFiles.')
else:
- print('searching for kinfoFiles in ' + path)
+ # Probably not the best. To Fix (shouldn't ignore in encoding) or use utf-8
+ print(u'searching for kinfoFiles in ' + path.encode('ascii', 'ignore'))
# look for (K4PC 1.9.0 and later) .kinf2011 file
kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011'
if os.path.isfile(kinfopath):
found = True
- print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath)
+ print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath.encode('ascii','ignore'))
kInfoFiles.append(kinfopath)
# look for (K4PC 1.6.0 and later) rainier.2.1.1.kinf file
@@ -1142,7 +1160,7 @@ if iswindows:
cleartext = CryptUnprotectData(encryptedValue, entropy, 1)
DB[keyname] = cleartext
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
+ if 'kindle.account.tokens' in DB:
print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(GetIDString(), GetUserName().decode("latin-1"))
# store values used in decryption
DB['IDString'] = GetIDString()
@@ -1758,7 +1776,7 @@ elif isosx:
break
except:
pass
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
+ if 'kindle.account.tokens' in DB:
# store values used in decryption
print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(IDString, GetUserName())
DB['IDString'] = IDString
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/mobidedrm.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/mobidedrm.py
index 7b69edc..89cc695 100644
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/mobidedrm.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/mobidedrm.py
@@ -156,6 +156,8 @@ def PC1(key, src, decryption=True):
return Pukall_Cipher().PC1(key,src,decryption)
except NameError:
pass
+ except TypeError:
+ pass
# use slow python version, since Pukall_Cipher didn't load
sum1 = 0;
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py
index c111850..daa108a 100644
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/stylexml2css.py
@@ -178,7 +178,12 @@ class DocParser(object):
if val == "":
val = 0
- if not ((attr == 'hang') and (int(val) == 0)) :
+ if not ((attr == 'hang') and (int(val) == 0)):
+ try:
+ f = float(val)
+ except:
+ print "Warning: unrecognised val, ignoring"
+ val = 0
pv = float(val)/scale
cssargs[attr] = (self.attr_val_map[attr], pv)
keep = True
diff --git a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py
index 97f6583..fb5eb7a 100644
--- a/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py
+++ b/DeDRM_Windows_Application/DeDRM_App/DeDRM_lib/lib/topazextract.py
@@ -356,7 +356,7 @@ class TopazBook:
self.setBookKey(bookKey)
self.createBookDirectory()
- self.extractFiles()
+ self.extractFiles()
print u"Successfully Extracted Topaz contents"
if inCalibre:
from calibre_plugins.dedrm import genbook
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin.zip b/DeDRM_calibre_plugin/DeDRM_plugin.zip
index 7c4878a..58d8174 100644
Binary files a/DeDRM_calibre_plugin/DeDRM_plugin.zip and b/DeDRM_calibre_plugin/DeDRM_plugin.zip differ
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py b/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py
index caed6e8..37d4cb1 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py
@@ -31,14 +31,17 @@ __docformat__ = 'restructuredtext en'
# 6.0.3 - Fixes for Kindle for Mac and Windows non-ascii user names
# 6.0.4 - Fixes for stand-alone scripts and applications
# and pdb files in plugin and initial conversion of prefs.
+# 6.0.5 - Fix a key issue
# 6.0.6 - Fix up an incorrect function call
+# 6.0.7 - Error handling for incomplete PDF metadata
+# 6.0.8 - Fixes a Wine key issue and topaz support
"""
Decrypt DRMed ebooks.
"""
PLUGIN_NAME = u"DeDRM"
-PLUGIN_VERSION_TUPLE = (6, 0, 7)
+PLUGIN_VERSION_TUPLE = (6, 0, 8)
PLUGIN_VERSION = u".".join([unicode(str(x)) for x in PLUGIN_VERSION_TUPLE])
# Include an html helpfile in the plugin's zipfile with the following name.
RESOURCE_NAME = PLUGIN_NAME + '_Help.htm'
@@ -313,7 +316,7 @@ class DeDRM(FileTypePlugin):
from wineutils import WineGetKeys
scriptpath = os.path.join(self.alfdir,u"adobekey.py")
- defaultkeys = self.WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix'])
+ defaultkeys = WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix'])
except:
pass
@@ -391,7 +394,7 @@ class DeDRM(FileTypePlugin):
from wineutils import WineGetKeys
scriptpath = os.path.join(self.alfdir,u"kindlekey.py")
- defaultkeys = self.WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix'])
+ defaultkeys = WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix'])
except:
pass
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/android.py b/DeDRM_calibre_plugin/DeDRM_plugin/android.py
new file mode 100644
index 0000000..ddb94f5
--- /dev/null
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/android.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+#fileencoding: utf-8
+
+import os
+import sys
+import zlib
+import tarfile
+from hashlib import md5
+from cStringIO import StringIO
+from binascii import a2b_hex, b2a_hex
+
+STORAGE = 'AmazonSecureStorage.xml'
+
+class AndroidObfuscation(object):
+ '''AndroidObfuscation
+ For the key, it's written in java, and run in android dalvikvm
+ '''
+
+ key = a2b_hex('0176e04c9408b1702d90be333fd53523')
+
+ def encrypt(self, plaintext):
+ cipher = self._get_cipher()
+ padding = len(self.key) - len(plaintext) % len(self.key)
+ plaintext += chr(padding) * padding
+ return b2a_hex(cipher.encrypt(plaintext))
+
+ def decrypt(self, ciphertext):
+ cipher = self._get_cipher()
+ plaintext = cipher.decrypt(a2b_hex(ciphertext))
+ return plaintext[:-ord(plaintext[-1])]
+
+ def _get_cipher(self):
+ try:
+ from Crypto.Cipher import AES
+ return AES.new(self.key)
+ except ImportError:
+ from aescbc import AES, noPadding
+ return AES(self.key, padding=noPadding())
+
+class AndroidObfuscationV2(AndroidObfuscation):
+ '''AndroidObfuscationV2
+ '''
+
+ count = 503
+ password = 'Thomsun was here!'
+
+ def __init__(self, salt):
+ key = self.password + salt
+ for _ in range(self.count):
+ key = md5(key).digest()
+ self.key = key[:8]
+ self.iv = key[8:16]
+
+ def _get_cipher(self):
+ try :
+ from Crypto.Cipher import DES
+ return DES.new(self.key, DES.MODE_CBC, self.iv)
+ except ImportError:
+ from python_des import Des, CBC
+ return Des(self.key, CBC, self.iv)
+
+def parse_preference(path):
+ ''' parse android's shared preference xml '''
+ storage = {}
+ read = open(path)
+ for line in read:
+ line = line.strip()
+ # value
+ if line.startswith(' adb backup com.amazon.kindle
+ '''
+ output = None
+ read = open(path, 'rb')
+ head = read.read(24)
+ if head == 'ANDROID BACKUP\n1\n1\nnone\n':
+ output = StringIO(zlib.decompress(read.read()))
+ read.close()
+
+ if not output:
+ return False
+
+ tar = tarfile.open(fileobj=output)
+ for member in tar.getmembers():
+ if member.name.strip().endswith(STORAGE):
+ write = open(STORAGE, 'w')
+ write.write(tar.extractfile(member).read())
+ write.close()
+ break
+
+ return True
+
+__all__ = [ 'get_storage', 'get_serials', 'parse_preference',
+ 'AndroidObfuscation', 'AndroidObfuscationV2', 'STORAGE']
+
+if __name__ == '__main__':
+ print get_serials()
\ No newline at end of file
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt b/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt
new file mode 100644
index 0000000..9e7d035
--- /dev/null
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt
@@ -0,0 +1,6 @@
+1.1 get AmazonSecureStorage.xml from /data/data/com.amazon.kindle/shared_prefs/AmazonSecureStorage.xml
+
+1.2 on android 4.0+, run `adb backup com.amazon.kindle` from PC will get backup.ab
+ now android.py can convert backup.ab to AmazonSecureStorage.xml
+
+2. run `k4mobidedrm.py -a AmazonSecureStorage.xml '
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py b/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py
deleted file mode 100644
index 6bb8c37..0000000
--- a/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# base64.py, version 1.0
-# Copyright © 2010 Apprentice Alf
-
-# Released under the terms of the GNU General Public Licence, version 3 or
-# later.
-
-# Revision history:
-# 1 - Initial release. To allow Applescript to do base64 encoding
-
-"""
-Provide base64 encoding.
-"""
-
-from __future__ import with_statement
-
-__license__ = 'GPL v3'
-
-import sys
-import os
-import base64
-
-def usage(progname):
- print "Applies base64 encoding to the supplied file, sending to standard output"
- print "Usage:"
- print " %s " % progname
-
-def cli_main(argv=sys.argv):
- progname = os.path.basename(argv[0])
-
- if len(argv)<2:
- usage(progname)
- sys.exit(2)
-
- keypath = argv[1]
- with open(keypath, 'rb') as f:
- keyder = f.read()
- print keyder.encode('base64')
- return 0
-
-
-if __name__ == '__main__':
- sys.exit(cli_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py b/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py
index 11f1427..6bb8c37 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py
@@ -1,208 +1,45 @@
-#!/usr/bin/python
-#
-# This is a python script. You need a Python interpreter to run it.
-# For example, ActiveState Python, which exists for windows.
-#
-# Changelog drmcheck
-# 1.00 - Initial version, with code from various other scripts
-# 1.01 - Moved authorship announcement to usage section.
-#
-# Changelog epubtest
-# 1.00 - Cut to epubtest.py, testing ePub files only by Apprentice Alf
-# 1.01 - Added routine for use by Windows DeDRM
-#
-# Written in 2011 by Paul Durrant
-# Released with unlicense. See http://unlicense.org/
-#
-#############################################################################
-#
-# This is free and unencumbered software released into the public domain.
-#
-# Anyone is free to copy, modify, publish, use, compile, sell, or
-# distribute this software, either in source code form or as a compiled
-# binary, for any purpose, commercial or non-commercial, and by any
-# means.
-#
-# In jurisdictions that recognize copyright laws, the author or authors
-# of this software dedicate any and all copyright interest in the
-# software to the public domain. We make this dedication for the benefit
-# of the public at large and to the detriment of our heirs and
-# successors. We intend this dedication to be an overt act of
-# relinquishment in perpetuity of all present and future rights to this
-# software under copyright law.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-#############################################################################
-#
-# It's still polite to give attribution if you do reuse this code.
-#
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
-from __future__ import with_statement
-
-__version__ = '1.01'
-
-import sys, struct, os
-import zlib
-import zipfile
-import xml.etree.ElementTree as etree
-
-NSMAP = {'adept': 'http://ns.adobe.com/adept',
- 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
-
-# Wrap a stream so that output gets flushed immediately
-# and also make sure that any unicode strings get
-# encoded using "replace" before writing them.
-class SafeUnbuffered:
- def __init__(self, stream):
- self.stream = stream
- self.encoding = stream.encoding
- if self.encoding == None:
- self.encoding = "utf-8"
- def write(self, data):
- if isinstance(data,unicode):
- data = data.encode(self.encoding,"replace")
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-try:
- from calibre.constants import iswindows, isosx
-except:
- iswindows = sys.platform.startswith('win')
- isosx = sys.platform.startswith('darwin')
-
-def unicode_argv():
- if iswindows:
- # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
- # strings.
-
- # Versions 2.x of Python don't support Unicode in sys.argv on
- # Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv
- # as a list of Unicode strings and encode them as utf-8
+# base64.py, version 1.0
+# Copyright © 2010 Apprentice Alf
- from ctypes import POINTER, byref, cdll, c_int, windll
- from ctypes.wintypes import LPCWSTR, LPWSTR
+# Released under the terms of the GNU General Public Licence, version 3 or
+# later.
- GetCommandLineW = cdll.kernel32.GetCommandLineW
- GetCommandLineW.argtypes = []
- GetCommandLineW.restype = LPCWSTR
+# Revision history:
+# 1 - Initial release. To allow Applescript to do base64 encoding
- CommandLineToArgvW = windll.shell32.CommandLineToArgvW
- CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
- CommandLineToArgvW.restype = POINTER(LPWSTR)
+"""
+Provide base64 encoding.
+"""
- cmd = GetCommandLineW()
- argc = c_int(0)
- argv = CommandLineToArgvW(cmd, byref(argc))
- if argc.value > 0:
- # Remove Python executable and commands if present
- start = argc.value - len(sys.argv)
- return [argv[i] for i in
- xrange(start, argc.value)]
- # if we don't have any arguments at all, just pass back script name
- # this should never happen
- return [u"epubtest.py"]
- else:
- argvencoding = sys.stdin.encoding
- if argvencoding == None:
- argvencoding = "utf-8"
- return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-
-_FILENAME_LEN_OFFSET = 26
-_EXTRA_LEN_OFFSET = 28
-_FILENAME_OFFSET = 30
-_MAX_SIZE = 64 * 1024
-
-
-def uncompress(cmpdata):
- dc = zlib.decompressobj(-15)
- data = ''
- while len(cmpdata) > 0:
- if len(cmpdata) > _MAX_SIZE :
- newdata = cmpdata[0:_MAX_SIZE]
- cmpdata = cmpdata[_MAX_SIZE:]
- else:
- newdata = cmpdata
- cmpdata = ''
- newdata = dc.decompress(newdata)
- unprocessed = dc.unconsumed_tail
- if len(unprocessed) == 0:
- newdata += dc.flush()
- data += newdata
- cmpdata += unprocessed
- unprocessed = ''
- return data
-
-def getfiledata(file, zi):
- # get file name length and exta data length to find start of file data
- local_header_offset = zi.header_offset
-
- file.seek(local_header_offset + _FILENAME_LEN_OFFSET)
- leninfo = file.read(2)
- local_name_length, = struct.unpack('" % progname
- return data
+def cli_main(argv=sys.argv):
+ progname = os.path.basename(argv[0])
-def encryption(infile):
- # returns encryption: one of Unencrypted, Adobe, B&N and Unknown
- encryption = "Unknown"
- try:
- with open(infile,'rb') as infileobject:
- bookdata = infileobject.read(58)
- # Check for Zip
- if bookdata[0:0+2] == "PK":
- foundrights = False
- foundencryption = False
- inzip = zipfile.ZipFile(infile,'r')
- namelist = set(inzip.namelist())
- if 'META-INF/rights.xml' not in namelist or 'META-INF/encryption.xml' not in namelist:
- encryption = "Unencrypted"
- else:
- rights = etree.fromstring(inzip.read('META-INF/rights.xml'))
- adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
- expr = './/%s' % (adept('encryptedKey'),)
- bookkey = ''.join(rights.findtext(expr))
- if len(bookkey) == 172:
- encryption = "Adobe"
- elif len(bookkey) == 64:
- encryption = "B&N"
- else:
- encryption = "Unknown"
- except:
- traceback.print_exc()
- return encryption
+ if len(argv)<2:
+ usage(progname)
+ sys.exit(2)
-def main():
- argv=unicode_argv()
- print encryption(argv[1])
+ keypath = argv[1]
+ with open(keypath, 'rb') as f:
+ keyder = f.read()
+ print keyder.encode('base64')
return 0
-if __name__ == "__main__":
- sys.stdout=SafeUnbuffered(sys.stdout)
- sys.stderr=SafeUnbuffered(sys.stderr)
- sys.exit(main())
+
+if __name__ == '__main__':
+ sys.exit(cli_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py b/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py
index 1dfef42..11f1427 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py
@@ -1,82 +1,60 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# erdr2pml.py
-# Copyright © 2008 The Dark Reverser
+#!/usr/bin/python
#
-# Modified 2008–2012 by some_updates, DiapDealer and Apprentice Alf
-
# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
-# Changelog
#
-# Based on ereader2html version 0.08 plus some later small fixes
+# Changelog drmcheck
+# 1.00 - Initial version, with code from various other scripts
+# 1.01 - Moved authorship announcement to usage section.
+#
+# Changelog epubtest
+# 1.00 - Cut to epubtest.py, testing ePub files only by Apprentice Alf
+# 1.01 - Added routine for use by Windows DeDRM
+#
+# Written in 2011 by Paul Durrant
+# Released with unlicense. See http://unlicense.org/
+#
+#############################################################################
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#############################################################################
+#
+# It's still polite to give attribution if you do reuse this code.
#
-# 0.01 - Initial version
-# 0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug.
-# 0.03 - Fix incorrect variable usage at one place.
-# 0.03b - enhancement by DeBockle (version 259 support)
-# Custom version 0.03 - no change to eReader support, only usability changes
-# - start of pep-8 indentation (spaces not tab), fix trailing blanks
-# - version variable, only one place to change
-# - added main routine, now callable as a library/module,
-# means tools can add optional support for ereader2html
-# - outdir is no longer a mandatory parameter (defaults based on input name if missing)
-# - time taken output to stdout
-# - Psyco support - reduces runtime by a factor of (over) 3!
-# E.g. (~600Kb file) 90 secs down to 24 secs
-# - newstyle classes
-# - changed map call to list comprehension
-# may not work with python 2.3
-# without Psyco this reduces runtime to 90%
-# E.g. 90 secs down to 77 secs
-# Psyco with map calls takes longer, do not run with map in Psyco JIT!
-# - izip calls used instead of zip (if available), further reduction
-# in run time (factor of 4.5).
-# E.g. (~600Kb file) 90 secs down to 20 secs
-# - Python 2.6+ support, avoid DeprecationWarning with sha/sha1
-# 0.04 - Footnote support, PML output, correct charset in html, support more PML tags
-# - Feature change, dump out PML file
-# - Added supprt for footnote tags. NOTE footnote ids appear to be bad (not usable)
-# in some pdb files :-( due to the same id being used multiple times
-# - Added correct charset encoding (pml is based on cp1252)
-# - Added logging support.
-# 0.05 - Improved type 272 support for sidebars, links, chapters, metainfo, etc
-# 0.06 - Merge of 0.04 and 0.05. Improved HTML output
-# Placed images in subfolder, so that it's possible to just
-# drop the book.pml file onto DropBook to make an unencrypted
-# copy of the eReader file.
-# Using that with Calibre works a lot better than the HTML
-# conversion in this code.
-# 0.07 - Further Improved type 272 support for sidebars with all earlier fixes
-# 0.08 - fixed typos, removed extraneous things
-# 0.09 - fixed typos in first_pages to first_page to again support older formats
-# 0.10 - minor cleanups
-# 0.11 - fixups for using correct xml for footnotes and sidebars for use with Dropbook
-# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name.
-# 0.13 - change to unbuffered stdout for use with gui front ends
-# 0.14 - contributed enhancement to support --make-pmlz switch
-# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.
-# 0.16 - convert to use openssl DES (very very fast) or pure python DES if openssl's libcrypto is not available
-# 0.17 - added support for pycrypto's DES as well
-# 0.18 - on Windows try PyCrypto first and OpenSSL next
-# 0.19 - Modify the interface to allow use of import
-# 0.20 - modify to allow use inside new interface for calibre plugins
-# 0.21 - Support eReader (drm) version 11.
-# - Don't reject dictionary format.
-# - Ignore sidebars for dictionaries (different format?)
-# 0.22 - Unicode and plugin support, different image folders for PMLZ and source
-# 0.23 - moved unicode_argv call inside main for Windows DeDRM compatibility
-__version__='0.23'
+from __future__ import with_statement
-import sys, re
-import struct, binascii, getopt, zlib, os, os.path, urllib, tempfile, traceback
+__version__ = '1.01'
-if 'calibre' in sys.modules:
- inCalibre = True
-else:
- inCalibre = False
+import sys, struct, os
+import zlib
+import zipfile
+import xml.etree.ElementTree as etree
+
+NSMAP = {'adept': 'http://ns.adobe.com/adept',
+ 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
@@ -95,8 +73,11 @@ class SafeUnbuffered:
def __getattr__(self, attr):
return getattr(self.stream, attr)
-iswindows = sys.platform.startswith('win')
-isosx = sys.platform.startswith('darwin')
+try:
+ from calibre.constants import iswindows, isosx
+except:
+ iswindows = sys.platform.startswith('win')
+ isosx = sys.platform.startswith('darwin')
def unicode_argv():
if iswindows:
@@ -105,8 +86,8 @@ def unicode_argv():
# Versions 2.x of Python don't support Unicode in sys.argv on
# Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'.
-
+ # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv
+ # as a list of Unicode strings and encode them as utf-8
from ctypes import POINTER, byref, cdll, c_int, windll
from ctypes.wintypes import LPCWSTR, LPWSTR
@@ -129,469 +110,99 @@ def unicode_argv():
xrange(start, argc.value)]
# if we don't have any arguments at all, just pass back script name
# this should never happen
- return [u"mobidedrm.py"]
+ return [u"epubtest.py"]
else:
argvencoding = sys.stdin.encoding
if argvencoding == None:
argvencoding = "utf-8"
return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-Des = None
-if iswindows:
- # first try with pycrypto
- if inCalibre:
- from calibre_plugins.dedrm import pycrypto_des
- else:
- import pycrypto_des
- Des = pycrypto_des.load_pycrypto()
- if Des == None:
- # they try with openssl
- if inCalibre:
- from calibre_plugins.dedrm import openssl_des
- else:
- import openssl_des
- Des = openssl_des.load_libcrypto()
-else:
- # first try with openssl
- if inCalibre:
- from calibre_plugins.dedrm import openssl_des
- else:
- import openssl_des
- Des = openssl_des.load_libcrypto()
- if Des == None:
- # then try with pycrypto
- if inCalibre:
- from calibre_plugins.dedrm import pycrypto_des
- else:
- import pycrypto_des
- Des = pycrypto_des.load_pycrypto()
-
-# if that did not work then use pure python implementation
-# of DES and try to speed it up with Psycho
-if Des == None:
- if inCalibre:
- from calibre_plugins.dedrm import python_des
- else:
- import python_des
- Des = python_des.Des
- # Import Psyco if available
- try:
- # http://psyco.sourceforge.net
- import psyco
- psyco.full()
- except ImportError:
- pass
-
-try:
- from hashlib import sha1
-except ImportError:
- # older Python release
- import sha
- sha1 = lambda s: sha.new(s)
-
-import cgi
-import logging
-
-logging.basicConfig()
-#logging.basicConfig(level=logging.DEBUG)
-
-
-class Sectionizer(object):
- bkType = "Book"
-
- def __init__(self, filename, ident):
- self.contents = file(filename, 'rb').read()
- self.header = self.contents[0:72]
- self.num_sections, = struct.unpack('>H', self.contents[76:78])
- # Dictionary or normal content (TODO: Not hard-coded)
- if self.header[0x3C:0x3C+8] != ident:
- if self.header[0x3C:0x3C+8] == "PDctPPrs":
- self.bkType = "Dict"
- else:
- raise ValueError('Invalid file format')
- self.sections = []
- for i in xrange(self.num_sections):
- offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
- flags, val = a1, a2<<16|a3<<8|a4
- self.sections.append( (offset, flags, val) )
- def loadSection(self, section):
- if section + 1 == self.num_sections:
- end_off = len(self.contents)
- else:
- end_off = self.sections[section + 1][0]
- off = self.sections[section][0]
- return self.contents[off:end_off]
-
-# cleanup unicode filenames
-# borrowed from calibre from calibre/src/calibre/__init__.py
-# added in removal of control (<32) chars
-# and removal of . at start and end
-# and with some (heavily edited) code from Paul Durrant's kindlenamer.py
-def sanitizeFileName(name):
- # substitute filename unfriendly characters
- name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'")
- # delete control characters
- name = u"".join(char for char in name if ord(char)>=32)
- # white space to single space, delete leading and trailing while space
- name = re.sub(ur"\s", u" ", name).strip()
- # remove leading dots
- while len(name)>0 and name[0] == u".":
- name = name[1:]
- # remove trailing dots (Windows doesn't like them)
- if name.endswith(u'.'):
- name = name[:-1]
- return name
-
-def fixKey(key):
- def fixByte(b):
- return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80)
- return "".join([chr(fixByte(ord(a))) for a in key])
-
-def deXOR(text, sp, table):
- r=''
- j = sp
- for i in xrange(len(text)):
- r += chr(ord(table[j]) ^ ord(text[i]))
- j = j + 1
- if j == len(table):
- j = 0
- return r
-
-class EreaderProcessor(object):
- def __init__(self, sect, user_key):
- self.section_reader = sect.loadSection
- data = self.section_reader(0)
- version, = struct.unpack('>H', data[0:2])
- self.version = version
- logging.info('eReader file format version %s', version)
- if version != 272 and version != 260 and version != 259:
- raise ValueError('incorrect eReader version %d (error 1)' % version)
- data = self.section_reader(1)
- self.data = data
- des = Des(fixKey(data[0:8]))
- cookie_shuf, cookie_size = struct.unpack('>LL', des.decrypt(data[-8:]))
- if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200:
- raise ValueError('incorrect eReader version (error 2)')
- input = des.decrypt(data[-cookie_size:])
- def unshuff(data, shuf):
- r = [''] * len(data)
- j = 0
- for i in xrange(len(data)):
- j = (j + shuf) % len(data)
- r[j] = data[i]
- assert len("".join(r)) == len(data)
- return "".join(r)
- r = unshuff(input[0:-8], cookie_shuf)
+_FILENAME_LEN_OFFSET = 26
+_EXTRA_LEN_OFFSET = 28
+_FILENAME_OFFSET = 30
+_MAX_SIZE = 64 * 1024
- drm_sub_version = struct.unpack('>H', r[0:2])[0]
- self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1
- self.num_image_pages = struct.unpack('>H', r[26:26+2])[0]
- self.first_image_page = struct.unpack('>H', r[24:24+2])[0]
- # Default values
- self.num_footnote_pages = 0
- self.num_sidebar_pages = 0
- self.first_footnote_page = -1
- self.first_sidebar_page = -1
- if self.version == 272:
- self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0]
- self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0]
- if (sect.bkType == "Book"):
- self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0]
- self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0]
- # self.num_bookinfo_pages = struct.unpack('>H', r[34:34+2])[0]
- # self.first_bookinfo_page = struct.unpack('>H', r[32:32+2])[0]
- # self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0]
- # self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0]
- # self.num_link_pages = struct.unpack('>H', r[30:30+2])[0]
- # self.first_link_page = struct.unpack('>H', r[28:28+2])[0]
- # self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0]
- # self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0]
- # **before** data record 1 was decrypted and unshuffled, it contained data
- # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc
- self.xortable_offset = struct.unpack('>H', r[40:40+2])[0]
- self.xortable_size = struct.unpack('>H', r[42:42+2])[0]
- self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size]
+def uncompress(cmpdata):
+ dc = zlib.decompressobj(-15)
+ data = ''
+ while len(cmpdata) > 0:
+ if len(cmpdata) > _MAX_SIZE :
+ newdata = cmpdata[0:_MAX_SIZE]
+ cmpdata = cmpdata[_MAX_SIZE:]
else:
- # Nothing needs to be done
- pass
- # self.num_bookinfo_pages = 0
- # self.num_chapter_pages = 0
- # self.num_link_pages = 0
- # self.num_xtextsize_pages = 0
- # self.first_bookinfo_page = -1
- # self.first_chapter_page = -1
- # self.first_link_page = -1
- # self.first_xtextsize_page = -1
-
- logging.debug('self.num_text_pages %d', self.num_text_pages)
- logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page)
- logging.debug('self.num_sidebar_pages %d, self.first_sidebar_page %d', self.num_sidebar_pages , self.first_sidebar_page)
- self.flags = struct.unpack('>L', r[4:8])[0]
- reqd_flags = (1<<9) | (1<<7) | (1<<10)
- if (self.flags & reqd_flags) != reqd_flags:
- print "Flags: 0x%X" % self.flags
- raise ValueError('incompatible eReader file')
- des = Des(fixKey(user_key))
- if version == 259:
- if drm_sub_version != 7:
- raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
- encrypted_key_sha = r[44:44+20]
- encrypted_key = r[64:64+8]
- elif version == 260:
- if drm_sub_version != 13 and drm_sub_version != 11:
- raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
- if drm_sub_version == 13:
- encrypted_key = r[44:44+8]
- encrypted_key_sha = r[52:52+20]
- else:
- encrypted_key = r[64:64+8]
- encrypted_key_sha = r[44:44+20]
- elif version == 272:
- encrypted_key = r[172:172+8]
- encrypted_key_sha = r[56:56+20]
- self.content_key = des.decrypt(encrypted_key)
- if sha1(self.content_key).digest() != encrypted_key_sha:
- raise ValueError('Incorrect Name and/or Credit Card')
-
- def getNumImages(self):
- return self.num_image_pages
-
- def getImage(self, i):
- sect = self.section_reader(self.first_image_page + i)
- name = sect[4:4+32].strip('\0')
- data = sect[62:]
- return sanitizeFileName(unicode(name,'windows-1252')), data
-
-
- # def getChapterNamePMLOffsetData(self):
- # cv = ''
- # if self.num_chapter_pages > 0:
- # for i in xrange(self.num_chapter_pages):
- # chaps = self.section_reader(self.first_chapter_page + i)
- # j = i % self.xortable_size
- # offname = deXOR(chaps, j, self.xortable)
- # offset = struct.unpack('>L', offname[0:4])[0]
- # name = offname[4:].strip('\0')
- # cv += '%d|%s\n' % (offset, name)
- # return cv
-
- # def getLinkNamePMLOffsetData(self):
- # lv = ''
- # if self.num_link_pages > 0:
- # for i in xrange(self.num_link_pages):
- # links = self.section_reader(self.first_link_page + i)
- # j = i % self.xortable_size
- # offname = deXOR(links, j, self.xortable)
- # offset = struct.unpack('>L', offname[0:4])[0]
- # name = offname[4:].strip('\0')
- # lv += '%d|%s\n' % (offset, name)
- # return lv
-
- # def getExpandedTextSizesData(self):
- # ts = ''
- # if self.num_xtextsize_pages > 0:
- # tsize = deXOR(self.section_reader(self.first_xtextsize_page), 0, self.xortable)
- # for i in xrange(self.num_text_pages):
- # xsize = struct.unpack('>H', tsize[0:2])[0]
- # ts += "%d\n" % xsize
- # tsize = tsize[2:]
- # return ts
-
- # def getBookInfo(self):
- # bkinfo = ''
- # if self.num_bookinfo_pages > 0:
- # info = self.section_reader(self.first_bookinfo_page)
- # bkinfo = deXOR(info, 0, self.xortable)
- # bkinfo = bkinfo.replace('\0','|')
- # bkinfo += '\n'
- # return bkinfo
-
- def getText(self):
- des = Des(fixKey(self.content_key))
- r = ''
- for i in xrange(self.num_text_pages):
- logging.debug('get page %d', i)
- r += zlib.decompress(des.decrypt(self.section_reader(1 + i)))
-
- # now handle footnotes pages
- if self.num_footnote_pages > 0:
- r += '\n'
- # the record 0 of the footnote section must pass through the Xor Table to make it useful
- sect = self.section_reader(self.first_footnote_page)
- fnote_ids = deXOR(sect, 0, self.xortable)
- # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated
- des = Des(fixKey(self.content_key))
- for i in xrange(1,self.num_footnote_pages):
- logging.debug('get footnotepage %d', i)
- id_len = ord(fnote_ids[2])
- id = fnote_ids[3:3+id_len]
- fmarker = '\n' % id
- fmarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
- fmarker += '\n\n'
- r += fmarker
- fnote_ids = fnote_ids[id_len+4:]
-
- # TODO: Handle dictionary index (?) pages - which are also marked as
- # sidebar_pages (?). For now dictionary sidebars are ignored
- # For dictionaries - record 0 is null terminated strings, followed by
- # blocks of around 62000 bytes and a final block. Not sure of the
- # encoding
-
- # now handle sidebar pages
- if self.num_sidebar_pages > 0:
- r += '\n'
- # the record 0 of the sidebar section must pass through the Xor Table to make it useful
- sect = self.section_reader(self.first_sidebar_page)
- sbar_ids = deXOR(sect, 0, self.xortable)
- # the remaining records of the sidebar sections need to be decoded with the content_key and zlib inflated
- des = Des(fixKey(self.content_key))
- for i in xrange(1,self.num_sidebar_pages):
- id_len = ord(sbar_ids[2])
- id = sbar_ids[3:3+id_len]
- smarker = '\n' % id
- smarker += zlib.decompress(des.decrypt(self.section_reader(self.first_sidebar_page + i)))
- smarker += '\n\n'
- r += smarker
- sbar_ids = sbar_ids[id_len+4:]
-
- return r
-
-def cleanPML(pml):
- # Convert special characters to proper PML code. High ASCII start at (\x80, \a128) and go up to (\xff, \a255)
- pml2 = pml
- for k in xrange(128,256):
- badChar = chr(k)
- pml2 = pml2.replace(badChar, '\\a%03d' % k)
- return pml2
-
-def decryptBook(infile, outpath, make_pmlz, user_key):
- bookname = os.path.splitext(os.path.basename(infile))[0]
- if make_pmlz:
- # outpath is actually pmlz name
- pmlzname = outpath
- outdir = tempfile.mkdtemp()
- imagedirpath = os.path.join(outdir,u"images")
- else:
- pmlzname = None
- outdir = outpath
- imagedirpath = os.path.join(outdir,bookname + u"_img")
-
+ newdata = cmpdata
+ cmpdata = ''
+ newdata = dc.decompress(newdata)
+ unprocessed = dc.unconsumed_tail
+ if len(unprocessed) == 0:
+ newdata += dc.flush()
+ data += newdata
+ cmpdata += unprocessed
+ unprocessed = ''
+ return data
+
+def getfiledata(file, zi):
+ # get file name length and exta data length to find start of file data
+ local_header_offset = zi.header_offset
+
+ file.seek(local_header_offset + _FILENAME_LEN_OFFSET)
+ leninfo = file.read(2)
+ local_name_length, = struct.unpack(' 0:
- print u"Extracting images"
- if not os.path.exists(imagedirpath):
- os.makedirs(imagedirpath)
- for i in xrange(er.getNumImages()):
- name, contents = er.getImage(i)
- file(os.path.join(imagedirpath, name), 'wb').write(contents)
-
- print u"Extracting pml"
- pml_string = er.getText()
- pmlfilename = bookname + ".pml"
- file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string))
- if pmlzname is not None:
- import zipfile
- import shutil
- print u"Creating PMLZ file {0}".format(os.path.basename(pmlzname))
- myZipFile = zipfile.ZipFile(pmlzname,'w',zipfile.ZIP_STORED, False)
- list = os.listdir(outdir)
- for filename in list:
- localname = filename
- filePath = os.path.join(outdir,filename)
- if os.path.isfile(filePath):
- myZipFile.write(filePath, localname)
- elif os.path.isdir(filePath):
- imageList = os.listdir(filePath)
- localimgdir = os.path.basename(filePath)
- for image in imageList:
- localname = os.path.join(localimgdir,image)
- imagePath = os.path.join(filePath,image)
- if os.path.isfile(imagePath):
- myZipFile.write(imagePath, localname)
- myZipFile.close()
- # remove temporary directory
- shutil.rmtree(outdir, True)
- print u"Output is {0}".format(pmlzname)
- else :
- print u"Output is in {0}".format(outdir)
- print "done"
- except ValueError, e:
- print u"Error: {0}".format(e)
+ with open(infile,'rb') as infileobject:
+ bookdata = infileobject.read(58)
+ # Check for Zip
+ if bookdata[0:0+2] == "PK":
+ foundrights = False
+ foundencryption = False
+ inzip = zipfile.ZipFile(infile,'r')
+ namelist = set(inzip.namelist())
+ if 'META-INF/rights.xml' not in namelist or 'META-INF/encryption.xml' not in namelist:
+ encryption = "Unencrypted"
+ else:
+ rights = etree.fromstring(inzip.read('META-INF/rights.xml'))
+ adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
+ expr = './/%s' % (adept('encryptedKey'),)
+ bookkey = ''.join(rights.findtext(expr))
+ if len(bookkey) == 172:
+ encryption = "Adobe"
+ elif len(bookkey) == 64:
+ encryption = "B&N"
+ else:
+ encryption = "Unknown"
+ except:
traceback.print_exc()
- return 1
- return 0
-
-
-def usage():
- print u"Converts DRMed eReader books to PML Source"
- print u"Usage:"
- print u" erdr2pml [options] infile.pdb [outpath] \"your name\" credit_card_number"
- print u" "
- print u"Options: "
- print u" -h prints this message"
- print u" -p create PMLZ instead of source folder"
- print u" --make-pmlz create PMLZ instead of source folder"
- print u" "
- print u"Note:"
- print u" if outpath is ommitted, creates source in 'infile_Source' folder"
- print u" if outpath is ommitted and pmlz option, creates PMLZ 'infile.pmlz'"
- print u" if source folder created, images are in infile_img folder"
- print u" if pmlz file created, images are in images folder"
- print u" It's enough to enter the last 8 digits of the credit card number"
- return
-
-def getuser_key(name,cc):
- newname = "".join(c for c in name.lower() if c >= 'a' and c <= 'z' or c >= '0' and c <= '9')
- cc = cc.replace(" ","")
- return struct.pack('>LL', binascii.crc32(newname) & 0xffffffff,binascii.crc32(cc[-8:])& 0xffffffff)
-
-def cli_main():
- print u"eRdr2Pml v{0}. Copyright © 2009–2012 The Dark Reverser et al.".format(__version__)
+ return encryption
+def main():
argv=unicode_argv()
- try:
- opts, args = getopt.getopt(argv[1:], "hp", ["make-pmlz"])
- except getopt.GetoptError, err:
- print err.args[0]
- usage()
- return 1
- make_pmlz = False
- for o, a in opts:
- if o == "-h":
- usage()
- return 0
- elif o == "-p":
- make_pmlz = True
- elif o == "--make-pmlz":
- make_pmlz = True
-
- if len(args)!=3 and len(args)!=4:
- usage()
- return 1
-
- if len(args)==3:
- infile, name, cc = args
- if make_pmlz:
- outpath = os.path.splitext(infile)[0] + u".pmlz"
- else:
- outpath = os.path.splitext(infile)[0] + u"_Source"
- elif len(args)==4:
- infile, outpath, name, cc = args
-
- print getuser_key(name,cc).encode('hex')
-
- return decryptBook(infile, outpath, make_pmlz, getuser_key(name,cc))
-
+ print encryption(argv[1])
+ return 0
if __name__ == "__main__":
sys.stdout=SafeUnbuffered(sys.stdout)
sys.stderr=SafeUnbuffered(sys.stderr)
- sys.exit(cli_main())
-
+ sys.exit(main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py b/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py
index 4d83368..1dfef42 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py
@@ -1,797 +1,597 @@
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.6
-
-import sys
-import csv
-import os
-import math
-import getopt
-from struct import pack
-from struct import unpack
-
-
-class DocParser(object):
- def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
- self.id = os.path.basename(fileid).replace('.dat','')
- self.svgcount = 0
- self.docList = flatxml.split('\n')
- self.docSize = len(self.docList)
- self.classList = {}
- self.bookDir = bookDir
- self.gdict = gdict
- tmpList = classlst.split('\n')
- for pclass in tmpList:
- if pclass != '':
- # remove the leading period from the css name
- cname = pclass[1:]
- self.classList[cname] = True
- self.fixedimage = fixedimage
- self.ocrtext = []
- self.link_id = []
- self.link_title = []
- self.link_page = []
- self.link_href = []
- self.link_type = []
- self.dehyphen_rootid = []
- self.paracont_stemid = []
- self.parastems_stemid = []
-
-
- def getGlyph(self, gid):
- result = ''
- id='id="gl%d"' % gid
- return self.gdict.lookup(id)
-
- def glyphs_to_image(self, glyphList):
-
- def extract(path, key):
- b = path.find(key) + len(key)
- e = path.find(' ',b)
- return int(path[b:e])
-
- svgDir = os.path.join(self.bookDir,'svg')
-
- imgDir = os.path.join(self.bookDir,'img')
- imgname = self.id + '_%04d.svg' % self.svgcount
- imgfile = os.path.join(imgDir,imgname)
-
- # get glyph information
- gxList = self.getData('info.glyph.x',0,-1)
- gyList = self.getData('info.glyph.y',0,-1)
- gidList = self.getData('info.glyph.glyphID',0,-1)
-
- gids = []
- maxws = []
- maxhs = []
- xs = []
- ys = []
- gdefs = []
-
- # get path defintions, positions, dimensions for each glyph
- # that makes up the image, and find min x and min y to reposition origin
- minx = -1
- miny = -1
- for j in glyphList:
- gid = gidList[j]
- gids.append(gid)
-
- xs.append(gxList[j])
- if minx == -1: minx = gxList[j]
- else : minx = min(minx, gxList[j])
-
- ys.append(gyList[j])
- if miny == -1: miny = gyList[j]
- else : miny = min(miny, gyList[j])
-
- path = self.getGlyph(gid)
- gdefs.append(path)
-
- maxws.append(extract(path,'width='))
- maxhs.append(extract(path,'height='))
-
-
- # change the origin to minx, miny and calc max height and width
- maxw = maxws[0] + xs[0] - minx
- maxh = maxhs[0] + ys[0] - miny
- for j in xrange(0, len(xs)):
- xs[j] = xs[j] - minx
- ys[j] = ys[j] - miny
- maxw = max( maxw, (maxws[j] + xs[j]) )
- maxh = max( maxh, (maxhs[j] + ys[j]) )
-
- # open the image file for output
- ifile = open(imgfile,'w')
- ifile.write('\n')
- ifile.write('\n')
- ifile.write('')
- ifile.close()
-
- return 0
-
-
-
- # return tag at line pos in document
- def lineinDoc(self, pos) :
- if (pos >= 0) and (pos < self.docSize) :
- item = self.docList[pos]
- if item.find('=') >= 0:
- (name, argres) = item.split('=',1)
- else :
- name = item
- argres = ''
- return name, argres
-
-
- # find tag in doc if within pos to end inclusive
- def findinDoc(self, tagpath, pos, end) :
- result = None
- if end == -1 :
- end = self.docSize
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# erdr2pml.py
+# Copyright © 2008 The Dark Reverser
+#
+# Modified 2008–2012 by some_updates, DiapDealer and Apprentice Alf
+
+# This is a python script. You need a Python interpreter to run it.
+# For example, ActiveState Python, which exists for windows.
+# Changelog
+#
+# Based on ereader2html version 0.08 plus some later small fixes
+#
+# 0.01 - Initial version
+# 0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug.
+# 0.03 - Fix incorrect variable usage at one place.
+# 0.03b - enhancement by DeBockle (version 259 support)
+# Custom version 0.03 - no change to eReader support, only usability changes
+# - start of pep-8 indentation (spaces not tab), fix trailing blanks
+# - version variable, only one place to change
+# - added main routine, now callable as a library/module,
+# means tools can add optional support for ereader2html
+# - outdir is no longer a mandatory parameter (defaults based on input name if missing)
+# - time taken output to stdout
+# - Psyco support - reduces runtime by a factor of (over) 3!
+# E.g. (~600Kb file) 90 secs down to 24 secs
+# - newstyle classes
+# - changed map call to list comprehension
+# may not work with python 2.3
+# without Psyco this reduces runtime to 90%
+# E.g. 90 secs down to 77 secs
+# Psyco with map calls takes longer, do not run with map in Psyco JIT!
+# - izip calls used instead of zip (if available), further reduction
+# in run time (factor of 4.5).
+# E.g. (~600Kb file) 90 secs down to 20 secs
+# - Python 2.6+ support, avoid DeprecationWarning with sha/sha1
+# 0.04 - Footnote support, PML output, correct charset in html, support more PML tags
+# - Feature change, dump out PML file
+#  - Added support for footnote tags. NOTE footnote ids appear to be bad (not usable)
+# in some pdb files :-( due to the same id being used multiple times
+# - Added correct charset encoding (pml is based on cp1252)
+# - Added logging support.
+# 0.05 - Improved type 272 support for sidebars, links, chapters, metainfo, etc
+# 0.06 - Merge of 0.04 and 0.05. Improved HTML output
+# Placed images in subfolder, so that it's possible to just
+# drop the book.pml file onto DropBook to make an unencrypted
+# copy of the eReader file.
+# Using that with Calibre works a lot better than the HTML
+# conversion in this code.
+# 0.07 - Further Improved type 272 support for sidebars with all earlier fixes
+# 0.08 - fixed typos, removed extraneous things
+# 0.09 - fixed typos in first_pages to first_page to again support older formats
+# 0.10 - minor cleanups
+# 0.11 - fixups for using correct xml for footnotes and sidebars for use with Dropbook
+# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name.
+# 0.13 - change to unbuffered stdout for use with gui front ends
+# 0.14 - contributed enhancement to support --make-pmlz switch
+# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.
+# 0.16 - convert to use openssl DES (very very fast) or pure python DES if openssl's libcrypto is not available
+# 0.17 - added support for pycrypto's DES as well
+# 0.18 - on Windows try PyCrypto first and OpenSSL next
+# 0.19 - Modify the interface to allow use of import
+# 0.20 - modify to allow use inside new interface for calibre plugins
+# 0.21 - Support eReader (drm) version 11.
+# - Don't reject dictionary format.
+# - Ignore sidebars for dictionaries (different format?)
+# 0.22 - Unicode and plugin support, different image folders for PMLZ and source
+# 0.23 - moved unicode_argv call inside main for Windows DeDRM compatibility
+
+__version__='0.23'
+
+import sys, re
+import struct, binascii, getopt, zlib, os, os.path, urllib, tempfile, traceback
+
+if 'calibre' in sys.modules:
+ inCalibre = True
+else:
+ inCalibre = False
+
+# Wrap a stream so that output gets flushed immediately
+# and also make sure that any unicode strings get
+# encoded using "replace" before writing them.
+class SafeUnbuffered:
+ def __init__(self, stream):
+ self.stream = stream
+ self.encoding = stream.encoding
+ if self.encoding == None:
+ self.encoding = "utf-8"
+ def write(self, data):
+ if isinstance(data,unicode):
+ data = data.encode(self.encoding,"replace")
+ self.stream.write(data)
+ self.stream.flush()
+ def __getattr__(self, attr):
+ return getattr(self.stream, attr)
+
+iswindows = sys.platform.startswith('win')
+isosx = sys.platform.startswith('darwin')
+
+def unicode_argv():
+ if iswindows:
+ # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
+ # strings.
+
+ # Versions 2.x of Python don't support Unicode in sys.argv on
+ # Windows, with the underlying Windows API instead replacing multi-byte
+ # characters with '?'.
+
+
+ from ctypes import POINTER, byref, cdll, c_int, windll
+ from ctypes.wintypes import LPCWSTR, LPWSTR
+
+ GetCommandLineW = cdll.kernel32.GetCommandLineW
+ GetCommandLineW.argtypes = []
+ GetCommandLineW.restype = LPCWSTR
+
+ CommandLineToArgvW = windll.shell32.CommandLineToArgvW
+ CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
+ CommandLineToArgvW.restype = POINTER(LPWSTR)
+
+ cmd = GetCommandLineW()
+ argc = c_int(0)
+ argv = CommandLineToArgvW(cmd, byref(argc))
+ if argc.value > 0:
+ # Remove Python executable and commands if present
+ start = argc.value - len(sys.argv)
+ return [argv[i] for i in
+ xrange(start, argc.value)]
+ # if we don't have any arguments at all, just pass back script name
+ # this should never happen
+ return [u"mobidedrm.py"]
+ else:
+ argvencoding = sys.stdin.encoding
+ if argvencoding == None:
+ argvencoding = "utf-8"
+ return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
+
+Des = None
+if iswindows:
+ # first try with pycrypto
+ if inCalibre:
+ from calibre_plugins.dedrm import pycrypto_des
+ else:
+ import pycrypto_des
+ Des = pycrypto_des.load_pycrypto()
+ if Des == None:
+ # then try with openssl
+ if inCalibre:
+ from calibre_plugins.dedrm import openssl_des
else:
- end = min(self.docSize, end)
- foundat = -1
- for j in xrange(pos, end):
- item = self.docList[j]
- if item.find('=') >= 0:
- (name, argres) = item.split('=',1)
- else :
- name = item
- argres = ''
- if name.endswith(tagpath) :
- result = argres
- foundat = j
- break
- return foundat, result
-
-
- # return list of start positions for the tagpath
- def posinDoc(self, tagpath):
- startpos = []
- pos = 0
- res = ""
- while res != None :
- (foundpos, res) = self.findinDoc(tagpath, pos, -1)
- if res != None :
- startpos.append(foundpos)
- pos = foundpos + 1
- return startpos
-
-
- # returns a vector of integers for the tagpath
- def getData(self, tagpath, pos, end):
- argres=[]
- (foundat, argt) = self.findinDoc(tagpath, pos, end)
- if (argt != None) and (len(argt) > 0) :
- argList = argt.split('|')
- argres = [ int(strval) for strval in argList]
- return argres
-
-
- # get the class
- def getClass(self, pclass):
- nclass = pclass
-
- # class names are an issue given topaz may start them with numerals (not allowed),
- # use a mix of cases (which cause some browsers problems), and actually
- # attach numbers after "_reclustered*" to the end to deal classeses that inherit
- # from a base class (but then not actually provide all of these _reclustereed
- # classes in the stylesheet!
-
- # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
- # that exists in the stylesheet first, and then adding this specific class
- # after
-
- # also some class names have spaces in them so need to convert to dashes
- if nclass != None :
- nclass = nclass.replace(' ','-')
- classres = ''
- nclass = nclass.lower()
- nclass = 'cl-' + nclass
- baseclass = ''
- # graphic is the base class for captions
- if nclass.find('cl-cap-') >=0 :
- classres = 'graphic' + ' '
- else :
- # strip to find baseclass
- p = nclass.find('_')
- if p > 0 :
- baseclass = nclass[0:p]
- if baseclass in self.classList:
- classres += baseclass + ' '
- classres += nclass
- nclass = classres
- return nclass
-
-
- # develop a sorted description of the starting positions of
- # groups and regions on the page, as well as the page type
- def PageDescription(self):
-
- def compare(x, y):
- (xtype, xval) = x
- (ytype, yval) = y
- if xval > yval:
- return 1
- if xval == yval:
- return 0
- return -1
-
- result = []
- (pos, pagetype) = self.findinDoc('page.type',0,-1)
-
- groupList = self.posinDoc('page.group')
- groupregionList = self.posinDoc('page.group.region')
- pageregionList = self.posinDoc('page.region')
- # integrate into one list
- for j in groupList:
- result.append(('grpbeg',j))
- for j in groupregionList:
- result.append(('gregion',j))
- for j in pageregionList:
- result.append(('pregion',j))
- result.sort(compare)
-
- # insert group end and page end indicators
- inGroup = False
- j = 0
- while True:
- if j == len(result): break
- rtype = result[j][0]
- rval = result[j][1]
- if not inGroup and (rtype == 'grpbeg') :
- inGroup = True
- j = j + 1
- elif inGroup and (rtype in ('grpbeg', 'pregion')):
- result.insert(j,('grpend',rval))
- inGroup = False
+ import openssl_des
+ Des = openssl_des.load_libcrypto()
+else:
+ # first try with openssl
+ if inCalibre:
+ from calibre_plugins.dedrm import openssl_des
+ else:
+ import openssl_des
+ Des = openssl_des.load_libcrypto()
+ if Des == None:
+ # then try with pycrypto
+ if inCalibre:
+ from calibre_plugins.dedrm import pycrypto_des
+ else:
+ import pycrypto_des
+ Des = pycrypto_des.load_pycrypto()
+
+# if that did not work then use pure python implementation
+# of DES and try to speed it up with Psyco
+if Des == None:
+ if inCalibre:
+ from calibre_plugins.dedrm import python_des
+ else:
+ import python_des
+ Des = python_des.Des
+ # Import Psyco if available
+ try:
+ # http://psyco.sourceforge.net
+ import psyco
+ psyco.full()
+ except ImportError:
+ pass
+
+try:
+ from hashlib import sha1
+except ImportError:
+ # older Python release
+ import sha
+ sha1 = lambda s: sha.new(s)
+
+import cgi
+import logging
+
+logging.basicConfig()
+#logging.basicConfig(level=logging.DEBUG)
+
+
+class Sectionizer(object):
+ bkType = "Book"
+
+ def __init__(self, filename, ident):
+ self.contents = file(filename, 'rb').read()
+ self.header = self.contents[0:72]
+ self.num_sections, = struct.unpack('>H', self.contents[76:78])
+ # Dictionary or normal content (TODO: Not hard-coded)
+ if self.header[0x3C:0x3C+8] != ident:
+ if self.header[0x3C:0x3C+8] == "PDctPPrs":
+ self.bkType = "Dict"
else:
- j = j + 1
- if inGroup:
- result.append(('grpend',-1))
- result.append(('pageend', -1))
- return pagetype, result
-
-
-
- # build a description of the paragraph
- def getParaDescription(self, start, end, regtype):
-
- result = []
-
- # paragraph
- (pos, pclass) = self.findinDoc('paragraph.class',start,end)
-
- pclass = self.getClass(pclass)
-
- # if paragraph uses extratokens (extra glyphs) then make it fixed
- (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end)
-
- # build up a description of the paragraph in result and return it
- # first check for the basic - all words paragraph
- (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
- (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
- if (sfirst != None) and (slast != None) :
- first = int(sfirst)
- last = int(slast)
-
- makeImage = (regtype == 'vertical') or (regtype == 'table')
- makeImage = makeImage or (extraglyphs != None)
- if self.fixedimage:
- makeImage = makeImage or (regtype == 'fixed')
-
- if (pclass != None):
- makeImage = makeImage or (pclass.find('.inverted') >= 0)
- if self.fixedimage :
- makeImage = makeImage or (pclass.find('cl-f-') >= 0)
-
- # before creating an image make sure glyph info exists
- gidList = self.getData('info.glyph.glyphID',0,-1)
-
- makeImage = makeImage & (len(gidList) > 0)
-
- if not makeImage :
- # standard all word paragraph
- for wordnum in xrange(first, last):
- result.append(('ocr', wordnum))
- return pclass, result
-
- # convert paragraph to svg image
- # translate first and last word into first and last glyphs
- # and generate inline image and include it
- glyphList = []
- firstglyphList = self.getData('word.firstGlyph',0,-1)
- gidList = self.getData('info.glyph.glyphID',0,-1)
- firstGlyph = firstglyphList[first]
- if last < len(firstglyphList):
- lastGlyph = firstglyphList[last]
- else :
- lastGlyph = len(gidList)
-
- # handle case of white sapce paragraphs with no actual glyphs in them
- # by reverting to text based paragraph
- if firstGlyph >= lastGlyph:
- # revert to standard text based paragraph
- for wordnum in xrange(first, last):
- result.append(('ocr', wordnum))
- return pclass, result
-
- for glyphnum in xrange(firstGlyph, lastGlyph):
- glyphList.append(glyphnum)
- # include any extratokens if they exist
- (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
- (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
- if (sfg != None) and (slg != None):
- for glyphnum in xrange(int(sfg), int(slg)):
- glyphList.append(glyphnum)
- num = self.svgcount
- self.glyphs_to_image(glyphList)
- self.svgcount += 1
- result.append(('svg', num))
- return pclass, result
-
- # this type of paragraph may be made up of multiple spans, inline
- # word monograms (images), and words with semantic meaning,
- # plus glyphs used to form starting letter of first word
-
- # need to parse this type line by line
- line = start + 1
- word_class = ''
-
- # if end is -1 then we must search to end of document
- if end == -1 :
- end = self.docSize
-
- # seems some xml has last* coming before first* so we have to
- # handle any order
- sp_first = -1
- sp_last = -1
-
- gl_first = -1
- gl_last = -1
-
- ws_first = -1
- ws_last = -1
-
- word_class = ''
-
- word_semantic_type = ''
-
- while (line < end) :
-
- (name, argres) = self.lineinDoc(line)
-
- if name.endswith('span.firstWord') :
- sp_first = int(argres)
-
- elif name.endswith('span.lastWord') :
- sp_last = int(argres)
-
- elif name.endswith('word.firstGlyph') :
- gl_first = int(argres)
-
- elif name.endswith('word.lastGlyph') :
- gl_last = int(argres)
-
- elif name.endswith('word_semantic.firstWord'):
- ws_first = int(argres)
-
- elif name.endswith('word_semantic.lastWord'):
- ws_last = int(argres)
-
- elif name.endswith('word.class'):
- # we only handle spaceafter word class
- try:
- (cname, space) = argres.split('-',1)
- if space == '' : space = '0'
- if (cname == 'spaceafter') and (int(space) > 0) :
- word_class = 'sa'
- except:
- pass
-
- elif name.endswith('word.img.src'):
- result.append(('img' + word_class, int(argres)))
- word_class = ''
-
- elif name.endswith('region.img.src'):
- result.append(('img' + word_class, int(argres)))
-
- if (sp_first != -1) and (sp_last != -1):
- for wordnum in xrange(sp_first, sp_last):
- result.append(('ocr', wordnum))
- sp_first = -1
- sp_last = -1
-
- if (gl_first != -1) and (gl_last != -1):
- glyphList = []
- for glyphnum in xrange(gl_first, gl_last):
- glyphList.append(glyphnum)
- num = self.svgcount
- self.glyphs_to_image(glyphList)
- self.svgcount += 1
- result.append(('svg', num))
- gl_first = -1
- gl_last = -1
-
- if (ws_first != -1) and (ws_last != -1):
- for wordnum in xrange(ws_first, ws_last):
- result.append(('ocr', wordnum))
- ws_first = -1
- ws_last = -1
-
- line += 1
-
- return pclass, result
-
-
- def buildParagraph(self, pclass, pdesc, type, regtype) :
- parares = ''
- sep =''
-
- classres = ''
- if pclass :
- classres = ' class="' + pclass + '"'
-
- br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
-
- handle_links = len(self.link_id) > 0
-
- if (type == 'full') or (type == 'begin') :
- parares += ''
-
- if (type == 'end'):
- parares += ' '
-
- lstart = len(parares)
-
- cnt = len(pdesc)
-
- for j in xrange( 0, cnt) :
-
- (wtype, num) = pdesc[j]
-
- if wtype == 'ocr' :
- word = self.ocrtext[num]
- sep = ' '
-
- if handle_links:
- link = self.link_id[num]
- if (link > 0):
- linktype = self.link_type[link-1]
- title = self.link_title[link-1]
- if (title == "") or (parares.rfind(title) < 0):
- title=parares[lstart:]
- if linktype == 'external' :
- linkhref = self.link_href[link-1]
- linkhtml = '' % linkhref
- else :
- if len(self.link_page) >= link :
- ptarget = self.link_page[link-1] - 1
- linkhtml = '' % ptarget
- else :
- # just link to the current page
- linkhtml = ''
- linkhtml += title + ''
- pos = parares.rfind(title)
- if pos >= 0:
- parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
- else :
- parares += linkhtml
- lstart = len(parares)
- if word == '_link_' : word = ''
- elif (link < 0) :
- if word == '_link_' : word = ''
-
- if word == '_lb_':
- if ((num-1) in self.dehyphen_rootid ) or handle_links:
- word = ''
- sep = ''
- elif br_lb :
- word = '
\n'
- sep = ''
- else :
- word = '\n'
- sep = ''
-
- if num in self.dehyphen_rootid :
- word = word[0:-1]
- sep = ''
-
- parares += word + sep
-
- elif wtype == 'img' :
- sep = ''
- parares += '' % num
- parares += sep
-
- elif wtype == 'imgsa' :
- sep = ' '
- parares += '' % num
- parares += sep
-
- elif wtype == 'svg' :
- sep = ''
- parares += '' % num
- parares += sep
-
- if len(sep) > 0 : parares = parares[0:-1]
- if (type == 'full') or (type == 'end') :
- parares += '
'
- return parares
-
-
- def buildTOCEntry(self, pdesc) :
- parares = ''
- sep =''
- tocentry = ''
- handle_links = len(self.link_id) > 0
-
- lstart = 0
-
- cnt = len(pdesc)
- for j in xrange( 0, cnt) :
-
- (wtype, num) = pdesc[j]
-
- if wtype == 'ocr' :
- word = self.ocrtext[num]
- sep = ' '
-
- if handle_links:
- link = self.link_id[num]
- if (link > 0):
- linktype = self.link_type[link-1]
- title = self.link_title[link-1]
- title = title.rstrip('. ')
- alt_title = parares[lstart:]
- alt_title = alt_title.strip()
- # now strip off the actual printed page number
- alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.')
- alt_title = alt_title.rstrip('. ')
- # skip over any external links - can't have them in a books toc
- if linktype == 'external' :
- title = ''
- alt_title = ''
- linkpage = ''
- else :
- if len(self.link_page) >= link :
- ptarget = self.link_page[link-1] - 1
- linkpage = '%04d' % ptarget
- else :
- # just link to the current page
- linkpage = self.id[4:]
- if len(alt_title) >= len(title):
- title = alt_title
- if title != '' and linkpage != '':
- tocentry += title + '|' + linkpage + '\n'
- lstart = len(parares)
- if word == '_link_' : word = ''
- elif (link < 0) :
- if word == '_link_' : word = ''
-
- if word == '_lb_':
- word = ''
- sep = ''
-
- if num in self.dehyphen_rootid :
- word = word[0:-1]
- sep = ''
-
- parares += word + sep
-
- else :
- continue
-
- return tocentry
-
-
-
-
- # walk the document tree collecting the information needed
- # to build an html page using the ocrText
-
- def process(self):
-
- tocinfo = ''
- hlst = []
-
- # get the ocr text
- (pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
- if argres : self.ocrtext = argres.split('|')
-
- # get information to dehyphenate the text
- self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)
-
- # determine if first paragraph is continued from previous page
- (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
- first_para_continued = (self.parastems_stemid != None)
-
- # determine if last paragraph is continued onto the next page
- (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
- last_para_continued = (self.paracont_stemid != None)
-
- # collect link ids
- self.link_id = self.getData('info.word.link_id',0,-1)
-
- # collect link destination page numbers
- self.link_page = self.getData('info.links.page',0,-1)
-
- # collect link types (container versus external)
- (pos, argres) = self.findinDoc('info.links.type',0,-1)
- if argres : self.link_type = argres.split('|')
-
- # collect link destinations
- (pos, argres) = self.findinDoc('info.links.href',0,-1)
- if argres : self.link_href = argres.split('|')
-
- # collect link titles
- (pos, argres) = self.findinDoc('info.links.title',0,-1)
- if argres :
- self.link_title = argres.split('|')
+ raise ValueError('Invalid file format')
+ self.sections = []
+ for i in xrange(self.num_sections):
+ offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
+ flags, val = a1, a2<<16|a3<<8|a4
+ self.sections.append( (offset, flags, val) )
+ def loadSection(self, section):
+ if section + 1 == self.num_sections:
+ end_off = len(self.contents)
else:
- self.link_title.append('')
-
- # get a descriptions of the starting points of the regions
- # and groups on the page
- (pagetype, pageDesc) = self.PageDescription()
- regcnt = len(pageDesc) - 1
-
- anchorSet = False
- breakSet = False
- inGroup = False
-
- # process each region on the page and convert what you can to html
-
- for j in xrange(regcnt):
-
- (etype, start) = pageDesc[j]
- (ntype, end) = pageDesc[j+1]
+ end_off = self.sections[section + 1][0]
+ off = self.sections[section][0]
+ return self.contents[off:end_off]
+
+# cleanup unicode filenames
+# borrowed from calibre from calibre/src/calibre/__init__.py
+# added in removal of control (<32) chars
+# and removal of . at start and end
+# and with some (heavily edited) code from Paul Durrant's kindlenamer.py
+def sanitizeFileName(name):
+ # substitute filename unfriendly characters
+ name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'")
+ # delete control characters
+ name = u"".join(char for char in name if ord(char)>=32)
+ # white space to single space, delete leading and trailing while space
+ name = re.sub(ur"\s", u" ", name).strip()
+ # remove leading dots
+ while len(name)>0 and name[0] == u".":
+ name = name[1:]
+ # remove trailing dots (Windows doesn't like them)
+ if name.endswith(u'.'):
+ name = name[:-1]
+ return name
+
+def fixKey(key):
+ def fixByte(b):
+ return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80)
+ return "".join([chr(fixByte(ord(a))) for a in key])
+
+def deXOR(text, sp, table):
+ r=''
+ j = sp
+ for i in xrange(len(text)):
+ r += chr(ord(table[j]) ^ ord(text[i]))
+ j = j + 1
+ if j == len(table):
+ j = 0
+ return r
+
+class EreaderProcessor(object):
+ def __init__(self, sect, user_key):
+ self.section_reader = sect.loadSection
+ data = self.section_reader(0)
+ version, = struct.unpack('>H', data[0:2])
+ self.version = version
+ logging.info('eReader file format version %s', version)
+ if version != 272 and version != 260 and version != 259:
+ raise ValueError('incorrect eReader version %d (error 1)' % version)
+ data = self.section_reader(1)
+ self.data = data
+ des = Des(fixKey(data[0:8]))
+ cookie_shuf, cookie_size = struct.unpack('>LL', des.decrypt(data[-8:]))
+ if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200:
+ raise ValueError('incorrect eReader version (error 2)')
+ input = des.decrypt(data[-cookie_size:])
+ def unshuff(data, shuf):
+ r = [''] * len(data)
+ j = 0
+ for i in xrange(len(data)):
+ j = (j + shuf) % len(data)
+ r[j] = data[i]
+ assert len("".join(r)) == len(data)
+ return "".join(r)
+ r = unshuff(input[0:-8], cookie_shuf)
+
+ drm_sub_version = struct.unpack('>H', r[0:2])[0]
+ self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1
+ self.num_image_pages = struct.unpack('>H', r[26:26+2])[0]
+ self.first_image_page = struct.unpack('>H', r[24:24+2])[0]
+ # Default values
+ self.num_footnote_pages = 0
+ self.num_sidebar_pages = 0
+ self.first_footnote_page = -1
+ self.first_sidebar_page = -1
+ if self.version == 272:
+ self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0]
+ self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0]
+ if (sect.bkType == "Book"):
+ self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0]
+ self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0]
+ # self.num_bookinfo_pages = struct.unpack('>H', r[34:34+2])[0]
+ # self.first_bookinfo_page = struct.unpack('>H', r[32:32+2])[0]
+ # self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0]
+ # self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0]
+ # self.num_link_pages = struct.unpack('>H', r[30:30+2])[0]
+ # self.first_link_page = struct.unpack('>H', r[28:28+2])[0]
+ # self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0]
+ # self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0]
+
+ # **before** data record 1 was decrypted and unshuffled, it contained data
+ # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc
+ self.xortable_offset = struct.unpack('>H', r[40:40+2])[0]
+ self.xortable_size = struct.unpack('>H', r[42:42+2])[0]
+ self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size]
+ else:
+ # Nothing needs to be done
+ pass
+ # self.num_bookinfo_pages = 0
+ # self.num_chapter_pages = 0
+ # self.num_link_pages = 0
+ # self.num_xtextsize_pages = 0
+ # self.first_bookinfo_page = -1
+ # self.first_chapter_page = -1
+ # self.first_link_page = -1
+ # self.first_xtextsize_page = -1
+
+ logging.debug('self.num_text_pages %d', self.num_text_pages)
+ logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page)
+ logging.debug('self.num_sidebar_pages %d, self.first_sidebar_page %d', self.num_sidebar_pages , self.first_sidebar_page)
+ self.flags = struct.unpack('>L', r[4:8])[0]
+ reqd_flags = (1<<9) | (1<<7) | (1<<10)
+ if (self.flags & reqd_flags) != reqd_flags:
+ print "Flags: 0x%X" % self.flags
+ raise ValueError('incompatible eReader file')
+ des = Des(fixKey(user_key))
+ if version == 259:
+ if drm_sub_version != 7:
+ raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
+ encrypted_key_sha = r[44:44+20]
+ encrypted_key = r[64:64+8]
+ elif version == 260:
+ if drm_sub_version != 13 and drm_sub_version != 11:
+ raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
+ if drm_sub_version == 13:
+ encrypted_key = r[44:44+8]
+ encrypted_key_sha = r[52:52+20]
+ else:
+ encrypted_key = r[64:64+8]
+ encrypted_key_sha = r[44:44+20]
+ elif version == 272:
+ encrypted_key = r[172:172+8]
+ encrypted_key_sha = r[56:56+20]
+ self.content_key = des.decrypt(encrypted_key)
+ if sha1(self.content_key).digest() != encrypted_key_sha:
+ raise ValueError('Incorrect Name and/or Credit Card')
+
+ def getNumImages(self):
+ return self.num_image_pages
+
+ def getImage(self, i):
+ sect = self.section_reader(self.first_image_page + i)
+ name = sect[4:4+32].strip('\0')
+ data = sect[62:]
+ return sanitizeFileName(unicode(name,'windows-1252')), data
+
+
+ # def getChapterNamePMLOffsetData(self):
+ # cv = ''
+ # if self.num_chapter_pages > 0:
+ # for i in xrange(self.num_chapter_pages):
+ # chaps = self.section_reader(self.first_chapter_page + i)
+ # j = i % self.xortable_size
+ # offname = deXOR(chaps, j, self.xortable)
+ # offset = struct.unpack('>L', offname[0:4])[0]
+ # name = offname[4:].strip('\0')
+ # cv += '%d|%s\n' % (offset, name)
+ # return cv
+
+ # def getLinkNamePMLOffsetData(self):
+ # lv = ''
+ # if self.num_link_pages > 0:
+ # for i in xrange(self.num_link_pages):
+ # links = self.section_reader(self.first_link_page + i)
+ # j = i % self.xortable_size
+ # offname = deXOR(links, j, self.xortable)
+ # offset = struct.unpack('>L', offname[0:4])[0]
+ # name = offname[4:].strip('\0')
+ # lv += '%d|%s\n' % (offset, name)
+ # return lv
+
+ # def getExpandedTextSizesData(self):
+ # ts = ''
+ # if self.num_xtextsize_pages > 0:
+ # tsize = deXOR(self.section_reader(self.first_xtextsize_page), 0, self.xortable)
+ # for i in xrange(self.num_text_pages):
+ # xsize = struct.unpack('>H', tsize[0:2])[0]
+ # ts += "%d\n" % xsize
+ # tsize = tsize[2:]
+ # return ts
+
+ # def getBookInfo(self):
+ # bkinfo = ''
+ # if self.num_bookinfo_pages > 0:
+ # info = self.section_reader(self.first_bookinfo_page)
+ # bkinfo = deXOR(info, 0, self.xortable)
+ # bkinfo = bkinfo.replace('\0','|')
+ # bkinfo += '\n'
+ # return bkinfo
+
+ def getText(self):
+ des = Des(fixKey(self.content_key))
+ r = ''
+ for i in xrange(self.num_text_pages):
+ logging.debug('get page %d', i)
+ r += zlib.decompress(des.decrypt(self.section_reader(1 + i)))
+
+ # now handle footnotes pages
+ if self.num_footnote_pages > 0:
+ r += '\n'
+ # the record 0 of the footnote section must pass through the Xor Table to make it useful
+ sect = self.section_reader(self.first_footnote_page)
+ fnote_ids = deXOR(sect, 0, self.xortable)
+ # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated
+ des = Des(fixKey(self.content_key))
+ for i in xrange(1,self.num_footnote_pages):
+ logging.debug('get footnotepage %d', i)
+ id_len = ord(fnote_ids[2])
+ id = fnote_ids[3:3+id_len]
+ fmarker = '\n' % id
+ fmarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
+ fmarker += '\n\n'
+ r += fmarker
+ fnote_ids = fnote_ids[id_len+4:]
+
+ # TODO: Handle dictionary index (?) pages - which are also marked as
+ # sidebar_pages (?). For now dictionary sidebars are ignored
+ # For dictionaries - record 0 is null terminated strings, followed by
+ # blocks of around 62000 bytes and a final block. Not sure of the
+ # encoding
+
+ # now handle sidebar pages
+ if self.num_sidebar_pages > 0:
+ r += '\n'
+ # the record 0 of the sidebar section must pass through the Xor Table to make it useful
+ sect = self.section_reader(self.first_sidebar_page)
+ sbar_ids = deXOR(sect, 0, self.xortable)
+ # the remaining records of the sidebar sections need to be decoded with the content_key and zlib inflated
+ des = Des(fixKey(self.content_key))
+ for i in xrange(1,self.num_sidebar_pages):
+ id_len = ord(sbar_ids[2])
+ id = sbar_ids[3:3+id_len]
+ smarker = '\n' % id
+ smarker += zlib.decompress(des.decrypt(self.section_reader(self.first_sidebar_page + i)))
+ smarker += '\n\n'
+ r += smarker
+ sbar_ids = sbar_ids[id_len+4:]
+
+ return r
+
def cleanPML(pml):
    """Escape high bytes as PML character codes.

    Every byte in the range 0x80-0xFF is replaced by its PML escape
    '\\aNNN' (\\a128 .. \\a255); plain ASCII passes through untouched.
    Single pass over the text instead of 128 whole-string .replace()
    scans, and no py2-only xrange.
    """
    return ''.join('\\a%03d' % ord(ch) if 128 <= ord(ch) <= 255 else ch
                   for ch in pml)
+
+def decryptBook(infile, outpath, make_pmlz, user_key):
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+ if make_pmlz:
+ # outpath is actually pmlz name
+ pmlzname = outpath
+ outdir = tempfile.mkdtemp()
+ imagedirpath = os.path.join(outdir,u"images")
+ else:
+ pmlzname = None
+ outdir = outpath
+ imagedirpath = os.path.join(outdir,bookname + u"_img")
+
+ try:
+ if not os.path.exists(outdir):
+ os.makedirs(outdir)
+ print u"Decoding File"
+ sect = Sectionizer(infile, 'PNRdPPrs')
+ er = EreaderProcessor(sect, user_key)
+
+ if er.getNumImages() > 0:
+ print u"Extracting images"
+ if not os.path.exists(imagedirpath):
+ os.makedirs(imagedirpath)
+ for i in xrange(er.getNumImages()):
+ name, contents = er.getImage(i)
+ file(os.path.join(imagedirpath, name), 'wb').write(contents)
+
+ print u"Extracting pml"
+ pml_string = er.getText()
+ pmlfilename = bookname + ".pml"
+ file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string))
+ if pmlzname is not None:
+ import zipfile
+ import shutil
+ print u"Creating PMLZ file {0}".format(os.path.basename(pmlzname))
+ myZipFile = zipfile.ZipFile(pmlzname,'w',zipfile.ZIP_STORED, False)
+ list = os.listdir(outdir)
+ for filename in list:
+ localname = filename
+ filePath = os.path.join(outdir,filename)
+ if os.path.isfile(filePath):
+ myZipFile.write(filePath, localname)
+ elif os.path.isdir(filePath):
+ imageList = os.listdir(filePath)
+ localimgdir = os.path.basename(filePath)
+ for image in imageList:
+ localname = os.path.join(localimgdir,image)
+ imagePath = os.path.join(filePath,image)
+ if os.path.isfile(imagePath):
+ myZipFile.write(imagePath, localname)
+ myZipFile.close()
+ # remove temporary directory
+ shutil.rmtree(outdir, True)
+ print u"Output is {0}".format(pmlzname)
+ else :
+ print u"Output is in {0}".format(outdir)
+ print "done"
+ except ValueError, e:
+ print u"Error: {0}".format(e)
+ traceback.print_exc()
+ return 1
+ return 0
+
+
+def usage():
+ print u"Converts DRMed eReader books to PML Source"
+ print u"Usage:"
+ print u" erdr2pml [options] infile.pdb [outpath] \"your name\" credit_card_number"
+ print u" "
+ print u"Options: "
+ print u" -h prints this message"
+ print u" -p create PMLZ instead of source folder"
+ print u" --make-pmlz create PMLZ instead of source folder"
+ print u" "
+ print u"Note:"
+ print u" if outpath is ommitted, creates source in 'infile_Source' folder"
+ print u" if outpath is ommitted and pmlz option, creates PMLZ 'infile.pmlz'"
+ print u" if source folder created, images are in infile_img folder"
+ print u" if pmlz file created, images are in images folder"
+ print u" It's enough to enter the last 8 digits of the credit card number"
+ return
+
def getuser_key(name, cc):
    """Derive the 8-byte eReader user key from name and credit card number.

    The key is two big-endian uint32 CRC-32s packed together: one of the
    lowercased, alphanumeric-only owner name, one of the last 8 digits of
    the credit card number (spaces stripped).
    """
    # keep only [a-z0-9] from the lowercased name
    newname = "".join(c for c in name.lower()
                      if 'a' <= c <= 'z' or '0' <= c <= '9')
    cc = cc.replace(" ", "")
    # Encode explicitly: argv values arrive as unicode (see unicode_argv),
    # and binascii.crc32 wants bytes; the filtered data is ASCII by
    # construction, so this is lossless.  Masking with 0xffffffff keeps the
    # CRC unsigned for struct's '>L'.
    return struct.pack('>LL',
                       binascii.crc32(newname.encode('ascii')) & 0xffffffff,
                       binascii.crc32(cc[-8:].encode('ascii')) & 0xffffffff)
+
# NOTE(review): this span is diff residue ('+'/'-' prefixed, indentation
# collapsed); kept byte-identical.  It is the post-image of cli_main() from
# erdr2pml.py: parse options, derive the user key, call decryptBook().
+def cli_main():
+ print u"eRdr2Pml v{0}. Copyright © 2009–2012 The Dark Reverser et al.".format(__version__)
+
+ argv=unicode_argv()
+ try:
+ opts, args = getopt.getopt(argv[1:], "hp", ["make-pmlz"])
+ except getopt.GetoptError, err:
+ print err.args[0]
+ usage()
+ return 1
+ make_pmlz = False
+ for o, a in opts:
+ if o == "-h":
+ usage()
+ return 0
+ elif o == "-p":
+ make_pmlz = True
+ elif o == "--make-pmlz":
+ make_pmlz = True
+
# Accepts either 3 args (outpath derived from infile) or 4 args (explicit
# outpath).
+ if len(args)!=3 and len(args)!=4:
+ usage()
+ return 1
+
+ if len(args)==3:
+ infile, name, cc = args
+ if make_pmlz:
+ outpath = os.path.splitext(infile)[0] + u".pmlz"
+ else:
+ outpath = os.path.splitext(infile)[0] + u"_Source"
+ elif len(args)==4:
+ infile, outpath, name, cc = args
# NOTE(review): prints the derived key in hex (debugging aid).  Indentation
# was lost in this diff, so whether this runs only in the 4-arg branch or
# unconditionally cannot be verified from here — confirm against upstream.
+ print getuser_key(name,cc).encode('hex')
# NOTE(review): the four '-' lines below belong to a different file's hunk
# (flatxml2html.py DocParser.process) — the surrounding diff is corrupted.
- # set anchor for link target on this page
- if not anchorSet and not first_para_continued:
- hlst.append('\n')
- anchorSet = True
+ return decryptBook(infile, outpath, make_pmlz, getuser_key(name,cc))
- # handle groups of graphics with text captions
- if (etype == 'grpbeg'):
- (pos, grptype) = self.findinDoc('group.type', start, end)
- if grptype != None:
- if grptype == 'graphic':
- gcstr = ' class="' + grptype + '"'
- hlst.append('')
- inGroup = True
- elif (etype == 'grpend'):
- if inGroup:
- hlst.append('
\n')
- inGroup = False
+if __name__ == "__main__":
# Script entry point: wrap the standard streams (SafeUnbuffered is defined
# elsewhere in this file) before running the CLI, and exit with its status.
+ sys.stdout=SafeUnbuffered(sys.stdout)
+ sys.stderr=SafeUnbuffered(sys.stderr)
+ sys.exit(cli_main())
- else:
- (pos, regtype) = self.findinDoc('region.type',start,end)
-
- if regtype == 'graphic' :
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- if inGroup:
- hlst.append('' % int(simgsrc))
- else:
- hlst.append('' % int(simgsrc))
-
- elif regtype == 'chapterheading' :
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if not breakSet:
- hlst.append('
\n')
- breakSet = True
- tag = 'h1'
- if pclass and (len(pclass) >= 7):
- if pclass[3:7] == 'ch1-' : tag = 'h1'
- if pclass[3:7] == 'ch2-' : tag = 'h2'
- if pclass[3:7] == 'ch3-' : tag = 'h3'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- else:
- hlst.append('<' + tag + '>')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('' + tag + '>')
-
- elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
- ptype = 'full'
- # check to see if this is a continution from the previous page
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if pclass and (len(pclass) >= 6) and (ptype == 'full'):
- tag = 'p'
- if pclass[3:6] == 'h1-' : tag = 'h4'
- if pclass[3:6] == 'h2-' : tag = 'h5'
- if pclass[3:6] == 'h3-' : tag = 'h6'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('' + tag + '>')
- else :
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
- elif (regtype == 'tocentry') :
- ptype = 'full'
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- tocinfo += self.buildTOCEntry(pdesc)
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
- elif (regtype == 'vertical') or (regtype == 'table') :
- ptype = 'full'
- if inGroup:
- ptype = 'middle'
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start, end, regtype)
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
-
- elif (regtype == 'synth_fcvr.center'):
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- hlst.append('' % int(simgsrc))
-
- else :
- print ' Making region type', regtype,
- (pos, temp) = self.findinDoc('paragraph',start,end)
- (pos2, temp) = self.findinDoc('span',start,end)
- if pos != -1 or pos2 != -1:
- print ' a "text" region'
- orig_regtype = regtype
- regtype = 'fixed'
- ptype = 'full'
- # check to see if this is a continution from the previous page
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if not pclass:
- if orig_regtype.endswith('.right') : pclass = 'cl-right'
- elif orig_regtype.endswith('.center') : pclass = 'cl-center'
- elif orig_regtype.endswith('.left') : pclass = 'cl-left'
- elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
- if pclass and (ptype == 'full') and (len(pclass) >= 6):
- tag = 'p'
- if pclass[3:6] == 'h1-' : tag = 'h4'
- if pclass[3:6] == 'h2-' : tag = 'h5'
- if pclass[3:6] == 'h3-' : tag = 'h6'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('' + tag + '>')
- else :
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
- else :
- print ' a "graphic" region'
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- hlst.append('' % int(simgsrc))
-
-
- htmlpage = "".join(hlst)
- if last_para_continued :
- if htmlpage[-4:] == '
':
- htmlpage = htmlpage[0:-4]
- last_para_continued = False
-
- return htmlpage, tocinfo
-
-
-def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
- # create a document parser
- dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
- htmlpage, tocinfo = dp.process()
- return htmlpage, tocinfo
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py
index 4dfd6c7..991591b 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py
@@ -1,63 +1,127 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
import sys
import csv
import os
+import math
import getopt
from struct import pack
from struct import unpack
-class PParser(object):
- def __init__(self, gd, flatxml, meta_array):
- self.gd = gd
- self.flatdoc = flatxml.split('\n')
- self.docSize = len(self.flatdoc)
- self.temp = []
-
- self.ph = -1
- self.pw = -1
- startpos = self.posinDoc('page.h') or self.posinDoc('book.h')
- for p in startpos:
- (name, argres) = self.lineinDoc(p)
- self.ph = max(self.ph, int(argres))
- startpos = self.posinDoc('page.w') or self.posinDoc('book.w')
- for p in startpos:
- (name, argres) = self.lineinDoc(p)
- self.pw = max(self.pw, int(argres))
-
- if self.ph <= 0:
- self.ph = int(meta_array.get('pageHeight', '11000'))
- if self.pw <= 0:
- self.pw = int(meta_array.get('pageWidth', '8500'))
-
- res = []
- startpos = self.posinDoc('info.glyph.x')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.x', p)
- res.extend(argres)
- self.gx = res
-
- res = []
- startpos = self.posinDoc('info.glyph.y')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.y', p)
- res.extend(argres)
- self.gy = res
-
- res = []
- startpos = self.posinDoc('info.glyph.glyphID')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.glyphID', p)
- res.extend(argres)
- self.gid = res
+class DocParser(object):
+ def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
+ self.id = os.path.basename(fileid).replace('.dat','')
+ self.svgcount = 0
+ self.docList = flatxml.split('\n')
+ self.docSize = len(self.docList)
+ self.classList = {}
+ self.bookDir = bookDir
+ self.gdict = gdict
+ tmpList = classlst.split('\n')
+ for pclass in tmpList:
+ if pclass != '':
+ # remove the leading period from the css name
+ cname = pclass[1:]
+ self.classList[cname] = True
+ self.fixedimage = fixedimage
+ self.ocrtext = []
+ self.link_id = []
+ self.link_title = []
+ self.link_page = []
+ self.link_href = []
+ self.link_type = []
+ self.dehyphen_rootid = []
+ self.paracont_stemid = []
+ self.parastems_stemid = []
+
+
+ def getGlyph(self, gid):
+ result = ''
+ id='id="gl%d"' % gid
+ return self.gdict.lookup(id)
+
# NOTE(review): diff residue kept byte-identical.  Renders the glyphs listed
# in *glyphList* into one SVG image file under bookDir/img.  The SVG output
# literals near the end of this method have been stripped by whatever
# mangled this diff — see the note there.
+ def glyphs_to_image(self, glyphList):
+
# parse an integer attribute value like 'width=123 ' out of a path string
+ def extract(path, key):
+ b = path.find(key) + len(key)
+ e = path.find(' ',b)
+ return int(path[b:e])
+
+ svgDir = os.path.join(self.bookDir,'svg')
+
+ imgDir = os.path.join(self.bookDir,'img')
+ imgname = self.id + '_%04d.svg' % self.svgcount
+ imgfile = os.path.join(imgDir,imgname)
+
+ # get glyph information
+ gxList = self.getData('info.glyph.x',0,-1)
+ gyList = self.getData('info.glyph.y',0,-1)
+ gidList = self.getData('info.glyph.glyphID',0,-1)
+
+ gids = []
+ maxws = []
+ maxhs = []
+ xs = []
+ ys = []
+ gdefs = []
+
+ # get path defintions, positions, dimensions for each glyph
+ # that makes up the image, and find min x and min y to reposition origin
+ minx = -1
+ miny = -1
+ for j in glyphList:
+ gid = gidList[j]
+ gids.append(gid)
+
+ xs.append(gxList[j])
+ if minx == -1: minx = gxList[j]
+ else : minx = min(minx, gxList[j])
+
+ ys.append(gyList[j])
+ if miny == -1: miny = gyList[j]
+ else : miny = min(miny, gyList[j])
+
+ path = self.getGlyph(gid)
+ gdefs.append(path)
+
+ maxws.append(extract(path,'width='))
+ maxhs.append(extract(path,'height='))
+
+
+ # change the origin to minx, miny and calc max height and width
+ maxw = maxws[0] + xs[0] - minx
+ maxh = maxhs[0] + ys[0] - miny
+ for j in xrange(0, len(xs)):
+ xs[j] = xs[j] - minx
+ ys[j] = ys[j] - miny
+ maxw = max( maxw, (maxws[j] + xs[j]) )
+ maxh = max( maxh, (maxhs[j] + ys[j]) )
+
+ # open the image file for output
+ ifile = open(imgfile,'w')
# NOTE(review): the write() literals below are corrupted — the angle-bracket
# content (presumably the XML prolog, the <svg> element sized maxw/maxh, and
# the glyph path/use markup built from gdefs/xs/ys) was stripped from the
# string literals.  As written this emits two newlines and an empty string.
# TODO: restore the SVG strings from upstream flatxml2html.py before use.
+ ifile.write('\n')
+ ifile.write('\n')
+ ifile.write('')
+ ifile.close()
+
+ return 0
+
# return tag at line pos in document
def lineinDoc(self, pos) :
if (pos >= 0) and (pos < self.docSize) :
- item = self.flatdoc[pos]
+ item = self.docList[pos]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
@@ -65,6 +129,7 @@ class PParser(object):
argres = ''
return name, argres
+
# find tag in doc if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
@@ -74,7 +139,7 @@ class PParser(object):
end = min(self.docSize, end)
foundat = -1
for j in xrange(pos, end):
- item = self.flatdoc[j]
+ item = self.docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
@@ -86,6 +151,7 @@ class PParser(object):
break
return foundat, result
+
# return list of start positions for the tagpath
def posinDoc(self, tagpath):
startpos = []
@@ -98,152 +164,638 @@ class PParser(object):
pos = foundpos + 1
return startpos
- def getData(self, path):
- result = None
- cnt = len(self.flatdoc)
- for j in xrange(cnt):
- item = self.flatdoc[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name.endswith(path)):
- result = argres
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
- def getDataatPos(self, path, pos):
- result = None
- item = self.flatdoc[pos]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- if (name.endswith(path)):
- result = argres
- return result
-
- def getDataTemp(self, path):
- result = None
- cnt = len(self.temp)
- for j in xrange(cnt):
- item = self.temp[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name.endswith(path)):
- result = argres
- self.temp.pop(j)
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
+ # returns a vector of integers for the tagpath
+ def getData(self, tagpath, pos, end):
+ argres=[]
+ (foundat, argt) = self.findinDoc(tagpath, pos, end)
+ if (argt != None) and (len(argt) > 0) :
+ argList = argt.split('|')
+ argres = [ int(strval) for strval in argList]
+ return argres
+
+
+ # get the class
+ def getClass(self, pclass):
+ nclass = pclass
+
+ # class names are an issue given topaz may start them with numerals (not allowed),
+ # use a mix of cases (which cause some browsers problems), and actually
+ # attach numbers after "_reclustered*" to the end to deal classeses that inherit
+ # from a base class (but then not actually provide all of these _reclustereed
+ # classes in the stylesheet!
+
+ # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
+ # that exists in the stylesheet first, and then adding this specific class
+ # after
+
+ # also some class names have spaces in them so need to convert to dashes
+ if nclass != None :
+ nclass = nclass.replace(' ','-')
+ classres = ''
+ nclass = nclass.lower()
+ nclass = 'cl-' + nclass
+ baseclass = ''
+ # graphic is the base class for captions
+ if nclass.find('cl-cap-') >=0 :
+ classres = 'graphic' + ' '
+ else :
+ # strip to find baseclass
+ p = nclass.find('_')
+ if p > 0 :
+ baseclass = nclass[0:p]
+ if baseclass in self.classList:
+ classres += baseclass + ' '
+ classres += nclass
+ nclass = classres
+ return nclass
+
+
+ # develop a sorted description of the starting positions of
+ # groups and regions on the page, as well as the page type
+ def PageDescription(self):
+
+ def compare(x, y):
+ (xtype, xval) = x
+ (ytype, yval) = y
+ if xval > yval:
+ return 1
+ if xval == yval:
+ return 0
+ return -1
- def getImages(self):
result = []
- self.temp = self.flatdoc
- while (self.getDataTemp('img') != None):
- h = self.getDataTemp('img.h')[0]
- w = self.getDataTemp('img.w')[0]
- x = self.getDataTemp('img.x')[0]
- y = self.getDataTemp('img.y')[0]
- src = self.getDataTemp('img.src')[0]
- result.append('\n' % (src, x, y, w, h))
- return result
-
- def getGlyphs(self):
+ (pos, pagetype) = self.findinDoc('page.type',0,-1)
+
+ groupList = self.posinDoc('page.group')
+ groupregionList = self.posinDoc('page.group.region')
+ pageregionList = self.posinDoc('page.region')
+ # integrate into one list
+ for j in groupList:
+ result.append(('grpbeg',j))
+ for j in groupregionList:
+ result.append(('gregion',j))
+ for j in pageregionList:
+ result.append(('pregion',j))
+ result.sort(compare)
+
+ # insert group end and page end indicators
+ inGroup = False
+ j = 0
+ while True:
+ if j == len(result): break
+ rtype = result[j][0]
+ rval = result[j][1]
+ if not inGroup and (rtype == 'grpbeg') :
+ inGroup = True
+ j = j + 1
+ elif inGroup and (rtype in ('grpbeg', 'pregion')):
+ result.insert(j,('grpend',rval))
+ inGroup = False
+ else:
+ j = j + 1
+ if inGroup:
+ result.append(('grpend',-1))
+ result.append(('pageend', -1))
+ return pagetype, result
+
+
+
+ # build a description of the paragraph
+ def getParaDescription(self, start, end, regtype):
+
result = []
- if (self.gid != None) and (len(self.gid) > 0):
- glyphs = []
- for j in set(self.gid):
- glyphs.append(j)
- glyphs.sort()
- for gid in glyphs:
- id='id="gl%d"' % gid
- path = self.gd.lookup(id)
- if path:
- result.append(id + ' ' + path)
- return result
-
-
-def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
- mlst = []
- pp = PParser(gdict, flat_xml, meta_array)
- mlst.append('\n')
- if (raw):
- mlst.append('\n')
- mlst.append('