diff --git a/KindleBooks_Tools/MobiDeDRM.py b/KindleBooks_Tools/MobiDeDRM.py index 0555140..536eb78 100644 --- a/KindleBooks_Tools/MobiDeDRM.py +++ b/KindleBooks_Tools/MobiDeDRM.py @@ -41,12 +41,12 @@ # 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either. # 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file. # 0.21 - Added support for multiple pids +# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface +# 0.23 - fixed problem with older files with no EXTH section -__version__ = '0.21' +__version__ = '0.23' import sys -import struct -import binascii class Unbuffered: def __init__(self, stream): @@ -56,10 +56,19 @@ class Unbuffered: self.stream.flush() def __getattr__(self, attr): return getattr(self.stream, attr) +sys.stdout=Unbuffered(sys.stdout) + +import struct +import binascii class DrmException(Exception): pass + +# +# MobiBook Utility Routines +# + # Implementation of Pukall Cipher 1 def PC1(key, src, decryption=True): sum1 = 0; @@ -71,7 +80,6 @@ def PC1(key, src, decryption=True): wkey = [] for i in xrange(8): wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1])) - dst = "" for i in xrange(len(src)): temp1 = 0; @@ -132,7 +140,9 @@ def getSizeOfTrailingDataEntries(ptr, size, flags): num += (ord(ptr[size - num - 1]) & 0x3) + 1 return num -class DrmStripper: + + +class MobiBook: def loadSection(self, section): if (section + 1 == self.num_sections): endoff = len(self.data_file) @@ -141,6 +151,83 @@ class DrmStripper: off = self.sections[section][0] return self.data_file[off:endoff] + def __init__(self, infile): + # initial sanity check on file + self.data_file = file(infile, 'rb').read() + self.header = self.data_file[0:78] + if self.header[0x3C:0x3C+8] != 'BOOKMOBI': + raise DrmException("invalid file format") + + # build up section offset and flag info + self.num_sections, = struct.unpack('>H', self.header[76:78]) + self.sections = [] + for i in xrange(self.num_sections): + offset, 
a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8]) + flags, val = a1, a2<<16|a3<<8|a4 + self.sections.append( (offset, flags, val) ) + + # parse information from section 0 + self.sect = self.loadSection(0) + self.records, = struct.unpack('>H', self.sect[0x8:0x8+2]) + self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18]) + self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C]) + print "MOBI header version = %d, length = %d" %(self.mobi_version, self.mobi_length) + self.extra_data_flags = 0 + if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5): + self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4]) + print "Extra Data Flags = %d" % self.extra_data_flags + if self.mobi_version < 7: + # multibyte utf8 data is included in the encryption for mobi_version 6 and below + # so clear that byte so that we leave it to be decrypted. + self.extra_data_flags &= 0xFFFE + + # if exth region exists parse it for metadata array + self.meta_array = {} + try: + exth_flag, = struct.unpack('>L', self.sect[0x80:0x84]) + exth = 'NONE' + if exth_flag & 0x40: + exth = self.sect[16 + self.mobi_length:] + if (len(exth) >= 4) and (exth[:4] == 'EXTH'): + nitems, = struct.unpack('>I', exth[8:12]) + pos = 12 + for i in xrange(nitems): + type, size = struct.unpack('>II', exth[pos: pos + 8]) + content = exth[pos + 8: pos + size] + self.meta_array[type] = content + pos += size + except: + self.meta_array = {} + pass + + def getBookTitle(self): + title = '' + if 503 in self.meta_array: + title = self.meta_array[503] + else : + toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c]) + tend = toff + tlen + title = self.sect[toff:tend] + if title == '': + title = self.header[:32] + title = title.split("\0")[0] + return title + + def getPIDMetaInfo(self): + rec209 = None + token = None + if 209 in self.meta_array: + rec209 = self.meta_array[209] + data = rec209 + # Parse the 209 data to find the exth record with the token data. 
+ # The last character of the 209 data points to the record with the token. + # Always 208 from my experience, but I'll leave the logic in case that changes. + for i in xrange(len(data)): + if ord(data[i]) != 0: + if self.meta_array[ord(data[i])] != None: + token = self.meta_array[ord(data[i])] + return rec209, token + def patch(self, off, new): self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):] @@ -154,6 +241,7 @@ class DrmStripper: self.patch(off + in_off, new) def parseDRM(self, data, count, pidlist): + found_key = None keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96" for pid in pidlist: bigpid = pid.ljust(16,'\0') @@ -185,111 +273,76 @@ class DrmStripper: break return [found_key,pid] - def __init__(self, data_file, pidlist): - # initial sanity check on file - self.data_file = data_file - header = data_file[0:72] - if header[0x3C:0x3C+8] != 'BOOKMOBI': - raise DrmException("invalid file format") - self.num_sections, = struct.unpack('>H', data_file[76:78]) - - self.sections = [] - for i in xrange(self.num_sections): - offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', data_file[78+i*8:78+i*8+8]) - flags, val = a1, a2<<16|a3<<8|a4 - self.sections.append( (offset, flags, val) ) - - sect = self.loadSection(0) - records, = struct.unpack('>H', sect[0x8:0x8+2]) - mobi_length, = struct.unpack('>L',sect[0x14:0x18]) - mobi_version, = struct.unpack('>L',sect[0x68:0x6C]) - extra_data_flags = 0 - print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length) - if (mobi_length >= 0xE4) and (mobi_version >= 5): - extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4]) - print "Extra Data Flags = %d" %extra_data_flags - if mobi_version < 7: - # multibyte utf8 data is included in the encryption for mobi_version 6 and below - # so clear that byte so that we leave it to be decrypted. 
- extra_data_flags &= 0xFFFE - - crypto_type, = struct.unpack('>H', sect[0xC:0xC+2]) + def processBook(self, pidlist): + crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2]) if crypto_type == 0: print "This book is not encrypted." - else: - if crypto_type == 1: - raise DrmException("Cannot decode Mobipocket encryption type 1") - if crypto_type != 2: - raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type) + return self.data_file + if crypto_type == 1: + raise DrmException("Cannot decode Mobipocket encryption type 1") + if crypto_type != 2: + raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type) - goodpids = [] - for pid in pidlist: - if len(pid)==10: - if checksumPid(pid[0:-2]) != pid: - print "PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2]) - else: - goodpids.append(pid[0:-2]) - elif len(pid)==8: - print "PID without checksum given. With checksum PID is "+checksumPid(pid) - goodpids.append(pid) + goodpids = [] + for pid in pidlist: + if len(pid)==10: + if checksumPid(pid[0:-2]) != pid: + print "Warning: PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2]) + goodpids.append(pid[0:-2]) + elif len(pid)==8: + goodpids.append(pid) - if len(goodpids) == 0: - raise DrmException("No valid PIDs supplied.") - - # calculate the keys - drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16]) - if drm_count == 0: - raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.") - found_key, pid = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids) - if not found_key: - raise DrmException("No key found. Most likely the correct PID has not been given.") + # calculate the keys + drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16]) + if drm_count == 0: + raise DrmException("Not yet initialised with PID. 
Must be opened with Mobipocket Reader first.") + found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids) + if not found_key: + raise DrmException("No key found. Most likely the correct PID has not been given.") - if pid=="00000000": - print "File has default encryption, no specific PID." - else: - print "File is encoded with PID "+checksumPid(pid)+"." + if pid=="00000000": + print "File has default encryption, no specific PID." + else: + print "File is encoded with PID "+checksumPid(pid)+"." - # kill the drm keys - self.patchSection(0, "\0" * drm_size, drm_ptr) - # kill the drm pointers - self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8) - # clear the crypto type - self.patchSection(0, "\0" * 2, 0xC) + # kill the drm keys + self.patchSection(0, "\0" * drm_size, drm_ptr) + # kill the drm pointers + self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8) + # clear the crypto type + self.patchSection(0, "\0" * 2, 0xC) - # decrypt sections - print "Decrypting. Please wait . . .", - new_data = self.data_file[:self.sections[1][0]] - for i in xrange(1, records+1): - data = self.loadSection(i) - extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags) - if i%100 == 0: - print ".", - # print "record %d, extra_size %d" %(i,extra_size) - new_data += PC1(found_key, data[0:len(data) - extra_size]) - if extra_size > 0: - new_data += data[-extra_size:] - if self.num_sections > records+1: - new_data += self.data_file[self.sections[records+1][0]:] - self.data_file = new_data - print "done" - - def getResult(self): + # decrypt sections + print "Decrypting. Please wait . . 
.", + new_data = self.data_file[:self.sections[1][0]] + for i in xrange(1, self.records+1): + data = self.loadSection(i) + extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags) + if i%100 == 0: + print ".", + # print "record %d, extra_size %d" %(i,extra_size) + new_data += PC1(found_key, data[0:len(data) - extra_size]) + if extra_size > 0: + new_data += data[-extra_size:] + if self.num_sections > self.records+1: + new_data += self.data_file[self.sections[self.records+1][0]:] + self.data_file = new_data + print "done" return self.data_file def getUnencryptedBook(infile,pid): - sys.stdout=Unbuffered(sys.stdout) - data_file = file(infile, 'rb').read() - strippedFile = DrmStripper(data_file, [pid]) - return strippedFile.getResult() + if not os.path.isfile(infile): + raise DrmException('Input File Not Found') + book = MobiBook(infile) + return book.processBook([pid]) def getUnencryptedBookWithList(infile,pidlist): - sys.stdout=Unbuffered(sys.stdout) - data_file = file(infile, 'rb').read() - strippedFile = DrmStripper(data_file, pidlist) - return strippedFile.getResult() + if not os.path.isfile(infile): + raise DrmException('Input File Not Found') + book = MobiBook(infile) + return book.processBook(pidlist) def main(argv=sys.argv): - sys.stdout=Unbuffered(sys.stdout) print ('MobiDeDrm v%(__version__)s. ' 'Copyright 2008-2010 The Dark Reverser.' % globals()) if len(argv)<4: @@ -302,7 +355,7 @@ def main(argv=sys.argv): outfile = argv[2] pidlist = argv[3].split(',') try: - stripped_file = getUnencryptedBook(infile, pidlist) + stripped_file = getUnencryptedBookWithList(infile, pidlist) file(outfile, 'wb').write(stripped_file) except DrmException, e: print "Error: %s" % e