From 694dfafd39ccb5016deb5b2801dd7f8fc4de0cbf Mon Sep 17 00:00:00 2001
From: Apprentice Alf <apprenticealf@gmail.com>
Date: Wed, 5 Jan 2011 06:53:31 +0000
Subject: [PATCH] MobiDeDRM 0.23

---
 KindleBooks_Tools/MobiDeDRM.py | 255 ++++++++++++++++++++-------------
 1 file changed, 154 insertions(+), 101 deletions(-)

diff --git a/KindleBooks_Tools/MobiDeDRM.py b/KindleBooks_Tools/MobiDeDRM.py
index 0555140..536eb78 100644
--- a/KindleBooks_Tools/MobiDeDRM.py
+++ b/KindleBooks_Tools/MobiDeDRM.py
@@ -41,12 +41,12 @@
 #  0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
 #  0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
 #  0.21 - Added support for multiple pids
+#  0.22 - revised structure to hold MobiBook as a class to allow an extended interface
+#  0.23 - fixed problem with older files with no EXTH section
 
-__version__ = '0.21'
+__version__ = '0.23'
 
 import sys
-import struct
-import binascii
 
 class Unbuffered:
     def __init__(self, stream):
@@ -56,10 +56,19 @@ class Unbuffered:
         self.stream.flush()
     def __getattr__(self, attr):
         return getattr(self.stream, attr)
+sys.stdout=Unbuffered(sys.stdout)  # install the flushing stdout wrapper once, at import time
+
+import struct
+import binascii
 
 class DrmException(Exception):
     pass
 
+
+#
+# MobiBook Utility Routines
+#
+
 # Implementation of Pukall Cipher 1
 def PC1(key, src, decryption=True):
     sum1 = 0;
@@ -71,7 +80,6 @@ def PC1(key, src, decryption=True):
     wkey = []
     for i in xrange(8):
         wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
-
     dst = ""
     for i in xrange(len(src)):
         temp1 = 0;
@@ -132,7 +140,9 @@ def getSizeOfTrailingDataEntries(ptr, size, flags):
         num += (ord(ptr[size - num - 1]) & 0x3) + 1
     return num
 
-class DrmStripper:
+
+
+class MobiBook:
     def loadSection(self, section):
         if (section + 1 == self.num_sections):
             endoff = len(self.data_file)
@@ -141,6 +151,83 @@ class DrmStripper:
         off = self.sections[section][0]
         return self.data_file[off:endoff]
 
+    def __init__(self, infile):
+        """Read infile, check the BOOKMOBI signature, then parse the section table, section 0 fields and EXTH records."""
+        # initial sanity check on file
+        self.data_file = file(infile, 'rb').read()
+        self.header = self.data_file[0:78]
+        if self.header[0x3C:0x3C+8] != 'BOOKMOBI':
+            raise DrmException("invalid file format")
+
+        # build up section offset and flag info
+        self.num_sections, = struct.unpack('>H', self.header[76:78])
+        self.sections = []
+        for i in xrange(self.num_sections):
+            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8])
+            flags, val = a1, a2<<16|a3<<8|a4
+            self.sections.append( (offset, flags, val) )
+
+        # parse information from section 0
+        self.sect = self.loadSection(0)
+        self.records, = struct.unpack('>H', self.sect[0x8:0x8+2])
+        self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18])
+        self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C])
+        print "MOBI header version = %d, length = %d" %(self.mobi_version, self.mobi_length)
+        self.extra_data_flags = 0
+        if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5):
+            self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4])
+            print "Extra Data Flags = %d" % self.extra_data_flags
+        if self.mobi_version < 7:
+            # multibyte utf8 data is included in the encryption for mobi_version 6 and below
+            # so clear that byte so that we leave it to be decrypted.
+            self.extra_data_flags &= 0xFFFE
+
+        # if exth region exists parse it for metadata array (keyed by exth record type)
+        self.meta_array = {}
+        try:
+            exth_flag, = struct.unpack('>L', self.sect[0x80:0x84])
+            exth = 'NONE'
+            if exth_flag & 0x40:
+                exth = self.sect[16 + self.mobi_length:]
+            if (len(exth) >= 4) and (exth[:4] == 'EXTH'):
+                nitems, = struct.unpack('>I', exth[8:12])
+                pos = 12
+                for i in xrange(nitems):
+                    rec_type, size = struct.unpack('>II', exth[pos: pos + 8])
+                    content = exth[pos + 8: pos + size]
+                    self.meta_array[rec_type] = content
+                    pos += size
+        except (struct.error, OverflowError):  # truncated/corrupt EXTH: carry on without metadata
+            self.meta_array = {}
+
+    def getBookTitle(self):
+        # Order of preference: meta_array entry 503, then the slice of section 0
+        # described by the (offset, length) pair at 0x54, and finally the first
+        # 32 bytes of the file header cut at the first NUL.
+        if 503 in self.meta_array:
+            book_title = self.meta_array[503]
+        else:
+            name_off, name_len = struct.unpack('>II', self.sect[0x54:0x5c])
+            book_title = self.sect[name_off:name_off + name_len]
+        if book_title == '':
+            book_title = self.header[:32].split("\0")[0]
+        return book_title
+
+    def getPIDMetaInfo(self):
+        """Return (data of exth record 209, data of the token record it names); None where missing."""
+        rec209 = None
+        token = None
+        if 209 in self.meta_array:
+            rec209 = self.meta_array[209]
+            # Each non-zero byte of the 209 data names an exth record that may hold the
+            # token; the last named record that exists wins (absent ones are skipped).
+            # Always 208 from my experience, but I'll leave the logic in case that changes.
+            for ch in rec209:
+                key = ord(ch)
+                if key != 0 and key in self.meta_array:
+                    token = self.meta_array[key]
+        return rec209, token
+
     def patch(self, off, new):
         self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
 
@@ -154,6 +241,7 @@ class DrmStripper:
         self.patch(off + in_off, new)
 
     def parseDRM(self, data, count, pidlist):
+        found_key = None
         keyvec1 = "\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96"
         for pid in pidlist:
             bigpid = pid.ljust(16,'\0')
@@ -185,111 +273,76 @@ class DrmStripper:
                         break
         return [found_key,pid]
 
-    def __init__(self, data_file, pidlist):
-        # initial sanity check on file
-        self.data_file = data_file
-        header = data_file[0:72]
-        if header[0x3C:0x3C+8] != 'BOOKMOBI':
-            raise DrmException("invalid file format")
-        self.num_sections, = struct.unpack('>H', data_file[76:78])
-
-        self.sections = []
-        for i in xrange(self.num_sections):
-            offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', data_file[78+i*8:78+i*8+8])
-            flags, val = a1, a2<<16|a3<<8|a4
-            self.sections.append( (offset, flags, val) )
-
-        sect = self.loadSection(0)
-        records, = struct.unpack('>H', sect[0x8:0x8+2])
-        mobi_length, = struct.unpack('>L',sect[0x14:0x18])
-        mobi_version, = struct.unpack('>L',sect[0x68:0x6C])
-        extra_data_flags = 0
-        print "MOBI header version = %d, length = %d" %(mobi_version, mobi_length)
-        if (mobi_length >= 0xE4) and (mobi_version >= 5):
-            extra_data_flags, = struct.unpack('>H', sect[0xF2:0xF4])
-            print "Extra Data Flags = %d" %extra_data_flags
-        if mobi_version < 7:
-            # multibyte utf8 data is included in the encryption for mobi_version 6 and below
-            # so clear that byte so that we leave it to be decrypted.
-            extra_data_flags &= 0xFFFE
-
-        crypto_type, = struct.unpack('>H', sect[0xC:0xC+2])
+    def processBook(self, pidlist):
+        """Decrypt the book with a key found via pidlist and return the file data; raises DrmException on failure."""
+        crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2])
         if crypto_type == 0:
             print "This book is not encrypted."
-        else:
-            if crypto_type == 1:
-                raise DrmException("Cannot decode Mobipocket encryption type 1")
-            if crypto_type != 2:
-                raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type)
-
-            goodpids = []
-            for pid in pidlist:
-                if len(pid)==10:
-                    if checksumPid(pid[0:-2]) != pid:
-                        print "PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2])
-                    else:
-                        goodpids.append(pid[0:-2])
-                elif len(pid)==8:
-                    print "PID without checksum given. With checksum PID is "+checksumPid(pid)
-                    goodpids.append(pid)
-
-            if len(goodpids) == 0:
-                raise DrmException("No valid PIDs supplied.")
-
-            # calculate the keys
-            drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', sect[0xA8:0xA8+16])
-            if drm_count == 0:
-                raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
-            found_key, pid = self.parseDRM(sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
-            if not found_key:
-                raise DrmException("No key found. Most likely the correct PID has not been given.")
+            return self.data_file
+        if crypto_type == 1:
+            raise DrmException("Cannot decode Mobipocket encryption type 1")
+        if crypto_type != 2:
+            raise DrmException("Cannot decode unknown Mobipocket encryption type %d" % crypto_type)
+
+        goodpids = []
+        for pid in pidlist:
+            if len(pid)==10:
+                if checksumPid(pid[0:-2]) != pid:
+                    print "Warning: PID " + pid + " has incorrect checksum, should have been "+checksumPid(pid[0:-2])
+                goodpids.append(pid[0:-2])
+            elif len(pid)==8:
+                goodpids.append(pid)
+
+        # read the DRM record pointer/count/size at 0xA8 of section 0 and look for a key
+        drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16])
+        if drm_count == 0:
+            raise DrmException("Not yet initialised with PID. Must be opened with Mobipocket Reader first.")
+        found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
+        if not found_key:
+            raise DrmException("No key found. Most likely the correct PID has not been given.")
             
-            if pid=="00000000":
-                print "File has default encryption, no specific PID."
-            else:
-                print "File is encoded with PID "+checksumPid(pid)+"."
-
-            # kill the drm keys
-            self.patchSection(0, "\0" * drm_size, drm_ptr)
-            # kill the drm pointers
-            self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
-            # clear the crypto type
-            self.patchSection(0, "\0" * 2, 0xC)
-
-            # decrypt sections
-            print "Decrypting. Please wait . . .",
-            new_data = self.data_file[:self.sections[1][0]]
-            for i in xrange(1, records+1):
-                data = self.loadSection(i)
-                extra_size = getSizeOfTrailingDataEntries(data, len(data), extra_data_flags)
-                if i%100 == 0:
-                    print ".",
-                # print "record %d, extra_size %d" %(i,extra_size)
-                new_data += PC1(found_key, data[0:len(data) - extra_size])
-                if extra_size > 0:
-                    new_data += data[-extra_size:]
-            if self.num_sections > records+1:
-                new_data += self.data_file[self.sections[records+1][0]:]
-            self.data_file = new_data
-            print "done"
-
-    def getResult(self):
+        if pid=="00000000":
+            print "File has default encryption, no specific PID."
+        else:
+            print "File is encoded with PID "+checksumPid(pid)+"."
+
+        # kill the drm keys
+        self.patchSection(0, "\0" * drm_size, drm_ptr)
+        # kill the drm pointers
+        self.patchSection(0, "\xff" * 4 + "\0" * 12, 0xA8)
+        # clear the crypto type
+        self.patchSection(0, "\0" * 2, 0xC)
+
+        # decrypt sections; pieces are collected in a list and joined once at the end
+        print "Decrypting. Please wait . . .",
+        chunks = [self.data_file[:self.sections[1][0]]]
+        for i in xrange(1, self.records+1):
+            data = self.loadSection(i)
+            extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags)
+            if i%100 == 0:
+                print ".",
+            chunks.append(PC1(found_key, data[0:len(data) - extra_size]))
+            if extra_size > 0:
+                chunks.append(data[-extra_size:])
+        if self.num_sections > self.records+1:
+            chunks.append(self.data_file[self.sections[self.records+1][0]:])
+        self.data_file = "".join(chunks)
+        print "done"
         return self.data_file
 
 def getUnencryptedBook(infile,pid):
-    sys.stdout=Unbuffered(sys.stdout)
-    data_file = file(infile, 'rb').read()
-    strippedFile = DrmStripper(data_file, [pid])
-    return strippedFile.getResult()
+    import os  # function-scope import: no module-level "import os" is visible in this file
+    if not os.path.isfile(infile):
+        raise DrmException('Input File Not Found')
+    return MobiBook(infile).processBook([pid])
 
 def getUnencryptedBookWithList(infile,pidlist):
-    sys.stdout=Unbuffered(sys.stdout)
-    data_file = file(infile, 'rb').read()
-    strippedFile = DrmStripper(data_file, pidlist)
-    return strippedFile.getResult()
+    import os  # function-scope import: no module-level "import os" is visible in this file
+    if not os.path.isfile(infile):
+        raise DrmException('Input File Not Found')
+    return MobiBook(infile).processBook(pidlist)
 
 def main(argv=sys.argv):
-    sys.stdout=Unbuffered(sys.stdout)
     print ('MobiDeDrm v%(__version__)s. '
 	   'Copyright 2008-2010 The Dark Reverser.' % globals())
     if len(argv)<4:
@@ -302,7 +355,7 @@ def main(argv=sys.argv):
         outfile = argv[2]
         pidlist = argv[3].split(',')
         try:
-            stripped_file = getUnencryptedBook(infile, pidlist)
+            stripped_file = getUnencryptedBookWithList(infile, pidlist)
             file(outfile, 'wb').write(stripped_file)
         except DrmException, e:
             print "Error: %s" % e