mirror of
https://github.com/apprenticeharper/DeDRM_tools
synced 2024-11-03 09:40:32 +00:00
ineptpdf 7
This commit is contained in:
parent
f027848bff
commit
4f19f5ac11
134
ineptpdf.pyw
134
ineptpdf.pyw
@ -1,6 +1,7 @@
|
||||
#! /usr/bin/python
|
||||
|
||||
# ineptpdf.pyw, version 6.1
|
||||
# ineptpdf7.pyw
|
||||
# ineptpdf, version 7
|
||||
|
||||
# To run this program install Python 2.6 from http://www.python.org/download/
|
||||
# and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
|
||||
@ -15,6 +16,10 @@
|
||||
# 5 - removing small bug with V3 ebooks (anon)
|
||||
# 6 - changed to adeptkey4.der format for 1.7.2 support (anon)
|
||||
# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der
|
||||
# 7 - Get cross reference streams and object streams working for input.
|
||||
# Not yet supported on output but this only affects file size,
|
||||
# not functionality. (by anon2)
|
||||
|
||||
"""
|
||||
Decrypt Adobe ADEPT-encrypted PDF files.
|
||||
"""
|
||||
@ -42,6 +47,10 @@ try:
|
||||
except ImportError:
|
||||
ARC4 = None
|
||||
RSA = None
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
|
||||
|
||||
class ADEPTError(Exception):
|
||||
@ -569,16 +578,17 @@ class PSBaseParser(object):
|
||||
pos = self.fp.tell()
|
||||
buf = ''
|
||||
while 0 < pos:
|
||||
prevpos = pos
|
||||
pos = max(0, pos-self.BUFSIZ)
|
||||
self.fp.seek(pos)
|
||||
s = self.fp.read(self.BUFSIZ)
|
||||
s = self.fp.read(prevpos-pos)
|
||||
if not s: break
|
||||
while 1:
|
||||
n = max(s.rfind('\r'), s.rfind('\n'))
|
||||
if n == -1:
|
||||
buf = s + buf
|
||||
break
|
||||
yield buf+s[n:]
|
||||
yield s[n:]+buf
|
||||
s = s[:n]
|
||||
buf = ''
|
||||
return
|
||||
@ -867,7 +877,7 @@ class PDFStream(PDFObject):
|
||||
(self.objid, len(self.rawdata), self.dic)
|
||||
|
||||
def decode(self):
|
||||
assert self.data == None and self.rawdata != None
|
||||
assert self.data is None and self.rawdata is not None
|
||||
data = self.rawdata
|
||||
if self.decipher:
|
||||
# Handle encryption
|
||||
@ -884,10 +894,6 @@ class PDFStream(PDFObject):
|
||||
# will get errors if the document is encrypted.
|
||||
data = zlib.decompress(data)
|
||||
elif f in LITERALS_LZW_DECODE:
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
data = ''.join(LZWDecoder(StringIO(data)).run())
|
||||
elif f in LITERALS_ASCII85_DECODE:
|
||||
data = ascii85decode(data)
|
||||
@ -926,7 +932,7 @@ class PDFStream(PDFObject):
|
||||
return
|
||||
|
||||
def get_data(self):
|
||||
if self.data == None:
|
||||
if self.data is None:
|
||||
self.decode()
|
||||
return self.data
|
||||
|
||||
@ -934,6 +940,13 @@ class PDFStream(PDFObject):
|
||||
return self.rawdata
|
||||
|
||||
def get_decdata(self):
|
||||
if self.data is not None:
|
||||
# Data has already been decrypted and decoded. This is the case
|
||||
# for object streams. Note: this data is wrong to put in the
|
||||
# output because it should be stored decrypted but
|
||||
# uncompressed. This can be done by storing the intermediate
|
||||
# data. For now object streams are useless in the output.
|
||||
return self.data
|
||||
data = self.rawdata
|
||||
if self.decipher and data:
|
||||
# Handle encryption
|
||||
@ -989,7 +1002,7 @@ class PDFXRef(object):
|
||||
if len(f) != 2:
|
||||
raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
|
||||
try:
|
||||
(start, nobjs) = map(long, f)
|
||||
(start, nobjs) = map(int, f)
|
||||
except ValueError:
|
||||
raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
|
||||
for objid in xrange(start, start+nobjs):
|
||||
@ -1002,7 +1015,7 @@ class PDFXRef(object):
|
||||
raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
|
||||
(pos, genno, use) = f
|
||||
if use != 'n': continue
|
||||
self.offsets[objid] = (int(genno), long(pos))
|
||||
self.offsets[objid] = (int(genno), int(pos))
|
||||
self.load_trailer(parser)
|
||||
return
|
||||
|
||||
@ -1040,7 +1053,7 @@ class PDFXRefStream(object):
|
||||
return
|
||||
|
||||
def __repr__(self):
|
||||
return '<PDFXRef: objid=%d-%d>' % (self.objid_first, self.objid_last)
|
||||
return '<PDFXRef: objids=%s>' % self.index
|
||||
|
||||
def objids(self):
|
||||
for first, size in self.index:
|
||||
@ -1298,12 +1311,45 @@ class PDFDocument(object):
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
return
|
||||
#if STRICT:
|
||||
# raise PDFSyntaxError('Cannot locate objid=%r' % objid)
|
||||
return None
|
||||
if stmid:
|
||||
return PDFObjStmRef(objid, stmid, index)
|
||||
# Later try to introduce PDFObjStmRef's
|
||||
# return PDFObjStmRef(objid, stmid, index)
|
||||
# Stuff from pdfminer
|
||||
stream = stream_value(self.getobj(stmid))
|
||||
if stream.dic.get('Type') is not LITERAL_OBJSTM:
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('Not a stream object: %r' % stream)
|
||||
try:
|
||||
n = stream.dic['N']
|
||||
except KeyError:
|
||||
if STRICT:
|
||||
raise PDFSyntaxError('N is not defined: %r' % stream)
|
||||
n = 0
|
||||
|
||||
if stmid in self.parsed_objs:
|
||||
objs = self.parsed_objs[stmid]
|
||||
else:
|
||||
parser = PDFObjStrmParser(stream.get_data(), self)
|
||||
objs = []
|
||||
try:
|
||||
while 1:
|
||||
(_,obj) = parser.nextobject()
|
||||
objs.append(obj)
|
||||
except PSEOF:
|
||||
pass
|
||||
self.parsed_objs[stmid] = objs
|
||||
genno = 0
|
||||
i = n*2+index
|
||||
try:
|
||||
obj = objs[i]
|
||||
except IndexError:
|
||||
raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
|
||||
if isinstance(obj, PDFStream):
|
||||
obj.set_objid(objid, 0)
|
||||
###
|
||||
else:
|
||||
self.parser.seek(index)
|
||||
(_,objid1) = self.parser.nexttoken() # objid
|
||||
@ -1316,9 +1362,9 @@ class PDFDocument(object):
|
||||
(_,obj) = self.parser.nextobject()
|
||||
if isinstance(obj, PDFStream):
|
||||
obj.set_objid(objid, genno)
|
||||
if self.decipher:
|
||||
obj = decipher_all(self.decipher, objid, genno, obj)
|
||||
self.objs[objid] = obj
|
||||
if self.decipher:
|
||||
obj = decipher_all(self.decipher, objid, genno, obj)
|
||||
return obj
|
||||
|
||||
class PDFObjStmRef(object):
|
||||
@ -1419,7 +1465,7 @@ class PDFParser(PSStackParser):
|
||||
prev = line
|
||||
else:
|
||||
raise PDFNoValidXRef('Unexpected EOF')
|
||||
return long(prev)
|
||||
return int(prev)
|
||||
|
||||
# read xref table
|
||||
def read_xref_from(self, start, xrefs):
|
||||
@ -1482,6 +1528,34 @@ class PDFParser(PSStackParser):
|
||||
xrefs.append(xref)
|
||||
return xrefs
|
||||
|
||||
## PDFObjStrmParser
|
||||
##
|
||||
class PDFObjStrmParser(PDFParser):
    """Parser for the already-decoded contents of a PDF object stream.

    The stream data is wrapped in an in-memory file object and fed to the
    stack parser directly; only PSStackParser.__init__ is invoked, so
    whatever PDFParser.__init__ would set up is intentionally skipped.
    """

    # Interned keyword object for the indirect-reference operator "R".
    KEYWORD_R = KWD('R')

    def __init__(self, data, doc):
        """Create a parser over `data`, remembering `doc` for references.

        data -- the decoded object-stream contents (a string)
        doc  -- the owning document, handed to every PDFObjRef created here
        """
        PSStackParser.__init__(self, StringIO(data))
        self.doc = doc

    def flush(self):
        """Move everything currently on the parse stack into the results."""
        pending = self.popall()
        self.add_results(*pending)

    def do_keyword(self, pos, token):
        """Handle a keyword token found at offset `pos`.

        "R" consumes the two preceding stack entries (object id and
        generation number) and pushes a PDFObjRef in their place. Any
        other keyword is pushed back onto the stack unchanged.
        """
        if token is not self.KEYWORD_R:
            # Not a reference operator: keep the token as-is.
            self.push((pos, token))
            return

        # Reference to an indirect object: "<objid> <genno> R".
        try:
            ((_, raw_objid), (_, raw_genno)) = self.pop(2)
            objid = int(raw_objid)
            genno = int(raw_genno)
            self.push((pos, PDFObjRef(self.doc, objid, genno)))
        except PSSyntaxError:
            # Best effort: a malformed reference is silently dropped.
            pass
|
||||
|
||||
###
|
||||
### My own code, for which there is none else to blame
|
||||
@ -1521,8 +1595,9 @@ class PDFSerializer(object):
|
||||
if isinstance(obj, PDFObjStmRef):
|
||||
xrefstm[objid] = obj
|
||||
continue
|
||||
xrefs[objid] = self.tell()
|
||||
self.serialize_indirect(objid, obj)
|
||||
if obj is not None:
|
||||
xrefs[objid] = self.tell()
|
||||
self.serialize_indirect(objid, obj)
|
||||
startxref = self.tell()
|
||||
self.write('xref\n')
|
||||
self.write('0 %d\n' % (maxobj + 1,))
|
||||
@ -1611,11 +1686,18 @@ class PDFSerializer(object):
|
||||
self.write(' ')
|
||||
self.write('%d %d R' % (obj.objid, 0))
|
||||
elif isinstance(obj, PDFStream):
|
||||
data = obj.get_decdata()
|
||||
self.serialize_object(obj.dic)
|
||||
self.write('stream\n')
|
||||
self.write(data)
|
||||
self.write('\nendstream')
|
||||
### For now, we have extracted all objects from an Object Stream,
|
||||
### so we don't need these any more. Therefore leave them out
|
||||
### of the output. Later we could try to use object streams in
|
||||
### the output again to get smaller output.
|
||||
if obj.dic.get('Type') == LITERAL_OBJSTM:
|
||||
self.write('(deleted)')
|
||||
else:
|
||||
data = obj.get_decdata()
|
||||
self.serialize_object(obj.dic)
|
||||
self.write('stream\n')
|
||||
self.write(data)
|
||||
self.write('\nendstream')
|
||||
else:
|
||||
data = str(obj)
|
||||
if data[0].isalnum() and self.last.isalnum():
|
||||
@ -1697,7 +1779,7 @@ class DecryptionDialog(Tkinter.Frame):
|
||||
def get_inpath(self):
|
||||
inpath = tkFileDialog.askopenfilename(
|
||||
parent=None, title='Select ADEPT-encrypted PDF file to decrypt',
|
||||
defaultextension='.epub', filetypes=[('PDF files', '.pdf'),
|
||||
defaultextension='.pdf', filetypes=[('PDF files', '.pdf'),
|
||||
('All files', '.*')])
|
||||
if inpath:
|
||||
inpath = os.path.normpath(inpath)
|
||||
@ -1708,7 +1790,7 @@ class DecryptionDialog(Tkinter.Frame):
|
||||
def get_outpath(self):
|
||||
outpath = tkFileDialog.asksaveasfilename(
|
||||
parent=None, title='Select unencrypted PDF file to produce',
|
||||
defaultextension='.epub', filetypes=[('PDF files', '.pdf'),
|
||||
defaultextension='.pdf', filetypes=[('PDF files', '.pdf'),
|
||||
('All files', '.*')])
|
||||
if outpath:
|
||||
outpath = os.path.normpath(outpath)
|
||||
|
Loading…
Reference in New Issue
Block a user