#! /usr/bin/python # ineptpdf8.4.51.pyw # ineptpdf, version 8.4.51 # To run this program install Python 2.7 from http://www.python.org/download/ # # PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto # # and PyWin Extension (Win32API module) from # http://sourceforge.net/projects/pywin32/files/ # # Make sure to install the dedicated versions for Python 2.7. # # It's recommended to use the 32-Bit Python Windows versions (even with a 64-bit # Windows system). # # Save this script file as # ineptpdf8.4.51.pyw and double-click on it to run it. # Revision history: # 1 - Initial release # 2 - Improved determination of key-generation algorithm # 3 - Correctly handle PDF >=1.5 cross-reference streams # 4 - Removal of ciando's personal ID (anon) # 5 - removing small bug with V3 ebooks (anon) # 6 - changed to adeptkey4.der format for 1.7.2 support (anon) # 6.1 - backward compatibility for 1.7.1 and old adeptkey.der (anon) # 7 - Get cross reference streams and object streams working for input. # Not yet supported on output but this only effects file size, # not functionality. (anon2) # 7.1 - Correct a problem when an old trailer is not followed by startxref (anon2) # 7.2 - Correct malformed Mac OS resource forks for Stanza # - Support for cross ref streams on output (decreases file size) (anon2) # 7.3 - Correct bug in trailer with cross ref stream that caused the error (anon2) # "The root object is missing or invalid" in Adobe Reader. # 7.4 - Force all generation numbers in output file to be 0, like in v6. 
# Fallback code for wrong xref improved (search till last trailer # instead of first) (anon2) # 8 - fileopen user machine identifier support (Tetrachroma) # 8.1 - fileopen user cookies support (Tetrachroma) # 8.2 - fileopen user name/password support (Tetrachroma) # 8.3 - fileopen session cookie support (Tetrachroma) # 8.3.1 - fix for the "specified key file does not exist" error (Tetrachroma) # 8.3.2 - improved server result parsing (Tetrachroma) # 8.4 - Ident4D and encrypted Uuid support (Tetrachroma) # 8.4.1 - improved MAC address processing (Tetrachroma) # 8.4.2 - FowP3Uuid fallback file processing (Tetrachroma) # 8.4.3 - improved user/password pdf file detection (Tetrachroma) # 8.4.4 - small bugfix (Tetrachroma) # 8.4.5 - improved cookie host searching (Tetrachroma) # 8.4.6 - STRICT parsing disabled (non-standard pdf processing) (Tetrachroma) # 8.4.7 - UTF-8 input file conversion (Tetrachroma) # 8.4.8 - fix for more rare utf8 problems (Tetrachroma) # 8.4.9 - solution for utf8 in comination with # ident4id method (Tetrachroma) # 8.4.10 - line feed processing, non c system drive patch, nrbook support (Tetrachroma) # 8.4.11 - alternative ident4id calculation (Tetrachroma) # 8.4.12 - fix for capital username characters and # other unusual user login names (Tetrachroma & ZeroPoint) # 8.4.13 - small bug fixes (Tetrachroma) # 8.4.14 - fix for non-standard-conform fileopen pdfs (Tetrachroma) # 8.4.15 - 'bad file descriptor'-fix (Tetrachroma) # 8.4.16 - improves user/pass detection (Tetrachroma) # 8.4.17 - fix for several '=' chars in a DPRM entity (Tetrachroma) # 8.4.18 - follow up bug fix for the DPRM problem, # more readable error messages (Tetrachroma) # 8.4.19 - 2nd fix for 'bad file descriptor' problem (Tetrachroma) # 8.4.20 - follow up patch (Tetrachroma) # 8.4.21 - 3rd patch for 'bad file descriptor' (Tetrachroma) # 8.4.22 - disable prints for exception prevention (Tetrachroma) # 8.4.23 - check for additional security attributes (Tetrachroma) # 8.4.24 - 
improved cookie session support (Tetrachroma) # 8.4.25 - more compatibility with unicode files (Tetrachroma) # 8.4.26 - automated session/user cookie request function (works # only with Firefox 3.x+) (Tetrachroma) # 8.4.27 - user/password fallback # 8.4.28 - AES decryption, improved misconfigured pdf handling, # limited experimental APS support (Tetrachroma & Neisklar) # 8.4.29 - backport for bad formatted rc4 encrypted pdfs (Tetrachroma) # 8.4.30 - extended authorization attributes support (Tetrachroma) # 8.4.31 - improved session cookie and better server response error # handling (Tetrachroma) # 8.4.33 - small cookie optimizations (Tetrachroma) # 8.4.33 - debug output option (Tetrachroma) # 8.4.34 - better user/password management # handles the 'AskUnp' response) (Tetrachroma) # 8.4.35 - special handling for non-standard systems (Tetrachroma) # 8.4.36 - previous machine/disk handling [PrevMach/PrevDisk] (Tetrachroma) # 8.4.36 - FOPN_flock support (Tetrachroma) # 8.4.37 - patch for unicode paths/filenames (Tetrachroma) # 8.4.38 - small fix for user/password dialog (Tetrachroma) # 8.4.39 - sophisticated request mode differentiation, forced # uuid calculation (Tetrachroma) # 8.4.40 - fix for non standard server responses (Tetrachroma) # 8.4.41 - improved user/password request windows, # better server response tolerance (Tetrachroma) # 8.4.42 - improved nl/cr server response parsing (Tetrachroma) # 8.4.43 - fix for user names longer than 13 characters and special # uuid encryption (Tetrachroma) # 8.4.44 - another fix for ident4d problem (Tetrachroma) # 8.4.45 - 2nd fix for ident4d problem (Tetrachroma) # 8.4.46 - script cleanup and optimizations (Tetrachroma) # 8.4.47 - script identification change to Adobe Reader (Tetrachroma) # 8.4.48 - improved tolerance for false file/registry entries (Tetrachroma) # 8.4.49 - improved username encryption (Tetrachroma) # 8.4.50 - improved (experimental) APS support (Tetrachroma & Neisklar) # 8.4.51 - automatic APS offline key 
retrieval (works only for # Onleihe right now) (80ka80 & Tetrachroma) """ Decrypts Adobe ADEPT-encrypted and Fileopen PDF files. """ from __future__ import with_statement __license__ = 'GPL v3' import sys import os import re import zlib import struct import hashlib from itertools import chain, islice import xml.etree.ElementTree as etree import Tkinter import Tkconstants import tkFileDialog import tkMessageBox # added for fileopen support import urllib import urlparse import time import socket import string import uuid import subprocess import time import getpass from ctypes import * import traceback import inspect import tempfile import sqlite3 import httplib try: from Crypto.Cipher import ARC4 # needed for newer pdfs from Crypto.Cipher import AES from Crypto.Hash import SHA256 from Crypto.PublicKey import RSA except ImportError: ARC4 = None RSA = None try: from cStringIO import StringIO except ImportError: from StringIO import StringIO class ADEPTError(Exception): pass # global variable (needed for fileopen and password decryption) INPUTFILEPATH = '' KEYFILEPATH = '' PASSWORD = '' DEBUG_MODE = False IVERSION = '8.4.51' # Do we generate cross reference streams on output? 
# 0 = never
# 1 = only if present in input
# 2 = always
GEN_XREF_STM = 1

# This is the value for the current document
gen_xref_stm = False  # will be set in PDFSerializer


###
### ASN.1 parsing code from tlslite

def bytesToNumber(bytes):
    '''Fold a sequence of integer byte values into one big-endian number.'''
    # A plain 0 is enough: Python 2 ints promote to long automatically.
    total = 0
    for byte in bytes:
        total = (total << 8) + byte
    return total


class ASN1Error(Exception):
    '''Raised when ASN.1 data cannot be decoded.'''
    pass


class ASN1Parser(object):
    '''
    Minimal reader for one ASN.1 element.

    After construction, `length` holds the decoded content length and
    `value` holds the content bytes of the element.
    '''

    class Parser(object):
        '''Cursor over an indexable sequence of integer byte values.'''

        def __init__(self, bytes):
            self.bytes = bytes
            self.index = 0

        def get(self, length):
            '''Read `length` bytes as a big-endian integer and advance.'''
            if self.index + length > len(self.bytes):
                raise ASN1Error("Error decoding ASN.1")
            x = 0
            for count in range(length):
                x <<= 8
                x |= self.bytes[self.index]
                self.index += 1
            return x

        def getFixBytes(self, lengthBytes):
            '''Return the next `lengthBytes` bytes as a slice and advance.'''
            bytes = self.bytes[self.index : self.index+lengthBytes]
            self.index += lengthBytes
            return bytes

        def getVarBytes(self, lengthLength):
            '''Read a length prefix of `lengthLength` bytes, then that many bytes.'''
            lengthBytes = self.get(lengthLength)
            return self.getFixBytes(lengthBytes)

        def getFixList(self, length, lengthList):
            '''Read `lengthList` integers of `length` bytes each.'''
            l = [0] * lengthList
            for x in range(lengthList):
                l[x] = self.get(length)
            return l

        def getVarList(self, length, lengthLength):
            '''Read a byte-count prefix, then that many bytes as integers of `length` bytes.'''
            lengthList = self.get(lengthLength)
            if lengthList % length != 0:
                raise ASN1Error("Error decoding ASN.1")
            lengthList = int(lengthList/length)
            l = [0] * lengthList
            for x in range(lengthList):
                l[x] = self.get(length)
            return l

        def startLengthCheck(self, lengthLength):
            '''Read an expected length from the data and remember the current index.'''
            self.lengthCheck = self.get(lengthLength)
            self.indexCheck = self.index

        def setLengthCheck(self, length):
            '''Set an expected length explicitly and remember the current index.'''
            self.lengthCheck = length
            self.indexCheck = self.index

        def stopLengthCheck(self):
            '''Raise ASN1Error unless exactly the expected length was consumed.'''
            if (self.index - self.indexCheck) != self.lengthCheck:
                raise ASN1Error("Error decoding ASN.1")

        def atLengthCheck(self):
            '''Return True at the expected end, False before it; raise past it.'''
            if (self.index - self.indexCheck) < self.lengthCheck:
                return False
            elif (self.index - self.indexCheck) == self.lengthCheck:
                return True
            else:
                raise ASN1Error("Error decoding ASN.1")

    def __init__(self, bytes):
        p = self.Parser(bytes)
        p.get(1)  # skip the one-byte type tag
        self.length = self._getASN1Length(p)
        self.value = p.getFixBytes(self.length)

    def getChild(self, which):
        '''Return child element number `which` (0-based) as a new ASN1Parser.'''
        p = self.Parser(self.value)
        for x in range(which+1):
            markIndex = p.index
            p.get(1)
            length = self._getASN1Length(p)
            p.getFixBytes(length)
        return ASN1Parser(p.bytes[markIndex:p.index])

    def _getASN1Length(self, p):
        '''Decode a length field: short form (<= 127) or long form.'''
        firstLength = p.get(1)
        if firstLength<=127:
            return firstLength
        else:
            # Low 7 bits give the number of length bytes that follow.
            lengthLength = firstLength & 0x7F
            return p.get(lengthLength)


###
### PDF parsing routines from pdfminer, with changes for EBX_HANDLER

##  Utilities
##
def choplist(n, seq):
    '''Groups every n elements of the list.

    Yields tuples of n items; a trailing incomplete group is dropped.
    '''
    r = []
    for x in seq:
        r.append(x)
        if len(r) == n:
            yield tuple(r)
            r = []
    return


def nunpack(s, default=0):
    '''Unpacks up to 4 bytes big endian.

    Returns `default` for an empty string.
    Raises TypeError when `s` is longer than 4 bytes.
    '''
    l = len(s)
    if not l:
        return default
    elif l == 1:
        return ord(s)
    elif l == 2:
        return struct.unpack('>H', s)[0]
    elif l == 3:
        # Pad to 4 bytes; b'' is the plain str type on Python 2.
        return struct.unpack('>L', b'\x00'+s)[0]
    elif l == 4:
        return struct.unpack('>L', s)[0]
    else:
        # This used to *return* the exception object, handing callers a
        # bogus "number" instead of signalling the error.
        raise TypeError('invalid length: %d' % l)


# When true, type/syntax problems raise instead of being tolerated.
STRICT = 0


##  PS Exceptions
##
class PSException(Exception): pass
class PSEOF(PSException): pass
class PSSyntaxError(PSException): pass
class PSTypeError(PSException): pass
class PSValueError(PSException): pass


##  Basic PostScript Types
##

# PSLiteral
class PSObject(object): pass

class PSLiteral(PSObject):
    '''
    PS literals (e.g. "/Name").
    Caution: Never create these objects directly.
    Use PSLiteralTable.intern() instead.
    '''
    def __init__(self, name):
        self.name = name
        return

    def __repr__(self):
        # Non-alphanumeric characters are written as #xx hex escapes.
        name = []
        for char in self.name:
            if not char.isalnum():
                char = '#%02x' % ord(char)
            name.append(char)
        return '/%s' % ''.join(name)


# PSKeyword
class PSKeyword(PSObject):
    '''
    PS keywords (e.g. "showpage").
    Caution: Never create these objects directly.
    Use PSKeywordTable.intern() instead.
    '''
    def __init__(self, name):
        self.name = name
        return

    def __repr__(self):
        return self.name


# PSSymbolTable
class PSSymbolTable(object):
    '''
    Symbol table that stores PSLiteral or PSKeyword.
    '''
    def __init__(self, classe):
        self.dic = {}
        self.classe = classe
        return

    def intern(self, name):
        '''Return the unique `classe` instance for `name`, creating it once.'''
        if name in self.dic:
            lit = self.dic[name]
        else:
            lit = self.classe(name)
            self.dic[name] = lit
        return lit


PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword)
LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern
KEYWORD_BRACE_BEGIN = KWD('{')
KEYWORD_BRACE_END = KWD('}')
KEYWORD_ARRAY_BEGIN = KWD('[')
KEYWORD_ARRAY_END = KWD(']')
KEYWORD_DICT_BEGIN = KWD('<<')
KEYWORD_DICT_END = KWD('>>')


def literal_name(x):
    '''Return the name of literal `x`; non-literals give str(x) unless STRICT.'''
    if not isinstance(x, PSLiteral):
        if STRICT:
            raise PSTypeError('Literal required: %r' % x)
        else:
            return str(x)
    return x.name


def keyword_name(x):
    '''Return the name of keyword `x`; non-keywords give str(x) unless STRICT.'''
    if not isinstance(x, PSKeyword):
        if STRICT:
            raise PSTypeError('Keyword required: %r' % x)
        else:
            return str(x)
    return x.name


##  PSBaseParser
##
EOL = re.compile(r'[\r\n]')
SPC = re.compile(r'\s')
NONSPC = re.compile(r'\S')
HEX = re.compile(r'[0-9a-fA-F]')
END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
END_NUMBER = re.compile(r'[^0-9]')
END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
END_STRING = re.compile(r'[()\134]')
OCT_STRING = re.compile(r'[0-7]')
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }


class PSBaseParser(object):
    '''
    Most basic PostScript parser that performs only basic tokenization.

    The tokenizer is a state machine: every parse_* method receives the
    current buffer `s` and index `i` and returns (next_handler,
    next_index). A token may span several buffer refills; the partial
    text is accumulated in self.token.
    '''
    BUFSIZ = 4096

    def __init__(self, fp):
        self.fp = fp
        self.seek(0)
        return

    def __repr__(self):
        # The format string had been lost, so repr() raised TypeError.
        return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)

    def flush(self):
        '''Hook for subclasses; does nothing here.'''
        return

    def close(self):
        self.flush()
        return

    def tell(self):
        '''Return the absolute position of the next unread character.'''
        return self.bufpos+self.charpos

    def poll(self, pos=None, n=80):
        '''Debug aid: seek to `pos` and restore the file position afterwards.'''
        pos0 = self.fp.tell()
        if not pos:
            pos = self.bufpos+self.charpos
        self.fp.seek(pos)
        ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
        self.fp.seek(pos0)
        return

    def seek(self, pos):
        '''
        Seeks the parser to the given position.
        '''
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
        self.buf = ''
        self.charpos = 0
        # reset the status for nexttoken()
        self.parse1 = self.parse_main
        self.tokens = []
        return

    def fillbuf(self):
        '''Read the next chunk once the buffer is consumed; PSEOF at end of file.'''
        if self.charpos < len(self.buf):
            return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
            raise PSEOF('Unexpected EOF')
        self.charpos = 0
        return

    def parse_main(self, s, i):
        '''Skip whitespace and dispatch on the first character of a token.'''
        m = NONSPC.search(s, i)
        if not m:
            return (self.parse_main, len(s))
        j = m.start(0)
        c = s[j]
        self.tokenstart = self.bufpos+j
        if c == '%':
            self.token = '%'
            return (self.parse_comment, j+1)
        if c == '/':
            self.token = ''
            return (self.parse_literal, j+1)
        if c in '-+' or c.isdigit():
            self.token = c
            return (self.parse_number, j+1)
        if c == '.':
            self.token = c
            return (self.parse_float, j+1)
        if c.isalpha():
            self.token = c
            return (self.parse_keyword, j+1)
        if c == '(':
            self.token = ''
            self.paren = 1
            return (self.parse_string, j+1)
        if c == '<':
            self.token = ''
            return (self.parse_wopen, j+1)
        if c == '>':
            self.token = ''
            return (self.parse_wclose, j+1)
        # Any other single character is a keyword of its own.
        self.add_token(KWD(c))
        return (self.parse_main, j+1)

    def add_token(self, obj):
        '''Queue `obj` together with the position where its token started.'''
        self.tokens.append((self.tokenstart, obj))
        return

    def parse_comment(self, s, i):
        '''Consume a % comment up to the end of the line.'''
        m = EOL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_comment, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # We ignore comments.
        #self.tokens.append(self.token)
        return (self.parse_main, j)

    def parse_literal(self, s, i):
        '''Consume a /Name literal; '#' switches to hex-escape decoding.'''
        m = END_LITERAL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_literal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '#':
            self.hex = ''
            return (self.parse_literal_hex, j+1)
        self.add_token(LIT(self.token))
        return (self.parse_main, j)

    def parse_literal_hex(self, s, i):
        '''Collect up to two hex digits of a #xx escape inside a literal.'''
        c = s[i]
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
            return (self.parse_literal_hex, i+1)
        if self.hex:
            self.token += chr(int(self.hex, 16))
        return (self.parse_literal, i)

    def parse_number(self, s, i):
        '''Consume an integer; a '.' hands over to parse_float.'''
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_number, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '.':
            self.token += c
            return (self.parse_float, j+1)
        try:
            self.add_token(int(self.token))
        except ValueError:
            # e.g. a lone sign character; drop it silently
            pass
        return (self.parse_main, j)

    def parse_float(self, s, i):
        '''Consume the fractional digits of a real number.'''
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_float, len(s))
        j = m.start(0)
        self.token += s[i:j]
        try:
            self.add_token(float(self.token))
        except ValueError:
            # A bare '.' (or '-.') is not a number; drop it like
            # parse_number does instead of crashing the tokenizer.
            pass
        return (self.parse_main, j)

    def parse_keyword(self, s, i):
        '''Consume a keyword; 'true' and 'false' become Python booleans.'''
        m = END_KEYWORD.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_keyword, len(s))
        j = m.start(0)
        self.token += s[i:j]
        if self.token == 'true':
            token = True
        elif self.token == 'false':
            token = False
        else:
            token = KWD(self.token)
        self.add_token(token)
        return (self.parse_main, j)

    def parse_string(self, s, i):
        '''Consume a (...) string, tracking nested parentheses in self.paren.'''
        m = END_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_string, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = s[j]
        if c == '\\':
            self.oct = ''
            return (self.parse_string_1, j+1)
        if c == '(':
            self.paren += 1
            self.token += c
            return (self.parse_string, j+1)
        if c == ')':
            self.paren -= 1
            if self.paren:
                # still inside a nested pair: the ')' is literal text
                self.token += c
                return (self.parse_string, j+1)
        self.add_token(self.token)
        return (self.parse_main, j+1)

    def parse_string_1(self, s, i):
        '''Handle the character(s) following a backslash inside a string.'''
        c = s[i]
        if OCT_STRING.match(c) and len(self.oct) < 3:
            self.oct += c
            return (self.parse_string_1, i+1)
        if self.oct:
            self.token += chr(int(self.oct, 8))
            return (self.parse_string, i)
        if c in ESC_STRING:
            self.token += chr(ESC_STRING[c])
        # Unknown escapes are skipped.
        return (self.parse_string, i+1)

    def parse_wopen(self, s, i):
        '''After '<': either a hex string or the '<<' dictionary opener.'''
        c = s[i]
        if c.isspace() or HEX.match(c):
            return (self.parse_hexstring, i)
        if c == '<':
            self.add_token(KEYWORD_DICT_BEGIN)
            i += 1
        return (self.parse_main, i)

    def parse_wclose(self, s, i):
        '''After '>': emit the '>>' dictionary closer when a second '>' follows.'''
        c = s[i]
        if c == '>':
            self.add_token(KEYWORD_DICT_END)
            i += 1
        return (self.parse_main, i)

    def parse_hexstring(self, s, i):
        '''Consume a <...> hex string and decode it into characters.'''
        m = END_HEX_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_hexstring, len(s))
        j = m.start(0)
        self.token += s[i:j]
        token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
                             SPC.sub('', self.token))
        self.add_token(token)
        return (self.parse_main, j)

    def nexttoken(self):
        '''Return the next (position, token) pair; PSEOF at end of input.'''
        while not self.tokens:
            self.fillbuf()
            (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
        token = self.tokens.pop(0)
        return token

    def nextline(self):
        '''
        Fetches a next line that ends either with \\r or \\n.

        Returns (position, line); the line keeps its terminator.
        '''
        linebuf = ''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
                c = self.buf[self.charpos]
                # handle '\r\n'
                if c == '\n':
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
                linebuf += self.buf[self.charpos:m.end(0)]
                self.charpos = m.end(0)
                if linebuf[-1] == '\r':
                    # peek at the next character for a following '\n'
                    eol = True
                else:
                    break
            else:
                linebuf += self.buf[self.charpos:]
                self.charpos = len(self.buf)
        return (linepos, linebuf)

    def revreadlines(self):
        '''
        Fetches a next line backward. This is used to locate
        the trailers at the end of a file.
        '''
        self.fp.seek(0, 2)
        pos = self.fp.tell()
        buf = ''
        while 0 < pos:
            prevpos = pos
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(prevpos-pos)
            if not s:
                break
            while 1:
                n = max(s.rfind('\r'), s.rfind('\n'))
                if n == -1:
                    # no terminator in this chunk: carry it over
                    buf = s + buf
                    break
                yield s[n:]+buf
                s = s[:n]
                buf = ''
        return


##  PSStackParser
##
class PSStackParser(PSBaseParser):
    '''
    Parser that assembles tokens into arrays and dictionaries.

    self.curstack collects (position, object) pairs for the container
    being built; self.context stacks the enclosing containers.
    '''

    def __init__(self, fp):
        PSBaseParser.__init__(self, fp)
        self.reset()
        return

    def reset(self):
        self.context = []
        self.curtype = None
        self.curstack = []
        self.results = []
        return

    def seek(self, pos):
        PSBaseParser.seek(self, pos)
        self.reset()
        return

    def push(self, *objs):
        self.curstack.extend(objs)
        return

    def pop(self, n):
        '''Remove and return the last n entries of the current stack.'''
        objs = self.curstack[-n:]
        self.curstack[-n:] = []
        return objs

    def popall(self):
        '''Remove and return every entry of the current stack.'''
        objs = self.curstack
        self.curstack = []
        return objs

    def add_results(self, *objs):
        self.results.extend(objs)
        return

    def start_type(self, pos, type):
        '''Open a container of kind `type`, saving the enclosing state.'''
        self.context.append((pos, self.curtype, self.curstack))
        (self.curtype, self.curstack) = (type, [])
        return

    def end_type(self, type):
        '''Close the current container; return (start position, objects).

        Raises PSTypeError when the open container is not of kind `type`.
        '''
        if self.curtype != type:
            raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
        objs = [ obj for (_,obj) in self.curstack ]
        (pos, self.curtype, self.curstack) = self.context.pop()
        return (pos, objs)

    def do_keyword(self, pos, token):
        '''Hook for subclasses; keywords are ignored here.'''
        return

    def nextobject(self, direct=False):
        '''
        Returns the next object: keywords, literals, strings,
        numbers, arrays and dictionaries. Arrays and dictionaries
        are represented as Python lists and dictionaries.

        With direct=True the first completed top-level (position,
        object) pair is popped off the stack and returned; otherwise
        the next entry of self.results is returned.
        '''
        while not self.results:
            (pos, token) = self.nexttoken()
            ##print (pos,token), (self.curtype, self.curstack)
            if (isinstance(token, int) or
                isinstance(token, float) or
                isinstance(token, bool) or
                isinstance(token, str) or
                isinstance(token, PSLiteral)):
                # normal token
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                # begin array
                self.start_type(pos, 'a')
            elif token == KEYWORD_ARRAY_END:
                # end array
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
                    if STRICT:
                        raise
            elif token == KEYWORD_DICT_BEGIN:
                # begin dictionary
                self.start_type(pos, 'd')
            elif token == KEYWORD_DICT_END:
                # end dictionary
                try:
                    (pos, objs) = self.end_type('d')
                    if len(objs) % 2 != 0:
                        raise PSSyntaxError(
                            'Invalid dictionary construct: %r' % objs)
                    d = dict((literal_name(k), v) \
                                 for (k,v) in choplist(2, objs))
                    self.push((pos, d))
                except PSTypeError:
                    if STRICT:
                        raise
            else:
                self.do_keyword(pos, token)
            if self.context:
                # still inside an array or dictionary
                continue
            else:
                if direct:
                    return self.pop(1)[0]
                self.flush()
        obj = self.results.pop(0)
        return obj


LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))


##  PDF Objects
##
class PDFObject(PSObject): pass

class PDFException(PSException): pass
class PDFTypeError(PDFException): pass
class PDFValueError(PDFException): pass
class PDFNotImplementedError(PSException): pass


##  PDFObjRef
##
class PDFObjRef(PDFObject):
    '''Indirect reference to object `objid` of document `doc`.'''

    def __init__(self, doc, objid, genno):
        if objid == 0:
            if STRICT:
                raise PDFValueError('PDF object id cannot be 0.')
        self.doc = doc
        self.objid = objid
        self.genno = genno
        return

    def __repr__(self):
        # The format string had been lost, so repr() raised TypeError.
        return '<PDFObjRef:%d %r>' % (self.objid, self.genno)

    def resolve(self):
        '''Fetch the referenced object through doc.getobj().'''
        return self.doc.getobj(self.objid)


# resolve
def resolve1(x):
    '''
    Resolve an object. If this is an array or dictionary,
    it may still contain some indirect objects inside.
    '''
    while isinstance(x, PDFObjRef):
        x = x.resolve()
    return x

def resolve_all(x):
    '''
    Recursively resolve X and all the internals.
    Make sure there is no indirect reference within the nested object.
    This procedure might be slow.

    Lists are rebuilt; dictionaries are updated in place.
    '''
    while isinstance(x, PDFObjRef):
        x = x.resolve()
    if isinstance(x, list):
        x = [ resolve_all(v) for v in x ]
    elif isinstance(x, dict):
        for (k,v) in x.iteritems():
            x[k] = resolve_all(v)
    return x

def decipher_all(decipher, objid, genno, x):
    '''
    Recursively decipher X.

    Strings go through decipher(objid, genno, s); lists and
    dictionaries are rebuilt; anything else is returned unchanged.
    '''
    if isinstance(x, str):
        return decipher(objid, genno, x)
    decf = lambda v: decipher_all(decipher, objid, genno, v)
    if isinstance(x, list):
        x = [decf(v) for v in x]
    elif isinstance(x, dict):
        x = dict((k, decf(v)) for (k, v) in x.iteritems())
    return x


# Type checking
# Each helper resolves indirect references first. On a type mismatch it
# raises PDFTypeError when STRICT is set, otherwise it returns an empty
# default of the requested type.
def int_value(x):
    x = resolve1(x)
    if not isinstance(x, int):
        if STRICT:
            raise PDFTypeError('Integer required: %r' % x)
        return 0
    return x

def float_value(x):
    x = resolve1(x)
    if not isinstance(x, float):
        if STRICT:
            raise PDFTypeError('Float required: %r' % x)
        return 0.0
    return x

def num_value(x):
    x = resolve1(x)
    if not (isinstance(x, int) or isinstance(x, float)):
        if STRICT:
            raise PDFTypeError('Int or Float required: %r' % x)
        return 0
    return x

def str_value(x):
    x = resolve1(x)
    if not isinstance(x, str):
        if STRICT:
            raise PDFTypeError('String required: %r' % x)
        return ''
    return x

def list_value(x):
    x = resolve1(x)
    if not (isinstance(x, list) or isinstance(x, tuple)):
        if STRICT:
            raise PDFTypeError('List required: %r' % x)
        return []
    return x

def dict_value(x):
    x = resolve1(x)
    if not isinstance(x, dict):
        if STRICT:
            raise PDFTypeError('Dict required: %r' % x)
        return {}
    return x

def stream_value(x):
    x = resolve1(x)
    if not isinstance(x, PDFStream):
        if STRICT:
            raise PDFTypeError('PDFStream required: %r' % x)
        return PDFStream({}, '')
    return x


# ascii85decode(data)
def ascii85decode(data):
    '''Decode ASCII85 text up to the '~' terminator.

    Characters outside '!'..'u', 'z' and '~' are ignored.
    '''
    n = b = 0
    # Collect the pieces and join once instead of growing a string.
    # b'' literals are the plain str type on Python 2.
    parts = []
    for c in data:
        if '!' <= c and c <= 'u':
            n += 1
            b = b*85+(ord(c)-33)
            if n == 5:
                parts.append(struct.pack('>L',b))
                n = b = 0
        elif c == 'z':
            # 'z' abbreviates a group of four zero bytes
            assert n == 0
            parts.append(b'\0\0\0\0')
        elif c == '~':
            if n:
                # pad the final partial group with the highest digit
                for _ in range(5-n):
                    b = b*85+84
                parts.append(struct.pack('>L',b)[:n-1])
            break
    return b''.join(parts)


##  PDFStream type
class PDFStream(PDFObject):
    '''
    A PDF stream: a dictionary `dic` plus raw data.

    `decipher`, when given, is called as decipher(objid, genno, data)
    before any filter is applied.
    '''

    def __init__(self, dic, rawdata, decipher=None):
        length = int_value(dic.get('Length', 0))
        eol = rawdata[length:]
        # quick and dirty fix for false length attribute,
        # might not work if the pdf stream parser has a problem
        if decipher is not None and decipher.__name__ == 'decrypt_aes':
            if (len(rawdata) % 16) != 0:
                # trim to a whole number of 16-byte blocks
                cutdiv = len(rawdata) // 16
                rawdata = rawdata[:16*cutdiv]
        else:
            if eol in ('\r', '\n', '\r\n'):
                rawdata = rawdata[:length]

        self.dic = dic
        self.rawdata = rawdata
        self.decipher = decipher
        self.data = None
        self.decdata = None
        self.objid = None
        self.genno = None
        return

    def set_objid(self, objid, genno):
        self.objid = objid
        self.genno = genno
        return

    def __repr__(self):
        # The format strings had been lost, so repr() raised TypeError.
        if self.rawdata:
            return '<PDFStream(%r): raw=%d, %r>' % \
                   (self.objid, len(self.rawdata), self.dic)
        else:
            # data may still be None for an empty, undecoded stream
            return '<PDFStream(%r): data=%d, %r>' % \
                   (self.objid, len(self.data or ''), self.dic)

    def decode(self):
        '''Decrypt and unfilter rawdata into self.data, then drop rawdata.

        Raises PDFNotImplementedError for unsupported filters or
        predictors and PDFValueError when predictor 12 has no Columns.
        '''
        assert self.data is None and self.rawdata is not None
        data = self.rawdata
        if self.decipher:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
            if gen_xref_stm:
                self.decdata = data # keep decrypted data
        if 'Filter' not in self.dic:
            self.data = data
            self.rawdata = None
            ##print self.dict
            return
        filters = self.dic['Filter']
        if not isinstance(filters, list):
            filters = [ filters ]
        for f in filters:
            if f in LITERALS_FLATE_DECODE:
                # will get errors if the document is encrypted.
                data = zlib.decompress(data)
            elif f in LITERALS_LZW_DECODE:
                data = ''.join(LZWDecoder(StringIO(data)).run())
            elif f in LITERALS_ASCII85_DECODE:
                data = ascii85decode(data)
            elif f == LITERAL_CRYPT:
                raise PDFNotImplementedError('/Crypt filter is unsupported')
            else:
                raise PDFNotImplementedError('Unsupported filter: %r' % f)
            # apply predictors
            if 'DP' in self.dic:
                params = self.dic['DP']
            else:
                params = self.dic.get('DecodeParms', {})
            if 'Predictor' in params:
                pred = int_value(params['Predictor'])
                if pred:
                    if pred != 12:
                        raise PDFNotImplementedError(
                            'Unsupported predictor: %r' % pred)
                    if 'Columns' not in params:
                        raise PDFValueError(
                            'Columns undefined for predictor=12')
                    columns = int_value(params['Columns'])
                    rows = []
                    ent0 = '\x00' * columns
                    # Each row is one tag byte followed by `columns` bytes.
                    for i in xrange(0, len(data), columns+1):
                        pred = data[i]
                        ent1 = data[i+1:i+1+columns]
                        if pred == '\x02':
                            # tag 2: add the previous row byte-wise
                            ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
                                               for (a,b) in zip(ent0,ent1))
                        rows.append(ent1)
                        ent0 = ent1
                    data = ''.join(rows)
        self.data = data
        self.rawdata = None
        return

    def get_data(self):
        '''Return the decoded data, decoding on first use.'''
        if self.data is None:
            self.decode()
        return self.data

    def get_rawdata(self):
        return self.rawdata

    def get_decdata(self):
        '''Return decrypted but still filtered data.'''
        if self.decdata is not None:
            return self.decdata
        data = self.rawdata
        if self.decipher and data:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
        return data


##  PDF Exceptions
##
class PDFSyntaxError(PDFException): pass
class PDFNoValidXRef(PDFSyntaxError): pass
class PDFEncryptionError(PDFException): pass
class PDFPasswordIncorrect(PDFEncryptionError): pass

# some predefined literals and keywords.
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog')


##  XRefs
##

##  PDFXRef
##
class PDFXRef(object):
    '''
    Classic cross-reference table.

    After load(), self.offsets maps object id -> (genno, file position)
    for in-use entries and self.trailer holds the trailer dictionary.
    '''

    def __init__(self):
        self.offsets = None
        return

    def __repr__(self):
        # The format string had been lost, so repr() raised TypeError;
        # offsets is still None before load().
        return '<PDFXRef: objs=%d>' % len(self.offsets or ())

    def objids(self):
        return self.offsets.iterkeys()

    def load(self, parser):
        '''Read the table sections from `parser`, then the trailer.

        Raises PDFNoValidXRef on premature end of file or malformed lines.
        '''
        self.offsets = {}
        while 1:
            try:
                (pos, line) = parser.nextline()
            except PSEOF:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
            if not line:
                raise PDFNoValidXRef('Premature eof: %r' % parser)
            if line.startswith('trailer'):
                # rewind so load_trailer() sees the keyword itself
                parser.seek(pos)
                break
            f = line.strip().split(' ')
            if len(f) != 2:
                raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
            try:
                (start, nobjs) = map(int, f)
            except ValueError:
                raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
            for objid in xrange(start, start+nobjs):
                try:
                    (_, line) = parser.nextline()
                except PSEOF:
                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
                f = line.strip().split(' ')
                if len(f) != 3:
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
                (pos, genno, use) = f
                if use != 'n':
                    # only in-use entries are recorded
                    continue
                self.offsets[objid] = (int(genno), int(pos))
        self.load_trailer(parser)
        return

    KEYWORD_TRAILER = PSKeywordTable.intern('trailer')

    def load_trailer(self, parser):
        '''Parse the trailer dictionary following the 'trailer' keyword.'''
        try:
            (_,kwd) = parser.nexttoken()
            assert kwd is self.KEYWORD_TRAILER
            (_,dic) = parser.nextobject(direct=True)
        except PSEOF:
            # The file ended right after the dictionary: take whatever
            # the parser already has on its stack.
            x = parser.pop(1)
            if not x:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
            (_,dic) = x[0]
        self.trailer = dict_value(dic)
        return

    def getpos(self, objid):
        '''Return (None, file position) for `objid`; KeyError if unknown.'''
        # The former try/except KeyError only re-raised, so it is dropped.
        (_, pos) = self.offsets[objid]
        return (None, pos)


##  PDFXRefStream
##
class PDFXRefStream(object):
    '''
    Cross-reference stream (PDF >= 1.5).

    self.index is a list of (first object id, count) pairs;
    fl1/fl2/fl3 are the byte widths of the three fields of an entry.
    '''

    def __init__(self):
        self.index = None
        self.data = None
        self.entlen = None
        self.fl1 = self.fl2 = self.fl3 = None
        return

    def __repr__(self):
        # The format string had been lost from the original.
        return '<PDFXRef: objids=%s>' % (self.index,)

    def objids(self):
        for first, size in self.index:
            for objid in xrange(first, first + size):
                yield objid

    def load(self, parser, debug=0):
        '''Read an "objid genno obj <stream>" sequence from `parser`.

        Raises PDFNoValidXRef when the object is not an /XRef stream.
        '''
        (_,objid) = parser.nexttoken() # ignored
        (_,genno) = parser.nexttoken() # ignored
        (_,kwd) = parser.nexttoken()
        (_,stream) = parser.nextobject()
        if not isinstance(stream, PDFStream) or \
           stream.dic['Type'] is not LITERAL_XREF:
            raise PDFNoValidXRef('Invalid PDF stream spec.')
        size = stream.dic['Size']
        index = stream.dic.get('Index', (0,size))
        # pair up the flat [first, count, first, count, ...] sequence
        self.index = zip(islice(index, 0, None, 2),
                         islice(index, 1, None, 2))
        (self.fl1, self.fl2, self.fl3) = stream.dic['W']
        self.data = stream.get_data()
        self.entlen = self.fl1+self.fl2+self.fl3
        self.trailer = stream.dic
        return

    def getpos(self, objid):
        '''Locate `objid`.

        Returns (None, file position) for a type 1 entry and
        (containing object id, index) for a type 2 entry.
        Raises KeyError for free or unknown objects.
        '''
        offset = 0
        for first, size in self.index:
            if first <= objid and objid < (first + size):
                break
            offset += size
        else:
            raise KeyError(objid)
        i = self.entlen * ((objid - first) + offset)
        ent = self.data[i:i+self.entlen]
        # an absent type field defaults to 1
        f1 = nunpack(ent[:self.fl1], 1)
        if f1 == 1:
            pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
            return (None, pos)
        elif f1 == 2:
            objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
            index = nunpack(ent[self.fl1+self.fl2:])
            return (objid, index)
        # this is a free object
        raise KeyError(objid)


##  PDFDocument
##
##  A PDFDocument object represents a PDF document.
##  Since a PDF file is usually pretty big, normally it is not loaded
##  at once. Rather it is parsed dynamically as processing goes.
##  A PDF parser is associated with the document.
##
class PDFDocument(object):

    def __init__(self):
        self.xrefs = []
        self.objs = {}
        self.parsed_objs = {}
        self.root = None
        self.catalog = None
        self.parser = None
        self.encryption = None
        self.decipher = None
        # dictionaries for fileopen
        self.fileopen = {}
        self.urlresult = {}
        self.ready = False
        return

    # set_parser(parser)
    #   Associates the document with an (already initialized) parser object.
    def set_parser(self, parser):
        '''Attach `parser`, read the xrefs, and record encryption and root.

        Does nothing when a parser is already set.
        Raises PDFSyntaxError when no trailer carries a /Root entry.
        '''
        if self.parser:
            return
        self.parser = parser
        # The document is set to be temporarily ready during collecting
        # all the basic information about the document, e.g.
        # the header, the encryption information, and the access rights
        # for the document.
        self.ready = True
        # Retrieve the information of each header that was appended
        # (maybe multiple times) at the end of the document.
        self.xrefs = parser.read_xref()
        for xref in self.xrefs:
            trailer = xref.trailer
            if not trailer:
                continue

            # If there's an encryption info, remember it.
            if 'Encrypt' in trailer:
                #assert not self.encryption
                try:
                    self.encryption = (list_value(trailer['ID']),
                                       dict_value(trailer['Encrypt']))
                # fix for bad files
                # (Exception rather than a bare except, so that
                # KeyboardInterrupt and SystemExit still propagate)
                except Exception:
                    self.encryption = ('ffffffffffffffffffffffffffffffffffff',
                                       dict_value(trailer['Encrypt']))
            if 'Root' in trailer:
                self.set_root(dict_value(trailer['Root']))
                break
        else:
            raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
        # The document is set to be non-ready again, until all the
        # proper initialization (asking the password key and
        # verifying the access permission, so on) is finished.
        self.ready = False
        return

    # set_root(root)
    #   Set the Root dictionary of the document.
    #   Each PDF file must have exactly one /Root dictionary.
    def set_root(self, root):
        '''Store `root` as the catalog; with STRICT a non-Catalog type raises.'''
        self.root = root
        self.catalog = dict_value(self.root)
        if self.catalog.get('Type') is not LITERAL_CATALOG:
            if STRICT:
                raise PDFSyntaxError('Catalog not found!')
        return

    # initialize(password='')
    #   Perform the initialization with a given password.
    #   This step is mandatory even if there's no password associated
    #   with the document.
def initialize(self, password=''): if not self.encryption: self.is_printable = self.is_modifiable = self.is_extractable = True self.ready = True return (docid, param) = self.encryption type = literal_name(param['Filter']) if type == 'Adobe.APS': return self.initialize_adobe_ps(password, docid, param) if type == 'Standard': return self.initialize_standard(password, docid, param) if type == 'EBX_HANDLER': return self.initialize_ebx(password, docid, param) if type == 'FOPN_fLock': # remove of unnecessairy password attribute return self.initialize_fopn_flock(docid, param) if type == 'FOPN_foweb': # remove of unnecessairy password attribute return self.initialize_fopn(docid, param) raise PDFEncryptionError('Unknown filter: param=%r' % param) def initialize_adobe_ps(self, password, docid, param): global KEYFILEPATH self.decrypt_key = self.genkey_adobe_ps(param) self.genkey = self.genkey_v4 self.decipher = self.decrypt_aes self.ready = True return def getPrincipalKey(self, k=None, url=None, referer=None): if url == None: url="ssl://edc.bibliothek-digital.de/edcws/services/urn:EDCLicenseService" data1='<wsse:Security '+\ 'xmlns:wsse="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-'+\ '1.0.xsd"><wsse:UsernameToken><wsse:Username>edc_anonymous</wsse:Username&'+\ 'gt;<wsse:Password Type="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-username-'+\ 'token-profile-1.0#PasswordText">edc_anonymous</wsse:Password></wsse:UsernameToken&'+\ 'gt;</wsse:Security>7de-de'+\ '1010<'+\ 'watermarkTemplateSeqNum>0' if k not in url[:40]: return None #~ extract host and path: host=re.compile(r'[a-zA-Z]://([^/]+)/.+', re.I).search(url).group(1) urlpath=re.compile(r'[a-zA-Z]://[^/]+(/.+)', re.I).search(url).group(1) # open a socket connection on port 80 conn = httplib.HTTPSConnection(host, 443) #~ Headers for request headers={"Accept": "*/*", "Host": host, "User-Agent": "Mozilla/3.0 (compatible; Acrobat EDC SOAP 1.0)", "Content-Type": "text/xml; charset=utf-8", 
"Cache-Control": "no-cache", "SOAPAction": ""} # send data1 and headers try: conn.request("POST", urlpath, data1, headers) except: raise ADEPTError("Could not post request to '"+host+"'.") # read respose try: response = conn.getresponse() responsedata=response.read() except: raise ADEPTError("Could not read response from '"+host+"'.") # close connection conn.close() try: key=re.compile(r'PricipalKey"((?!).)*]*>(((?!).)*)', re.I).search(responsedata).group(2) except : key=None return key def genkey_adobe_ps(self, param): # nice little offline principal keys dictionary principalkeys = { 'bibliothek-digital.de': 'Dzqx8McQUNd2CDzBVmtnweUxVWlqJTMqyYtiDIc4dZI='.decode('base64')} for k, v in principalkeys.iteritems(): result = self.getPrincipalKey(k) #print result if result != None: principalkeys[k] = result.decode('base64') else: raise ADEPTError("No (Online) PrincipalKey found.") self.is_printable = self.is_modifiable = self.is_extractable = True ## print 'keyvalue' ## print len(keyvalue) ## print keyvalue.encode('hex') length = int_value(param.get('Length', 0)) / 8 edcdata = str_value(param.get('EDCData')).decode('base64') pdrllic = str_value(param.get('PDRLLic')).decode('base64') pdrlpol = str_value(param.get('PDRLPol')).decode('base64') #print 'ecd rights' edclist = [] for pair in edcdata.split('\n'): edclist.append(pair) ## print edclist ## print 'edcdata decrypted' ## print edclist[0].decode('base64').encode('hex') ## print edclist[1].decode('base64').encode('hex') ## print edclist[2].decode('base64').encode('hex') ## print edclist[3].decode('base64').encode('hex') ## print 'offlinekey' ## print len(edclist[9].decode('base64')) ## print pdrllic # principal key request for key in principalkeys: if key in pdrllic: principalkey = principalkeys[key] else: raise ADEPTError('Cannot find principal key for this pdf') ## print 'minorversion' ## print int(edclist[8]) # fix for minor version ## minorversion = int(edclist[8]) - 100 ## if minorversion < 1: ## minorversion = 1 
## print int(minorversion) shakey = SHA256.new() shakey.update(principalkey) ## for i in range(0,minorversion): ## shakey.update(principalkey) shakey = shakey.digest() ## shakey = SHA256.new(principalkey).digest() ivector = 16 * chr(0) #print shakey plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64')) if plaintext[-16:] != 16 * chr(16): raise ADEPTError('Offlinekey cannot be decrypted, aborting (hint: redownload pdf) ...') pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol) if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16: raise ADEPTError('Could not decrypt PDRLPol, aborting ...') else: cutter = -1 * ord(pdrlpol[-1]) #print cutter pdrlpol = pdrlpol[:cutter] #print plaintext.encode('hex') #print 'pdrlpol' #print pdrlpol return plaintext[:16] PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \ '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' # experimental aes pw support def initialize_standard(self, password, docid, param): # copy from a global variable V = int_value(param.get('V', 0)) if (V <=0 or V > 4): raise PDFEncryptionError('Unknown algorithm: param=%r' % param) length = int_value(param.get('Length', 40)) # Key length (bits) O = str_value(param['O']) R = int_value(param['R']) # Revision if 5 <= R: raise PDFEncryptionError('Unknown revision: %r' % R) U = str_value(param['U']) P = int_value(param['P']) try: EncMetadata = str_value(param['EncryptMetadata']) except: EncMetadata = 'True' self.is_printable = bool(P & 4) self.is_modifiable = bool(P & 8) self.is_extractable = bool(P & 16) self.is_annotationable = bool(P & 32) self.is_formsenabled = bool(P & 256) self.is_textextractable = bool(P & 512) self.is_assemblable = bool(P & 1024) self.is_formprintable = bool(P & 2048) # Algorithm 3.2 password = (password+self.PASSWORD_PADDING)[:32] # 1 hash = hashlib.md5(password) # 2 hash.update(O) # 3 hash.update(struct.pack('= 3: # Algorithm 3.5 hash = hashlib.md5(self.PASSWORD_PADDING) # 2 
hash.update(docid[0]) # 3 x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4 for i in xrange(1,19+1): k = ''.join( chr(ord(c) ^ i) for c in key ) x = ARC4.new(k).decrypt(x) u1 = x+x # 32bytes total if R == 2: is_authenticated = (u1 == U) else: is_authenticated = (u1[:16] == U[:16]) if not is_authenticated: raise ADEPTError('Password is not correct.') ## raise PDFPasswordIncorrect self.decrypt_key = key # genkey method if V == 1 or V == 2: self.genkey = self.genkey_v2 elif V == 3: self.genkey = self.genkey_v3 elif V == 4: self.genkey = self.genkey_v2 #self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2 # rc4 if V != 4: self.decipher = self.decipher_rc4 # XXX may be AES # aes elif V == 4 and Length == 128: elf.decipher = self.decipher_aes elif V == 4 and Length == 256: raise PDFNotImplementedError('AES256 encryption is currently unsupported') self.ready = True return def initialize_ebx(self, password, docid, param): global KEYFILEPATH self.is_printable = self.is_modifiable = self.is_extractable = True # keyfile path is wrong if KEYFILEPATH == False: errortext = 'Cannot find adeptkey.der keyfile. Use ineptkey to generate it.' 
raise ADEPTError(errortext) with open(password, 'rb') as f: keyder = f.read() # KEYFILEPATH = '' key = ASN1Parser([ord(x) for x in keyder]) key = [bytesToNumber(key.getChild(x).value) for x in xrange(1, 4)] rsa = RSA.construct(key) length = int_value(param.get('Length', 0)) / 8 rights = str_value(param.get('ADEPT_LICENSE')).decode('base64') rights = zlib.decompress(rights, -15) rights = etree.fromstring(rights) expr = './/{http://ns.adobe.com/adept}encryptedKey' bookkey = ''.join(rights.findtext(expr)).decode('base64') bookkey = rsa.decrypt(bookkey) if bookkey[0] != '\x02': raise ADEPTError('error decrypting book session key') index = bookkey.index('\0') + 1 bookkey = bookkey[index:] ebx_V = int_value(param.get('V', 4)) ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6)) # added because of the booktype / decryption book session key error if ebx_V == 3: V = 3 elif ebx_V < 4 or ebx_type < 6: V = ord(bookkey[0]) bookkey = bookkey[1:] else: V = 2 if length and len(bookkey) != length: raise ADEPTError('error decrypting book session key') self.decrypt_key = bookkey self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2 self.decipher = self.decrypt_rc4 self.ready = True return # fileopen support def initialize_fopn_flock(self, docid, param): raise ADEPTError('FOPN_fLock not supported, yet ...') # debug mode processing global DEBUG_MODE global IVERSION if DEBUG_MODE == True: if os.access('.',os.W_OK) == True: debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w') else: raise ADEPTError('Cannot write debug file, current directory is not writable') self.is_printable = self.is_modifiable = self.is_extractable = True # get parameters and add it to the fo dictionary self.fileopen['V'] = int_value(param.get('V',2)) # crypt base (docid, param) = self.encryption #rights = dict_value(param['Info']) rights = param['Info'] #print rights if DEBUG_MODE == True: debugfile.write(rights + '\n\n') ## for pair in rights.split(';'): ## try: ## key, value = pair.split('=',1) ## 
self.fileopen[key] = value ## # fix for some misconfigured INFO variables ## except: ## pass ## kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \ ## 'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'} ## for keys in kattr: ## try: ## self.fileopen[kattr[keys]] = self.fileopen[keys] ## del self.fileopen[keys] ## except: ## continue # differentiate OS types ## sysplatform = sys.platform ## # if ostype is Windows ## if sysplatform=='win32': ## self.osuseragent = 'Windows NT 6.0' ## self.get_macaddress = self.get_win_macaddress ## self.fo_sethwids = self.fo_win_sethwids ## self.BrowserCookie = WinBrowserCookie ## elif sysplatform=='linux2': ## adeptout = 'Linux is not supported, yet.\n' ## raise ADEPTError(adeptout) ## self.osuseragent = 'Linux i686' ## self.get_macaddress = self.get_linux_macaddress ## self.fo_sethwids = self.fo_linux_sethwids ## else: ## adeptout = '' ## adeptout = adeptout + 'Due to various privacy violations from Apple\n' ## adeptout = adeptout + 'Mac OS X support is disabled by default.' 
## raise ADEPTError(adeptout) ## # add static arguments for http/https request ## self.fo_setattributes() ## # add hardware specific arguments for http/https request ## self.fo_sethwids() ## ## if 'Code' in self.urlresult: ## if self.fileopen['Length'] == len(self.urlresult['Code']): ## self.decrypt_key = self.urlresult['Code'] ## else: ## self.decrypt_key = self.urlresult['Code'].decode('hex') ## else: ## raise ADEPTError('Cannot find decryption key.') self.decrypt_key = 'stuff' self.genkey = self.genkey_v2 self.decipher = self.decrypt_rc4 self.ready = True return def initialize_fopn(self, docid, param): # debug mode processing global DEBUG_MODE global IVERSION if DEBUG_MODE == True: if os.access('.',os.W_OK) == True: debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w') else: raise ADEPTError('Cannot write debug file, current directory is not writable') self.is_printable = self.is_modifiable = self.is_extractable = True # get parameters and add it to the fo dictionary self.fileopen['Length'] = int_value(param.get('Length', 0)) / 8 self.fileopen['VEID'] = str_value(param.get('VEID')) self.fileopen['BUILD'] = str_value(param.get('BUILD')) self.fileopen['SVID'] = str_value(param.get('SVID')) self.fileopen['DUID'] = str_value(param.get('DUID')) self.fileopen['V'] = int_value(param.get('V',2)) # crypt base rights = str_value(param.get('INFO')).decode('base64') rights = self.genkey_fileopeninfo(rights) if DEBUG_MODE == True: debugfile.write(rights + '\n\n') for pair in rights.split(';'): try: key, value = pair.split('=',1) self.fileopen[key] = value # fix for some misconfigured INFO variables except: pass kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \ 'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'} for keys in kattr: # fishing some misconfigured slashs out of it try: self.fileopen[kattr[keys]] = urllib.quote(self.fileopen[keys],safe='') del self.fileopen[keys] except: continue # differentiate OS types sysplatform = sys.platform # if 
ostype is Windows if sysplatform=='win32': self.osuseragent = 'Windows NT 6.0' self.get_macaddress = self.get_win_macaddress self.fo_sethwids = self.fo_win_sethwids self.BrowserCookie = WinBrowserCookie elif sysplatform=='linux2': adeptout = 'Linux is not supported, yet.\n' raise ADEPTError(adeptout) self.osuseragent = 'Linux i686' self.get_macaddress = self.get_linux_macaddress self.fo_sethwids = self.fo_linux_sethwids else: adeptout = '' adeptout = adeptout + 'Mac OS X is not supported, yet.' adeptout = adeptout + 'Read the blogs FAQs for more information' raise ADEPTError(adeptout) # add static arguments for http/https request self.fo_setattributes() # add hardware specific arguments for http/https request self.fo_sethwids() #if DEBUG_MODE == True: debugfile.write(self.fileopen) if 'UURL' in self.fileopen: buildurl = self.fileopen['UURL'] else: buildurl = self.fileopen['PURL'] # fix for bad DPRM structure if self.fileopen['DPRM'][0] != r'/': self.fileopen['DPRM'] = r'/' + self.fileopen['DPRM'] # genius fix for bad server urls (IMHO) if '?' in self.fileopen['DPRM']: buildurl = buildurl + self.fileopen['DPRM'] + '&' else: buildurl = buildurl + self.fileopen['DPRM'] + '?' 
# debug customization #self.fileopen['Machine'] = '' #self.fileopen['Disk'] = '' surl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\ 'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'OSName', 'OSData', 'Language',\ 'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\ 'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk',\ 'FormHFT',\ 'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\ 'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\ 'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\ 'FSName', 'FowpKbd', 'OSBuild',\ 'RequestSchema') #settings request and special modes if 'EVER' in self.fileopen and float(self.fileopen['EVER']) < 3.8: self.fileopen['Mode'] = 'ICx' origurl = buildurl buildurl = buildurl + 'Request=Setting' for keys in surl: try: buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] except: continue if DEBUG_MODE == True: debugfile.write( 'settings url:\n') if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n') # custom user agent identification? 
if 'AGEN' in self.fileopen: useragent = self.fileopen['AGEN'] urllib.URLopener.version = useragent # attribute doesn't exist - take the default user agent else: urllib.URLopener.version = self.osuseragent # try to open the url try: u = urllib.urlopen(buildurl) u.geturl() result = u.read() except: raise ADEPTError('No internet connection or a blocking firewall!') ## finally: ## u.close() # getting rid of the line feed if DEBUG_MODE == True: debugfile.write('Settings'+'\n') if DEBUG_MODE == True: debugfile.write(result+'\n\n') #get rid of unnecessary characters result = result.rstrip('\n') result = result.rstrip(chr(13)) result = result.lstrip('\n') result = result.lstrip(chr(13)) self.surlresult = {} for pair in result.split('&'): try: key, value = pair.split('=',1) # fix for bad server response if key not in self.surlresult: self.surlresult[key] = value except: pass if 'RequestSchema' in self.surlresult: self.fileopen['RequestSchema'] = self.surlresult['RequestSchema'] if 'ServerSessionData' in self.surlresult: self.fileopen['ServerSessionData'] = self.surlresult['ServerSessionData'] if 'SetScope' in self.surlresult: self.fileopen['RequestSchema'] = self.surlresult['SetScope'] #print self.surlresult if 'RetVal' in self.surlresult and 'SEMO' not in self.fileopen and(('Reason' in self.surlresult and \ self.surlresult['Reason'] == 'AskUnp') or ('SetTarget' in self.surlresult and\ self.surlresult['SetTarget'] == 'UnpDlg')): # get user and password dialog try: self.gen_pw_dialog(self.surlresult['UnpUiName'], self.surlresult['UnpUiPass'],\ self.surlresult['UnpUiTitle'], self.surlresult['UnpUiOk'],\ self.surlresult['UnpUiSunk'], self.surlresult['UnpUiComm']) except: self.gen_pw_dialog() # the fileopen check might not be always right because of strange server responses if 'SEMO' in self.fileopen and (self.fileopen['SEMO'] == '1'\ or self.fileopen['SEMO'] == '2') and ('CSES' in self.fileopen and\ self.fileopen['CSES'] != 'fileopen'): # get the url name for the cookie(s) if 
'CURL' in self.fileopen: self.surl = self.fileopen['CURL'] if 'CSES' in self.fileopen: self.cses = self.fileopen['CSES'] elif 'PHOS' in self.fileopen: self.surl = self.fileopen['PHOS'] elif 'LHOS' in self.fileopen: self.surl = self.fileopen['LHOS'] else: raise ADEPTError('unknown Cookie name.\n Check ineptpdf forum for further assistance') self.pwfieldreq = 1 # session cookie processing if self.fileopen['SEMO'] == '1': cookies = self.BrowserCookie() #print self.cses #print self.surl csession = cookies.getcookie(self.cses,self.surl) if csession != None: self.fileopen['Session'] = csession self.gui = False # fallback else: self.pwtk = Tkinter.Tk() self.pwtk.title('Ineptpdf8') self.pwtk.minsize(150, 0) infotxt1 = 'Get the session cookie key manually (Firefox step-by-step:\n'+\ 'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\ '-> Search for a cookie from ' + self.surl +' with the\n'+\ 'name ' + self.cses +' and copy paste the content field in the\n'+\ 'Session Content field. 
Remove possible spaces or new lines at the '+\ 'end\n (cursor must be blinking right behind the last character)' self.label0 = Tkinter.Label(self.pwtk, text=infotxt1) self.label0.pack() self.label1 = Tkinter.Label(self.pwtk, text="Session Content") self.pwfieldreq = 0 self.gui = True # user cookie processing elif self.fileopen['SEMO'] == '2': cookies = self.BrowserCookie() #print self.cses #print self.surl name = cookies.getcookie('name',self.surl) passw = cookies.getcookie('pass',self.surl) if name != None or passw != None: self.fileopen['UserName'] = urllib.quote(name) self.fileopen['UserPass'] = urllib.quote(passw) self.gui = False # fallback else: self.pwtk = Tkinter.Tk() self.pwtk.title('Ineptpdf8') self.pwtk.minsize(150, 0) self.label1 = Tkinter.Label(self.pwtk, text="Username") infotxt1 = 'Get the user cookie keys manually (Firefox step-by-step:\n'+\ 'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\ '-> Search for cookies from ' + self.surl +' with the\n'+\ 'name name in the user field and copy paste the content field in the\n'+\ 'username field. Do the same with the name pass in the password field).' 
self.label0 = Tkinter.Label(self.pwtk, text=infotxt1) self.label0.pack() self.pwfieldreq = 1 self.gui = True ## else: ## self.pwtk = Tkinter.Tk() ## self.pwtk.title('Ineptpdf8') ## self.pwtk.minsize(150, 0) ## self.pwfieldreq = 0 ## self.label1 = Tkinter.Label(self.pwtk, text="Username") ## self.pwfieldreq = 1 ## self.gui = True if self.gui == True: self.un_entry = Tkinter.Entry(self.pwtk) # cursor here self.un_entry.focus() self.label2 = Tkinter.Label(self.pwtk, text="Password") self.pw_entry = Tkinter.Entry(self.pwtk, show="*") self.button = Tkinter.Button(self.pwtk, text='Go for it!', command=self.fo_save_values) # widget layout, stack vertical self.label1.pack() self.un_entry.pack() # create a password label and field if self.pwfieldreq == 1: self.label2.pack() self.pw_entry.pack() self.button.pack() self.pwtk.update() # start the event loop self.pwtk.mainloop() # original request # drive through tupple for building the permission url burl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\ 'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'Language',\ 'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\ 'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk', 'User', 'SaUser', 'SaSID',\ # special security measures 'HostIsDomain', 'PhysHostname', 'LogiHostname', 'SaRefDomain',\ 'FormHFT', 'UserName', 'UserPass', 'Session', \ 'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\ 'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\ 'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\ 'FSName', 'ServerSessionData', 'FowpKbd', 'OSBuild', \ 'DocumentSessionData', 'RequestSchema') buildurl = origurl buildurl = buildurl + 'Request=DocPerm' for keys in burl: try: buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] except: continue if DEBUG_MODE == True: debugfile.write('1st url:'+'\n') if DEBUG_MODE == True: debugfile.write(buildurl+'\n\n') # custom user agent identification? 
if 'AGEN' in self.fileopen: useragent = self.fileopen['AGEN'] urllib.URLopener.version = useragent # attribute doesn't exist - take the default user agent else: urllib.URLopener.version = self.osuseragent # try to open the url try: u = urllib.urlopen(buildurl) u.geturl() result = u.read() except: raise ADEPTError('No internet connection or a blocking firewall!') ## finally: ## u.close() # getting rid of the line feed if DEBUG_MODE == True: debugfile.write('1st preresult'+'\n') if DEBUG_MODE == True: debugfile.write(result+'\n\n') #get rid of unnecessary characters result = result.rstrip('\n') result = result.rstrip(chr(13)) result = result.lstrip('\n') result = result.lstrip(chr(13)) self.urlresult = {} for pair in result.split('&'): try: key, value = pair.split('=',1) self.urlresult[key] = value except: pass ## if 'RequestSchema' in self.surlresult: ## self.fileopen['RequestSchema'] = self.urlresult['RequestSchema'] #self.urlresult #result[0:8] == 'RetVal=1') or (result[0:8] == 'RetVal=2'): if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \ self.urlresult['RetVal'] != '2' and \ self.urlresult['RetVal'] != 'Update' and \ self.urlresult['RetVal'] != 'Answer')): if ('Reason' in self.urlresult and (self.urlresult['Reason'] == 'BadUserPwd'\ or self.urlresult['Reason'] == 'AskUnp')) or ('SwitchTo' in self.urlresult\ and (self.urlresult['SwitchTo'] == 'Dialog')): if 'ServerSessionData' in self.urlresult: self.fileopen['ServerSessionData'] = self.urlresult['ServerSessionData'] if 'DocumentSessionData' in self.urlresult: self.fileopen['DocumentSessionData'] = self.urlresult['DocumentSessionData'] buildurl = origurl buildurl = buildurl + 'Request=DocPerm' self.gen_pw_dialog() # password not found - fallback for keys in burl: try: buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] except: continue if DEBUG_MODE == True: debugfile.write( '2ndurl:') if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n') # try to open the url try: u = 
urllib.urlopen(buildurl) u.geturl() result = u.read() except: raise ADEPTError('No internet connection or a blocking firewall!') # getting rid of the line feed if DEBUG_MODE == True: debugfile.write( '2nd preresult') if DEBUG_MODE == True: debugfile.write( result+'\n\n') #get rid of unnecessary characters result = result.rstrip('\n') result = result.rstrip(chr(13)) result = result.lstrip('\n') result = result.lstrip(chr(13)) self.urlresult = {} for pair in result.split('&'): try: key, value = pair.split('=',1) self.urlresult[key] = value except: pass # did it work? if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \ self.urlresult['RetVal'] != '2' and self.urlresult['RetVal'] != 'Update' and \ self.urlresult['RetVal'] != 'Answer')): raise ADEPTError('Decryption was not successfull.\nReason: ' + self.urlresult['Error']) # fix for non-standard-conform fileopen pdfs ## if self.fileopen['Length'] != 5 and self.fileopen['Length'] != 16: ## if self.fileopen['V'] == 1: ## self.fileopen['Length'] = 5 ## else: ## self.fileopen['Length'] = 16 # patch for malformed pdfs #print len(self.urlresult['Code']) #print self.urlresult['Code'].encode('hex') if 'code' in self.urlresult: self.urlresult['Code'] = self.urlresult['code'] if 'Code' in self.urlresult: if len(self.urlresult['Code']) == 5 or len(self.urlresult['Code']) == 16: self.decrypt_key = self.urlresult['Code'] else: self.decrypt_key = self.urlresult['Code'].decode('hex') else: raise ADEPTError('Cannot find decryption key.') self.genkey = self.genkey_v2 self.decipher = self.decrypt_rc4 self.ready = True return def gen_pw_dialog(self, Username='Username', Password='Password', Title='User/Password Authentication',\ OK='Proceed', Text1='Authorization', Text2='Enter Required Data'): self.pwtk = Tkinter.Tk() self.pwtk.title(Title) self.pwtk.minsize(150, 0) self.label1 = Tkinter.Label(self.pwtk, text=Text1) self.label2 = Tkinter.Label(self.pwtk, text=Text2) self.label3 = Tkinter.Label(self.pwtk, 
text=Username) self.pwfieldreq = 1 self.gui = True self.un_entry = Tkinter.Entry(self.pwtk) # cursor here self.un_entry.focus() self.label4 = Tkinter.Label(self.pwtk, text=Password) self.pw_entry = Tkinter.Entry(self.pwtk, show="*") self.button = Tkinter.Button(self.pwtk, text=OK, command=self.fo_save_values) # widget layout, stack vertical self.label1.pack() self.label2.pack() self.label3.pack() self.un_entry.pack() # create a password label and field if self.pwfieldreq == 1: self.label4.pack() self.pw_entry.pack() self.button.pack() self.pwtk.update() # start the event loop self.pwtk.mainloop() # genkey functions def genkey_v2(self, objid, genno): objid = struct.pack(' -1: mac = line.split()[4] break return mac.replace(':','') except: raise ADEPTError('Cannot find MAC address. Get forum help.') def get_win_macaddress(self): try: gasize = c_ulong(5000) p = create_string_buffer(5000) GetAdaptersInfo = windll.iphlpapi.GetAdaptersInfo GetAdaptersInfo(byref(p),byref(gasize)) return p[0x194:0x19a].encode('hex') except: raise ADEPTError('Cannot find MAC address. 
Get forum help.') # custom conversion 5 bytes to 8 chars method def fo_convert5to8(self, edisk): # byte to number/char mapping table darray=[0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,\ 0x46,0x47,0x48,0x4A,0x4B,0x4C,0x4D,0x4E,0x50,0x51,0x52,0x53,0x54,\ 0x55,0x56,0x57,0x58,0x59,0x5A] pdid = struct.pack('> 5 outputhw = outputhw + chr(darray[index]) pdid = (ord(edisk[4]) << 2)|pdid # get the last 2 bits from the hwid + low part of the cpuid for i in range(0,2): index = pdid & 0x1f # shift the disk id 5 bits to the right pdid = pdid >> 5 outputhw = outputhw + chr(darray[index]) return outputhw # Linux processing def fo_linux_sethwids(self): # linux specific attributes self.fileopen['OSType']='Linux' self.fileopen['AcroProduct']='AcroReader' self.fileopen['AcroReader']='Yes' self.fileopen['AcroVersion']='9.101' self.fileopen['FSName']='ext3' self.fileopen['Build']='878' self.fileopen['ProdVer']='1.8.5.1' self.fileopen['OSBuild']='2.6.33' # write hardware keys hwkey = 0 pmac = self.get_macaddress().decode("hex"); self.fileopen['Disk'] = self.fo_convert5to8(pmac[1:]) # get primary used default mac address self.fileopen['Machine'] = self.fo_convert5to8(pmac[1:]) # get uuid # check for reversed offline handler 6AB83F4Ah + AFh 6AB83F4Ah if 'LILA' in self.fileopen: pass if 'Ident4ID' in self.fileopen: self.fileopen['User'] = getpass.getuser() self.fileopen['SaUser'] = getpass.getuser() try: cuser = winreg.HKEY_CURRENT_USER FOW3_UUID = 'Software\\Fileopen' regkey = winreg.OpenKey(cuser, FOW3_UUID) userkey = winreg.QueryValueEx(regkey, 'Fowp3Uuid')[0] # if self.genkey_cryptmach(userkey)[0:4] != 'ec20': self.fileopen['Uuid'] = self.genkey_cryptmach(userkey)[4:] ## elif self.genkey_cryptmach(userkey)[0:4] != 'ec20': ## self.fileopen['Uuid'] = self.genkey_cryptmach(userkey,1)[4:] ## else: except: raise ADEPTError('Cannot find FowP3Uuid file - reason might be Adobe (Reader) X.'\ 'Read the FAQs for more information how to solve the problem.') else: 
self.fileopen['Uuid'] = str(uuid.uuid1()) # get time stamp self.fileopen['Stamp'] = str(time.time())[:-3] # get fileopen input pdf name + path self.fileopen['DocPathUrl'] = 'file%3a%2f%2f%2f'\ + urllib.quote(os.path.normpath(INPUTFILEPATH)) # clear the link #INPUTFILEPATH = '' ## # get volume name (urllib quote necessairy?) urllib.quote( ## self.fileopen['VolName'] = win32api.GetVolumeInformation("C:\\")[0] ## # get volume serial number ## self.fileopen['VolSN'] = str(win32api.GetVolumeInformation("C:\\")[1]) return # Windows processing def fo_win_sethwids(self): # Windows specific attributes self.fileopen['OSType']='Windows' self.fileopen['OSName']='Vista' self.fileopen['OSData']='Service%20Pack%204' self.fileopen['AcroProduct']='Reader' self.fileopen['AcroReader']='Yes' self.fileopen['OSBuild']='7600' self.fileopen['AcroVersion']='9.1024' self.fileopen['Build']='879' # write hardware keys hwkey = 0 # get the os type and save it in ostype try: import win32api import win32security import win32file import winreg except: raise ADEPTError('PyWin Extension (Win32API module) needed.\n'+\ 'Download from http://sourceforge.net/projects/pywin32/files/ ') try: v0 = win32api.GetVolumeInformation('C:\\') v1 = win32api.GetSystemInfo()[6] # fix for possible negative integer (Python problem) volserial = v0[1] & 0xffffffff lowcpu = v1 & 255 highcpu = (v1 >> 8) & 255 # changed to int volserial = struct.pack(' 0 and mode == True: m.update(key_string[:(13-len(uname))]) md5sum = m.digest()[0:16] # print md5sum.encode('hex') # normal ident4id calculation retval = [] for sdata in data: retval.append(ARC4.new(md5sum).decrypt(sdata)) for rval in retval: if rval[:4] == 'ec20': return rval[4:] return False # start normal execution # list for username variants unamevars = [] # fill username variants list unamevars.append(self.user) unamevars.append(self.user + chr(0)) unamevars.append(self.user.lower()) unamevars.append(self.user.lower() + chr(0)) unamevars.append(self.user.upper()) 
unamevars.append(self.user.upper() + chr(0)) # go through it for uname in unamevars: result = genkeysub(uname, True) if result != False: return result result = genkeysub(uname) if result != False: return result # didn't find it, return false return False ## raise ADEPTError('Unsupported Ident4D Decryption,\n'+\ ## 'report the bug to the ineptpdf script forum') KEYWORD_OBJ = PSKeywordTable.intern('obj') def getobj(self, objid): if not self.ready: raise PDFException('PDFDocument not initialized') #assert self.xrefs if objid in self.objs: genno = 0 obj = self.objs[objid] else: for xref in self.xrefs: try: (stmid, index) = xref.getpos(objid) break except KeyError: pass else: #if STRICT: # raise PDFSyntaxError('Cannot locate objid=%r' % objid) return None if stmid: if gen_xref_stm: return PDFObjStmRef(objid, stmid, index) # Stuff from pdfminer: extract objects from object stream stream = stream_value(self.getobj(stmid)) if stream.dic.get('Type') is not LITERAL_OBJSTM: if STRICT: raise PDFSyntaxError('Not a stream object: %r' % stream) try: n = stream.dic['N'] except KeyError: if STRICT: raise PDFSyntaxError('N is not defined: %r' % stream) n = 0 if stmid in self.parsed_objs: objs = self.parsed_objs[stmid] else: parser = PDFObjStrmParser(stream.get_data(), self) objs = [] try: while 1: (_,obj) = parser.nextobject() objs.append(obj) except PSEOF: pass self.parsed_objs[stmid] = objs genno = 0 i = n*2+index try: obj = objs[i] except IndexError: raise PDFSyntaxError('Invalid object number: objid=%r' % (objid)) if isinstance(obj, PDFStream): obj.set_objid(objid, 0) ### else: self.parser.seek(index) (_,objid1) = self.parser.nexttoken() # objid (_,genno) = self.parser.nexttoken() # genno #assert objid1 == objid, (objid, objid1) (_,kwd) = self.parser.nexttoken() # #### hack around malformed pdf files # assert objid1 == objid, (objid, objid1) ## if objid1 != objid: ## x = [] ## while kwd is not self.KEYWORD_OBJ: ## (_,kwd) = self.parser.nexttoken() ## x.append(kwd) ## if x: ## 
objid1 = x[-2] ## genno = x[-1] ## if kwd is not self.KEYWORD_OBJ: raise PDFSyntaxError( 'Invalid object spec: offset=%r' % index) (_,obj) = self.parser.nextobject() if isinstance(obj, PDFStream): obj.set_objid(objid, genno) if self.decipher: obj = decipher_all(self.decipher, objid, genno, obj) self.objs[objid] = obj return obj # helper class for cookie retrival class WinBrowserCookie(): def __init__(self): pass def getcookie(self, cname, chost): # check firefox db fprofile = os.environ['AppData']+r'\Mozilla\Firefox' pinifile = 'profiles.ini' fini = os.path.normpath(fprofile + '\\' + pinifile) try: with open(fini,'r') as ffini: firefoxini = ffini.read() # Firefox not installed or on an USB stick except: return None for pair in firefoxini.split('\n'): try: key, value = pair.split('=',1) if key == 'Path': fprofile = os.path.normpath(fprofile+'//'+value+'//'+'cookies.sqlite') break # asdf except: continue if os.path.isfile(fprofile): try: con = sqlite3.connect(fprofile,1) except: raise ADEPTError('Firefox Cookie data base locked. Close Firefox and try again') cur = con.cursor() try: cur.execute("select value from moz_cookies where name=? and host=?", (cname, chost)) except Exception: raise ADEPTError('Firefox Cookie database is locked. Close Firefox and try again') try: return cur.fetchone()[0] except Exception: # sometimes is a dot in front of the host chost = '.'+chost cur.execute("select value from moz_cookies where name=? 
and host=?", (cname, chost)) try: return cur.fetchone()[0] except: return None class PDFObjStmRef(object): maxindex = 0 def __init__(self, objid, stmid, index): self.objid = objid self.stmid = stmid self.index = index if index > PDFObjStmRef.maxindex: PDFObjStmRef.maxindex = index ## PDFParser ## class PDFParser(PSStackParser): def __init__(self, doc, fp): PSStackParser.__init__(self, fp) self.doc = doc self.doc.set_parser(self) return def __repr__(self): return '' KEYWORD_R = PSKeywordTable.intern('R') KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj') KEYWORD_STREAM = PSKeywordTable.intern('stream') KEYWORD_XREF = PSKeywordTable.intern('xref') KEYWORD_STARTXREF = PSKeywordTable.intern('startxref') def do_keyword(self, pos, token): if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): self.add_results(*self.pop(1)) return if token is self.KEYWORD_ENDOBJ: self.add_results(*self.pop(4)) return if token is self.KEYWORD_R: # reference to indirect object try: ((_,objid), (_,genno)) = self.pop(2) (objid, genno) = (int(objid), int(genno)) obj = PDFObjRef(self.doc, objid, genno) self.push((pos, obj)) except PSSyntaxError: pass return if token is self.KEYWORD_STREAM: # stream object ((_,dic),) = self.pop(1) dic = dict_value(dic) try: objlen = int_value(dic['Length']) except KeyError: if STRICT: raise PDFSyntaxError('/Length is undefined: %r' % dic) objlen = 0 self.seek(pos) try: (_, line) = self.nextline() # 'stream' except PSEOF: if STRICT: raise PDFSyntaxError('Unexpected EOF') return pos += len(line) self.fp.seek(pos) data = self.fp.read(objlen) self.seek(pos+objlen) while 1: try: (linepos, line) = self.nextline() except PSEOF: if STRICT: raise PDFSyntaxError('Unexpected EOF') break if 'endstream' in line: i = line.index('endstream') objlen += i data += line[:i] break objlen += len(line) data += line self.seek(pos+objlen) obj = PDFStream(dic, data, self.doc.decipher) self.push((pos, obj)) return # others self.push((pos, token)) return def find_xref(self): # search the 
last xref table by scanning the file backwards. prev = None for line in self.revreadlines(): line = line.strip() if line == 'startxref': break if line: prev = line else: raise PDFNoValidXRef('Unexpected EOF') return int(prev) # read xref table def read_xref_from(self, start, xrefs): self.seek(start) self.reset() try: (pos, token) = self.nexttoken() except PSEOF: raise PDFNoValidXRef('Unexpected EOF') if isinstance(token, int): # XRefStream: PDF-1.5 if GEN_XREF_STM == 1: global gen_xref_stm gen_xref_stm = True self.seek(pos) self.reset() xref = PDFXRefStream() xref.load(self) else: if token is not self.KEYWORD_XREF: raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % (pos, token)) self.nextline() xref = PDFXRef() xref.load(self) xrefs.append(xref) trailer = xref.trailer if 'XRefStm' in trailer: pos = int_value(trailer['XRefStm']) self.read_xref_from(pos, xrefs) if 'Prev' in trailer: # find previous xref pos = int_value(trailer['Prev']) self.read_xref_from(pos, xrefs) return # read xref tables and trailers def read_xref(self): xrefs = [] trailerpos = None try: pos = self.find_xref() self.read_xref_from(pos, xrefs) except PDFNoValidXRef: # fallback self.seek(0) pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b') offsets = {} xref = PDFXRef() while 1: try: (pos, line) = self.nextline() except PSEOF: break if line.startswith('trailer'): trailerpos = pos # remember last trailer m = pat.match(line) if not m: continue (objid, genno) = m.groups() offsets[int(objid)] = (0, pos) if not offsets: raise xref.offsets = offsets if trailerpos: self.seek(trailerpos) xref.load_trailer(self) xrefs.append(xref) return xrefs ## PDFObjStrmParser ## class PDFObjStrmParser(PDFParser): def __init__(self, data, doc): PSStackParser.__init__(self, StringIO(data)) self.doc = doc return def flush(self): self.add_results(*self.popall()) return KEYWORD_R = KWD('R') def do_keyword(self, pos, token): if token is self.KEYWORD_R: # reference to indirect object try: ((_,objid), (_,genno)) = self.pop(2) 
(objid, genno) = (int(objid), int(genno)) obj = PDFObjRef(self.doc, objid, genno) self.push((pos, obj)) except PSSyntaxError: pass return # others self.push((pos, token)) return ### ### My own code, for which there is none else to blame class PDFSerializer(object): def __init__(self, inf, keypath): global GEN_XREF_STM, gen_xref_stm gen_xref_stm = GEN_XREF_STM > 1 self.version = inf.read(8) inf.seek(0) self.doc = doc = PDFDocument() parser = PDFParser(doc, inf) doc.initialize(keypath) self.objids = objids = set() for xref in reversed(doc.xrefs): trailer = xref.trailer for objid in xref.objids(): objids.add(objid) trailer = dict(trailer) trailer.pop('Prev', None) trailer.pop('XRefStm', None) if 'Encrypt' in trailer: objids.remove(trailer.pop('Encrypt').objid) self.trailer = trailer def dump(self, outf): self.outf = outf self.write(self.version) self.write('\n%\xe2\xe3\xcf\xd3\n') doc = self.doc objids = self.objids xrefs = {} maxobj = max(objids) trailer = dict(self.trailer) trailer['Size'] = maxobj + 1 for objid in objids: obj = doc.getobj(objid) if isinstance(obj, PDFObjStmRef): xrefs[objid] = obj continue if obj is not None: try: genno = obj.genno except AttributeError: genno = 0 xrefs[objid] = (self.tell(), genno) self.serialize_indirect(objid, obj) startxref = self.tell() if not gen_xref_stm: self.write('xref\n') self.write('0 %d\n' % (maxobj + 1,)) for objid in xrange(0, maxobj + 1): if objid in xrefs: # force the genno to be 0 self.write("%010d 00000 n \n" % xrefs[objid][0]) else: self.write("%010d %05d f \n" % (0, 65535)) self.write('trailer\n') self.serialize_object(trailer) self.write('\nstartxref\n%d\n%%%%EOF' % startxref) else: # Generate crossref stream. 
# Calculate size of entries maxoffset = max(startxref, maxobj) maxindex = PDFObjStmRef.maxindex fl2 = 2 power = 65536 while maxoffset >= power: fl2 += 1 power *= 256 fl3 = 1 power = 256 while maxindex >= power: fl3 += 1 power *= 256 index = [] first = None prev = None data = [] # Put the xrefstream's reference in itself startxref = self.tell() maxobj += 1 xrefs[maxobj] = (startxref, 0) for objid in sorted(xrefs): if first is None: first = objid elif objid != prev + 1: index.extend((first, prev - first + 1)) first = objid prev = objid objref = xrefs[objid] if isinstance(objref, PDFObjStmRef): f1 = 2 f2 = objref.stmid f3 = objref.index else: f1 = 1 f2 = objref[0] # we force all generation numbers to be 0 # f3 = objref[1] f3 = 0 data.append(struct.pack('>B', f1)) data.append(struct.pack('>L', f2)[-fl2:]) data.append(struct.pack('>L', f3)[-fl3:]) index.extend((first, prev - first + 1)) data = zlib.compress(''.join(data)) dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index, 'W': [1, fl2, fl3], 'Length': len(data), 'Filter': LITERALS_FLATE_DECODE[0], 'Root': trailer['Root'],} if 'Info' in trailer: dic['Info'] = trailer['Info'] xrefstm = PDFStream(dic, data) self.serialize_indirect(maxobj, xrefstm) self.write('startxref\n%d\n%%%%EOF' % startxref) def write(self, data): self.outf.write(data) self.last = data[-1:] def tell(self): return self.outf.tell() def escape_string(self, string): string = string.replace('\\', '\\\\') string = string.replace('\n', r'\n') string = string.replace('(', r'\(') string = string.replace(')', r'\)') # get rid of ciando id regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}') if regularexp.match(string): return ('http://www.ciando.com') return string def serialize_object(self, obj): if isinstance(obj, dict): # Correct malformed Mac OS resource forks for Stanza if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \ and isinstance(obj['Type'], int): obj['Subtype'] = obj['Type'] del obj['Type'] # end - 
hope this doesn't have bad effects self.write('<<') for key, val in obj.items(): self.write('/%s' % key) self.serialize_object(val) self.write('>>') elif isinstance(obj, list): self.write('[') for val in obj: self.serialize_object(val) self.write(']') elif isinstance(obj, str): self.write('(%s)' % self.escape_string(obj)) elif isinstance(obj, bool): if self.last.isalnum(): self.write(' ') self.write(str(obj).lower()) elif isinstance(obj, (int, long, float)): if self.last.isalnum(): self.write(' ') self.write(str(obj)) elif isinstance(obj, PDFObjRef): if self.last.isalnum(): self.write(' ') self.write('%d %d R' % (obj.objid, 0)) elif isinstance(obj, PDFStream): ### If we don't generate cross ref streams the object streams ### are no longer useful, as we have extracted all objects from ### them. Therefore leave them out from the output. if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm: self.write('(deleted)') else: data = obj.get_decdata() self.serialize_object(obj.dic) self.write('stream\n') self.write(data) self.write('\nendstream') else: data = str(obj) if data[0].isalnum() and self.last.isalnum(): self.write(' ') self.write(data) def serialize_indirect(self, objid, obj): self.write('%d 0 obj' % (objid,)) self.serialize_object(obj) if self.last.isalnum(): self.write('\n') self.write('endobj\n') def cli_main(argv=sys.argv): progname = os.path.basename(argv[0]) if RSA is None: print "%s: This script requires PyCrypto, which must be installed " \ "separately. Read the top-of-script comment for details." 
% \ (progname,) return 1 if len(argv) != 4: print "usage: %s KEYFILE INBOOK OUTBOOK" % (progname,) return 1 keypath, inpath, outpath = argv[1:] with open(inpath, 'rb') as inf: serializer = PDFSerializer(inf, keypath) # hope this will fix the 'bad file descriptor' problem with open(outpath, 'wb') as outf: # help construct to make sure the method runs to the end serializer.dump(outf) return 0 class DecryptionDialog(Tkinter.Frame): def __init__(self, root): # debug mode debugging global DEBUG_MODE Tkinter.Frame.__init__(self, root, border=5) ltext='Select file for decryption\n(Ignore Password / Key file option for Fileopen/APS PDFs)' self.status = Tkinter.Label(self, text=ltext) self.status.pack(fill=Tkconstants.X, expand=1) body = Tkinter.Frame(self) body.pack(fill=Tkconstants.X, expand=1) sticky = Tkconstants.E + Tkconstants.W body.grid_columnconfigure(1, weight=2) Tkinter.Label(body, text='Password\nor Key file').grid(row=0) self.keypath = Tkinter.Entry(body, width=30) self.keypath.grid(row=0, column=1, sticky=sticky) if os.path.exists('adeptkey.der'): self.keypath.insert(0, 'adeptkey.der') button = Tkinter.Button(body, text="...", command=self.get_keypath) button.grid(row=0, column=2) Tkinter.Label(body, text='Input file').grid(row=1) self.inpath = Tkinter.Entry(body, width=30) self.inpath.grid(row=1, column=1, sticky=sticky) button = Tkinter.Button(body, text="...", command=self.get_inpath) button.grid(row=1, column=2) Tkinter.Label(body, text='Output file').grid(row=2) self.outpath = Tkinter.Entry(body, width=30) self.outpath.grid(row=2, column=1, sticky=sticky) debugmode = Tkinter.Checkbutton(self, text = "Debug Mode (writable directory required)", command=self.debug_toggle, height=2, \ width = 40) debugmode.pack() button = Tkinter.Button(body, text="...", command=self.get_outpath) button.grid(row=2, column=2) buttons = Tkinter.Frame(self) buttons.pack() botton = Tkinter.Button( buttons, text="Decrypt", width=10, command=self.decrypt) 
botton.pack(side=Tkconstants.LEFT) Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT) button = Tkinter.Button( buttons, text="Quit", width=10, command=self.quit) button.pack(side=Tkconstants.RIGHT) def get_keypath(self): keypath = tkFileDialog.askopenfilename( parent=None, title='Select ADEPT key file', defaultextension='.der', filetypes=[('DER-encoded files', '.der'), ('All Files', '.*')]) if keypath: keypath = os.path.normpath(os.path.realpath(keypath)) self.keypath.delete(0, Tkconstants.END) self.keypath.insert(0, keypath) return def get_inpath(self): inpath = tkFileDialog.askopenfilename( parent=None, title='Select ADEPT or FileOpen-encrypted PDF file to decrypt', defaultextension='.pdf', filetypes=[('PDF files', '.pdf'), ('All files', '.*')]) if inpath: inpath = os.path.normpath(os.path.realpath(inpath)) self.inpath.delete(0, Tkconstants.END) self.inpath.insert(0, inpath) return def debug_toggle(self): global DEBUG_MODE if DEBUG_MODE == False: DEBUG_MODE = True else: DEBUG_MODE = False def get_outpath(self): outpath = tkFileDialog.asksaveasfilename( parent=None, title='Select unencrypted PDF file to produce', defaultextension='.pdf', filetypes=[('PDF files', '.pdf'), ('All files', '.*')]) if outpath: outpath = os.path.normpath(os.path.realpath(outpath)) self.outpath.delete(0, Tkconstants.END) self.outpath.insert(0, outpath) return def decrypt(self): global INPUTFILEPATH global KEYFILEPATH global PASSWORD keypath = self.keypath.get() inpath = self.inpath.get() outpath = self.outpath.get() if not keypath or not os.path.exists(keypath): # keyfile doesn't exist KEYFILEPATH = False PASSWORD = keypath if not inpath or not os.path.exists(inpath): self.status['text'] = 'Specified input file does not exist' return if not outpath: self.status['text'] = 'Output file not specified' return if inpath == outpath: self.status['text'] = 'Must have different input and output files' return # patch for non-ascii characters INPUTFILEPATH = inpath.encode('utf-8') argv = 
[sys.argv[0], keypath, inpath, outpath] self.status['text'] = 'Processing ...' try: cli_main(argv) except Exception, a: self.status['text'] = 'Error: ' + str(a) return self.status['text'] = 'File successfully decrypted.\n'+\ 'Close this window or decrypt another pdf file.' return def gui_main(): root = Tkinter.Tk() if RSA is None: root.withdraw() tkMessageBox.showerror( "INEPT PDF and FileOpen Decrypter", "This script requires PyCrypto, which must be installed " "separately. Read the top-of-script comment for details.") return 1 root.title('INEPT PDF Decrypter 8.4.51 (FileOpen/APS-Support)') root.resizable(True, False) root.minsize(370, 0) DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1) root.mainloop() return 0 if __name__ == '__main__': if len(sys.argv) > 1: sys.exit(cli_main()) sys.exit(gui_main())