#!/usr/bin/env python3 # # This software is Copyright (c) 2012-2013 Dhiru Kholia # and Copyright (c) 2013-2026 magnum, # and is hereby released to the general public under the following terms: # Redistribution and use in source and binary forms, with or without # modification, are permitted. import sys import os try: from olefile import isOleFile, OleFileIO except ImportError: print(f"{os.path.basename(sys.argv[0])}: olefile python module is missing, please install your distro's", file=sys.stderr) print("package, eg. 'sudo apt-get install python3-olefile' if available, otherwise", file=sys.stderr) print("'pip install --user olefile' (activate your venv if you already did that)", file=sys.stderr) sys.exit(1) PY3 = sys.version_info[0] == 3 if not PY3: reload(sys) sys.setdefaultencoding("utf8") if PY3: from io import BytesIO as StringIO else: from StringIO import StringIO from struct import unpack import binascii def find_rc4_passinfo_xls(filename, stream): """ Initial version of this function was based on a blog entry posted by Worawit (sleepya) at http://auntitled.blogspot.in site. Since then this function has been heavily modified and extended. http://msdn.microsoft.com/en-us/library/dd908560%28v=office.12%29 http://msdn.microsoft.com/en-us/library/dd920360%28v=office.12%29 """ while True: pos = stream.tell() if pos >= stream.size: break # eof type = unpack("= 2 and minor_version == 2: # RC4 CryptoAPI Encryption Header unpack("= 2 and minor_version == 2: # RC4 CryptoAPI Encryption Header unpack("= 2 and minor_version == 2: pass else: continue # RC4 CryptoAPI Encryption Header, Section 2.3.5.1 - RC4 CryptoAPI # Encryption Header in [MS-OFFCRYPTO].pdf unpack(" -1: sys.stderr.write("%s uses un-supported cipher algorithm %s, please file a bug! 
# NOTE(review): the statements below are the tail of a function whose head is
# not recoverable from this copy of the file (the text preceding it is
# garbled).  They are preserved verbatim, as comments, so that nothing is lost;
# turn them back into code once the enclosing definition has been restored.
#
#     \n" \
#     % (filename, cipherAlgorithm))
#     return -4
#     saltValue = node.attrib.get("saltValue")
#     assert(saltValue)
#     encryptedVerifierHashInput = node.attrib.get("encryptedVerifierHashInput")
#     encryptedVerifierHashValue = node.attrib.get("encryptedVerifierHashValue")
#     if PY3:
#         encryptedVerifierHashValue = binascii.hexlify(base64.decodebytes(encryptedVerifierHashValue.encode()))
#     else:
#         encryptedVerifierHashValue = binascii.hexlify(base64.decodestring(encryptedVerifierHashValue.encode()))
#     if PY3:
#         saltAscii = binascii.hexlify(base64.decodebytes(saltValue.encode())).decode("ascii")
#         encryptedVerifierHashAscii = binascii.hexlify(base64.decodebytes(encryptedVerifierHashInput.encode())).decode("ascii")
#     else:
#         saltAscii = binascii.hexlify(base64.decodestring(saltValue.encode())).decode("ascii")
#         encryptedVerifierHashAscii = binascii.hexlify(base64.decodestring(encryptedVerifierHashInput.encode())).decode("ascii")
#     sys.stdout.write("%s:$office$*%d*%d*%d*%d*%s*%s*%s\n" % \
#         (os.path.basename(filename), version,
#          int(spinCount), int(keyBits), int(saltSize),
#          saltAscii, encryptedVerifierHashAscii,
#          encryptedVerifierHashValue[0:64].decode("ascii")))
#     return 0

# Module-level state shared with the rest of the file: whether the most
# recently processed OLE file had a property-set ("summary") stream, and the
# flattened text collected from it.
have_summary = False
summary = []

import re
from binascii import unhexlify

# Compiled once instead of on every call.
_TAG_RE = re.compile(r'<.*?>', re.DOTALL)
_WHITESPACE_RE = re.compile(r'\s+')

# Signatures used to recognise non-OLE containers from the first bytes read.
_ACCDB_MAGIC = b"Standard ACE DB"
_ONENOTE_MAGIC = unhexlify("e4525c7b8cd8")

# Delimiters of the embedded encryption XML descriptor.
# NOTE(review): in the copy under review the start marker was an empty bytes
# literal and the trailer names were never defined (NameError); the values
# below are the XML declaration prefix and the closing tag of the
# <encryption> root element -- confirm against the upstream script.
_XML_METADATA_START = b'<?xml version="1.0"'
_XML_METADATA_TRAILER = b'</encryption>'


def remove_html_tags(data):
    """Return ``str(data)`` with every ``<...>`` span removed.

    The match is non-greedy and spans newlines.
    """
    return _TAG_RE.sub('', str(data))


def remove_extra_spaces(data):
    """Collapse every run of whitespace in *data* into a single space."""
    return _WHITESPACE_RE.sub(' ', data)


def _slice_xml_metadata(data):
    """Return the bytes from the XML start marker through the trailer."""
    start = data.find(_XML_METADATA_START)
    trailer = data.find(_XML_METADATA_TRAILER)
    return data[start:trailer + len(_XML_METADATA_TRAILER)]


def _collect_summary(ole, entries):
    """Gather the string properties of all property-set streams.

    A stream is treated as a property set when the last component of its
    path starts with the ``\\005`` character.  Returns ``(found, text)``:
    whether any such stream exists, and the cleaned, space-joined values.
    """
    text_type = str if PY3 else unicode  # only text values are of interest
    found = False
    pieces = []
    for entry in entries:
        name = entry[-1]
        if not name.startswith("\005"):
            continue
        found = True
        for value in ole.getproperties(name).values():
            if not isinstance(value, text_type):
                continue  # also skips None
            cleaned = remove_html_tags(value)
            # ':' is dropped from the collected text; it is the separator
            # used in the emitted hash line.
            cleaned = cleaned.replace(":", "")
            pieces.append(remove_extra_spaces(cleaned))
    return found, remove_extra_spaces(" ".join(pieces))


def process_file(filename):
    """Extract a crackable hash line from one Office document.

    The first 81920 bytes are sniffed to dispatch ACCDB and OneNote files to
    their dedicated parsers; everything else must be an OLE container, from
    which either the "EncryptionInfo" stream (newer Office) or the legacy
    RC4 verifier data (Workbook/Book, Word table stream, PowerPoint) is used.
    A ``$oldoffice$`` line is written to stdout for the legacy formats.

    Returns:
        0 on success (legacy hash written); 1 for a zip container or a file
        that is not OLE; 2 when the sniffing stage raised, no supported
        stream exists or the stream could not be opened; 3 when the opened
        stream is ``None``; 4 when no RC4 password info was found; 5 when
        the Word table lookup yields ``"none"``; 6 for PowerPoint files;
        ``None`` for ACCDB / OneNote files; otherwise whatever
        ``process_new_office`` returns.
    """
    global have_summary, summary

    # --- Stage 1: sniff the raw bytes -----------------------------------
    try:
        with open(filename, "rb") as f:  # closed even if read() raises
            data = f.read(81920)  # is this enough?

        if data[0:2] == b"PK":
            sys.stderr.write("%s : zip container found, file is "
                             "unencrypted?, invalid OLE file!\n" % filename)
            return 1

        # ACCDB handling hack for MS Access >= 2007 (Office 12)
        if _ACCDB_MAGIC in data:
            if _XML_METADATA_START in data:
                xml_metadata_parser(_slice_xml_metadata(data), filename)
            else:
                # Access 2007 files using CryptoAPI
                process_access_2007_older_crypto(filename)
            return

        # OneNote handling hack for OneNote versions >= 2013, see
        # [MS-ONESTORE].pdf
        if data.startswith(_ONENOTE_MAGIC) and _XML_METADATA_START in data:
            xml_metadata_parser(_slice_xml_metadata(data), filename)
            return

        if not isOleFile(filename):
            sys.stderr.write("%s : Invalid OLE file\n" % filename)
            return 1
    except Exception:
        e = sys.exc_info()[1]
        import traceback
        traceback.print_exc()
        sys.stderr.write("%s : OLE check failed, %s\n" % (filename, str(e)))
        return 2

    # --- Stage 2: walk the OLE container --------------------------------
    ole = OleFileIO(filename)
    try:
        have_summary = False
        summary = []

        entries = ole.listdir()  # one directory walk instead of several
        have_summary, summary = _collect_summary(ole, entries)

        if ["EncryptionInfo"] in entries:
            # process Office 2003 / 2010 / 2013 files
            return process_new_office(filename)

        if ["Workbook"] in entries:
            stream = "Workbook"
        elif ["Book"] in entries:
            stream = "Book"
        elif ["WordDocument"] in entries:
            typ = 1
            sdoc = ole.openstream("WordDocument")
            stream = find_table(filename, sdoc)
            if stream == "none":
                return 5
        elif ["PowerPoint Document"] in entries:
            stream = "Current User"
        else:
            sys.stderr.write("%s : No supported streams found\n" % filename)
            return 2

        try:
            workbookStream = ole.openstream(stream)
        except Exception:
            import traceback
            traceback.print_exc()
            sys.stderr.write("%s : stream %s not found!\n" % (filename, stream))
            return 2

        if workbookStream is None:
            # Both placeholders are now supplied; the previous form passed
            # only the filename and raised TypeError instead of reporting.
            sys.stderr.write("%s : Error opening stream, %s\n"
                             % (filename, stream))
            return 3

        if stream in ("Workbook", "Book"):
            typ = 0
            passinfo = find_rc4_passinfo_xls(filename, workbookStream)
        elif stream in ("0Table", "1Table"):
            passinfo = find_rc4_passinfo_doc(filename, workbookStream)
        else:
            sppt = ole.openstream("Current User")
            offset = find_ppt_type(filename, sppt)
            sppt = ole.openstream("PowerPoint Document")
            ret = find_rc4_passinfo_ppt(filename, sppt, offset)
            if not ret:
                find_rc4_passinfo_ppt_bf(filename, sppt, offset)
            return 6

        if passinfo is None:
            return 4

        salt, verifier, verifierHash = passinfo
        summary_extra = ""
        if have_summary:
            summary_extra = ":::%s::%s" % (summary, filename)

        sys.stdout.write("%s:$oldoffice$%s*%s*%s*%s%s\n" % (
            os.path.basename(filename), typ,
            binascii.hexlify(salt).decode("ascii"),
            binascii.hexlify(verifier).decode("ascii"),
            binascii.hexlify(verifierHash).decode("ascii"),
            summary_extra))

        workbookStream.close()
        return 0
    finally:
        # Release the container on every exit path, not just on success.
        ole.close()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <encrypted Office file(s)>\n" % sys.argv[0])
        sys.exit(1)

    # set_debug_mode(1)

    for arg in sys.argv[1:]:
        # The per-file status code is not propagated; the exit status of the
        # script is unchanged by failures, as before.
        if not PY3:
            process_file(arg.decode("utf8"))
        else:
            process_file(arg)