#!/usr/bin/env python

__description__ = 'Strings command in Python'
__author__ = 'Didier Stevens'
__version__ = '0.0.1'
__date__ = '2017/01/28'

"""

Source code put in public domain by Didier Stevens, no Copyright
https://DidierStevens.com
Use at your own risk

History:
  2015/07/02: start
  2015/07/07: continue
  2015/07/28: continue
  2015/10/24: added option -L
  2016/03/24: fix -S bug
  2016/08/03: added support for unicode; option r
  2017/01/22: added option -p
  2017/01/28: added option -g

Todo:
"""

import optparse
import sys
import os
import zipfile
import textwrap
import re
import pickle
import gzip
import collections
import glob

try:
    # Python 2: cStringIO provides the in-memory byte buffer
    from cStringIO import StringIO as BytesIO
except ImportError:
    # Python 3: the cStringIO module no longer exists
    from io import BytesIO

MALWARE_PASSWORD = 'infected'
REGEX_STANDARD = '[\x09\x20-\x7E]'
REGEX_WHITESPACE = '[\x09-\x0D\x20-\x7E]'
FILENAME_GOODWAREDB = 'good-strings.db'

def PrintManual():
    """Print the manual text, wrapping every line with textwrap.fill."""
    manual = '''
Manual:

'''
    for line in manual.split('\n'):
        print(textwrap.fill(line))

#Convert 2 Bytes If Python 3
def C2BIP3(string):
    """Return string as a bytes object on Python 3, unchanged on Python 2.

    Every character is converted through ord(), so on Python 3 all characters
    must have a code point below 256.
    """
    if sys.version_info[0] > 2:
        return bytes([ord(x) for x in string])
    else:
        return string

# CIC: Call If Callable
def CIC(expression):
    """Return expression() when expression is callable, else expression itself."""
    if callable(expression):
        return expression()
    else:
        return expression

# IFF: IF Function
def IFF(expression, valueTrue, valueFalse):
    """Return valueTrue if expression is truthy, else valueFalse.

    The selected value is passed through CIC, so a callable is only invoked
    when its branch is actually chosen.
    """
    if expression:
        return CIC(valueTrue)
    else:
        return CIC(valueFalse)

def _NativeString(value):
    """Decode bytes to str (latin-1, one character per byte) on Python 3.

    Anything that is not a bytes object, and everything on Python 2, is
    returned unchanged.
    """
    if sys.version_info[0] > 2 and isinstance(value, bytes):
        return value.decode('latin-1')
    return value

def _CharacterClass(options):
    """Return the regex matching one string character for the given options.

    A user supplied regex (options.regex) takes precedence over
    options.whitespace; REGEX_STANDARD is the fallback.
    """
    if options.regex != '':
        return options.regex
    if options.whitespace:
        return REGEX_WHITESPACE
    return REGEX_STANDARD

def ExtractStringsASCII(data, options):
    """Return all runs of at least options.bytes matching characters in data.

    data is the raw (bytes) content to scan. The pattern is converted with
    C2BIP3 so that it can be applied to bytes on Python 3; results are
    returned as native strings.
    """
    regex = _CharacterClass(options) + '{%d,}'
    return [_NativeString(found) for found in re.findall(C2BIP3(regex % options.bytes), data)]

def ExtractStringsUNICODE(data, options):
    """Return all runs of at least options.bytes characters each followed by a NUL byte.

    The NUL bytes are removed from the returned (native) strings.
    """
    regex = '((' + _CharacterClass(options) + '\x00){%d,})'
    nul = C2BIP3('\x00')
    empty = C2BIP3('')
    # group(1) is the complete run; reading it explicitly (instead of unpacking
    # findall tuples) keeps working when a user regex adds capture groups
    return [_NativeString(match.group(1).replace(nul, empty)) for match in re.finditer(C2BIP3(regex % options.bytes), data)]

def ExtractStrings(data, options):
    """Extract strings from data according to options.type ('all', 'ascii' or 'unicode').

    For 'all', the ASCII strings come first, followed by the UNICODE strings.
    An unknown type prints a message and yields an empty list.
    """
    if options.type == 'all':
        return ExtractStringsASCII(data, options) + ExtractStringsUNICODE(data, options)
    elif options.type == 'ascii':
        return ExtractStringsASCII(data, options)
    elif options.type == 'unicode':
        return ExtractStringsUNICODE(data, options)
    else:
        print('Unknown type option: %s' % options.type)
        return []

def File2String(filename):
    """Return the binary content of filename, or None when it can not be opened or read."""
    try:
        with open(filename, 'rb') as f:
            return f.read()
    except (IOError, OSError):
        return None

def IfWIN32SetBinary(io):
    """Switch the given stream to binary mode when running on Windows."""
    if sys.platform == 'win32':
        import msvcrt
        msvcrt.setmode(io.fileno(), os.O_BINARY)

#Fix for http://bugs.python.org/issue11395
def StdoutWriteChunked(data):
    """Write data to stdout in chunks of 10000 items, flushing after each chunk.

    Writing stops silently when a flush raises IOError.
    """
    # Walking by offset avoids re-slicing the remaining data after every chunk,
    # and terminates for bytes as well as for str
    for offset in range(0, len(data), 10000):
        sys.stdout.write(data[offset:offset + 10000])
        try:
            sys.stdout.flush()
        except IOError:
            return

def File2Strings(filename):
    """Return the lines of text file filename (newline removed) as a list, or None on error."""
    try:
        with open(filename, 'r') as f:
            # a real list (not a lazy map) so callers can concatenate the result
            return [line.rstrip('\n') for line in f]
    except (IOError, OSError, UnicodeError):
        return None

def ProcessAt(argument):
    """Expand an @file argument into the lines of that file.

    Returns a list: the lines of the file named after the '@', or the argument
    itself as single element when it does not start with '@'.
    Raises Exception when the file can not be read.
    """
    if argument.startswith('@'):
        strings = File2Strings(argument[1:])
        if strings is None:
            raise Exception('Error reading %s' % argument)
        else:
            return strings
    else:
        return [argument]

def ExpandFilenameArguments(filenames):
    """Expand @file arguments and wildcards, returning unique filenames in order of first appearance."""
    expanded = []
    for argument in filenames:
        for pattern in ProcessAt(argument):
            expanded.extend(glob.glob(pattern))
    # OrderedDict keys remove duplicates while keeping the first occurrence order
    return list(collections.OrderedDict.fromkeys(expanded))

def ConsecutiveLettersLength(data):
    """Return the length of the longest run of letters (a-z, case insensitive) in data, 0 if none."""
    return max([0] + [len(letters) for letters in re.findall(r'[a-z]+', data, re.I)])

def StringsSub(extractedString, dUnique, oExtraSensical, options):
    """Apply the selection options to one extracted string and output it when selected.

    dUnique is a dictionary remembering the strings seen so far (used by
    options.unique and updated here); oExtraSensical is the object whose Test
    method is used for options.sensical. Strings that do not contain
    options.search are never output, not even with options.invert.
    """
    if options.search != '' and options.search not in extractedString:
        return
    doPrint = True
    if options.sensical:
        doPrint = doPrint and oExtraSensical.Test(extractedString)
    if options.letters:
        doPrint = doPrint and ConsecutiveLettersLength(extractedString) >= options.letters
    if options.unique:
        doPrint = doPrint and not extractedString in dUnique
        dUnique[extractedString] = True
    # output when the selection result differs from the invert flag
    if bool(doPrint) != bool(options.invert):
        if options.whitespace:
            StdoutWriteChunked(extractedString)
        else:
            print(extractedString)

def Filter(extractedStrings, imported):
    """Return the extracted strings that are not contained in imported, order preserved."""
    return [extractedString for extractedString in extractedStrings if not extractedString in imported]

def LoadGoodwareStrings():
    """Load the goodware strings collection stored next to the script.

    Returns the unpickled collection, or None (after printing an error) when
    the database file can not be opened.
    """
    filename = os.path.join(os.path.dirname(sys.argv[0]), FILENAME_GOODWAREDB)
    try:
        fDB = gzip.GzipFile(filename, 'rb')
    except (IOError, OSError):
        print('Error opening goodware strings DB file: %s' % filename)
        return None
    try:
        # NOTE: pickle can execute code while loading; the database file in the
        # script directory must come from a trusted source
        return pickle.loads(fDB.read())
    finally:
        fDB.close()

def Strings(filename, options):
    """Extract, filter and output the strings found in filename.

    An empty filename means stdin. For a filename ending in .zip, the first
    entry of the archive is read, using MALWARE_PASSWORD as password.
    """
    if filename == '':
        IfWIN32SetBinary(sys.stdin)
        # on Python 3 the raw bytes of stdin are read through its buffer
        # attribute; the Python 2 stdin has no such attribute and is read directly
        content = getattr(sys.stdin, 'buffer', sys.stdin).read()
    elif filename.lower().endswith('.zip'):
        oZipfile = zipfile.ZipFile(filename, 'r')
        try:
            oZipContent = oZipfile.open(oZipfile.infolist()[0], 'r', C2BIP3(MALWARE_PASSWORD))
            try:
                content = oZipContent.read()
            finally:
                oZipContent.close()
        finally:
            oZipfile.close()
    else:
        with open(filename, 'rb') as fIn:
            content = fIn.read()
    oStringIO = BytesIO(content)

    # a set, because every extracted string is tested against it
    imported = set()
    if options.pefile:
        try:
            import pefile
            oPE = pefile.PE(data=oStringIO.read())
            for entry in oPE.DIRECTORY_ENTRY_IMPORT:
                # names are converted to native strings so that they compare
                # equal to the extracted strings, also when they are bytes
                imported.add(_NativeString(entry.dll))
                for imp in entry.imports:
                    imported.add(_NativeString(imp.name))
        except Exception:
            # best effort: without pefile, or when the data can not be parsed
            # as PE file, the strings are output without import filtering
            pass
        oStringIO.seek(0)

    oExtraSensical = None
    if options.sensical:
        import reextra
        oExtraSensical = reextra.cExtraSensical(True)
    if options.whitespace:
        IfWIN32SetBinary(sys.stdout)
    dUnique = {}
    selectedStrings = Filter(ExtractStrings(oStringIO.read(), options), imported)
    if options.goodwarestrings:
        goodware = LoadGoodwareStrings()
        if goodware is None:
            return
        selectedStrings = Filter(selectedStrings, goodware)
    if options.length:
        selectedStrings = sorted(selectedStrings, key=len)
    for extractedString in selectedStrings:
        StringsSub(extractedString, dUnique, oExtraSensical, options)

def Main():
    """Parse the command line and process the given file, or stdin when no file is given."""
    oParser = optparse.OptionParser(usage='usage: %prog [options] [file]\n' + __description__, version='%prog ' + __version__)
    oParser.add_option('-m', '--man', action='store_true', default=False, help='Print manual')
    oParser.add_option('-n', '--bytes', type=int, default=4, help='Minimum string length (default 4)')
    oParser.add_option('-w', '--whitespace', action='store_true', default=False, help='Include whitespace characters')
    oParser.add_option('-s', '--search', default='', help='String to search for')
    oParser.add_option('-l', '--letters', type=int, default=0, help='Minimum amount of consecutive letters (default 0)')
    oParser.add_option('-S', '--sensical', action='store_true', default=False, help='Output only sensical strings (e.g. no gibberish)')
    oParser.add_option('-v', '--invert', action='store_true', default=False, help='Invert selection')
    oParser.add_option('-u', '--unique', action='store_true', default=False, help='Remove repeated strings')
    oParser.add_option('-L', '--length', action='store_true', default=False, help='Sort by string length')
    oParser.add_option('-t', '--type', default='all', help='Type of strings ascii, unicode or all (default)')
    oParser.add_option('-r', '--regex', default='', help='Regex to be used to match characters')
    oParser.add_option('-p', '--pefile', action='store_true', default=False, help='Parse file as PE file and remove imported symbols')
    oParser.add_option('-g', '--goodwarestrings', action='store_true', default=False, help='Use the goodware strings db to filter out strings')
    (options, args) = oParser.parse_args()

    if options.man:
        oParser.print_help()
        PrintManual()
        return 0

    if len(args) > 1:
        # more than one file is not supported: show the help text
        oParser.print_help()
        print('')
        print('  Source code put in the public domain by Didier Stevens, no Copyright')
        print('  Use at your own risk')
        print('  https://DidierStevens.com')
        return 0
    elif len(args) == 0:
        return Strings('', options)
    else:
        return Strings(args[0], options)

if __name__ == '__main__':
    sys.exit(Main())