Looking for bad guys.

This script looks for traces of malicious code including code injections, modified .htaccess that makes images executable, and so on.

"); if($UseAbsoluteFilePaths) $StartPath = $RealPath; # ================================================================================ # START OF SEARCH ROUTINES. # ================================================================================ /* This program does two things: 1) finds files, and 2) does something with each one. When designing a search, the two questions to ask are: 1) Which types of files (by their names) do I want to find or perform an action on? 2) What action do I want to do on each one? Each search requires these data items to be defined: 1) An array that is a list of Perl-Compatible Regular Expressions (PCRE) of filenames to match. The program searches directories for all the filenames that match any of the regexes. 2) Another array that is a list of PCREs of fullpaths NOT to match. This allows excluding files in certain directories. If a file's NAME matches any regex in list 1) and its PATH+NAME does NOT match any regexes list 2) (the exclusions), its name gets passed to the handler function. 3) The handler function. It can perform any action you want on the file whose name is given to it. Some of the handler functions below merely report that the filename is suspicious, but do nothing else. Another handler searches the file extensively for malicious snippets and reports each one found. You could write a handler that automatically cleans the snippet out of the file, or even deletes the file automatically. The handler can do anything. */ # ================================================================================ # 1) SUSPICIOUS FILENAMES. # Files with these strings in their *names* will be reported as suspicious. # There is currently no method provided to check for suspiciously named folders. 
# ================================================================================ # FILENAMES TO MATCH $FileMatchRegexes = array ( # '/root/i', # '/kit/i', '/c(99|100)/i', '/r57/i', '/gifimg/i' ); # AND FULLPATHS TO EXCLUDE FROM EXAMINATION $FullpathExcludeRegexes = array ( '#lookforbadguys\.php$#i' ); # -------------------------------------------------------------------------------- # HANDLER FUNCTION - THIS IS THE ACTION PERFORMED ON A FILE WHOSE NAME IS A MATCH. function badnames($filename) { echo CleanColorText($filename, 'blue') . " is a " . CleanColorText('suspicious file name', 'red') . ".
"; } # -------------------------------------------------------------------------------- # THIS CODE ACTUALLY DOES THE SEARCH. echo CleanColorText("Searching for files with suspicious names...", 'green') . "
"; FindAndProcessFiles($StartPath, $FileMatchRegexes, $FullpathExcludeRegexes, 'badnames'); # ================================================================================ # 2) WORDPRESS PHARMA HACK SUSPICIOUS FILENAMES. # Files matching these names will be reported as possible pharma hack files. # Regexes are based on the naming conventions described at # http://www.pearsonified.com/2010/04/wordpress-pharma-hack.php # ================================================================================ # FILENAMES TO MATCH $FileMatchRegexes = array ( '/^\..*(cache|bak|old)\.php/i', # HIDDEN FILES WITH PSEUDO-EXTENSIONS IN THE MIDDLE OF THE FILENAME '/^db-.*\.php/i', # Permit the standard WordPress files that start with class-, but flag all others as suspicious. # The (?!) is called a negative lookahead assertion. It means "not followed by..." '/^class-(?!snoopy|smtp|feed|pop3|IXR|phpmailer|json|simplepie|phpass|http|oembed|ftp-pure|wp-filesystem-ssh2|wp-filesystem-ftpsockets|ftp|wp-filesystem-ftpext|pclzip|wp-importer|wp-upgrader|wp-filesystem-base|ftp-sockets|wp-filesystem-direct)\.php/i' ); # AND FULLPATHS TO EXCLUDE FROM EXAMINATION $FullpathExcludeRegexes = array ( '#lookforbadguys\.php$#i' ); # -------------------------------------------------------------------------------- # HANDLER FUNCTION - THIS IS THE ACTION PERFORMED ON A FILE WHOSE NAME IS A MATCH. function pharma($filename) { echo CleanColorText($filename, 'blue') . " is most likely a " . CleanColorText('pharma hack', 'red') . ".
"; } # -------------------------------------------------------------------------------- # THIS CODE ACTUALLY DOES THE SEARCH. echo "
" . CleanColorText("Searching for files with names related to Wordpress pharma hack...", 'green') . "
"; FindAndProcessFiles($StartPath, $FileMatchRegexes, $FullpathExcludeRegexes, 'pharma'); # ================================================================================ # 3) MALICIOUS CODE SNIPPETS. # Search text files for snippets of malicious code and report all that are found. # ================================================================================ # FILENAMES TO MATCH # Ideally, this list should contain all common extensions of text files # that can become hazardous when malicious text is injected into them. $FileMatchRegexes = array ( '/\.htaccess$/i', '/\.php[45]?$/i', '/\.html?$/i', '/\.aspx?$/i', '/\.inc$/i', '/\.cfm$/i', '/\.js$/i', '/\.txt$/i', '/\.css$/i' ); # AND FULLPATHS TO EXCLUDE FROM EXAMINATION $FullpathExcludeRegexes = array ( '#lookforbadguys\.php$#i' ); # -------------------------------------------------------------------------------- # HANDLER FUNCTION - THIS IS THE ACTION PERFORMED ON A FILE WHOSE NAME IS A MATCH. function FindMaliciousCodeSnippets($filename) { if(!is_readable($filename)) { echo "Warning: Unable to read " . CleanColorText($filename, 'blue') . ". Check it manually and check its access permissions.
"; return; } # READ THE FILE INTO A STRING, WITH LINE ENDS REMOVED AND WHITESPACE COMPRESSED. $file = file_get_contents($filename); $file = preg_replace('/\s+/', ' ', $file); # The file is searched for each of these snippets of suspicious text. # These are regular expressions with the required /DELIMITERS/ and with metachars escaped. # /i at the end means case insensitive. # PHP function names are case-insensitive. # If your regex itself contains / chars, you can use a different # char as a delimiter like this: '#delimited#i' to avoid confusion. $SuspiciousSnippets = array ( # POTENTIALLY SUSPICIOUS CODE '/edoced_46esab/i', '/passthru\s*\(/i', '/shell_exec\s*\(/i', '/document\.write\s*\(unescape\s*\(/i', # THESE CAN GIVE MANY FALSE POSITIVES WHEN CHECKING WORDPRESS AND OTHER CMS. # NONETHELESS, THEY CAN BE IMPORTANT TO FIND, ESPECIALLY BASE64_DECODE. # THIS IS MUCH MORE SUSPICIOUS IF THE MATCHED TEXT CONTAINS THE EVAL() CODE. '/(eval\s*\(.{0,40})?base64_decode\s*\(/i', '/system\s*\(/i', # PHP BACKTICK OPERATOR INVOKES SYSTEM FUNCTIONS, SAME AS system(), # BUT IT IS ALSO A DATABASE,TABLE,FIELD DELIMITER IN SQL DATABASE QUERIES. '/`[^`]+`/', '/phpinfo\s*\(/i', # THIS SET GENERATES MANY FALSE POSITIVES # '/chmod\s*\(/i', # '/mkdir\s*\(/i', # '/fopen\s*\(/i', # '/fclose\s*\(/i', # '/readfile\s*\(/i', # THESE WERE PREVIOUSLY SPECIAL CASES; NOW MOVED INTO THIS ARRAY. '/RewriteRule\s/i', # SUSPICIOUS IF THE DESTINATION IS A DIFFERENT SITE OR SUSPICIOUS FILE. '/AddHandler\s/i', # THIS CAN MAKE IMAGE OR OTHER FILES EXECUTABLE. # JAVASCRIPT SNIPPETS WHOSE SRC= REFERENCES AN HTTP:// SOURCE OTHER THAN ONES KNOWN TO BE SAFE. # EVEN WITH EXCEPTIONS, THIS CAN GIVE MANY FALSE POSITIVES. '@]+src=[\x22\x27]?http://(?!(www\.(google-analytics|gmodules)\.com|pagead2\.googlesyndication\.com/pagead/|(ws\.|((www|cls)\.assoc-))amazon\.com/))[^>]*>@i', # IFRAMES, WITH A KNOWN-HARMLESS EXCLUSION. # IFRAME SEARCH CAN GIVE MANY FALSE POSITIVES IN SOME WEBSITES. 
'@]+src=[\x22\x27]?http://(?!(rcm\.amazon\.com/))[^>]*>@i', # SUSPICIOUS NAMES. SOME HACKERS SIGN THEIR SCRIPTS. MANY NAMES COULD BE PUT INTO THIS LIST. # HERE IS A GENERIC EXAMPLE OF TEXT FROM A DEFACED WEB PAGE. '/hacked by\s/i', # OTHER SUSPICIOUS TEXT STRINGS '/web[\s-]*shell/i', # TO FIND BACKDOOR WEB SHELL SCRIPTS. '/c(99|100)/i', # THE NAMES OF SOME POPULAR WEB SHELLS. '/r57/i', # YOU COULD/SHOULD ADD TO THIS LIST SOME REGULAR EXPRESSIONS TO MATCH THE NAMES OF # MALICIOUS DOMAINS AND IP ADDRESSES MENTIONED IN YOUR # GOOGLE SAFE BROWSING DIAGNOSTIC REPORT. # SOME EXAMPLES: '/gumblar\.cn/i', '/martuz\.cn/i', '/beladen\.net/i', '/gooqle/i', # NOTE THIS HAS A Q IN IT. # '/127\.0\.0\.1/', # COMMENTED-OUT EXAMPLE OF AN IP ADDRESS REGEX # THESE 2 ARE THE WORDPRESS CODE INJECTION IN FRONT OF EVERY INDEX.PHP AND SOME OTHERS '/_analist/i', # EACH LIST ENTRY MUST BE TERMINATED WITH A COMMA... '/anaiytics/i' # EXCEPT THE LAST ENTRY MUST NOT HAVE A COMMA. ); # ACCUMULATES ALL THE WARNING MESSAGES FOR THIS FILE. $OutputText = array(CleanColorText($filename, 'blue')); # SEARCH THE FILE FOR EACH OF THE ABOVE SNIPPETS. foreach($SuspiciousSnippets as $snippet) { $matches = array(); if($matchcount = preg_match_all($snippet, $file, $matches, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE)) { $i = 0; foreach($matches[0] as $occurrence) # $occurrence is an array itself 0=>string, 1=>offset { $i++; # THE 80 CHARACTERS AFTER START OF MATCH INSTANCE $s = substr($file, $occurrence[1], 80); $newline = (($i === 1) ? '

' : '
'); $OutputText[] = $newline . CleanColorText("Regex ($i of $matchcount): ", 'black') . CleanColorText($snippet, 'red') . CleanColorText(": " . $s, 'black'); } } } # REPORT ALL THREAT MESSAGES AT ONCE, IF THERE WERE ANY. # TO PRINT EVERY FILENAME EXAMINED, MAKE THE THRESHOLD 0. if(count($OutputText) > 1) { foreach($OutputText as $s) echo $s; echo '

'; } } # -------------------------------------------------------------------------------- # THIS CODE ACTUALLY DOES THE SEARCH. echo "
" . CleanColorText("Searching for files containing suspicious code or other text...", 'green') . "
"; FindAndProcessFiles($StartPath, $FileMatchRegexes, $FullpathExcludeRegexes, 'FindMaliciousCodeSnippets'); # -------------------------------------------------------------------------------- # END OF THE SEARCH ROUTINES # ================================================================================ # ================================================================================ # FUNCTION LIBRARY # -------------------------------------------------------------------------------- # Output text in specified color, cleaning it with htmlentities(). # Malicious text snippets could by definition be hazardous, so # always use this to put text on the web page # unless it is going into a text (input) box or textarea. function CleanColorText($text, $color) { $outputcolor = 'black'; $color = trim($color); if(preg_match('/^(red|blue|green|black|#[0-9A-F]{6})$/i', $color)) $outputcolor = $color; return '' . htmlentities($text, ENT_QUOTES) . ''; } # -------------------------------------------------------------------------------- function ResetCounts() { global $FilesCount, $FilesMatchedCount, $DirectoriesCount, $DirectoriesMatchedCount, $AllFilesToProcess; $FilesCount = $FilesMatchedCount = $DirectoriesCount = $DirectoriesMatchedCount = 0; $AllFilesToProcess = array(); } # -------------------------------------------------------------------------------- function ShowCounts() { global $FilesCount, $FilesMatchedCount, $DirectoriesCount, $DirectoriesMatchedCount; $s = "Files encountered = $FilesCount" . ', ' . "Matching regex and processed = $FilesMatchedCount" . '; ' . "Directories encountered = $DirectoriesCount" . ', ' . "Matched and processed = $DirectoriesMatchedCount"; echo CleanColorText($s, 'green') . "
"; } # -------------------------------------------------------------------------------- # Returns path translated to canonical absolute filesystem path, # or FALSE if it fails (path does not exist or PHP cannot enter/read it). function GetCanonicalPath($path) { # CLEAN IT UP AND CONVERT TO STANDARD PHP FORMAT (/) $path = str_replace('\\', '/', $path); $path = rtrim($path, '/'); $path .= '/'; $RealPath = realpath($path); # FALSE IF PHP CANNOT READ ANY DIR IN HIERARCHY if($RealPath === FALSE) return FALSE; $RealPath = str_replace('\\', '/', $RealPath); $RealPath = rtrim($RealPath, '/'); $RealPath .= '/'; return $RealPath; } # -------------------------------------------------------------------------------- /* Recursively search the starting directory and all below it to find files whose names match the given regex(es). Since this performs no action on the files found, it is now a generic file-finder like the Linux "find" command. You can do whatever you want with the list once it's built. $FileMatchRegexes can be either a string or an array. Passing them all at once allows the filesystem to be traversed only once to find all matches (20+% faster). */ function BuildFileList($StartDir, $FileMatchRegexes, $FullpathExcludeRegexes) { # NOTE THAT THIS FUNCTION REQUIRES THE GLOBAL VARIABLES DECLARED EARLIER. global $FilesCount, $FilesMatchedCount, $DirectoriesCount, $DirectoriesMatchedCount, $AllFilesToProcess; # CHANGE BACKSLASHES TO FORWARD, WHICH IS OK IN PHP, EVEN IN WINDOWS. # THEN REMOVE ANY TRAILING SLASHES AND ADD EXACTLY ONE. $StartDir = str_replace('\\', '/', $StartDir); $StartDir = rtrim($StartDir, '/'); $StartDir .= '/'; # ENSURE THAT THE CURRENT DIRECTORY EXISTS AND IS READABLE BY PHP. if(!is_dir($StartDir)) { echo "Warning: Directory does not exist: " . CleanColorText($StartDir, 'blue') . "
"; return; } $DirectoriesCount++; # COUNT IT AS A DIRECTORY (READABLE OR NOT) if(!is_readable($StartDir)) { echo CleanColorText("Warning: Directory is not readable by PHP: ", 'red') . CleanColorText($StartDir, 'blue') . ". Check its owner/group permissions.
"; return; } # THE DIR IS READABLE, SO IT WILL BE PROCESSED. # A DIR IS NEVER ACTUALLY EXCLUDED FROM PROCESSING UNLESS IT CAN'T BE READ. # ONLY FILES ARE AFFECTED BY THE EXCLUSION RULES. $DirectoriesMatchedCount++; # IF THESE ARE NOT ARRAYS, TURN THEM INTO ARRAYS. if(!is_array($FileMatchRegexes)) $FileMatchRegexes = array($FileMatchRegexes); if(!is_array($FullpathExcludeRegexes)) $FullpathExcludeRegexes = array($FullpathExcludeRegexes); # DETERMINE IF EACH ENTRY IN THE CURRENT DIRECTORY IS A CANDIDATE FOR INCLUSION IN THE FILE LIST. $dir = dir($StartDir); while(($filename = $dir->read()) !== FALSE) { $fullname = $dir->path . $filename; if(is_file($fullname)) { $FilesCount++; # ADD IT TO THE COUNT OF *ALL* FILES, PROCESSED OR NOT. # IF ITS NAME MATCHES ANY OF THE REGEXES, IT MIGHT GO INTO THE LIST... $matches = 0; foreach($FileMatchRegexes as $regex) { if(preg_match($regex, $filename)) { $matches = 1; # UNLESS ITS FULLPATH MATCHES ANY OF THE EXCLUSION REGEXES. foreach($FullpathExcludeRegexes as $exclude) { if(preg_match($exclude, $fullname)) { $matches = 0; break; } } break; } } if($matches) { $FilesMatchedCount++; $AllFilesToProcess[] = $fullname; } } else if(is_dir($fullname)) { # ELSE IF IT IS A DIRECTORY AND NOT THE CURRENT ONE OR ITS PARENT, # RECURSIVELY CALL THIS FUNCTION TO PROCESS ALL *ITS* ENTRIES # BEFORE CONTINUING WITH THE CURRENT DIRECTORY. if(($filename !== '.') && ($filename !== '..')) BuildFileList($fullname, $FileMatchRegexes, $FullpathExcludeRegexes); } } $dir->close(); } # -------------------------------------------------------------------------------- # BUILD A MASTER LIST OF ALL THE FILES TO PROCESS, # THEN SORT THE ARRAY AND PROCESS ALL ITS ENTRIES AT ONCE. 
# Run one complete search: reset the counters, collect every matching file below $StartDir,
# sort the collected paths as strings, hand each path to $FileHandlerFunction, and finally
# print the counters.
#
# $StartDir               - directory at which the recursive search begins.
# $FileMatchRegexes       - filename regex(es), passed through to BuildFileList().
# $FullpathExcludeRegexes - fullpath exclusion regex(es), passed through to BuildFileList().
# $FileHandlerFunction    - callable invoked with one argument, the full path of a matching file.
function FindAndProcessFiles($StartDir, $FileMatchRegexes, $FullpathExcludeRegexes, $FileHandlerFunction)
{
    global $AllFilesToProcess;

    # START FROM A CLEAN SLATE, THEN FILL THE GLOBAL LIST.
    ResetCounts();
    BuildFileList($StartDir, $FileMatchRegexes, $FullpathExcludeRegexes);

    # sort() RENUMBERS THE KEYS 0..N-1, SO THE LIST CAN BE WALKED BY INDEX.
    sort($AllFilesToProcess, SORT_STRING);
    $pending = $AllFilesToProcess;
    $total = count($pending);

    for($index = 0; $index < $total; $index++)
    {
        call_user_func($FileHandlerFunction, $pending[$index]);
    }

    ShowCounts();
}

# --------------------------------------------------------------------------------
# END FUNCTION LIBRARY
# ================================================================================

# ALL SEARCHES HAVE RUN; PRINT THE CLOSING MESSAGE.
$doneMessage = CleanColorText("Done!", 'green');
echo "
" . $doneMessage . "
";
?>