#!/usr/bin/perl
#
# Queries a HTML file, executable file, an URL, a host, an IP
# or an e-mail at SPFBL.net's URIBL. http://spfbl.net/en/uribl
#
# This script will follow all URL redirections, until no more redirections.
# The target is final URL, that will be viewed by user.
#
# Returns:
# 0 - not listed.
# 1 - listed as phishing or as SPAM resource.
# 2 - listed as rejected executable file.
# 3 - malware found at executable file.
# 4 - undefined executable file found.
#
# SPFBL is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SPFBL is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SPFBL. If not, see .
#
# Project SPFBL - Copyright Leandro Carlos Rodrigues - leandro@spfbl.net
# https://github.com/leonamp/SPFBL
#
# Version: 3.0
use strict;
use Cwd;
use Cwd qw(abs_path);
use URI;
use URI::Encode qw(uri_decode);
use HTML::TreeBuilder;
use HTML::Entities qw(decode_entities);
use Email::Valid;
use Set::Scalar;
use WWW::Mechanize;
use WWW::Mechanize::Firefox;
use WWW::Scripter;
use HTTP::Request;
use Data::Validate::URI;
use Data::Validate::IP qw(is_ip is_ipv4 is_ipv6);
use Data::Validate::Domain qw(is_domain);
use Net::IP qw(ip_expand_address ip_reverse);
use Mail::RBL;
use Net::DNS;
use ClamAV::Client;
use Digest::MD5 qw(md5_hex);
use File::Spec;
use File::Slurper qw(read_text write_text read_lines);
use JSON::Parse qw(parse_json);
use Image::ExifTool qw(:Public);
use DateTime;
use DateTime::Format::Strptime;
use MIME::Lite;
use IO::Handle;
my $FINAL = '(/unsubscribe\.php?|(//|\.)facebook\.com/|(//|\.)instagram\.com/|(//|\.)twitter\.com/|(//|\.)linkedin\.com/|(//|\.)strava\.com/|(//|\.)youtube\.com/|(//|\.)myspace\.com/|(//|\.)support\.icewarp\.com/|(//|\.)google-analytics\.com/|(//|\.)hubspotemail\.net/|(//|\.)pinterest\.com/|(//|\.)list-manage\.com/|(//|\.)avast\.com/|(//|\.)sendgrid\.net/|(//|\.)rdstation\.com\.br/|(//|\.)rdstation\.email/|\.gov\.br/)'; # REGEX of all URLs that must be considered final. Do not access it!
my $IGNORE = '((//|\.)avg\.com(/|$)|\.avast\.com(/|$)|//tinyurl\.com/nospam\.php?)'; # REGEX of all URLs that must be ignored.
my $NOREDIR = '((//|\.)netflix\.com(/|$)|(//|\.)netflix\.com\.br(/|$)|(//|\.)bgp\.net\.br(/|$))'; # REGEX to demilit redirecions. Do not redirect to it!
my $SHORTENERS = '^https?\:\/\/(1link\.in|1url\.com|2big\.at|2pl\.us|2tu\.us|2ya\.com|4url\.cc|6url\.com|a\.gg|a\.nf|a2a\.me|abbrr\.com|adf\.ly|adjix\.com|alturl\.com|atu\.ca|b23\.ru|back\.ly|bacn\.me|bc\.vc|bit\.do|bit\.ly|bitly\.com|bkite\.com|bloat\.me|budurl\.com|buk\.me|burnurl\.com|buzurl\.com|c-o\.in|chilp\.it|clck\.ru|cli\.gs|clickmeter\.com|cort\.as|cur\.lv|cutt\.us|cuturl\.com|db\.tt|decenturl\.com|dfl8\.me|digbig\.com|digg\.com|doiop\.com|dwarfurl\.com|dy\.fi|easyuri\.com|easyurl\.net|eepurl\.com|esyurl\.com|ewerl\.com|fa\.b|ff\.im|fff\.to|fhurl\.com|filoops\.info|fire\.to|firsturl\.de|flic\.kr|fly2\.ws|fon\.gs|fwd4\.me|gl\.am|go\.9nl\.com|go2\.me|go2cut\.com|goo\.gl|goshrink\.com|gowat\.ch|gri\.ms|gurl\.es|hellotxt\.com|hex\.io|hover\.com|href\.in|htxt\.it|hugeurl\.com|hurl\.it|hurl\.me|hurl\.ws|icanhaz\.com|idek\.net|inreply\.to|inx\.lv|is\.gd|iscool\.net|iterasi\.net|ity\.im|j\.mp|jijr\.com|jmp2\.net|just\.as|kissa\.be|kl\.am|klck\.me|korta\.nu|krunchd\.com|liip\.to|liltext\.com|lin\.cr|link\.zip\.net|linkbee\.com|linkbun\.ch|liurl\.cn|ln-s\.net|ln-s\.ru|lnk\.gd|lnk\.in|lnkd\.in|loopt\.us|lru\.jp|lt\.tl|lurl\.no|metamark\.net|migre\.me|minilien\.com|miniurl\.com|minurl\.fr|moourl\.com|myurl\.in|ne1\.net|njx\.me|nn\.nf|notlong\.com|nsfw\.in|o-x\.fr|om\.ly|ouo\.io|ow\.ly|pd\.am|pic\.gd|ping\.fm|piurl\.com|pnt\.me|po\.st|poprl\.com|post\.ly|posted\.at|prettylinkpro\.com|profile\.to|q\.gs|qicute\.com|qlnk\.net|qr\.ae|qr\.net|quip-art\.com|rb6\.me|redirx\.com|ri\.ms|rickroll\.it|riz\.gd|rsmonkey\.com|ru\.ly|rubyurl\.com|s7y\.us|safe\.mn|scrnch\.me|sharein\.com|sharetabs\.com|shorl\.com|short\.ie|short\.to|shortlinks\.co\.uk|shortna\.me|shorturl\.com|shoturl\.us|shrinkify\.com|shrinkster\.com|shrt\.st|shrten\.com|shrunkin\.com|shw\.me|simurl\.com|sn\.im|snipr\.com|snipurl\.com|snurl\.com|sp2\.ro|spedr\.com|sqrl\.it|starturl\.com|sturly\.com|su\.pr|t\.co|tcrn\.ch|thrdl\.es|tighturl\.com|tiny\.cc|tiny\.pl|tiny123\.com|tinyarro\.ws|tinyarrows\.com|tinytw\.it|tinyuri\.ca|tinyurl\.com|tinyvid\.io|tnij\.org|to\.ly|togoto\.us|tr\.im|tr\.my|traceurl\.com|turo\.us|tweetburner\.com|tweez\.me|twirl\.at|twit\.ac|twitterpan\.com|twitthis\.com|twiturl\.de|twurl\.cc|twurl\.nl|u\.bb|u\.mavrev\.com|u\.nu|u\.to|u6e\.de|ub0\.cc|ulvis\.net|updating\.me|ur1\.ca|url\.co\.uk|url\.ie|url4\.eu|urlao\.com|urlbrief\.com|urlcover\.com|urlcut\.com|urlenco\.de|urlhawk\.com|urlkiss\.com|urlot\.com|urlpire\.com|urlx\.ie|urlx\.org|urlzen\.com|v\.gd|virl\.com|vl\.am|vzturl\.com|w3t\.org|wapurl\.co\.uk|we\.tl|wipi\.es|wp\.me|x\.co|x\.se|xaddr\.com|xeeurl\.com|xr\.com|xrl\.in|xrl\.us|xurl\.es|xurl\.jp|xzb\.cc|yep\.it|yfrog\.com|yourls\.org|yweb\.com|zi\.ma|zi\.pe|zipmyurl\.com|zz\.gd|ujeb\.se|soo\.gd|gee\.su|gmy\.su|v\.ht|tini\.to)\/'; # List of all know shorteners.
my $USERAGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0';
my $VALIDATOR = Data::Validate::URI->new();
my $CLAMAV = ClamAV::Client->new(socket_name => '/var/run/clamav/clamd.ctl'); # Set the ClamaAV socket.
my $GSBKEY = ''; # Google SafeBrowsing API key.
my $AGRESSIVE = 1; # Will visit all URL contents if 1 or only shorteners if 0.
sub startsWith {
return substr($_[0], 0, length($_[1])) eq $_[1];
}
# Search HTTP redirection at META tag.
sub redirectionHTML {
eval {
my ($tree) = @_;
for my $meta ($tree->look_down(_tag => 'meta')) {
my $equiv = lc($meta->attr('http-equiv'));
if ($equiv eq 'refresh') {
my $content = $meta->attr('content');
if ($content =~ m/(https?\:\/\/[A-Za-z0-9\-\._~!\$&\(\)\*+,;=:\/?@]+)/g) {
if ($VALIDATOR->is_uri($1)) {
if ($1 !~ m/$NOREDIR/g) {
return $1;
}
}
}
}
}
};
}
# Search JavaScript redirection.
sub redirectionJavascript {
eval {
my ($addressset, $tree, $uri) = @_;
my $head = $tree->look_down(_tag => q{head});
for my $script ($head->look_down(_tag => 'script')) {
my $type = $script->attr('type');
if ($type eq 'text/javascript') {
my @content = $script->content_list();
my $content = @content[0];
while($content =~ m/\bwindow\.location\.href *= *('|") *(https?\:\/\/[^\s]+[\/\w]) *('|")/gi) {
my $newuri = uri_decode($2);
if ($VALIDATOR->is_uri($newuri)) {
if ($newuri !~ m/$NOREDIR/g) {
return $newuri;
}
}
}
if ($content =~ m/\bwindow\.location\.href *= */) {
if ($content =~ m/('|") *(https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?[a-z0-9\-\._~!\$&\(\)\*+,;\=:\/?@#%]*) *('|")/i) {
my $newuri = uri_decode($2);
if ($newuri !~ m/$NOREDIR/g) {
return $newuri;
}
}
}
}
}
my $body = $tree->look_down(_tag => q{body});
for my $script ($tree->look_down(_tag => 'script')) {
my $type = $script->attr('type');
if ($type eq 'text/javascript') {
my @content = $script->content_list();
my $content = @content[0];
while($content =~ m/\bdocument\.location *= *('|") *([a-z0-9\-\._~!\$&\(\)\*+,;\=:\/?@]+) *('|")/gi) {
my $newuri = repath($uri, uri_decode($2));
if ($VALIDATOR->is_uri($newuri)) {
if ($newuri !~ m/$NOREDIR/g) {
return $newuri;
}
}
}
if ($content =~ m/\bdocument\.location *= */) {
if ($content =~ m/('|") *(https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?[a-z0-9\-\._~!\$&\(\)\*+,;\=:\/?@#%]*) *('|")/i) {
my $newuri = uri_decode($2);
if ($newuri !~ m/$NOREDIR/g) {
return $newuri;
}
}
}
}
}
# Looking for a special JavaScript redireciton using iframe.
for my $iframe ($tree->look_down(_tag => 'iframe')) {
my $onload = $iframe->attr('onload');
if ($onload =~ m/^top.location=/) {
$onload =~ s/\\\//\//g;
while($onload =~ m/\btop\.location *= *' *(https?\:\/\/[^\s]+[\/\w]) *'/g) {
if ($VALIDATOR->is_uri($1)) {
if ($1 !~ m/$NOREDIR/g) {
return $1;
}
}
}
}
}
# Looking for a special redirection at body tag that is invoked by JavaScript.
for my $meta ($tree->look_down(_tag => 'body')) {
my $redirect = lc($meta->attr('data-redirect'));
if ($redirect eq 'true') {
my $location = decode_entities($meta->attr('data-url'));
if ($location =~ m/(https?\:\/\/[A-Za-z0-9\-\._~!\$&\(\)\*+,;=:\/?@]+)/g) {
if ($VALIDATOR->is_uri($1)) {
if ($1 !~ m/$NOREDIR/g) {
return $1;
}
}
}
}
for my $script ($tree->look_down(_tag => 'script')) {
my @content = $script->content_list();
my $content = @content[0];
if ($content =~ m/\blocation\.pathname = document\.getElementById\('[^']+'\)\.getAttribute\('[^']+'\);/gi) {
$addressset->insert("MALWARE=SPFBL.HTML.Redirect.Evasion");
}
}
}
return;
};
}
# Search URL flagged as suspicious by bit.ly within your shortening.
sub redirectionBitly {
eval {
my ($tree) = @_;
for my $link ($tree->look_down(_tag => 'a')) {
my $id = $link->attr('id');
if ($id eq 'clickthrough') {
my $clickthrough = $link->attr('href');
if ($VALIDATOR->is_uri($clickthrough)) {
return $clickthrough;
}
}
}
return;
};
}
# Search for any redirection.
sub redirection {
my ($addressset, $tree, $uri) = @_;
my $redir;
if ($redir = redirectionHTML($tree)) {
return $redir;
} elsif ($redir = redirectionJavascript($addressset, $tree, $uri)) {
return $redir;
} elsif (($uri =~ m/https?\:\/\/bit\.ly\//i) && ($redir = redirectionBitly($tree))) {
return $redir;
} else {
return;
}
}
# Scan file at ClamAV.
sub clamavScan {
my ($filename) = @_;
my ($path, $result);
if (-e $filename) {
eval {
($path, $result) = $CLAMAV->scan_path($filename);
};
}
return $result;
}
# Try to LOG at Exim or STDOUT for exception.
sub logWrite {
my ($text) = @_;
eval {
Exim::log_write($text);
};
# print("$text\n");
}
# Calculate MD5 hex sum of a file.
sub md5sum {
my ($filename) = @_;
if (-e $filename) {
my $digest = Digest::MD5->new;
open(FILE, "$filename");
binmode(FILE);
$digest->addfile(*FILE);
close FILE;open
return $digest->hexdigest();
}
return;
}
# Process an executable file.
sub processExecutable {
my ($filename, $extension, $addressset) = @_;
logWrite("EXEC $filename");
# Generate executable signature.
my $length = -s $filename;
my $signature = md5sum($filename);
my $name = "$signature.$length.$extension";
$addressset->insert($name);
# Store cache.
my $folder = "/var/spfbl";
if (-d $folder) {
system("cp '$filename' '$folder/$name'");
}
# ClamAV scan.
my $result = clamavScan($filename);
if ($result) {
$addressset->insert("MALWARE=$result");
logWrite("MLWR $result");
} elsif ($extension =~ m/^(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|shs|js|hta|msi)$/i) {
$addressset->insert("MALWARE=SPFBL.Executable.$extension");
}
return $name;
}
# Recursive routine to check a file or a folder.
sub checkFile {
my ($dir, $filename, $content_type, $uriset, $addressset) = @_;
my $executable;
if (-l $filename) {
logWrite("LINK inode/symlink $filename");
} elsif (-d $filename) {
logWrite("FILE inode/directory $filename");
if (substr($filename, -1) ne "/") {
$filename =~ s/ /\\ /g;
$filename = "$filename/";
}
my @children = glob("$filename*");
foreach my $child (@children) {
my $result = checkFile($filename, $child, '', $uriset, $addressset);
if ($result) {
$executable = $result;
}
}
} elsif (-e $filename) {
if ($filename =~ m/\.lnk$/i) {
$executable = processExecutable($filename, 'lnk', $addressset);
# Check if the LNK file is calling msiexec.exe to install by URL.
my $info = ImageInfo($filename);
my $target = $info->{TargetFileDOSName};
if ($target eq 'msiexec.exe') {
$addressset->insert("MALWARE=SPFBL.Script.msiexec.exe");
my $arguments = $info->{CommandLineArguments};
if ($arguments =~ m/\/i +(https?\:\/\/[A-Za-z0-9\-\._~!\$&\(\)\*+,;=:\/?@]+)\b/i) {
$uriset->insert(uri_decode($1));
}
}
# } elsif ($filename =~ m/\.ace$/i) {
# # TODO: decompress ACE files with unace.
} elsif ($filename =~ m/\.cab$/i) {
# TODO: decompress CAB files with cabextract.
logWrite("FILE application/vnd.ms-cab-compressed $filename");
} elsif ($filename =~ m/\.(com|vbs|vbe|bat|cmd|pif|scr|prf|exe|shs|arj|hta|jar|ace|js|msi|sh)$/i) {
my $extension = lc($1);
$executable = processExecutable($filename, $extension, $addressset);
} else {
my $type = `file --brief --mime-type "$filename"`;
print("$type\n");
$type =~ s/\n//g;
if ($content_type ne '') {
if ($type eq 'text/plain') {
$type = $content_type;
} elsif ($type eq 'application/octet-stream') {
$type = $content_type;
}
}
if ($type eq 'application/gzip') {
# MIME exceptions for Gzip compression.
if ($filename !~ m/\.wmz$/i) {
# Compressed Windows Metafile
$type = 'application/x-msmetafile';
} elsif ($filename !~ m/\.emz$/i) {
# Compressed Windows Enhanced Metafile
$type = 'application/x-msmetafile';
}
}
if ($type eq 'application/msword' || $filename =~ m/\.doc$/i) {
logWrite("FILE $content_type $filename");
my $code = system("egrep --binary --ignore-case '\\b(AutoOpen|Document_Open|word/vbaProject\\.bin)\\b' '$filename' > /dev/null");
if ($code == 0) {
$executable = processExecutable($filename, 'doc', $addressset);
$addressset->insert("MALWARE=SPFBL.Document.AutoOpen.doc");
} else {
my $result = clamavScan($filename);
if ($result) {
$addressset->insert("MALWARE=$result");
logWrite("MLWR $result");
}
}
} elsif ($type eq 'application/x-dosexec') {
$executable = processExecutable($filename, 'exe', $addressset);
} elsif ($type eq 'text/x-msdos-batch') {
$executable = processExecutable($filename, 'cmd', $addressset);
} elsif ($type eq 'application/x-elf') {
$executable = processExecutable($filename, 'elf', $addressset);
} elsif ($type eq 'application/x-sh') {
$executable = processExecutable($filename, 'sh', $addressset);
} elsif ($type eq 'application/jar') {
$executable = processExecutable($filename, 'jar', $addressset);
} elsif ($type eq 'application/x-msdownload') {
$executable = processExecutable($filename, 'exe', $addressset);
} elsif ($type eq 'application/x-ms-installer') {
$executable = processExecutable($filename, 'msi', $addressset);
} elsif ($type eq 'application/zip') {
logWrite("FILE $type $filename");
my $directory = "$filename.d";
if (system("unzip -qq -P password '$filename' -d '$directory'") < 3) {
my $result = checkFile($directory, $directory, "inode/directory", $uriset, $addressset);
if ($result) {
$executable = $result;
}
} else {
# Encrypted file. Find any executable by filename list.
my $list = `unzip -Z -1 '$filename'`;
my @lines = split /\n/, $list;
foreach my $line (@lines) {
if ($line =~ m/\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|js|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip)$/i) {
$executable = processExecutable($filename, 'zip', $addressset);
if ($line =~ m/\.doc$/i) {
$addressset->insert("MALWARE=SPFBL.Encrypted.doc");
}
last;
}
}
}
system("rm -R '$directory'");
} elsif ($type eq 'application/gzip') {
logWrite("FILE $type $filename");
my $directory = "$filename.d";
system("mkdir '$directory'");
system("cp '$filename' '$directory'");
system("gunzip --quiet --recursive '$directory'");
my $result = checkFile($directory, $directory, 'inode/directory', $uriset, $addressset);
if ($result) {
$executable = $result;
}
system("rm -R '$directory'");
} elsif ($type eq 'application/x-tar') {
logWrite("FILE $type $filename");
my $directory = "$filename.d";
system("mkdir '$directory'");
system("tar --extract --file '$filename' --directory '$directory'");
my $result = checkFile($directory, $directory, 'inode/directory', $uriset, $addressset);
if ($result) {
$executable = $result;
}
system("rm -R '$directory'");
} elsif ($type eq 'application/x-7z-compressed') {
logWrite("FILE $type $filename");
my $directory = "$filename.d";
if (system("7z x -bd -ppassword '$filename' '-o$directory' > /dev/null") < 2) {
my $result = checkFile($directory, $directory, 'inode/directory', $uriset, $addressset);
if ($result) {
$executable = $result;
}
} else {
# Encrypted file. Find any executable by filename list.
my $list = `7z l -bd '$filename'`;
my @lines = split /\n/, $list;
foreach my $line (@lines) {
if ($line =~ m/^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} .+\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|js|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip)$/i) {
$executable = processExecutable($filename, '7z', $addressset);
last;
}
}
}
system("rm -R '$directory'");
} elsif ($type eq 'application/x-rar') {
logWrite("FILE $type $filename");
my $directory = "$filename.d";
system("mkdir '$directory'");
if (system("unrar x -ppassword -inul '$filename' '$directory'") < 2) {
my $result = checkFile($directory, $directory, "inode/directory", $uriset, $addressset);
if ($result) {
$executable = $result;
}
} else {
# Encrypted file. Find any executable by filename list.
my $list = `unrar l '$filename'`;
my @lines = split /\n/, $list;
foreach my $line (@lines) {
if ($line =~ m/..[rwx-]{9} +[0-9]+ [0-9]{2}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2} .+\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|js|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip)/i) {
$executable = processExecutable($filename, 'rar', $addressset);
last;
}
}
}
system("rm -R '$directory'");
} elsif ($type eq 'application/x-ace-compressed') {
# TODO: decompress ACE files with unace.
} elsif ($type eq 'application/vnd.ms-cab-compressed') {
# TODO: decompress CAB files with cabextract.
}
}
}
return $executable;
}
sub signature {
eval {
my ($key) = @_;
if ($key =~ m/^(https?)\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:([0-9]{1,6}))?(\/|\?|#|$)/i) {
$key = uri_decode($key);
my $protocol = lc($1);
my $host = lc($2);
my $port = $4;
if (!$port) {
if ($protocol eq 'http') {
$port = '80';
} else {
$port = '443';
}
}
if (is_ipv4($host)) {
$host = ip_reverse($host);
$host = substr($host, 0, -14);
} elsif ($host =~ /\[([a-f0-9\:]+)\]/ && is_ipv6($1)) {
$host = ip_reverse($1);
$host = substr($host, 0, -10);
}
my $signature = md5_hex($key);
return "$signature.$host.$port.$protocol";
} elsif ($key =~ m/^[0-9a-f]{32}(\.[a-z0-9_-]+)+\.[0-9]+\.https?$/) {
return $key;
} elsif ($key =~ m/^[0-9a-f]{32}\.[0-9]+\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|js|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip)$/) {
return $key;
} elsif ($key =~ m/^MALWARE=/) {
return $key;
}
return;
};
}
sub loadCache {
eval {
my ($folder, $key, $expiration) = @_;
if (-d $folder) {
my $name = signature($key);
if ($name) {
my $file = "$folder/$name";
if (-e $file) {
if (!$expiration || -M $file < $expiration) {
return read_text($file);
}
}
}
}
return;
};
}
sub loadLastCache {
eval {
my ($folder, $key) = @_;
my $count = 0;
my $cache;
do {
$cache = loadCache($folder, $key);
if ($cache eq '200') {
return $key;
} elsif ($cache eq '404') {
return $key;
} elsif ($cache eq '500') {
return $key;
} else {
$key = $cache;
}
$count++;
} while ($count < 32 && $key =~ m/^(https?\:\/\/)/i);
return $key;
};
}
sub linesCache {
eval {
my ($folder, $key) = @_;
if (-d $folder) {
my $name = signature($key);
if ($name) {
my $file = "$folder/$name";
if (-e $file) {
if (-M $file < 1) {
return read_lines($file);
}
}
}
}
return;
};
}
sub storeCache {
eval {
my ($folder, $key, $value) = @_;
if (-d $folder && $value) {
my $name = signature($key);
if ($name) {
my $file = "$folder/$name";
write_text($file, $value);
chmod 0664, $file;
}
}
};
}
sub appendCache {
eval {
my ($folder, $key, $value) = @_;
if (-d $folder && $value) {
my $name = signature($key);
if ($name) {
my $file = "$folder/$name";
open(my $fh, '>>', $file);
say $fh "$value";
close $fh;
chmod 0664, $file;
}
}
};
}
sub storeParameter {
eval {
my ($folder, $name, $value) = @_;
if (-d $folder && $value) {
my $file = "$folder/$name";
write_text($file, $value);
chmod 0664, $file;
}
};
}
sub loadParameter {
eval {
my ($folder, $name, $standard) = @_;
if (-d $folder) {
my $file = "$folder/$name";
if (-e $file) {
my $value = read_text($file);
if ($value) {
return $value;
}
}
}
return $standard;
};
}
sub repath {
my ($uri, $location) = @_;
if ($location =~ m/^mailto:([^?]*)/) {
return $location;
} elsif ($location =~ m/^https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?(\/|\?|#|$)/i) {
return $location;
} elsif ($location =~ m/^\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?(\/|\?|#|$)/i) {
if ($uri =~ m/^(https?)\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])/gi) {
return "$1:$location";
} else {
return "http:$location";
}
} elsif ($uri =~ m/^((https?)\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?)\//i) {
my $root = $1;
if ($location =~ m/^\.\.\//i) {
my $index = rindex($uri, "/");
$uri = substr($uri, 0, $index);
$location = substr($location, 1);
return repath($uri, $location);
} elsif ($location =~ m/^\.\//i) {
my $index = rindex($uri, "/");
$uri = substr($uri, 0, $index + 1);
$location = substr($location, 2);
return repath($uri, $location);
} elsif ($location =~ m/^\//i) {
return "$root$location";
} else {
my $index = rindex($uri, "/");
$uri = substr($uri, 0, $index + 1);
return "$uri$location";
}
} else {
return $uri;
}
}
# Process all URIs to respective addresses.
sub processURI {
my ($uriset, $addressset, $dir, $getall, $suspect) = @_;
if ($uriset) {
my $ua = WWW::Mechanize->new(keep_alive => 0, timeout => 5, autocheck => 0);
$ua->agent($USERAGENT);
$ua->requests_redirectable(['HEAD']);
# Redirect check.
my $start = DateTime->now();
my $redircount = 0;
my $visitedset = new Set::Scalar->new;
my $errorset = new Set::Scalar->new;
my $successset = new Set::Scalar->new;
while ($uriset) {
my $uri = @$uriset[0];
$uriset->delete($uri);
if (startsWith($uri, 'https://www.google.com/url?q=http')) {
# Decode Google redirection.
my $length = length($uri);
$uri = substr($uri, 29, $length);
$uri = uri_decode($uri);
} elsif (startsWith($uri, 'http://www.google.com/url?q=http')) {
# Decode Google redirection.
my $length = length($uri);
$uri = substr($uri, 28, $length);
$uri = uri_decode($uri);
}
if (!$visitedset->contains($uri)) {
$visitedset->insert($uri);
##############
if ($uri =~ m/^https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?(\/[a-z0-9\._-]+)+\.zip$/i) {
my $signature = signature($uri);
$addressset->insert($signature);
}
##############
if ($uri !~ m/$FINAL/gi) {
my $cache = loadCache('/var/spfbl', $uri, 1);
if ($cache eq '201') {
$successset->insert($uri);
} elsif ($cache eq '404') {
$errorset->insert($uri);
next;
} elsif ($cache eq '500') {
# Do nothing.
} elsif ($VALIDATOR->is_uri($cache)) {
$uriset->insert(uri_decode($cache));
next;
} elsif (Email::Valid->address($cache)) {
$addressset->insert($cache);
next;
} elsif ($cache =~ m/^MALWARE=/) {
$addressset->insert($cache);
$successset->insert($uri);
next;
} elsif ($cache =~ m/^[0-9a-f]{32}\.[0-9]+\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|js|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip)$/) {
my $signature = signature($uri);
$addressset->insert($signature);
$addressset->insert($cache);
my $filename = "/var/spfbl/$cache";
if (-e $filename) {
# ClamAV scan.
my $result = clamavScan($filename);
if ($result) {
$addressset->insert("MALWARE=$result");
logWrite("MLWR $result");
}
}
} elsif (($getall || $uri =~ m/$SHORTENERS/i) && (DateTime->now - $start)->in_units('seconds') < 30) {
my $response = $ua->get($uri);
eval {
my $location = $ua->uri();
if ($uri ne $location) {
storeCache('/var/spfbl', $uri, $location);
$uri = $location;
$visitedset->insert($uri);
}
};
if ($response->code == 200) {
my $filename = $response->filename;
my $type = $response->header('Content-Type');
(my $mime) = $type =~ m/^[a-z]+\/[a-z0-9+.-]+\b/g;
if ($mime eq 'text/html') {
my $tree = HTML::TreeBuilder->new_from_content($response->decoded_content);
my $redir = redirection($addressset, $tree, $uri);
if ($redir =~ m/^https?\:\/\//i) {
if ($redircount++ < 32) {
$uriset->insert(uri_decode($redir));
storeCache('/var/spfbl', $uri, $redir);
next;
}
} elsif ($redir =~ m/^MALWARE=/) {
$addressset->insert($redir);
storeCache('/var/spfbl', $uri, $redir);
next;
} else {
storeCache('/var/spfbl', $uri, '200');
}
} elsif ($mime eq 'application/msword') {
my $headers = $response->headers;
my $length = $headers->content_length;
if ($length < 1048576) {
logWrite("WGET $mime $uri");
eval {
my $folder = $dir."download";
system("mkdir '$folder'");
$filename = "$folder/$filename";
open FILE, ">", $filename;
binmode FILE;
print FILE $response->decoded_content;
close FILE;
logWrite("FILE $mime $filename");
my $code = system("egrep --binary --ignore-case '\\b(AutoOpen|Document_Open|word/vbaProject\\.bin)\\b' '$filename' > /dev/null");
if ($code == 0) {
processExecutable($filename, 'doc', $addressset);
$addressset->insert("MALWARE=SPFBL.Document.AutoOpen.doc");
} else {
my $result = clamavScan($filename);
if ($result) {
$addressset->insert("MALWARE=$result");
logWrite("MLWR $result");
}
}
system("rm -R '$folder'");
};
}
} elsif ($mime eq 'application/pdf') {
my $headers = $response->headers;
my $length = $headers->content_length;
if ($length < 1048576) {
logWrite("WGET $mime $uri");
eval {
my $folder = $dir."download";
system("mkdir '$folder'");
$filename = "$folder/$filename";
my $name;
if (-e $filename) {
my $result = clamavScan($filename);
if ($result) {
$addressset->insert("MALWARE=$result");
logWrite("MLWR $result");
storeCache('/var/spfbl', $uri, "MALWARE=$result");
}
} else {
open FILE, ">", $filename;
binmode FILE;
print FILE $response->decoded_content;
close FILE;
my $result = clamavScan($filename);
if ($result) {
$addressset->insert("MALWARE=$result");
logWrite("MLWR $result");
storeCache('/var/spfbl', $uri, "MALWARE=$result");
}
unlink($filename);
}
system("rm -R '$folder'");
};
}
} elsif ($filename =~ m/\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip|gz|tar|rar|7z|z)$/i) {
my $extension = lc($1);
my $headers = $response->headers;
my $length = $headers->content_length;
if ($length < 1048576) {
my $signature = signature($uri);
$addressset->insert($signature);
logWrite("WGET $mime $uri");
eval {
my $folder = $dir."download";
system("mkdir '$folder'");
$filename = "$folder/$filename";
my $name;
if (-e $filename) {
$name = checkFile($folder, $filename, $mime, $uriset, $addressset);
} else {
open FILE, ">", $filename;
binmode FILE;
print FILE $response->decoded_content;
close FILE;
$name = checkFile($folder, $filename, $mime, $uriset, $addressset);
unlink($filename);
}
system("rm -R '$folder'");
storeCache('/var/spfbl', $uri, $name);
};
} else {
storeCache('/var/spfbl', $uri, '200');
}
}
$successset->insert($uri);
} elsif ($response->code == 404) {
$errorset->insert($uri);
storeCache('/var/spfbl', $uri, '404');
next;
} elsif ($response->code == 500) {
storeCache('/var/spfbl', $uri, '500');
} elsif ($response->code == 301 || $response->code == 302) {
# This is a redirection URL.
my $location = $response->header('Location');
$location = repath($uri, $location);
if ($location !~ m/$NOREDIR/gi) {
if ($location =~ m/^mailto:([^?]*)/) {
my $email = $1;
if ($email =~ m/<(.+)>/) {
$email = $1;
}
$email = lc($email);
if (Email::Valid->address($email)) {
$addressset->insert($email);
storeCache('/var/spfbl', $uri, $email);
next;
} else {
storeCache('/var/spfbl', $uri, '200');
}
} elsif ($location =~ m/^https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?(\/|\?|#|$)/gi) {
if ($redircount++ < 32) {
$uriset->insert(uri_decode($location));
storeCache('/var/spfbl', $uri, $location);
next;
}
} else {
storeCache('/var/spfbl', $uri, '200');
}
} else {
storeCache('/var/spfbl', $uri, '200');
}
$successset->insert($uri);
} else {
$successset->insert($uri);
}
} elsif ($cache eq '') {
$successset->insert($uri);
}
}
}
}
my $processset;
if ($successset) {
$processset = $successset;
} else {
$processset = $errorset;
}
for my $uri ($processset->elements) {
if ($uri !~ m/$IGNORE/g) {
if ($uri =~ m/^https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?(\/|\?|#|$)/i) {
my $host = lc($1);
if (is_ipv4($host)) {
my $signature = signature($uri);
$addressset->insert($host);
$addressset->insert($signature);
} elsif ($host =~ /\[([a-f0-9\:]+)\]/ && is_ipv6($1)) {
my $signature = signature($uri);
$addressset->insert(lc(ip_expand_address($1, 6)));
$addressset->insert($signature);
} elsif (is_domain($host)) {
$addressset->insert($host);
}
}
}
}
if ($getall && $suspect && $GSBKEY && $successset) {
# Google Safe Browsing.
my $gsbTime = loadParameter('/var/tmp', 'GSB_BEGIN_TIME', '2001-01-01T00:00:00');
my $strp = DateTime::Format::Strptime->new( pattern => '%Y-%m-%dT%H:%M:%S' );
my $gsbDate = $strp->parse_datetime( $gsbTime );
my $now = DateTime->now();
if ($now > $gsbDate) {
eval {
my $entrieset = new Set::Scalar->new;
for my $uri ($successset->elements) {
if ($VALIDATOR->is_uri($uri) && $uri !~ m/$SHORTENERS/i) {
if ($uri =~ m/^https?\:\/\/(([a-z0-9\_-]+\.)+[a-z0-9\_-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?(\/|\?|#|$)/i) {
my @threats = linesCache('/var/spfbl/gsb', $uri);
if (@threats > 0 && $threats[0] eq '200') {
my $host = lc($1);
if (is_ipv4($host)) {
$addressset->insert("MALWARE=Google.SafeBrowsing.IP");
} elsif ($host =~ /\[([a-f0-9\:]+)\]/ && is_ipv6($1)) {
$addressset->insert("MALWARE=Google.SafeBrowsing.IP");
} elsif (is_domain($host)) {
$addressset->insert("MALWARE=Google.SafeBrowsing.$host");
}
my $signature = signature($uri);
$addressset->insert($signature);
} else {
my $index = index($uri, '?');
if ($index > 0) {
$uri = substr($uri, 0, $index);
}
$entrieset->insert($uri);
storeCache('/var/spfbl/gsb', $uri, "200\n");
}
}
}
}
if ($entrieset) {
my $request = '{';
$request = join("\n", $request, ' "client": {');
$request = join("\n", $request, ' "clientId": "SPFBL",');
$request = join("\n", $request, ' "clientVersion": "2.9.0"');
$request = join("\n", $request, ' },');
$request = join("\n", $request, ' "threatInfo": {');
$request = join("\n", $request, ' "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE", "POTENTIALLY_HARMFUL_APPLICATION"],');
$request = join("\n", $request, ' "platformTypes": ["LINUX", "ANDROID", "OSX", "IOS", "WINDOWS"],');
$request = join("\n", $request, ' "threatEntryTypes": ["URL"],');
$request = join("\n", $request, ' "threatEntries": [');
for my $uri ($entrieset->elements) {
logWrite("GSBR $uri");
$request = join("\n", $request, " {\"url\": \"$uri\"},");
}
$request = join("\n", $request, ' ]');
$request = join("\n", $request, ' }');
$request = join("\n", $request, '}');
my $url = "https://safebrowsing.googleapis.com/v4/threatMatches:find?key=$GSBKEY";
my $response = $ua->post($url, 'Content-Type' => 'application/json', Content => $request);
if ($response->code == 200) {
foreach my $match (@{parse_json($response->decoded_content)->{'matches'}}) {
my $threat = $match->{'threatType'};
my $url = $match->{'threat'}->{'url'};
appendCache('/var/spfbl/gsb', $url, $threat);
if ($url =~ m/^https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?(\/|\?|#|$)/i) {
my $host = lc($1);
if (is_ipv4($host)) {
$addressset->insert("MALWARE=Google.SafeBrowsing.IP");
} elsif ($host =~ /\[([a-f0-9\:]+)\]/ && is_ipv6($1)) {
$addressset->insert("MALWARE=Google.SafeBrowsing.IP");
} elsif (is_domain($host)) {
$addressset->insert("MALWARE=Google.SafeBrowsing.$host");
}
my $signature = signature($url);
$addressset->insert($signature);
}
}
} elsif ($response->code == 400) {
my $content = $response->decoded_content;
logWrite("WARN Google Safe Browsing received an invalid parameter: $content");
} elsif ($response->code == 401) {
$gsbDate = DateTime->now( time_zone => 'UTC' );
$gsbDate->add( years => 1 );
my $value = $gsbDate->stringify();
storeParameter('/var/tmp', 'GSB_BEGIN_TIME', $value);
logWrite('WARN Google Safe Browsing was disabled by invalid credentials.');
} elsif ($response->code == 403) {
$gsbDate = DateTime->now( time_zone => 'UTC' );
$gsbDate->add( hours => 1 );
my $value = $gsbDate->stringify();
storeParameter('/var/tmp', 'GSB_BEGIN_TIME', $value);
logWrite('WARN Google Safe Browsing paused for a while by quota exceeded.');
} elsif ($response->code == 429) {
$gsbDate = DateTime->now( time_zone => 'UTC' );
$gsbDate->add( hours => 1 );
my $value = $gsbDate->stringify();
storeParameter('/var/tmp', 'GSB_BEGIN_TIME', $value);
logWrite('WARN Google Safe Browsing paused for a while by resource exhausted.');
} else {
my $code = $response->code;
my $content = $response->decoded_content;
logWrite("ERROR $code $uriset");
for my $uri ($entrieset->elements) {
storeCache('/var/spfbl/gsb', $uri, "$code\n");
}
}
}
};
}
}
}
}
# Exim function to get HREF address list.
sub getListHREF {
my ($filenames, $getall, $suspect, $recipient) = @_;
# logWrite("MIME $filenames");
my @fields = split(';' , $filenames);
my $n = scalar(@fields);
if ($n < 2) {
return '';
} else {
my $folder;
my $uriset = new Set::Scalar->new;
my $addressset = new Set::Scalar->new;
for (my $i=1; $i < $n; $i++) {
my $filename = @fields[$i-1];
if (-e $filename) {
$filename = abs_path($filename);
my $content_type = @fields[$i];
my ($volume,$dir,$file) = File::Spec->splitpath($filename);
my $size = -s $filename;
logWrite("MIME $file $content_type $size");
my $tree;
if ($content_type eq 'text/html') {
$tree = HTML::TreeBuilder->new_from_file($filename);
} elsif ($content_type eq 'text/plain') {
$tree = HTML::TreeBuilder->new_from_file($filename);
} elsif ($content_type eq 'application/pdf') {
system("pdftohtml -i -noframes '$filename' '$filename.html'");
if (-e "$filename.html") {
$tree = HTML::TreeBuilder->new_from_file("$filename.html");
system("rm '$filename.html'");
}
} elsif ($content_type eq 'application/msword' || $filename =~ m/\.doc$/i) {
checkFile($dir, $filename, $content_type, $uriset, $addressset);
} elsif ($suspect) {
checkFile($dir, $filename, $content_type, $uriset, $addressset);
}
if ($tree) {
$folder = $dir;
my $redir = redirection($addressset, $tree);
if ($redir =~ m/^https?\:\/\//i) {
$uriset->insert(uri_decode($redir));
} elsif ($redir =~ m/^MALWARE=/) {
$addressset->insert($redir);
} else {
for my $element ($tree->look_down(_tag => 'a', href => qr/./)) {
my $uri = $element->attr('href');
if ($uri =~ m/^mailto:([^?]*)/) {
if ($addressset->size < 8) {
my $email = $1;
if ($email =~ m/<(.+)>/) {
$email = $1;
}
if (Email::Valid->address($email)) {
$addressset->insert(lc($email));
}
}
} elsif ($uri =~ m/^https?\:\/\//i && $VALIDATOR->is_uri($uri)) {
$uriset->insert(uri_decode($uri));
}
}
for my $element ($tree->look_down(_tag => 'area', href => qr/./)) {
my $uri = $element->attr('href');
if ($uri =~ m/^mailto:([^?]*)/) {
if ($addressset->size < 8) {
my $email = $1;
if ($email =~ m/<(.+)>/) {
$email = $1;
}
if (Email::Valid->address($email)) {
$addressset->insert(lc($email));
}
}
} elsif ($uri =~ m/^https?\:\/\//i && $VALIDATOR->is_uri($uri)) {
$uriset->insert(uri_decode($uri));
}
}
}
my $body = $tree->look_down(_tag => q{body});
############################################################
if ($body->as_text =~ /\b$recipient[^[:graph:]]+(https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?[a-z0-9\-\._~!\$&\(\)\*+,;\=:\/?@#]*)\b/gi) {
my $url = $1;
if ($url =~ m/$SHORTENERS/i) {
$addressset->insert("MALWARE=SPFBL.Body.Scam");
}
}
############################################################
my $text = $body->as_HTML();
$text = decode_entities($text);
while ($text =~ /\b([0-9a-z_+-][0-9a-z._+-]*@([a-z0-9]|[a-z0-9][a-z0-9_-]{0,61}[a-z0-9])(\.([a-z0-9]|[a-z0-9][a-z0-9_-]{0,61}[a-z0-9]))*\.(com|org|net|int|edu|gov|mil|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw))\b/gi) {
if ($addressset->size < 8) {
my $email = $1;
if (Email::Valid->address($email)) {
$addressset->insert(lc($email));
}
}
}
if (!$uriset) {
while ($text =~ /\b(https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?[a-z0-9\-\._~!\$&\(\)\*+,;\=:\/?@#]*)\b/gi) {
my $url = $1;
if ($VALIDATOR->is_uri($url)) {
$uriset->insert(uri_decode($url));
}
}
while ($text =~ /\b(www\.[a-z0-9\._-]+\.(com|org|net|int|edu|gov|mil|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)(\/[a-z0-9\-\._~!\$&\(\)\*+,;=:\/?@#]*)?)\b/gi) {
my $url = "http://$1";
if ($VALIDATOR->is_uri($url)) {
$uriset->insert(uri_decode($url));
}
}
while ($text =~ /\b([a-z0-9\._-]+\.(com|org|net|int|edu|gov|mil|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)(\/[a-z0-9\-\._~!\$&\(\)\*+,;=:\/?@#]*))\b/gi) {
my $url = "http://$1";
if ($VALIDATOR->is_uri($url)) {
$uriset->insert(uri_decode($url));
}
}
while ($text =~ /\b([a-z0-9\._-]+\.[a-z]{3,5}(\/[0-9a-z.-]+)+)\b/gi) {
my $url = "http://$1";
if ($VALIDATOR->is_uri($url)) {
$uriset->insert(uri_decode($url));
}
}
}
for my $uri ($uriset->elements) {
if ($uri =~ m/$SHORTENERS/i) {
my $signature = signature($uri);
$addressset->insert($signature);
}
}
if ($getall eq '1' || $getall eq 'true' || $getall eq 'yes') {
$getall = 1;
} else {
$getall = 0;
}
if ($suspect eq '1' || $suspect eq 'true' || $suspect eq 'yes') {
$suspect = 1;
} else {
$suspect = 0;
}
}
}
}
if ($folder) {
processURI($uriset, $addressset, $folder, $getall, $suspect);
}
my $list = '';
for my $address ($addressset->elements) {
if ($list eq '') {
$list = "$address";
} else {
$list = "$list $address";
}
}
if ($list ne '') {
logWrite("HREF $list");
}
return $list;
}
}
my $n = $#ARGV + 1;
if ($n == 1){
main();
}
# Main code for command line.
sub main() {
my $arg = $ARGV[0];
if ($arg eq 'porcupine') {
my $dir = '/var/spfbl/porcupine';
if (-d $dir) {
my $uribl = new Mail::RBL('uribl.spfbl.net');
opendir(DIR, '/var/spfbl');
while (my $file = readdir(DIR)) {
if ($file =~ m/^[0-9a-f]{32}((\.[a-z0-9_-]+)+)\.[0-9]+\.https?$/) {
my $host1 = substr($1, 1);
if ("http://$host1/" =~ m/$SHORTENERS/i) {
if (!$uribl->check_rhsbl($file)) {
my $cache = loadLastCache('/var/spfbl', $file);
if ($cache =~ m/^[0-9a-f]{32}\.[0-9]+\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|js|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip)$/) {
my $result = clamavScan("/var/spfbl/$cache");
if ($result) {
print("$file\n");
} elsif ($uribl->check_rhsbl($cache)) {
print("$file\n");
}
} elsif ($cache =~ m/^[0-9a-f]{32}((\.[a-z0-9_-]+)+)\.[0-9]+\.https?$/) {
my $host2 = substr($1, 1);
if ("http://$host2/" =~ m/$SHORTENERS/i) {
print("$file\n");
}
} elsif ($cache =~ m/^https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?(\/|\?|#|$)/gi) {
my $host2 = substr($1, 1);
if (is_domain($host2) && $uribl->check_rhsbl($host2)) {
print("$file\n");
}
}
}
}
} elsif ($file =~ m/^[0-9a-f]{32}\.[0-9]+\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip)$/) {
my $path = "/var/spfbl/$file";
my $result = clamavScan($path);
my $listed = $uribl->check_rhsbl($file);
if ($result && !$listed) {
print("$file\n");
} elsif (!$result && $listed) {
my $cache = "/var/spfbl/porcupine/$file.zip";
next if -e $cache;
if (system("zip -qq --junk-paths -P infected '$cache' '$path'") == 0) {
my $msg = MIME::Lite->new(
From => 'admin@spfbl.net',
To => 'abuse@base64.com.br',
Subject => 'Malware submission',
Type => 'multipart/mixed',
);
$msg->attach(
Type => 'TEXT',
Data => "New malware inside compressed with 'infected' as password.",
);
$msg->attach(
Type => 'application/zip',
Path => "$cache",
Filename => "$file",
);
$msg->send;
print("$file\n");
}
}
}
}
closedir(DIR);
}
return 0;
} else {
my ($volume,$dir,$file) = File::Spec->splitpath(abs_path("./temp"));
my $uriset = new Set::Scalar->new;
my $addressset = new Set::Scalar->new;
if (-e $arg) {
# The argument is a file.
my $filename = $arg;
$filename = abs_path($filename);
($volume,$dir,$file) = File::Spec->splitpath($filename);
my $type = `file --brief --mime-type "$filename"`;
$type =~ s/\n//g;
my $tree;
if ($filename =~ m/\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|js|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip|sh|zip|gz|tar|rar|7z|z)$/i) {
checkFile($dir, $filename, $type, $uriset, $addressset);
} elsif ($type eq 'text/html') {
$tree = HTML::TreeBuilder->new_from_file($filename);
} elsif ($type eq 'text/plain') {
$tree = HTML::TreeBuilder->new_from_file($filename);
} elsif ($type eq 'application/pdf') {
if (-e "$filename.html") {
$tree = HTML::TreeBuilder->new_from_file("$filename.html");
} else {
system("pdftohtml -i -noframes '$filename' '$filename.html'");
if (-e "$filename.html") {
$tree = HTML::TreeBuilder->new_from_file("$filename.html");
system("rm '$filename.html'");
}
}
} else {
checkFile($dir, $filename, $type, $uriset, $addressset);
}
if ($tree) {
my $redir = redirection($addressset, $tree);
if ($redir =~ m/^https?\:\/\//i) {
$uriset->insert(uri_decode($redir));
} elsif ($redir =~ m/^MALWARE=/) {
$addressset->insert($redir);
} else {
for my $element ($tree->look_down(_tag => 'a', href => qr/./)) {
my $uri = $element->attr('href');
if ($uri =~ m/^mailto:([^?]*)/) {
my $email = $1;
if ($email =~ m/<(.+)>/) {
$email = $1;
}
if (Email::Valid->address($email)) {
$addressset->insert(lc($email));
}
} elsif ($uri =~ m/^https?\:\/\//i && $VALIDATOR->is_uri($uri)) {
$uriset->insert(uri_decode($uri));
}
}
for my $element ($tree->look_down(_tag => 'area', href => qr/./)) {
my $uri = $element->attr('href');
if ($uri =~ m/^mailto:([^?]*)/) {
my $email = $1;
if ($email =~ m/<(.+)>/) {
$email = $1;
}
if (Email::Valid->address($email)) {
$addressset->insert(lc($email));
}
} elsif ($uri =~ m/^https?\:\/\//i && $VALIDATOR->is_uri($uri)) {
$uriset->insert(uri_decode($uri));
}
}
}
my $body = $tree->look_down(_tag => q{body});
my $text = $body->as_HTML();
$text = decode_entities($text);
while ($text =~ /\b([0-9a-z_+-][0-9a-z._+-]*@([a-z0-9]|[a-z0-9][a-z0-9_-]{0,61}[a-z0-9])(\.([a-z0-9]|[a-z0-9][a-z0-9_-]{0,61}[a-z0-9]))*\.(com|org|net|int|edu|gov|mil|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw))\b/gi) {
my $email = $1;
if (Email::Valid->address($email)) {
$addressset->insert(lc($email));
}
}
if (!$uriset) {
while ($text =~ /\b(https?\:\/\/([a-z0-9\._-]+|\[[a-f0-9\:]+\])(:[0-9]{1,6})?[a-z0-9\-\._~!\$&\(\)\*+,;\=:\/?@#]*)\b/gi) {
my $url = $1;
if ($VALIDATOR->is_uri($url)) {
$uriset->insert(uri_decode($url));
}
}
while ($text =~ /\b(www\.[a-z0-9\._-]+\.(com|org|net|int|edu|gov|mil|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)(\/[a-z0-9\-\._~!\$&\(\)\*+,;=:\/?@#]*)?)\b/gi) {
my $url = "http://$1";
if ($VALIDATOR->is_uri($url)) {
$uriset->insert(uri_decode($url));
}
}
while ($text =~ /\b([a-z0-9\._-]+\.(com|org|net|int|edu|gov|mil|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)(\/[a-z0-9\-\._~!\$&\(\)\*+,;=:\/?@#]*))\b/gi) {
my $url = "http://$1";
if ($VALIDATOR->is_uri($url)) {
$uriset->insert(uri_decode($url));
}
}
while ($text =~ /\b([a-z0-9\._-]+\.[a-z]{3,5}(\/[0-9a-z.-]+)+)\b/gi) {
my $url = "http://$1";
if ($VALIDATOR->is_uri($url)) {
$uriset->insert(uri_decode($url));
}
}
}
}
} elsif ($VALIDATOR->is_uri($arg)) {
# The argument is an URI.
$uriset->insert(uri_decode($arg));
my $signature = signature($arg);
print("$signature $arg\n");
} elsif (is_domain($arg)) {
# The argument is a hostname.
$addressset->insert(lc($arg));
} elsif (is_ipv4($arg)) {
# The argument is an IPv4.
$addressset->insert($arg);
} elsif (is_ipv6($arg)) {
# The argument is an IPv6.
$addressset->insert(lc(ip_expand_address($arg, 6)));
} elsif (Email::Valid->address($arg)) {
# The argument is an e-mail.
$addressset->insert(lc($arg));
} else {
print("Invalid query.\n");
exit 0;
}
my $list = new Mail::RBL('uribl.spfbl.net');
for my $uri ($uriset->elements) {
my $signature = signature($uri);
if ($list->check_rhsbl($signature)) {
print("The URL signature $signature is listed in 'uribl.spfbl.net'.\n");
exit 1;
}
}
processURI($uriset, $addressset, $dir, $AGRESSIVE, 1);
for my $address ($addressset->elements) {
if ($address =~ m/^MALWARE=(.*)$/) {
$addressset->delete($address);
my $malware = $1;
print("$malware malware was found in file.\n");
exit 3;
}
}
my $executable = "";
for my $address ($addressset->elements) {
if ($address =~ m/^[0-9a-f]{32}\.[0-9]+\.(com|vbs|vbe|bat|cmd|pif|scr|prf|lnk|exe|shs|arj|hta|jar|ace|js|msi|sh|doc|xls|docx|docm|xlsx|xlsm|xlsb|zip)$/) {
eval {
$addressset->delete($address);
$executable = $address;
if ($list->check_rhsbl($address)) {
print("The executable with signature $address is listed in 'uribl.spfbl.net'.\n");
exit 2;
}
};
}
}
for my $address ($addressset->elements) {
if (is_ip($address)) {
eval {
if ($list->check($address)) {
print("$address is listed in 'uribl.spfbl.net'.\n");
exit 1;
}
};
} else {
eval {
if ($list->check_rhsbl($address)) {
print("$address is listed in 'uribl.spfbl.net'.\n");
exit 1;
}
};
}
}
my $resolver = new Net::DNS::Resolver();
for my $address ($addressset->elements) {
if (is_domain($address)) {
eval {
my $query = $resolver->query($address, 'A');
if ($query) {
foreach my $rr ($query->answer) {
my $ip = $rr->address;
if (is_ip($ip)) {
eval {
if ($list->check($ip)) {
print("$ip is listed in 'uribl.spfbl.net'.\n");
exit 1;
}
};
}
}
}
};
eval {
my $query = $resolver->query($address, 'AAAA');
if ($query) {
foreach my $rr ($query->answer) {
my $ip = $rr->address;
if (is_ip($ip)) {
eval {
if ($list->check($ip)) {
print("$ip is listed in 'uribl.spfbl.net'.\n");
exit 1;
}
};
}
}
}
};
}
}
if ($executable) {
print("The undefined executable with signature $executable was found.\n");
exit 4;
} elsif ($addressset) {
print("$addressset is not listed in 'uribl.spfbl.net'.\n");
exit 0;
} else {
print("Not listed in 'uribl.spfbl.net'.\n");
exit 0;
}
}
}