#!/usr/bin/ruby
#
# Copyright (c) 2015 netnea, AG. (https://www.netnea.com/)
#
# A ruby script to analyse modsecurity core rules anomaly scores in
# STDIN.
# The data is then extracted and summarized in a statistical table
# that gives an overview over the anomaly scores in the data.
#
# This script was written by Christian Folini. Feel free to use it
# and to adapt it to your needs.
#
# Run with --help to get a usage overview.
#
# TODO
# - option to suppress lines / scores with 0 results / requests

# -----------------------------------------------------------
# INIT
# -----------------------------------------------------------

require "optparse"
require "date"
require "pp"
require "rubygems"

$params = Hash.new

$params[:verbose]  = false
$params[:debug]    = false
$params[:incoming] = true
$params[:outgoing] = true
$params[:headers]  = true
$params[:totals]   = true
$params[:empty]    = true
$params[:summary]  = true
$params[:baseline] = 0 # Number of requests with scores 0/0 to be added to stats

# -----------------------------------------------------------
# SUB-FUNCTIONS (those that are specific to this script)
# -----------------------------------------------------------

def map_data(data, nils, stats)
  # Purpose: file a single CSV field either as a missing value or as a score
  # Input  : field string (or nil), running count of missing values, array of scores
  # Output : updated count of missing values, array of scores (appended in place)
  # Remarks: nil, "-" and "" count as missing. A nil field occurs when the
  #          line carries no value after the separator ("3;" splits into
  #          one element only). Non-integer values are mapped to 0 by to_i.

  if data.nil? || data == "-" || data == ""
    nils = nils + 1
  else
    stats << data.to_i
  end

  return nils, stats
end

def read_stdin(input = STDIN)
  # Purpose: import data out of STDIN
  # Input  : optional IO-like object responding to #each (defaults to STDIN)
  # Output : number of missing inbound scores, array of inbound scores,
  #          number of missing outbound scores, array of outbound scores
  # Remarks: Empty values are counted as missing, non-integer values are mapped to 0.
  #          $params[:baseline] additional 0/0 entries are appended at the end.

  vprint "Starting to read STDIN"

  stats_in = []
  stats_out = []
  nils_in = 0
  nils_out = 0
  n = 0
  formatcheck_ok = false

  input.each do |line| # we checked for STDIN in check parameter phase
    n = n + 1
    dprint "Processing line ##{n}: #{line.chomp}"

    begin
      in_data, out_data = line.chomp.split(";")

      unless formatcheck_ok
        # Only the first line is checked: its first field has to be the
        # empty marker "-" or a plain integer.
        if in_data != "-" && in_data != in_data.to_i.to_s
          puts_error("Input's first line indicates, input is not in CSV format as")
          puts_error("explained by help text. This is fatal. Aborting.")
          exit 1 # SystemExit is not a StandardError, so the rescue below lets it pass
        end
        formatcheck_ok = true
      end

      nils_in, stats_in = map_data(in_data, nils_in, stats_in)
      nils_out, stats_out = map_data(out_data, nils_out, stats_out)
    rescue StandardError
      puts_error("Could not read line ##{n}: \"#{line.chomp}\". Ignoring.")
    end
  end

  1.upto($params[:baseline]) do
    stats_in << 0
    stats_out << 0
  end

  vprint "Done reading STDIN (imported #{n} lines of data)"

  return nils_in, stats_in, nils_out, stats_out
end

def avg(arr)
  # Purpose: arithmetic mean
  # Input  : array of numbers
  # Output : float (NaN for an empty array, as 0.0 / 0 is NaN)

  return arr.inject(0.0) { |sum, el| sum + el } / arr.size
end

def median(arr)
  # Purpose: median; mean of the two middle elements for even sized arrays
  # Input  : array of numbers
  # Output : float (NaN for an empty array, in line with avg)

  return 0.0 / 0.0 if arr.empty?

  sorted = arr.sort
  len = sorted.length
  return (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
end

def sample_variance(arr)
  # Purpose: variance of the values
  # Input  : array of numbers
  # Output : float
  # Remarks: Despite the name, the sum of squares is divided by n and
  #          not by n - 1.

  mean = avg(arr)
  sum = arr.inject(0) { |acc, i| acc + (i - mean)**2 }
  return 1 / arr.length.to_f * sum
end

def standard_deviation(arr)
  # Purpose: square root of sample_variance
  # Input  : array of numbers
  # Output : float

  return Math.sqrt(sample_variance(arr))
end

def round_to(f, n)
  # Purpose: round f to n decimal places
  # Input  : float, number of decimal places
  # Output : float
  # Remarks: Implemented by hand, because the ruby "round" function before
  #          ruby 1.9 does not take a precision. The value is rounded, not
  #          truncated: truncation would print 66.6666 for two thirds and
  #          99.9999 for a sum that is off 100 by a float error only.

  return (f * 10**n).round.to_f / 10**n
end

def score_frequencies(stats, max)
  # Purpose: count the requests per score
  # Input  : array of scores, highest score
  # Output : array of [score, count] pairs for every score from 0 to max
  # Remarks: Single pass over stats. Scores below 0 get no row.

  counts = Hash.new(0)
  stats.each { |score| counts[score] += 1 }

  rows = []
  0.upto(max) { |score| rows << [score, counts[score]] }
  return rows
end

def print_stats(verb, nils, stats)
  # Purpose: print the statistics table of a single direction
  # Input  : "incoming" or "outgoing", number of missing values, array of scores
  # Output : table to STDOUT
  # Remarks: The first "|" sits at column 32 in every row. The padding is
  #          laid out for the 8 character verbs "incoming" and "outgoing".

  total = stats.length + nils
  max = stats.max

  if max
    if $params[:headers]
      puts "#{verb.upcase.ljust(29)}Num of req. | % of req. |  Sum of % | Missing %"
    end

    if $params[:totals]
      printf("Number of %s req. (total) |%7i | %8.4f%% | %8.4f%% | %8.4f%%\n\n", verb, total, 100, 100, 0)
    end

    sum_perc = 0.0

    if $params[:empty]
      perc = nils / total.to_f * 100
      sum_perc = sum_perc + perc
      printf("Empty or miss. %s score   | %6d | %8.4f%% | %8.4f%% | %8.4f%%\n", verb, nils, perc, sum_perc, 100 - sum_perc)
    end

    score_frequencies(stats, max).each do |score, count|
      perc = count / total.to_f * 100
      sum_perc = sum_perc + perc
      printf("Reqs with %s score of %3d | %6d | %8.4f%% | %8.4f%% | %8.4f%%\n",
             verb, score, count, round_to(perc, 4), round_to(sum_perc, 4), 100 - round_to(sum_perc, 4))
    end

    if $params[:summary]
      printf("\n%s average: %8.4f Median %8.4f Standard deviation %8.4f\n",
             verb.capitalize, avg(stats), median(stats), standard_deviation(stats))
    end
  else
    # Variable max is empty. This happens when no values were received for incoming or outgoing.
    # So there is nothing to print really. But this is quite rare.
    puts "No values received for #{verb.upcase}, only empty entries. Thus nothing to print."
    puts "Please check your input."
  end

  # Blank lines separating the incoming table from the outgoing one
  if verb == "incoming" && $params[:incoming] && $params[:outgoing]
    puts
    puts if $params[:headers] || $params[:summary]
  end
end

def print_stats_wrapper(nils_in, stats_in, nils_out, stats_out)
  # Purpose: print statistics about anomaly score data
  # Input  : stats arrays, number of nil values
  # Output : statistics to STDOUT
  # Remarks: none

  vprint "Starting to calculate and print statistics"

  print_stats("incoming", nils_in, stats_in) if $params[:incoming]
  print_stats("outgoing", nils_out, stats_out) if $params[:outgoing]

  vprint "Done printing statistics"
end

# -----------------------------------------------------------
# GENERIC SUB-FUNCTIONS (those that come with every script)
# -----------------------------------------------------------

def vprint(text)
  # Purpose: output text if $params[:verbose] is set.
  # Input  : String input
  # Output : stdout
  # Remarks: none

  if $params[:verbose]
    puts text + "\n"
  end
end

def dprint(text)
  # Purpose: output text if $params[:debug] is set.
  # Input  : String input
  # Output : stdout
  # Remarks: none

  if $params[:debug]
    puts text + "\n"
  end
end

def check_stdin()
  # Purpose: Check for access to STDIN
  # Input  : none
  # Output : bool; false if STDIN is a terminal, true otherwise
  # Remarks: none

  return !STDIN.tty?
end

def check_parameters()
  # Purpose: check parameters
  # Input  : global variable params
  # Output : stderr in case there is a problem with one of the parameters
  # Return : true if there is an error with one of the parameters; or false in absence of errors
  # Remarks: Both checks performed here are fatal and exit right away.

  err_status = false

  # unless /^foo$/.match($params["x"])
  #   $stderr.puts "Error in parameter x ..."
  #   err_status = true
  # end

  unless check_stdin()
    puts_error("No STDIN available. This is fatal. Aborting.", nil)
    exit 1
  end

  unless $params[:baseline].to_s.to_i == $params[:baseline]
    puts_error("Baseline parameter is not integer. This is fatal. Aborting.", nil)
    exit 1
  end

  return err_status
end

def puts_error(msg, detail=nil)
  # Purpose: Print error message
  # Input  : string msg and detail exception object
  # Output : $stderr
  # Return : None
  # Remarks: There is a ruby exception class hierarchy.
  #          See http://makandracards.com/makandra/4851-ruby-exception-class-hierarchy

  $stderr.puts msg
  if detail
    $stderr.puts "Error: #{detail.message}"
    $stderr.puts "Backtrace:"
    $stderr.puts detail.backtrace.join("\n")
  end
  $stderr.puts "--------------------------"
end

# -----------------------------------------------------------
# COMMAND LINE PARAMETER EXTRACTION
# -----------------------------------------------------------
#
begin

  parser = OptionParser.new do |opts|
    # NOTE(review): the banner wording and the option switches below are
    # reconstructed from the $params keys this script reads and from the
    # help text - confirm them against the released script.
    opts.banner = <<EOF

#{File.basename(__FILE__)}

A ruby script to analyse modsecurity core rules anomaly scores in STDIN.

Usage: #{File.basename(__FILE__)} [options]
EOF

    opts.separator ""
    opts.separator "Options:"

    opts.on("-d", "--debug", "Display debugging infos") do
      $params[:debug] = true
    end

    opts.on("-v", "--verbose", "Be verbose") do
      $params[:verbose] = true
    end

    opts.on("-h", "--help", "Displays Help") do
      puts opts
      exit
    end

    opts.on("--incoming", "Display statistics of incoming requests only") do
      $params[:outgoing] = false
    end

    opts.on("--outgoing", "Display statistics of outgoing responses only") do
      $params[:incoming] = false
    end

    opts.on("--no-headers", "Do not print the header line") do
      $params[:headers] = false
    end

    opts.on("--no-totals", "Do not print the line with the totals") do
      $params[:totals] = false
    end

    opts.on("--no-empty", "Do not print the line with the empty or missing scores") do
      $params[:empty] = false
    end

    opts.on("--no-summary", "Do not print average, median and standard deviation") do
      $params[:summary] = false
    end

    opts.on("-b", "--baseline STR", "Number of requests with scores 0/0 to be added to stats") do |baseline_str|
      $params[:baseline_str] = baseline_str
    end

    notes = <<EOF

Notes:

This script expects a CSV stream with 2 items on every line:
<incoming-anomaly-score>;<outgoing-anomaly-score>
I.e.:
0;0
1;0
0;0
0;2
12;3
0;0
2;0
0;1
2;5
...

ATTENTION: Missing anomaly scores are excluded from the calculation of
the average, the median and the standard deviation.

You get this stream of scores by defining the webserver's access log
accordingly and then extract the data out of that format.

Note that you can add an additional baseline of STR requests to the
statistics. This makes sense if your STDIN comes without the requests
which did not trigger any rules, but you want to include them in the
calculation.

Example:

LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \\
%v %A %p %R %{BALANCER_WORKER_ROUTE}e \\
%X \"%{cookie}n\" \\
%{UNIQUE_ID}e %I %O %{ratio}n%% %D \\
%{TX.perf_modsecinbound}M %{TX.perf_application}M %{TX.perf_modsecoutbound}M \\
%{TX.INBOUND_ANOMALY_SCORE}M %{TX.OUTBOUND_ANOMALY_SCORE}M" extended

$> cat access.log | egrep -o "[0-9]+ [0-9]+$" | tr " " ";" | modsec-positive-stats.rb
INCOMING                     Num of req. | % of req. |  Sum of % | Missing %
Number of incoming req. (total) |  10000 | 100.0000% | 100.0000% |   0.0000%

Empty or miss. incoming score   |      0 |   0.0000% |   0.0000% | 100.0000%
Reqs with incoming score of   0 |   9970 |  99.7000% |  99.7000% |   0.3000%
Reqs with incoming score of   1 |      4 |   0.0400% |  99.7400% |   0.2600%
Reqs with incoming score of   2 |     21 |   0.2100% |  99.9500% |   0.0500%
Reqs with incoming score of   3 |      0 |   0.0000% |  99.9500% |   0.0500%
Reqs with incoming score of   4 |      4 |   0.0400% |  99.9900% |   0.0100%
Reqs with incoming score of   5 |      1 |   0.0100% | 100.0000% |   0.0000%

Incoming average:   0.0067 Median   0.0000 Standard deviation   0.1329


OUTGOING                     Num of req. | % of req. |  Sum of % | Missing %
Number of outgoing req. (total) |  10000 | 100.0000% | 100.0000% |   0.0000%

Empty or miss. outgoing score   |      0 |   0.0000% |   0.0000% | 100.0000%
Reqs with outgoing score of   0 |   9997 |  99.9700% |  99.9700% |   0.0300%
Reqs with outgoing score of   1 |      0 |   0.0000% |  99.9700% |   0.0300%
Reqs with outgoing score of   2 |      0 |   0.0000% |  99.9700% |   0.0300%
Reqs with outgoing score of   3 |      0 |   0.0000% |  99.9700% |   0.0300%
Reqs with outgoing score of   4 |      2 |   0.0200% |  99.9900% |   0.0100%
Reqs with outgoing score of   5 |      1 |   0.0100% | 100.0000% |   0.0000%

Outgoing average:   0.0013 Median   0.0000 Standard deviation   0.0755

Note that you can add an additional baseline of STR requests to the
statistics. This makes sense if your STDIN comes without the requests
with a score of 0/0, but you want to include them in the calculation
for the sake of a clean statistic. Filtering out scores of 0/0 is very
useful on big logfiles where script takes a long time to run.

EOF

    opts.on_tail(notes)
  end

  parser.parse!

  # The baseline arrives as a string; accept it only if it is a plain integer.
  unless $params[:baseline_str].nil?
    if $params[:baseline_str].to_i.to_s != $params[:baseline_str]
      $stderr.puts "Baseline parameter is not integer. This is fatal. Aborting."
      exit 1
    else
      $params[:baseline] = $params[:baseline_str].to_i
    end
  end

rescue OptionParser::InvalidOption => detail
  puts_error("Invalid Option in command line parameter extraction. This is fatal. Aborting.", detail)
  exit 1
rescue => detail
  puts_error("Unknown error in command line parameter extraction. This is fatal. Aborting.", detail)
  exit 1
end

# -----------------------------------------------------------
# MAIN
# -----------------------------------------------------------

vprint "Starting parameter checking"

exit 1 if check_parameters

vprint "Starting main program"

nils_in, stats_in, nils_out, stats_out = read_stdin()

print_stats_wrapper(nils_in, stats_in, nils_out, stats_out)

vprint "Done. Bailing out."