#!/usr/bin/awk -f # usage: ncsa_response_times.awk [-v "option=value"] access_log... | sort -n # (First chmod a+x ncsa_response_times.awk) # The sort is needed because statistics are stored in an associative array dumped at the end and it's unordered. # Default LogFormat is assumed to be LogFormat "%h %l %u %t \"%r\" %>s %b %D \"%{WAS}e\" %X" common # Field descriptions: https://httpd.apache.org/docs/current/mod/mod_log_config.html#formats # # Output (CSV): # 010SortableDateTime,Date,Day,Time,ArrivalsPer%s,ThroughputAllPer%s,Throughput4xxPer%s,Throughput5xxPer%s,ResponseTimesSumPer%s(%s),MeanResponseTimePer%s(%s),MaxResponseTimePer%s(%s),ResponseBytesSumPer%s,MeanResponseSizeBytesPer%s,MaxResponseSizeBytesPer%s # # Options: # Column offset of %D starting from the left (1 is the first column, 2 is second, etc.): # -v response_time_offset_left=X # Column offset of %D starting from the right (0 is the last column, 1 is second-to-last, etc.): # -v response_time_offset_right=X # Column offset of %t starting from the left (1 is the first column, 2 is second, etc.): # -v date_offset_left=X # Column offset of %t starting from the right (0 is the last column, 1 is second-to-last, etc.): # -v date_offset_right=X # Column offset of %s starting from the left (1 is the first column, 2 is second, etc.): # -v status_offset_left=X # Column offset of %s starting from the right (0 is the last column, 1 is second-to-last, etc.): # -v status_offset_right=X # Column offset of %b starting from the left (1 is the first column, 2 is second, etc.): # -v response_size_offset_left=X # Column offset of %b starting from the right (0 is the last column, 1 is second-to-last, etc.): # -v response_size_offset_right=X # Column offset of %{WAS}e starting from the left (1 is the first column, 2 is second, etc.): # -v jvm_offset_left=X # Column offset of %{WAS}e starting from the right (0 is the last column, 1 is second-to-last, etc.): # -v jvm_offset_right=X # Per-minute instead of per-second: # -v per_minute=true # Suppress printing the CSV header: # -v noheader=true # Response times in milliseconds instead of microseconds: # -v responsems=true # Debug: # -v debug=true # Divide transaction rates: # -v divtranrates=X # Don't skip rows with 0 responses: # -v noskip0=true # ODR %t time format: # -v odrtimeformat=true BEGIN { per_second = 1; per_title = "S"; if (per_minute) { per_second = 0; per_title = "M"; } if (length(date_offset_left) == 0 && length(date_offset_right) == 0) { date_offset_left = 4; } if (length(status_offset_left) == 0 && length(status_offset_right) == 0) { status_offset_left = 9; } if (length(response_time_offset_left) == 0 && length(response_time_offset_right) == 0) { response_time_offset_right = 2; } if (length(response_size_offset_left) == 0 && length(response_size_offset_right) == 0) { response_size_offset_left = 10; } if (odrtimeformat) { responsems = 1; } responseunit = "us"; if (responsems) { responseunit = "ms"; } if (!divtranrates) { divtranrates = 1; } hasJVMs = 0; } function printHeader() { if (!noheader) { printf( \ "010SortableDateTime,Date,Day,Time,ArrivalsPer%s,ThroughputAllPer%s,Throughput4xxPer%s,Throughput5xxPer%s,ResponseTimesSumPer%s(%s),MeanResponseTimePer%s(%s),MaxResponseTimePer%s(%s),ResponseBytesSumPer%s,MeanResponseSizeBytesPer%s,MaxResponseSizeBytesPer%s", \ per_title, \ per_title, \ per_title, \ per_title, \ per_title, \ responseunit, \ per_title, \ responseunit, \ per_title, \ responseunit, \ per_title, \ per_title, \ per_title \ ); if (hasJVMs) { printf(",JVM"); } printf("\n"); } } # Match every line { if (date_offset_left) { date = $(date_offset_left); } else { date = $(NF - date_offset_right); } if (status_offset_left) { status = $(status_offset_left); } else { status = $(NF - status_offset_right); } if (!odrtimeformat) { gsub(/\[/, "", date); gsub(/\//, ":", date); split(date, date_pieces, ":"); isoihsdate = sprintf("%d%02d%02d%02d%02d%02d", date_pieces[3], getMonthInteger(date_pieces[2]), date_pieces[1], date_pieces[4], date_pieces[5], date_pieces[6]); day = sprintf("%d-%02d-%02d", date_pieces[3], getMonthInteger(date_pieces[2]), date_pieces[1]); time = sprintf("%02d:%02d:%02d", date_pieces[4], date_pieces[5], date_pieces[6]); } else { # 09/sept./2021:10:36:54 split(date, pieces, "/"); split(pieces[3], date_pieces, ":"); isoihsdate = sprintf("%d%02d%02d%02d%02d%02d", date_pieces[1], getMonthIntegerODR(pieces[2]), pieces[1], date_pieces[2], date_pieces[3], date_pieces[4]); day = sprintf("%d-%02d-%02d", date_pieces[1], getMonthIntegerODR(pieces[2]), pieces[1]); time = sprintf("%02d:%02d:%02d", date_pieces[2], date_pieces[3], date_pieces[4]); } if (!per_second) { gsub(/..$/, "", isoihsdate); gsub(/:..$/, "", time); } if (response_time_offset_left) { response_time = $(response_time_offset_left); } else { response_time = $(NF - response_time_offset_right); } if (response_size_offset_left) { response_size = $(response_size_offset_left); } else { response_size = $(NF - response_size_offset_right); } arrivalisoihsdate = isoihsdate + 0; if (!per_second) { arrivalisoihsdate *= 100; } originalarrivalisoihsdate = arrivalisoihsdate; if (!odrtimeformat) { if (responsems) { response_time = (response_time + 0) / 1000; if (response_time >= 1000) { arrivalisoihsdate -= response_time / 1000; arrivalisoihsdate = int(arrivalisoihsdate); } } else { # microseconds if (response_time >= 1000000) { arrivalisoihsdate -= response_time / 1000000; arrivalisoihsdate = int(arrivalisoihsdate); } } } else { response_time = response_time + 0; arrivalisoihsdate -= response_time / 1000; arrivalisoihsdate = int(arrivalisoihsdate); } if (int(arrivalisoihsdate % 100) >= 60) { arrivalisoihsdate -= 40; } if (!per_second) { arrivalisoihsdate = int(arrivalisoihsdate / 100); } if (arrivalisoihsdate == 0) { printf("001WARNING (%s): Could not parse date for %s %s\n", FILENAME, $0, isoihsdate) > "/dev/stderr"; } key = "all"; if (jvm_offset_left) { key = $(jvm_offset_left); gsub(/"/, "", key); hasJVMs = 1; } else if (length(jvm_offset_right) > 0) { key = $(NF - jvm_offset_right); gsub(/"/, "", key); hasJVMs = 1; } arrival_count = arrival_counts[key, arrivalisoihsdate] + 0; arrival_counts[key, arrivalisoihsdate] = arrival_count + 1; arrival_counts[key, originalarrivalisoihsdate] = arrival_counts[key, originalarrivalisoihsdate] + 0; if (debug) { printDebug($0 "; date: " date ", status: " status ", isoihsdate: " isoihsdate ", day: " day ", time: " time ", responsetime: " response_time); } sum = response_time_sums[key, isoihsdate] + 0; response_time_sums[key, isoihsdate] = sum + response_time; sum = response_size_sums[key, isoihsdate] + 0; response_size_sums[key, isoihsdate] = sum + response_size; count = response_time_counts[key, isoihsdate] + 0; response_time_counts[key, isoihsdate] = count + 1; max = response_time_maximums[key, isoihsdate] + 0; if (response_time > max) { response_time_maximums[key, isoihsdate] = response_time; } max = response_size_maximums[key, isoihsdate] + 0; if (response_size > max) { response_size_maximums[key, isoihsdate] = response_size; } count = errors4xx_counts[key, isoihsdate] + 0; if (status >= 400 && status < 500) { count = count + 1; } errors4xx_counts[key, isoihsdate] = count; count = errors5xx_counts[key, isoihsdate] + 0; if (status >= 500) { count = count + 1; } errors5xx_counts[key, isoihsdate] = count; days[key, isoihsdate] = day; times[key, isoihsdate] = time; } function getMonthInteger(month) { if (month == "Jan") { return 1; } else if (month == "Feb") { return 2; } else if (month == "Mar") { return 3; } else if (month == "Apr") { return 4; } else if (month == "May") { return 5; } else if (month == "Jun") { return 6; } else if (month == "Jul") { return 7; } else if (month == "Aug") { return 8; } else if (month == "Sep") { return 9; } else if (month == "Oct") { return 10; } else if (month == "Nov") { return 11; } else if (month == "Dec") { return 12; } return 0; } function getMonthIntegerODR(month) { if (month ~ /jan/) { return 1; } else if (month ~ /feb/) { return 2; } else if (month ~ /mar/) { return 3; } else if (month ~ /apr/) { return 4; } else if (month ~ /may/) { return 5; } else if (month ~ /jun/) { return 6; } else if (month ~ /jul/) { return 7; } else if (month ~ /aug/) { return 8; } else if (month ~ /sep/) { return 9; } else if (month ~ /oct/) { return 10; } else if (month ~ /nov/) { return 11; } else if (month ~ /dec/) { return 12; } return 0; } function printDebug(message) { printf("DEBUG: %s\n", message); } function array2d_tokeys(array) { delete a2d2k; for (key in array) { split(key, pieces, SUBSEP); if (length(a2d2k[pieces[1]]) == 0) { a2d2k[pieces[1]] = pieces[2]; } else { a2d2k[pieces[1]] = a2d2k[pieces[1]] SUBSEP pieces[2]; } } } END { array2d_tokeys(arrival_counts); printHeader(); for (key in a2d2k) { split(a2d2k[key], pieces, SUBSEP); for (piecesKey in pieces) { date = pieces[piecesKey]; dt = days[key, date]; tm = times[key, date]; if (!dt) { if (per_second) { dt = sprintf("%d-%02d-%02d", date / 10000000000, (date / 100000000) % 1000, (date / 1000000) % 100); tm = sprintf("%d:%02d:%02d", (date / 10000) % 100, (date / 100) % 100, date % 100); } else { dt = sprintf("%d-%02d-%02d", date / 100000000, (date / 1000000) % 1000, (date / 10000) % 100); tm = sprintf("%d:%02d", (date / 100) % 100, date % 100); } } arrivals = (arrival_counts[key, date] + 0) / divtranrates; responses = (response_time_counts[key, date] + 0) / divtranrates; # ct is to compute averages. We don't divide by divtranrates here as that would skew the average ct = response_time_counts[key, date] + 0 if (ct == 0) { ct = 1; } if (responses > 0 || (responses == 0 && noskip0)) { printf( \ "%s,%s %s,%s,%s,%d,%d,%d,%d,%d,%.2f,%d,%d,%.2f,%d", \ date, \ dt, \ tm, \ dt, \ tm, \ arrivals, \ responses, \ errors4xx_counts[key, date] + 0, \ errors5xx_counts[key, date] + 0, \ response_time_sums[key, date] + 0, \ (response_time_sums[key, date] + 0) / ct, \ response_time_maximums[key, date] + 0, \ response_size_sums[key, date], \ (response_size_sums[key, date] + 0) / ct, \ response_size_maximums[key, date] \ ); if (key != "all") { printf(",%s", key); } printf("\n"); } } } }