package software.amazon.event.ruler;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.junit.Assume;
import org.junit.Test;
import static software.amazon.event.ruler.Benchmarks.ANYTHING_BUT_IGNORE_CASE_RULES;
import static software.amazon.event.ruler.Benchmarks.ANYTHING_BUT_PREFIX_RULES;
import static software.amazon.event.ruler.Benchmarks.ANYTHING_BUT_RULES;
import static software.amazon.event.ruler.Benchmarks.ANYTHING_BUT_SUFFIX_RULES;
import static software.amazon.event.ruler.Benchmarks.ANYTHING_BUT_WILDCARD_RULES;
import static software.amazon.event.ruler.Benchmarks.COMPLEX_ARRAYS_RULES;
import static software.amazon.event.ruler.Benchmarks.EQUALS_IGNORE_CASE_RULES;
import static software.amazon.event.ruler.Benchmarks.EXACT_RULES;
import static software.amazon.event.ruler.Benchmarks.NUMERIC_RULES;
import static software.amazon.event.ruler.Benchmarks.PREFIX_EQUALS_IGNORE_CASE_RULES;
import static software.amazon.event.ruler.Benchmarks.PREFIX_RULES;
import static software.amazon.event.ruler.Benchmarks.SUFFIX_EQUALS_IGNORE_CASE_RULES;
import static software.amazon.event.ruler.Benchmarks.SUFFIX_RULES;
import static software.amazon.event.ruler.Benchmarks.WILDCARD_RULES;
import static software.amazon.event.ruler.Benchmarks.readCityLots2;
/**
* Warmup + averaged perf benchmarks for {@code rulesForJSONEvent} against the
* {@code citylots2} dataset. Complements {@link Benchmarks#CL2Benchmark},
* which is a single-shot quick-eyeball benchmark; this one is meant for
* before/after comparison during CR review.
*
 * <h2>Why this exists</h2>
*
* {@link Benchmarks#CL2Benchmark} runs each rule type once and reports
* events/sec. On a JVM the first pass pays for JIT compilation and class
* loading, so single-pass variance between runs of the same code routinely
* hits 10-20%. That's fine for eyeballing a big win, not fine for deciding
* whether a subtle change regressed anything by 2%.
*
*
 * <p>{@code StableBenchmarks} runs N warmup passes (discarded), then M
* measured passes per rule type, and reports mean, standard deviation, min,
* max. With the default 3/5 passes, measured variance is usually under 2% —
* good enough for regression review.
*
 * <h2>Running</h2>
*
* Gated off by default. Flip it on with {@code -Druler.perf.run=true}:
*
*
* # Full run, all 14 rule types, default 3 warmup + 5 measure
* mvn test -Dtest=StableBenchmarks -Druler.perf.run=true
*
* # Tighter error bars (slower, ~6 min)
* mvn test -Dtest=StableBenchmarks -Druler.perf.run=true \
* -Druler.perf.warmup=5 -Druler.perf.measure=10
*
* # Focus on specific rule types (substring match, case-insensitive)
* mvn test -Dtest=StableBenchmarks -Druler.perf.run=true \
* -Druler.perf.only=wildcard,suffix
*
* # Verbose: include per-pass timings
* mvn test -Dtest=StableBenchmarks -Druler.perf.run=true \
* -Druler.perf.verbose=true
*
* # Write machine-readable results for later diffing
* mvn test -Dtest=StableBenchmarks -Druler.perf.run=true \
* -Druler.perf.csv=/tmp/ruler-perf.csv
*
*
* Comparing two revisions
*
* The easy path is {@code scripts/perf-compare.sh <before> <after>},
* which checks out each ref, runs this harness on both, and prints a
* noise-aware delta table. See the Performance section of the README.
*
*
 * <p>Manual path, if you need control over the flow:
*
*
* git checkout <before>
* mvn clean test -Dtest=StableBenchmarks -Druler.perf.run=true \
* -Druler.perf.csv=/tmp/before.csv | tee /tmp/before.log
*
* git checkout <after>
* mvn clean test -Dtest=StableBenchmarks -Druler.perf.run=true \
* -Druler.perf.csv=/tmp/after.csv | tee /tmp/after.log
*
* # Quick side-by-side of summary lines
* grep MEAN /tmp/before.log /tmp/after.log | sort
*
* # Or diff the CSVs
* diff /tmp/before.csv /tmp/after.csv
*
*
* Output format
*
* Each rule type produces a single-line summary prefixed with {@code MEAN=}:
*
*
* [WILDCARD ] MEAN=122693 STDDEV=492 (0.4%) MIN=121887 MAX=123221 events/sec
*
*
* Columns are space-aligned so {@code grep | sort} gives a readable diff
* across revisions without further formatting.
*
 * <h2>Scope</h2>
*
* Covers the same fourteen rule types as
* {@link Benchmarks#CL2Benchmark}. Not a replacement for the JMH benchmarks in
* {@code jmh/} — those are the right tool for publication numbers. This is
* the right tool for "did my change regress anything?" during CR.
*/
public class StableBenchmarks {

    private static final int DEFAULT_WARMUP_PASSES = 3;
    private static final int DEFAULT_MEASURE_PASSES = 5;

    // Label column width chosen to fit the longest rule type name
    // ("ANYTHING_BUT_IGNORE_CASE") plus a small margin, so the MEAN columns
    // in the output align cleanly.
    private static final int LABEL_COL_WIDTH = 24;

    // Expected match counts per rule bank. Duplicated from Benchmarks.java
    // because the originals are package-private instance fields, not static.
    // If these drift, CL2Benchmark will also fail — they're tied to the
    // citylots2 dataset.
    private static final int[] EXACT_MATCHES = { 1, 101, 35, 655, 1 };
    private static final int[] WILDCARD_MATCHES = { 490, 713, 43, 2540, 1 };
    private static final int[] PREFIX_MATCHES = { 24, 442, 38, 2387, 328 };
    private static final int[] PREFIX_EQUALS_IGNORE_CASE_MATCHES = { 24, 442, 38, 2387, 328 };
    private static final int[] SUFFIX_MATCHES = { 17921, 871, 13, 1963, 682 };
    private static final int[] SUFFIX_EQUALS_IGNORE_CASE_MATCHES = { 17921, 871, 13, 1963, 682 };
    private static final int[] EQUALS_IGNORE_CASE_MATCHES = { 131, 211, 1758, 825, 116386 };
    private static final int[] NUMERIC_MATCHES = { 2, 120, 148948, 64120, 127053 };
    private static final int[] ANYTHING_BUT_MATCHES = { 211158, 210411, 96682, 120, 210615 };
    private static final int[] ANYTHING_BUT_IGNORE_CASE_MATCHES = { 211158, 210411, 96682, 120, 210615 };
    private static final int[] ANYTHING_BUT_PREFIX_MATCHES = { 211158, 210118, 96667, 120, 209091 };
    private static final int[] ANYTHING_BUT_SUFFIX_MATCHES = { 211136, 210411, 94908, 0, 209055 };
    private static final int[] ANYTHING_BUT_WILDCARD_MATCHES = { 212578, 212355, 213025, 210528, 213067 };
    private static final int[] COMPLEX_ARRAYS_MATCHES = { 218, 1, 149446, 64368, 127485 };

    /** The citylots2 event lines, loaded once at the start of the run. */
    private final List<String> citylots2 = new ArrayList<>();

    /** Results collected across all rule types in this run, for CSV output. */
    private final Map<String, RuleTypeResult> results = new LinkedHashMap<>();

    /** Run every rule type end-to-end, unless filtered via {@code ruler.perf.only}. */
    @Test
    public void runAll() throws Exception {
        Assume.assumeTrue(
                "Skipped: set -Druler.perf.run=true to run the stable benchmarks. "
                        + "See this class's javadoc for usage.",
                Boolean.getBoolean("ruler.perf.run"));
        readCityLots2(citylots2);
        int warmup = getIntProp("ruler.perf.warmup", DEFAULT_WARMUP_PASSES);
        int measure = getIntProp("ruler.perf.measure", DEFAULT_MEASURE_PASSES);
        List<String> only = getOnlyFilter();
        boolean verbose = Boolean.getBoolean("ruler.perf.verbose");
        printHeader(warmup, measure, only, verbose);
        // Rule types grouped by complexity. Order is stable so output diffs
        // between revisions are easy to read.
        maybeRun("EXACT", EXACT_RULES, EXACT_MATCHES, only, warmup, measure, verbose);
        maybeRun("WILDCARD", WILDCARD_RULES, WILDCARD_MATCHES, only, warmup, measure, verbose);
        maybeRun("PREFIX", PREFIX_RULES, PREFIX_MATCHES, only, warmup, measure, verbose);
        maybeRun("PREFIX_EIC",
                PREFIX_EQUALS_IGNORE_CASE_RULES, PREFIX_EQUALS_IGNORE_CASE_MATCHES,
                only, warmup, measure, verbose);
        maybeRun("SUFFIX", SUFFIX_RULES, SUFFIX_MATCHES, only, warmup, measure, verbose);
        maybeRun("SUFFIX_EIC",
                SUFFIX_EQUALS_IGNORE_CASE_RULES, SUFFIX_EQUALS_IGNORE_CASE_MATCHES,
                only, warmup, measure, verbose);
        maybeRun("EQUALS_IGNORE_CASE",
                EQUALS_IGNORE_CASE_RULES, EQUALS_IGNORE_CASE_MATCHES,
                only, warmup, measure, verbose);
        maybeRun("NUMERIC", NUMERIC_RULES, NUMERIC_MATCHES, only, warmup, measure, verbose);
        maybeRun("ANYTHING_BUT",
                ANYTHING_BUT_RULES, ANYTHING_BUT_MATCHES, only, warmup, measure, verbose);
        maybeRun("ANYTHING_BUT_IGNORE_CASE",
                ANYTHING_BUT_IGNORE_CASE_RULES, ANYTHING_BUT_IGNORE_CASE_MATCHES,
                only, warmup, measure, verbose);
        maybeRun("ANYTHING_BUT_PREFIX",
                ANYTHING_BUT_PREFIX_RULES, ANYTHING_BUT_PREFIX_MATCHES,
                only, warmup, measure, verbose);
        maybeRun("ANYTHING_BUT_SUFFIX",
                ANYTHING_BUT_SUFFIX_RULES, ANYTHING_BUT_SUFFIX_MATCHES,
                only, warmup, measure, verbose);
        maybeRun("ANYTHING_BUT_WILDCARD",
                ANYTHING_BUT_WILDCARD_RULES, ANYTHING_BUT_WILDCARD_MATCHES,
                only, warmup, measure, verbose);
        maybeRun("COMPLEX_ARRAYS",
                COMPLEX_ARRAYS_RULES, COMPLEX_ARRAYS_MATCHES, only, warmup, measure, verbose);
        maybeWriteCsv();
    }

    // --- Header / output -----------------------------------------------

    /** Print run configuration and environment details before any measurements. */
    private void printHeader(int warmup, int measure, List<String> only, boolean verbose) {
        System.out.println("========== StableBenchmarks ==========");
        System.out.printf(Locale.ROOT, "  warmup passes  : %d%n", warmup);
        System.out.printf(Locale.ROOT, "  measure passes : %d%n", measure);
        System.out.printf(Locale.ROOT, "  events         : %d%n", citylots2.size());
        System.out.printf(Locale.ROOT, "  verbose        : %s%n", verbose);
        System.out.printf(Locale.ROOT, "  only filter    : %s%n", only == null ? "(all)" : only);
        System.out.println("  --- environment ---");
        System.out.printf(Locale.ROOT, "  jvm            : %s %s%n",
                System.getProperty("java.vm.name"), System.getProperty("java.version"));
        System.out.printf(Locale.ROOT, "  os             : %s %s / %s%n",
                System.getProperty("os.name"), System.getProperty("os.version"),
                System.getProperty("os.arch"));
        System.out.printf(Locale.ROOT, "  cores          : %d%n",
                Runtime.getRuntime().availableProcessors());
        System.out.printf(Locale.ROOT, "  heap max       : %d MB%n",
                Runtime.getRuntime().maxMemory() / (1024 * 1024));
        String csvPath = System.getProperty("ruler.perf.csv");
        if (csvPath != null) {
            System.out.printf(Locale.ROOT, "  csv output     : %s%n", csvPath);
        }
        System.out.println();
    }

    // --- Runner --------------------------------------------------------

    /** Run one rule type unless it is excluded by the {@code ruler.perf.only} filter. */
    private void maybeRun(String label, String[] rules, int[] expectedMatches,
                          List<String> only, int warmupPasses, int measurePasses,
                          boolean verbose)
            throws Exception {
        if (only != null && !matchesOnlyFilter(label, only)) {
            return;
        }
        runRuleType(label, rules, expectedMatches, warmupPasses, measurePasses, verbose);
    }

    /**
     * Run warmup passes (discarded), then measured passes, for one rule type,
     * print the one-line summary, and record the result for CSV output.
     */
    private void runRuleType(String label, String[] rules, int[] expectedMatches,
                             int warmupPasses, int measurePasses, boolean verbose)
            throws Exception {
        List<Double> samples = new ArrayList<>(measurePasses);
        for (int i = 0; i < warmupPasses; i++) {
            double eps = timeOnePass(rules, expectedMatches);
            if (verbose) {
                System.out.printf(Locale.ROOT,
                        "  [%s] warmup  %d/%d: %.1f events/sec%n",
                        padLabel(label), i + 1, warmupPasses, eps);
            }
        }
        for (int i = 0; i < measurePasses; i++) {
            double eps = timeOnePass(rules, expectedMatches);
            samples.add(eps);
            if (verbose) {
                System.out.printf(Locale.ROOT,
                        "  [%s] measure %d/%d: %.1f events/sec%n",
                        padLabel(label), i + 1, measurePasses, eps);
            }
        }
        double mean = mean(samples);
        double stddev = stddev(samples, mean);
        double relStddev = 100.0 * stddev / mean;
        double min = Collections.min(samples);
        double max = Collections.max(samples);
        // One-liner summary. Column widths are fixed so that piping through
        // `grep MEAN | sort` lines up nicely, and so before/after logs are
        // visually diffable.
        System.out.printf(Locale.ROOT,
                "  [%s] MEAN=%7.0f STDDEV=%6.0f (%4.1f%%) MIN=%7.0f MAX=%7.0f events/sec%n",
                padLabel(label), mean, stddev, relStddev, min, max);
        results.put(label, new RuleTypeResult(label, mean, stddev, min, max, samples));
    }

    /**
     * Build a fresh machine for this rule set and time a single scan of
     * citylots2. Each pass compiles the rules from scratch, so compile cost
     * is included. Returns events/sec for the scan.
     */
    private double timeOnePass(String[] rules, int[] expectedMatches) throws Exception {
        Machine machine = new Machine();
        int[] gotCounts = new int[rules.length];
        for (int i = 0; i < rules.length; i++) {
            machine.addRule("r" + i, rules[i]);
        }
        long before = System.nanoTime();
        for (String event : citylots2) {
            List<String> matches = machine.rulesForJSONEvent(event);
            for (String match : matches) {
                // Rule names are "r<index>"; strip the leading 'r' to recover
                // the index into gotCounts.
                int idx = Integer.parseInt(match.substring(1));
                gotCounts[idx]++;
            }
        }
        long afterNs = System.nanoTime() - before;
        // Sanity: reject the result if counts don't match. A perf number for
        // a match function that doesn't return correct results is useless.
        for (int i = 0; i < rules.length; i++) {
            if (gotCounts[i] != expectedMatches[i]) {
                throw new AssertionError("match count mismatch for rule " + i
                        + ": expected=" + expectedMatches[i] + " got=" + gotCounts[i]);
            }
        }
        return (1_000_000_000.0 * citylots2.size()) / afterNs;
    }

    // --- CSV output ----------------------------------------------------

    /**
     * Write all collected results to the file named by {@code ruler.perf.csv},
     * if set. Parent directories are created as needed.
     */
    private void maybeWriteCsv() throws IOException {
        String csvPath = System.getProperty("ruler.perf.csv");
        if (csvPath == null || csvPath.isEmpty()) {
            return;
        }
        Path out = Paths.get(csvPath);
        if (out.getParent() != null) {
            Files.createDirectories(out.getParent());
        }
        try (PrintWriter w = new PrintWriter(
                Files.newBufferedWriter(out, StandardCharsets.UTF_8))) {
            w.println("rule_type,mean_eps,stddev_eps,rel_stddev_pct,min_eps,max_eps,samples");
            for (RuleTypeResult r : results.values()) {
                w.printf(Locale.ROOT, "%s,%.2f,%.2f,%.4f,%.2f,%.2f,%s%n",
                        r.label, r.mean, r.stddev,
                        100.0 * r.stddev / r.mean,
                        r.min, r.max,
                        formatSamples(r.samples));
            }
        }
        System.out.println();
        System.out.println("Wrote CSV: " + out.toAbsolutePath());
    }

    /** Join per-pass samples with '|' (commas would break the CSV column). */
    private static String formatSamples(List<Double> samples) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < samples.size(); i++) {
            if (i > 0) {
                sb.append('|');
            }
            sb.append(String.format(Locale.ROOT, "%.2f", samples.get(i)));
        }
        return sb.toString();
    }

    // --- Helpers -------------------------------------------------------

    /** Right-pad {@code label} with spaces to {@link #LABEL_COL_WIDTH}. */
    private static String padLabel(String label) {
        if (label.length() >= LABEL_COL_WIDTH) {
            return label;
        }
        StringBuilder sb = new StringBuilder(LABEL_COL_WIDTH);
        sb.append(label);
        while (sb.length() < LABEL_COL_WIDTH) {
            sb.append(' ');
        }
        return sb.toString();
    }

    /** Arithmetic mean. Callers guarantee {@code xs} is non-empty. */
    private static double mean(List<Double> xs) {
        double sum = 0;
        for (double x : xs) {
            sum += x;
        }
        return sum / xs.size();
    }

    /** Sample standard deviation (n-1 denominator); 0 for fewer than 2 samples. */
    private static double stddev(List<Double> xs, double mean) {
        if (xs.size() < 2) {
            return 0;
        }
        double sum = 0;
        for (double x : xs) {
            double d = x - mean;
            sum += d * d;
        }
        return Math.sqrt(sum / (xs.size() - 1));
    }

    /** Read an int system property, falling back to {@code defaultValue} if unset/empty. */
    private static int getIntProp(String name, int defaultValue) {
        String v = System.getProperty(name);
        if (v == null || v.isEmpty()) {
            return defaultValue;
        }
        return Integer.parseInt(v);
    }

    /**
     * Parse {@code -Druler.perf.only=wildcard,suffix} into a list of lowercase
     * substrings. Match is case-insensitive substring — so
     * {@code only=wildcard} matches {@code WILDCARD} and {@code ANYTHING_BUT_WILDCARD}.
     * Returns {@code null} if the property is unset, meaning "run all".
     */
    private static List<String> getOnlyFilter() {
        String v = System.getProperty("ruler.perf.only");
        if (v == null || v.isEmpty()) {
            return null;
        }
        List<String> out = new ArrayList<>();
        for (String part : v.split(",")) {
            String trimmed = part.trim().toLowerCase(Locale.ROOT);
            if (!trimmed.isEmpty()) {
                out.add(trimmed);
            }
        }
        return out;
    }

    /** True if {@code label} (case-insensitively) contains any of the filter substrings. */
    private static boolean matchesOnlyFilter(String label, List<String> only) {
        String low = label.toLowerCase(Locale.ROOT);
        for (String needle : only) {
            if (low.contains(needle)) {
                return true;
            }
        }
        return false;
    }

    // --- Result record -------------------------------------------------

    /** Immutable per-rule-type summary: label, mean/stddev/min/max, and raw samples. */
    private static final class RuleTypeResult {
        final String label;
        final double mean;
        final double stddev;
        final double min;
        final double max;
        final List<Double> samples;

        RuleTypeResult(String label, double mean, double stddev,
                       double min, double max, List<Double> samples) {
            this.label = label;
            this.mean = mean;
            this.stddev = stddev;
            this.min = min;
            this.max = max;
            // Defensive copy: the caller's list may be reused/mutated later.
            this.samples = new ArrayList<>(samples);
        }
    }
}