name: Benchmark # Weekly lake-build benchmark to track total build wall time and peak RSS. # # Two jobs, both weekly, both persisting append-only JSONL to the # `benchmark-history` orphan branch (no PR gate, no hard threshold — proof # times are noisy by design, see docs/benchmark-workflow-design.md): # * benchmark — `lake build` wall time + peak RSS (kind="build"). # * lakeprof — per-MODULE build time (kind="lakeprof", top_modules) AND, as # of R-F2, per-module compiled-artifact SIZE (olean_sizes). # Size is deterministic where time is noisy, so a ballooning # proof (the n=4 division proofs are the prime suspects) # surfaces as a monotone size trend even on a noisy runner. # # This workflow is independent of PR CI (`build.yml`) and does not gate any # pull request. NEVER bump maxHeartbeats to make a slow/large proof "pass" — the # trend is for human reading, not a forcing function (report §6 non-goals). on: schedule: # Mondays 06:00 UTC. - cron: '0 6 * * 1' workflow_dispatch: # Avoid stacking concurrent benchmark runs (e.g. cron + manual dispatch). concurrency: group: benchmark cancel-in-progress: false permissions: contents: write jobs: benchmark: runs-on: ubuntu-latest timeout-minutes: 360 steps: - name: Checkout (with submodules) uses: actions/checkout@v4 with: submodules: recursive # Install elan + the toolchain pinned in `lean-toolchain`, and fetch the # Mathlib cache. We deliberately do NOT use `actions/cache` for `.lake` # here — the benchmark should reflect a clean from-source build of # EvmAsm itself (Mathlib stays cached so we measure our work, not theirs). - uses: leanprover/lean-action@v1 with: use-mathlib-cache: true use-github-cache: false lake-package-directory: . # Skip the inner `lake build`; we run our own timed build below. build: "false" # AGENTS.md mandates `lake exe cache get` BEFORE the first `lake build`, # otherwise the build pays a Mathlib-rebuild tax that swamps EvmAsm's # own compile time. lean-action with `use-mathlib-cache: true` already # runs this; we re-run as a no-op safety belt and to record its time # separately from the EvmAsm build. - name: Mathlib cache get (timed, excluded from EvmAsm metric) id: cache_get run: | set -o pipefail /usr/bin/time -v -o cache_get.time lake exe cache get 2>&1 | tee cache_get.log || true echo "::group::cache_get /usr/bin/time -v" cat cache_get.time echo "::endgroup::" - name: Lake build (timed) id: build run: | set -o pipefail /usr/bin/time -v -o build.time lake build 2>&1 | tee build.log echo "::group::build /usr/bin/time -v" cat build.time echo "::endgroup::" - name: Extract metrics id: metrics if: always() && steps.build.outcome == 'success' run: | set -euo pipefail # /usr/bin/time -v "Elapsed (wall clock) time" is in either # h:mm:ss or m:ss format. Convert to integer seconds. wall_raw=$(grep 'Elapsed (wall clock) time' build.time | awk -F': ' '{print $NF}') # Convert h:mm:ss or m:ss(.fraction) -> seconds (integer floor). wall_seconds=$(awk -v t="$wall_raw" 'BEGIN { n = split(t, a, ":"); s = 0; for (i = 1; i <= n; i++) s = s * 60 + a[i]; printf("%d", s); }') peak_rss_kb=$(grep 'Maximum resident set size' build.time | awk -F': ' '{print $NF}') echo "wall_raw=$wall_raw" >> "$GITHUB_OUTPUT" echo "wall_seconds=$wall_seconds" >> "$GITHUB_OUTPUT" echo "peak_rss_kb=$peak_rss_kb" >> "$GITHUB_OUTPUT" - name: Write job summary if: always() run: | { echo "## EvmAsm lake build benchmark" echo echo "| field | value |" echo "|---------------|-------|" echo "| commit | \`${GITHUB_SHA}\` |" echo "| ref | \`${GITHUB_REF}\` |" echo "| timestamp | $(date -u +%Y-%m-%dT%H:%M:%SZ) |" echo "| trigger | ${GITHUB_EVENT_NAME} |" echo "| runner os | $(uname -s) $(uname -r) |" echo "| runner cpu | $(grep -m1 'model name' /proc/cpuinfo | sed 's/.*: //') |" echo "| runner cores | $(nproc) |" if [ -f build.time ] && [ "${{ steps.build.outcome }}" = "success" ]; then echo "| wall (raw) | ${{ steps.metrics.outputs.wall_raw }} |" echo "| wall seconds | ${{ steps.metrics.outputs.wall_seconds }} |" echo "| peak RSS (KB) | ${{ steps.metrics.outputs.peak_rss_kb }} |" else echo "| status | build failed (see log) |" fi } >> "$GITHUB_STEP_SUMMARY" - name: Persist record to benchmark-history branch if: steps.metrics.outcome == 'success' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} WALL_RAW: ${{ steps.metrics.outputs.wall_raw }} WALL_SECONDS: ${{ steps.metrics.outputs.wall_seconds }} PEAK_RSS_KB: ${{ steps.metrics.outputs.peak_rss_kb }} run: | set -euo pipefail # Clone a fresh copy of the repo at a temp path and check out (or # initialize) the long-lived `benchmark-history` orphan branch. # Keeping this in its own working tree means we don't disturb the # checkout used for the build above. tmpdir=$(mktemp -d) git -c protocol.version=2 clone --no-checkout --filter=blob:none \ "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" \ "$tmpdir/history" cd "$tmpdir/history" git config user.name 'github-actions[bot]' git config user.email '41898282+github-actions[bot]@users.noreply.github.com' if git ls-remote --exit-code --heads origin benchmark-history >/dev/null 2>&1; then git fetch --depth=1 origin benchmark-history git checkout benchmark-history else # Initialize a fresh orphan branch with a README and an empty log. git checkout --orphan benchmark-history git rm -rf --quiet . 2>/dev/null || true printf '%s\n' \ '# benchmark-history' \ '' \ 'Append-only record of weekly `lake build` benchmark runs from' \ '`.github/workflows/benchmark.yml` (issue #949 slice 3).' \ '' \ 'One JSON object per line in `history.jsonl`:' \ '' \ '- `commit` : full SHA of the commit benchmarked' \ '- `ref` : the branch / ref that triggered the run' \ '- `timestamp` : ISO 8601 UTC timestamp of when the record was appended' \ '- `trigger` : GitHub event name (`schedule` or `workflow_dispatch`)' \ '- `run_id` : GitHub Actions run ID (link target)' \ '- `wall_seconds` : `lake build` wall-clock time, integer seconds' \ '- `wall_raw` : raw `Elapsed (wall clock) time` string from `/usr/bin/time -v`' \ '- `peak_rss_kb` : peak resident set size in kilobytes' \ '- `runner_os` : `uname -s` of the runner' \ '- `runner_cores` : `nproc` on the runner' \ > README.md : > history.jsonl fi # Build a single JSONL record for this run via a tempfile script # (avoids Python's strictness about leading whitespace in `-c`). export TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)" export RUNNER_OS_NAME="$(uname -s)" export RUNNER_CORES="$(nproc)" script="$tmpdir/append_record.py" { echo 'import json, os' echo 'rec = {' echo ' "commit": os.environ["GITHUB_SHA"],' echo ' "ref": os.environ["GITHUB_REF"],' echo ' "timestamp": os.environ["TIMESTAMP"],' echo ' "trigger": os.environ["GITHUB_EVENT_NAME"],' echo ' "run_id": os.environ["GITHUB_RUN_ID"],' echo ' "wall_raw": os.environ["WALL_RAW"],' echo ' "wall_seconds": int(os.environ["WALL_SECONDS"]),' echo ' "peak_rss_kb": int(os.environ["PEAK_RSS_KB"]),' echo ' "runner_os": os.environ["RUNNER_OS_NAME"],' echo ' "runner_cores": int(os.environ["RUNNER_CORES"]),' echo '}' echo 'with open("history.jsonl", "a") as f:' echo ' f.write(json.dumps(rec, sort_keys=True) + "\n")' } > "$script" python3 "$script" git add history.jsonl README.md # `--allow-empty` keeps the run idempotent if an immediate re-run # on the same commit produced identical metrics (extremely unlikely # but defensive). git commit -m "benchmark: ${GITHUB_SHA::12} wall=${WALL_SECONDS}s rss=${PEAK_RSS_KB}KB" \ --allow-empty # Push with retries: the `benchmark` concurrency group should # serialize runs, but a manual workflow_dispatch could still race # against the cron run. On rejection, re-fetch the remote branch, # re-append our record, amend the commit, and try again. We avoid # `git rebase` per the project's no-rebase policy. for attempt in 1 2 3; do if git push origin benchmark-history; then echo "history push: ok on attempt $attempt" break fi echo "history push: attempt $attempt failed, refetching + re-applying" >&2 git fetch origin benchmark-history git reset --hard origin/benchmark-history python3 "$script" git add history.jsonl README.md git commit -m "benchmark: ${GITHUB_SHA::12} wall=${WALL_SECONDS}s rss=${PEAK_RSS_KB}KB" \ --allow-empty sleep 5 done - name: Upload raw timing artifacts if: always() uses: actions/upload-artifact@v4 with: name: benchmark-${{ github.run_id }} path: | build.time build.log cache_get.time cache_get.log if-no-files-found: warn retention-days: 90 # Per-module timing via lakeprof (https://github.com/Kha/lakeprof). # Independent of `benchmark` so a lakeprof break never gates the wall/RSS # metric. Same trigger; appends `kind=lakeprof` records to # `benchmark-history`. See docs/949-lakeprof-design.md (#949 follow-up). lakeprof: runs-on: ubuntu-latest timeout-minutes: 360 env: LAKEPROF_TOP_N: '20' steps: - name: Checkout (with submodules) uses: actions/checkout@v4 with: submodules: recursive - uses: leanprover/lean-action@v1 with: use-mathlib-cache: true use-github-cache: false lake-package-directory: . # We run our own build via `lakeprof record`. build: "false" - name: Mathlib cache get run: lake exe cache get - name: Install uv (for uvx) uses: astral-sh/setup-uv@v3 - name: lakeprof record id: record run: | set -o pipefail uvx --from git+https://github.com/Kha/lakeprof \ lakeprof record -o lakeprof.log lake build 2>&1 | tee lakeprof.record.log - name: lakeprof report (chrome-trace + crit-path) id: report if: steps.record.outcome == 'success' run: | set -o pipefail uvx --from git+https://github.com/Kha/lakeprof \ lakeprof report \ -i lakeprof.log \ --save-chrome-trace lakeprof.trace.json \ --print-crit-path \ --print-avg-crit \ 2>&1 | tee lakeprof.report.txt - name: Extract top-N id: topn if: steps.report.outcome == 'success' run: | python3 .github/workflows/scripts/lakeprof_topn.py \ --in lakeprof.trace.json \ --out lakeprof.topn.json \ --n "${LAKEPROF_TOP_N}" # R-F2 proof-SIZE trend: merge top-N largest .olean sizes into # lakeprof.topn.json so the append step carries them. Runs after the # build (oleans exist) and never fails the job (size is advisory trend). - name: Collect per-module .olean sizes id: oleansize if: steps.record.outcome == 'success' continue-on-error: true env: OLEANSIZE_TOP_N: '30' TOPN_JSON: lakeprof.topn.json run: bash .github/workflows/scripts/oleansize_collect.sh - name: Write job summary if: always() run: | { echo "## lakeprof top-${LAKEPROF_TOP_N} slowest modules" echo echo "| field | value |" echo "|------------|-------|" echo "| commit | \`${GITHUB_SHA}\` |" echo "| ref | \`${GITHUB_REF}\` |" echo "| trigger | ${GITHUB_EVENT_NAME} |" echo "| timestamp | $(date -u +%Y-%m-%dT%H:%M:%SZ) |" if [ -f lakeprof.topn.json ]; then python3 .github/workflows/scripts/lakeprof_md.py \ --in lakeprof.topn.json else echo echo "_lakeprof did not produce a top-N file (record/report failed)._" fi } >> "$GITHUB_STEP_SUMMARY" # Gate on `record` (the build), NOT `topn`: the deterministic olean-size # trend (R-F2) must persist even when the noisy `lakeprof report`/`topn` # steps flake — that's the whole point of a size metric on a noisy runner. # oleansize_collect.sh creates/merges lakeprof.topn.json (with olean_sizes # and possibly no top_modules); lakeprof_append.sh tolerates either key # being absent. - name: Append top-modules + olean sizes to benchmark-history if: steps.record.outcome == 'success' && (steps.topn.outcome == 'success' || steps.oleansize.outcome == 'success') env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} LAKEPROF_TOPN_JSON: lakeprof.topn.json run: bash .github/workflows/scripts/lakeprof_append.sh - name: Upload lakeprof artifacts if: always() uses: actions/upload-artifact@v4 with: name: lakeprof-${{ github.run_id }} path: | lakeprof.log lakeprof.trace.json lakeprof.report.txt lakeprof.record.log lakeprof.topn.json if-no-files-found: warn retention-days: 90