name: Benchmark

# Weekly lake-build benchmark to track total build wall time and peak RSS.
#
# Two jobs, both weekly, both persisting append-only JSONL to the
# `benchmark-history` orphan branch (no PR gate, no hard threshold — proof
# times are noisy by design, see docs/benchmark-workflow-design.md):
#   * benchmark — `lake build` wall time + peak RSS (kind="build").
#   * lakeprof  — per-MODULE build time (kind="lakeprof", top_modules) AND, as
#                 of R-F2, per-module compiled-artifact SIZE (olean_sizes).
#                 Size is deterministic where time is noisy, so a ballooning
#                 proof (the n=4 division proofs are the prime suspects)
#                 surfaces as a monotone size trend even on a noisy runner.
#
# This workflow is independent of PR CI (`build.yml`) and does not gate any
# pull request. NEVER bump maxHeartbeats to make a slow/large proof "pass" — the
# trend is for human reading, not a forcing function (report §6 non-goals).

on:
  schedule:
    # Mondays 06:00 UTC.
    - cron: '0 6 * * 1'
  workflow_dispatch:

# Avoid stacking concurrent benchmark runs (e.g. cron + manual dispatch).
concurrency:
  group: benchmark
  cancel-in-progress: false

permissions:
  contents: write

jobs:
  benchmark:
    runs-on: ubuntu-latest
    timeout-minutes: 360

    steps:
      - name: Checkout (with submodules)
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Install elan + the toolchain pinned in `lean-toolchain`, and fetch the
      # Mathlib cache. We deliberately do NOT use `actions/cache` for `.lake`
      # here — the benchmark should reflect a clean from-source build of
      # EvmAsm itself (Mathlib stays cached so we measure our work, not theirs).
      - uses: leanprover/lean-action@v1
        with:
          use-mathlib-cache: true
          use-github-cache: false
          lake-package-directory: .
          # Skip the inner `lake build`; we run our own timed build below.
          build: "false"

      # AGENTS.md mandates `lake exe cache get` BEFORE the first `lake build`,
      # otherwise the build pays a Mathlib-rebuild tax that swamps EvmAsm's
      # own compile time. lean-action with `use-mathlib-cache: true` already
      # runs this; we re-run as a no-op safety belt and to record its time
      # separately from the EvmAsm build.
      - name: Mathlib cache get (timed, excluded from EvmAsm metric)
        id: cache_get
        run: |
          set -o pipefail
          /usr/bin/time -v -o cache_get.time lake exe cache get 2>&1 | tee cache_get.log || true
          echo "::group::cache_get /usr/bin/time -v"
          cat cache_get.time
          echo "::endgroup::"

      - name: Lake build (timed)
        id: build
        run: |
          set -o pipefail
          /usr/bin/time -v -o build.time lake build 2>&1 | tee build.log
          echo "::group::build /usr/bin/time -v"
          cat build.time
          echo "::endgroup::"

      - name: Extract metrics
        id: metrics
        if: always() && steps.build.outcome == 'success'
        run: |
          set -euo pipefail
          # /usr/bin/time -v "Elapsed (wall clock) time" is in either
          # h:mm:ss or m:ss format. Convert to integer seconds.
          wall_raw=$(grep 'Elapsed (wall clock) time' build.time | awk -F': ' '{print $NF}')
          # Convert h:mm:ss or m:ss(.fraction) -> seconds (integer floor).
          wall_seconds=$(awk -v t="$wall_raw" 'BEGIN {
            n = split(t, a, ":");
            s = 0;
            for (i = 1; i <= n; i++) s = s * 60 + a[i];
            printf("%d", s);
          }')
          peak_rss_kb=$(grep 'Maximum resident set size' build.time | awk -F': ' '{print $NF}')
          echo "wall_raw=$wall_raw"           >> "$GITHUB_OUTPUT"
          echo "wall_seconds=$wall_seconds"   >> "$GITHUB_OUTPUT"
          echo "peak_rss_kb=$peak_rss_kb"     >> "$GITHUB_OUTPUT"

      - name: Write job summary
        if: always()
        run: |
          {
            echo "## EvmAsm lake build benchmark"
            echo
            echo "| field         | value |"
            echo "|---------------|-------|"
            echo "| commit        | \`${GITHUB_SHA}\` |"
            echo "| ref           | \`${GITHUB_REF}\` |"
            echo "| timestamp     | $(date -u +%Y-%m-%dT%H:%M:%SZ) |"
            echo "| trigger       | ${GITHUB_EVENT_NAME} |"
            echo "| runner os     | $(uname -s) $(uname -r) |"
            echo "| runner cpu    | $(grep -m1 'model name' /proc/cpuinfo | sed 's/.*: //') |"
            echo "| runner cores  | $(nproc) |"
            if [ -f build.time ] && [ "${{ steps.build.outcome }}" = "success" ]; then
              echo "| wall (raw)    | ${{ steps.metrics.outputs.wall_raw }} |"
              echo "| wall seconds  | ${{ steps.metrics.outputs.wall_seconds }} |"
              echo "| peak RSS (KB) | ${{ steps.metrics.outputs.peak_rss_kb }} |"
            else
              echo "| status        | build failed (see log) |"
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Persist record to benchmark-history branch
        if: steps.metrics.outcome == 'success'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          WALL_RAW: ${{ steps.metrics.outputs.wall_raw }}
          WALL_SECONDS: ${{ steps.metrics.outputs.wall_seconds }}
          PEAK_RSS_KB: ${{ steps.metrics.outputs.peak_rss_kb }}
        run: |
          set -euo pipefail
          # Clone a fresh copy of the repo at a temp path and check out (or
          # initialize) the long-lived `benchmark-history` orphan branch.
          # Keeping this in its own working tree means we don't disturb the
          # checkout used for the build above.
          tmpdir=$(mktemp -d)
          git -c protocol.version=2 clone --no-checkout --filter=blob:none \
            "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" \
            "$tmpdir/history"
          cd "$tmpdir/history"
          git config user.name  'github-actions[bot]'
          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
          if git ls-remote --exit-code --heads origin benchmark-history >/dev/null 2>&1; then
            git fetch --depth=1 origin benchmark-history
            git checkout benchmark-history
          else
            # Initialize a fresh orphan branch with a README and an empty log.
            git checkout --orphan benchmark-history
            git rm -rf --quiet . 2>/dev/null || true
            printf '%s\n' \
              '# benchmark-history' \
              '' \
              'Append-only record of weekly `lake build` benchmark runs from' \
              '`.github/workflows/benchmark.yml` (issue #949 slice 3).' \
              '' \
              'One JSON object per line in `history.jsonl`:' \
              '' \
              '- `commit`        : full SHA of the commit benchmarked' \
              '- `ref`           : the branch / ref that triggered the run' \
              '- `timestamp`     : ISO 8601 UTC timestamp of when the record was appended' \
              '- `trigger`       : GitHub event name (`schedule` or `workflow_dispatch`)' \
              '- `run_id`        : GitHub Actions run ID (link target)' \
              '- `wall_seconds`  : `lake build` wall-clock time, integer seconds' \
              '- `wall_raw`      : raw `Elapsed (wall clock) time` string from `/usr/bin/time -v`' \
              '- `peak_rss_kb`   : peak resident set size in kilobytes' \
              '- `runner_os`     : `uname -s` of the runner' \
              '- `runner_cores`  : `nproc` on the runner' \
              > README.md
            : > history.jsonl
          fi
          # Build a single JSONL record for this run via a tempfile script
          # (avoids Python's strictness about leading whitespace in `-c`).
          export TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
          export RUNNER_OS_NAME="$(uname -s)"
          export RUNNER_CORES="$(nproc)"
          script="$tmpdir/append_record.py"
          {
            echo 'import json, os'
            echo 'rec = {'
            echo '  "commit":       os.environ["GITHUB_SHA"],'
            echo '  "ref":          os.environ["GITHUB_REF"],'
            echo '  "timestamp":    os.environ["TIMESTAMP"],'
            echo '  "trigger":      os.environ["GITHUB_EVENT_NAME"],'
            echo '  "run_id":       os.environ["GITHUB_RUN_ID"],'
            echo '  "wall_raw":     os.environ["WALL_RAW"],'
            echo '  "wall_seconds": int(os.environ["WALL_SECONDS"]),'
            echo '  "peak_rss_kb":  int(os.environ["PEAK_RSS_KB"]),'
            echo '  "runner_os":    os.environ["RUNNER_OS_NAME"],'
            echo '  "runner_cores": int(os.environ["RUNNER_CORES"]),'
            echo '}'
            echo 'with open("history.jsonl", "a") as f:'
            echo '  f.write(json.dumps(rec, sort_keys=True) + "\n")'
          } > "$script"
          python3 "$script"
          git add history.jsonl README.md
          # `--allow-empty` keeps the run idempotent if an immediate re-run
          # on the same commit produced identical metrics (extremely unlikely
          # but defensive).
          git commit -m "benchmark: ${GITHUB_SHA::12} wall=${WALL_SECONDS}s rss=${PEAK_RSS_KB}KB" \
            --allow-empty
          # Push with retries: the `benchmark` concurrency group should
          # serialize runs, but a manual workflow_dispatch could still race
          # against the cron run. On rejection, re-fetch the remote branch,
          # re-append our record, amend the commit, and try again. We avoid
          # `git rebase` per the project's no-rebase policy.
          for attempt in 1 2 3; do
            if git push origin benchmark-history; then
              echo "history push: ok on attempt $attempt"
              break
            fi
            echo "history push: attempt $attempt failed, refetching + re-applying" >&2
            git fetch origin benchmark-history
            git reset --hard origin/benchmark-history
            python3 "$script"
            git add history.jsonl README.md
            git commit -m "benchmark: ${GITHUB_SHA::12} wall=${WALL_SECONDS}s rss=${PEAK_RSS_KB}KB" \
              --allow-empty
            sleep 5
          done

      - name: Upload raw timing artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-${{ github.run_id }}
          path: |
            build.time
            build.log
            cache_get.time
            cache_get.log
          if-no-files-found: warn
          retention-days: 90

  # Per-module timing via lakeprof (https://github.com/Kha/lakeprof).
  # Independent of `benchmark` so a lakeprof break never gates the wall/RSS
  # metric. Same trigger; appends `kind=lakeprof` records to
  # `benchmark-history`. See docs/949-lakeprof-design.md (#949 follow-up).
  lakeprof:
    runs-on: ubuntu-latest
    timeout-minutes: 360

    env:
      LAKEPROF_TOP_N: '20'

    steps:
      - name: Checkout (with submodules)
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - uses: leanprover/lean-action@v1
        with:
          use-mathlib-cache: true
          use-github-cache: false
          lake-package-directory: .
          # We run our own build via `lakeprof record`.
          build: "false"

      - name: Mathlib cache get
        run: lake exe cache get

      - name: Install uv (for uvx)
        uses: astral-sh/setup-uv@v3

      - name: lakeprof record
        id: record
        run: |
          set -o pipefail
          uvx --from git+https://github.com/Kha/lakeprof \
            lakeprof record -o lakeprof.log lake build 2>&1 | tee lakeprof.record.log

      - name: lakeprof report (chrome-trace + crit-path)
        id: report
        if: steps.record.outcome == 'success'
        run: |
          set -o pipefail
          uvx --from git+https://github.com/Kha/lakeprof \
            lakeprof report \
              -i lakeprof.log \
              --save-chrome-trace lakeprof.trace.json \
              --print-crit-path \
              --print-avg-crit \
              2>&1 | tee lakeprof.report.txt

      - name: Extract top-N
        id: topn
        if: steps.report.outcome == 'success'
        run: |
          python3 .github/workflows/scripts/lakeprof_topn.py \
            --in lakeprof.trace.json \
            --out lakeprof.topn.json \
            --n "${LAKEPROF_TOP_N}"

      # R-F2 proof-SIZE trend: merge top-N largest .olean sizes into
      # lakeprof.topn.json so the append step carries them. Runs after the
      # build (oleans exist) and never fails the job (size is advisory trend).
      - name: Collect per-module .olean sizes
        id: oleansize
        if: steps.record.outcome == 'success'
        continue-on-error: true
        env:
          OLEANSIZE_TOP_N: '30'
          TOPN_JSON: lakeprof.topn.json
        run: bash .github/workflows/scripts/oleansize_collect.sh

      - name: Write job summary
        if: always()
        run: |
          {
            echo "## lakeprof top-${LAKEPROF_TOP_N} slowest modules"
            echo
            echo "| field      | value |"
            echo "|------------|-------|"
            echo "| commit     | \`${GITHUB_SHA}\` |"
            echo "| ref        | \`${GITHUB_REF}\` |"
            echo "| trigger    | ${GITHUB_EVENT_NAME} |"
            echo "| timestamp  | $(date -u +%Y-%m-%dT%H:%M:%SZ) |"
            if [ -f lakeprof.topn.json ]; then
              python3 .github/workflows/scripts/lakeprof_md.py \
                --in lakeprof.topn.json
            else
              echo
              echo "_lakeprof did not produce a top-N file (record/report failed)._"
            fi
          } >> "$GITHUB_STEP_SUMMARY"

      # Gate on `record` (the build), NOT `topn`: the deterministic olean-size
      # trend (R-F2) must persist even when the noisy `lakeprof report`/`topn`
      # steps flake — that's the whole point of a size metric on a noisy runner.
      # oleansize_collect.sh creates/merges lakeprof.topn.json (with olean_sizes
      # and possibly no top_modules); lakeprof_append.sh tolerates either key
      # being absent.
      - name: Append top-modules + olean sizes to benchmark-history
        if: steps.record.outcome == 'success' && (steps.topn.outcome == 'success' || steps.oleansize.outcome == 'success')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          LAKEPROF_TOPN_JSON: lakeprof.topn.json
        run: bash .github/workflows/scripts/lakeprof_append.sh

      - name: Upload lakeprof artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: lakeprof-${{ github.run_id }}
          path: |
            lakeprof.log
            lakeprof.trace.json
            lakeprof.report.txt
            lakeprof.record.log
            lakeprof.topn.json
          if-no-files-found: warn
          retention-days: 90