name: CI

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]

# One concurrency group per workflow + ref.
#   - pull_request runs: a newer commit cancels the in-progress run for the
#     previous commit, so stale PR runs do not burn runner minutes.
#   - push runs (main / develop): an in-progress run is allowed to finish;
#     newer pushes wait in the group. On main the `p1-eval` job publishes
#     per-commit results, and cancelling it mid-run would drop that
#     commit's entry from the published history.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
  # ──────────────────────────────────────────────
  # Lint & type-check the Next.js web app
  # ──────────────────────────────────────────────
  web-lint:
    name: Web — Lint & Type-check
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step in this job executes from the web app package.
        working-directory: apps/web
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up pnpm
        uses: pnpm/action-setup@v4
      - name: Set up Node.js 22
        uses: actions/setup-node@v6
        with:
          node-version: '22'
          # Cache the pnpm store, keyed on the lockfile named below.
          cache: pnpm
          cache-dependency-path: pnpm-lock.yaml
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Lint
        run: pnpm run lint
      - name: Type-check
        run: pnpm run type-check

  # ──────────────────────────────────────────────
  # Vitest smoke tests for the Next.js web app
  #   The web app used to have *zero* tests gated in CI — lint + typecheck
  #   were the only signal. These smoke tests render the components a
  #   reviewer cares about (Hero, ComparisonTable, Sidebar, TopBar,
  #   MarketplaceView, CaseWorkspace) and assert on the wording we agreed
  #   to keep honest (no "every commit", alert reduction tagged as a
  #   measurement, etc.). They run with jsdom; no browser required.
  # ──────────────────────────────────────────────
  web-test:
    name: Web — Vitest smoke tests
    runs-on: ubuntu-latest
    # Only starts once the lint / type-check job has succeeded.
    needs: web-lint
    defaults:
      run:
        # Every `run` step in this job executes from the web app package.
        working-directory: apps/web
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up pnpm
        uses: pnpm/action-setup@v4
      - name: Set up Node.js 22
        uses: actions/setup-node@v6
        with:
          node-version: '22'
          # Cache the pnpm store, keyed on the lockfile named below.
          cache: pnpm
          cache-dependency-path: pnpm-lock.yaml
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Run tests
        run: pnpm test

  # ──────────────────────────────────────────────
  # Build the Next.js web app
  # ──────────────────────────────────────────────
  web-build:
    name: Web — Build
    runs-on: ubuntu-latest
    # Only starts once the lint / type-check job has succeeded.
    needs: web-lint
    defaults:
      run:
        # Every `run` step in this job executes from the web app package.
        working-directory: apps/web
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up pnpm
        uses: pnpm/action-setup@v4
      - name: Set up Node.js 22
        uses: actions/setup-node@v6
        with:
          node-version: '22'
          # Cache the pnpm store, keyed on the lockfile named below.
          cache: pnpm
          cache-dependency-path: pnpm-lock.yaml
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Build
        run: pnpm build

  # ──────────────────────────────────────────────
  # @aisoc/mcp — MCP server for Claude/Cursor/Cody
  #   Typechecks, runs the vitest contract tests (config + tools registry +
  #   installer file-write behaviour), and builds the dist bundle so the
  #   `bin/aisoc-mcp` shebang is exercised on the same Node version we
  #   publish under. Lives in the pnpm workspace; we reuse the root
  #   lockfile to keep installs fast and reproducible.
  # ──────────────────────────────────────────────
  mcp-test:
    name: MCP — Type-check, Test & Build
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up pnpm
        uses: pnpm/action-setup@v4
      - name: Set up Node.js 22
        uses: actions/setup-node@v6
        with:
          node-version: '22'
          # Cache the pnpm store, keyed on the lockfile named below.
          cache: pnpm
          cache-dependency-path: pnpm-lock.yaml
      # Install once for the whole workspace; the steps after it narrow
      # each script to the `@aisoc/mcp` package with `--filter`.
      - name: Install workspace deps
        run: pnpm install --frozen-lockfile
      - name: Type-check
        run: pnpm --filter @aisoc/mcp typecheck
      - name: Test
        run: pnpm --filter @aisoc/mcp test
      - name: Build
        run: pnpm --filter @aisoc/mcp build

  # ──────────────────────────────────────────────
  # Python API & services
  # ──────────────────────────────────────────────
  python-lint:
    name: Python — Lint & Type-check
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up Python 3.12
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'
      - name: Install Ruff & Mypy
        # The version ranges track the constraints declared in each service's
        # ``pyproject.toml`` (``ruff = "^0.4.4"``, ``mypy = "^1.10.0"``).
        # Before the pins existed this gate installed whatever was newest, so
        # a fresh ruff release (e.g. 0.15.x adding ``UP037`` enforcement)
        # could break main with no dependency-file change at all. Dependabot
        # bumps to the pyproject constraints (PRs #186, #187) update these
        # pins in the same diff.
        # NOTE(review): mypy is installed here, but no step in this job
        # invokes it — confirm whether a type-check step is intended.
        run: pip install 'ruff>=0.4.4,<0.5' 'mypy>=1.10,<2'
      # Every Python service is linted and format-checked against the shared
      # repo-root `ruff.toml` — not just the original four. The old
      # `|| true` soft-fail has been removed, so a failure here is a real
      # signal.
      - name: Ruff lint (all services)
        run: ruff check services/
      - name: Ruff format check (all services)
        run: ruff format --check services/
      # The aisoc-api Docker image uses `services/api` as its build context,
      # which means nothing under `services/agents` ships in the container.
      # Both the agents runtime and the API's `/nl-query/*` endpoints consume
      # the natural-language query translator, so a vendored mirror is kept
      # in-tree at `services/api/app/_vendor/nl_query/`. This guard fails the
      # build as soon as the two trees drift; run
      # `python scripts/sync_vendored_nl_query.py` locally to fix.
      - name: Verify vendored nl_query is in sync
        run: python scripts/sync_vendored_nl_query.py --check
      # The correlation-narrative builder follows the same pattern. Its
      # canonical home is `services/fusion/app/services/narrative.py` (called
      # at fusion time to populate `FusedAlert.narrative`), and it is
      # mirrored into `services/api/app/_vendor/narrative.py` so the API can
      # lazily compute the narrative on first read of an alert whose
      # `narrative` column is still NULL — without needing `services/fusion`
      # inside the Docker build context. If this check fails, run
      # `python scripts/sync_vendored_narrative.py` locally.
      - name: Verify vendored narrative builder is in sync
        run: python scripts/sync_vendored_narrative.py --check

  python-test:
    name: Python — Tests
    runs-on: ubuntu-latest
    needs: python-lint
    env:
      # Folded into a single space-separated line. It is spliced into the
      # install command through `${{ env.API_DEPS }}` (not `$API_DEPS`) so
      # the shell parses the embedded double quotes around the extras.
      API_DEPS: >-
        fastapi "pydantic[email]" pydantic-settings "sqlalchemy[asyncio]" asyncpg
        structlog "python-jose[cryptography]" "passlib[bcrypt]" tenacity pyyaml
        prometheus-client "opentelemetry-sdk" "opentelemetry-api"
        "opentelemetry-exporter-otlp-proto-grpc"
        "opentelemetry-instrumentation-fastapi"
        "strawberry-graphql[fastapi]"
        neo4j redis celery httpx aiofiles email-validator PyJWT
        "sqlglot>=23,<27" aiosqlite
        pytest pytest-asyncio
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up Python 3.12
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'
          cache: pip
      - name: Install API dependencies
        run: pip install --quiet ${{ env.API_DEPS }}
      # Boot-test: import the real FastAPI entrypoint before paying for the
      # full pytest suite, so an import-time NameError / ImportError fails
      # fast. PR #152 fixed two such NameErrors (model_validator,
      # rule_tuning) that CI had missed because pytest mocks imports and
      # never loads the real entrypoint. This step keeps that class of
      # regression from reaching main again.
      - name: Boot-test API service (catch import-time errors)
        working-directory: services/api
        env:
          ENVIRONMENT: development
          SECRET_KEY: ci-dummy-secret-key-at-least-32bytes!
          DATABASE_URL: postgresql+asyncpg://x:x@localhost/x
        run: |
          python -c "from app.main import app; print('api boot-test ok')"
      - name: Run API tests (unit + GraphQL schema)
        working-directory: services/api
        env:
          ENVIRONMENT: development
          SECRET_KEY: ci-dummy-secret-key-at-least-32bytes!
          DATABASE_URL: postgresql+asyncpg://x:x@localhost/x
        run: python -m pytest tests/ -v --tb=short
      # Extra packages for the agents suites, on top of API_DEPS:
      #   - `respx` mocks the network for `test_fusion_client.py`, keeping
      #     the suite offline-safe.
      #   - `langchain-core` / `langchain-openai` are needed at import time:
      #     the test imports `DetectAgent` from `app.agents`, which
      #     transitively loads sibling sub-agents (auto_triage / phishing /
      #     identity / insider_threat / cloud) that import `langchain_core`
      #     and `langchain_openai` at module scope — even though the
      #     fusion-client test itself only calls into `app.tools.fusion`.
      - name: Install agents test dependencies
        run: pip install --quiet pytest pytest-asyncio respx langchain-core langchain-openai
      - name: Run agents unit tests (Pillar-1 eval suites + audit helpers + fusion client)
        # `test_audit_helpers.py` imports `app.investigator.state` directly
        # (it stubs out the package __init__ to avoid pulling in LangGraph),
        # so it needs only stdlib + pydantic, which API_DEPS already
        # provides. `test_fusion_client.py` (Issue #190) exercises the thin
        # httpx client that wires `DetectAgent.process` to the fusion
        # service's `POST /process` endpoint.
        working-directory: services/agents
        run: |
          python3 -m pytest \
            tests/test_mitre_accuracy.py \
            tests/test_alert_reduction.py \
            tests/test_investigation_completeness.py \
            tests/test_response_quality.py \
            tests/test_audit_helpers.py \
            tests/test_fusion_client.py \
            -v --tb=short

  # ──────────────────────────────────────────────
  # Python micro-services unit tests
  #   `services/fusion`, `services/honeytokens`, and `services/purple-team`
  #   used to ship with no Python tests gated in CI at all — only ruff +
  #   format check. These suites cover the pure helpers in each service
  #   (alert fingerprinting/correlation keys, honeytoken generators,
  #   ATT&CK coverage matrix builder) plus the thin FastAPI router contracts
  #   that don't need Postgres / Kafka / Redis to exercise (e.g. the
  #   fusion service's ``POST /process`` endpoint added in Issue #190,
  #   which uses an injected fake worker via ``set_worker(...)``). Real
  #   signal, fast — no service boot required.
  # ──────────────────────────────────────────────
  python-services-test:
    name: Python — Service unit tests (fusion, honeytokens, purple-team)
    runs-on: ubuntu-latest
    needs: python-lint
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up Python 3.12
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'
          cache: pip
      - name: Install minimal deps
        # Why each of the less obvious packages is here:
        #   - ``fastapi`` + ``httpx``: the fusion router-contract tests
        #     (Issue #190) import ``app.api.router`` and drive it with
        #     FastAPI's ``ASGITransport`` + ``httpx.AsyncClient``.
        #   - ``aiokafka``: ``app/api/router.py`` exposes ``set_worker(...)``,
        #     which type-references ``FusionWorker`` from
        #     ``app.workers.consumer``; importing the router therefore
        #     eagerly evaluates the consumer module, which imports
        #     ``aiokafka`` at module scope.
        # The full API dependency set is deliberately NOT installed — this
        # job stays "pure helpers + thin router contracts, no service boot,
        # no Postgres/Kafka/Redis".
        # NOTE(review): ``aioredis`` is archived upstream (its code moved
        # into redis-py as ``redis.asyncio``) — confirm a service still
        # imports it before keeping it in this list.
        # The folded scalar below collapses to a single command line.
        run: >-
          pip install --quiet
          pydantic pydantic-settings pytest pytest-asyncio structlog
          redis aioredis fastapi httpx aiokafka
      - name: Run fusion unit tests
        working-directory: services/fusion
        run: python -m pytest tests/ -v --tb=short
      - name: Run honeytokens unit tests
        working-directory: services/honeytokens
        run: python -m pytest tests/ -v --tb=short
      - name: Run purple-team unit tests
        working-directory: services/purple-team
        run: python -m pytest tests/ -v --tb=short

  # ──────────────────────────────────────────────
  # SDK packages (Python)
  #   The three Python SDKs ship as pip packages but never had a CI gate —
  #   regressions could land silently. This job installs each in editable
  #   mode and runs its bundled pytest suite. They're intentionally light
  #   on dependencies (httpx, pydantic, click, pyyaml) so the job is fast.
  # ──────────────────────────────────────────────
  sdk-python-test:
    name: SDK — Python (pytest)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: '3.12'
          cache: pip
      - name: Install SDKs in editable mode (plus test tooling)
        # The three packages are installed editable WITHOUT their `[dev]`
        # extras: the `-e <path>` arguments below do not request any extra.
        # The test runner and its plugins (pytest, pytest-asyncio,
        # pytest-httpx) are listed explicitly on the same command line
        # instead. pytest-httpx is only needed by `aisoc-sdk` (the typed
        # client tests), which is why it is added here rather than to the
        # package extras.
        run: |
          pip install --quiet \
            -e packages/plugin-sdk-py \
            -e packages/sdk-py \
            -e packages/aisoc-cli \
            pytest pytest-asyncio pytest-httpx
      # One step per package so a failure is attributed to the right SDK.
      - name: Run aisoc-plugin-sdk tests
        run: pytest packages/plugin-sdk-py/tests/ -v --tb=short
      - name: Run aisoc-sdk tests
        run: pytest packages/sdk-py/tests/ -v --tb=short
      - name: Run aisoc-cli tests
        run: pytest packages/aisoc-cli/tests/ -v --tb=short

  # ──────────────────────────────────────────────
  # SDK packages (TypeScript)
  #   `@aisoc/sdk` is a typed openapi-fetch client with a self-contained
  #   vitest suite (HTTP mocked, no real server). Same pnpm workspace
  #   pattern as the MCP job — install once at the root, then filter.
  # ──────────────────────────────────────────────
  sdk-ts-test:
    name: SDK — TypeScript (vitest)
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up pnpm
        uses: pnpm/action-setup@v4
      - name: Set up Node.js 22
        uses: actions/setup-node@v6
        with:
          node-version: '22'
          # Cache the pnpm store, keyed on the lockfile named below.
          cache: pnpm
          cache-dependency-path: pnpm-lock.yaml
      # Install once for the whole workspace; the steps after it narrow
      # each script to the `@aisoc/sdk` package with `--filter`.
      - name: Install workspace deps
        run: pnpm install --frozen-lockfile
      - name: Type-check
        run: pnpm --filter @aisoc/sdk typecheck
      - name: Test
        run: pnpm --filter @aisoc/sdk test
      - name: Build
        run: pnpm --filter @aisoc/sdk build

  # ──────────────────────────────────────────────
  # Go services (enrichment, ingest, demo-producer)
  # ──────────────────────────────────────────────
  go-build:
    name: Go — Build & Vet
    runs-on: ubuntu-latest
    strategy:
      # The three services are independent modules. Without this, the
      # default `fail-fast: true` cancels the remaining matrix legs as soon
      # as one fails, hiding whether the other services are also broken.
      fail-fast: false
      matrix:
        service: [enrichment, ingest, demo-producer]
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-go@v6
        with:
          go-version: '1.25'
          # Each service has its own go.sum, so the module cache is keyed
          # per matrix leg.
          cache-dependency-path: services/${{ matrix.service }}/go.sum
      - name: Build
        working-directory: services/${{ matrix.service }}
        run: go build ./...
      - name: Vet
        working-directory: services/${{ matrix.service }}
        run: go vet ./...
      - name: Test
        working-directory: services/${{ matrix.service }}
        # `-count=1` bypasses Go's test result cache so tests always run.
        run: go test ./... -count=1

  # ──────────────────────────────────────────────
  # Go SDK (`packages/plugin-sdk-go`)
  #   Lives outside `services/` so the matrix above doesn't pick it up.
  #   Has `client_test.go`, `loader_test.go`, `registry_test.go` already —
  #   they were never gated. Build + vet + test, no soft-fails.
  # ──────────────────────────────────────────────
  sdk-go-test:
    name: SDK — Go (build, vet, test)
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step in this job executes from the Go SDK module.
        working-directory: packages/plugin-sdk-go
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up Go 1.25
        uses: actions/setup-go@v6
        with:
          go-version: '1.25'
          cache-dependency-path: packages/plugin-sdk-go/go.sum
      - name: Build
        run: go build ./...
      - name: Vet
        run: go vet ./...
      - name: Test
        # `-count=1` bypasses Go's test result cache so tests always run.
        run: go test ./... -count=1

  # ──────────────────────────────────────────────
  # Pillar-1 Eval Gate: 4-suite eval harness (200-incident synthetic dataset)
  #   - Alert reduction ratio          (≥70%)  — REAL measurement against a
  #                                              fixed noisy alert stream
  #   - MITRE ATT&CK tactic accuracy   (≥80%)  — substrate self-consistency
  #                                              gate (extractor vs. dataset
  #                                              that's written to feed it)
  #   - Investigation completeness     (≥85%)  — substrate self-consistency
  #                                              gate (templated report vs.
  #                                              keywords from the same data)
  #   - Response-plan quality          (≥80%)  — substrate self-consistency
  #                                              gate (templated plan vs.
  #                                              rubric using the same data)
  # All suites run offline (no LLM, no DB) against the deterministic
  # `services/agents/tests/eval_data/synthetic_incidents.json` dataset.
  # This harness gates the substrate (extractors, fusion, templates, judges)
  # — it does NOT call the live LLM agent, so three of the four metrics are
  # self-consistency gates rather than agent accuracy scores. See
  # `apps/docs/docs/benchmark.md` for what each suite actually measures.
  # The job posts a markdown summary, uploads `eval_report.json`, and
  # commits historical numbers to the `eval-results` branch as
  # `eval/results/<commit_sha>.json` so the public eval harness page can
  # query them over time.
  # ──────────────────────────────────────────────
  p1-eval:
    name: P1 Eval — Pillar-1 Eval Harness Gate
    runs-on: ubuntu-latest
    permissions:
      # Write access is needed by the publish step at the end of this job,
      # which pushes to the `eval-results` branch.
      contents: write
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: '3.12'
      - name: Pull last published baseline from eval-results branch
        # Wave 2 — w2-dac. We fetch `eval/results/latest.json` from the
        # `eval-results` orphan branch (published by the publish step below
        # on main). When present, the eval runner gates a ≥1pp MITRE
        # regression vs. that baseline; missing baseline = no-op.
        if: github.event_name == 'pull_request'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -e
          mkdir -p /tmp/eval-baseline
          if git fetch origin eval-results:eval-results 2>/dev/null; then
            git --no-pager show eval-results:eval/results/latest.json \
              > /tmp/eval-baseline/latest.json 2>/dev/null \
              || rm -f /tmp/eval-baseline/latest.json
          fi
          if [ -f /tmp/eval-baseline/latest.json ]; then
            echo "Baseline found at /tmp/eval-baseline/latest.json"
          else
            echo "No baseline yet — first run will publish one."
          fi
      - name: Install eval dependencies
        run: pip install --quiet pyyaml pytest pytest-asyncio structlog pydantic
      - name: Run all eval suites
        # On PRs we additionally gate against the published baseline so a
        # detection-as-code merge that drops MITRE accuracy by ≥ 1pp blocks
        # CI (exit code 2). On push to main, `--baseline` is omitted so the
        # publish step below establishes a fresh baseline.
        run: |
          if [ -f /tmp/eval-baseline/latest.json ]; then
            python3 scripts/run_evals.py \
              --ci \
              --baseline /tmp/eval-baseline/latest.json \
              --max-regression-pp 1.0 \
              --out eval_report.json
          else
            python3 scripts/run_evals.py --ci --out eval_report.json
          fi
      - name: Post job summary
        # Runs even when an earlier step failed, so a failing gate still
        # gets a readable table. Because of that, the report file may not
        # exist (e.g. the install step or the runner itself crashed before
        # writing it); in that case a short note is written instead of
        # failing this step with a second, unrelated error.
        if: always()
        run: |
          python3 - <<'PY' >> "$GITHUB_STEP_SUMMARY"
          import json, pathlib, sys
          report = pathlib.Path("eval_report.json")
          if not report.is_file():
              print("## AiSOC Pillar-1 Eval Harness — 200-incident synthetic dataset\n")
              print("No `eval_report.json` was produced — the eval run failed before writing a report. See the job log.\n")
              sys.exit(0)
          d = json.loads(report.read_text())
          print("## AiSOC Pillar-1 Eval Harness — 200-incident synthetic dataset\n")
          print("Alert-reduction is a real measurement; the other three suites are substrate self-consistency gates.\n")
          print(f"Dataset: {d['dataset']}  ")
          print(f"Generated: {d['generated_at']}\n")
          print("| Suite | Metric | Value | Target | Result |")
          print("|---|---|---:|---:|:---:|")
          for name, suite in d["suites"].items():
              mark = "PASS" if suite["passed"] else "FAIL"
              print(
                  f"| `{name}` | {suite['metric']} | "
                  f"{suite['value']:.3f} | ≥ {suite['target']:.2f} | {mark} |"
              )
          verdict = "ALL GATES PASSED" if d["all_passed"] else "REGRESSION DETECTED"
          print(f"\n{verdict}\n")
          PY
      - name: Upload eval report
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: aisoc-eval-report
          path: eval_report.json
      - name: Publish historical numbers to eval-results branch
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMMIT_SHA: ${{ github.sha }}
        run: |
          set -e
          # Snapshot the report outside the work tree; the branch switch and
          # cleanup below would otherwise remove it.
          cp eval_report.json /tmp/eval_report.json

          if git fetch origin eval-results:eval-results 2>/dev/null; then
            git checkout eval-results
          else
            # First publish: the branch does not exist yet.
            git checkout --orphan eval-results
            # `--orphan` leaves every file of the previously checked-out
            # commit staged in the index. Empty the index so the first
            # commit on this branch contains only `eval/results`, not a
            # copy of the whole source tree.
            git read-tree --empty
          fi

          # Clear any previous workspace state on the orphan branch
          if [ ! -f .gitkeep ]; then
            find . -mindepth 1 -maxdepth 1 \
              ! -name '.git' ! -name 'eval' ! -name '.gitkeep' \
              -exec rm -rf {} + 2>/dev/null || true
          fi

          mkdir -p eval/results
          cp /tmp/eval_report.json "eval/results/${COMMIT_SHA}.json"

          # Maintain a stable `latest.json` for the eval harness page badge
          cp /tmp/eval_report.json eval/results/latest.json

          # Identity policy: every commit on this repo must be authored by
          # Beenu Arora <beenu@cyble.com>. The eval-results branch is no
          # exception — even though it is machine-written by CI, attributing
          # it to a separate "AiSOC CI" identity would re-pollute the
          # contributors graph after the 2026-05-08 history rewrite.
          git config user.email "beenu@cyble.com"
          git config user.name "Beenu Arora"
          git add eval/results
          git commit -m "eval: ${COMMIT_SHA::7} (auto)" || echo "no changes"
          # Publishing is best-effort and must not fail the gate, but a
          # failed push is surfaced as a warning annotation rather than a
          # line buried in the log.
          git push origin eval-results \
            || echo "::warning::push to eval-results failed; eval history was not updated for ${COMMIT_SHA::7}"

  # ──────────────────────────────────────────────
  # Pillar-2: Playbook JSON Schema lint
  # Validates all playbook JSON files against schemas/playbook.schema.json
  # ──────────────────────────────────────────────
  p2-playbook-schema-lint:
    name: P2 — Playbook Schema Lint
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      - name: Set up Python 3.12
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'
      # `jsonschema` is the only package this job installs before running
      # the lint script.
      - name: Install jsonschema
        run: pip install jsonschema
      - name: Lint playbook files
        run: python3 scripts/lint_playbooks.py

  # ──────────────────────────────────────────────
  # Docker Compose smoke test
  # ──────────────────────────────────────────────
  docker-compose-check:
    name: Docker Compose — Validate
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v6
      # `config --quiet` only parses and validates the compose file; it
      # prints nothing on success and does not start any containers.
      - name: Validate compose file
        run: docker compose config --quiet