{
  "title": "L55 CoT-Integrity probe is template-locked epiphenomenal (Qwen3.6-27B)",
  "author": "caiovicentino",
  "type": "adversarial-finding",
  "license": "apache-2.0",
  "model_id": "Qwen/Qwen3.6-27B-Instruct",
  "claim": "N=240. AUROC 0.91. Bidirectional steering up to α=+200 (>‖residual‖) produces ZERO behavioral change for probe AND random direction. Mechanism: enable_thinking=False chat template injects <think></think> in input tokens — thinking decision is not in residual stream.",
  "numbers": {
    "auroc": 0.91,
    "n_samples": 240,
    "layer": 55,
    "position": "mid_think",
    "alpha_tested_max": 200,
    "behavioral_change_rate": 0.0,
    "verdict_class": "epiphenomenal-template"
  },
  "artifacts": [
    "phase8_results.json",
    "phase8_redux_top5.json"
  ],
  "methodology_check": {
    "verdict": "epiphenomenal-template",
    "baselines_run": [
      "random_direction_random_acts",
      "random_direction_real_acts"
    ],
    "control_token_normalization": true,
    "structural_rigidity_sweep": true,
    "amplitude_tested_x_norm": 2.0
  },
  "reproduces": null,
  "schema_version": 1,
  "created_at": "2026-05-11T01:02:45Z",
  "manifest_sha256": "a0c01e67c97d6d1beed9539b9259d774c6d67cd8bd7f9dbfafb552572fb48663",
  "hf_repo_id": null,
  "hf_url": null,
  "doi": null,
  "paper_url": "https://openinterp.org/research/papers/two-forms-epiphenomenal-probes"
}