{ "title": "L55 CoT-Integrity probe is template-locked epiphenomenal (Qwen3.6-27B)", "author": "caiovicentino", "type": "adversarial-finding", "license": "apache-2.0", "model_id": "Qwen/Qwen3.6-27B-Instruct", "claim": "N=240. AUROC 0.91. Bidirectional steering up to α=+200 (>‖residual‖) produces ZERO behavioral change for probe AND random direction. Mechanism: enable_thinking=False chat template injects an empty think block in input tokens — thinking decision is not in residual stream.", "numbers": { "auroc": 0.91, "n_samples": 240, "layer": 55, "position": "mid_think", "alpha_tested_max": 200, "behavioral_change_rate": 0.0, "verdict_class": "epiphenomenal-template" }, "artifacts": [ "phase8_results.json", "phase8_redux_top5.json" ], "methodology_check": { "verdict": "epiphenomenal-template", "baselines_run": [ "random_direction_random_acts", "random_direction_real_acts" ], "control_token_normalization": true, "structural_rigidity_sweep": true, "amplitude_tested_x_norm": 2.0 }, "reproduces": null, "schema_version": 1, "created_at": "2026-05-11T01:02:45Z", "manifest_sha256": "a0c01e67c97d6d1beed9539b9259d774c6d67cd8bd7f9dbfafb552572fb48663", "hf_repo_id": null, "hf_url": null, "doi": null, "paper_url": "https://openinterp.org/research/papers/two-forms-epiphenomenal-probes" }