{
  "benchmark": "ImplicitMemBench",
  "status": "candidate_public_claim",
  "dataset": {
    "source": null,
    "license": null,
    "vendored": false
  },
  "run": {
    "commit": null,
    "packageVersion": null,
    "command": "phase-61 live full-300 (run-phase61-full300-20260505T170001Z)",
    "executionFailures": 0
  },
  "model": {
    "answerModel": "gpt-5.5",
    "judgeModel": "gpt-5.5",
    "sameModelJudge": true
  },
  "metrics": {
    "primary": "overall score (Full-300)",
    "score": 0.7109,
    "baseline": 0.4267
  },
  "coverage": {
    "complete": true,
    "note": "Full-300; scorer families priming_pair_judge / text_behavior_judge (judge-scored) + structured_first_action (deterministic)"
  },
  "claimBoundary": {
    "publicClaimAllowed": false,
    "reason": "Blocked: most scorer families are LLM-judge-based with a same-model judge (gpt-5.5 answer + gpt-5.5 judge); dataset source/license unverified; exact commit and v0.3 package version unpinned. Needs an independent judge and verified provenance before a public claim."
  }
}