import Specimen.DeriveConstrainedProducer
import Specimen.DeriveChecker
import SpecimenTest.CommonDefinitions.BinaryTree
import SpecimenTest.DeriveDecOpt.DeriveBSTChecker
import SpecimenTest.DeriveArbitrarySuchThat.DeriveBSTGenerator
import SpecimenTest.DeriveArbitrarySuchThat.DeriveSTLCGenerator
import SpecimenTest.DeriveArbitrarySuchThat.DeriveRegExpMatchGenerator

/-!
# Schedule Quality Regression Tests

Snapshots the kernel-elaborated code for derived generators via `#print`.
Any change to the schedule derivation algorithm that alters the generated
code will cause these `#guard_msgs` checks to fail, catching regressions.

Sections 1–4 snapshot representative generators (Between, BST, STLC, RegExp).
Section 5 is a direct regression test for the bug where relations with `≠`
hypotheses alongside recursive hypotheses produced bad schedules.
-/

open Plausible

set_option guard_msgs.diff true
set_option linter.unusedVariables false
set_option match.ignoreUnusedAlts true

-- ============================================================
-- Section 1: Between generator
-- ============================================================

-- Sections 1-4 previously snapshotted the aux_arb internal functions.
-- With derive_mutual, instances use global defs instead. Verify instances exist:
#check (inferInstance : ArbitrarySizedSuchThat Nat (fun x => Between 0 x 10))
#check (inferInstance : ArbitrarySizedSuchThat BinaryTree (fun t => BST 0 10 t))
#check (inferInstance : ArbitrarySizedSuchThat term (fun e => typing [] e .Nat))
#check (inferInstance : ArbitrarySizedSuchThat (List Nat) (fun s => ExpMatch s (.Char 1)))

-- ============================================================
-- Section 5: ≠ + recursion bug regression
-- ============================================================

/-- List membership with an inequality guard — the minimal trigger for the bug.
    Before the fix, derive_generator failed with a DecOpt synthesis error. -/
inductive MemNat : List Nat → Nat → Prop where
| here  : ∀ n xs, MemNat (n :: xs) n
| there : ∀ n m xs, n ≠ m → MemNat xs n → MemNat (m :: xs) n

#guard_msgs(drop info) in
derive_checker (fun xs n => MemNat xs n)

#guard_msgs(drop info) in
derive_generator (fun n => ∃ xs, MemNat xs n)

/--
info: def instArbitrarySizedSuchThatListNatMemNat.aux_arb : Nat → Nat → Nat → Nat → Gen (List Nat) :=
fun fuel initSize size n_1 =>
  Nat.brecOn (motive := fun fuel => Nat → Gen (List Nat)) fuel
    (instArbitrarySizedSuchThatListNatMemNat.aux_arb._f initSize n_1) size
-/
#guard_msgs in
#print instArbitrarySizedSuchThatListNatMemNat.aux_arb

-- ============================================================
-- Section 6: Generator quality sampling
-- ============================================================

/-! Sampling-based quality metrics for derived generators.
    Measures success rate (non-discards), distinct outputs, and size
    distribution to catch regressions in generation quality.
    Thresholds are set conservatively — a regression should trip these
    without false-positiving on random variance. -/

section GeneratorQuality

private def treeDepth : BinaryTree → Nat
  | .Leaf => 0
  | .Node _ l r => 1 + max (treeDepth l) (treeDepth r)

structure QualityStats where
  trials : Nat
  successes : Nat
  uniques : Nat
  maxDepth : Nat
  depthBuckets : Array Nat  -- [0-1, 2-3, 4-5, 6+]
  elapsedMs : Nat := 0
  deriving Repr

private def emptyStats (n : Nat) : QualityStats :=
  { trials := n, successes := 0, uniques := 0, maxDepth := 0, depthBuckets := #[0, 0, 0, 0] }

private def depthBucket (d : Nat) : Nat :=
  if d ≤ 1 then 0 else if d ≤ 3 then 1 else if d ≤ 5 then 2 else 3

/-- Sample a generator `trials` times, measuring success rate, unique count,
    depth distribution, and wall-clock time. -/
private def sampleQuality (gen : Nat → Gen α) (depth : α → Nat) (eq : α → α → Bool)
    (trials : Nat := 200) (size : Nat := 3) : IO QualityStats := do
  let startNs ← IO.monoNanosNow
  let mut stats := emptyStats trials
  let mut seen : Array α := #[]
  for i in [:trials] do
    try
      let v ← Gen.run (gen size) (size + i % 4)
      stats := { stats with successes := stats.successes + 1 }
      let d := depth v
      stats := { stats with maxDepth := max stats.maxDepth d }
      let bucket := depthBucket d
      stats := { stats with depthBuckets := stats.depthBuckets.modify bucket (· + 1) }
      if !seen.any (eq v ·) then
        seen := seen.push v
    catch _ => pure ()
  let elapsedNs ← IO.monoNanosNow
  stats := { stats with uniques := seen.size, elapsedMs := (elapsedNs - startNs) / 1000000 }
  return stats

private def ppStats (name : String) (s : QualityStats) : String :=
  let rate := (s.successes.toFloat / s.trials.toFloat * 100.0).round.toUInt32.toNat
  let bucketStr := s!"{s.depthBuckets[0]!}/{s.depthBuckets[1]!}/{s.depthBuckets[2]!}/{s.depthBuckets[3]!}"
  s!"{name}: {rate}% success, {s.uniques} uniques, maxDepth={s.maxDepth}, buckets=[{bucketStr}], {s.elapsedMs}ms"

/-- Quality assertions: generators should meet minimum quality bars. -/
private def assertQuality (name : String) (s : QualityStats)
    (minSuccessRate : Float := 0.5) (minUniques : Nat := 10) : IO Unit := do
  let rate := s.successes.toFloat / s.trials.toFloat
  if rate < minSuccessRate then
    throw <| IO.userError s!"{name}: success rate {rate} below threshold {minSuccessRate}"
  if s.uniques < minUniques then
    throw <| IO.userError s!"{name}: only {s.uniques} uniques, expected at least {minUniques}"

end GeneratorQuality

-- ============================================================
-- Section 7: Scoring strategy comparison
-- ============================================================

/-! Derives the same relation under each scoring strategy and compares
    sampling quality. This catches cases where a strategy change causes
    a previously-good generator to degrade. -/

section StrategyComparison

-- Compare scoring strategies on the BST generator (deps already derived above).
-- Since derive_mutual registers global instances, we can't derive three different
-- instances for the same relation. Instead, we derive once per strategy using
-- distinct wrapper relations that are isomorphic but have separate instances.

inductive BST_Default : Nat → Nat → BinaryTree → Prop
  | bstLeaf : BST_Default lo hi .Leaf
  | bstNode : ∀ x l r lo hi,
      Between lo x hi → BST_Default lo x l → BST_Default x hi r →
      BST_Default lo hi (.Node x l r)

inductive BST_WorstLeaf : Nat → Nat → BinaryTree → Prop
  | bstLeaf : BST_WorstLeaf lo hi .Leaf
  | bstNode : ∀ x l r lo hi,
      Between lo x hi → BST_WorstLeaf lo x l → BST_WorstLeaf x hi r →
      BST_WorstLeaf lo hi (.Node x l r)

inductive BST_Density : Nat → Nat → BinaryTree → Prop
  | bstLeaf : BST_Density lo hi .Leaf
  | bstNode : ∀ x l r lo hi,
      Between lo x hi → BST_Density lo x l → BST_Density x hi r →
      BST_Density lo hi (.Node x l r)

set_option specimen.autoDeriveDeps true
set_option specimen.multiOutput true

set_option specimen.scoreType "Scoring.DefaultScore" in
#guard_msgs(drop info) in
derive_mutual (fun lo hi => ∃ t, BST_Default lo hi t)

set_option specimen.scoreType "Scoring.WorstLeafScore" in
#guard_msgs(drop info) in
derive_mutual (fun lo hi => ∃ t, BST_WorstLeaf lo hi t)

set_option specimen.scoreType "Scoring.DensityScore" in
#guard_msgs(drop info) in
derive_mutual (fun lo hi => ∃ t, BST_Density lo hi t)

#guard_msgs(drop info) in
#eval do
  let sample (gen : Nat → Gen BinaryTree) (name : String) : IO Unit := do
    let stats ← sampleQuality gen treeDepth
      (fun a b => toString (repr a) == toString (repr b)) 200 8
    IO.println (ppStats name stats)
    assertQuality name stats (minSuccessRate := 0.5) (minUniques := 10)

  IO.println "=== Strategy comparison: BST [0,10] generator ==="
  let instD : ArbitrarySizedSuchThat BinaryTree (fun t => BST_Default 0 10 t) := inferInstance
  sample instD.arbitrarySizedST "  DefaultScore "
  let instW : ArbitrarySizedSuchThat BinaryTree (fun t => BST_WorstLeaf 0 10 t) := inferInstance
  sample instW.arbitrarySizedST "  WorstLeafScore"
  let instDn : ArbitrarySizedSuchThat BinaryTree (fun t => BST_Density 0 10 t) := inferInstance
  sample instDn.arbitrarySizedST "  DensityScore  "

end StrategyComparison