import Mathlib
import Langlib.Automata.FiniteState.Equivalence.RegularDFAEquiv
import Langlib.Utilities.ClosurePredicates

/-! # Regular Closure Under Kleene Star

This file proves that the class of regular languages is closed under Kleene star
by constructing an ε-NFA from a DFA.

## Main results

- `Language.IsRegular.kstar'` — if `L` is regular, then `L∗` is regular.
-/

namespace Language

open Classical

variable {α : Type*}

section StarεNFA

variable {σ : Type*} [Fintype σ]

/-- ε-NFA for the Kleene star of a DFA.

States are `σ ⊕ Unit`. The fresh state `Sum.inr ()` is both the start and the
sole accepting state. It has an ε-transition into the DFA's start state.
Accepting states of the DFA have an ε-transition back to the fresh state. -/
noncomputable def starεNFA (M : DFA α σ) : εNFA α (σ ⊕ Unit) where
  step := fun s c =>
    match s, c with
    | Sum.inl q, some a => {Sum.inl (M.step q a)}
    | Sum.inl q, none => if q ∈ M.accept then {Sum.inr ()} else ∅
    | Sum.inr (), some _ => ∅
    | Sum.inr (), none => {Sum.inl M.start}
  start := {Sum.inr ()}
  accept := {Sum.inr ()}

variable (M : DFA α σ)

/-
ε-closure of `{Sum.inr ()}` includes `Sum.inl M.start`.
-/
private lemma εClosure_fresh :
    (starεNFA M).εClosure {Sum.inr ()} = {Sum.inr (), Sum.inl M.start} := by
      apply le_antisymm;
      · intro s hs;
        induction hs ; aesop;
        unfold starεNFA at * ; aesop;
      · intro x hx;
        induction' n : x with s t ih;
        · have h_eps : Sum.inl M.start ∈ (starεNFA M).εClosure {Sum.inr ()} := by
            apply εNFA.εClosure.step;
            rotate_right;
            exact Sum.inr ();
            · exact Set.mem_singleton _;
            · exact εNFA.εClosure.base _ ( by simp +decide );
          aesop;
        · constructor;
          aesop

/-
ε-closure of `{Sum.inl q}` when `q ∉ M.accept`.
-/
private lemma εClosure_inl_not_accept (q : σ) (hq : q ∉ M.accept) :
    (starεNFA M).εClosure {Sum.inl q} = {Sum.inl q} := by
      ext; simp [starεNFA, hq];
      constructor <;> intro h;
      · induction h;
        · aesop;
        · aesop;
      · constructor;
        aesop

/-
ε-closure of `{Sum.inl q}` when `q ∈ M.accept`.
-/
private lemma εClosure_inl_accept (q : σ) (hq : q ∈ M.accept) :
    (starεNFA M).εClosure {Sum.inl q} = {Sum.inl q, Sum.inr (), Sum.inl M.start} := by
      apply Set.Subset.antisymm;
      · intro x hx; induction hx; aesop;
        unfold starεNFA at *; aesop;
      · simp +decide [ Set.subset_def, starεNFA ];
        refine' ⟨ _, _, _ ⟩;
        · exact Set.mem_setOf.mpr ( by tauto );
        · refine' εNFA.εClosure.step _ _ _ _;
          exact Sum.inl q;
          · grind;
          · exact εNFA.εClosure.base _ rfl;
        · refine' εNFA.εClosure.step _ _ _ _;
          exact Sum.inr ();
          · exact Set.mem_singleton _;
          · refine' εNFA.εClosure.step _ _ _ _;
            exact Sum.inl q;
            · grind;
            · exact?

/-
`Sum.inl (M.evalFrom M.start w)` is always reachable from `{Sum.inl M.start}`.
-/
private lemma evalFrom_inl_contains (w : List α) :
    Sum.inl (M.evalFrom M.start w) ∈ (starεNFA M).evalFrom {Sum.inl M.start} w := by
      induction' w using List.reverseRecOn with w a ih <;> simp +decide [ *, List.foldl_append ];
      · exact εNFA.εClosure.base _ ( by simp +decide );
      · refine' Or.inl ⟨ _, ih, _ ⟩;
        exact εNFA.εClosure.base _ ( by simp +decide [ starεNFA ] )

/-
evalFrom distributes over append for the star εNFA.
-/
private lemma evalFrom_append (S : Set (σ ⊕ Unit)) (u v : List α) :
    (starεNFA M).evalFrom S (u ++ v) =
    (starεNFA M).evalFrom ((starεNFA M).evalFrom S u) v := by
      unfold εNFA.evalFrom;
      induction' u using List.reverseRecOn with u ih;
      · induction' v using List.reverseRecOn with v ih <;> simp_all +decide [ List.foldl_append ];
        refine' le_antisymm _ _;
        · exact Set.mem_setOf_eq.mpr ( by tauto );
        · intro x hx;
          induction' hx with x hx ih;
          · exact hx;
          · rename_i h₁ h₂ h₃;
            refine' εNFA.εClosure.step _ _ _ _;
            exacts [ ih, h₁, h₃ ];
      · simp_all +decide [ List.foldl_append, εNFA.stepSet ];
        have h_eps_closure_union : ∀ (S : Set (σ ⊕ Unit)), (starεNFA M).εClosure (⋃ s ∈ S, (starεNFA M).εClosure ((starεNFA M).step s (some ih))) = ⋃ s ∈ S, (starεNFA M).εClosure ((starεNFA M).step s (some ih)) := by
          intro S
          apply le_antisymm;
          · intro x hx;
            induction' hx with x hx ih;
            · exact hx;
            · rename_i h₁ h₂ h₃;
              cases ih <;> cases ‹σ ⊕ Unit› <;> simp_all +decide [ starεNFA ];
              · obtain ⟨ a, ha₁, ha₂ ⟩ := h₃;
                refine' ⟨ a, ha₁, _ ⟩;
                exact εNFA.εClosure.step _ _ ( by simp +decide [ h₁ ] ) ha₂;
              · obtain ⟨ a, ha₁, ha₂ ⟩ := h₃;
                exact ⟨ a, ha₁, by
                  exact εNFA.εClosure.step _ _ ( by simp +decide [ starεNFA ] ) ha₂ ⟩;
          · exact fun x hx => by rcases Set.mem_iUnion₂.1 hx with ⟨ s, hs, hx ⟩ ; exact Set.mem_of_mem_of_subset hx ( Set.Subset.trans ( by aesop_cat ) ( εNFA.subset_εClosure _ _ ) ) ;
        rw [ h_eps_closure_union ]

/-
evalFrom is monotone in the start set.
-/
private lemma evalFrom_mono (S T : Set (σ ⊕ Unit)) (w : List α) (h : S ⊆ T) :
    (starεNFA M).evalFrom S w ⊆ (starεNFA M).evalFrom T w := by
      induction' w using List.reverseRecOn with w ih generalizing S T <;> simp_all +decide [ εNFA.evalFrom ];
      · intro x hx;
        induction' hx with x hx ih;
        · exact Set.mem_setOf_eq.mpr ( by tauto );
        · apply_rules [ εNFA.εClosure.step ];
      · rename_i h';
        exact Set.biUnion_mono ( h' S T h ) fun _ _ => by tauto;

/-
If `q ∈ M.accept` and `Sum.inl q` is reachable from S, then
`Sum.inr ()` is also reachable from S.
-/
private lemma fresh_reachable_of_accept (S : Set (σ ⊕ Unit)) (q : σ) (w : List α)
    (hq : q ∈ M.accept)
    (h : Sum.inl q ∈ (starεNFA M).evalFrom S w) :
    Sum.inr () ∈ (starεNFA M).evalFrom S w := by
      have h_closure : Sum.inr () ∈ (starεNFA M).εClosure {Sum.inl q} := by
        exact εClosure_inl_accept M q hq ▸ Set.mem_insert_of_mem _ ( Set.mem_insert _ _ );
      induction' w using List.reverseRecOn with w ih generalizing S <;> simp_all +decide [ εNFA.stepSet ];
      · have h_closure : ∀ s, s ∈ (starεNFA M).εClosure {Sum.inl q} → s ∈ (starεNFA M).εClosure S := by
          intro s hs
          induction' hs with s hs ih;
          · aesop;
          · exact εNFA.εClosure.step _ _ ‹_› ‹_›;
        exact h_closure _ ‹_›;
      · rcases h with ( ⟨ a, ha, ha' ⟩ | ⟨ b, hb, hb' ⟩ );
        · refine' Or.inl ⟨ a, ha, _ ⟩;
          have h_closure : (starεNFA M).εClosure {Sum.inl q} ⊆ (starεNFA M).εClosure ((starεNFA M).step (Sum.inl a) (some ih)) := by
            intro x hx;
            induction' hx with x hx ih;
            · grind;
            · apply_rules [ εNFA.εClosure.step ];
          exact h_closure ‹_›;
        · cases b ; simp_all +decide [ starεNFA ]

/-
stepSet distributes over union.
-/
private lemma stepSet_union (S T : Set (σ ⊕ Unit)) (a : α) :
    (starεNFA M).stepSet (S ∪ T) a = (starεNFA M).stepSet S a ∪ (starεNFA M).stepSet T a := by
  simp only [εNFA.stepSet]
  rw [Set.biUnion_union]

/-
foldl stepSet distributes over union.
-/
private lemma foldl_stepSet_union (S T : Set (σ ⊕ Unit)) (w : List α) :
    List.foldl (starεNFA M).stepSet (S ∪ T) w =
    List.foldl (starεNFA M).stepSet S w ∪ List.foldl (starεNFA M).stepSet T w := by
  induction w generalizing S T with
  | nil => simp
  | cons a w ih =>
    simp only [List.foldl_cons]
    rw [stepSet_union, ih]

/-
foldl stepSet is monotone in the start set.
-/
private lemma foldl_stepSet_mono (S T : Set (σ ⊕ Unit)) (w : List α) (h : S ⊆ T) :
    List.foldl (starεNFA M).stepSet S w ⊆ List.foldl (starεNFA M).stepSet T w := by
  induction w generalizing S T with
  | nil => exact h
  | cons a w ih =>
    simp only [List.foldl_cons]
    apply ih
    exact Set.biUnion_mono h fun _ _ => le_refl _

/-
If q ∉ M.accept, evalFrom {Sum.inl q} (a :: w) = evalFrom {Sum.inl (M.step q a)} w.
-/
private lemma evalFrom_inl_cons_not_accept (q : σ) (hq : q ∉ M.accept) (a : α) (w : List α) :
    (starεNFA M).evalFrom {Sum.inl q} (a :: w) =
    (starεNFA M).evalFrom {Sum.inl (M.step q a)} w := by
      unfold εNFA.evalFrom;
      -- Since q ∉ M.accept, the ε-closure of {Sum.inl q} is {Sum.inl q} itself.
      have h_εClosure : (starεNFA M).εClosure {Sum.inl q} = {Sum.inl q} := by
        exact?;
      simp +decide [ h_εClosure, εNFA.stepSet ];
      congr

/-
If q ∈ M.accept, evalFrom {Sum.inl q} (a :: w) =
  evalFrom {Sum.inl (M.step q a)} w ∪ evalFrom {Sum.inl (M.step M.start a)} w.
-/
private lemma evalFrom_inl_cons_accept (q : σ) (hq : q ∈ M.accept) (a : α) (w : List α) :
    (starεNFA M).evalFrom {Sum.inl q} (a :: w) =
    (starεNFA M).evalFrom {Sum.inl (M.step q a)} w ∪
    (starεNFA M).evalFrom {Sum.inl (M.step M.start a)} w := by
      simp +decide [ εNFA.evalFrom, εNFA.stepSet ];
      rw [ εClosure_inl_accept M q hq ];
      -- Apply the foldl_stepSet_union lemma to split the foldl into the union of two foldls, each starting from a different ε-closure.
      have h_foldl_union : ∀ (S T : Set (σ ⊕ Unit)) (w : List α), List.foldl (starεNFA M).stepSet (S ∪ T) w = List.foldl (starεNFA M).stepSet S w ∪ List.foldl (starεNFA M).stepSet T w := by
        exact?;
      rw [ ← h_foldl_union ] ; congr ; ext ; simp +decide [ starεNFA ] ;

/-
evalFrom {Sum.inr ()} (a :: w) = evalFrom {Sum.inl (M.step M.start a)} w.
-/
private lemma evalFrom_fresh_cons (a : α) (w : List α) :
    (starεNFA M).evalFrom {Sum.inr ()} (a :: w) =
    (starεNFA M).evalFrom {Sum.inl (M.step M.start a)} w := by
      unfold εNFA.evalFrom;
      simp +decide [ εClosure_fresh, εNFA.stepSet ];
      unfold starεNFA at *; aesop;

/-
Backward: `KStar.kstar M.accepts ⊆ (starεNFA M).accepts`.
-/
private lemma star_backward {w : List α}
    (hw : w ∈ KStar.kstar M.accepts) :
    w ∈ (starεNFA M).accepts := by
      unfold KStar.kstar at hw
      generalize_proofs at *;
      have h_ind : ∀ L : List (List α), (∀ y ∈ L, y ∈ M.accepts) → Sum.inr () ∈ (starεNFA M).evalFrom {Sum.inr ()} L.flatten := by
        intro L hL; induction' L with y L ih <;> simp_all +decide [ Set.subset_def ] ;
        · exact Set.mem_setOf.mpr ( by tauto );
        · have h_eval_y : Sum.inl (M.evalFrom M.start y) ∈ (starεNFA M).evalFrom {Sum.inr ()} y := by
            have h_eval_y : Sum.inl (M.evalFrom M.start y) ∈ (starεNFA M).evalFrom {Sum.inl M.start} y := by
              grind +suggestions
            generalize_proofs at *; (
            have h_eval_y : {Sum.inl M.start} ⊆ (starεNFA M).evalFrom {Sum.inr ()} [] := by
              simp +decide [ εClosure_fresh ]
            generalize_proofs at *; (
            have h_eval_y : (starεNFA M).evalFrom {Sum.inl M.start} y ⊆ (starεNFA M).evalFrom ((starεNFA M).evalFrom {Sum.inr ()} []) y := by
              exact?
            generalize_proofs at *; (
            convert h_eval_y ‹_› using 1
            generalize_proofs at *; (
            simp +decide [ εNFA.evalFrom ];
            rw [ εClosure_fresh ];
            rw [ show ( starεNFA M ).εClosure { Sum.inr (), Sum.inl M.start } = { Sum.inr (), Sum.inl M.start } from ?_ ];
            refine' Set.Subset.antisymm _ _ <;> simp +decide [ Set.subset_def, εNFA.εClosure ];
            · intro a ha; contrapose! ha; simp_all +decide [ εNFA.εClosure ] ;
              intro h; have := h; simp_all +decide [ εNFA.εClosure ] ;
              have h_eval_y : ∀ s ∈ (starεNFA M).εClosure {Sum.inr (), Sum.inl M.start}, s = Sum.inr () ∨ s = Sum.inl M.start := by
                intro s hs; induction hs; aesop;
                unfold starεNFA at *; aesop;
              generalize_proofs at *; (
              grind);
            · exact ⟨ by exact Set.mem_of_mem_of_subset ( Set.mem_insert _ _ ) ( εNFA.subset_εClosure _ _ ), by exact Set.mem_of_mem_of_subset ( Set.mem_insert_of_mem _ ( Set.mem_singleton _ ) ) ( εNFA.subset_εClosure _ _ ) ⟩))))
          generalize_proofs at *; (
          have h_eval_y : Sum.inr () ∈ (starεNFA M).evalFrom {Sum.inr ()} y := by
            apply_rules [ fresh_reachable_of_accept ] ; aesop;
          generalize_proofs at *; (
          exact Set.mem_of_mem_of_subset ih ( evalFrom_mono _ _ _ _ ( Set.singleton_subset_iff.mpr h_eval_y ) ) |> fun h => by simpa [ evalFrom_append ] using h;));
      unfold εNFA.accepts;
      obtain ⟨ L, rfl, hL ⟩ := hw;
      use Sum.inr ();
      aesop

/-
If `Sum.inr ()` is reachable from `{Sum.inl q}` after processing `w`,
then some prefix of `w` takes `M` from `q` to an accepting state.
-/
private lemma inl_to_fresh_split (q : σ) (w : List α)
    (h : Sum.inr () ∈ (starεNFA M).evalFrom {Sum.inl q} w) :
    ∃ u v, w = u ++ v ∧ M.evalFrom q u ∈ M.accept ∧
      Sum.inr () ∈ (starεNFA M).evalFrom {Sum.inr ()} v := by
        induction' w with a w ih generalizing q;
        · by_cases hq : q ∈ M.accept <;> simp_all +decide [ εClosure_fresh, εClosure_inl_not_accept, εClosure_inl_accept ];
        · by_cases hq : q ∈ M.accept;
          · rw [ evalFrom_inl_cons_accept ] at h;
            · rcases h with ( h | h );
              · rcases ih _ h with ⟨ u, v, rfl, hu, hv ⟩ ; exact ⟨ a :: u, v, rfl, by simpa [ DFA.evalFrom ] using hu, hv ⟩;
              · obtain ⟨ u, v, rfl, hu, hv ⟩ := ih _ h;
                refine' ⟨ [ ], a :: ( u ++ v ), _, _, _ ⟩ <;> simp_all +decide [ DFA.evalFrom ];
                rw [ evalFrom_fresh_cons ] ; aesop;
            · exact hq;
          · -- By definition of `evalFrom`, we have:
            have h_eval : (starεNFA M).evalFrom {Sum.inl q} (a :: w) = (starεNFA M).evalFrom {Sum.inl (M.step q a)} w := by
              exact?;
            rcases ih _ ( h_eval ▸ h ) with ⟨ u, v, rfl, hu, hv ⟩ ; exact ⟨ a :: u, v, by simp +decide, by simpa [ DFA.evalFrom ] using hu, hv ⟩

/-
Forward: `(starεNFA M).accepts ⊆ KStar.kstar M.accepts`.
-/
private lemma star_forward {w : List α}
    (hw : w ∈ (starεNFA M).accepts) :
    w ∈ KStar.kstar M.accepts := by
      -- By induction on the length of w, we can show that if Sum.inr () is in the evaluation from {Sum.inr ()} of w, then w is in the Kleene star of M's accepts.
      induction' n : w.length using Nat.strong_induction_on with n ih generalizing w;
      rcases w with ( _ | ⟨ a, w ⟩ ) <;> simp_all +decide;
      · exact ⟨ [ ], rfl, by simp +decide ⟩;
      · -- By `inl_to_fresh_split` on `(M.step M.start a, w)`, we get `∃ u v, w = u ++ v ∧ M.evalFrom (M.step M.start a) u ∈ M.accept ∧ Sum.inr () ∈ (starεNFA M).evalFrom {Sum.inr ()} v`.
        obtain ⟨u, v, hw_eq, hu_accept, hv_accept⟩ : ∃ u v, w = u ++ v ∧ M.evalFrom (M.step M.start a) u ∈ M.accept ∧ Sum.inr () ∈ (starεNFA M).evalFrom {Sum.inr ()} v := by
          apply inl_to_fresh_split;
          have h_eval : Sum.inr () ∈ (starεNFA M).evalFrom {Sum.inr ()} (a :: w) := by
            exact?;
          rw [ evalFrom_fresh_cons ] at h_eval ; aesop;
        -- Let p = a :: u. Then:
        set p : List α := a :: u
        have hp_accept : p ∈ M.accepts := by
          convert hu_accept using 1
        have hw_eq_p_v : a :: w = p ++ v := by
          aesop
        have hv_accept : v ∈ (starεNFA M).accepts := by
          exact ⟨ _, rfl, hv_accept ⟩
        have hv_kstar : v ∈ KStar.kstar M.accepts := by
          grind
        have hw_kstar : a :: w ∈ KStar.kstar M.accepts := by
          obtain ⟨ L, hL₁, hL₂ ⟩ := hv_kstar;
          use p :: L;
          grind
        exact hw_kstar

/-- The Kleene star ε-NFA accepts exactly the Kleene star of the DFA language. -/
theorem starεNFA_correct :
    (starεNFA M).accepts = KStar.kstar M.accepts :=
  Set.eq_of_subset_of_subset
    (fun _ hw => star_forward M hw)
    (fun _ hw => star_backward M hw)

end StarεNFA

/-- Regular languages are closed under Kleene star. -/
theorem IsRegular.kstar' {L : Language α}
    (hL : L.IsRegular) :
    (KStar.kstar L).IsRegular := by
  obtain ⟨σ, _, M, rfl⟩ := hL
  rw [← starεNFA_correct M]
  refine ⟨_, inferInstance, (starεNFA M).toNFA.toDFA, ?_⟩
  rw [NFA.toDFA_correct, εNFA.toNFA_correct]

end Language

/-- The class of regular languages is closed under Kleene star. -/
theorem RG_closedUnderKleeneStar [Fintype α] : ClosedUnderKleeneStar (α := α) is_RG := by
  intro L hL
  exact is_RG_of_isRegular (isRegular_of_is_RG hL |>.kstar')