import Mathlib import Langlib.Automata.FiniteState.Equivalence.RegularDFAEquiv import Langlib.Utilities.ClosurePredicates /-! # Regular Closure Under Concatenation This file proves that the class of regular languages is closed under concatenation by constructing an ε-NFA from two DFAs. ## Main results - `Language.IsRegular.mul'` — if `L₁` and `L₂` are regular, then `L₁ * L₂` is regular. -/ namespace Language open Classical variable {α : Type*} section ConcatεNFA variable {σ₁ σ₂ : Type*} [Fintype σ₁] [Fintype σ₂] /-- ε-NFA for the concatenation of two DFAs. States are `σ₁ ⊕ σ₂`. On the left side we simulate `M₁`; on the right, `M₂`. Accepting states of `M₁` get an ε-transition to `M₂`'s start state. -/ noncomputable def concatεNFA (M₁ : DFA α σ₁) (M₂ : DFA α σ₂) : εNFA α (σ₁ ⊕ σ₂) where step := fun s c => match s, c with | Sum.inl q, some a => {Sum.inl (M₁.step q a)} | Sum.inl q, none => if q ∈ M₁.accept then {Sum.inr M₂.start} else ∅ | Sum.inr q, some a => {Sum.inr (M₂.step q a)} | Sum.inr _, none => ∅ start := {Sum.inl M₁.start} accept := Sum.inr '' M₂.accept variable (M₁ : DFA α σ₁) (M₂ : DFA α σ₂) /- ε-closure of a right-side singleton: no ε-transitions from `Sum.inr`. -/ private lemma εClosure_inr (q : σ₂) : (concatεNFA M₁ M₂).εClosure {Sum.inr q} = {Sum.inr q} := by refine' Set.Subset.antisymm _ _; · intro s hs; induction hs; aesop; unfold concatεNFA at * ; aesop; · exact Set.singleton_subset_iff.mpr ( εNFA.εClosure.base _ ( by simp +decide ) ) /- Processing a word from a single `Sum.inr` state follows `M₂` exactly. -/ private lemma evalFrom_inr (q : σ₂) (w : List α) : (concatεNFA M₁ M₂).evalFrom {Sum.inr q} w = {Sum.inr (M₂.evalFrom q w)} := by induction' w using List.reverseRecOn with w a ih; · convert εClosure_inr M₁ M₂ q; · simp_all +decide [ List.foldl_append, εNFA.stepSet ]; convert εClosure_inr M₁ M₂ ( M₂.step ( M₂.evalFrom q w ) a ) using 1 /- ε-closure of a left-side singleton when the state is NOT accepting in `M₁`. -/ private lemma εClosure_inl_not_accept (q : σ₁) (hq : q ∉ M₁.accept) : (concatεNFA M₁ M₂).εClosure {Sum.inl q} = {Sum.inl q} := by -- Since $q \notin M₁.accept$, there are no ε-transitions from $Sum.inl q$ to $Sum.inr M₂.start$. have h_no_ε_transitions : ∀ s, s ∈ (concatεNFA M₁ M₂).εClosure {Sum.inl q} → s = Sum.inl q := by intro s hs; induction hs; · aesop; · unfold concatεNFA at *; aesop; exact Set.eq_singleton_iff_unique_mem.mpr ⟨ by tauto, h_no_ε_transitions ⟩ /- ε-closure of a left-side singleton when the state IS accepting in `M₁`. -/ private lemma εClosure_inl_accept (q : σ₁) (hq : q ∈ M₁.accept) : (concatεNFA M₁ M₂).εClosure {Sum.inl q} = {Sum.inl q, Sum.inr M₂.start} := by refine' le_antisymm _ _; · intro x hx; induction' hx with x y hxy ih; · grind; · cases hxy <;> simp_all +decide [ concatεNFA ]; · intro x hx; induction x <;> simp_all +decide [ εNFA.εClosure ]; · -- Since the ε-closure of a singleton set is the set itself, we have: apply εNFA.εClosure.base; simp [hx]; · apply εNFA.εClosure.step; any_goals exact Sum.inl q; · unfold concatεNFA; aesop; · apply εNFA.εClosure.base; simp +decide /- The set of states reachable after processing `w` from `Sum.inl q` always includes `Sum.inl (M₁.evalFrom q w)`. It may also include `Sum.inr` states from ε-transitions at intermediate accepting states of `M₁`. -/ private lemma evalFrom_inl_contains (q : σ₁) (w : List α) : Sum.inl (M₁.evalFrom q w) ∈ (concatεNFA M₁ M₂).evalFrom {Sum.inl q} w := by induction' w using List.reverseRecOn with w ih generalizing q; · exact εNFA.εClosure.base _ ( by simp +decide ); · simp_all +decide [ εNFA.evalFrom ]; refine' Or.inl ⟨ _, ‹∀ q : σ₁, Sum.inl ( M₁.evalFrom q w ) ∈ List.foldl ( concatεNFA M₁ M₂ ).stepSet ( ( concatεNFA M₁ M₂ ).εClosure { Sum.inl q } ) w› q, _ ⟩; exact εNFA.εClosure.base _ ( by simp +decide [ concatεNFA ] ) /- evalFrom is monotone in the starting set. -/ private lemma evalFrom_mono (S T : Set (σ₁ ⊕ σ₂)) (w : List α) (h : S ⊆ T) : (concatεNFA M₁ M₂).evalFrom S w ⊆ (concatεNFA M₁ M₂).evalFrom T w := by grind +suggestions /- If `Sum.inl q` is reachable, then `Sum.inr M₂.start` is reachable when `q ∈ M₁.accept`. -/ private lemma inr_start_reachable_of_inl_accept (q : σ₁) (w : List α) (hq : q ∈ M₁.accept) (hreach : Sum.inl q ∈ (concatεNFA M₁ M₂).evalFrom {Sum.inl M₁.start} w) : Sum.inr M₂.start ∈ (concatεNFA M₁ M₂).evalFrom {Sum.inl M₁.start} w := by contrapose! hreach; induction' w using List.reverseRecOn with w IH <;> simp_all +decide [ εNFA.evalFrom ]; · cases eq_or_ne q M₁.start <;> simp_all +decide [ εNFA.εClosure ]; · exact False.elim ( hreach ( by rw [ εClosure_inl_accept M₁ M₂ M₁.start hq ] ; simp +decide ) ); · rintro ⟨ s, hs ⟩; · aesop; · rename_i s hs₁ hs₂; cases s <;> simp_all +decide [ concatεNFA ]; · constructor <;> intro x hx; · intro h; have h_eps : (concatεNFA M₁ M₂).step (Sum.inl x) (some IH) = {Sum.inl (M₁.step x IH)} := by grind +locals; by_cases h : M₁.step x IH ∈ M₁.accept <;> simp_all +decide [ εClosure_inl_accept, εClosure_inl_not_accept ]; have := hreach.1 x hx; simp_all +decide [ εClosure_inl_accept ] ; · intro h; convert εClosure_inr M₁ M₂ ( M₂.step x IH ) ▸ h using 1; simp +decide [ Set.ext_iff ] /- evalFrom distributes: `evalFrom S (u ++ v) = evalFrom (evalFrom S u) v`. -/ private lemma evalFrom_append (S : Set (σ₁ ⊕ σ₂)) (u v : List α) : (concatεNFA M₁ M₂).evalFrom S (u ++ v) = (concatεNFA M₁ M₂).evalFrom ((concatεNFA M₁ M₂).evalFrom S u) v := by simp +decide [ εNFA.evalFrom ]; -- By definition of ε-closure, we know that if q is in the ε-closure of S, then q is in the ε-closure of any state reachable from S. have h_εClosure : ∀ (S : Set (σ₁ ⊕ σ₂)), (concatεNFA M₁ M₂).εClosure S = S ∪ (if ∃ q ∈ M₁.accept, Sum.inl q ∈ S then {Sum.inr M₂.start} else ∅) := by intro S; refine' le_antisymm _ _; · intro q hq; induction' hq with q hq ih; · exact Or.inl hq; · unfold concatεNFA at * ; aesop; · intro q hq; split_ifs at hq <;> simp_all +decide [ εNFA.εClosure ]; · rcases hq with ( rfl | hq ); · grind +suggestions; · exact Set.mem_setOf_eq.mpr ( by tauto ); · exact Set.mem_setOf_eq.mpr ( by tauto ); induction v using List.reverseRecOn <;> simp_all +decide [ Set.ext_iff ]; intro b x hx hx' hb; induction u using List.reverseRecOn <;> simp_all +decide [ εNFA.stepSet ] ; · exact Or.inr ⟨ x, hx, hx' ⟩; · grind /- Backward direction: `M₁.accepts * M₂.accepts ⊆ concatεNFA.accepts`. -/ private lemma concat_backward {w : List α} (hw : w ∈ M₁.accepts * M₂.accepts) : w ∈ (concatεNFA M₁ M₂).accepts := by obtain ⟨ u, hu, v, hv, rfl ⟩ := Set.mem_image2.mp hw; have h_eval_from : Sum.inr (M₂.evalFrom M₂.start v) ∈ (concatεNFA M₁ M₂).evalFrom {Sum.inl M₁.start} (u ++ v) := by rw [ evalFrom_append ]; have h_eval_from : {Sum.inr M₂.start} ⊆ (concatεNFA M₁ M₂).evalFrom {Sum.inl M₁.start} u := by apply Set.singleton_subset_iff.mpr; apply inr_start_reachable_of_inl_accept; exact hu; exact?; have h_eval_from : (concatεNFA M₁ M₂).evalFrom {Sum.inr M₂.start} v ⊆ (concatεNFA M₁ M₂).evalFrom ((concatεNFA M₁ M₂).evalFrom {Sum.inl M₁.start} u) v := by exact?; exact h_eval_from ( by rw [ evalFrom_inr ] ; simp +decide ); exact ⟨ _, Set.mem_image_of_mem _ hv, h_eval_from ⟩ /- Any `Sum.inr` state reachable from `{Sum.inl q}` comes from a split of the input where the prefix takes `M₁` to an accepting state. -/ private lemma inr_reachable_split (q : σ₁) (w : List α) (q₂ : σ₂) (h : Sum.inr q₂ ∈ (concatεNFA M₁ M₂).evalFrom {Sum.inl q} w) : ∃ u v, w = u ++ v ∧ M₁.evalFrom q u ∈ M₁.accept ∧ q₂ = M₂.evalFrom M₂.start v := by contrapose! h; -- By definition of `evalFrom`, we know that `Sum.inr q₂` is reachable from `{Sum.inl q}` if and only if there exists a path from `{Sum.inl q}` to `Sum.inr q₂` in the ε-NFA. have h_path : ∀ w, ∀ q₁, Sum.inl q₁ ∈ (concatεNFA M₁ M₂).evalFrom {Sum.inl q} w → q₁ = M₁.evalFrom q w := by intro w q₁ hw; induction' w using List.reverseRecOn with w a ih generalizing q₁; · by_cases hq : q ∈ M₁.accept <;> simp_all +decide [ εClosure_inl_not_accept, εClosure_inl_accept ]; · simp_all +decide [ εNFA.evalFrom_append_singleton ]; rcases hw with ( ⟨ q₁, hq₁, hq₂ ⟩ | ⟨ q₂, hq₁, hq₂ ⟩ ); · have h_step : (concatεNFA M₁ M₂).step (Sum.inl q₁) (some a) = {Sum.inl (M₁.step q₁ a)} := by exact?; have h_eps_closure : ∀ s : σ₁ ⊕ σ₂, (concatεNFA M₁ M₂).εClosure {s} = if s ∈ Set.range Sum.inl then if s ∈ Set.image Sum.inl M₁.accept then {s, Sum.inr M₂.start} else {s} else if s ∈ Set.range Sum.inr then {s} else ∅ := by intro s; rcases s with ( s | s ) <;> simp +decide [ εClosure_inl_not_accept, εClosure_inl_accept, εClosure_inr ] ; split_ifs with hs <;> simp +decide [ hs, εClosure_inl_not_accept, εClosure_inl_accept ]; grind; · -- Since the ε-closure of {Sum.inr (M₂.step q₂ a)} is just {Sum.inr (M₂.step q₂ a)}, we have Sum.inl q₁ = Sum.inr (M₂.step q₂ a), which is a contradiction. have h_contra : Sum.inl q₁ = Sum.inr (M₂.step q₂ a) := by have h_contra : (concatεNFA M₁ M₂).εClosure {Sum.inr (M₂.step q₂ a)} = {Sum.inr (M₂.step q₂ a)} := by exact?; exact h_contra.subset hq₂; cases h_contra; induction' w using List.reverseRecOn with w a ih generalizing q q₂; · by_cases hq : q ∈ M₁.accept <;> simp +decide [ hq, εClosure_inl_accept, εClosure_inl_not_accept ] at h ⊢; exact h; · simp +decide [ εNFA.stepSet ]; constructor; · intro x hx; rw [ show ( concatεNFA M₁ M₂ ).step ( Sum.inl x ) ( some a ) = { Sum.inl ( M₁.step x a ) } from ?_ ]; · rw [ show ( concatεNFA M₁ M₂ ).εClosure { Sum.inl ( M₁.step x a ) } = if M₁.step x a ∈ M₁.accept then { Sum.inl ( M₁.step x a ), Sum.inr M₂.start } else { Sum.inl ( M₁.step x a ) } from ?_ ]; · split_ifs <;> simp +decide [ h ]; specialize h ( w ++ [ a ] ) [ ] ; simp_all +decide; exact h ( by rw [ ← h_path _ _ hx ] ; assumption ); · split_ifs with h; · exact?; · exact εClosure_inl_not_accept M₁ M₂ _ h; · grind; · intro x hx; rw [ show ( concatεNFA M₁ M₂ ).step ( Sum.inr x ) ( some a ) = { Sum.inr ( M₂.step x a ) } by rfl ]; grind +suggestions /- Forward direction: `concatεNFA.accepts ⊆ M₁.accepts * M₂.accepts`. -/ private lemma concat_forward {w : List α} (hw : w ∈ (concatεNFA M₁ M₂).accepts) : w ∈ M₁.accepts * M₂.accepts := by obtain ⟨s, hs⟩ := hw; obtain ⟨q₂, hq₂⟩ : ∃ q₂, s = Sum.inr q₂ ∧ q₂ ∈ M₂.accept := by unfold concatεNFA at hs; aesop; have := inr_reachable_split M₁ M₂ M₁.start w q₂ ?_; · rcases this with ⟨ u, v, rfl, hu, hv ⟩ ; exact ⟨ u, hu, v, by aesop ⟩ ; · aesop /-- The concatenation ε-NFA accepts exactly the concatenation of the two DFA languages. -/ theorem concatεNFA_correct : (concatεNFA M₁ M₂).accepts = M₁.accepts * M₂.accepts := Set.eq_of_subset_of_subset (fun _ hw => concat_forward M₁ M₂ hw) (fun _ hw => concat_backward M₁ M₂ hw) end ConcatεNFA /-- Regular languages are closed under concatenation. -/ theorem IsRegular.mul' {L₁ L₂ : Language α} (h₁ : L₁.IsRegular) (h₂ : L₂.IsRegular) : (L₁ * L₂).IsRegular := by obtain ⟨σ₁, _, M₁, rfl⟩ := h₁ obtain ⟨σ₂, _, M₂, rfl⟩ := h₂ rw [← concatεNFA_correct M₁ M₂] refine ⟨_, inferInstance, (concatεNFA M₁ M₂).toNFA.toDFA, ?_⟩ rw [NFA.toDFA_correct, εNFA.toNFA_correct] end Language /-- The class of regular languages is closed under concatenation. -/ theorem RG_closedUnderConcatenation [Fintype α] : ClosedUnderConcatenation (α := α) is_RG := by intro L₁ L₂ h₁ h₂ exact is_RG_of_isRegular (isRegular_of_is_RG h₁ |>.mul' (isRegular_of_is_RG h₂))