import Mathlib import Langlib.Classes.ContextSensitive.Inclusion.RecursivelyEnumerable import Langlib.Grammars.ContextSensitive.UnrestrictedCharacterization import Langlib.Grammars.NonContracting.Definition /-! # Non-Contracting and Context-Sensitive This file contains class-level comparison results between non-contracting grammars and context-sensitive grammars. main results - `CS_is_noncontracting` — every CS grammar induces a non-contracting unrestricted grammar - `noncontracting_transforms_length_le` — non-contracting steps don't decrease length - `csg_of_noncontracting` — translate a non-contracting grammar into a context-sensitive grammar -/ /-- The unrestricted grammar obtained from a context-sensitive grammar (via `grammar_of_csg`) is non-contracting. -/ theorem CS_is_noncontracting (g : CS_grammar T) : grammar_noncontracting (grammar_of_csg g) := by intro r hr simp [grammar_of_csg] at hr obtain ⟨cr, hcr, rfl⟩ := hr simp have hne := g.output_nonempty cr hcr have : cr.output_string.length ≥ 1 := List.length_pos_of_ne_nil hne omega /-- Every context-sensitive language is generated by a non-contracting grammar. -/ theorem is_CS_implies_is_noncontracting {L : Language T} (h : is_CS L) : is_noncontracting L := by obtain ⟨g, rfl⟩ := is_CS_implies_is_CS_via_csg h exact ⟨grammar_of_csg g, CS_is_noncontracting g, (CS_language_eq_grammar_language g).symm⟩ /-- A single transformation step in a non-contracting grammar does not decrease the length of the sentential form. -/ lemma noncontracting_transforms_length_le (g : grammar T) (hg : grammar_noncontracting g) {w₁ w₂ : List (symbol T g.nt)} (h : grammar_transforms g w₁ w₂) : w₁.length ≤ w₂.length := by obtain ⟨r, hr, u, v, rfl, rfl⟩ := h simp_all [List.length_append] linarith [hg r hr] /-- In a non-contracting grammar, derivation does not decrease the length. -/ lemma noncontracting_derives_length_le (g : grammar T) (hg : grammar_noncontracting g) {w₁ w₂ : List (symbol T g.nt)} (h : grammar_derives g w₁ w₂) : w₁.length ≤ w₂.length := by induction h with | refl => exact le_refl _ | tail h₁ h₂ ih => exact le_trans ih (noncontracting_transforms_length_le g hg h₂) /-! ## Non-contracting → CS The conversion from a non-contracting grammar to a context-sensitive grammar is a classical construction. Given a non-contracting rule `L ++ [A] ++ R → output` with `|output| ≥ |L| + 1 + |R|`, we simulate it using context-sensitive rules with fresh auxiliary nonterminals. ### Construction The nonterminal type of the constructed CS grammar is `g.nt ⊕ (T ⊕ (ℕ × ℕ))`: - `Sum.inl n`: original nonterminal `n` - `Sum.inr (Sum.inl t)`: a "lifted" nonterminal representing terminal `t` - `Sum.inr (Sum.inr (i, j))`: auxiliary nonterminal for rule index `i`, step `j` The sentential forms in the CS grammar always consist entirely of nonterminals (either `Sum.inl n` or `Sum.inr (Sum.inl t)`), except at the very end when un-lifting rules convert `Sum.inr (Sum.inl t)` to `terminal t`. The rules are: 1. **Un-lifting rules**: `Sum.inr (Sum.inl t) → [terminal t]` 2. **Direct rules** (n = 1): `Sum.inl A → map liftSym output` 3. **Phase 1** (n ≥ 2, marking): Replace pattern symbols with auxiliaries left-to-right 4. **Phase 2** (n ≥ 2, output): Replace auxiliaries with output symbols right-to-left -/ section NoncontractingToCS variable {T : Type} /-- Nonterminal type for the CS grammar constructed from a non-contracting grammar. -/ private abbrev NC_NT (g : grammar T) := g.nt ⊕ (T ⊕ (ℕ × ℕ)) /-- Convert a symbol to a nonterminal in the new grammar (lifting terminals). -/ private def nc_symToNT {g : grammar T} : symbol T g.nt → NC_NT g | .terminal t => .inr (.inl t) | .nonterminal n => .inl n /-- Lift a symbol: all symbols become `nonterminal` symbols in the new grammar. -/ private def nc_liftSym {g : grammar T} (s : symbol T g.nt) : symbol T (NC_NT g) := .nonterminal (nc_symToNT s) /-- Auxiliary nonterminal symbol for rule index `ri`, step `k`. -/ private def nc_aux {g : grammar T} (ri k : ℕ) : symbol T (NC_NT g) := .nonterminal (.inr (.inr (ri, k))) /-- Collect all terminals from a list of symbols. -/ private def nc_collectTerminals {N : Type} : List (symbol T N) → List T | [] => [] | (.terminal t) :: rest => t :: nc_collectTerminals rest | (.nonterminal _) :: rest => nc_collectTerminals rest /-- All terminals appearing in the grammar's rules (inputs and outputs). -/ private def nc_grammarTerminals (g : grammar T) : List T := g.rules.flatMap fun r => nc_collectTerminals (r.input_L ++ r.input_R ++ r.output_string) /-- Un-lifting rules: convert lifted terminal nonterminals back to actual terminals. -/ private def nc_unliftRules (g : grammar T) : List (csrule T (NC_NT g)) := (nc_grammarTerminals g).map fun t => { context_left := [], input_nonterminal := .inr (.inl t), context_right := [], output_string := [.terminal t] } /-- Generate CS rules for simulating one grammar rule at index `ri`. -/ private def nc_simRules {g : grammar T} (ri : ℕ) (r : grule T g.nt) : List (csrule T (NC_NT g)) := let pattern := r.input_L ++ [.nonterminal r.input_N] ++ r.input_R let n := pattern.length let out := r.output_string let dflt : symbol T g.nt := .nonterminal r.input_N if n ≤ 1 then [{ context_left := [], input_nonterminal := .inl r.input_N, context_right := [], output_string := out.map nc_liftSym }] else let phase1 := (List.range (n - 1)).map fun k => { context_left := (List.range k).map (nc_aux ri), input_nonterminal := nc_symToNT (pattern.getD k dflt), context_right := ((pattern.drop (k + 1)).map nc_liftSym), output_string := [nc_aux ri k] } let phase2_0 : csrule T (NC_NT g) := { context_left := (List.range (n - 1)).map (nc_aux ri), input_nonterminal := nc_symToNT (pattern.getD (n - 1) dflt), context_right := [], output_string := (out.drop (n - 1)).map nc_liftSym } let phase2_rest := ((List.range (n - 1)).reverse).map fun j => { context_left := (List.range j).map (nc_aux ri), input_nonterminal := .inr (.inr (ri, j)), context_right := (out.drop (j + 1)).map nc_liftSym, output_string := [nc_liftSym (out.getD j dflt)] } phase1 ++ [phase2_0] ++ phase2_rest /-- All CS rules: un-lifting rules plus simulation rules for each grammar rule. -/ private def nc_allRules (g : grammar T) : List (csrule T (NC_NT g)) := nc_unliftRules g ++ ((List.range g.rules.length).zip g.rules).flatMap fun ⟨i, r⟩ => nc_simRules i r /-- Every rule in `nc_unliftRules` has nonempty output. -/ private lemma nc_unliftRules_output_nonempty (g : grammar T) (r : csrule T (NC_NT g)) (hr : r ∈ nc_unliftRules g) : r.output_string ≠ [] := by simp [nc_unliftRules, nc_grammarTerminals] at hr obtain ⟨_, _, _, rfl⟩ := hr exact List.cons_ne_nil _ _ /-- Every rule in `nc_simRules` has nonempty output, provided the non-contracting property. -/ private lemma nc_simRules_output_nonempty (g : grammar T) (hg : grammar_noncontracting g) (ri : ℕ) (grule : grule T g.nt) (hgrule : grule ∈ g.rules) (r : csrule T (NC_NT g)) (hr : r ∈ nc_simRules ri grule) : r.output_string ≠ [] := by unfold nc_simRules at hr norm_num +zetaDelta at * split_ifs at hr <;> simp_all +decide [List.mem_append, List.mem_map] · have := hg grule hgrule; aesop · rcases hr with (⟨a, ha, rfl⟩ | rfl | ⟨a, ha, rfl⟩) <;> simp +decide [List.length_pos_iff] at * linarith [hg grule hgrule] /-- Construct a context-sensitive grammar equivalent to a given non-contracting grammar. -/ noncomputable def csg_of_noncontracting (g : grammar T) (hg : grammar_noncontracting g) : CS_grammar T where nt := NC_NT g initial := .inl g.initial rules := nc_allRules g output_nonempty := by intro r hr simp [nc_allRules] at hr rcases hr with hr_unlift | ⟨ri, grule, hgrule, hr_sim⟩ · exact nc_unliftRules_output_nonempty g r hr_unlift · have hmem : grule ∈ g.rules := (List.of_mem_zip hgrule).2 exact nc_simRules_output_nonempty g hg ri grule hmem r hr_sim /-! ### Helper lemmas for language equivalence -/ /- PROBLEM CS derivation with additional context (prefix and suffix). PROVIDED SOLUTION By induction on the CS_derives derivation h. Base case (refl): trivial. Step case (tail): if CS_derives g w₁ w₃ and CS_transforms g w₃ w₂, by IH CS_derives g (p ++ w₁ ++ s) (p ++ w₃ ++ s). For the single transform CS_transforms g w₃ w₂, we have r, u, v with w₃ = u ++ CL ++ [nt A] ++ CR ++ v and w₂ = u ++ CL ++ OS ++ CR ++ v. Then p ++ w₃ ++ s = (p ++ u) ++ CL ++ [nt A] ++ CR ++ (v ++ s), and p ++ w₂ ++ s = (p ++ u) ++ CL ++ OS ++ CR ++ (v ++ s). So CS_transforms g (p ++ w₃ ++ s) (p ++ w₂ ++ s) with u' = p ++ u, v' = v ++ s. Then CS_deri_of_deri_tran gives the result. -/ private lemma CS_deri_with_context {g : CS_grammar T} {w₁ w₂ : List (symbol T g.nt)} (p s : List (symbol T g.nt)) (h : CS_derives g w₁ w₂) : CS_derives g (p ++ w₁ ++ s) (p ++ w₂ ++ s) := by induction h; · constructor; · rename_i h₁ h₂ h₃; obtain ⟨ r, u, v, hr, rfl, rfl ⟩ := h₂; exact h₃.tail ⟨ r, p ++ u, v ++ s, hr, by simp +decide [ ← List.append_assoc ], by simp +decide [ ← List.append_assoc ] ⟩ /- PROBLEM Un-lifting a single terminal in the CS grammar. PROVIDED SOLUTION We need to show CS_transforms (csg_of_noncontracting g hg) (u ++ [nt (inr (inl t))] ++ v) (u ++ [terminal t] ++ v). Construct the witness: use the CS rule { context_left := [], input_nonterminal := .inr (.inl t), context_right := [], output_string := [.terminal t] }, with u and v as the prefix and suffix. This rule is in (csg_of_noncontracting g hg).rules because: - t ∈ nc_grammarTerminals g (by ht) - So the rule is in nc_unliftRules g (by definition, it's in the map) - nc_unliftRules g is a prefix of nc_allRules g - nc_allRules g = (csg_of_noncontracting g hg).rules The matching: - u ++ [] ++ [nt (inr (inl t))] ++ [] ++ v = u ++ [nt (inr (inl t))] ++ v ✓ - u ++ [] ++ [terminal t] ++ [] ++ v = u ++ [terminal t] ++ v ✓ Use exists with the rule, u, and v. -/ private lemma nc_unlift_one (g : grammar T) (hg : grammar_noncontracting g) (t : T) (ht : t ∈ nc_grammarTerminals g) (u v : List (symbol T (NC_NT g))) : CS_transforms (csg_of_noncontracting g hg) (u ++ [.nonterminal (.inr (.inl t))] ++ v) (u ++ [.terminal t] ++ v) := by use ⟨ [ ], Sum.inr ( Sum.inl t ), [ ], [ symbol.terminal t ] ⟩; refine' ⟨ u, v, _, _, _ ⟩ <;> simp +decide [ csg_of_noncontracting ]; exact List.mem_append_left _ ( List.mem_map.mpr ⟨ t, ht, rfl ⟩ ) /- PROBLEM Un-lifting all terminals in a list. PROVIDED SOLUTION By induction on w. Base case (w = []): Both sides are [], so CS_deri_self. Step case (w = t :: w'): The source is [nt (inr (inl t))] ++ (w'.map (fun t => nt (inr (inl t)))). The target is [terminal t] ++ (w'.map terminal). Step 1: Apply nc_unlift_one with u = [] and v = w'.map (fun t => nt (inr (inl t))) to get: CS_transforms g' ([nt (inr (inl t))] ++ v) ([terminal t] ++ v) where v = w'.map (fun t => nt (inr (inl t))) Step 2: By IH (with hw restricted to w'), CS_derives g' (w'.map (fun t => nt (inr (inl t)))) (w'.map terminal). Step 3: Use CS_deri_with_context (or grammar_deri_with_prefix) with prefix [terminal t] to get: CS_derives g' ([terminal t] ++ w'.map (fun t => nt (inr (inl t)))) ([terminal t] ++ w'.map terminal) Step 4: Combine steps 1 and 3 by transitivity. Use hw t (List.mem_cons_self t w') for the base terminal t, and (fun t' ht' => hw t' (List.mem_cons_of_mem t ht')) for the IH. -/ private lemma nc_unlift_all (g : grammar T) (hg : grammar_noncontracting g) (w : List T) (hw : ∀ t ∈ w, t ∈ nc_grammarTerminals g) : CS_derives (csg_of_noncontracting g hg) (w.map (fun t => symbol.nonterminal (.inr (.inl t) : NC_NT g))) (w.map symbol.terminal) := by -- We proceed by induction on the length of the list `w`. induction' w with t w ih; · constructor; · -- Apply the `nc_unlift_one` lemma to transform `[nt (inr (inl t))]` to `[terminal t]`. have h_transform : CS_transforms (csg_of_noncontracting g hg) ([.nonterminal (.inr (.inl t))] ++ (w.map (fun t => .nonterminal (.inr (.inl t))))) ([.terminal t] ++ (w.map (fun t => .nonterminal (.inr (.inl t))))) := by simpa using nc_unlift_one g hg t (hw t (by simp +decide)) [] (w.map (fun t => symbol.nonterminal (Sum.inr (Sum.inl t)))); -- Apply the induction hypothesis to transform `w.map (fun t => nt (inr (inl t)))` to `w.map terminal`. have h_ind : CS_derives (csg_of_noncontracting g hg) ([symbol.terminal t] ++ (w.map (fun t => .nonterminal (.inr (.inl t))))) ([symbol.terminal t] ++ (w.map symbol.terminal)) := by simpa using CS_deri_with_context [symbol.terminal t] [] (ih (fun t ht => hw t (List.mem_cons_of_mem _ ht))) exact .single h_transform |> Relation.ReflTransGen.trans <| h_ind /- PROBLEM A terminal appearing in a rule's output is in nc_grammarTerminals. PROVIDED SOLUTION By definition, nc_grammarTerminals g = g.rules.flatMap (fun r => nc_collectTerminals (r.input_L ++ r.input_R ++ r.output_string)). Since r ∈ g.rules, the contribution of r is included. We need t ∈ nc_collectTerminals (r.input_L ++ r.input_R ++ r.output_string). Since terminal t ∈ r.output_string, and r.output_string is a suffix of r.input_L ++ r.input_R ++ r.output_string, terminal t appears in the combined list. Then nc_collectTerminals extracts t from terminal t. So t ∈ nc_collectTerminals (r.input_L ++ r.input_R ++ r.output_string), and by flatMap membership, t ∈ nc_grammarTerminals g. -/ private lemma terminal_in_output_mem_grammarTerminals (g : grammar T) (r : grule T g.nt) (hr : r ∈ g.rules) (t : T) (ht : symbol.terminal t ∈ r.output_string) : t ∈ nc_grammarTerminals g := by unfold nc_grammarTerminals; -- By definition of `nc_collectTerminals`, we know that `t ∈ nc_collectTerminals (r.input_L ++ (r.input_R ++ r.output_string))` if and only if `t ∈ r.output_string`. have h_nc_collectTerminals : ∀ (l : List (symbol T g.nt)), t ∈ nc_collectTerminals l ↔ symbol.terminal t ∈ l := by intro l induction' l with s l ih; · simp +decide [ nc_collectTerminals ]; · cases s <;> simp_all +decide [ nc_collectTerminals ]; aesop /- PROBLEM A simulation rule for rule index ri is in the CS grammar's rules. PROVIDED SOLUTION The rules of csg_of_noncontracting g hg are nc_allRules g = nc_unliftRules g ++ ((List.range g.rules.length).zip g.rules).flatMap (fun ⟨i, r⟩ => nc_simRules i r). cr ∈ nc_simRules ri r, and (ri, r) ∈ (List.range g.rules.length).zip g.rules (since ri < g.rules.length and g.rules[ri] = r). So cr is in the flatMap part, hence in nc_allRules g, hence in (csg_of_noncontracting g hg).rules. Use List.mem_append_right, List.mem_flatMap, and the fact that (ri, r) ∈ (List.range g.rules.length).zip g.rules. -/ private lemma nc_simRule_mem (g : grammar T) (hg : grammar_noncontracting g) (ri : ℕ) (r : grule T g.nt) (hri : ri < g.rules.length) (hr : g.rules[ri] = r) (cr : csrule T (NC_NT g)) (hcr : cr ∈ nc_simRules ri r) : cr ∈ (csg_of_noncontracting g hg).rules := by have h_zip : (ri, r) ∈ (List.range g.rules.length).zip g.rules := by rw [ List.mem_iff_get ]; use ⟨ ri, by simpa using hri ⟩ ; aesop; exact List.mem_append_right _ ( List.mem_flatMap.mpr ⟨ _, h_zip, hcr ⟩ ) /- PROBLEM The core simulation: transforming the lifted pattern to lifted output. For n = 1 (input_L = [], input_R = []), this is a direct CS step. For n ≥ 2, this requires the Phase 1 + Phase 2 rule chain. PROVIDED SOLUTION Let pattern = r.input_L ++ [nonterminal r.input_N] ++ r.input_R, n = pattern.length, out = r.output_string. Case 1: n ≤ 1 (so r.input_L = [] and r.input_R = []): The first rule in nc_simRules ri r is: { context_left := [], input_nonterminal := .inl r.input_N, context_right := [], output_string := out.map nc_liftSym } This is a single CS_transforms step: - Use this rule with u = [] and v = [] - w₁ = [] ++ [] ++ [nt (inl r.input_N)] ++ [] ++ [] = [nt (inl r.input_N)] - w₂ = [] ++ [] ++ out.map nc_liftSym ++ [] ++ [] = out.map nc_liftSym And map nc_liftSym [nonterminal r.input_N] = [nonterminal (inl r.input_N)] = [nt (inl r.input_N)]. So it works. Apply CS_deri_of_tran. The rule membership: it's in nc_simRules ri r (the first element of the list), hence in (csg_of_noncontracting g hg).rules by nc_simRule_mem. Case 2: n ≥ 2: sorry. To handle both cases: split on whether r.input_L.length + 1 + r.input_R.length ≤ 1. For the n ≤ 1 case, r.input_L = [] and r.input_R = [] (since both are lists and their total length is 0). Use List.length_eq_zero to derive input_L = [] and input_R = []. Then simplify and construct the single CS step. -/ private lemma nc_sim_pattern (g : grammar T) (hg : grammar_noncontracting g) (ri : ℕ) (r : grule T g.nt) (hri : ri < g.rules.length) (hr : g.rules[ri] = r) : CS_derives (csg_of_noncontracting g hg) ((r.input_L ++ [symbol.nonterminal r.input_N] ++ r.input_R).map nc_liftSym) (r.output_string.map nc_liftSym) := by -- Let's denote the pattern as $pattern$ and its length as $n$. set pattern : List (symbol T g.nt) := r.input_L ++ [symbol.nonterminal r.input_N] ++ r.input_R set n := pattern.length set out := r.output_string; by_cases hn : n ≤ 1; · apply CS_deri_of_tran; use ⟨ [], .inl r.input_N, [], out.map nc_liftSym ⟩; use [], [] simp [pattern, out]; constructor; · apply nc_simRule_mem g hg ri r hri hr; unfold nc_simRules; aesop; · cases h : r.input_L <;> cases h' : r.input_R <;> aesop; · -- For the case when $n \geq 2$, we can use the phase1 and phase2 rules to transform the pattern into the output. have h_phase1 : CS_derives (csg_of_noncontracting g hg) (pattern.map nc_liftSym) ((List.range (n - 1)).map (nc_aux ri) ++ [nc_liftSym (pattern.getD (n - 1) (symbol.nonterminal r.input_N))]) := by have h_phase1 : ∀ (k : ℕ), k < n - 1 → CS_transforms (csg_of_noncontracting g hg) ((List.range k).map (nc_aux ri) ++ [nc_liftSym (pattern.getD k (symbol.nonterminal r.input_N))] ++ (pattern.drop (k + 1)).map nc_liftSym) ((List.range (k + 1)).map (nc_aux ri) ++ (pattern.drop (k + 1)).map nc_liftSym) := by intro k hk have h_phase1_rule : csrule.mk (List.map (nc_aux ri) (List.range k)) (nc_symToNT (pattern.getD k (symbol.nonterminal r.input_N))) (List.map nc_liftSym (pattern.drop (k + 1))) [nc_aux ri k] ∈ nc_simRules ri r := by grind +locals generalize_proofs at *; ( use csrule.mk (List.map (nc_aux ri) (List.range k)) (nc_symToNT (pattern.getD k (symbol.nonterminal r.input_N))) (List.map nc_liftSym (pattern.drop (k + 1))) [nc_aux ri k]; use [], [] ; simp_all +decide [ List.range_succ ] ; exact ⟨ nc_simRule_mem g hg ri r hri hr _ h_phase1_rule, rfl ⟩); have h_phase1_seq : ∀ (k : ℕ), k ≤ n - 1 → CS_derives (csg_of_noncontracting g hg) (pattern.map nc_liftSym) ((List.range k).map (nc_aux ri) ++ [nc_liftSym (pattern.getD k (symbol.nonterminal r.input_N))] ++ (pattern.drop (k + 1)).map nc_liftSym) := by intro k hk induction' k with k ih; · convert CS_deri_self using 1; cases h : r.input_L <;> aesop; · convert CS_deri_of_deri_deri ( ih ( Nat.le_of_succ_le hk ) ) ( CS_deri_of_tran ( h_phase1 k ( Nat.lt_of_succ_le hk ) ) ) using 1; rw [show List.drop (k + 1) pattern = [pattern.getD (k + 1) (symbol.nonterminal r.input_N)] ++ List.drop (k + 1 + 1) pattern from ?_] · simp +decide [List.map_append] · rw [List.drop_eq_getElem_cons] · rw [List.getD_eq_getElem] · simp · omega · omega grind; -- Apply the phase2_0 rule to transform the list. have h_phase2_0 : CS_derives (csg_of_noncontracting g hg) ((List.range (n - 1)).map (nc_aux ri) ++ [nc_liftSym (pattern.getD (n - 1) (symbol.nonterminal r.input_N))]) ((List.range (n - 1)).map (nc_aux ri) ++ (out.drop (n - 1)).map nc_liftSym) := by apply_rules [ CS_deri_of_tran ]; have h_phase2_0 : ∃ cr ∈ nc_simRules ri r, cr.context_left = (List.range (n - 1)).map (nc_aux ri) ∧ cr.input_nonterminal = nc_symToNT (pattern.getD (n - 1) (symbol.nonterminal r.input_N)) ∧ cr.context_right = [] ∧ cr.output_string = (out.drop (n - 1)).map nc_liftSym := by simp +zetaDelta at *; unfold nc_simRules; simp +decide [ List.range_succ ] ; split_ifs <;> simp_all +decide [ List.range_succ ]; · linarith; · exact ⟨ _, Or.inr <| Or.inl rfl, rfl, rfl, rfl, rfl ⟩; obtain ⟨ cr, hcr₁, hcr₂, hcr₃, hcr₄, hcr₅ ⟩ := h_phase2_0; use cr; use [], []; exact ⟨ nc_simRule_mem g hg ri r hri hr cr hcr₁, by aesop ⟩; -- Apply the phase2_rest rules to transform the list. have h_phase2_rest : ∀ k < n - 1, CS_derives (csg_of_noncontracting g hg) ((List.range (k + 1)).map (nc_aux ri) ++ (out.drop (k + 1)).map nc_liftSym) ((List.range k).map (nc_aux ri) ++ (out.drop k).map nc_liftSym) := by intro k hk have h_phase2_rest_step : CS_transforms (csg_of_noncontracting g hg) ((List.range (k + 1)).map (nc_aux ri) ++ (out.drop (k + 1)).map nc_liftSym) ((List.range k).map (nc_aux ri) ++ (out.drop k).map nc_liftSym) := by have hr_mem : r ∈ g.rules := by rw [List.mem_iff_get] exact ⟨⟨ri, hri⟩, by simpa using hr⟩ have hk_out : k < out.length := by have hnc : n ≤ out.length := by simpa [n, pattern, out] using hg r hr_mem omega use ⟨ (List.range k).map (nc_aux ri), .inr (.inr (ri, k)), (out.drop (k + 1)).map nc_liftSym, [nc_liftSym (out.getD k (symbol.nonterminal r.input_N))] ⟩; refine' ⟨ [ ], [ ], _, _, _ ⟩ <;> simp +decide [ List.range_succ ]; · apply_rules [ nc_simRule_mem ]; simp +decide [ nc_simRules ]; grind +splitIndPred; · rfl; · rw [List.drop_eq_getElem_cons (by simpa using hk_out : k < (List.map nc_liftSym out).length)] rw [List.getElem_map] have hget : out[k]?.getD (symbol.nonterminal r.input_N) = out[k] := by rw [List.getElem?_eq_getElem hk_out] simp simp [hget] exact .single h_phase2_rest_step; have h_phase2_rest_seq : ∀ k ≤ n - 1, CS_derives (csg_of_noncontracting g hg) ((List.range (n - 1)).map (nc_aux ri) ++ (out.drop (n - 1)).map nc_liftSym) ((List.range k).map (nc_aux ri) ++ (out.drop k).map nc_liftSym) := by intro k hk; induction' hk : n - 1 - k with m ih generalizing k <;> simp_all +decide [ Nat.sub_sub ]; · rw [ show k = n - 1 by omega ]; constructor; · convert ih ( k + 1 ) ( by omega ) ( by omega ) |> fun h => h.trans ( h_phase2_rest k ( by omega ) ) using 1; exact h_phase1.trans ( h_phase2_0.trans ( h_phase2_rest_seq 0 bot_le ) ) |> fun h => h.trans ( by aesop ) /-- A single grammar step can be simulated by CS derivation on lifted forms. -/ private lemma nc_sim_one_transform (g : grammar T) (hg : grammar_noncontracting g) {w₁ w₂ : List (symbol T g.nt)} (h : grammar_transforms g w₁ w₂) : CS_derives (csg_of_noncontracting g hg) (w₁.map nc_liftSym) (w₂.map nc_liftSym) := by obtain ⟨r, hr_mem, u, v, hw1, hw2⟩ := h subst hw1; subst hw2 obtain ⟨⟨ri_val, hri_lt⟩, hri_eq⟩ := List.mem_iff_get.mp hr_mem have hsim := nc_sim_pattern g hg ri_val r hri_lt hri_eq have hctx := CS_deri_with_context (u.map nc_liftSym) (v.map nc_liftSym) hsim simp only [List.map_append, List.map_cons, List.map_nil] at hctx ⊢ convert hctx using 1 <;> simp [List.append_assoc] /- PROBLEM Multi-step grammar derivation lifts to CS derivation. PROVIDED SOLUTION By induction on the grammar_derives derivation h. Base case (refl): CS_deri_self. Step case (tail): grammar_derives g w₁ w₃ and grammar_transforms g w₃ w₂. By IH, CS_derives g' (w₁.map nc_liftSym) (w₃.map nc_liftSym). By nc_sim_one_transform, CS_derives g' (w₃.map nc_liftSym) (w₂.map nc_liftSym). By CS_deri_of_deri_deri (transitivity), CS_derives g' (w₁.map nc_liftSym) (w₂.map nc_liftSym). -/ private lemma nc_sim_derives (g : grammar T) (hg : grammar_noncontracting g) {w₁ w₂ : List (symbol T g.nt)} (h : grammar_derives g w₁ w₂) : CS_derives (csg_of_noncontracting g hg) (w₁.map nc_liftSym) (w₂.map nc_liftSym) := by induction h; · constructor; · exact CS_deri_of_deri_deri ‹_› ( nc_sim_one_transform g hg ‹_› ) /- PROBLEM Any terminal in a word derived by the grammar appears in nc_grammarTerminals. PROVIDED SOLUTION We need to show that any terminal t appearing in a word w derived by the grammar is in nc_grammarTerminals g. By definition, grammar_language g w means grammar_derives g [nt g.initial] (map terminal w). We need to show t ∈ nc_grammarTerminals g. Proceed by induction on the derivation. The initial sentential form [nt g.initial] has no terminals. Each grammar_transforms step introduces terminals only from rule outputs. By terminal_in_output_mem_grammarTerminals, any terminal in a rule output is in nc_grammarTerminals. More precisely: induct on the derivation grammar_derives g [nt g.initial] (map terminal w). Show that every terminal symbol in the sentential form at each step is in nc_grammarTerminals. For the initial form, vacuously true (no terminals). For each step using rule r at position u, v: the new form is u ++ r.output_string ++ v. Terminals in u and v are inherited from the previous form (by IH, they're in nc_grammarTerminals). Terminals in r.output_string are in nc_grammarTerminals by terminal_in_output_mem_grammarTerminals. Since the final form map terminal w has all its symbols as terminals, every t ∈ w gives symbol.terminal t in the final form, hence t ∈ nc_grammarTerminals. Actually, a simpler approach: since map terminal w is derived, there must be some step that introduces each terminal t. That step uses a rule r with terminal t ∈ r.output_string, so t ∈ nc_grammarTerminals by terminal_in_output_mem_grammarTerminals. First prove a helper: for any grammar_derives g w₁ w₂, if all terminals in w₁ are in nc_grammarTerminals g, then all terminals in w₂ are also in nc_grammarTerminals g. This is by induction on the derivation. The base case is trivial. The step case: grammar_transforms g mid w₂ gives mid = u ++ L ++ [nt A] ++ R ++ v and w₂ = u ++ out ++ v. A terminal in w₂ is either in u (inherited from mid), in out (by terminal_in_output_mem_grammarTerminals), or in v (inherited from mid). Terminals in u and v are also in mid, so by IH they're in nc_grammarTerminals. Then apply this helper with w₁ = [nt g.initial] (which has no terminals, so the base condition is vacuously true) and w₂ = map terminal w. -/ private lemma nc_derived_terminal_in_grammarTerminals (g : grammar T) (hg : grammar_noncontracting g) (w : List T) (hw : w ∈ grammar_language g) (t : T) (ht : t ∈ w) : t ∈ nc_grammarTerminals g := by -- Apply induction on the derivation to show that every terminal in the sentential form at each step is in nc_grammarTerminals. have h_ind : ∀ {w₁ w₂ : List (symbol T g.nt)}, grammar_derives g w₁ w₂ → (∀ s ∈ w₁, ∀ t, s = .terminal t → t ∈ nc_grammarTerminals g) → (∀ s ∈ w₂, ∀ t, s = .terminal t → t ∈ nc_grammarTerminals g) := by intros w₁ w₂ h_deriv h_terminals_in_w₁ induction' h_deriv with w₁ w₂ h_deriv h_terminals_in_w₁ ih; · assumption; · rcases h_terminals_in_w₁ with ⟨ r, hr, u, v, hw₁, hw₂ ⟩ ; simp_all +decide [ List.map_append, List.map_cons ] ; rintro s ( hs | hs | hs ) t rfl <;> [ exact ih _ ( Or.inl hs ) _ rfl; exact terminal_in_output_mem_grammarTerminals g r hr _ ( by aesop ) ; exact ih _ ( Or.inr <| Or.inr <| Or.inr <| Or.inr hs ) _ rfl ] ; specialize @h_ind [ symbol.nonterminal g.initial ] ( List.map symbol.terminal w ) hw ; aesop /- PROBLEM Forward direction: grammar_language g ⊆ CS_language (csg_of_noncontracting g hg). PROVIDED SOLUTION Given hw : w ∈ grammar_language g, which means grammar_derives g [nt g.initial] (map terminal w). Step 1: By nc_sim_derives, CS_derives g' (map nc_liftSym [nt g.initial]) (map nc_liftSym (map terminal w)). Note: map nc_liftSym [nt g.initial] = [nt (inl g.initial)] = [nt g'.initial] And: map nc_liftSym (map terminal w) = map (fun t => nt (inr (inl t))) w Step 2: By nc_unlift_all (with hw' showing all terminals in w are in nc_grammarTerminals, which follows from nc_derived_terminal_in_grammarTerminals), CS_derives g' (map (fun t => nt (inr (inl t))) w) (map terminal w). Step 3: By CS_deri_of_deri_deri (transitivity), CS_derives g' [nt g'.initial] (map terminal w). This is exactly w ∈ CS_language g'. Key: show map nc_liftSym [nt g.initial] = [nt (Sum.inl g.initial)] by unfolding nc_liftSym and nc_symToNT. And map nc_liftSym (map terminal w) = map (fun t => nt (Sum.inr (Sum.inl t))) w by unfolding nc_liftSym and nc_symToNT on terminal. -/ private lemma nc_forward (g : grammar T) (hg : grammar_noncontracting g) (w : List T) (hw : w ∈ grammar_language g) : w ∈ CS_language (csg_of_noncontracting g hg) := by obtain ⟨hw₁, hw₂⟩ : grammar_derives g [symbol.nonterminal g.initial] (w.map symbol.terminal) ∧ ∀ t ∈ w, t ∈ nc_grammarTerminals g := by exact ⟨ hw, fun t ht => nc_derived_terminal_in_grammarTerminals g hg w hw t ht ⟩; have h_map : CS_derives (csg_of_noncontracting g hg) (List.map symbol.nonterminal (List.cons (Sum.inl g.initial) [])) (List.map symbol.nonterminal (List.map (fun t => Sum.inr (Sum.inl t)) w)) := by convert nc_sim_derives g hg hw₁ using 1; unfold nc_liftSym; aesop; have h_unlift : CS_derives (csg_of_noncontracting g hg) (List.map symbol.nonterminal (List.map (fun t => Sum.inr (Sum.inl t)) w)) (List.map symbol.terminal w) := by convert nc_unlift_all g hg w hw₂ using 1; rw [ List.map_map ]; rfl; exact Relation.ReflTransGen.trans h_map h_unlift /-! ### Backward direction helpers -/ /-- Projection from extended grammar symbols to original grammar symbols. For clean forms (no aux nonterminals), this is the natural "un-lifting" map. -/ private def nc_proj_sym (g : grammar T) : symbol T (NC_NT g) → symbol T g.nt | .terminal t => .terminal t | .nonterminal (.inl n) => .nonterminal n | .nonterminal (.inr (.inl t)) => .terminal t | .nonterminal (.inr (.inr _)) => .nonterminal g.initial -- dummy for aux /-- Projection on lists: apply nc_proj_sym pointwise. -/ private def nc_proj (g : grammar T) (s : List (symbol T (NC_NT g))) : List (symbol T g.nt) := s.map (nc_proj_sym g) /-- proj ∘ liftSym = id: projecting a lifted symbol recovers the original. -/ private lemma nc_proj_liftSym (g : grammar T) (s : symbol T g.nt) : nc_proj_sym g (nc_liftSym s) = s := by cases s <;> rfl /-- Projecting a list of lifted symbols recovers the original list. -/ private lemma nc_proj_map_liftSym (g : grammar T) (l : List (symbol T g.nt)) : nc_proj g (l.map nc_liftSym) = l := by unfold nc_proj rw [List.map_map] conv_rhs => rw [← List.map_id l] congr 1 ext s exact nc_proj_liftSym g s /-- A sentential form is "clean" if it has no auxiliary nonterminals. -/ private def nc_is_clean (g : grammar T) (s : List (symbol T (NC_NT g))) : Prop := ∀ x ∈ s, ∀ p : ℕ × ℕ, x ≠ symbol.nonterminal (Sum.inr (Sum.inr p)) /-- The initial form is clean. -/ private lemma nc_initial_clean (g : grammar T) : nc_is_clean g [symbol.nonterminal (Sum.inl g.initial)] := by intro x hx p heq simp at hx subst hx exact absurd heq (by simp) /-- Terminal strings are clean. -/ private lemma nc_terminal_clean (g : grammar T) (w : List T) : nc_is_clean g (w.map symbol.terminal) := by intro x hx p simp [List.mem_map] at hx obtain ⟨t, _, rfl⟩ := hx intro h; cases h /-- Projection of the initial symbol. -/ private lemma nc_proj_initial (g : grammar T) : nc_proj g [symbol.nonterminal (Sum.inl g.initial)] = [symbol.nonterminal g.initial] := by simp [nc_proj, nc_proj_sym] /-- Projection of a terminal string. -/ private lemma nc_proj_terminal (g : grammar T) (w : List T) : nc_proj g (w.map symbol.terminal) = w.map symbol.terminal := by simp [nc_proj, nc_proj_sym, List.map_map] /- PROBLEM From a clean form, only un-lifting and direct simulation rules can produce another clean form. Phase 1 rules always introduce aux nonterminals, and phase 2/phase 2_rest rules require aux nonterminals as input. This lemma shows: if both forms are clean, the projection gives a grammar derivation. PROVIDED SOLUTION From a clean form s₁ (no aux nonterminals inr(inr _)), only certain CS rules of csg_of_noncontracting can produce another clean form s₂: 1. **Un-lifting rules**: These replace nonterminal(inr(inl t)) with terminal t. Under nc_proj, both map to terminal t, so nc_proj g s₁ = nc_proj g s₂. Use grammar_deri_self. 2. **Direct simulation rules** (n ≤ 1 case): These replace nonterminal(inl A) with out.map nc_liftSym. The output only contains nc_liftSym symbols, which are nonterminal(inl _) or nonterminal(inr(inl _)), so s₂ is clean. Under nc_proj, this changes [nonterminal A] to out (since nc_proj_sym ∘ nc_liftSym = id by nc_proj_liftSym). This corresponds to a grammar_transforms step with the original rule. 3. **Phase 1 rules**: These always produce nc_aux(ri,k) = nonterminal(inr(inr(ri,k))) in the output, which is an aux nonterminal. So s₂ would NOT be clean. Contradiction with h₂. 4. **Phase 2_0 rules**: The context_left contains (range(n-1)).map(nc_aux ri), which are aux nonterminals. For this rule to apply to s₁, these aux nonterminals must appear in s₁. But s₁ is clean (no aux). Contradiction with h₁. 5. **Phase 2_rest rules**: The input_nonterminal is inr(inr(ri,j)), an aux nonterminal. For this rule to apply to s₁, this nonterminal must appear in s₁. But s₁ is clean. Contradiction with h₁. So only cases 1 and 2 are possible. In case 1, the projections are equal (grammar_deri_self). In case 2, the projection gives a grammar_transforms step. Key approach: Unfold CS_transforms to get the rule r ∈ (csg_of_noncontracting g hg).rules. The rules are nc_allRules g = nc_unliftRules g ++ simulation_rules. Analyze which rules can apply to a clean form and produce a clean form. Use nc_is_clean hypotheses to derive contradictions for phase 1/2 rules. For case 2 (direct simulation), construct the grammar_transforms witness: use the original grammar rule r' with input_L = [], input_N = A, input_R = [], output_string = out. The projected form gives u' = nc_proj g u, v' = nc_proj g v. The grammar_transforms step is: nc_proj g s₁ = u' ++ [nonterminal A] ++ v' → u' ++ out ++ v' = nc_proj g s₂. -/ private lemma nc_clean_step_grammar_derives (g : grammar T) (hg : grammar_noncontracting g) {s₁ s₂ : List (symbol T (NC_NT g))} (h : CS_transforms (csg_of_noncontracting g hg) s₁ s₂) (h₁ : nc_is_clean g s₁) (h₂ : nc_is_clean g s₂) : grammar_derives g (nc_proj g s₁) (nc_proj g s₂) := by revert h₁ h₂ h; rintro ⟨ r, u, v, hr, h₁, h₂ ⟩ h₁' h₂'; unfold csg_of_noncontracting at hr; simp_all +decide [ nc_allRules ] ; rcases hr with ( hr | ⟨ a, b, hab, hr ⟩ ) <;> simp_all +decide [ nc_unliftRules, nc_simRules ] ; · rcases hr with ⟨ a, ha, rfl ⟩ ; simp_all +decide [ nc_proj ] ; constructor; · split_ifs at hr <;> simp_all +decide [ List.mem_append, List.mem_map ]; · -- Since the length of the input is at most 1, the rule must be a direct simulation rule. have h_direct : b.input_L = [] ∧ b.input_R = [] := by exact ⟨ List.eq_nil_of_length_eq_zero ( by linarith ), List.eq_nil_of_length_eq_zero ( by linarith ) ⟩; convert grammar_deri_of_tran _ using 1; use b; unfold nc_proj; simp +decide [ h_direct ] ; refine' ⟨ _, _, _, rfl, _ ⟩; · rw [ List.mem_iff_get ] at hab; aesop; · exact congr_arg _ ( by rw [ show nc_proj_sym g ∘ nc_liftSym = id from funext fun x => nc_proj_liftSym g x ] ; simp +decide ); · rcases hr with ( ⟨ k, hk, rfl ⟩ | rfl | ⟨ k, hk, rfl ⟩ ) <;> simp_all +decide [ nc_is_clean ] ; · specialize h₂' ( nc_aux a k ) ; simp_all +decide [ nc_aux ] ; · contrapose! h₁'; exact ⟨ _, Or.inr <| Or.inl ⟨ 0, by linarith, rfl ⟩, a, 0, rfl ⟩; · exact False.elim ( h₁' _ ( Or.inr <| Or.inr <| Or.inl rfl ) _ _ rfl ) /-- Inductive step-count version of CS_derives. -/ private inductive CS_derives_n (g : CS_grammar T) : ℕ → List (symbol T g.nt) → List (symbol T g.nt) → Prop | zero : ∀ s, CS_derives_n g 0 s s | step : ∀ {n s₁ s₂ s₃}, CS_transforms g s₁ s₂ → CS_derives_n g n s₂ s₃ → CS_derives_n g (n+1) s₁ s₃ /- PROVIDED SOLUTION By induction on the CS_derives_n proof. Case zero: CS_deri_self. Case step: use CS_deri_of_tran_deri to combine the single step with the inductive hypothesis. -/ private lemma CS_derives_n_to_derives {g : CS_grammar T} {n : ℕ} {s₁ s₂ : List (symbol T g.nt)} (h : CS_derives_n g n s₁ s₂) : CS_derives g s₁ s₂ := by induction h; · exact CS_deri_self; · exact? /- PROVIDED SOLUTION By induction on h (ReflTransGen). Case refl: use ⟨0, CS_derives_n.zero s₁⟩. Case tail: obtain ⟨n, hn⟩ from IH, then use ⟨n+1, CS_derives_n.step (the step) hn⟩. Wait, ReflTransGen.tail gives h₁₂ : ReflTransGen R s₁ s₂ and h₂₃ : R s₂ s₃. The IH is for h₁₂. So we get ⟨n, hn⟩ from IH. Then CS_derives_n n s₁ s₂ and CS_transforms s₂ s₃. We need CS_derives_n for s₁ to s₃. But CS_derives_n.step prepends a step, not appends. So we need to show CS_derives_n (n+1) s₁ s₃ from CS_derives_n n s₁ s₂ and CS_transforms s₂ s₃. Define a helper by induction on CS_derives_n that appends a step. -/ private lemma CS_derives_to_derives_n {g : CS_grammar T} {s₁ s₂ : List (symbol T g.nt)} (h : CS_derives g s₁ s₂) : ∃ n, CS_derives_n g n s₁ s₂ := by have h_ind : ∀ {s₁ s₂ : List (symbol T g.nt)}, CS_derives g s₁ s₂ → ∃ n, CS_derives_n g n s₁ s₂ := by intros s₁ s₂ h_trans induction' h_trans with s₁ s₂ h_trans ih; · exact ⟨ 0, CS_derives_n.zero s₁ ⟩; · rename_i h₁ h₂; obtain ⟨ n, hn ⟩ := h₂; have h_append : ∀ {n : ℕ} {s₁ s₂ s₃ : List (symbol T g.nt)}, CS_derives_n g n s₁ s₂ → CS_transforms g s₂ s₃ → ∃ m, CS_derives_n g m s₁ s₃ := by intros n s₁ s₂ s₃ hn h_trans induction' hn with n s₁ s₂ hn ih generalizing s₃; · exact ⟨ 1, CS_derives_n.step h_trans ( CS_derives_n.zero _ ) ⟩; · exact Exists.elim ( ‹∀ { s₃ : List ( symbol T g.nt ) }, CS_transforms g ih s₃ → ∃ m, CS_derives_n g m hn s₃› h_trans ) fun m hm => ⟨ m + 1, CS_derives_n.step ‹_› hm ⟩; exact h_append hn ih; exact h_ind h /- PROBLEM Tail decomposition: CS_derives_n can be split from the end. PROVIDED SOLUTION By induction on h : CS_derives_n g (n+1) s₁ s₂. The only constructor that can produce n+1 is CS_derives_n.step. So h = step h_step h_rest where h_step : CS_transforms g s₁ s_mid, h_rest : CS_derives_n g n s_mid s₂. Now induct on h_rest: if n = 0, h_rest is zero, so s_mid = s₂, and s_last = s₁ with CS_derives_n 0 s₁ s₁ and CS_transforms s₁ s₂. If n = k+1, h_rest = step h_step' h_rest'. By IH on h_rest (which has n steps, one less), get s_last' with CS_derives_n k s_mid s_last' and CS_transforms s_last' s₂. Then s_last = s_last' with CS_derives_n (k+1) s₁ s_last' (using step h_step (CS_derives_n k part)) and CS_transforms s_last' s₂. -/ private lemma CS_derives_n_tail {g : CS_grammar T} {n : ℕ} {s₁ s₂ : List (symbol T g.nt)} (h : CS_derives_n g (n+1) s₁ s₂) : ∃ s_last, CS_derives_n g n s₁ s_last ∧ CS_transforms g s_last s₂ := by induction' n with n ih generalizing s₁ s₂; · obtain ⟨ s_last, h₁, h₂ ⟩ := h; rename_i h₁ h₂; cases h₂; exact ⟨ s₁, CS_derives_n.zero s₁, h₁ ⟩; · rcases h with ⟨ s_mid, h_step, h_rest ⟩; obtain ⟨ s_last, hs_last₁, hs_last₂ ⟩ := ih ‹_›; exact ⟨ s_last, by exact?, hs_last₂ ⟩ ; /- PROBLEM Split CS_derives_n at any point. PROVIDED SOLUTION By induction on m. Base case m = 0: s₂ = s₁, use ⟨s₁, CS_derives_n.zero s₁, h⟩. Inductive case m+1: h : CS_derives_n g ((m+1)+n) s₁ s₃. This is CS_derives_n.step h_step h_rest where h_step : CS_transforms s₁ s_mid, h_rest : CS_derives_n g (m+n) s_mid s₃. By IH on h_rest: ∃ s₂, CS_derives_n g m s_mid s₂ ∧ CS_derives_n g n s₂ s₃. Prepend h_step: CS_derives_n g (m+1) s₁ s₂ and CS_derives_n g n s₂ s₃. -/ private lemma CS_derives_n_split {g : CS_grammar T} {m n : ℕ} {s₁ s₃ : List (symbol T g.nt)} (h : CS_derives_n g (m + n) s₁ s₃) : ∃ s₂, CS_derives_n g m s₁ s₂ ∧ CS_derives_n g n s₂ s₃ := by induction' m with m ih generalizing s₁ s₃; · exact ⟨ s₁, CS_derives_n.zero s₁, by simpa using h ⟩; · rw [ Nat.succ_add ] at h; rcases h with ⟨ s₂, hs₂ ⟩; obtain ⟨ s₂, hs₂ ⟩ := ih ‹_›; exact ⟨ s₂, by exact CS_derives_n.step ‹_› hs₂.1, hs₂.2 ⟩ /-- Prepend a step to CS_derives_n. -/ private lemma CS_derives_n_cons {g : CS_grammar T} {n : ℕ} {s₁ s₂ s₃ : List (symbol T g.nt)} (h₁ : CS_transforms g s₁ s₂) (h₂ : CS_derives_n g n s₂ s₃) : CS_derives_n g (n+1) s₁ s₃ := CS_derives_n.step h₁ h₂ -- TODO complete the proof that Noncontracting grammars are CS and show equivalence end NoncontractingToCS variable {T : Type}