import Langlib.Classes.ContextFree.Definition import Langlib.Grammars.ContextFree.EquivMathlibCFG import Langlib.Grammars.ContextFree.Toolbox import Langlib.Classes.ContextFree.Pumping.Pumping import Langlib.Classes.ContextFree.Pumping.Utils import Langlib.Utilities.ListUtils import Mathlib set_option maxHeartbeats 400000 /-! # Ogden's Lemma for Context-Free Languages This file states and proves Ogden's lemma, a strengthened version of the pumping lemma for context-free languages. While the pumping lemma guarantees that long enough strings in a CFL can be "pumped", Ogden's lemma allows the user to *mark* certain positions of the string and guarantees that the pump pieces contain marked positions. ## Main declarations - `countMarkedIn` — counts how many positions in a range `[start, start + len)` are marked. - `Language.IsContextFree.ogdens_lemma` — Ogden's lemma stated using Mathlib's `Language.IsContextFree`. - `CF_ogdens_lemma` — Ogden's lemma stated using the project's `is_CF` predicate. ## Implementation notes The proof follows the standard Ogden argument: 1. Convert to Chomsky Normal Form (CNF). 2. Navigate along the "marked path" to find a subtree with bounded marked height. 3. Use the pigeonhole principle on branching nonterminals to find a repeat. 4. The branching property ensures marked positions in the pump parts (v, y). 5. The bounded marked height ensures the marked-count bound on vxy. The core technical lemma `ogdens_marked_path_decomp` implements the marked-path pigeonhole argument. It tracks branching nonterminals in a finite set and detects collisions to construct the Ogden decomposition. ## References * William F. Ogden, "A Helpful Result for Proving Inherent Ambiguity", *Mathematical Systems Theory* **2** (1968), 191–194. -/ open List variable {T : Type} /-! ## Counting marked positions -/ /-- Count how many natural numbers in the interval `[start, start + len)` satisfy `P`. -/ noncomputable def countMarkedIn (P : ℕ → Prop) [DecidablePred P] (start len : ℕ) : ℕ := ((Finset.range len).filter (fun i => P (start + i))).card @[simp] lemma countMarkedIn_zero (P : ℕ → Prop) [DecidablePred P] (start : ℕ) : countMarkedIn P start 0 = 0 := by simp [countMarkedIn] lemma countMarkedIn_le (P : ℕ → Prop) [DecidablePred P] (start len : ℕ) : countMarkedIn P start len ≤ len := le_trans (Finset.card_filter_le _ _) (by simp) lemma countMarkedIn_add (P : ℕ → Prop) [DecidablePred P] (start len₁ len₂ : ℕ) : countMarkedIn P start (len₁ + len₂) = countMarkedIn P start len₁ + countMarkedIn P (start + len₁) len₂ := by unfold countMarkedIn rw [Finset.card_filter, Finset.card_filter, Finset.card_filter, Finset.sum_range_add] ac_rfl lemma countMarkedIn_mono_len (P : ℕ → Prop) [DecidablePred P] (start : ℕ) {len₁ len₂ : ℕ} (h : len₁ ≤ len₂) : countMarkedIn P start len₁ ≤ countMarkedIn P start len₂ := Finset.card_mono <| Finset.filter_subset_filter _ <| Finset.range_mono h /-! ## Parse tree marked count -/ namespace ChomskyNormalFormGrammar universe uT uN variable {T : Type uT} {g : ChomskyNormalFormGrammar.{uN, uT} T} namespace parseTree /-- The number of marked positions in a parse tree's yield. -/ noncomputable def mc (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) {n : g.NT} (t : parseTree n) : ℕ := countMarkedIn P offset t.yield.length lemma mc_node (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) {n c₁ c₂ : g.NT} (t₁ : parseTree c₁) (t₂ : parseTree c₂) (h : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) : (parseTree.node t₁ t₂ h).mc P offset = t₁.mc P offset + t₂.mc P (offset + t₁.yield.length) := by simp only [mc, yield, List.length_append] exact countMarkedIn_add P offset t₁.yield.length t₂.yield.length /-! ## Marked height -/ /-- The "marked branching depth": maximum number of branching points (where both children have marked descendants) on any root-to-leaf path. -/ noncomputable def markedHeight (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) {n : g.NT} : parseTree n → ℕ | leaf _ _ => 0 | @node _ _ _ c₁ c₂ t₁ t₂ _ => let mc₁ := t₁.mc P offset let mc₂ := t₂.mc P (offset + t₁.yield.length) if mc₁ > 0 ∧ mc₂ > 0 then max (t₁.markedHeight P offset) (t₂.markedHeight P (offset + t₁.yield.length)) + 1 else if mc₁ > 0 then t₁.markedHeight P offset else t₂.markedHeight P (offset + t₁.yield.length) /-! ## Key bounds -/ /-- mc ≤ 2^markedHeight -/ lemma mc_le_pow_markedHeight (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) {n : g.NT} (t : parseTree n) : t.mc P offset ≤ 2 ^ (t.markedHeight P offset) := by induction t generalizing offset with | leaf t₁ t₂ => simp [mc, markedHeight] exact countMarkedIn_le P offset (leaf t₁ t₂).yield.length | node t₁ t₂ hnc ih₁ ih₂ => rw [mc_node] show t₁.mc P offset + t₂.mc P (offset + t₁.yield.length) ≤ 2 ^ (markedHeight P offset (t₁.node t₂ hnc)) rw [show markedHeight P offset (t₁.node t₂ hnc) = if t₁.mc P offset > 0 ∧ t₂.mc P (offset + t₁.yield.length) > 0 then max (t₁.markedHeight P offset) (t₂.markedHeight P (offset + t₁.yield.length)) + 1 else if t₁.mc P offset > 0 then t₁.markedHeight P offset else t₂.markedHeight P (offset + t₁.yield.length) from rfl] split_ifs <;> simp_all [pow_succ'] calc t₁.mc P offset + t₂.mc P (offset + t₁.yield.length) ≤ 2 ^ t₁.markedHeight P offset + 2 ^ t₂.markedHeight P (offset + t₁.yield.length) := Nat.add_le_add (ih₁ _) (ih₂ _) _ ≤ 2 ^ max (t₁.markedHeight P offset) (t₂.markedHeight P (offset + t₁.yield.length)) + 2 ^ max (t₁.markedHeight P offset) (t₂.markedHeight P (offset + t₁.yield.length)) := Nat.add_le_add (Nat.pow_le_pow_right (by omega) (le_max_left _ _)) (Nat.pow_le_pow_right (by omega) (le_max_right _ _)) _ = _ := by ring /-- 2^h ≤ mc → h ≤ markedHeight -/ lemma mc_implies_markedHeight (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) {n : g.NT} (t : parseTree n) (h : ℕ) (hmc : 2 ^ h ≤ t.mc P offset) : h ≤ t.markedHeight P offset := by have h_mc_le := mc_le_pow_markedHeight P offset t exact le_of_not_gt fun hlt => by linarith [pow_lt_pow_right₀ (by decide : 1 < 2) hlt] /-- markedHeight ≤ height -/ lemma markedHeight_le_height (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) {n : g.NT} (t : parseTree n) : t.markedHeight P offset ≤ t.height := by induction t generalizing offset with | leaf => exact Nat.zero_le _ | node t₁ t₂ _ ih₁ ih₂ => unfold markedHeight height grind +ring variable [DecidableEq g.NT] [DecidableEq (Σ _n : g.NT, parseTree _n)] /-! ## Derivation helper lemmas -/ /-- From a derivation of the left child, build a derivation of the node. -/ lemma node_derives_left {n c₁ c₂ : g.NT} {t₂ : parseTree c₂} (hnc : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) {u : List (Symbol T g.NT)} (h : g.Derives [Symbol.nonterminal c₁] u) : g.Derives [Symbol.nonterminal n] (u ++ t₂.yield.map Symbol.terminal) := by have h1 : g.Derives [Symbol.nonterminal n] ([Symbol.nonterminal c₁] ++ [Symbol.nonterminal c₂]) := .single ⟨_, hnc, ChomskyNormalFormRule.Rewrites.input_output⟩ have h2 : g.Derives ([Symbol.nonterminal c₁] ++ [Symbol.nonterminal c₂]) (u ++ [Symbol.nonterminal c₂]) := h.append_right _ have h3 : g.Derives (u ++ [Symbol.nonterminal c₂]) (u ++ t₂.yield.map Symbol.terminal) := t₂.yield_derives.append_left _ exact h1.trans (h2.trans h3) /-- From a derivation of the right child, build a derivation of the node. -/ lemma node_derives_right {n c₁ c₂ : g.NT} {t₁ : parseTree c₁} (hnc : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) {u : List (Symbol T g.NT)} (h : g.Derives [Symbol.nonterminal c₂] u) : g.Derives [Symbol.nonterminal n] (t₁.yield.map Symbol.terminal ++ u) := by have h1 : g.Derives [Symbol.nonterminal n] ([Symbol.nonterminal c₁] ++ [Symbol.nonterminal c₂]) := .single ⟨_, hnc, ChomskyNormalFormRule.Rewrites.input_output⟩ have h2 : g.Derives ([Symbol.nonterminal c₁] ++ [Symbol.nonterminal c₂]) (t₁.yield.map Symbol.terminal ++ [Symbol.nonterminal c₂]) := t₁.yield_derives.append_right _ have h3 : g.Derives (t₁.yield.map Symbol.terminal ++ [Symbol.nonterminal c₂]) (t₁.yield.map Symbol.terminal ++ u) := h.append_left _ exact h1.trans (h2.trans h3) /-! ## Helper lemmas for extending Ogden results through nodes -/ /- PROBLEM Extend a Left Ogden result from the left child through a node. PROVIDED SOLUTION Take u' = u, v' = v, x' = x, y' = y, z' = z ++ t₂.yield. - yield: (node t₁ t₂ hnc).yield = t₁.yield ++ t₂.yield = (u ++ v ++ x ++ y ++ z) ++ t₂.yield = u ++ v ++ x ++ y ++ (z ++ t₂.yield). Done by List.append_assoc. - marked count: same offsets since u' = u, v' = v, etc. Direct from hmark. - bound: same offsets. Direct from hbound. - pump: For each i, hpump i gives g.Derives [c₁] ((u ++ v^+^i ++ x ++ y^+^i ++ z).map terminal). Apply node_derives_left hnc to get g.Derives [n] ((u ++ v^+^i ++ x ++ y^+^i ++ z).map terminal ++ t₂.yield.map terminal). Then rewrite: (u ++ v^+^i ++ x ++ y^+^i ++ z).map terminal ++ t₂.yield.map terminal = (u ++ v^+^i ++ x ++ y^+^i ++ (z ++ t₂.yield)).map terminal. -/ lemma ogden_extend_left_via_left {n c₁ c₂ : g.NT} {t₁ : parseTree c₁} {t₂ : parseTree c₂} (hnc : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) {u v x y z : List T} (hyield : t₁.yield = u ++ v ++ x ++ y ++ z) (hmark : 0 < countMarkedIn P (offset + u.length) v.length + countMarkedIn P (offset + u.length + v.length + x.length) y.length) (hbound : countMarkedIn P (offset + u.length) (v.length + x.length + y.length) ≤ 2 ^ g.generators.card) (hpump : ∀ i : ℕ, g.Derives [Symbol.nonterminal c₁] ((u ++ v ^+^ i ++ x ++ y ^+^ i ++ z).map Symbol.terminal)) : ∃ u' v' x' y' z' : List T, (parseTree.node t₁ t₂ hnc).yield = u' ++ v' ++ x' ++ y' ++ z' ∧ 0 < countMarkedIn P (offset + u'.length) v'.length + countMarkedIn P (offset + u'.length + v'.length + x'.length) y'.length ∧ countMarkedIn P (offset + u'.length) (v'.length + x'.length + y'.length) ≤ 2 ^ g.generators.card ∧ ∀ i : ℕ, g.Derives [Symbol.nonterminal n] ((u' ++ v' ^+^ i ++ x' ++ y' ^+^ i ++ z').map Symbol.terminal) := by refine' ⟨ u, v, x, y, z ++ t₂.yield, _, _, _, _ ⟩ <;> simp_all +decide [ List.append_assoc ]; · unfold parseTree.yield; aesop; · intro i specialize hpump i generalize_proofs at *; ( convert ChomskyNormalFormGrammar.parseTree.node_derives_left hnc hpump using 1 ; simp +decide [ List.append_assoc ]; convert rfl using 1) /- PROBLEM Extend a Left Ogden result from the right child through a node. PROVIDED SOLUTION Take u' = t₁.yield ++ u, v' = v, x' = x, y' = y, z' = z. - yield: (node t₁ t₂ hnc).yield = t₁.yield ++ t₂.yield = t₁.yield ++ (u ++ v ++ x ++ y ++ z) = (t₁.yield ++ u) ++ v ++ x ++ y ++ z. - marked count: offset + u'.length = offset + t₁.yield.length + u.length. Same as hmark. - bound: same. - pump: For each i, hpump i gives g.Derives [c₂] ((u ++ v^+^i ++ x ++ y^+^i ++ z).map terminal). Apply node_derives_right hnc to get g.Derives [n] (t₁.yield.map terminal ++ (u ++ v^+^i ++ x ++ y^+^i ++ z).map terminal). Then rewrite: = ((t₁.yield ++ u) ++ v^+^i ++ x ++ y^+^i ++ z).map terminal. -/ lemma ogden_extend_left_via_right {n c₁ c₂ : g.NT} {t₁ : parseTree c₁} {t₂ : parseTree c₂} (hnc : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) {u v x y z : List T} (hyield : t₂.yield = u ++ v ++ x ++ y ++ z) (hmark : 0 < countMarkedIn P (offset + t₁.yield.length + u.length) v.length + countMarkedIn P (offset + t₁.yield.length + u.length + v.length + x.length) y.length) (hbound : countMarkedIn P (offset + t₁.yield.length + u.length) (v.length + x.length + y.length) ≤ 2 ^ g.generators.card) (hpump : ∀ i : ℕ, g.Derives [Symbol.nonterminal c₂] ((u ++ v ^+^ i ++ x ++ y ^+^ i ++ z).map Symbol.terminal)) : ∃ u' v' x' y' z' : List T, (parseTree.node t₁ t₂ hnc).yield = u' ++ v' ++ x' ++ y' ++ z' ∧ 0 < countMarkedIn P (offset + u'.length) v'.length + countMarkedIn P (offset + u'.length + v'.length + x'.length) y'.length ∧ countMarkedIn P (offset + u'.length) (v'.length + x'.length + y'.length) ≤ 2 ^ g.generators.card ∧ ∀ i : ℕ, g.Derives [Symbol.nonterminal n] ((u' ++ v' ^+^ i ++ x' ++ y' ^+^ i ++ z').map Symbol.terminal) := by refine' ⟨ t₁.yield ++ u, v, x, y, z, _, _, _, _ ⟩ <;> simp_all +decide [ List.length_append, nTimes ]; · exact hyield ▸ rfl; · simpa only [ add_assoc ] using hmark; · simpa only [ add_assoc ] using hbound; · intro i; convert node_derives_right hnc ( hpump i ) using 1 /- PROBLEM Extend a Right Ogden result from the left child through a node. PROVIDED SOLUTION Take n'' = n', p'' = p', u₂ = u₁, z₂ = z₁ ++ t₂.yield. - yield: (node t₁ t₂ hnc).yield = t₁.yield ++ t₂.yield = (u₁ ++ p'.yield ++ z₁) ++ t₂.yield = u₁ ++ p'.yield ++ (z₁ ++ t₂.yield). - mc: p''.mc P (offset + u₂.length) = p'.mc P (offset + u₁.length) > 0. Direct. - derives: Apply node_derives_left hnc to hderiv: g.Derives [n] ((u₁.map terminal ++ [nonterminal n'] ++ z₁.map terminal) ++ t₂.yield.map terminal). Rewrite = u₁.map terminal ++ [nonterminal n'] ++ (z₁ ++ t₂.yield).map terminal. -/ lemma ogden_extend_right_via_left {n c₁ c₂ : g.NT} {t₁ : parseTree c₁} {t₂ : parseTree c₂} (hnc : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) (s : Finset g.NT) {n' : g.NT} (hn's : n' ∈ s) {p' : parseTree n'} {u₁ z₁ : List T} (hyield : t₁.yield = u₁ ++ p'.yield ++ z₁) (hmc' : 0 < p'.mc P (offset + u₁.length)) (hderiv : g.Derives [Symbol.nonterminal c₁] (u₁.map Symbol.terminal ++ [Symbol.nonterminal n'] ++ z₁.map Symbol.terminal)) : ∃ n'' ∈ s, ∃ (p'' : parseTree n'') (u₂ z₂ : List T), (parseTree.node t₁ t₂ hnc).yield = u₂ ++ p''.yield ++ z₂ ∧ 0 < p''.mc P (offset + u₂.length) ∧ g.Derives [Symbol.nonterminal n] (u₂.map Symbol.terminal ++ [Symbol.nonterminal n''] ++ z₂.map Symbol.terminal) := by use n', hn's, p', u₁, z₁ ++ t₂.yield.map (fun x => x); refine' ⟨ _, hmc', _ ⟩; · simp +decide [ hyield, parseTree.yield ]; · convert node_derives_left hnc hderiv using 1 ; simp +decide [ *, List.map_append ]; congr! 1 /- PROBLEM Extend a Right Ogden result from the right child through a node. PROVIDED SOLUTION Take n'' = n', p'' = p', u₂ = t₁.yield ++ u₁, z₂ = z₁. - yield: (node t₁ t₂ hnc).yield = t₁.yield ++ t₂.yield = t₁.yield ++ (u₁ ++ p'.yield ++ z₁) = (t₁.yield ++ u₁) ++ p'.yield ++ z₁. - mc: p''.mc P (offset + u₂.length) = p'.mc P (offset + t₁.yield.length + u₁.length) > 0. - derives: Apply node_derives_right hnc to hderiv: g.Derives [n] (t₁.yield.map terminal ++ (u₁.map terminal ++ [nonterminal n'] ++ z₁.map terminal)). Rewrite = (t₁.yield ++ u₁).map terminal ++ [nonterminal n'] ++ z₁.map terminal. -/ lemma ogden_extend_right_via_right {n c₁ c₂ : g.NT} {t₁ : parseTree c₁} {t₂ : parseTree c₂} (hnc : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) (s : Finset g.NT) {n' : g.NT} (hn's : n' ∈ s) {p' : parseTree n'} {u₁ z₁ : List T} (hyield : t₂.yield = u₁ ++ p'.yield ++ z₁) (hmc' : 0 < p'.mc P (offset + t₁.yield.length + u₁.length)) (hderiv : g.Derives [Symbol.nonterminal c₂] (u₁.map Symbol.terminal ++ [Symbol.nonterminal n'] ++ z₁.map Symbol.terminal)) : ∃ n'' ∈ s, ∃ (p'' : parseTree n'') (u₂ z₂ : List T), (parseTree.node t₁ t₂ hnc).yield = u₂ ++ p''.yield ++ z₂ ∧ 0 < p''.mc P (offset + u₂.length) ∧ g.Derives [Symbol.nonterminal n] (u₂.map Symbol.terminal ++ [Symbol.nonterminal n''] ++ z₂.map Symbol.terminal) := by use n'; refine' ⟨ hn's, p', t₁.yield ++ u₁, z₁, _, _, _ ⟩; · simp +decide [ hyield, parseTree.yield ]; · simpa [ add_assoc, List.length_append ] using hmc'; · convert node_derives_right hnc hderiv using 1 ; simp +decide [ *, List.map_append ]; congr! 1 /- PROBLEM Construct an Ogden Left result from a collision with n in the left child, when both children have marked positions (branching node). PROVIDED SOLUTION Take u = [], v = u₁, x = p'.yield, y = z₁ ++ t₂.yield, z = []. Yield: (node t₁ t₂ hnc).yield = t₁.yield ++ t₂.yield = (u₁ ++ p'.yield ++ z₁) ++ t₂.yield = [] ++ u₁ ++ p'.yield ++ (z₁ ++ t₂.yield) ++ []. Marked count: We need 0 < countMarkedIn P (offset + 0) u₁.length + countMarkedIn P (offset + u₁.length + p'.yield.length) (z₁ ++ t₂.yield).length. The second term counts marked positions in z₁ ++ t₂.yield at the correct offset. Note: offset + u₁.length + p'.yield.length = offset + (u₁ ++ p'.yield).length, and (z₁ ++ t₂.yield) starts at offset + u₁.length + p'.yield.length. By countMarkedIn_add, countMarkedIn on z₁ ++ t₂.yield = countMarkedIn on z₁ + countMarkedIn on t₂.yield. Since t₂.mc P (offset + t₁.yield.length) > 0 (by hmc₂), and t₁.yield = u₁ ++ p'.yield ++ z₁, we have t₁.yield.length = u₁.length + p'.yield.length + z₁.length. So the t₂ marked count starts at offset + u₁.length + p'.yield.length + z₁.length. This is exactly countMarkedIn P (offset + u₁.length + p'.yield.length + z₁.length) t₂.yield.length > 0. So the total on y is ≥ countMarkedIn on t₂.yield > 0. Bound: countMarkedIn P offset (u₁.length + p'.yield.length + (z₁ ++ t₂.yield).length) = countMarkedIn P offset (t₁.yield.length + t₂.yield.length) = mc(node t₁ t₂ hnc) ≤ 2^generators.card. Pump: From hderiv₁, g.Derives [c₁] (u₁.map terminal ++ [nonterminal n] ++ z₁.map terminal). By node_derives_left hnc: g.Derives [n] (u₁.map terminal ++ [nonterminal n] ++ z₁.map terminal ++ t₂.yield.map terminal). Rewrite: = u₁.map terminal ++ [nonterminal n] ++ (z₁ ++ t₂.yield).map terminal. So g.Derives [nonterminal n] (u₁.map terminal ++ [nonterminal n] ++ (z₁ ++ t₂.yield).map terminal). By pumping_string, for each i: g.Derives [n] (u₁^+^i.map terminal ++ [nonterminal n] ++ (z₁ ++ t₂.yield)^+^i.map terminal). Then compose with p'.yield_derives (g.Derives [n] (p'.yield.map terminal)): g.Derives [n] (u₁^+^i.map terminal ++ p'.yield.map terminal ++ (z₁ ++ t₂.yield)^+^i.map terminal). Rewrite: = ([] ++ u₁^+^i ++ p'.yield ++ (z₁ ++ t₂.yield)^+^i ++ []).map terminal. Use the Derives.append_left and Derives.append_right to compose. Actually, more precisely: pumping_string gives Derives on lists of Symbol (mixed terminal/nonterminal). From pumping_string we get: g.Derives [nonterminal n] (u₁.map terminal ^+^ i ++ [nonterminal n] ++ (z₁ ++ t₂.yield).map terminal ^+^ i) Then use Derives.trans with (Derives.append_left p'.yield_derives (u₁.map terminal ^+^ i)).append_right ((z₁ ++ t₂.yield).map terminal ^+^ i). And use nTimes_map to rewrite u₁.map terminal ^+^ i = (u₁ ^+^ i).map terminal, etc. -/ lemma ogden_pump_from_left {n c₁ c₂ : g.NT} {t₁ : parseTree c₁} {t₂ : parseTree c₂} (hnc : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) (hmc₂ : 0 < t₂.mc P (offset + t₁.yield.length)) (hmc_bound : (parseTree.node t₁ t₂ hnc).mc P offset ≤ 2 ^ g.generators.card) {p' : parseTree n} {u₁ z₁ : List T} (hyield₁ : t₁.yield = u₁ ++ p'.yield ++ z₁) (hmc' : 0 < p'.mc P (offset + u₁.length)) (hderiv₁ : g.Derives [Symbol.nonterminal c₁] (u₁.map Symbol.terminal ++ [Symbol.nonterminal n] ++ z₁.map Symbol.terminal)) : ∃ u v x y z : List T, (parseTree.node t₁ t₂ hnc).yield = u ++ v ++ x ++ y ++ z ∧ 0 < countMarkedIn P (offset + u.length) v.length + countMarkedIn P (offset + u.length + v.length + x.length) y.length ∧ countMarkedIn P (offset + u.length) (v.length + x.length + y.length) ≤ 2 ^ g.generators.card ∧ ∀ i : ℕ, g.Derives [Symbol.nonterminal n] ((u ++ v ^+^ i ++ x ++ y ^+^ i ++ z).map Symbol.terminal) := by refine' ⟨ [ ], u₁, p'.yield, z₁ ++ t₂.yield, [ ], _, _, _, _ ⟩ <;> simp_all +decide [ Finset.card_range ]; · simp [parseTree.yield, hyield₁]; · contrapose! hmc₂; simp_all +decide [ add_assoc, countMarkedIn_add ] ; exact hmc₂.2.2; · unfold mc at hmc_bound; simp_all +decide [ countMarkedIn ] ; convert hmc_bound using 3 ; simp +decide [ *, ChomskyNormalFormGrammar.parseTree.yield ] ; ring!; · intro i have h_deriv : g.Derives [Symbol.nonterminal n] (map Symbol.terminal (u₁ ^+^ i) ++ [Symbol.nonterminal n] ++ map Symbol.terminal ((z₁ ++ t₂.yield) ^+^ i)) := by convert pumping_string _ i using 1; rotate_left; exact u₁.map Symbol.terminal exact ( z₁ ++ t₂.yield ).map Symbol.terminal generalize_proofs at *; ( convert node_derives_left hnc hderiv₁ using 1 ; simp +decide [ hyield₁ ]; congr! 1); exact congr_arg₂ _ ( congr_arg₂ _ ( by exact? ) rfl ) ( by exact? ); grind +suggestions /- PROBLEM Construct an Ogden Left result from a collision with n in the right child, when both children have marked positions (branching node). PROVIDED SOLUTION Take u = [], v = t₁.yield ++ u₁, x = p'.yield, y = z₁, z = []. Yield: (node t₁ t₂ hnc).yield = t₁.yield ++ t₂.yield = t₁.yield ++ (u₁ ++ p'.yield ++ z₁) = [] ++ (t₁.yield ++ u₁) ++ p'.yield ++ z₁ ++ []. Marked count: We need 0 < countMarkedIn P offset (t₁.yield ++ u₁).length + countMarkedIn P (offset + (t₁.yield ++ u₁).length + p'.yield.length) z₁.length. The first term counts marked positions in t₁.yield ++ u₁. Since hmc₁ > 0, t₁.mc P offset > 0 means countMarkedIn P offset t₁.yield.length > 0. Since (t₁.yield ++ u₁).length ≥ t₁.yield.length, by countMarkedIn_mono_len, countMarkedIn P offset (t₁.yield ++ u₁).length ≥ countMarkedIn P offset t₁.yield.length > 0. Bound: countMarkedIn P offset ((t₁.yield ++ u₁).length + p'.yield.length + z₁.length) = countMarkedIn P offset (t₁.yield.length + t₂.yield.length) = mc(node t₁ t₂ hnc) ≤ 2^generators.card. Pump: From hderiv₂, g.Derives [c₂] (u₁.map terminal ++ [nonterminal n] ++ z₁.map terminal). By node_derives_right hnc: g.Derives [n] (t₁.yield.map terminal ++ (u₁.map terminal ++ [nonterminal n] ++ z₁.map terminal)). Rewrite: = (t₁.yield ++ u₁).map terminal ++ [nonterminal n] ++ z₁.map terminal. By pumping_string, for each i: g.Derives [n] ((t₁.yield ++ u₁).map terminal ^+^ i ++ [nonterminal n] ++ z₁.map terminal ^+^ i). Then compose with p'.yield_derives using append_left/right. Use nTimes_map to convert. -/ lemma ogden_pump_from_right {n c₁ c₂ : g.NT} {t₁ : parseTree c₁} {t₂ : parseTree c₂} (hnc : (ChomskyNormalFormRule.node n c₁ c₂) ∈ g.rules) (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) (hmc₁ : 0 < t₁.mc P offset) (hmc_bound : (parseTree.node t₁ t₂ hnc).mc P offset ≤ 2 ^ g.generators.card) {p' : parseTree n} {u₁ z₁ : List T} (hyield₂ : t₂.yield = u₁ ++ p'.yield ++ z₁) (hmc' : 0 < p'.mc P (offset + t₁.yield.length + u₁.length)) (hderiv₂ : g.Derives [Symbol.nonterminal c₂] (u₁.map Symbol.terminal ++ [Symbol.nonterminal n] ++ z₁.map Symbol.terminal)) : ∃ u v x y z : List T, (parseTree.node t₁ t₂ hnc).yield = u ++ v ++ x ++ y ++ z ∧ 0 < countMarkedIn P (offset + u.length) v.length + countMarkedIn P (offset + u.length + v.length + x.length) y.length ∧ countMarkedIn P (offset + u.length) (v.length + x.length + y.length) ≤ 2 ^ g.generators.card ∧ ∀ i : ℕ, g.Derives [Symbol.nonterminal n] ((u ++ v ^+^ i ++ x ++ y ^+^ i ++ z).map Symbol.terminal) := by by_contra h_contra; refine' h_contra ⟨ [ ], t₁.yield ++ u₁, p'.yield, z₁, [ ], _, _, _, _ ⟩ <;> simp +decide [ * ]; · simp +decide [ hyield₂, parseTree.yield ]; · refine' Or.inl ( lt_of_lt_of_le hmc₁ _ ); apply_rules [ countMarkedIn_mono_len ] ; simp +decide [ * ] ; · convert hmc_bound using 1; unfold mc; simp +decide [ *, add_assoc ] ; rw [ show ( t₁.node t₂ hnc ).yield = t₁.yield ++ t₂.yield from rfl, hyield₂ ] ; simp +decide [ add_assoc ] ; · intro i have h_pump : g.Derives [Symbol.nonterminal n] ((t₁.yield ++ u₁).map Symbol.terminal ^+^ i ++ [Symbol.nonterminal n] ++ (z₁.map Symbol.terminal) ^+^ i) := by have h_deriv : g.Derives [Symbol.nonterminal n] ((t₁.yield ++ u₁).map Symbol.terminal ++ [Symbol.nonterminal n] ++ (z₁.map Symbol.terminal)) := by convert ChomskyNormalFormGrammar.parseTree.node_derives_right hnc _ using 1 ; aesop; simpa using hderiv₂; have h_pump : ∀ i : ℕ, g.Derives [Symbol.nonterminal n] ((t₁.yield ++ u₁).map Symbol.terminal ^+^ i ++ [Symbol.nonterminal n] ++ (z₁.map Symbol.terminal) ^+^ i) := by intro i have := h_deriv exact? generalize_proofs at *; exact h_pump i generalize_proofs at *; grind +suggestions /-! ## Core Ogden marked-path pigeonhole -/ lemma ogdens_marked_path_decomp {n : g.NT} (p : parseTree n) (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) (s : Finset g.NT) (hs_sub : s ⊆ g.generators) (hp : g.generators.card ≤ p.markedHeight P offset + s.card) (hmc : 0 < p.mc P offset) (hmc_bound : p.mc P offset ≤ 2 ^ g.generators.card) : (∃ u v x y z : List T, p.yield = u ++ v ++ x ++ y ++ z ∧ 0 < countMarkedIn P (offset + u.length) v.length + countMarkedIn P (offset + u.length + v.length + x.length) y.length ∧ countMarkedIn P (offset + u.length) (v.length + x.length + y.length) ≤ 2 ^ g.generators.card ∧ ∀ i : ℕ, g.Derives [Symbol.nonterminal n] ((u ++ v ^+^ i ++ x ++ y ^+^ i ++ z).map Symbol.terminal)) ∨ (∃ n' ∈ s, ∃ (p' : parseTree n') (u₁ z₁ : List T), p.yield = u₁ ++ p'.yield ++ z₁ ∧ 0 < p'.mc P (offset + u₁.length) ∧ g.Derives [Symbol.nonterminal n] (u₁.map Symbol.terminal ++ [Symbol.nonterminal n'] ++ z₁.map Symbol.terminal)) := by induction p generalizing offset s with | @leaf n t hnt => simp only [markedHeight] at hp have hs_eq : s = g.generators := (Finset.subset_iff_eq_of_card_le (by omega)).mp hs_sub have hn_gen : n ∈ g.generators := input_mem_generators hnt have hn_s : n ∈ s := hs_eq ▸ hn_gen right refine ⟨n, hn_s, leaf t hnt, [], [], ?_, ?_, ?_⟩ · simp [yield] · simpa using hmc · simp; exact Derives.refl _ | @node n c₁ c₂ t₁ t₂ hnc ih₁ ih₂ => have hmc_eq := mc_node P offset t₁ t₂ hnc by_cases h₁ : (0 : ℕ) < t₁.mc P offset <;> by_cases h₂ : (0 : ℕ) < t₂.mc P (offset + t₁.yield.length) · -- Case A: both children have mc > 0 have hmh : markedHeight P offset (node t₁ t₂ hnc) = max (t₁.markedHeight P offset) (t₂.markedHeight P (offset + t₁.yield.length)) + 1 := by show (let mc₁ := _; let mc₂ := _; if mc₁ > 0 ∧ mc₂ > 0 then _ else _) = _ simp [h₁, h₂] rw [hmh] at hp by_cases hns : n ∈ s · right refine ⟨n, hns, node t₁ t₂ hnc, [], [], ?_, ?_, ?_⟩ · simp [yield] · simpa using hmc · simp; exact Derives.refl _ · have hn_gen : n ∈ g.generators := input_mem_generators hnc have hs'_sub : insert n s ⊆ g.generators := Finset.insert_subset hn_gen hs_sub have hs'_card : (insert n s).card = s.card + 1 := by rw [Finset.card_insert_eq_ite, if_neg hns] by_cases hmh_le : t₂.markedHeight P (offset + t₁.yield.length) ≤ t₁.markedHeight P offset · -- recurse left have hp₁ : g.generators.card ≤ t₁.markedHeight P offset + (insert n s).card := by simp only [Nat.max_eq_left hmh_le] at hp; omega have hmc₁_bnd : t₁.mc P offset ≤ (2 : ℕ) ^ g.generators.card := by omega rcases ih₁ offset (insert n s) hs'_sub hp₁ h₁ hmc₁_bnd with h_l | h_r · obtain ⟨u, v, x, y, z, hy, hm, hb, hd⟩ := h_l exact Or.inl (ogden_extend_left_via_left hnc P offset hy hm hb hd) · obtain ⟨n', hn', p', u₁', z₁', hy, hmc'', hd⟩ := h_r have h_or : n' = n ∨ n' ∈ s := Finset.mem_insert.mp hn' rcases h_or with h_eq | hn's · subst h_eq exact Or.inl (ogden_pump_from_left hnc P offset h₂ hmc_bound hy hmc'' hd) · exact Or.inr (ogden_extend_right_via_left hnc P offset s hn's hy hmc'' hd) · -- recurse right push_neg at hmh_le have hp₂ : g.generators.card ≤ t₂.markedHeight P (offset + t₁.yield.length) + (insert n s).card := by have := Nat.le_max_right (t₁.markedHeight P offset) (t₂.markedHeight P (offset + t₁.yield.length)) omega have hmc₂_bnd : t₂.mc P (offset + t₁.yield.length) ≤ (2 : ℕ) ^ g.generators.card := by omega rcases ih₂ (offset + t₁.yield.length) (insert n s) hs'_sub hp₂ h₂ hmc₂_bnd with h_l | h_r · obtain ⟨u, v, x, y, z, hy, hm, hb, hd⟩ := h_l exact Or.inl (ogden_extend_left_via_right hnc P offset hy hm hb hd) · obtain ⟨n', hn', p', u₁', z₁', hy, hmc'', hd⟩ := h_r have h_or : n' = n ∨ n' ∈ s := Finset.mem_insert.mp hn' rcases h_or with h_eq | hn's · subst h_eq exact Or.inl (ogden_pump_from_right hnc P offset h₁ hmc_bound hy hmc'' hd) · exact Or.inr (ogden_extend_right_via_right hnc P offset s hn's hy hmc'' hd) · -- Case B: only left child has mc > 0 simp only [not_lt, Nat.le_zero] at h₂ have hmh : markedHeight P offset (node t₁ t₂ hnc) = t₁.markedHeight P offset := by show (let mc₁ := _; let mc₂ := _; if mc₁ > 0 ∧ mc₂ > 0 then _ else _) = _ simp [h₁, h₂] rw [hmh] at hp have hmc₁_bnd : t₁.mc P offset ≤ (2 : ℕ) ^ g.generators.card := by omega rcases ih₁ offset s hs_sub hp h₁ hmc₁_bnd with h_l | h_r · obtain ⟨u, v, x, y, z, hy, hm, hb, hd⟩ := h_l exact Or.inl (ogden_extend_left_via_left hnc P offset hy hm hb hd) · obtain ⟨n', hn', p', u₁', z₁', hy, hmc'', hd⟩ := h_r exact Or.inr (ogden_extend_right_via_left hnc P offset s hn' hy hmc'' hd) · -- Case C: only right child has mc > 0 simp only [not_lt, Nat.le_zero] at h₁ have hmh : markedHeight P offset (node t₁ t₂ hnc) = t₂.markedHeight P (offset + t₁.yield.length) := by show (let mc₁ := _; let mc₂ := _; if mc₁ > 0 ∧ mc₂ > 0 then _ else _) = _ simp [h₁, h₂] rw [hmh] at hp have hmc₂_bnd : t₂.mc P (offset + t₁.yield.length) ≤ (2 : ℕ) ^ g.generators.card := by omega rcases ih₂ (offset + t₁.yield.length) s hs_sub hp h₂ hmc₂_bnd with h_l | h_r · obtain ⟨u, v, x, y, z, hy, hm, hb, hd⟩ := h_l exact Or.inl (ogden_extend_left_via_right hnc P offset hy hm hb hd) · obtain ⟨n', hn', p', u₁', z₁', hy, hmc'', hd⟩ := h_r exact Or.inr (ogden_extend_right_via_right hnc P offset s hn' hy hmc'' hd) · -- Case D: neither child has mc > 0, contradiction simp only [not_lt, Nat.le_zero] at h₁ h₂ omega /-! ## Navigation to bounded-markedHeight subtree -/ /- PROBLEM Navigate along the marked path to find a subtree with `markedHeight = k`. PROVIDED SOLUTION By induction on the parse tree `p`. Base case (leaf): markedHeight of a leaf is 0, so k = 0 and the tree itself works (with u₀ = z₀ = []). Inductive case (node t₁ t₂ hnc): We have p = node t₁ t₂ hnc. Since hmc > 0, at least one child has mc > 0. Look at markedHeight of the node. It's defined by cases: - If both children have mc > 0, markedHeight = max(mh₁, mh₂) + 1 - If only left has mc > 0, markedHeight = mh₁ - If only right has mc > 0, markedHeight = mh₂ Case 1: Both children have mc > 0. Then markedHeight(p) = max(mh₁, mh₂) + 1. If k = 0: Take the tree p itself with u₀ = z₀ = []. But we need mh = k = 0, which would need max(mh₁,mh₂)+1 = 0, impossible. Actually wait, if k ≤ markedHeight(p), and k = 0, we need to find a subtree with mh = 0. We can recurse deeper. Actually, let me think more carefully. If k ≤ max(mh₁, mh₂) + 1: - If k = max(mh₁, mh₂) + 1 = markedHeight(p), take the whole tree p itself. - If k ≤ max(mh₁, mh₂), WLOG mh₁ ≥ mh₂ (so max = mh₁). Then k ≤ mh₁. Since mc₁ > 0, apply IH to t₁. Get some subtree q of t₁ with markedHeight = k. Then yield(p) = yield(t₁) ++ yield(t₂), and yield(t₁) = u₀ ++ yield(q) ++ z₀. So yield(p) = u₀ ++ yield(q) ++ (z₀ ++ yield(t₂)), and the derivation extends via node_derives_left. Case 2: Only left child has mc > 0. Then markedHeight(p) = mh₁. Apply IH to t₁. Get subtree. Extend with right child yield. Case 3: Only right child has mc > 0. Then markedHeight(p) = mh₂. Apply IH to t₂. Get subtree. Extend with left child yield. For the derivation part, use `node_derives_left` or `node_derives_right` composed with the IH derivation. Actually, let me simplify the approach. The key insight is: If k = markedHeight(p), take q = p, u₀ = z₀ = []. This trivially works. If k < markedHeight(p), then p must be a node (since leaf has markedHeight 0, and if k < 0 that's impossible). For a node: - Follow the "marked path" to the child with higher markedHeight (or the one with mc > 0). - Recurse into that child. - Extend u₀ and z₀ appropriately. This gives a straightforward structural induction. -/ lemma ogdens_restrict_mh {n : g.NT} (p : parseTree n) (P : ℕ → Prop) [DecidablePred P] (offset : ℕ) (k : ℕ) (hmh : k ≤ p.markedHeight P offset) (hmc : 0 < p.mc P offset) : ∃ (n' : g.NT) (q : parseTree n') (u₀ z₀ : List T), p.yield = u₀ ++ q.yield ++ z₀ ∧ q.markedHeight P (offset + u₀.length) = k ∧ 0 < q.mc P (offset + u₀.length) ∧ g.Derives [Symbol.nonterminal n] (u₀.map Symbol.terminal ++ [Symbol.nonterminal n'] ++ z₀.map Symbol.terminal) := by by_contra! h_contra; induction' p with _ _ _ _ ih generalizing offset k; · unfold markedHeight at *; simp_all +decide [ List.length ] ; specialize h_contra _ ( leaf ‹_› ‹_› ) [ ] [ ] ; simp_all +decide [ List.length ]; exact h_contra ( Relation.ReflTransGen.refl ); · rename_i t₁ t₂ hnc ih₁ ih₂; by_cases h₁ : 0 < mc P offset t₁ <;> by_cases h₂ : 0 < mc P (offset + t₁.yield.length) t₂ <;> simp +decide [ *, markedHeight ] at hmh hmc ⊢; · by_cases hk : k ≤ max (markedHeight P offset t₁) (markedHeight P (offset + t₁.yield.length) t₂); · by_cases hk₁ : k ≤ markedHeight P offset t₁; · specialize ih₁ offset k hk₁ h₁; refine' ih₁ fun n' q u₀ z₀ h₁ h₂ h₃ h₄ => h_contra n' q u₀ ( z₀ ++ t₂.yield ) _ _ _ _; · simp +decide [ *, parseTree.yield ]; · exact h₂; · exact h₃; · convert node_derives_left hnc h₄ using 1; any_goals exact t₂; simp +decide [ List.map_append ]; · specialize ih₂ ( offset + t₁.yield.length ) ( k ) ( by grind +qlia ) h₂ generalize_proofs at *; ( simp +zetaDelta at *; obtain ⟨ n', q, u₀, z₀, h₁, h₂, h₃, h₄ ⟩ := ih₂; specialize h_contra n' q ( t₁.yield ++ u₀ ) z₀; simp_all +decide [ parseTree.yield ] ; simp_all +decide [ add_assoc ]; exact h_contra ( by simpa [ ← List.append_assoc ] using node_derives_right hnc h₄ )); · specialize h_contra _ ( t₁.node t₂ hnc ) [ ] [ ] ; simp_all +decide [ parseTree.yield ]; exact h_contra ( by rw [ show markedHeight P offset ( t₁.node t₂ hnc ) = max ( markedHeight P offset t₁ ) ( markedHeight P ( offset + t₁.yield.length ) t₂ ) + 1 from by rw [ markedHeight ] ; aesop ] ; omega ) ( Relation.ReflTransGen.refl ); · specialize ih₁ offset k hmh h₁; refine' ih₁ fun n' q u₀ z₀ h₁ h₂ h₃ h₄ => h_contra n' q u₀ ( z₀ ++ t₂.yield ) _ _ _ _; · simp +decide [ h₁, parseTree.yield ]; · exact h₂; · exact h₃; · convert ChomskyNormalFormGrammar.parseTree.node_derives_left hnc h₄ using 1 ; simp +decide [ h₁, List.map_append ]; all_goals tauto; · specialize ih₂ ( offset + t₁.yield.length ) k hmh h₂; contrapose! ih₂; simp_all +decide [ markedHeight ] ; intro n' q u₀ z₀ h₁ h₂ h₃ h₄; specialize h_contra n' q ( t₁.yield ++ u₀ ) z₀; simp_all +decide [ parseTree.yield ] ; simp_all +decide [ add_assoc ]; exact h_contra ( by simpa [ ← List.append_assoc ] using node_derives_right hnc h₄ ); · simp_all +decide [ mc_node ] /-! ## Ogden's lemma for CNF grammars -/ /- PROBLEM Ogden's lemma for CNF grammars. The pumping constant is `2 ^ g.generators.card`. PROVIDED SOLUTION 1. Since hwg : w ∈ g.language, get a parse tree p with p.yield = w using Derives.yield (from hwg which gives g.Derives [nonterminal g.initial] (w.map terminal)). 2. Since countMarkedIn P 0 w.length ≥ 2^generators.card, we have p.mc P 0 ≥ 2^generators.card (since mc = countMarkedIn by definition, and p.yield = w). 3. From mc_implies_markedHeight, we get generators.card ≤ p.markedHeight P 0. 4. Use ogdens_restrict_mh with k = generators.card to find a subtree q with markedHeight = generators.card and mc > 0 and mc ≤ 2^generators.card (from mc_le_pow_markedHeight). 5. Apply ogdens_marked_path_decomp to q with s = ∅. Since generators.card ≤ q.markedHeight + 0 = generators.card, the precondition holds. 6. Since s = ∅, the Right case is impossible (no n' ∈ ∅). 7. So we get the Left case: decomposition of q.yield into u' v x y z' with the required properties. 8. Since p.yield = u₀ ++ q.yield ++ z₀, we have w = u₀ ++ (u' ++ v ++ x ++ y ++ z') ++ z₀ = (u₀ ++ u') ++ v ++ x ++ y ++ (z' ++ z₀). 9. Set u = u₀ ++ u', z = z' ++ z₀. The marked position properties transfer (adjusting offsets). 10. For the pumping: the derivation from q's root gives pumped words, and the outer derivation (from g.initial to u₀ ++ [q's root] ++ z₀) wraps them. 11. The pumped word is in g.language since we derive from g.initial. -/ lemma ogdens_cnf {w : List T} (hwg : w ∈ g.language) (P : ℕ → Prop) [DecidablePred P] (hw : countMarkedIn P 0 w.length ≥ 2 ^ g.generators.card) : ∃ u v x y z : List T, w = u ++ v ++ x ++ y ++ z ∧ 0 < countMarkedIn P u.length v.length + countMarkedIn P (u.length + v.length + x.length) y.length ∧ countMarkedIn P u.length (v.length + x.length + y.length) ≤ 2 ^ g.generators.card ∧ ∀ i : ℕ, u ++ v ^+^ i ++ x ++ y ^+^ i ++ z ∈ g.language := by by_cases h_empty : ∃ p : ChomskyNormalFormGrammar.parseTree g.initial, p.yield = w ∧ p.markedHeight P 0 < g.generators.card; · obtain ⟨ p, rfl, hp ⟩ := h_empty; exact absurd hw ( not_le_of_gt ( lt_of_le_of_lt ( mc_le_pow_markedHeight P 0 p ) ( pow_lt_pow_right₀ ( by decide ) hp ) ) ); · -- Apply the Ogden's lemma to the parse tree p. obtain ⟨p, hp⟩ : ∃ p : ChomskyNormalFormGrammar.parseTree g.initial, p.yield = w ∧ p.markedHeight P 0 ≥ g.generators.card := by obtain ⟨p, hp⟩ : ∃ p : ChomskyNormalFormGrammar.parseTree g.initial, p.yield = w := by -- Apply the lemma that states if there's a derivation from the initial nonterminal to the terminal symbols of w, then there exists a parse tree p with yield w. apply ChomskyNormalFormGrammar.Derives.yield; assumption; exact ⟨ p, hp, le_of_not_gt fun h => h_empty ⟨ p, hp, h ⟩ ⟩; -- Apply Ogden's lemma to find a marked path in the parse tree. obtain ⟨n', q, u₀, z₀, hq⟩ : ∃ n' : g.NT, ∃ q : parseTree n', ∃ u₀ z₀ : List T, p.yield = u₀ ++ q.yield ++ z₀ ∧ q.markedHeight P (0 + u₀.length) = g.generators.card ∧ 0 < q.mc P (0 + u₀.length) ∧ g.Derives [Symbol.nonterminal g.initial] (u₀.map Symbol.terminal ++ [Symbol.nonterminal n'] ++ z₀.map Symbol.terminal) := by apply ogdens_restrict_mh p P 0 g.generators.card hp.right (by have h_mc_pos : p.mc P 0 ≥ 2 ^ g.generators.card := by unfold mc; aesop; generalize_proofs at *; ( exact lt_of_lt_of_le ( by positivity ) h_mc_pos)); -- Apply Ogden's lemma to find a marked path in the parse tree q. obtain ⟨u', v, x, y, z', hq'⟩ : ∃ u' v x y z' : List T, q.yield = u' ++ v ++ x ++ y ++ z' ∧ 0 < countMarkedIn P (u₀.length + u'.length) v.length + countMarkedIn P (u₀.length + u'.length + v.length + x.length) y.length ∧ countMarkedIn P (u₀.length + u'.length) (v.length + x.length + y.length) ≤ 2 ^ g.generators.card ∧ ∀ i : ℕ, g.Derives [Symbol.nonterminal n'] ((u' ++ v ^+^ i ++ x ++ y ^+^ i ++ z').map Symbol.terminal) := by have := ogdens_marked_path_decomp q P (u₀.length) ∅; simp_all +decide ; apply this; exact le_trans ( mc_le_pow_markedHeight P u₀.length q ) ( by simp +decide [ hq.2.1 ] ); refine' ⟨ u₀ ++ u', v, x, y, z' ++ z₀, _, _, _, _ ⟩ <;> simp_all +decide [ List.append_assoc ]; intro i have h_deriv : g.Derives [Symbol.nonterminal g.initial] (u₀.map Symbol.terminal ++ [Symbol.nonterminal n'] ++ z₀.map Symbol.terminal) := by convert hq.2.2.2 using 1 ; simp +decide [ List.append_assoc ] have h_deriv' : g.Derives [Symbol.nonterminal n'] ((u' ++ v ^+^ i ++ x ++ y ^+^ i ++ z').map Symbol.terminal) := by grind exact (by convert h_deriv.trans _ using 1 generalize_proofs at *; ( grind +suggestions)) end parseTree end ChomskyNormalFormGrammar /-! ## Ogden's lemma for general context-free languages -/ /-- Ogden's lemma for context-free languages (Mathlib formulation). -/ theorem Language.IsContextFree.ogdens_lemma {L : Language T} (hL : L.IsContextFree) : ∃ p : ℕ, ∀ (w : List T), w ∈ L → ∀ (P : ℕ → Prop) [DecidablePred P], p ≤ countMarkedIn P 0 w.length → ∃ u v x y z : List T, w = u ++ v ++ x ++ y ++ z ∧ 0 < countMarkedIn P u.length v.length + countMarkedIn P (u.length + v.length + x.length) y.length ∧ countMarkedIn P u.length (v.length + x.length + y.length) ≤ p ∧ ∀ i : ℕ, u ++ v ^+^ i ++ x ++ y ^+^ i ++ z ∈ L := by obtain ⟨g, rfl⟩ := hL classical use 2 ^ g.toCNF.generators.card intro w hwg P _ hw2 by_cases hw : w = [] · simp [hw] at hw2 · obtain ⟨u, v, x, y, z, hw, hvy, hvxy, hL⟩ := ChomskyNormalFormGrammar.parseTree.ogdens_cnf (g.toCNF_correct ▸ ⟨hwg, hw⟩) P hw2 exact ⟨u, v, x, y, z, hw, hvy, hvxy, fun i => Set.diff_subset (g.toCNF_correct ▸ hL i)⟩ /-- Ogden's lemma for context-free languages (project formulation). -/ theorem CF_ogdens_lemma {L : Language T} (cf : is_CF L) : ∃ p : ℕ, ∀ (w : List T), w ∈ L → ∀ (P : ℕ → Prop) [DecidablePred P], p ≤ countMarkedIn P 0 w.length → ∃ u v x y z : List T, w = u ++ v ++ x ++ y ++ z ∧ 0 < countMarkedIn P u.length v.length + countMarkedIn P (u.length + v.length + x.length) y.length ∧ countMarkedIn P u.length (v.length + x.length + y.length) ≤ p ∧ ∀ i : ℕ, u ++ v ^+^ i ++ x ++ y ^+^ i ++ z ∈ L := by exact Language.IsContextFree.ogdens_lemma (is_CF_iff_isContextFree.mp cf)