/- Copyright (c) 2025 Harmonic. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ import Mathlib import Langlib.Grammars.Unrestricted.Definition /-! # Left-Regular (Type-3) Grammar Definitions This file defines left-regular grammars as a restricted variant of the unrestricted (Type-0) grammar framework. A left-regular grammar is a variant of the Type-3 class in the Chomsky hierarchy, generating exactly the regular languages. ## Grammar restriction A left-regular grammar restricts every production rule to one of three forms: - `A → B a` (a nonterminal followed by a terminal) - `A → a` (a single terminal) - `A → ε` (the empty string) where `A`, `B` are nonterminals and `a` is a terminal symbol. ## Main declarations - `LG_rule` — A production rule in a left-regular grammar. - `LG_grammar` — A left-regular grammar. - `LG_transforms` — One-step derivation in a left-regular grammar. - `LG_derives` — Reflexive-transitive closure of `LG_transforms`. - `LG_language` — The language generated by a left-regular grammar. - `is_LG` — A language is left-regular if it is generated by some left-regular grammar. ## References * Hopcroft, Motwani, Ullman. *Introduction to Automata Theory, Languages, and Computation*, 3rd ed. Section 3.3. -/ open Relation /-- A production rule in a left-regular grammar. - `cons A a B` represents the rule `A → B a`. - `single A a` represents the rule `A → a`. - `epsilon A` represents the rule `A → ε`. -/ inductive LG_rule (T : Type) (N : Type) where | cons : N → T → N → LG_rule T N -- A → Ba | single : N → T → LG_rule T N -- A → a | epsilon : N → LG_rule T N -- A → ε deriving DecidableEq /-- A left-regular (Type-3) grammar over terminal alphabet `T`. This is a variant of the most restricted class in the Chomsky hierarchy. Every production has the form `A → Ba`, `A → a`, or `A → ε`. -/ structure LG_grammar (T : Type) where /-- Type of nonterminals -/ nt : Type /-- Start symbol -/ initial : nt /-- Production rules -/ rules : List (LG_rule T nt) variable {T : Type} /-- Extract the left-hand-side nonterminal from a left-regular rule. -/ def LG_rule.lhs {N : Type} : LG_rule T N → N | .cons A _ _ => A | .single A _ => A | .epsilon A => A /-- Convert an LG rule to its output as a list of symbols. - `A → Ba` outputs `[B, a]` - `A → a` outputs `[a]` - `A → ε` outputs `[]` -/ def LG_rule.output {N : Type} : LG_rule T N → List (symbol T N) | .cons _ a B => [symbol.nonterminal B, symbol.terminal a] | .single _ a => [symbol.terminal a] | .epsilon _ => [] /-- One step of left-regular grammar derivation. Rewrites a nonterminal `A` in context `u ++ [A] ++ v` according to a rule. -/ def LG_transforms (g : LG_grammar T) (w₁ w₂ : List (symbol T g.nt)) : Prop := ∃ r ∈ g.rules, ∃ u v : List (symbol T g.nt), w₁ = u ++ [symbol.nonterminal r.lhs] ++ v ∧ w₂ = u ++ r.output ++ v /-- Reflexive-transitive closure of `LG_transforms`. -/ def LG_derives (g : LG_grammar T) : List (symbol T g.nt) → List (symbol T g.nt) → Prop := ReflTransGen (LG_transforms g) /-- A word (list of terminals) is generated by a left-regular grammar. -/ def LG_generates (g : LG_grammar T) (w : List T) : Prop := LG_derives g [symbol.nonterminal g.initial] (List.map symbol.terminal w) /-- The language generated by a left-regular grammar. -/ def LG_language (g : LG_grammar T) : Language T := setOf (LG_generates g) /-- A language is left-regular if it is generated by some left-regular grammar. -/ def is_LG (L : Language T) : Prop := ∃ g : LG_grammar T, LG_language g = L /-- The class of left-regular languages. -/ def LG : Set (Language T) := setOf is_LG -- ============================================================================ -- Basic derivation lemmas (mirroring the right-regular grammar toolkit) -- ============================================================================ lemma LG_deri_self (g : LG_grammar T) {w : List (symbol T g.nt)} : LG_derives g w w := ReflTransGen.refl lemma LG_deri_of_tran {g : LG_grammar T} {u v : List (symbol T g.nt)} (h : LG_transforms g u v) : LG_derives g u v := ReflTransGen.single h lemma LG_deri_of_deri_tran {g : LG_grammar T} {u v w : List (symbol T g.nt)} (huv : LG_derives g u v) (hvw : LG_transforms g v w) : LG_derives g u w := ReflTransGen.tail huv hvw lemma LG_deri_of_deri_deri {g : LG_grammar T} {u v w : List (symbol T g.nt)} (huv : LG_derives g u v) (hvw : LG_derives g v w) : LG_derives g u w := ReflTransGen.trans huv hvw