/- Copyright (c) 2025 Harmonic. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ import Mathlib import Langlib.Grammars.Unrestricted.Definition /-! # Right-Regular (Type-3) Grammar Definitions This file defines right-regular grammars as a restricted variant of the unrestricted (Type-0) grammar framework. A right-regular grammar is the most restricted class in the Chomsky hierarchy, generating exactly the regular languages. ## Grammar restriction An unrestricted grammar allows rules of the form `α A β → γ` where `α`, `β`, `γ` are arbitrary strings of terminals and nonterminals. A right-regular grammar restricts every production rule to one of three forms: - `A → a B` (a terminal followed by a nonterminal) - `A → a` (a single terminal) - `A → ε` (the empty string) where `A`, `B` are nonterminals and `a` is a terminal symbol. ## Main declarations - `RG_rule` — A production rule in a right-regular grammar. - `RG_grammar` — A right-regular grammar. - `RG_transforms` — One-step derivation in a right-regular grammar. - `RG_derives` — Reflexive-transitive closure of `RG_transforms`. - `RG_language` — The language generated by a right-regular grammar. ## References * Hopcroft, Motwani, Ullman. *Introduction to Automata Theory, Languages, and Computation*, 3rd ed. Section 3.3. -/ open Relation /-- A production rule in a right-regular grammar. - `cons A a B` represents the rule `A → a B`. - `single A a` represents the rule `A → a`. - `epsilon A` represents the rule `A → ε`. -/ inductive RG_rule (T : Type) (N : Type) where | cons : N → T → N → RG_rule T N -- A → aB | single : N → T → RG_rule T N -- A → a | epsilon : N → RG_rule T N -- A → ε deriving DecidableEq /-- A right-regular (Type-3) grammar over terminal alphabet `T`. This is the most restricted class in the Chomsky hierarchy. Every production has the form `A → aB`, `A → a`, or `A → ε`. -/ structure RG_grammar (T : Type) where /-- Type of nonterminals -/ nt : Type /-- Start symbol -/ initial : nt /-- Production rules -/ rules : List (RG_rule T nt) variable {T : Type} /-- Extract the left-hand-side nonterminal from a rule. -/ def RG_rule.lhs {N : Type} : RG_rule T N → N | .cons A _ _ => A | .single A _ => A | .epsilon A => A /-- Convert an RG rule to its output as a list of symbols. -/ def RG_rule.output {N : Type} : RG_rule T N → List (symbol T N) | .cons _ a B => [symbol.terminal a, symbol.nonterminal B] | .single _ a => [symbol.terminal a] | .epsilon _ => [] /-- One step of right-regular grammar derivation. Rewrites a nonterminal `A` in context `u ++ [A] ++ v` according to a rule. -/ def RG_transforms (g : RG_grammar T) (w₁ w₂ : List (symbol T g.nt)) : Prop := ∃ r ∈ g.rules, ∃ u v : List (symbol T g.nt), w₁ = u ++ [symbol.nonterminal r.lhs] ++ v ∧ w₂ = u ++ r.output ++ v /-- Reflexive-transitive closure of `RG_transforms`. -/ def RG_derives (g : RG_grammar T) : List (symbol T g.nt) → List (symbol T g.nt) → Prop := ReflTransGen (RG_transforms g) /-- A word (list of terminals) is generated by a right-regular grammar. -/ def RG_generates (g : RG_grammar T) (w : List T) : Prop := RG_derives g [symbol.nonterminal g.initial] (List.map symbol.terminal w) /-- The language generated by a right-regular grammar. -/ def RG_language (g : RG_grammar T) : Language T := setOf (RG_generates g) -- ============================================================================ -- Basic derivation lemmas (mirroring the unrestricted grammar toolkit) -- ============================================================================ lemma RG_deri_self (g : RG_grammar T) {w : List (symbol T g.nt)} : RG_derives g w w := ReflTransGen.refl lemma RG_deri_of_tran {g : RG_grammar T} {u v : List (symbol T g.nt)} (h : RG_transforms g u v) : RG_derives g u v := ReflTransGen.single h lemma RG_deri_of_deri_tran {g : RG_grammar T} {u v w : List (symbol T g.nt)} (huv : RG_derives g u v) (hvw : RG_transforms g v w) : RG_derives g u w := ReflTransGen.tail huv hvw lemma RG_deri_of_deri_deri {g : RG_grammar T} {u v w : List (symbol T g.nt)} (huv : RG_derives g u v) (hvw : RG_derives g v w) : RG_derives g u w := ReflTransGen.trans huv hvw