/- SPDX-License-Identifier: MIT SPDX-FileCopyrightText: 2025 Shangtong Zhang -/ import Mathlib.Probability.ProbabilityMassFunction.Basic import Mathlib.LinearAlgebra.FiniteDimensional.Basic import RLTheory.Defs import RLTheory.MeasureTheory.MeasurableSpace.Constructions import RLTheory.Probability.MarkovChain.Defs import RLTheory.Probability.MarkovChain.Finite.Defs import RLTheory.Probability.MarkovChain.Trajectory import RLTheory.MarkovDecisionProcess.MarkovRewardProcess import RLTheory.StochasticApproximation.Lyapunov import RLTheory.StochasticApproximation.DiscreteGronwall open Real Finset Filter TopologicalSpace Filter MeasureTheory.Filtration MeasureTheory ProbabilityTheory StochasticMatrix Preorder RLTheory open scoped MeasureTheory ProbabilityTheory Topology InnerProductSpace RealInnerProductSpace namespace StochasticApproximation variable {d : ℕ} variable {x₀ : E d} variable {α : ℕ → ℝ} variable {f : E d → E d} lemma linear_growth_of_lipschitz {f : E d → E d} (hf : ∃ C, 0 ≤ C ∧ ∀ x y, ‖f x - f y‖ ≤ C * ‖x - y‖) : ∃ C, 0 ≤ C ∧ ∀ x, ‖f x‖ ≤ C * (‖x‖ + 1) := by obtain ⟨C, hCnonneg, hC⟩ := hf refine ⟨?L, ?hLnonneg, ?hL⟩ case L => exact max C ‖f 0‖ case hLnonneg => positivity case hL => intro z have := hC z 0 grw [norm_le_norm_sub_add (b := f 0)] grw [this] simp rw [mul_add] apply add_le_add have : C ≤ max C ‖f 0‖ := by apply le_max_left gcongr simp lemma linear_growth_of_lipschitz' {α : Type*} [Nonempty α] [Fintype α] {f : E d → α → E d} (hf : ∃ C, 0 ≤ C ∧ ∀ x y z, ‖f x z - f y z‖ ≤ C * ‖x - y‖) : ∃ C, 0 ≤ C ∧ ∀ x z, ‖f x z‖ ≤ C * (‖x‖ + 1) := by obtain ⟨C, hCnonneg, hC⟩ := hf let CF := Finset.univ.sup' (by simp) fun z => ‖f 0 z‖ refine ⟨?L, ?hLnonneg, ?hL⟩ case L => exact max CF C case hLnonneg => positivity case hL => intro y z grw [norm_le_norm_sub_add (b := f 0 z)] grw [hC y 0 z] simp rw [mul_add] apply add_le_add have : C ≤ max CF C := by apply le_max_right gcongr simp apply Or.inl apply Finset.le_sup' fun z => ‖f 0 z‖ simp section extraneous_errors variable {Ω : Type*} [MeasurableSpace Ω] variable {μ : Measure Ω} variable {e₁ e₂ : ℕ → Ω → E d} class Iterates (x : ℕ → Ω → E d) (x₀ : E d) (f : E d → E d) (e₁ e₂ : ℕ → Ω → E d) (α : ℕ → ℝ) where init : ∀ ω, x 0 ω = x₀ step : ∀ n ω, x (n + 1) ω = x n ω + (α n) • (f (x n ω) - x n ω) + e₁ (n + 1) ω + e₂ (n + 1) ω lemma bdd_of_iterates {x : ℕ → Ω → E d} (hx : Iterates x x₀ f e₁ e₂ α) (he₁ : ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, ∀ n, ‖e₁ (n + 1) ω‖≤ C * (α n) * (‖x n ω‖ + 1)) (he₂ : ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, ∀ n, ‖e₂ (n + 1) ω‖≤ C * (α n) ^ 2 * (‖x n ω‖ + 1)) (hfLip : ∃ L, LipschitzWith L f) (hαpos : ∀ n, 0 < α n) : ∀ n, ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, ‖x n ω‖ ≤ C := by intro n induction n with | zero => ?_ | succ n hn => ?_ case zero => refine ⟨‖x₀‖, by simp, ?h⟩ apply Eventually.of_forall intro ω rw [hx.init] case succ => obtain ⟨Lf, hfLip⟩ := hfLip have hfLip := lipschitzWith_iff_norm_sub_le.mp hfLip obtain ⟨C₁, hC₁nonneg, hC₁⟩ := he₁ obtain ⟨C₂, hC₂nonneg, hC₂⟩ := he₂ obtain ⟨C₃, hC₃pos, hC₃⟩ := hn let C := C₃ + |α n| * (↑Lf * C₃ + ‖f 0‖ + C₃) + C₁ * α n * (C₃ + 1) + C₂ * α n ^ 2 * (C₃ + 1) refine ⟨C, ?hCnonneg, ?hC⟩ case hC => apply Eventually.mono ((hC₁.and hC₂).and hC₃) intro ω hω obtain ⟨⟨hC₁, hC₂⟩, hC₃⟩ := hω rw [hx.step] grw [norm_add_le] grw [norm_add_le] grw [norm_add_le] rw [norm_smul] grw [norm_sub_le] grw [hC₁ n] grw [hC₂ n] have := hfLip (x n ω) 0 simp at this have := (norm_sub_norm_le (f (x n ω)) (f 0)).trans this have := sub_le_iff_le_add.mp this grw [this] simp grw [hC₃] apply mul_nonneg hC₁nonneg (hαpos n).le case hCnonneg => have := hαpos n positivity lemma bdd_of_φ {x : ℕ → Ω → E d} (hx : Iterates x x₀ f e₁ e₂ α) (he₁ : ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, ∀ n, ‖e₁ (n + 1) ω‖≤ C * (α n) * (‖x n ω‖ + 1)) (he₂ : ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, ∀ n, ‖e₂ (n + 1) ω‖≤ C * (α n) ^ 2 * (‖x n ω‖ + 1)) (hfLip : ∃ L, LipschitzWith L f) (hf : ∃ z, z = f z) (hαpos : ∀ n, 0 < α n) {φ : E d → ℝ} {φ' : E d → E d} [LyapunovFunction φ φ' f] : ∀ n, ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, φ (x n ω - hf.choose) ≤ C := by have hEnergy : LyapunovFunction φ φ' f := by infer_instance obtain ⟨C₁, hC₁nonneg, hC₁⟩ := hEnergy.le_norm intro n have := bdd_of_iterates hx he₁ he₂ hfLip hαpos n obtain ⟨C₂, hC₂nonneg, hC₂⟩ := this let C := (C₁ * (C₂ + ‖hf.choose‖)) ^ 2 refine ⟨C, ?hCnonneg, ?hC⟩ case hC => apply Eventually.mono hC₂ intro ω hC₂ rw [←Real.sq_sqrt (hEnergy.nonneg (x n ω - hf.choose))] grw [hC₁ (x n ω - hf.choose)] grw [norm_sub_le] grw [hC₂] case hCnonneg => positivity lemma bdd_of_grad_φ_inner {x : ℕ → Ω → E d} (hx : Iterates x x₀ f e₁ e₂ α) (he₁ : ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, ∀ n, ‖e₁ (n + 1) ω‖≤ C * (α n) * (‖x n ω‖ + 1)) (he₂ : ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, ∀ n, ‖e₂ (n + 1) ω‖≤ C * (α n) ^ 2 * (‖x n ω‖ + 1)) (hfLip : ∃ L, LipschitzWith L f) (hf : ∃ z, z = f z) (hαpos : ∀ n, 0 < α n) {φ : E d → ℝ} {φ' : E d → E d} [LyapunovFunction φ φ' f] : ∀ n z, ∃ C, 0 ≤ C ∧ ∀ᵐ ω ∂μ, ⟪φ' (x n ω - hf.choose), z⟫ ≤ C * √(φ z) := by have hEnergy : LyapunovFunction φ φ' f := by infer_instance obtain ⟨C₁, hC₁nonneg, hC₁⟩ := hEnergy.le_norm intro n have := bdd_of_iterates hx he₁ he₂ hfLip hαpos n obtain ⟨C₂, hC₂nonneg, hC₂⟩ := this obtain ⟨C₃, hC₃pos, hC₃⟩ := hEnergy.inner_grad_le intro z let C := C₃ * √((C₁ * (C₂ + ‖hf.choose‖)) ^ 2) refine ⟨C, ?hCnonneg, ?hC⟩ case hC => apply Eventually.mono hC₂ intro ω hC₂ grw [hC₃ (x n ω - hf.choose) z] rw [←Real.sq_sqrt (hEnergy.nonneg (x n ω - hf.choose))] grw [hC₁ (x n ω - hf.choose)] grw [norm_sub_le] grw [hC₂] case hCnonneg => positivity end extraneous_errors section adapted variable {S Z: Type*} [MeasurableSpace S] [MeasurableSpace Z] [Norm Z] instance : ∀ n : ℕ, MeasurableSpace (Iic n → S) := by infer_instance class AdaptedOnSamplePath (x : ℕ → (ℕ → S) → Z) where property : ∀ n, ∃ xn : (Iic n → S) → Z, Measurable xn ∧ ∀ ω, x n ω = xn (frestrictLe (a := n) ω) end adapted section intravenous_errors variable {S : Type*} [Fintype S] [Nonempty S] variable {F : E d → (S × S) → E d} variable {x : ℕ → (ℕ → S × S) → E d} omit [Nonempty S] in lemma lipschitz_of_expectation {f : E d → E d} {μ : S → ℝ} [StochasticVec μ] {P : Matrix S S ℝ} [RowStochastic P] (hfF : ∀ w, f w = ∑ s, ∑ s', (μ s * P s s') • F w (s, s')) (hFlip : ∃ C, 0 ≤ C ∧ ∀ w w' y, ‖F w y - F w' y‖ ≤ C * ‖w - w'‖) : ∃ C, 0 ≤ C ∧ ∀ x y, ‖f x - f y‖ ≤ C * ‖x - y‖ := by have hP : RowStochastic P := by infer_instance have hμ : StochasticVec μ := by infer_instance obtain ⟨C, hCnonneg, hC⟩ := hFlip use C constructor exact hCnonneg intro w w' simp [hfF] simp_rw [←sum_sub_distrib, ←smul_sub] grw [norm_sum_le, sum_le_sum] rotate_left intro s hs grw [norm_sum_le] simp_rw [norm_smul, norm_mul] simp grw [sum_le_sum] rotate_left intro s hs grw [sum_le_sum] intro s' hs' apply LE.le.trans grw [hC] rw [abs_of_nonneg, abs_of_nonneg] apply (hP.stochastic s).nonneg apply hμ.nonneg simp_rw [mul_assoc, ←mul_sum, ←sum_mul, (hP.stochastic ?_).rowsum, ←sum_mul, hμ.rowsum] simp class IteratesOfResidual (x : ℕ → (ℕ → (S × S)) → E d) (x₀ : E d) (α : ℕ → ℝ) (F : E d → (S × S) → E d) where init : ∀ ω, x 0 ω = x₀ step : ∀ n ω, x (n + 1) ω = x n ω + α n • (F (x n ω) (ω (n + 1)) - x n ω) lemma IteratesOfResidual.bdd (hx : IteratesOfResidual x x₀ α F) (hF : ∃ C, 0 ≤ C ∧ ∀ w w' y, ‖F w y - F w' y‖ ≤ C * ‖w - w'‖) : ∀ n, ∃ C, 0 ≤ C ∧ ∀ ω, ‖x n ω‖ ≤ C := by intro n induction n with | zero => ?_ | succ n hn => ?_ case zero => use ‖x₀‖ constructor simp intro ω rw [hx.init] case succ => obtain ⟨C₁, hC₁nonneg, hC₁⟩ := hn obtain ⟨C₂, hC₂nonneg, hC₂⟩ := linear_growth_of_lipschitz' hF refine ⟨?C, ?hCnonneg, ?hC⟩ case C => exact C₁ + |α n| * (C₂ * (C₁ + 1) + C₁) case hCnonneg => positivity case hC => intro ω rw [hx.step] grw [norm_add_le] rw [norm_smul] grw [norm_sub_le, hC₂, hC₁] simp lemma IteratesOfResidual.growth (hx : IteratesOfResidual x x₀ α F) (hα : ∀ n, 0 ≤ α n) (hF : ∃ C, 0 ≤ C ∧ ∀ w w' y, ‖F w y - F w' y‖ ≤ C * ‖w - w'‖) : ∃ C, 0 ≤ C ∧ ∀ ω n m, ∀ i ∈ Ico n m, ‖x i ω - x n ω‖ ≤ (∑ k ∈ Ico n m, α k * C * (‖x n ω‖ + 1)) * exp (∑ j ∈ Ico n m, α j * C) := by obtain ⟨C₁, hC₁nonneg, hC₁⟩ := linear_growth_of_lipschitz' hF obtain ⟨C₂, hC₂nonneg, hC₂⟩ := hF set C := C₁ + 1 refine ⟨C, ?hCnonneg, ?hC⟩ case hCnonneg => positivity case hC => intro ω n have : ∀ i ≥ n, ‖x (i + 1) ω - x n ω‖ ≤ (1 + α i * C) * ‖x i ω - x n ω‖ + α i * C * (‖x n ω‖ + 1) := by intro i hi rw [hx.step, ←sub_add_eq_add_sub] grw [norm_add_le] rw [norm_smul] nth_grw 2 [norm_sub_le] grw [hC₁] have : x i ω = x n ω + (x i ω - x n ω) := by simp nth_rw 2 [this] nth_rw 3 [this] grw [norm_add_le] simp [abs_of_nonneg (hα i), C] ring_nf simp exact hα i intro m i hi have := discrete_gronwall_Ico (n₁ := m) (hu := this) (u := fun k => ‖x k ω - x n ω‖) ?_ ?_ ?_ i hi simp at this exact this simp; intro j hi; simp; have := hα j; positivity simp; intro k hk; have := hα k; positivity variable [MeasurableSpace S] [MeasurableSingletonClass S] omit [Nonempty S] in lemma IteratesOfResidual.adaptedOnSamplePath (hx : IteratesOfResidual x x₀ α F) : AdaptedOnSamplePath x := by constructor intro n induction n with | zero => ?_ | succ n hn => ?_ case zero => use fun _ => x₀ simp [hx.init] case succ => obtain ⟨xn, hxn⟩ := hn use fun ω => let ωn := frestrictLe₂ («π» := fun _ : ℕ => S × S) (a := n) (b := n + 1) (by simp) ω let m : Iic (n + 1) := ⟨n + 1, by simp⟩ xn ωn + (α n) • (F (xn ωn) (ω m) - xn ωn) refine ⟨?hm, ?heq⟩ case heq => intro ω simp [hx.step, hxn] rfl case hm => measurability end intravenous_errors end StochasticApproximation