/-
Copyright (c) 2024 Hongjia Chen, Chenyi Li, Wanyi He, Zaiwen Wen. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Hongjia Chen, Chenyi Li, Wanyi He, Zaiwen Wen
-/
import Mathlib.Analysis.Normed.Group.Basic
import Mathlib.Data.Fintype.Order
import Optlib.Convex.Subgradient

/-!
# SubgradientMethod

## Main results

  This file mainly concentrates on the subgradient algorithm for
  unconstrained nonsmooth optimization problems.

  We prove the convergence rate with different kinds of step size.

-/

open Filter Topology Set InnerProductSpace Finset

/-! ### Convergence of Subgradient method -/

section

variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E]

variable {S :Set E} {f : E → ℝ} {g : E} {x : E}

variable {G : NNReal}

theorem bounded_subgradient_to_Lipschitz (hf : ConvexOn ℝ univ f) (hc : ContinuousOn f univ)
    (h : ∀ ⦃x : E⦄ , ∀ ⦃g⦄ , g ∈ SubderivAt f x → ‖g‖ ≤ G) :
    LipschitzWith G f := by
  intro x y
  have hx₂' : Nonempty (SubderivAt f x) := SubderivAt.nonempty hf hc x
  simp at hx₂'
  rcases hx₂' with ⟨gx, hx₁⟩
  have hx₃ : ‖gx‖ ≤ G := by rcases h hx₁ with hx; apply hx
  rcases hx₁ y with hx₂
  have hx₄ : f x - f y ≤ inner gx (x - y) := by
    rw [add_comm] at hx₂
    have : f x ≤ f y - inner gx (y - x) := le_sub_left_of_add_le hx₂
    rw [sub_eq_add_neg, ← inner_neg_right, neg_sub] at this
    exact sub_left_le_of_le_add this
  have hy₂' : Nonempty (SubderivAt f y) := SubderivAt.nonempty hf hc y
  simp at hy₂'
  rcases hy₂' with ⟨gy, hy₁⟩
  have hy₃ : ‖gy‖ ≤ G := by rcases h hy₁ with hy; apply hy
  rcases hy₁ x with hy₂
  have hy₄: f x - f y ≥ inner gy (x - y) := by
    calc
      _ ≥ f y + inner gy (x - y) - f y := by apply sub_le_sub_right hy₂
      _ = inner gy (x - y) := by ring
  have hG₁: ↑G = ENNReal.ofReal ↑G := by simp
  rw [edist_dist, edist_dist, hG₁]
  have hG₂ : ENNReal.ofReal (↑G * (dist x y)) = ENNReal.ofReal ↑G * ENNReal.ofReal (dist x y) := by
    apply ENNReal.ofReal_mul; simp
  rw[← hG₂]
  have hG₃ : 0 ≤ ↑G * dist x y := Right.mul_nonneg NNReal.zero_le_coe dist_nonneg
  apply (ENNReal.ofReal_le_ofReal_iff hG₃).mpr
  have h₁ : dist (f x) (f y) = |f x - f y| := rfl
  rw [h₁, dist_eq_norm x y]
  apply abs_le.mpr
  constructor
  · calc
      f x - f y ≥ inner gy (x - y) := hy₄
      _ ≥ - (‖gy‖ * ‖x - y‖) := by
        apply neg_le_of_neg_le
        rw  [← inner_neg_right, neg_sub, norm_sub_rev]
        apply real_inner_le_norm
      _ ≥ - (↑G * ‖x - y‖) := neg_le_neg (mul_le_mul_of_nonneg_right hy₃ (norm_nonneg _))
  · calc
      f x - f y ≤ inner gx (x - y) := hx₄
      _ ≤ ‖gx‖ * ‖x - y‖ := real_inner_le_norm _ _
      _ ≤ ↑G * ‖x - y‖ := mul_le_mul_of_nonneg_right hx₃ (norm_nonneg _)

omit [CompleteSpace E] in
theorem Lipschitz_to_bounded_subgradient (h : LipschitzWith G f ) :
    ∀ ⦃x : E⦄ , ∀ ⦃g⦄ , g ∈ SubderivAt f x → ‖g‖ ≤ G := by
  by_contra h₁
  push_neg at h₁
  rcases h₁ with ⟨x, g, h₂, h₃⟩
  let y : E := x + ((1 / ‖g‖) • g)
  have hy : y = x + ((1 / ‖g‖) • g) := by rfl
  rcases h₂ y with hy₂
  rw[LipschitzWith] at h
  have hg₁ : ‖g‖ ≠ 0 := by
    apply ne_of_gt (lt_of_le_of_lt _ h₃)
    simp only [NNReal.zero_le_coe]
  have hl : inner g (y - x) = ‖g‖ := by
    rw[hy ,add_comm, ← add_sub, sub_self, add_zero, inner_smul_right, inner_self_eq_norm_sq_to_K]
    field_simp; apply pow_two
  rw [hl] at hy₂
  have _ : f y - f x ≥ ‖g‖ := by
    calc
      _ ≥ f x + ‖g‖ - f x := by apply sub_le_sub_right hy₂
      _ = ‖g‖:= by ring
  rcases h x y with h₅
  have hG₁: ↑G = ENNReal.ofReal ↑G := by rw [ENNReal.ofReal_coe_nnreal]
  rw [edist_dist, edist_dist, hG₁] at h₅
  have hG₂ : ENNReal.ofReal (↑G * (dist x y)) = ENNReal.ofReal ↑G * ENNReal.ofReal (dist x y) := by
    apply ENNReal.ofReal_mul; simp
  rw[← hG₂] at h₅
  have hG₃ : 0 ≤ ↑G * dist x y := Right.mul_nonneg NNReal.zero_le_coe dist_nonneg
  have h₃' : dist (f x) (f y) ≤ ↑G * dist x y := (ENNReal.ofReal_le_ofReal_iff hG₃).mp h₅
  have h₁ : dist (f x) (f y) = |f x - f y| := by rfl
  rw[h₁, dist_eq_norm] at h₃'; nth_rw 2 [hy] at h₃'
  rw[sub_add_eq_sub_sub, sub_self, zero_sub, norm_neg, norm_smul, abs_sub_comm] at h₃'
  have h₄' : f y - f x < ‖g‖:= by
    calc
      f y - f x ≤ |f y - f x|:= by apply le_abs_self
      _ ≤ ↑G * (‖1 / ‖g‖‖ * ‖g‖) := by apply h₃'
      _ = ↑G := by field_simp
      _ < ‖g‖ := by apply h₃
  linarith

/- Subgradient of `f` is bounded if and only if `f` is Lipschitz -/
theorem bounded_subgradient_iff_Lipschitz (hf : ConvexOn ℝ univ f) (hc : ContinuousOn f univ) :
    (∀ ⦃x : E⦄ , ∀ ⦃g⦄ , g ∈ SubderivAt f x → ‖g‖ ≤ G)  ↔ LipschitzWith G f :=
  ⟨bounded_subgradient_to_Lipschitz hf hc, Lipschitz_to_bounded_subgradient⟩

end

section

variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E]

variable {S :Set E} {f : E → ℝ} {g : E} {x : E}

variable (hf : ConvexOn ℝ univ f)

open Finset

class subgradient_method (f : E → ℝ) (x₀ : E) :=
  (x g : ℕ → E)
  (a : ℕ → ℝ) (ha : ∀ n, a n > 0)
  (G : NNReal) (lipschitz : LipschitzWith G f)
  (initial : x 0 = x₀)
  (update : ∀ k, (x (k + 1)) = x k - a k • (g k))
  (hg : ∀ n, g n ∈ SubderivAt f (x n))

variable (xm x₀ : E) {alg : subgradient_method f x₀}

/- Convergence of general subgradient method -/
omit [CompleteSpace E] in
theorem subgradient_method_converge:
    ∀ k, 2 * ((Finset.range (k + 1)).sum alg.a) *
    (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)
    ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 * (Finset.range (k + 1)).sum (fun i => alg.a i ^ 2) := by
  intro k
  have h' : ∀ ⦃x : E⦄ , ∀ ⦃g⦄ , g ∈ SubderivAt f x → ‖g‖ ≤ alg.G := Lipschitz_to_bounded_subgradient alg.lipschitz
  by_cases k₀ : k = 0
  · simp [k₀]
    rcases (alg.hg 0) xm with hk₀
    rcases h' (alg.hg 0) with h₀'
    rw [← mul_pow]
    apply le_trans _ (two_mul_le_add_sq _ _)
    rw [mul_assoc, mul_assoc]; apply (mul_le_mul_left two_pos).mpr
    rw [mul_comm, ← mul_assoc]; apply (mul_le_mul_right (alg.ha 0)).mpr
    have : f (alg.x 0) - f xm ≤ - inner (alg.g 0) (xm - alg.x 0) := by
      simp [hk₀]; rw[add_comm]; apply hk₀
    apply le_trans this _
    rw [← inner_neg_right,neg_sub, alg.initial]
    apply le_trans (real_inner_le_norm _ _) _; rw [mul_comm]
    apply mul_le_mul_of_nonneg_left h₀' (norm_nonneg _)
  · have heq : (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) =
        {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} := by simp [Set.ext_iff]
    have h₁ :  ∀ ⦃i : ℕ⦄, i ≥ 0 ∧ i ≤ k → ‖alg.x (i+1) - xm‖ ^ 2 ≤ ‖alg.x i - xm‖ ^ 2 - 2 * alg.a i
        * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) + alg.G ^ 2 * alg.a i ^ 2 := by
      intro i ⟨ _ ,hi₂⟩
      rw [alg.update i, sub_right_comm, norm_sub_sq_real, norm_smul, mul_pow, sub_eq_add_neg]
      have : ‖alg.x i - xm‖ ^ 2 - 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) =
          x} - f xm) + ↑alg.G ^ 2 * alg.a i ^ 2 = ‖alg.x i - xm‖ ^ 2 + - (2 * alg.a i * (sInf {x | ∃ i ∈
          Finset.range (k + 1), f (alg.x i) = x} - f xm)) + ↑alg.G ^ 2 * alg.a i ^ 2 := by ring
      rw [this]
      have inq₁: ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 ≤ ↑alg.G ^ 2 * alg.a i ^ 2 := by
        rw[mul_comm]; simp
        rcases h' (alg.hg i) with hi
        apply mul_le_mul_of_nonneg_right _ (sq_nonneg _)
        · apply pow_le_pow_left; apply norm_nonneg; apply hi
      have inq₂: 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)
          ≤ 2 * inner (alg.x i - xm) (alg.a i • alg.g i) := by
        rw [mul_assoc]; apply (mul_le_mul_left two_pos).mpr
        rw [inner_smul_right]; apply (mul_le_mul_left (alg.ha i)).mpr
        rcases (alg.hg i) xm with hxm
        calc
          _ = sInf (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) - f xm := by rw [← heq]
          _ ≤ f (alg.x i) - f xm := by
            simp
            have : f (alg.x i) ∈ Set.range fun (x : Finset.range (k + 1)) => f (alg.x x) := by
              simp; use i
              constructor
              · apply lt_of_le_of_lt hi₂; apply (Nat.lt_succ_self k)
              · simp
            apply csInf_le _ this; apply Finite.bddBelow_range
          _ ≤ inner (alg.x i - xm) (alg.g i) := by
            simp; apply le_add_of_sub_left_le
            rw [sub_eq_add_neg, ← inner_neg_left, neg_sub, real_inner_comm]
            apply hxm
      rw [add_assoc, add_assoc]
      apply add_le_add_left; apply add_le_add
      · apply neg_le_neg; apply inq₂
      · apply inq₁
    have h₁' : ∀ ⦃i : ℕ⦄, i ≥ 0 ∧ i ≤ k → alg.a i * (2 * (sInf {f (alg.x i) | i ∈ Finset.range (k +
        1)} - f xm)) ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2 := by
      intro i ⟨hi₁, hi₂⟩
      rcases h₁ ⟨hi₁, hi₂⟩ with hii
      have : 2 * (alg.a i) * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) ≤
          ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2:= by
        linarith [hii]
      rw [mul_assoc, mul_comm, mul_assoc, mul_comm _ 2] at this
      apply this
    have h₂ : (Finset.range (k + 1)).sum (fun i => (alg.a i) * (2 * (sInf {f (alg.x i) | i ∈
        Finset.range (k + 1)} - f xm))) ≤ (Finset.range (k + 1)).sum
        (fun i => ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2) := by
      apply Finset.sum_le_sum; intro i hi; apply h₁'
      constructor
      · simp
      · have : i < k + 1 := by
          apply Finset.mem_range.mp; apply hi
        apply (Nat.lt_add_one_iff).mp this
    rw [← sum_mul, ← mul_assoc, mul_comm _ 2, sum_add_distrib] at h₂
    rw [sum_range_sub' _, ← mul_sum, alg.initial] at h₂
    calc
      _ = (2 * Finset.sum (Finset.range (k + 1)) fun x => alg.a x) * (sInf {x | ∃ i ∈
          Finset.range (k + 1), f (alg.x i) = x} - f xm) := by simp
      _ ≤ ‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + ↑alg.G ^ 2 * Finset.sum (Finset.range
          (k + 1)) fun x => alg.a x ^ 2 := by apply h₂
      _ ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 * Finset.sum (Finset.range (k + 1)) fun x => alg.a x ^ 2 := by simp

/-- convergence with fixed step size --/
theorem subgradient_method_fix_step_size {t : ℝ}
    (ha' : ∀ (n : ℕ), alg.a n = t) :
    ∀ (k : ℕ) , sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm
      ≤ ‖x₀ - xm‖ ^ 2 / (2 * (k + 1) * t) + alg.G ^ 2 * t / 2 := by
  intro k
  have ht : t > 0 := by rw[← ha' 0]; apply alg.ha 0
  have h₁ : ∀ (k : ℕ), 2 * ((Finset.range (k + 1)).sum alg.a) *
      (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - (f xm))
      ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 * (Finset.range (k + 1)).sum (fun i => (alg.a i) ^ 2) := by
    apply subgradient_method_converge
  rcases h₁ k with hk
  simp [ha'] at hk
  have hpos :  2 * ((↑k + 1) * t) > 0 := by
    simp
    apply mul_pos _ ht
    · apply add_pos_of_nonneg_of_pos (Nat.cast_nonneg k) zero_lt_one
  apply (mul_le_mul_left hpos).mp
  calc
    2 * ((↑k + 1) * t) * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)
      = 2 * ((↑k + 1) * t) * (sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm) := by simp
    _ ≤ ‖x₀ - xm‖ ^ 2 + ↑alg.G ^ 2 * ((↑k + 1) * t ^ 2) := by apply hk
    _ = 2 * ((↑k + 1) * t) * (‖x₀ - xm‖ ^ 2 / (2 * (↑k + 1) * t) + ↑alg.G ^ 2 * t / 2) := by
      field_simp; ring

/-- convergence with fixed $‖x^{i+1}-x^{i}‖$ --/
theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm)
   (ha' : ∀ (n : ℕ), alg.a n * ‖alg.g n‖ = s) (hs : s > 0):
    ∀ (k : ℕ) ,(sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x}) - (f xm)
      ≤ alg.G * ‖x₀ - xm‖ ^ 2 / (2 * (k + 1) * s) + alg.G * s / 2 := by
  intro k
  have heq : (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) =
      {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} := by simp [Set.ext_iff]
  have hnek : Nonempty (Finset.range (k + 1)) := by
    simp; use 0; apply Nat.succ_pos k
  obtain h' := Lipschitz_to_bounded_subgradient alg.lipschitz
  have h₁ :  ∀ ⦃i : ℕ⦄ , i ≥ 0 ∧ i ≤ k → ‖alg.x (i+1) - xm‖ ^ 2 ≤ ‖alg.x i - xm‖ ^ 2 - 2 * (alg.a i)
      * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2:= by
    intro i ⟨_, hi₂⟩
    rw [alg.update i, sub_right_comm, norm_sub_sq_real, norm_smul,mul_pow, sub_eq_add_neg]
    have : ‖alg.x i - xm‖ ^ 2 - 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1),
        f (alg.x i) = x} - f xm) + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 = ‖alg.x i - xm‖ ^ 2 +
        -(2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm))
        + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 := by ring
    rw [this]
    have inq₂: 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)
        ≤ 2 * inner (alg.x i - xm) (alg.a i • alg.g i) := by
      rw [mul_assoc]; apply (mul_le_mul_left two_pos).mpr
      rw[inner_smul_right]; apply (mul_le_mul_left (alg.ha i)).mpr
      rcases (alg.hg i) xm with hxm
      calc
        _ = sInf (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) - f xm := by rw [← heq]
        _ ≤ f (alg.x i)- f xm := by
          simp
          have : f (alg.x i) ∈ Set.range fun (x : Finset.range (k + 1)) => f (alg.x x) := by
            simp; use i
            constructor
            · apply lt_of_le_of_lt hi₂; apply (Nat.lt_succ_self k)
            · simp
          apply csInf_le _ this; apply Finite.bddBelow_range
        _ ≤ inner (alg.x i - xm) (alg.g i) := by
          simp; apply le_add_of_sub_left_le
          rw[sub_eq_add_neg, ← inner_neg_left, neg_sub, real_inner_comm]; apply hxm
    rw[add_assoc, add_assoc]
    apply add_le_add_left
    apply add_le_add
    · apply neg_le_neg; apply inq₂
    · simp
  have h₁' :  ∀ ⦃i : ℕ⦄ , i ≥ 0 ∧ i ≤ k → alg.a i * (2 * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)}
       - f xm)) ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + s ^ 2 := by
    intro i ⟨hi₁, hi₂⟩
    rcases h₁ ⟨hi₁, hi₂⟩ with hii
    calc
      _ ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i + 1) - xm‖ ^ 2 + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 := by
        linarith [hii]
      _ = ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i + 1) - xm‖ ^ 2 + s ^ 2 := by
        simp; rw[← mul_pow, (ha' i)]
  have h₂ : (Finset.range (k + 1)).sum (fun i => (alg.a i) * (2 * (sInf {f (alg.x i) |
      i ∈ Finset.range (k + 1)} - f xm))) ≤ (Finset.range (k + 1)).sum
      (fun i => ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + s ^ 2) := by
    apply Finset.sum_le_sum
    intro i hi
    apply h₁'
    constructor
    · simp
    · have : i < k + 1 := Finset.mem_range.mp hi
      apply (Nat.lt_add_one_iff).mp this
  rw [← Finset.sum_mul, ← mul_assoc, mul_comm _ 2, Finset.sum_add_distrib] at h₂
  rw [Finset.sum_range_sub', alg.initial, Finset.sum_const] at h₂
  simp at h₂
  have hG : (NNReal.toReal alg.G) > 0 := by
      apply lt_of_lt_of_le _ (h' (alg.hg 0))
      have : alg.a 0 * ‖alg.g 0‖ > 0 := by rw[ha' 0]; apply hs
      apply (pos_iff_pos_of_mul_pos this).mp (alg.ha 0)
  have inq₁ : Finset.sum (Finset.range (k + 1)) (fun x => alg.a x) ≥ (k + 1) * (s / ↑alg.G) := by
    have : Finset.sum (range (k + 1)) (fun _ => s / (NNReal.toReal alg.G)) = (k + 1) * (s / ↑alg.G) := by
      rw [Finset.sum_const]; simp
    rw[← this]
    apply Finset.sum_le_sum
    intro i _
    rw [← (ha' i)]
    apply (div_le_iff₀ hG).mpr ((mul_le_mul_left (alg.ha i)).mpr (h' (alg.hg i)))
  have hpos₁ : (↑k + 1) * (s / ↑alg.G) > 0 := by
    apply mul_pos
    · apply add_pos_of_nonneg_of_pos (Nat.cast_nonneg k) zero_lt_one
    · apply div_pos hs hG
  have hpos₁' : 2 * (↑k + 1) * (s / ↑alg.G) > 0 :=by
    rw [mul_assoc]
    apply mul_pos
    · linarith
    · apply hpos₁
  have h₂' : (2 * (k + 1) * (s / ↑alg.G)) * (sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm) ≤
      ‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + (↑k + 1) * s ^ 2 := by
    apply le_trans _ h₂
    apply mul_le_mul_of_nonneg_right
    · rw[mul_assoc]
      apply mul_le_mul_of_nonneg_left
      · apply inq₁
      · linarith
    · apply le_sub_right_of_add_le; simp
      apply le_csInf
      · simp at heq
        rw[← heq]
        apply Set.range_nonempty
      · intro b hb
        simp at hb
        rcases hb with ⟨i , _ , hb₂⟩
        rw[← hb₂]
        simp[isMinOn_univ_iff] at hm
        rcases hm (alg.x i) with hmi
        apply hmi
  calc
    _= sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm := by simp
    _ ≤ (‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + (k + 1) * s ^ 2) / (2 * (k + 1) * (s / alg.G)) := by
      apply (le_div_iff₀' hpos₁').mpr h₂'
    _ ≤ (‖x₀ - xm‖ ^ 2 + (↑k + 1) * s ^ 2) / (2 * (↑k + 1) * (s / ↑alg.G)) := by
      apply (div_le_div_right hpos₁').mpr; simp
    _ = alg.G * ‖x₀ - xm‖ ^ 2 / (2 * (k + 1) * s) + alg.G * s / 2 := by
      field_simp; ring


/-
  convergence with diminishing step size
-/

omit [CompleteSpace E] in
lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm)
    (ha' : Tendsto alg.a atTop (𝓝 0))
    (ha'' : Tendsto (fun (k : ℕ) => (Finset.range (k + 1)).sum alg.a) atTop atTop) :
    Tendsto (fun k => sInf {f (alg.x i) | i ∈ Finset.range (k + 1)}) atTop (𝓝 (f xm)) := by
  have h₁ : Tendsto (fun k => sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm)
      atTop (𝓝 0) := by
    simp [tendsto_def]; simp [tendsto_def] at ha' ha''
    intro s hs
    simp [mem_nhds_iff,Metric.isOpen_iff] at hs ha'
    rcases hs with ⟨t, hs₁, hs₂, hs₀⟩
    rcases hs₂ 0 hs₀ with ⟨ε, εpos, hs₂₀⟩
    have ha₁ : ∃ a₁, ∀ (b : ℕ), a₁ ≤ b → ‖x₀ - xm‖ ^ 2 /
        (2 * (Finset.range (b + 1)).sum alg.a) < ε / 2 := by
      let A := (‖x₀ - xm‖ ^ 2 / ε) + 1
      let s := {x | x > ‖x₀ - xm‖ ^ 2 / ε}
      have : ∀ (b : ℝ), A ≤ b → b ∈ s := by
        intro b hb
        simp [s]; apply lt_of_lt_of_le _ hb; simp [A]
      rcases ha'' s A this with ⟨a_1, hasA⟩
      use a_1
      intro b hb
      rcases hasA b hb with hb'; simp [s] at hb'
      obtain h₂ := (div_lt_iff₀ εpos).mp hb'
      have hpos : 2 * Finset.sum (Finset.range (b + 1)) alg.a > 0 := by
        apply mul_pos
        · simp
        · apply Finset.sum_pos
          · intro i _
            apply (alg.ha i)
          · simp
      apply (div_lt_iff₀' hpos).mpr
      calc
        ‖x₀ - xm‖ ^ 2 < Finset.sum (Finset.range (b + 1)) alg.a * ε := by apply h₂
        _ = 2 * Finset.sum (Finset.range (b + 1)) alg.a * (ε / 2) := by
          field_simp;ring
    have ha₂ : ∃ a₂, ∀ (b : ℕ), a₂ ≤ b → alg.G ^ 2 * (Finset.range (b + 1)).sum
        (fun i => (alg.a i) ^ 2) / (2 * (Finset.range (b + 1)).sum alg.a) < ε / 2 := by
      by_cases hG : ↑alg.G = 0
      · use 0; intro b _ ; rw[hG]; simp
        positivity
      · have hpos': (NNReal.toReal alg.G) ^ 2 > 0 := by
          apply (sq_pos_iff).mpr; simp[hG]
        let s := {x | |x| < ε / (2 * ↑alg.G ^ 2)}
        have c₁ : ∀ x_1 ∈ s, x_1 ∈ s := by simp
        have c₂ : ∀ x_1 ∈ s, ∃ ε, 0 < ε ∧ ∀ (x_2 : ℝ), dist x_2 x_1 < ε → x_2 ∈ s := by
          intro x₁ hx₁; simp [s] at hx₁
          use ((ε / (2 * ↑alg.G ^ 2)) -|x₁|) / 2
          constructor
          · apply div_pos
            · simp[hx₁]
            · simp
          · intro x₂ hx₂; simp [s]
            calc
              |x₂| ≤ |x₁| + dist x₂ x₁ := by
                rw [← Real.dist_0_eq_abs x₁, ← Real.dist_0_eq_abs x₂, dist_comm x₂ x₁]
                rw [dist_comm x₂ 0, dist_comm x₁ 0]
                apply dist_triangle
              _ < |x₁| + (ε / (2 * ↑alg.G ^ 2) - |x₁|) / 2 := by simp [hx₂]
              _ = (ε / (2 * ↑alg.G ^ 2) + |x₁|) / 2 := by
                field_simp; ring
              _ < (ε / (2 * ↑alg.G ^ 2) + ε / (2 * ↑alg.G ^ 2)) / 2 := by
                apply (mul_lt_mul_left zero_lt_two).mp
                rw [mul_div_cancel₀, mul_div_cancel₀]
                simp [hx₁]; simp; simp
              _ = ε / (2 * ↑alg.G ^ 2) := by field_simp; ring
        have c₃ : 0 ∈ s := by
          simp [s]
          apply div_pos
          · apply εpos
          · apply mul_pos
            · simp
            · apply hpos'
        rcases ha' s s c₁ c₂ c₃ with ⟨a₂,ha₂⟩
        simp [s] at ha₂
        let A := (2 * alg.G ^ 2 * (Finset.range (a₂ + 1)).sum fun x => (alg.a x) ^ 2) / ε + 1
        let s₁ := {x | x > (2 * alg.G ^ 2 * (Finset.range (a₂ + 1)).sum fun x => (alg.a x) ^ 2) / ε}
        have : ∀ (b : ℝ), A ≤ b → b ∈ s₁ := by
          intro b hb; simp [s₁]; apply lt_of_lt_of_le _ hb; simp [A]
        rcases ha'' s₁ A this with ⟨a₁, hasA⟩
        use max a₁ (a₂ + 1); intro b hb
        have hba₁ : b ≥ a₁ := by
          apply le_trans _ hb; apply le_max_left
        have hba₂' : b ≥ a₂ + 1 := by
          apply le_trans _ hb; apply le_max_right
        have hba₂ : b ≥ a₂ := by
          apply le_trans _ hba₂'; linarith
        have hpos : 2 * Finset.sum (Finset.range (b + 1)) alg.a > 0 := by
          apply mul_pos
          · simp
          · apply Finset.sum_pos
            · intro i _; apply (alg.ha i)
            · simp
        have hpos'' : Finset.sum (Finset.range (b + 1)) alg.a > 0 := by
          apply Finset.sum_pos
          · intro i _; apply (alg.ha i)
          · simp
        have hposG : ↑alg.G ^ 2 * (ε / (2 * ↑alg.G ^ 2)) > 0 := by
          apply mul_pos
          · apply hpos'
          · apply div_pos
            · apply εpos
            · apply mul_pos
              · simp
              · apply hpos'
        calc
          _ = (↑alg.G ^ 2 * Finset.sum (Finset.range (a₂ + 1)) fun x => alg.a x ^ 2) /
                  (2 * Finset.sum (Finset.range (b + 1)) alg.a) + (↑alg.G ^ 2 * Finset.sum
                  (Finset.range (b - a₂)) fun x => alg.a (a₂ + 1 + x) ^ 2) /
                  (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by
                field_simp; rw[← mul_add]; simp
                left
                obtain heq := Finset.sum_range_add (fun i => alg.a i ^ 2) (a₂ + 1) (b - a₂)
                have h₃' : (b + 1) = a₂ + 1 + (b - a₂) := by
                  rw[(Nat.add_comm a₂ 1), Nat.add_assoc, (Nat.add_sub_cancel' hba₂), Nat.add_comm]
                rw[h₃']; apply heq
          _ < ε / 4 + ε / 4 := by
            apply add_lt_add
            · rcases hasA b hba₁ with h₃; simp [s₁] at h₃
              obtain h₃₁ := (div_lt_iff₀ εpos).mp h₃
              obtain h₃₂ := (div_lt_iff₀' hpos'').mpr h₃₁
              obtain h₃₃ := (div_lt_div_right zero_lt_four).mpr h₃₂
              calc
                _ = (2 * ↑alg.G ^ 2 * Finset.sum (Finset.range (a₂ + 1)) fun x => alg.a x ^ 2) /
                      Finset.sum (Finset.range (b + 1)) alg.a / 4 := by field_simp;ring
                _ < ε / 4 := by apply h₃₃
            · apply (div_lt_iff₀ hpos).mpr
              calc
                _ ≤ ↑alg.G ^ 2 * Finset.sum (Finset.range (b - a₂)) (fun x => (ε / (2 * ↑alg.G ^ 2))
                      * alg.a (a₂ + 1 + x)) := by
                    apply (mul_le_mul_left hpos').mpr; apply Finset.sum_le_sum; intro i _
                    have hposi : alg.a (a₂ + 1 + i) > 0 := by apply (alg.ha (a₂ + 1 + i))
                    rw [pow_two]; apply (mul_le_mul_right hposi).mpr
                    have : a₂ + 1 + i ≥ a₂ := by
                      rw[Nat.add_assoc]; apply Nat.le_add_right
                    rcases ha₂ (a₂ + 1 + i) this with hai₂
                    apply le_trans _ (le_of_lt hai₂); apply le_abs_self
                _ = ↑alg.G ^ 2 *  (ε / (2 * ↑alg.G ^ 2)) * Finset.sum (Finset.range (b - a₂))
                      fun x => alg.a (a₂ + 1 + x) := by
                    rw[mul_assoc]
                    apply (mul_eq_mul_left_iff).mpr
                    left
                    rw[← Finset.mul_sum]
                _ < ↑alg.G ^ 2 *  (ε / (2 * ↑alg.G ^ 2)) * Finset.sum (Finset.range (b + 1))
                      (fun x => alg.a x) := by
                    apply (mul_lt_mul_left hposG).mpr
                    obtain heq := Finset.sum_range_add (fun x => alg.a x) (a₂ + 1) (b - a₂)
                    have h₃' : (b + 1) = a₂ + 1 + (b - a₂) := by
                      rw [Nat.add_comm a₂, Nat.add_assoc, (Nat.add_sub_cancel' hba₂), Nat.add_comm]
                    rw [h₃', heq]
                    simp
                    apply Finset.sum_pos
                    · intro i _
                      apply (alg.ha i)
                    · simp
                _ = ε / 4 * (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by
                  field_simp;ring
          _ = ε / 2 := by field_simp; ring
    rcases ha₁ with ⟨a₁,ha₁⟩; rcases ha₂ with ⟨a₂,ha₂⟩
    use max a₁ a₂
    intro b hb
    have hba₁: b ≥ a₁ :=by
      apply le_trans _ hb; apply le_max_left
    have hba₂: b ≥ a₂ :=by
      apply le_trans _ hb; apply le_max_right
    apply hs₁; apply hs₂₀; simp
    have hne: sInf {x | ∃ i < b + 1, f (alg.x i) = x} - f xm ≥ 0 := by
      have heq : (Set.range fun (x : Finset.range (b + 1)) => f (alg.x x)) =
          {x | ∃ i ∈ Finset.range (b + 1), f (alg.x i) = x} := by simp [Set.ext_iff]
      have hneb : Nonempty (Finset.range (b + 1)) := by
        simp; use 0; apply Nat.succ_pos b
      apply le_sub_right_of_add_le
      simp
      apply le_csInf
      · simp at heq; rw[← heq]; apply Set.range_nonempty
      · intro b hb; simp at hb
        rcases hb with ⟨i , _ , hb₂⟩
        rw[← hb₂]; simp[isMinOn_univ_iff] at hm
        rcases hm (alg.x i) with hmi
        apply hmi
    rw[(abs_of_nonneg hne)]
    have h₁ : ∀ (k : ℕ), 2 * ((Finset.range (k + 1)).sum alg.a) * (sInf {f (alg.x i) |
        i ∈ Finset.range (k + 1)} - (f xm)) ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 *
        (Finset.range (k + 1)).sum (fun i => (alg.a i) ^ 2) := by
      apply subgradient_method_converge
    rcases h₁ b with hb₁
    rcases ha₁ b hba₁ with hba₁'
    rcases ha₂ b hba₂ with hba₂'
    have hpos : 2 * Finset.sum (Finset.range (b + 1)) alg.a > 0 := by
      apply mul_pos
      · simp
      · apply Finset.sum_pos
        · intro i _; apply (alg.ha i)
        · simp
    calc
      _ ≤ (‖x₀ - xm‖ ^ 2 + ↑alg.G ^ 2 * Finset.sum (Finset.range (b + 1)) fun i => alg.a i ^ 2)
            / (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by
          apply (le_div_iff₀' hpos).mpr; simp at hb₁; apply hb₁
      _ = ‖x₀ - xm‖ ^ 2 / (2 * Finset.sum (Finset.range (b + 1)) alg.a) + (↑alg.G ^ 2 *
            Finset.sum (Finset.range (b + 1)) fun i => alg.a i ^ 2) /
            (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by
          simp [div_add_div_same]
      _ < ε / 2 + ε / 2 := by
        apply add_lt_add; apply hba₁'; apply hba₂'
      _ = ε := by field_simp
  obtain h₁' := Filter.Tendsto.add_const (f xm) h₁
  simp at h₁'; simp; apply h₁'

end