diff --git a/Optlib/Algorithm/ADMM/Inv_bounded.lean b/Optlib/Algorithm/ADMM/Inv_bounded.lean index 827677e..409fe50 100644 --- a/Optlib/Algorithm/ADMM/Inv_bounded.lean +++ b/Optlib/Algorithm/ADMM/Inv_bounded.lean @@ -14,21 +14,21 @@ variable {X Y:Type*} [NormedAddCommGroup Y] [InnerProductSpace ℝ Y] (A : X →L[ℝ] Y)(fullrank: Injective A) -lemma KerA_bot (fullrank: Injective A): ker A = ⊥ := ker_eq_bot.2 fullrank +lemma KerA_bot (fullrank: Injective A): A.toLinearMap.ker = ⊥ := LinearMap.ker_eq_bot.2 fullrank variable [CompleteSpace X] [CompleteSpace Y] -lemma KerA_eq_KerA'A : ker A = ker (A†.comp A) := by +lemma KerA_eq_KerA'A : A.toLinearMap.ker = (A†.comp A).toLinearMap.ker := by ext x; constructor; simp · intro h; rw[h]; continuity · intro h; simp at h - have : ((inner (A x) (A x)):ℝ) = (0:ℝ) := by + have : ((inner ℝ (A x) (A x)):ℝ) = (0:ℝ) := by calc - _ = (inner x ((A†) (A x)):ℝ) := by rw [ContinuousLinearMap.adjoint_inner_right] + _ = (inner ℝ x ((A†) (A x)):ℝ) := by rw [ContinuousLinearMap.adjoint_inner_right] _ = (0:ℝ) := by rw [h, inner_zero_right] apply inner_self_eq_zero.1 this -lemma KerA'A_bot (fullrank: Injective A) : ker (A†.comp A) = ⊥ := by +lemma KerA'A_bot (fullrank: Injective A) : (A†.comp A).toLinearMap.ker = ⊥ := by rw[← KerA_eq_KerA'A] apply KerA_bot A fullrank diff --git a/Optlib/Algorithm/ADMM/Lemma.lean b/Optlib/Algorithm/ADMM/Lemma.lean index e02d256..5f0c97c 100644 --- a/Optlib/Algorithm/ADMM/Lemma.lean +++ b/Optlib/Algorithm/ADMM/Lemma.lean @@ -61,6 +61,7 @@ local notation "y'" => admm_kkt.y local notation "A₁†" => ContinuousLinearMap.adjoint A₁ local notation "A₂†" => ContinuousLinearMap.adjoint A₂ local notation "⟪" a₁ ", " a₂ "⟫" => @inner ℝ _ _ a₁ a₂ +local notation "inner" => (inner ℝ) lemma Satisfaction_ofthekkt : Convex_KKT x₁' x₂' y' admm.toOptProblem := admm_kkt.h @@ -189,7 +190,7 @@ lemma norm_covex1 [Setting E₁ E₂ F admm admm_kkt]:∀ n : ℕ+ , let g := A₁ have h2 : u ∘ g = f := by ext x - simp only [Function.comp_apply] + rfl rw[← h2] have h3 : ⇑g ⁻¹' univ = univ := by simp only [preimage_univ] @@ -223,7 +224,7 @@ lemma norm_covex2 [Setting E₁ E₂ F admm admm_kkt]:∀ n : ℕ+ , let g := A₂ have h2 : u ∘ g = f := by ext x - simp only [Function.comp_apply] + rfl rw[← h2] have h3 : ⇑g ⁻¹' univ = univ := by simp only [preimage_univ] @@ -280,10 +281,7 @@ lemma ADMM_iter_process₁'_eq3_2' [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ((OptProblem.A₂ E₁) (ADMM.x₂ E₁ F n.natPred) - OptProblem.b E₁ E₂) rw[this] show HasGradientAt ((fun x => ⟪c , (A₁ x)⟫ + c₁)) (A₁† c) x - rw[hasGradientAt_iff_hasFDerivAt] - apply HasFDerivAt.add_const _ c₁ - show HasGradientAt ((fun x => ⟪c , (A₁ x)⟫)) (A₁† c) x - apply ADMM_iter_process₁'_eq3_2'_1 + exact (ADMM_iter_process₁'_eq3_2'_1 (c := c) x).add_const c₁ lemma inner_continuous1 [Setting E₁ E₂ F admm admm_kkt]:∀ n : ℕ+ , ContinuousOn (fun x => ⟪y n.natPred , (A₁ x) + (A₂ (x₂ n.natPred)) - b⟫) univ:= by @@ -325,8 +323,7 @@ lemma Gradient_of_quadratic_forms { α β : Type*} simp[h] have := norm_nonneg (s - x) rwa[mul_nonneg_iff_right_nonneg_of_pos εpos] - · use ε / ‖A‖ ^ 2 - field_simp + · refine ⟨ε / ‖A‖ ^ 2, div_pos εpos (sq_pos_of_pos h), ?_⟩ intro x hx have hzero : 0 < ‖A‖ ^ 2 := by apply sq_pos_of_pos h let t := x - s @@ -343,7 +340,7 @@ lemma Gradient_of_quadratic_forms { α β : Type*} rw[real_inner_smul_left,ContinuousLinearMap.adjoint_inner_left] ring rw[this,real_inner_self_eq_norm_sq] - simp only [abs_pow, abs_norm, ge_iff_le] + rw [Real.norm_eq_abs, abs_of_nonneg (sq_nonneg _)] calc _ = ‖A (s - x)‖ ^ 2 := by rw[norm_comm] @@ -367,7 +364,6 @@ lemma Gradient_of_quadratic_forms { α β : Type*} apply mul_le_mul_of_nonneg_left hx this _ = _ := by field_simp[hzero] - ring_nf #check add_sub lemma ADMM_iter_process₁'_eq3_3' [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+ , @@ -558,10 +554,7 @@ lemma ADMM_iter_process₂'_eq3_2' [Setting E₁ E₂ F admm admm_kkt]: ∀ n : exact inner_add_right (y n.natPred) (A₂ x) (A₁ (x₁ n) - b) rw[this] show HasGradientAt (fun x => ⟪c , (A₂ x)⟫ + c₁) (A₂† c) x - rw[hasGradientAt_iff_hasFDerivAt] - apply HasFDerivAt.add_const _ c₁ - show HasGradientAt ((fun x => ⟪c , (A₂ x)⟫)) (A₂† c) x - apply inner_gradient + exact (inner_gradient (A := A₂) (c := c) x).add_const c₁ lemma inner_continuous2 [Setting E₁ E₂ F admm admm_kkt]:∀ n : ℕ+ , ContinuousOn (fun x => ⟪y n.natPred , (A₁ (x₁ n)) + (A₂ x) - b⟫) univ:= by @@ -854,7 +847,7 @@ lemma subgradientAt_mono_u [Setting E₁ E₂ F admm admm_kkt] : ∀ n : ℕ+, (0 : ℝ) ≤ (inner (u (n) + A₁† y') (x₁ (n) - x₁')) := by intro n calc - _= inner (u (n) - (- A₁† y')) (x₁ (n) - x₁') := by simp[v] + _= inner (u (n) - (- A₁† y')) (x₁ (n) - x₁') := by simp _≥ (0 : ℝ) := by apply subgradientAt_mono apply u_inthesubgradient @@ -864,7 +857,7 @@ lemma subgradientAt_mono_v [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+, (0 : ℝ) ≤ (inner (v (n) + A₂† y') (x₂ (n) - x₂')) := by intro n calc - _= inner (v (n) - (- A₂† y')) (x₂ (n) - x₂') := by simp[v] + _= inner (v (n) - (- A₂† y')) (x₂ (n) - x₂') := by simp _≥ (0 : ℝ) := by apply subgradientAt_mono apply v_inthesubgradient @@ -883,7 +876,7 @@ lemma expended_u_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n, (0 : ℝ) let x_diff := x₁ (n + 1) - x₁' let succ_n := Nat.toPNat' (n + 1) calc - _= inner (𝕜 := ℝ) block Ae1 := by rfl + _= inner block Ae1 := by rfl _= inner (A₁† block) (e') := by rw [ContinuousLinearMap.adjoint_inner_left] _= inner (u' + A₁† y') (x_diff) := by let block₁ := y (n + 1) + ((1-τ) * ρ) • (A₁ (e₁ (n + 1)) + A₂ (e₂ (n + 1))) + (ρ • (A₂ (x₂ (n) - x₂ (n+1)))) @@ -954,7 +947,7 @@ lemma expended_u_v_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n , (inner ( let Ae2 := A₂ (e₂ (n + 1)) calc _ = inner ey' (-(A_e_sum)) - (1 - τ) * ρ * (inner A_e_sum A_e_sum) - + ρ * (inner (A_x_sum) (Ae1)) := by rw [norm_sq_eq_inner (𝕜:=ℝ) (A_e_sum)];rfl + + ρ * (inner (A_x_sum) (Ae1)) := by rw [← real_inner_self_eq_norm_sq A_e_sum] _ = inner ey' (-(A_e_sum)) + inner (- ((1 - τ) * ρ) • A_e_sum) A_e_sum + ρ * (inner A_x_sum Ae1) := by rw [smul_left,starRingEnd_eq_R];ring _ = inner (-ey') A_e_sum + inner (- ((1 - τ) * ρ) • A_e_sum) A_e_sum @@ -1244,7 +1237,7 @@ lemma Φ_isdescending_inequ5' [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+, - 2 * (1-τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2 * M (n+1) - 1 * ρ * ((‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2)) - := by nth_rw 2 [div_eq_mul_inv]; rw [one_mul]; nth_rw 3 [pow_two]; simp [inv_mul_cancel] + := by nth_rw 2 [div_eq_mul_inv]; rw [one_mul]; nth_rw 3 [pow_two]; simp left; rw [mul_assoc] nth_rw 2 [← mul_assoc] nth_rw 2 [← mul_assoc] @@ -1399,7 +1392,11 @@ lemma basic_inequ₂ (n : ℕ+) : - 2 * inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ apply Real.sqrt_ne_zero'.mpr rcases admm.htau with ⟨h₁, _⟩ assumption - have h3 : inner (𝕜 := ℝ) S1 S2 = inner (𝕜 := ℝ) (s1 • S1) (s1⁻¹ • S2) := by rw [inner_smul_left, inner_smul_right]; rw [← mul_assoc]; simp; rw [mul_inv_cancel₀, one_mul]; exact this + have h3 : inner S1 S2 = inner (s1 • S1) (s1⁻¹ • S2) := by + rw [inner_smul_left, inner_smul_right, ← mul_assoc] + simp + rw [mul_inv_cancel₀, one_mul] + exact this rw [h1, h2, h3] have : ‖s1 • S1‖ ^ 2 + ‖s1⁻¹ • S2‖ ^ 2 - -2 * ⟪s1 • S1, s1⁻¹ • S2⟫_ℝ = ‖s1 • S1‖ ^ 2 + 2 * ⟪s1 • S1, s1⁻¹ • S2⟫_ℝ + ‖s1⁻¹ • S2‖ ^ 2 := by ring_nf rw [this, ←norm_add_sq_real] @@ -1478,13 +1475,20 @@ lemma τ_min1_1 [Setting E₁ E₂ F admm admm_kkt] (h: 0 < τ ∧ τ ≤ 1) : m rcases h with ⟨h1, h2⟩ apply min_eq_left have h3: τ ^ 2 ≤ 1 := by - apply pow_le_one;linarith;linarith + have hτ : |τ| ≤ 1 := by simpa [abs_of_nonneg (le_of_lt h1)] using h2 + have hτ' : |τ| ≤ |(1 : ℝ)| := by simpa using hτ + have hsq : τ ^ 2 ≤ (1 : ℝ) ^ 2 := (sq_le_sq).2 hτ' + simpa using hsq linarith lemma τ_min1_2 [Setting E₁ E₂ F admm admm_kkt] (h: τ > 1 ) : min τ (1 + τ - τ ^ 2) = 1 + τ - τ ^ 2 := by apply min_eq_right have : 1 < τ ^ 2 := by - apply one_lt_pow;exact h;linarith + have hτ : 0 < τ := lt_trans zero_lt_one h + have hτabs : (1 : ℝ) < |τ| := by simpa [abs_of_pos hτ] using h + have hτabs' : |(1 : ℝ)| < |τ| := by simpa using hτabs + have hsq : (1 : ℝ) ^ 2 < τ ^ 2 := (sq_lt_sq).2 hτabs' + simpa using hsq linarith lemma τ_min2_1 [Setting E₁ E₂ F admm admm_kkt] (h: 0 < τ ∧ τ ≤ 1) : min 1 (1 + 1 / τ - τ ) = 1 := by @@ -1503,7 +1507,9 @@ lemma τ_min2_2 [Setting E₁ E₂ F admm admm_kkt] (h: τ > 1 ) : min 1 (1 + 1 calc _ > 1 := h _ > 1 / τ := by - rw [one_div, ← inv_one];apply inv_lt_inv_of_lt;linarith;exact h + have hτ : 0 < τ := lt_trans zero_lt_one h + have hdiv1 : 1 / τ < 1 := (div_lt_iff₀ hτ).2 (by simpa [one_mul] using h) + linarith [hdiv1] linarith lemma τ_min3_1 [Setting E₁ E₂ F admm admm_kkt] (h: 0 < τ ∧ τ ≤ 1) : max (1 - τ) (1 - 1 / τ) = 1 - τ := by @@ -1522,7 +1528,9 @@ lemma τ_min3_2 [Setting E₁ E₂ F admm admm_kkt] (h: τ > 1) : max (1 - τ) ( calc _ > 1 := h _ > 1 / τ := by - rw [one_div, ← inv_one];apply inv_lt_inv_of_lt;linarith;exact h + have hτ : 0 < τ := lt_trans zero_lt_one h + have hdiv1 : 1 / τ < 1 := (div_lt_iff₀ hτ).2 (by simpa [one_mul] using h) + linarith [hdiv1] linarith lemma Φ_isdescending [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+, (Φ n ) - (Φ (n + 1) ) ≥ (min τ (1 + τ - τ ^ 2) )* ρ diff --git a/Optlib/Algorithm/ADMM/Scheme.lean b/Optlib/Algorithm/ADMM/Scheme.lean index 3eda488..953810b 100644 --- a/Optlib/Algorithm/ADMM/Scheme.lean +++ b/Optlib/Algorithm/ADMM/Scheme.lean @@ -35,7 +35,7 @@ def Admm_sub_Isunique {E : Type*}(f : E → ℝ)(x : E)(_h : IsMinOn f univ x): -- Augmented Lagrangian Function def Augmented_Lagrangian_Function (opt : OptProblem E₁ E₂ F) (ρ : ℝ) : E₁ × E₂ × F → ℝ := fun (x₁ , x₂ , y) => (opt.f₁ x₁) + (opt.f₂ x₂) + - inner y ((opt.A₁ x₁) + (opt.A₂ x₂) - opt.b) + ρ / 2 * ‖(opt.A₁ x₁) + (opt.A₂ x₂) - opt.b‖ ^ 2 + inner ℝ y ((opt.A₁ x₁) + (opt.A₂ x₂) - opt.b) + ρ / 2 * ‖(opt.A₁ x₁) + (opt.A₂ x₂) - opt.b‖ ^ 2 -- The basic iteration format of ADMM class ADMM extends (OptProblem E₁ E₂ F) where diff --git a/Optlib/Algorithm/ADMM/Theroem_converge.lean b/Optlib/Algorithm/ADMM/Theroem_converge.lean index d432b56..3d2be26 100644 --- a/Optlib/Algorithm/ADMM/Theroem_converge.lean +++ b/Optlib/Algorithm/ADMM/Theroem_converge.lean @@ -88,9 +88,9 @@ lemma nonneg₁ [Setting E₁ E₂ F admm admm_kkt]: min τ (1 + τ - τ ^ 2) > lemma nonneg₂ [Setting E₁ E₂ F admm admm_kkt]: min 1 (1 + 1 / τ - τ) > 0 := by rcases admm.htau with ⟨h1, _⟩ have h2: 1 + 1/τ - τ > 0 := by - field_simp;rw [← sq] - have h3 : 1 + τ - τ ^ 2 > 0 := nonneg_prime - linarith + have hτ : τ ≠ 0 := ne_of_gt h1 + rw [show 1 + 1 / τ - τ = (1 + τ - τ ^ 2) / τ by field_simp [hτ]; ring] + exact div_pos nonneg_prime h1 apply lt_min one_pos h2 lemma Φ₁_nonneg [Setting E₁ E₂ F admm admm_kkt]: @@ -165,21 +165,23 @@ lemma Φ_is_nonneg [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ , Φ n ≥ 0 lemma Φ_bd_above [Setting E₁ E₂ F admm admm_kkt]: ∃ C : ℝ, ∀ n : ℕ, Φ n < C := by let C := Max.max ((Φ 0) + 1) ((Φ 1) + 1); use C; intro n - induction' n with k h - · have : Φ 0 < (Φ 0) + 1 := by linarith + induction n with + | zero => + have : Φ 0 < (Φ 0) + 1 := by linarith apply lt_max_iff.2 left; exact this - · by_cases hh : k = 0 - · rw [hh,zero_add] - apply lt_max_iff.2 - right; linarith - · push_neg at hh - have k_pos : k > 0 := by apply Nat.pos_of_ne_zero hh - have : (Φ) (k.toPNat k_pos) ≥ (Φ) ((k.toPNat k_pos ) + 1) := by - apply Φ_is_monotone - have h' : Φ (k.toPNat k_pos) < C := by apply h - show Φ ((k.toPNat k_pos) + 1) < C - linarith + | succ k h => + by_cases hh : k = 0 + · rw [hh,zero_add] + apply lt_max_iff.2 + right; linarith + · push_neg at hh + have k_pos : k > 0 := by exact Nat.pos_of_ne_zero hh + have : (Φ) (k.toPNat k_pos) ≥ (Φ) ((k.toPNat k_pos) + 1) := by + apply Φ_is_monotone + have h' : Φ (k.toPNat k_pos) < C := by simpa using h + show Φ ((k.toPNat k_pos) + 1) < C + linarith lemma Φ_isBounded' [Setting E₁ E₂ F admm admm_kkt] : ∃ (r : ℝ), (range Φ) ⊆ ball 0 r := by rcases Φ_bd_above with ⟨C,bd⟩ @@ -317,9 +319,7 @@ lemma A₂e₂_isBounded' [Setting E₁ E₂ F admm admm_kkt]: ∃ (r : ℝ), (r exact h3; simp have h6: dist (A₂ (e₂ n)) 0 < √ (r_Φ / ρ) := by - rw[← sub_zero (A₂ (e₂ n))] at h5 - rw[SeminormedAddGroup.dist_eq (A₂ (e₂ n)) 0] - exact h5 + simpa [dist_eq_norm] using h5 rw [← hr] at h6 rw [← Metric.mem_ball] at h6 @@ -434,9 +434,7 @@ lemma A₁e₁_A₂e₂_isBounded'[Setting E₁ E₂ F admm admm_kkt] : ∃ (r : have h6: dist (A₁ (e₁ n) + A₂ (e₂ n)) 0 < r := by have h_n' := h_n n - rw[← sub_zero (A₁ (e₁ n) + A₂ (e₂ n))] at h_n' - rw[SeminormedAddGroup.dist_eq (A₁ (e₁ n) + A₂ (e₂ n)) 0] - exact h_n' + simpa [dist_eq_norm] using h_n' rw [← Metric.mem_ball] at h6; simp; simp at h6 exact h6 @@ -481,9 +479,7 @@ lemma A₁e₁_isBounded' [Setting E₁ E₂ F admm admm_kkt]: ∃ (r : ℝ), ra _ = r := hr have h_dist : dist (A₁ (e₁ n)) 0 < r := by - rw[← sub_zero (A₁ (e₁ n))] at h_norm - rw[SeminormedAddGroup.dist_eq (A₁ (e₁ n)) 0] - exact h_norm + simpa [dist_eq_norm] using h_norm rw [← Metric.mem_ball] at h_dist apply h_dist @@ -679,7 +675,8 @@ lemma Φ_Summable₁' [Setting E₁ E₂ F admm admm_kkt] : intro n let φ₀ := (fun i : ℕ => Φ i.succ) have : ∀ i ∈ Finset.range n , (φ₀ i)-(φ₀ (i+1)) = (Φ i.succ ) - (Φ (i.succ + 1)) := by - simp only [Finset.mem_range, Nat.succ_eq_add_one, implies_true] + intro i hi + rfl have h : Finset.range n =Finset.range n := rfl rw[← Finset.sum_congr h this , Finset.sum_range_sub'] simp only [φ₀] @@ -701,16 +698,7 @@ lemma Φ_isSummable [Setting E₁ E₂ F admm admm_kkt] : Summable (fun n : ℕ theorem summable_of_nonneg_of_le {β : Type*} {f : β → ℝ} {g : β → ℝ} (hg : ∀ (n : β), 0 ≤ g n) (hgf : ∀ (n : β), g n ≤ f n) (hf : Summable f) : Summable g:=by - rw[← NNReal.summable_mk] - have f_ge_zero :∀ (n : β), 0 ≤ f n := by - intro n - apply le_trans (hg n) (hgf n) - have :∀ (n : β), (⟨g n, hg n⟩ : NNReal) ≤ ⟨f n , f_ge_zero n⟩ := by - simp only [Subtype.mk_le_mk] - apply hgf - apply NNReal.summable_of_le this - rw[← NNReal.summable_coe] - exact hf + exact Summable.of_nonneg_of_le hg hgf hf lemma Φ_inequ₁ [Setting E₁ E₂ F admm admm_kkt] (m : ℕ+): (min 1 (1 + 1 / τ - τ )) * ρ * ‖A₁ (e₁ (m+1)) + A₂ (e₂ (m+1))‖ ^ 2 ≤ Φ m - Φ (m + 1) := by diff --git a/Optlib/Algorithm/BCD/Convergence.lean b/Optlib/Algorithm/BCD/Convergence.lean index 35be560..a3fc4fa 100644 --- a/Optlib/Algorithm/BCD/Convergence.lean +++ b/Optlib/Algorithm/BCD/Convergence.lean @@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Authors: Chenyi Li, Bowen Yang, Yifan Bai -/ import Optlib.Algorithm.BCD.Scheme +import Mathlib.Tactic /-! # Block Coordinate Descent @@ -36,16 +37,16 @@ section block_subdifferential variable {E : Type*} [NormedAddCommGroup E] -lemma infEdist_bound {s : Set E} : ∀ x ∈ s, ENNReal.ofReal ‖x‖ ≥ EMetric.infEdist 0 s := by +lemma infEdist_bound {s : Set E} : ∀ x ∈ s, ENNReal.ofReal ‖x‖ ≥ Metric.infEDist 0 s := by by_cases hs : s = ∅ simp [hs] push_neg at hs intro x xs - have : EMetric.infEdist 0 s ≤ edist 0 x := EMetric.infEdist_le_edist_of_mem xs + have : Metric.infEDist 0 s ≤ edist 0 x := Metric.infEDist_le_edist_of_mem xs rw [← dist_zero_left] apply (ENNReal.le_ofReal_iff_toReal_le _ _).2 · exact ENNReal.toReal_le_of_le_ofReal dist_nonneg (edist_dist 0 x ▸ this) - · exact Metric.infEdist_ne_top hs + · exact Metric.infEDist_ne_top hs · simp variable {F: Type*} [InnerProductSpace ℝ E] @@ -53,17 +54,28 @@ variable [NormedAddCommGroup F] [InnerProductSpace ℝ F] variable {f : E → ℝ} {g : F → ℝ} {x u : E} {y v : F} lemma f_subdiff_block (hf : u ∈ f_subdifferential f x) (hg : v ∈ f_subdifferential g y) : - ⟨u, v⟩ ∈ f_subdifferential (fun z ↦ f z.1 + g z.2 : WithLp 2 (E × F) → ℝ) ⟨x, y⟩ := by + ((u, v) : WithLp 2 (E × F)) ∈ + f_subdifferential (fun z ↦ f z.fst + g z.snd : WithLp 2 (E × F) → ℝ) + ((x, y) : WithLp 2 (E × F)) := by rw [has_f_subdiff_iff] at * intro ε εpos have ε2pos : 0 < ε / 2 := by positivity - filter_upwards [Eventually.prod_nhds (hf _ ε2pos) (hg _ ε2pos)] with z ⟨hfz, hyz⟩ + have hfx : ∀ᶠ z : WithLp 2 (E × F) in 𝓝 ((x, y) : WithLp 2 (E × F)), + f z.fst - f x - inner ℝ u (z.fst - x) ≥ -(ε / 2) * ‖z.fst - x‖ := + Filter.Tendsto.eventually + ((WithLp.continuous_fst (p := (2 : ENNReal)) (α := E) (β := F)).tendsto + ((x, y) : WithLp 2 (E × F))) (hf _ ε2pos) + have hgy : ∀ᶠ z : WithLp 2 (E × F) in 𝓝 ((x, y) : WithLp 2 (E × F)), + g z.snd - g y - inner ℝ v (z.snd - y) ≥ -(ε / 2) * ‖z.snd - y‖ := + Filter.Tendsto.eventually + ((WithLp.continuous_snd (p := (2 : ENNReal)) (α := E) (β := F)).tendsto + ((x, y) : WithLp 2 (E × F))) (hg _ ε2pos) + filter_upwards [hfx, hgy] with z hfz hyz rw [WithLp.prod_inner_apply] - simp only [WithLp.sub_fst, WithLp.sub_snd] let z' : WithLp 2 (E × F) := (x, y) - show f z.1 + g z.2 - (f x + g y) - (⟪u, z.1 - x⟫ + ⟪v, z.2 - y⟫) ≥ -ε * ‖z - z'‖ - have h1 : ‖z.1 - x‖ ≤ ‖z - z'‖ := fst_norm_le_prod_L2 (z - z') - have h2 : ‖z.2 - y‖ ≤ ‖z - z'‖ := snd_norm_le_prod_L2 (z - z') + show f z.fst + g z.snd - (f x + g y) - (⟪u, z.fst - x⟫ + ⟪v, z.snd - y⟫) ≥ -ε * ‖z - z'‖ + have h1 : ‖z.fst - x‖ ≤ ‖z - z'‖ := fst_norm_le_prod_L2 (z - z') + have h2 : ‖z.snd - y‖ ≤ ‖z - z'‖ := snd_norm_le_prod_L2 (z - z') linarith [(mul_le_mul_iff_of_pos_left ε2pos).mpr h1, (mul_le_mul_iff_of_pos_left ε2pos).mpr h2] end block_subdifferential @@ -91,16 +103,19 @@ theorem PALM_Descent (h : E → ℝ) {h' : E → E} (Lₕ : NNReal) rw [this] at u₁prox have : u₁ - (u - t • h' u) = (u₁ - u) + t • h' u := by abel rw [this] at u₁prox - simp [norm_add_sq_real, this] at u₁prox + simp [norm_add_sq_real] at u₁prox have ha : t * σ u₁ + ‖u₁ - u‖ ^ 2 / 2 + ⟪u₁ - u, t • h' u⟫ ≤ t * σ u := by linarith [u₁prox] rw [inner_smul_right] at ha - have : t * (‖u₁ - u‖ ^ 2 / (2 * t)) = ‖u₁ - u‖ ^ 2 / 2 := by field_simp; ring + have : t * (‖u₁ - u‖ ^ 2 / (2 * t)) = ‖u₁ - u‖ ^ 2 / 2 := by + field_simp [ne_of_gt h₅] rw [← this] at ha have : t * σ u₁ + t * (‖u₁ - u‖ ^ 2 / (2 * t)) + t * ⟪u₁ - u, h' u⟫ = t * (σ u₁ + ‖u₁ - u‖ ^ 2 / (2 * t) + ⟪u₁ - u, h' u⟫) := by ring rw [this] at ha have hne : ⟪u₁ - u, h' u⟫ ≤ σ u - σ u₁ - ‖u₁ - u‖ ^ 2 / (2 * t) := by - linarith [(mul_le_mul_left h₅).1 ha] + have hmul : σ u₁ + ‖u₁ - u‖ ^ 2 / (2 * t) + ⟪u₁ - u, h' u⟫ ≤ σ u := by + exact le_of_mul_le_mul_left ha h₅ + linarith [hmul] rw [real_inner_comm] at hne calc _ ≤ h u + σ u - σ u₁ - ‖u₁ - u‖ ^ 2 / (2 * t) + ↑Lₕ / 2 * ‖u₁ - u‖ ^ 2 + σ u₁ := by @@ -121,26 +136,64 @@ theorem Sufficient_Descent1 (γ : ℝ) (hγ : γ > 1) intro k have hHf : H (alg.x (k + 1), alg.y k) + f (alg.x (k + 1)) ≤ H (alg.x k, alg.y k) + f (alg.x k) - 1 / 2 * (γ - 1) * l * ‖alg.x (k + 1) - alg.x k‖ ^ 2 := by - let h := fun x ↦ H (x,alg.y k) + let h := fun x ↦ H (WithLp.toLp 2 (x, alg.y k)) let h' := fun x ↦ grad_fst H (alg.y k) x have h1 : ∀ x₁ : E, HasGradientAt h (h' x₁) x₁ := by intro x - apply DifferentiableAt.hasGradientAt - apply diff_prod₁; apply ContDiff.differentiable alg.conf (by simp) + rw [hasGradientAt_iff_hasFDerivAt] + have hH' : HasFDerivAt H ((InnerProductSpace.toDual ℝ (WithLp 2 (E × F))) + (gradient H (WithLp.toLp 2 (x, alg.y k)))) (WithLp.toLp 2 (x, alg.y k)) := by + exact (DifferentiableAt.hasGradientAt (h := (alg.Hdiff (WithLp.toLp 2 (x, alg.y k))))).hasFDerivAt + have htoLp : HasFDerivAt (WithLp.toLp 2) + (IsBoundedLinearMap.toContinuousLinearMap (WithLp.toLp 2) instIsBoundedLinearMapL2equiv) + ((x, alg.y k) : E × F) := instIsBoundedLinearMapL2equiv.hasFDerivAt + have hpair : HasFDerivAt (fun x : E => ((x, alg.y k) : E × F)) (ContinuousLinearMap.inl ℝ E F) x := by + simpa using (hasFDerivAt_prodMk_left (𝕜 := ℝ) x (alg.y k)) + have hcomp : HasFDerivAt (fun x : E => H (WithLp.toLp 2 (x, alg.y k))) + ((((InnerProductSpace.toDual ℝ (WithLp 2 (E × F))) (gradient H (WithLp.toLp 2 (x, alg.y k)))).comp + (IsBoundedLinearMap.toContinuousLinearMap (WithLp.toLp 2) instIsBoundedLinearMapL2equiv)).comp + (ContinuousLinearMap.inl ℝ E F)) x := by + simpa [Function.comp] using (hH'.comp x (htoLp.comp x hpair)) + refine hcomp.congr_fderiv ?_ + ext u + have hfst : ((IsBoundedLinearMap.toContinuousLinearMap (WithLp.toLp 2) instIsBoundedLinearMapL2equiv) + ((u, (0 : F)) : E × F)).fst = u := rfl + have hsnd : ((IsBoundedLinearMap.toContinuousLinearMap (WithLp.toLp 2) instIsBoundedLinearMapL2equiv) + ((u, (0 : F)) : E × F)).snd = 0 := rfl + simp [h', grad_fst, ContinuousLinearMap.comp_apply, hfst, hsnd] obtain prop := PALM_Descent h l h1 (l1 _) f (alg.c k) (alg.cpos γ hγ ck k) (alg.x _) (alg.x _) - apply le_of_eq_of_le' _ (prop (by rw [prox_set]; simp; exact (alg.s₁ k))) + apply le_of_le_of_eq (prop (by rw [prox_set]; simp; exact (alg.s₁ k))) rw [ck, one_div_one_div]; ring have hHg : H (alg.x (k + 1), alg.y (k + 1)) + g (alg.y (k + 1)) ≤ H (alg.x (k + 1), alg.y k) + g (alg.y k) - 1 / 2 * (γ - 1) * l * ‖alg.y (k + 1) - alg.y k‖ ^ 2 := by - let h := fun y ↦ H (alg.x (k + 1), y) + let h := fun y ↦ H (WithLp.toLp 2 (alg.x (k + 1), y)) let h':= fun y ↦ grad_snd H (alg.x (k + 1)) y have h1 : ∀ y₁ : F, HasGradientAt h (h' y₁) y₁ := by intro y - apply DifferentiableAt.hasGradientAt - apply diff_prod₂; apply ContDiff.differentiable alg.conf (by simp) + rw [hasGradientAt_iff_hasFDerivAt] + have hH' : HasFDerivAt H ((InnerProductSpace.toDual ℝ (WithLp 2 (E × F))) + (gradient H (WithLp.toLp 2 (alg.x (k + 1), y)))) (WithLp.toLp 2 (alg.x (k + 1), y)) := by + exact (DifferentiableAt.hasGradientAt (h := (alg.Hdiff (WithLp.toLp 2 (alg.x (k + 1), y))))).hasFDerivAt + have htoLp : HasFDerivAt (WithLp.toLp 2) + (IsBoundedLinearMap.toContinuousLinearMap (WithLp.toLp 2) instIsBoundedLinearMapL2equiv) + ((alg.x (k + 1), y) : E × F) := instIsBoundedLinearMapL2equiv.hasFDerivAt + have hpair : HasFDerivAt (fun y : F => ((alg.x (k + 1), y) : E × F)) (ContinuousLinearMap.inr ℝ E F) y := by + simpa using (hasFDerivAt_prodMk_right (𝕜 := ℝ) (alg.x (k + 1)) y) + have hcomp : HasFDerivAt (fun y : F => H (WithLp.toLp 2 (alg.x (k + 1), y))) + ((((InnerProductSpace.toDual ℝ (WithLp 2 (E × F))) (gradient H (WithLp.toLp 2 (alg.x (k + 1), y)))).comp + (IsBoundedLinearMap.toContinuousLinearMap (WithLp.toLp 2) instIsBoundedLinearMapL2equiv)).comp + (ContinuousLinearMap.inr ℝ E F)) y := by + simpa [Function.comp] using (hH'.comp y (htoLp.comp y hpair)) + refine hcomp.congr_fderiv ?_ + ext v + have hfst : ((IsBoundedLinearMap.toContinuousLinearMap (WithLp.toLp 2) instIsBoundedLinearMapL2equiv) + (((0 : E), v) : E × F)).fst = 0 := rfl + have hsnd : ((IsBoundedLinearMap.toContinuousLinearMap (WithLp.toLp 2) instIsBoundedLinearMapL2equiv) + (((0 : E), v) : E × F)).snd = v := rfl + simp [h', grad_snd, ContinuousLinearMap.comp_apply, hfst, hsnd] obtain prop := PALM_Descent h l h1 (l2 _) g (alg.d k) (alg.dpos γ hγ dk k) (alg.y k) (alg.y _) - apply le_of_eq_of_le' _ (prop (by rw [prox_set]; simp; exact (alg.s₂ k))) + apply le_of_le_of_eq (prop (by rw [prox_set]; simp; exact (alg.s₂ k))) rw [dk, one_div_one_div]; ring have eq (k : ℕ) : alg.ψ (alg.z k) = H (alg.x k, alg.y k) + f (alg.x k) + g (alg.y k) := by @@ -152,11 +205,13 @@ theorem Sufficient_Descent1 (γ : ℝ) (hγ : γ > 1) _ ≥ 1 / 2 * (γ - 1) * l * (‖alg.x (k + 1) - alg.x k‖ ^ 2 + ‖alg.y (k + 1) - alg.y k‖ ^ 2) := by linarith [hHf,hHg] _ = 1 / 2 * ρ₁ * (‖alg.x (k + 1) - alg.x k‖ ^ 2 + ‖alg.y (k + 1) - alg.y k‖ ^ 2) := by - unfold ρ₁; nth_rw 2 [mul_assoc] + unfold ρ₁; ring _ = _ := by - simp only [WithLp.prod_norm_sq_eq_of_L2] - rw [Prod.fst_sub, Prod.snd_sub, BCD.z, BCD.z] - ring_nf; simp + rw [WithLp.prod_norm_sq_eq_of_L2] + simp [BCD.z] + left + unfold ρ₁ + ring /- the value is monotone -/ theorem Sufficient_Descent2 (γ : ℝ) (hγ : γ > 1) @@ -177,19 +232,21 @@ theorem Sufficient_Descent3 (γ : ℝ) (hγ : γ > 1) (ck: ∀ k, alg.c k = 1 / ≤ 2 / ρ₁ * (alg.ψ (alg.z k) - alg.ψ (alg.z (k + 1))):= by intro k; specialize h2 k obtain h1 := mul_le_mul_of_nonneg_left h2 (a := 2 / ρ₁) (by positivity) - rw [← mul_assoc] at h1; field_simp at h1; field_simp; exact h1 + rw [← mul_assoc] at h1; field_simp at h1; field_simp; simpa [mul_comm] using h1 have hne : ∀ n, ∑ k ∈ Finset.range (n + 1), ‖alg.z (k + 1) - alg.z k‖ ^ 2 ≤ 2 / ρ₁ * ((alg.ψ (alg.z 0)) - (alg.ψ (alg.z (n + 1)))) := by intro n - induction' n with d hd - simp; specialize hDescent' 0 - simp [hDescent'] - rw [Finset.sum_range_succ _ (d + 1)] - have : 2 / ρ₁ * (alg.ψ (alg.z 0) - alg.ψ (alg.z (d + 1 + 1))) - = 2 / ρ₁ * (alg.ψ (alg.z 0) - alg.ψ (alg.z (d + 1))) - + 2 / ρ₁ * (alg.ψ (alg.z (d + 1)) - alg.ψ (alg.z (d + 1 + 1))) := by ring - rw [this] - apply add_le_add hd (hDescent' (d + 1)) + induction n with + | zero => + simp; specialize hDescent' 0 + simpa using hDescent' + | succ d hd => + rw [Finset.sum_range_succ _ (d + 1)] + have : 2 / ρ₁ * (alg.ψ (alg.z 0) - alg.ψ (alg.z (d + 1 + 1))) + = 2 / ρ₁ * (alg.ψ (alg.z 0) - alg.ψ (alg.z (d + 1))) + + 2 / ρ₁ * (alg.ψ (alg.z (d + 1)) - alg.ψ (alg.z (d + 1 + 1))) := by ring + rw [this] + exact add_le_add hd (hDescent' (d + 1)) simp [BddBelow,lowerBounds,Set.Nonempty] at lbdψ rcases lbdψ with ⟨ψ₀,hψ₀⟩ obtain hne' := fun n ↦ le_trans (hne n) (mul_le_mul_of_nonneg_left @@ -252,6 +309,7 @@ section Upperbound_subd variable {c : ℝ} {f' : E → ℝ} {x u u' : E} {y v : F} +set_option maxHeartbeats 800000 in theorem Ψ_subdiff_bound (γ : ℝ) (hγ : γ > 1) (ck: ∀ k, alg.c k = 1 / (γ * l)) (dk: ∀ k, alg.d k = 1 / (γ * l)) : ∃ ρ > 0, ∀ k, ∃ dΨ ∈ f_subdifferential alg.ψ (alg.z (k + 1)), @@ -273,29 +331,36 @@ theorem Ψ_subdiff_bound (γ : ℝ) (hγ : γ > 1) rw [lipschitzWith_iff_norm_sub_le] at lip have cpos' : (alg.c k)⁻¹ ≥ 0 := by simp; apply le_of_lt (alg.cpos γ hγ ck k) have dpos' : (alg.d k)⁻¹ ≥ 0 := by simp; apply le_of_lt (alg.dpos γ hγ dk k) - have h1 : ‖(alg.subdiff k).1‖ ≤ l * (γ + 1) * ‖alg.z (k + 1) - alg.z k‖ := by - simp only [BCD.subdiff, BCD.A_kx, Prod.fst_add, grad_fun_comp, grad_comp, sub_add]; - rw [A_k, A_kx, A_ky]; simp + have h1 : ‖(alg.subdiff k).fst‖ ≤ l * (γ + 1) * ‖alg.z (k + 1) - alg.z k‖ := by let a := (alg.c k)⁻¹ • (alg.x k - alg.x (k + 1)) calc - _ = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1))).1 - - grad_fst H (alg.y k) (alg.x k)‖ := by rw [sub_add_eq_add_sub] - _ = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1))).1 - - (gradient H (alg.x k, alg.y k)).1‖ := by - symm; rw [grad_eq_block_grad, grad_fun_comp, grad_comp, grad_fun_comp, grad_comp] - simp; apply alg.Hdiff - _ ≤ ‖a‖ + ‖(gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x k, alg.y k)).1‖ := by - rw [add_sub_assoc, ← Prod.fst_sub]; apply norm_add_le - _ ≤ ‖a‖ + ‖(gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x k, alg.y k))‖ := by - simp; rw [← Prod.fst_sub]; apply fst_norm_le_prod_L2 + ‖(alg.subdiff k).fst‖ + = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1))).fst + - grad_fst H (alg.y k) (alg.x k)‖ := by + simp [BCD.subdiff, BCD.A_k, BCD.A_kx, BCD.A_ky, a, sub_eq_add_neg, add_left_comm, add_comm] + _ = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1))).fst + - (gradient H (alg.x k, alg.y k)).fst‖ := by + simp [grad_fst] + _ ≤ ‖a‖ + ‖(gradient H (alg.x (k + 1), alg.y (k + 1))).fst + - (gradient H (alg.x k, alg.y k)).fst‖ := by + simpa [sub_eq_add_neg, add_assoc] using + (norm_add_le a ((gradient H (alg.x (k + 1), alg.y (k + 1))).fst - (gradient H (alg.x k, alg.y k)).fst)) + _ ≤ ‖a‖ + ‖gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x k, alg.y k)‖ := by + have hfst : + ‖(gradient H (alg.x (k + 1), alg.y (k + 1))).fst + - (gradient H (alg.x k, alg.y k)).fst‖ + ≤ ‖gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x k, alg.y k)‖ := by + simpa using (fst_norm_le_prod_L2 + (gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x k, alg.y k))) + simpa [add_comm, add_left_comm, add_assoc] using add_le_add_left hfst ‖a‖ have inequ₁ : ‖a‖ ≤ (γ * l) * ‖alg.z (k+1) - alg.z k‖ := by calc _ = (1 / alg.c k) * ‖alg.x k - alg.x (k + 1)‖ := by simp [a]; rw [norm_smul_of_nonneg]; apply cpos' _ = (1 / alg.c k) * ‖alg.x (k + 1) - alg.x k‖ := by simp; left; apply norm_sub_rev - _ = (1 / alg.c k) * ‖(alg.z (k + 1) - alg.z k).1‖ := by rw [z]; simp; left; rw [z]; simp + _ = (1 / alg.c k) * ‖(alg.z (k + 1) - alg.z k).fst‖ := by rw [z]; simp; left; rw [z]; simp _ ≤ (1 / alg.c k) * ‖alg.z (k + 1) - alg.z k‖ := by - have : ‖(alg.z (k + 1) - alg.z k).1‖ ≤ ‖alg.z (k + 1) - alg.z k‖ := fst_norm_le_prod_L2 _ + have : ‖(alg.z (k + 1) - alg.z k).fst‖ ≤ ‖alg.z (k + 1) - alg.z k‖ := fst_norm_le_prod_L2 _ simp; apply mul_le_mul_of_nonneg_left this cpos' _ = (γ * l) * ‖alg.z (k + 1) - alg.z k‖ := by rw [ck k]; simp have inequ₂ : ‖gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x k, alg.y k)‖ @@ -306,32 +371,36 @@ theorem Ψ_subdiff_bound (γ : ℝ) (hγ : γ > 1) apply lip _ = l * ‖alg.z (k+1) - alg.z k‖ := by repeat rw [z]; simp; left; rfl linarith - have h2 : ‖(alg.subdiff k).2‖ ≤ l * (γ + 1) * ‖alg.z (k + 1) - alg.z k‖ := by - simp only [BCD.subdiff, BCD.A_kx, Prod.fst_add, grad_fun_comp, grad_comp, sub_add]; - rw [A_k, A_kx, A_ky]; simp + have h2 : ‖(alg.subdiff k).snd‖ ≤ l * (γ + 1) * ‖alg.z (k + 1) - alg.z k‖ := by let a := (alg.d k)⁻¹ • (alg.y k - alg.y (k + 1)) calc - _ = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1))).2 - - grad_snd H (alg.x (k + 1)) (alg.y k)‖ := by rw [sub_add_eq_add_sub] - _ = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1))).2 - - (gradient H (alg.x (k + 1), alg.y k)).2‖ := by - symm; rw [grad_eq_block_grad, grad_fun_comp, grad_comp, grad_fun_comp, grad_comp] - simp; apply alg.Hdiff - _ = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1)) - - gradient H (alg.x (k + 1), alg.y k)).2‖ := by rw [add_sub_assoc, ← Prod.snd_sub] - _ ≤ ‖a‖ + ‖(gradient H (alg.x (k + 1), alg.y (k + 1)) - - gradient H (alg.x (k + 1), alg.y k)).2‖ := by apply norm_add_le - _ ≤ ‖a‖ + ‖(gradient H (alg.x (k + 1), alg.y (k + 1)) - - gradient H (alg.x (k + 1), alg.y k))‖ := by - simp; rw [← Prod.snd_sub]; apply snd_norm_le_prod_L2 + ‖(alg.subdiff k).snd‖ + = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1))).snd + - grad_snd H (alg.x (k + 1)) (alg.y k)‖ := by + simp [BCD.subdiff, BCD.A_k, BCD.A_kx, BCD.A_ky, a, sub_eq_add_neg, add_left_comm, add_comm] + _ = ‖a + (gradient H (alg.x (k + 1), alg.y (k + 1))).snd + - (gradient H (alg.x (k + 1), alg.y k)).snd‖ := by + simp [grad_snd] + _ ≤ ‖a‖ + ‖(gradient H (alg.x (k + 1), alg.y (k + 1))).snd + - (gradient H (alg.x (k + 1), alg.y k)).snd‖ := by + simpa [sub_eq_add_neg, add_assoc] using + (norm_add_le a ((gradient H (alg.x (k + 1), alg.y (k + 1))).snd - (gradient H (alg.x (k + 1), alg.y k)).snd)) + _ ≤ ‖a‖ + ‖gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x (k + 1), alg.y k)‖ := by + have hsnd : + ‖(gradient H (alg.x (k + 1), alg.y (k + 1))).snd + - (gradient H (alg.x (k + 1), alg.y k)).snd‖ + ≤ ‖gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x (k + 1), alg.y k)‖ := by + simpa using (snd_norm_le_prod_L2 + (gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x (k + 1), alg.y k))) + simpa [add_comm, add_left_comm, add_assoc] using add_le_add_left hsnd ‖a‖ have inequ₁ : ‖a‖ ≤ (γ * l) * ‖alg.z (k + 1) - alg.z k‖ := by calc _ = (1 / alg.d k) * ‖alg.y k - alg.y (k + 1)‖ := by simp [a]; rw [norm_smul_of_nonneg]; apply dpos' _ = (1 / alg.d k) * ‖alg.y (k + 1) - alg.y k‖ := by simp; left; apply norm_sub_rev - _ = (1 / alg.d k) * ‖(alg.z (k + 1) - alg.z k).2‖ := by rw [z]; simp; left; rw [z]; simp + _ = (1 / alg.d k) * ‖(alg.z (k + 1) - alg.z k).snd‖ := by rw [z]; simp; left; rw [z]; simp _ ≤ (1 / alg.d k) * ‖alg.z (k + 1) - alg.z k‖ := by - have : ‖(alg.z (k + 1) - alg.z k).2‖ ≤ ‖alg.z (k + 1) - alg.z k‖ := by + have : ‖(alg.z (k + 1) - alg.z k).snd‖ ≤ ‖alg.z (k + 1) - alg.z k‖ := by apply snd_norm_le_prod_L2 simp; apply mul_le_mul_of_nonneg_left this dpos' _ = (γ * l) * ‖alg.z (k + 1) - alg.z k‖ := by rw [dk k]; simp @@ -341,11 +410,11 @@ theorem Ψ_subdiff_bound (γ : ℝ) (hγ : γ > 1) _ ≤ l * @norm (WithLp 2 (E × F)) (WithLp.instProdNorm 2 E F) ((alg.x (k + 1), alg.y (k + 1)) - (alg.x (k + 1), alg.y k)) := by apply lip - _ = l * ‖(alg.z (k+1) - alg.z k).2‖ := by - simp; left; repeat rw [z]; simp; apply norm_prod_left_zero + _ = l * ‖alg.y (k + 1) - alg.y k‖ := by + simp [WithLp.prod_norm_eq_of_L2] _ ≤ l * ‖alg.z (k+1) - alg.z k‖ := by apply mul_le_mul_of_nonneg_left _ (le_of_lt alg.lpos) - · apply snd_norm_le_prod_L2 + · simpa [z] using (snd_norm_le_prod_L2 (alg.z (k + 1) - alg.z k)) linarith linarith @@ -373,19 +442,20 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : (α : ℕ → ℕ) (z_ : WithLp 2 (E×F)) (monoa : StrictMono α) (conv : Tendsto (fun n ↦ alg.z (α n)) atTop (𝓝 z_)) (bd : Bornology.IsBounded (alg.z '' univ)) (lbdψ : BddBelow (alg.ψ '' univ)) : - Tendsto (fun n ↦ f (alg.z (α n)).1) atTop (𝓝 (f z_.1)) := by + Tendsto (fun n ↦ f (alg.z (α n)).fst) atTop (𝓝 (f z_.fst)) := by obtain lpos := alg.lpos - apply (nhds_basis_Ioo_pos (f z_.1)).tendsto_right_iff.mpr + apply (nhds_basis_Ioo_pos (f z_.fst)).tendsto_right_iff.mpr rintro ε epos simp only [Ioo] - have lef : ∀ᶠ x in atTop, f (alg.z (α x)).1 > f z_.1 - ε := - (Tendsto.fst_nhds conv) (by apply alg.hf z_.1; exact sub_lt_self (f z_.1) epos) - have rig : ∀ᶠ x in atTop, f (alg.z (α x)).1 < f z_.1 + ε := by + have lef : ∀ᶠ x in atTop, f (alg.z (α x)).fst > f z_.fst - ε := + ((WithLp.continuous_fst (p := (2 : ENNReal)) (α := E) (β := F)).tendsto z_ |>.comp conv) + (by apply alg.hf z_.fst; exact sub_lt_self (f z_.fst) epos) + have rig : ∀ᶠ x in atTop, f (alg.z (α x)).fst < f z_.fst + ε := by have ieq (q) (hq : 1 ≤ α q) : alg.c (α q -1) * f (alg.x (α q)) + ⟪alg.x (α q) - alg.x (α q -1), alg.c (α q -1) • grad_fst H (alg.y (α q -1)) (alg.x (α q -1))⟫ ≤ - alg.c (α q -1) * f z_.1 + ‖z_.1 - alg.x (α q -1)‖ ^ 2 / 2 + ⟪z_.1 - alg.x (α q -1), + alg.c (α q -1) * f z_.fst + ‖z_.fst - alg.x (α q -1)‖ ^ 2 / 2 + ⟪z_.fst - alg.x (α q -1), alg.c (α q -1) • grad_fst H (alg.y (α q -1)) (alg.x (α q -1))⟫:= by - rcases isMinOn_iff.mp (alg.s₁ (α q -1)) z_.1 trivial with ieq + rcases isMinOn_iff.mp (alg.s₁ (α q -1)) z_.fst trivial with ieq simp at ieq rw [← sub_add, norm_add_sq_real, ← sub_add, norm_add_sq_real] at ieq repeat rw [add_div] at ieq; repeat rw [← add_assoc] at ieq @@ -394,17 +464,20 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : linarith [ieq,this] have Hbd : ∃ C, ∀ q : ℕ, ‖(grad_fst H (alg.y (α q -1)) (alg.x (α q -1)))‖ ≤ C:= by rcases isBounded_iff_forall_norm_le.mp bd with ⟨C1,inin⟩ - have con11H : ContinuousOn (fun (x,y)↦grad_fst H y x) (Metric.closedBall (0:WithLp 2 (E×F)) C1) := by + have con11H : ContinuousOn (fun z : WithLp 2 (E × F) ↦ grad_fst H z.snd z.fst) + (Metric.closedBall (0:WithLp 2 (E×F)) C1) := by apply Continuous.continuousOn exact LipschitzWith.continuous (lip_grad_fst_of_lip alg.Hdiff alg.lip) rcases @IsCompact.exists_bound_of_continuousOn (WithLp 2 (E×F)) E _ _ _ - (isCompact_closedBall (0 : WithLp 2 (E × F)) C1) (fun (x, y)↦ grad_fst H y x) con11H with ⟨C, sq⟩ + (isCompact_closedBall (0 : WithLp 2 (E × F)) C1) + (fun z : WithLp 2 (E × F) ↦ grad_fst H z.snd z.fst) con11H with ⟨C, sq⟩ use C; rintro q - have : (alg.x (α q -1),alg.y (α q -1)) ∈ Metric.closedBall (0 : WithLp 2 (E × F)) C1 := by + have : ((alg.x (α q -1),alg.y (α q -1)) : WithLp 2 (E × F)) + ∈ Metric.closedBall (0 : WithLp 2 (E × F)) C1 := by apply mem_closedBall_iff_norm.mpr; simp - apply inin (alg.x (α q -1),alg.y (α q -1)) + apply inin ((alg.x (α q -1),alg.y (α q -1)) : WithLp 2 (E × F)) exact mem_image_of_mem alg.z trivial - obtain h'' := sq (alg.x (α q -1),alg.y (α q -1)) this + obtain h'' := sq ((alg.x (α q -1),alg.y (α q -1)) : WithLp 2 (E × F)) this simp at h''; exact h'' rcases Hbd with ⟨C,hbd⟩ have diflte1 : ∀ ε > 0, ∀ᶠ (q : ℕ) in atTop, ‖alg.x (α q) - alg.x (α q - 1)‖ < ε:= by @@ -426,9 +499,9 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : obtain ht := ie (α b - 1) (Nat.le_sub_one_of_lt a1leab) have eqq : (α b - 1 + 1) = α b:= by apply Nat.sub_add_cancel; linarith [a1leab] rwa [eqq] at ht - have diflte2 : ∀ ε > 0, ∀ᶠ (q : ℕ) in atTop, ‖z_.1 - alg.x (α q - 1)‖ < ε := by + have diflte2 : ∀ ε > 0, ∀ᶠ (q : ℕ) in atTop, ‖z_.fst - alg.x (α q - 1)‖ < ε := by rintro ε epos - have : ∀ᶠ (q : ℕ) in atTop, ‖z_.1 - alg.x (α q )‖ < ε / 2 := by + have : ∀ᶠ (q : ℕ) in atTop, ‖z_.fst - alg.x (α q )‖ < ε / 2 := by rcases (atTop_basis.tendsto_iff (@Metric.nhds_basis_ball _ _ z_)).mp conv (ε / 2) (half_pos epos) with ⟨n1,_,ieq1⟩ simp [dist_eq_norm] at ieq1; simp @@ -443,7 +516,7 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : rintro x ⟨h1,h2⟩ rw [← sub_add_sub_cancel] calc - _ ≤ ‖z_.1 - alg.x (α x)‖ + ‖alg.x (α x) - alg.x (α x - 1)‖ := norm_add_le _ _ + _ ≤ ‖z_.fst - alg.x (α x)‖ + ‖alg.x (α x) - alg.x (α x - 1)‖ := norm_add_le _ _ _ < ε := by linarith have hk (k : ℕ → E) (defle : ∀ ε > 0, ∀ᶠ (q : ℕ) in atTop, ‖k q‖ < ε) : ∀ ε > 0, @@ -452,15 +525,19 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : rintro ε epos simp at defle; simp by_cases Cpos : 0 < C - · rcases defle (ε / (C / (γ * l))) (by field_simp [alg.lpos, Cpos]) with ⟨nn,ieq⟩ + · have hdivpos : 0 < ε / (C / (γ * l)) := by + apply div_pos epos + apply div_pos Cpos + nlinarith [hγ, alg.lpos] + rcases defle (ε / (C / (γ * l))) hdivpos with ⟨nn,ieq⟩ use nn; rintro b nleb; rw [ck] calc _ ≤ ‖k b‖ * ‖(1 / (γ * ↑l)) • grad_fst H (alg.y (α b - 1)) (alg.x (α b - 1))‖ := by apply abs_real_inner_le_norm _ ≤ ε / (C / (γ * ↑l))*‖(1 / (γ * ↑l)) • grad_fst H (alg.y (α b - 1)) (alg.x (α b - 1))‖:= by - apply mul_le_mul (le_of_lt (ieq b nleb)); trivial + apply mul_le_mul (le_of_lt (ieq b nleb)) le_rfl repeat apply norm_nonneg - field_simp [alg.lpos, Cpos]; positivity + exact le_of_lt hdivpos _ = ε / (C / (γ * ↑l))*(1 / (γ * ↑l)) * ‖grad_fst H (alg.y (α b - 1)) (alg.x (α b - 1))‖:= by rw [mul_assoc]; apply mul_eq_mul_left_iff.mpr left; exact norm_smul_of_nonneg (by positivity) (grad_fst H _ _) @@ -494,21 +571,19 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : _≤ε:= by linarith simp only [ck] at ieq have h1 := hk (fun q ↦ alg.x (α q) - alg.x (α q - 1)) diflte1 (ε / (γ * l) / 3) (by positivity) - have h2 := hk (fun q ↦ z_.1 - alg.x (α q - 1)) diflte2 (ε / (γ * l) / 3) (by positivity) - have h3 : ∀ᶠ (q : ℕ) in atTop, ‖z_.1 - alg.x (α q - 1)‖ ^ 2 / 2 < (ε / (γ * l) / 3):= by + have h2 := hk (fun q ↦ z_.fst - alg.x (α q - 1)) diflte2 (ε / (γ * l) / 3) (by positivity) + have h3 : ∀ᶠ (q : ℕ) in atTop, ‖z_.fst - alg.x (α q - 1)‖ ^ 2 / 2 < (ε / (γ * l) / 3):= by refine Eventually.mono (diflte2 (√(2*(ε/(γ*l)/3))) ?_) ?_ apply Real.sqrt_pos_of_pos apply mul_pos;norm_num; positivity intro x assx - have :‖z_.1 - alg.x (α x - 1)‖^2<(2*(ε/(γ*l)/3)):= by + have :‖z_.fst - alg.x (α x - 1)‖^2<(2*(ε/(γ*l)/3)):= by refine (Real.lt_sqrt ?hx).mp ?_ apply norm_nonneg exact assx calc - ‖z_.1 - alg.x (α x - 1)‖ ^ 2 / 2<(2*(ε/(γ*l)/3))/2:= by - apply (div_lt_div_right _).mpr - apply this - linarith + ‖z_.fst - alg.x (α x - 1)‖ ^ 2 / 2<(2*(ε/(γ*l)/3))/2:= by + nlinarith [this] _=(ε/(γ*l)/3):= by apply mul_div_cancel_left₀ linarith @@ -529,7 +604,7 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : apply monoa linarith [Nat.le_of_add_right_le mleq] linarith - have key : 1 / (γ * ↑l) * f (alg.x (α q)) <1 / (γ * ↑l) * f z_.1 +ε / (γ * ↑l):= by + have key : 1 / (γ * ↑l) * f (alg.x (α q)) <1 / (γ * ↑l) * f z_.fst +ε / (γ * ↑l):= by linarith [ieq q this,(abs_le.mp (ie1 q (m1le.trans mleq))).1,(abs_le.mp (ie2 q (m2le.trans mleq))).2,ie3 q (m3le.trans mleq), add_thirds (ε / (γ * ↑l))] have ltt:0<γ*l:= by @@ -538,8 +613,9 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : _ = f (alg.x (α q)) := rfl _ =(γ * ↑l)*(1 / (γ * ↑l) * f (alg.x (α q))):= by rw [←mul_assoc,mul_one_div_cancel (LT.lt.ne ltt).symm,one_mul] - _ < (γ * ↑l)*(1 / (γ * ↑l) * f z_.1 + ε / (γ * ↑l)):=(mul_lt_mul_left ltt).mpr key - _=f z_.1 + ε:=by + _ < (γ * ↑l)*(1 / (γ * ↑l) * f z_.fst + ε / (γ * ↑l)) := by + exact mul_lt_mul_of_pos_left key ltt + _=f z_.fst + ε:=by rw [mul_add, ← mul_assoc, mul_one_div_cancel (LT.lt.ne ltt).symm, one_mul, mul_div_cancel₀ _ (LT.lt.ne ltt).symm] exact Eventually.and lef rig @@ -548,22 +624,23 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : lemma gconv (γ : ℝ) (hγ : γ > 1) (ck: ∀ k, alg.c k = 1 / (γ * l)) (dk: ∀ k, alg.d k = 1 / (γ * l)) (α:ℕ→ℕ)(z_:WithLp 2 (E×F))(monoa:StrictMono α )(conv:Tendsto (fun n ↦ alg.z (α n)) atTop (𝓝 z_)) (bd : Bornology.IsBounded (alg.z '' univ)) (lbdψ : BddBelow (alg.ψ '' univ)): - Tendsto (fun n ↦ g (alg.z (α n)).2) atTop (𝓝 (g z_.2)):=by - apply (nhds_basis_Ioo_pos (g z_.2)).tendsto_right_iff.mpr + Tendsto (fun n ↦ g (alg.z (α n)).snd) atTop (𝓝 (g z_.snd)):=by + apply (nhds_basis_Ioo_pos (g z_.snd)).tendsto_right_iff.mpr rintro ε epos simp only [Ioo] - have lef:∀ᶠ (x : ℕ) in atTop, g (alg.z (α x)).2>g z_.2-ε:= by - have semi: ∀ᶠ x' in 𝓝 z_.2, g z_.2 -ε < g x':= by - apply alg.hg z_.2 + have lef:∀ᶠ (x : ℕ) in atTop, g (alg.z (α x)).snd>g z_.snd-ε:= by + have semi: ∀ᶠ x' in 𝓝 z_.snd, g z_.snd -ε < g x':= by + apply alg.hg z_.snd linarith - have :Tendsto (fun n↦ (alg.z (α n)).2) atTop (𝓝 z_.2):= Tendsto.snd_nhds conv + have : Tendsto (fun n ↦ (alg.z (α n)).snd) atTop (𝓝 z_.snd) := + ((WithLp.continuous_snd (p := (2 : ENNReal)) (α := E) (β := F)).tendsto z_).comp conv exact this semi - have rig:∀ᶠ (x : ℕ) in atTop, g (alg.z (α x)).2 1) (ck: ∀ k, alg.c k = 1 / (γ * l)) (dk: linarith [ieq,this] have Hbd :∃C,∀q:ℕ ,‖(grad_snd H (alg.x (α q )) (alg.y (α q -1)))‖≤C:= by rcases isBounded_iff_forall_norm_le.mp bd with ⟨C1,inin⟩ - have con11H : ContinuousOn (fun (x,y) ↦ grad_snd H x y) + have con11H : ContinuousOn (fun z : WithLp 2 (E × F) ↦ grad_snd H z.fst z.snd) (Metric.closedBall (0:WithLp 2 (E×F)) (2*C1)) := by apply Continuous.continuousOn exact LipschitzWith.continuous (lip_grad_snd_of_lip alg.Hdiff alg.lip) rcases @IsCompact.exists_bound_of_continuousOn (WithLp 2 (E×F)) F _ _ _ (isCompact_closedBall (0:WithLp 2 (E×F)) (2*C1)) - (fun (x,y)↦grad_snd H x y) con11H with ⟨C,sqsq⟩ + (fun z : WithLp 2 (E × F) ↦ grad_snd H z.fst z.snd) con11H with ⟨C,sqsq⟩ use C rintro q - have :(alg.x (α q ),alg.y (α q -1))∈Metric.closedBall (0:WithLp 2 (E×F)) (2*C1) := by + have : ((alg.x (α q),alg.y (α q -1)) : WithLp 2 (E × F)) + ∈ Metric.closedBall (0:WithLp 2 (E×F)) (2*C1) := by apply mem_closedBall_iff_norm.mpr simp calc - @norm (WithLp 2 (E × F)) (WithLp.instProdNorm 2 E F) (alg.x (α q),alg.y (α q - 1)) ≤ + @norm (WithLp 2 (E × F)) (WithLp.instProdNorm 2 E F) + ((alg.x (α q),alg.y (α q - 1)) : WithLp 2 (E × F)) ≤ ‖alg.x (α q)‖+‖alg.y (α q - 1)‖ := by apply prod_norm_le_block_sum_L2 _≤‖alg.z (α q)‖+‖alg.z (α q -1)‖:=by have :‖alg.y (α q -1)‖≤‖alg.z (α q -1)‖:= by @@ -607,12 +686,12 @@ lemma gconv (γ : ℝ) (hγ : γ > 1) (ck: ∀ k, alg.c k = 1 / (γ * l)) (dk: linarith _≤C1+C1:=by apply add_le_add - apply inin + apply inin ((alg.x (α q), alg.y (α q)) : WithLp 2 (E × F)) exact mem_image_of_mem alg.z trivial - apply inin + apply inin ((alg.x (α q - 1), alg.y (α q - 1)) : WithLp 2 (E × F)) exact mem_image_of_mem alg.z trivial _=2*C1:=Eq.symm (two_mul C1) - have hhhh:= sqsq (alg.x (α q ),alg.y (α q -1)) this + have hhhh := sqsq ((alg.x (α q), alg.y (α q - 1)) : WithLp 2 (E × F)) this simp at hhhh exact hhhh rcases Hbd with ⟨C,hbd⟩ @@ -646,33 +725,33 @@ lemma gconv (γ : ℝ) (hγ : γ > 1) (ck: ∀ k, alg.c k = 1 / (γ * l)) (dk: linarith [a1leab] rw [eqq] at this assumption - have diflte2:∀ ε>0, ∀ᶠ (q : ℕ) in atTop,‖z_.2 - alg.y (α q - 1)‖ <ε:= by + have diflte2:∀ ε>0, ∀ᶠ (q : ℕ) in atTop,‖z_.snd - alg.y (α q - 1)‖ <ε:= by rintro ε epos - have : ∀ᶠ (q : ℕ) in atTop,‖z_.2 - alg.y (α q )‖ <ε/2:= by + have : ∀ᶠ (q : ℕ) in atTop,‖z_.snd - alg.y (α q )‖ <ε/2:= by rcases (atTop_basis.tendsto_iff (@Metric.nhds_basis_ball _ _ z_)).mp conv (ε/2) (half_pos epos) with ⟨n1,_,ieq1⟩ simp [dist_eq_norm] at ieq1;simp use n1 rintro b n1leb calc - ‖z_.2 - alg.y (α b)‖≤‖z_ - alg.z (α b)‖ :=by + ‖z_.snd - alg.y (α b)‖≤‖z_ - alg.z (α b)‖ :=by rw [WithLp.prod_norm_eq_of_L2] simp - refine (Real.le_sqrt (norm_nonneg (z_.2 - alg.y (α b))) - (Left.add_nonneg (sq_nonneg ‖z_.1 - alg.x (α b)‖) - (sq_nonneg ‖z_.2 - alg.y (α b)‖ ))).mpr - (le_add_of_nonneg_left (sq_nonneg ‖z_.1 - alg.x (α b)‖)) + refine (Real.le_sqrt (norm_nonneg (z_.snd - alg.y (α b))) + (Left.add_nonneg (sq_nonneg ‖z_.fst - alg.x (α b)‖) + (sq_nonneg ‖z_.snd - alg.y (α b)‖ ))).mpr + (le_add_of_nonneg_left (sq_nonneg ‖z_.fst - alg.x (α b)‖)) _<ε/2:=by rw [norm_sub_rev] exact ieq1 b n1leb - have :∀ᶠ (q : ℕ) in atTop,‖z_.2 - alg.y (α q )‖ <ε/2∧‖alg.y (α q) - alg.y (α q - 1)‖ <ε/2 + have :∀ᶠ (q : ℕ) in atTop,‖z_.snd - alg.y (α q )‖ <ε/2∧‖alg.y (α q) - alg.y (α q - 1)‖ <ε/2 := Eventually.and this (diflte1 (ε/2) (half_pos epos)) apply Eventually.mono this rintro x ⟨h1,h2⟩ calc - ‖z_.2 - alg.y (α x - 1)‖=‖z_.2 - alg.y (α x )+(alg.y (α x) - alg.y (α x -1))‖:= by + ‖z_.snd - alg.y (α x - 1)‖=‖z_.snd - alg.y (α x )+(alg.y (α x) - alg.y (α x -1))‖:= by simp - _≤‖z_.2 - alg.y (α x)‖+‖alg.y (α x) - alg.y (α x - 1)‖:= by + _≤‖z_.snd - alg.y (α x)‖+‖alg.y (α x) - alg.y (α x - 1)‖:= by apply norm_add_le _<ε/2+ε/2:= by linarith [h1,h2] _=ε := by exact add_halves ε @@ -749,23 +828,21 @@ lemma gconv (γ : ℝ) (hγ : γ > 1) (ck: ∀ k, alg.c k = 1 / (γ * l)) (dk: have h1:∀ᶠ (q : ℕ) in atTop,|⟪alg.y (α q) - alg.y (α q - 1), alg.d (α q - 1) • grad_snd H (alg.x (α q )) (alg.y (α q - 1))⟫| ≤ε / (γ * ↑l) / 3 := this (fun q↦alg.y (α q) - alg.y (α q - 1)) (diflte1) (ε/(γ*l)/3) finalpos - have h2: ∀ᶠ (q : ℕ) in atTop,|⟪z_.2 - alg.y (α q - 1), alg.d (α q - 1) • grad_snd H (alg.x (α q )) + have h2: ∀ᶠ (q : ℕ) in atTop,|⟪z_.snd - alg.y (α q - 1), alg.d (α q - 1) • grad_snd H (alg.x (α q )) (alg.y (α q - 1))⟫| ≤ ε / (γ * ↑l) / 3:= - this (fun q↦z_.2 - alg.y (α q - 1)) diflte2 (ε/(γ*l)/3) finalpos - have h3: ∀ᶠ (q : ℕ) in atTop,‖z_.2 - alg.y (α q - 1)‖ ^ 2 / 2<(ε/(γ*l)/3):= by + this (fun q↦z_.snd - alg.y (α q - 1)) diflte2 (ε/(γ*l)/3) finalpos + have h3: ∀ᶠ (q : ℕ) in atTop,‖z_.snd - alg.y (α q - 1)‖ ^ 2 / 2<(ε/(γ*l)/3):= by refine Eventually.mono (diflte2 (√(2*(ε/(γ*l)/3))) ?_) ?_ apply Real.sqrt_pos_of_pos apply mul_pos;norm_num;apply finalpos intro x assx - have :‖z_.2 - alg.y (α x - 1)‖^2<(2*(ε/(γ*l)/3)):= by + have :‖z_.snd - alg.y (α x - 1)‖^2<(2*(ε/(γ*l)/3)):= by refine (Real.lt_sqrt ?hy).mp ?_ apply norm_nonneg exact assx calc - ‖z_.2 - alg.y (α x - 1)‖ ^ 2 / 2<(2*(ε/(γ*l)/3))/2:= by - apply (div_lt_div_right _).mpr - apply this - linarith + ‖z_.snd - alg.y (α x - 1)‖ ^ 2 / 2<(2*(ε/(γ*l)/3))/2:= by + nlinarith [this] _=(ε/(γ*l)/3):= by apply mul_div_cancel_left₀ linarith @@ -785,17 +862,18 @@ lemma gconv (γ : ℝ) (hγ : γ > 1) (ck: ∀ k, alg.c k = 1 / (γ * l)) (dk: apply monoa linarith [Nat.le_of_add_right_le mleq] linarith - have key:1 / (γ * ↑l) * g (alg.y (α q)) <1 / (γ * ↑l) * g z_.2 +ε / (γ * ↑l):= by + have key:1 / (γ * ↑l) * g (alg.y (α q)) <1 / (γ * ↑l) * g z_.snd +ε / (γ * ↑l):= by linarith [ieq q this,(abs_le.mp (ie1 q (m1le.trans mleq))).1,(abs_le.mp (ie2 q (m2le.trans mleq))).2, ie3 q (m3le.trans mleq),add_thirds (ε / (γ * ↑l))] have ltt:0<γ*l:= by apply mul_pos;linarith;linarith [alg.lpos] calc - g (alg.z (α q)).2=g (alg.y (α q)):= rfl + g (alg.z (α q)).snd = g (alg.y (α q)):= rfl _=(γ * ↑l)*(1 / (γ * ↑l) * g (alg.y (α q))):= by rw [←mul_assoc,mul_one_div_cancel (LT.lt.ne ltt).symm,one_mul] - _<(γ * ↑l)*(1 / (γ * ↑l) * g z_.2 + ε / (γ * ↑l)):=(mul_lt_mul_left ltt).mpr key - _=g z_.2 + ε:=by + _ < (γ * ↑l) * (1 / (γ * ↑l) * g z_.snd + ε / (γ * ↑l)) := by + exact mul_lt_mul_of_pos_left key ltt + _=g z_.snd + ε:=by rw [mul_add, ← mul_assoc, mul_one_div_cancel (LT.lt.ne ltt).symm, one_mul, mul_div_cancel₀ _ (LT.lt.ne ltt).symm] exact Eventually.and lef rig @@ -819,20 +897,12 @@ lemma limitset_property_1 (γ : ℝ) (hγ : γ > 1) --nonempty have hz : ∀ (n : ℕ), alg.z n ∈ alg.z '' univ:= by intro n; use n; constructor; exact Set.mem_univ n; rfl rcases (tendsto_subseq_of_bounded (bd) (hz)) with ⟨a, _ , φ, ⟨hmφ,haφ⟩⟩ - use a; simp [limit_set] - rw [mapClusterPt_iff]; intro s hs - apply Filter.frequently_iff.mpr - intro U hU; rw [Filter.mem_atTop_sets] at hU - rcases hU with ⟨ax,hax⟩; rw [mem_nhds_iff] at hs - obtain ⟨t, t_s, ⟨isopent,a_t⟩⟩ := hs - rw [tendsto_atTop_nhds] at haφ - specialize haφ t a_t isopent - rcases haφ with ⟨N,hN⟩ - let n := N + ax - use φ n - constructor; apply hax - apply le_trans (Nat.le_add_left ax N); apply StrictMono_nat; exact hmφ - exact t_s (hN n (by simp[n])) + use a + simp [limit_set, MapClusterPt] + apply ClusterPt.mono (ClusterPt.of_le_nhds haφ) + calc + _ = map (fun n ↦ alg.z n) (map φ atTop) := by rw [map_map] + _ ≤ map (fun n ↦ alg.z n) atTop := map_mono (StrictMono.tendsto_atTop hmφ) --the folllowing shows that limit_set BCD.z ⊆ critial_point BCD.ψ intro z_ ha rcases TopologicalSpace.FirstCountableTopology.tendsto_subseq ha with ⟨φ,monoφ,conv⟩ @@ -860,16 +930,15 @@ lemma limitset_property_1 (γ : ℝ) (hγ : γ > 1) calc _ ≤ ρ * ‖alg.z (φ (b + 1)) - alg.z (φ (b + 1) - 1)‖:= (key b).2 _ < ρ * (ε / ρ) := by - apply (mul_lt_mul_left ρpos).mpr have : ‖alg.z (φ (b + 1)-1 + 1) - alg.z (φ (b + 1) - 1)‖ < ε / ρ := by apply ieq; apply aleb.trans exact Nat.sub_le_sub_right (StrictMono_nat φ monoφ (b+1)) 1 simp [subadd b] at this - exact this + exact mul_lt_mul_of_pos_left this ρpos _ = ε := by rw [mul_comm, div_mul_cancel₀]; linarith [ρpos] lemma limitset_property_2 (bd : Bornology.IsBounded (alg.z '' univ)) : - Tendsto (fun n ↦ (EMetric.infEdist (alg.z n) (limit_set alg.z)).toReal) atTop (𝓝 0) := by + Tendsto (fun n ↦ (Metric.infEDist (alg.z n) (limit_set alg.z)).toReal) atTop (𝓝 0) := by apply (nhds_basis_Ioo_pos 0).tendsto_right_iff.mpr rintro ε epos; by_contra h; simp at h --alg.z∘W is the subseq s.t. the dist is no less than ε @@ -894,14 +963,14 @@ lemma limitset_property_2 (bd : Bornology.IsBounded (alg.z '' univ)) : _ ≤ map (fun n ↦ alg.z n) atTop := by rw [← map_map]; apply map_mono (StrictMono.tendsto_atTop monoW) -- show the contradiction - have z_ge : (EMetric.infEdist z_ (limit_set alg.z)).toReal ≥ ε := by + have z_ge : (Metric.infEDist z_ (limit_set alg.z)).toReal ≥ ε := by apply ge_of_tendsto (continuous_iff_seqContinuous.mp (Metric.continuous_infDist_pt (limit_set alg.z)) conv) simp; use 1; rintro n len rw [← tsub_add_cancel_iff_le.mpr (Nat.one_le_of_lt (monoa len))] apply (Classical.choose_spec (h (W (α n -1) +1))).2 apply lt_of_le_of_lt' (ENNReal.toReal_nonneg) (neg_neg_iff_pos.mpr epos) - linarith [(ENNReal.toReal_eq_zero_iff _).mpr (by left; exact EMetric.infEdist_zero_of_mem z_in)] + linarith [(ENNReal.toReal_eq_zero_iff _).mpr (by left; exact Metric.infEDist_zero_of_mem z_in)] lemma limitset_property_3 (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : ∀ k, alg.d k = 1 / (γ * l)) @@ -930,26 +999,34 @@ lemma limitset_property_3 (γ : ℝ) (hγ : γ > 1) simp [A, B] rwa [← inter_inter_distrib_left (limit_set z) a b] -- ω is a function that shows the relation between z and A,B - let ω : WithLp 2 (E × F) -> ℝ := fun z => ((EMetric.infEdist z A).toReal) / - ((EMetric.infEdist z A).toReal+(EMetric.infEdist z B).toReal) - have sum_ne_zero : ∀ z, (EMetric.infEdist z A).toReal + (EMetric.infEdist z B).toReal ≠ 0:= by + let ω : WithLp 2 (E × F) -> ℝ := fun z => ((Metric.infEDist z A).toReal) / + ((Metric.infEDist z A).toReal+(Metric.infEDist z B).toReal) + have sum_ne_zero : ∀ z, (Metric.infEDist z A).toReal + (Metric.infEDist z B).toReal ≠ 0:= by intro z eq0 - have inA : z ∈ A := by - apply EMetric.mem_closure_iff_infEdist_zero.mpr - have : (EMetric.infEdist z A).toReal = 0 := by - linarith [eq0, @ENNReal.toReal_nonneg (EMetric.infEdist z A), - @ENNReal.toReal_nonneg (EMetric.infEdist z B)] + have hAclosed : IsClosed A := by + dsimp [A] + exact IsClosed.inter isClosed_setOf_clusterPt closea + have hzA : z ∈ closure A := by + apply Metric.mem_closure_iff_infEDist_zero.mpr + have : (Metric.infEDist z A).toReal = 0 := by + linarith [eq0, @ENNReal.toReal_nonneg (Metric.infEDist z A), + @ENNReal.toReal_nonneg (Metric.infEDist z B)] exact (((fun {x y} hx hy ↦ (ENNReal.toReal_eq_toReal_iff' hx hy).mp) - ENNReal.top_ne_zero.symm (Metric.infEdist_ne_top nez_a) (id (Eq.symm this)))).symm - simp; constructor; rw [isOpen_compl_iff]; apply IsClosed.inter isClosed_setOf_clusterPt closea - have inB : z ∈ B :=by - apply EMetric.mem_closure_iff_infEdist_zero.mpr - have : (EMetric.infEdist z B).toReal = 0 := by - linarith [eq0, @ENNReal.toReal_nonneg (EMetric.infEdist z A), - @ENNReal.toReal_nonneg (EMetric.infEdist z B)] + ENNReal.top_ne_zero.symm (Metric.infEDist_ne_top nez_a) (id (Eq.symm this)))).symm + have inA : z ∈ A := by + simpa [hAclosed.closure_eq] using hzA + have hBclosed : IsClosed B := by + dsimp [B] + exact IsClosed.inter isClosed_setOf_clusterPt closeb + have hzB : z ∈ closure B := by + apply Metric.mem_closure_iff_infEDist_zero.mpr + have : (Metric.infEDist z B).toReal = 0 := by + linarith [eq0, @ENNReal.toReal_nonneg (Metric.infEDist z A), + @ENNReal.toReal_nonneg (Metric.infEDist z B)] exact (((fun {x y} hx hy ↦ (ENNReal.toReal_eq_toReal_iff' hx hy).mp) - ENNReal.top_ne_zero.symm (Metric.infEdist_ne_top nez_b) (id (Eq.symm this)))).symm - simp; constructor; rw [isOpen_compl_iff]; apply IsClosed.inter isClosed_setOf_clusterPt closeb + ENNReal.top_ne_zero.symm (Metric.infEDist_ne_top nez_b) (id (Eq.symm this)))).symm + have inB : z ∈ B := by + simpa [hBclosed.closure_eq] using hzB obtain hzin : z ∈ A ∩ B := mem_inter inA inB rw [disjoint_AB] at hzin; contradiction have contω : Continuous ω := by @@ -959,10 +1036,10 @@ lemma limitset_property_3 (γ : ℝ) (hγ : γ > 1) let V := {z : WithLp 2 (E × F) | (3 / 4) < (ω z)} have A0 : ∀ z_ ∈ A, ω z_ = 0 := by rintro z_ zA; rw [div_eq_zero_iff]; left - rw [EMetric.infEdist_zero_of_mem zA]; rfl + rw [Metric.infEDist_zero_of_mem zA]; rfl have B1 : ∀ z_ ∈ B, ω z_ = 1 := by rintro z_ zB; simp [ω]; apply (div_eq_one_iff_eq (sum_ne_zero z_)).mpr; simp - rw [EMetric.infEdist_zero_of_mem zB]; rfl + rw [Metric.infEDist_zero_of_mem zB]; rfl --eventually alg.z falls in U or V have U_V_prop : ∃ k0, ∀ k, (k0 ≤ k) -> (alg.z k ∈ U) ∨ (alg.z k ∈ V) := by by_contra h @@ -1085,10 +1162,15 @@ lemma limitset_property_4 (γ : ℝ) (hγ : γ > 1) have decent_ψ : ∃ ψ_final, Tendsto (alg.ψ ∘ alg.z) Filter.atTop (nhds ψ_final) := by have monopsi : Antitone (alg.ψ ∘ alg.z) := antitone_nat_of_succ_le (Sufficient_Descent2 γ hγ ck dk) - rcases tendsto_of_antitone monopsi with h1 | h2 - obtain notbd := unbounded_of_tendsto_atBot h1 - apply absurd notbd; push_neg - exact BddBelow.mono (by simp; apply range_comp_subset_range) lbdψ; exact h2 + rcases tendsto_atTop_of_antitone monopsi with h1 | h2 + · rcases lbdψ with ⟨m, hm⟩ + rcases (Filter.tendsto_atTop_atBot.mp h1) (m - 1) with ⟨N, hN⟩ + have hmN : m ≤ (alg.ψ ∘ alg.z) N := hm (by + refine ⟨alg.z N, ?_, rfl⟩ + exact mem_univ _) + have hle : (alg.ψ ∘ alg.z) N ≤ m - 1 := hN N le_rfl + linarith + · exact h2 rcases decent_ψ with ⟨ψ_final, hψ⟩ -- show that ψ_final is what we need use ψ_final; intro z_1 hz_1 @@ -1138,8 +1220,8 @@ private lemma ENNReal.mul_pos_real {a : ℝ} {b : ENNReal} (ha : a > 0) (hm : 1 theorem Limited_length (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : ∀ k, alg.d k = 1 / (γ * l)) (bd : Bornology.IsBounded (alg.z '' univ)) (hψ : KL_function alg.ψ) - (lbdψ : BddBelow (alg.ψ '' univ)): ∃ M : ℝ, ∀ n, - ∑ k in Finset.range n, ‖alg.z (k + 1) - alg.z k‖ ≤ M := by + (lbdψ : BddBelow (alg.ψ '' univ)) : + ∃ M : ℝ, ∀ n, (Finset.range n).sum (fun k ↦ ‖alg.z (k + 1) - alg.z k‖) ≤ M := by have :∃ z_∈ closure (alg.z '' univ), ∃ α:ℕ → ℕ,StrictMono α∧Tendsto (fun n ↦ alg.z (α n)) atTop (𝓝 z_):= by have hcs : IsSeqCompact (closure (alg.z '' univ)) := by @@ -1187,8 +1269,8 @@ theorem Limited_length (γ : ℝ) (hγ : γ > 1) _ = alg.ψ (alg.z (α l1 + (k - α l1))):= by congr; exact Eq.symm (Nat.add_sub_of_le kge) _ ≤ alg.ψ (alg.z (α l1)) := by apply monopsi - _ < alg.ψ z_ + η := (ieq l1 left_mem_Ici).2 - have L2 : ∀ ε > 0, ∃ l2, ∀k > l2, (EMetric.infEdist (alg.z k) (limit_set alg.z)).toReal< ε := by + _ < alg.ψ z_ + η := (ieq l1 Set.self_mem_Ici).2 + have L2 : ∀ ε > 0, ∃ l2, ∀k > l2, (Metric.infEDist (alg.z k) (limit_set alg.z)).toReal< ε := by rintro ε epos rcases limitset_property_2 bd with tendt rcases (atTop_basis.tendsto_iff (nhds_basis_abs_sub_lt (0:ℝ))).mp tendt ε epos with ⟨l2,_,ieq⟩ @@ -1216,7 +1298,7 @@ theorem Limited_length (γ : ℝ) (hγ : γ > 1) rw [heq x xin,heq y yin] have kl: ∃ ε ∈ Set.Ioi (0 : ℝ), ∃ η ∈ Set.Ioi (0 : ℝ), ∃ φ ∈ desingularizing_function η, ∃ LL, ∀ n > LL, (alg.ψ z_ < alg.ψ (alg.z n) ∧ alg.ψ (alg.z n) < alg.ψ z_ + η) ∧ ENNReal.ofReal (deriv φ (alg.ψ (alg.z n) - - alg.ψ z_)) * EMetric.infEdist 0 (subdifferential alg.ψ (alg.z n)) ≥ 1 := by + - alg.ψ z_)) * Metric.infEDist 0 (subdifferential alg.ψ (alg.z n)) ≥ 1 := by rcases uniformized_KL_property (limitset_property_3 γ hγ ck dk bd lbdψ).1 wklpt cons with ⟨ε, eppos, η, etpos, φ, hφ, pro⟩ rcases L1 η etpos with ⟨l1,lem1⟩ @@ -1258,25 +1340,26 @@ theorem Limited_length (γ : ℝ) (hγ : γ > 1) exact h6 _ (hlin n) have hbd2 : 1 ≤ ρ * (c n) * d n := by obtain ⟨dpsi, hdp, hub⟩ := hsgub (n + LL) - obtain hdp := subdifferential_subset _ _ hdp - have := infEdist_bound _ hdp - apply (ENNReal.ofReal_le_ofReal_iff _).1 - rw [ENNReal.ofReal_mul] - show ENNReal.ofReal (ρ * c n) * ENNReal.ofReal (d n) ≥ ENNReal.ofReal 1 - calc - _ ≥ (ENNReal.ofReal ‖dpsi‖) * ENNReal.ofReal (d n) := by - repeat rw [← ENNReal.ofReal_mul] - apply (ENNReal.ofReal_le_ofReal_iff _).2 - apply (mul_le_mul_iff_of_pos_right hposd).mpr hub - field_simp; simp [c]; simp; field_simp; simp [c] - _ ≥ (EMetric.infEdist 0 (subdifferential ψ (z (n + LL + 1)))) * ENNReal.ofReal (d n) := by - apply mul_le_mul_right' this - _ ≥ 1 := by rw [mul_comm]; exact (ieq (n + LL + 1) (by linarith)).2 - simp - field_simp - simp [c] - field_simp - simp [c] + have hdp' : dpsi ∈ subdifferential alg.ψ (alg.z (n + LL + 1)) := + subdifferential_subset (alg.ψ) (alg.z (n + LL + 1)) hdp + let S := subdifferential alg.ψ (alg.z (n + LL + 1)) + have h_inf_edist : Metric.infEDist 0 S ≤ edist 0 dpsi := Metric.infEDist_le_edist_of_mem hdp' + have h_inf : Metric.infEDist 0 S ≤ ENNReal.ofReal ‖dpsi‖ := by + simpa [edist_dist] using h_inf_edist + have h_inf_real : (Metric.infEDist 0 S).toReal ≤ ‖dpsi‖ := by + exact ENNReal.toReal_le_of_le_ofReal (norm_nonneg _) h_inf + have h_en : 1 ≤ ENNReal.ofReal (d n) * Metric.infEDist 0 S := by + simpa [S] using (ieq (n + LL + 1) (by linarith)).2 + have h_real : d n * (Metric.infEDist 0 S).toReal ≥ 1 := by + exact ENNReal.mul_pos_real hposd h_en (lt_top_iff_ne_top.mpr (Metric.infEDist_ne_top (by + refine ⟨dpsi, hdp'⟩))) + have hub' : ‖dpsi‖ ≤ ρ * c n := by + simpa [c] using hub + have h_mul1 : d n * (Metric.infEDist 0 S).toReal ≤ d n * ‖dpsi‖ := by + exact mul_le_mul_of_nonneg_left h_inf_real (le_of_lt hposd) + have h_mul2 : d n * ‖dpsi‖ ≤ d n * (ρ * c n) := by + exact mul_le_mul_of_nonneg_left hub' (le_of_lt hposd) + nlinarith [h_real, h_mul1, h_mul2] have hsd : ρ1 / 2 * (c (n + 1)) ^ 2 ≤ b n := by obtain h := suff_des.2 (n + LL + 1) rw [add_right_comm n LL 1] at h @@ -1356,19 +1439,27 @@ theorem Limited_length (γ : ℝ) (hγ : γ > 1) calc _ ≤ alg.ψ (alg.z i) -alg.ψ (alg.z (i + 1)) := suff_des.2 i _ = 0 := by simp [this i ige,this (i+1) (Nat.le_add_right_of_le ige)] - apply dist_eq_zero.mp (by rw [NormedAddCommGroup.dist_eq, this]) - use ∑ k in Finset.range N, ‖alg.z (k + 1) - alg.z k‖ - intro n; by_cases nlen : n ≤ N - · refine Finset.sum_le_sum_of_subset_of_nonneg (GCongr.finset_range_subset_of_le nlen) ?_ - exact fun a _ _ ↦norm_nonneg (alg.z (a + 1) - alg.z a) - push_neg at nlen - have eq0 : ∑ i in (Finset.range n \ Finset.range N), ‖alg.z (i + 1) - alg.z i‖ = 0 := by - apply Finset.sum_eq_zero; rintro x xin; simp at xin - exact norm_sub_eq_zero_iff.mpr (eq0 x xin.2) - refine Finset.sum_sdiff_le_sum_sdiff.mp ?_ - rw [eq0, Finset.sdiff_eq_empty_iff_subset.mpr - (GCongr.finset_range_subset_of_le (Nat.le_of_succ_le nlen))] - exact Preorder.le_refl 0 + exact sub_eq_zero.mp (norm_eq_zero.mp this) + use (∑ k ∈ Finset.range N, ‖alg.z (k + 1) - alg.z k‖) + intro n + by_cases nlen : n ≤ N + · refine Finset.sum_le_sum_of_subset_of_nonneg ((Finset.range_subset_range).2 nlen) ?_ + exact fun a _ _ ↦ norm_nonneg (alg.z (a + 1) - alg.z a) + · have nlt : N < n := lt_of_not_ge nlen + have hsub : Finset.range N ⊆ Finset.range n := (Finset.range_subset_range).2 (Nat.le_of_lt nlt) + have hsum0 : ∑ i ∈ (Finset.range n \ Finset.range N), ‖alg.z (i + 1) - alg.z i‖ = 0 := by + apply Finset.sum_eq_zero + intro x hx + have hxN : N ≤ x := by + have hxnot : x ∉ Finset.range N := (Finset.mem_sdiff.mp hx).2 + have hxlt : ¬ x < N := by simpa [Finset.mem_range] using hxnot + exact Nat.le_of_not_lt hxlt + exact norm_sub_eq_zero_iff.mpr (eq0 x hxN) + have hsplit := Finset.sum_sdiff hsub (f := fun k ↦ ‖alg.z (k + 1) - alg.z k‖) + have hsumEq : ∑ k ∈ Finset.range n, ‖alg.z (k + 1) - alg.z k‖ + = ∑ k ∈ Finset.range N, ‖alg.z (k + 1) - alg.z k‖ := by + linarith [hsplit, hsum0] + exact le_of_eq hsumEq theorem Convergence_to_critpt (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : ∀ k, alg.d k = 1 / (γ * l)) @@ -1377,15 +1468,19 @@ theorem Convergence_to_critpt (γ : ℝ) (hγ : γ > 1) z_ ∈ (critial_point alg.ψ) ∧ Tendsto alg.z atTop (𝓝 z_):= by have : ∃ z_, Tendsto alg.z atTop (𝓝 z_) := by apply cauchySeq_tendsto_of_complete - apply cauchySeq_of_summable_dist - rcases Limited_length γ hγ ck dk bd hψ lbdψ with ⟨M,sumle⟩ - apply @summable_of_sum_range_le _ M _ _ - intro n; simp; exact dist_nonneg - intro n - calc - _ = ∑ k ∈ Finset.range n, ‖alg.z (k + 1) - alg.z k‖ := - Finset.sum_congr rfl fun x _ ↦ (dist_eq_norm' (alg.z x) (alg.z x.succ)) - _ ≤ M := sumle n + refine cauchySeq_of_summable_dist ?_ + rcases Limited_length γ hγ ck dk bd hψ lbdψ with ⟨M, sumle⟩ + refine summable_of_sum_range_le (c := M) ?_ ?_ + · intro n + exact dist_nonneg + · intro n + calc + ∑ k ∈ Finset.range n, dist (alg.z k) (alg.z k.succ) + = ∑ k ∈ Finset.range n, ‖alg.z (k + 1) - alg.z k‖ := by + apply Finset.sum_congr rfl + intro x hx + simpa using (dist_eq_norm' (alg.z x) (alg.z x.succ)) + _ ≤ M := sumle n rcases this with ⟨z_,hzz⟩ refine' ⟨z_, _, hzz⟩ have z_in : z_ ∈ limit_set alg.z := by diff --git a/Optlib/Algorithm/BCD/Scheme.lean b/Optlib/Algorithm/BCD/Scheme.lean index acc6539..f65235f 100644 --- a/Optlib/Algorithm/BCD/Scheme.lean +++ b/Optlib/Algorithm/BCD/Scheme.lean @@ -32,20 +32,20 @@ variable [NormedAddCommGroup E] [InnerProductSpace ℝ E] variable [NormedAddCommGroup F] [InnerProductSpace ℝ F] variable {H : WithLp 2 (E × F) → ℝ} -lemma diff_from_l2 (h : Differentiable ℝ H) : @Differentiable ℝ _ (E × F) _ _ ℝ _ _ H := by +lemma diff_from_l2 (h : Differentiable ℝ H) : Differentiable ℝ (fun z : E × F ↦ H z) := by apply Differentiable.comp h apply IsBoundedLinearMap.differentiable exact instIsBoundedLinearMapL2equiv theorem diff_prod₁ (h : Differentiable ℝ H) (y : F) : - Differentiable ℝ (fun x ↦ H (x, y)) := by + Differentiable ℝ (fun x : E ↦ H (x, y)) := by apply Differentiable.comp (diff_from_l2 h) - exact Differentiable.prod differentiable_id' (differentiable_const y) + exact Differentiable.prodMk differentiable_id (differentiable_const y) theorem diff_prod₂ (h : Differentiable ℝ H) (x : E) : - Differentiable ℝ (fun y ↦ H (x, y)) := by + Differentiable ℝ (fun y : F ↦ H (x, y)) := by apply Differentiable.comp (diff_from_l2 h) - exact Differentiable.prod (differentiable_const x) differentiable_id' + exact Differentiable.prodMk (differentiable_const x) differentiable_id end diff @@ -59,104 +59,69 @@ variable {H : WithLp 2 (E × F) → ℝ} {x : E} {y : F} {z : WithLp 2 (E × F)} open Set Bornology Filter BigOperators Topology /- The gradient of the first component -/ -def grad_fst (H : WithLp 2 (E × F) → ℝ) (y : F) : E → E := gradient (fun t ↦ H (t, y)) +def grad_fst (H : WithLp 2 (E × F) → ℝ) (y : F) : E → E := + fun x : E ↦ (gradient H (WithLp.toLp 2 (x, y))).fst /- The gradient function of the second component -/ -def grad_fun_fst (H : WithLp 2 (E × F) → ℝ) := fun (x, y) ↦ (grad_fst H y x) +def grad_fun_fst (H : WithLp 2 (E × F) → ℝ) := fun z : WithLp 2 (E × F) ↦ grad_fst H z.snd z.fst /- The gradient of the second component -/ -def grad_snd (H : WithLp 2 (E × F) → ℝ) (x : E) : F → F := gradient (fun t ↦ H (x, t)) +def grad_snd (H : WithLp 2 (E × F) → ℝ) (x : E) : F → F := + fun y : F ↦ (gradient H (WithLp.toLp 2 (x, y))).snd /- The gradient function of the second component -/ -def grad_fun_snd (H : WithLp 2 (E × F) → ℝ) := fun (x, y) ↦ (grad_snd H x y) +def grad_fun_snd (H : WithLp 2 (E × F) → ℝ) := fun z : WithLp 2 (E × F) ↦ grad_snd H z.fst z.snd /- The gradient of the prod domain -/ def grad_comp (H : WithLp 2 (E × F) → ℝ) (z : WithLp 2 (E × F)) : WithLp 2 (E × F) := - (WithLp.equiv 2 (E × F)).symm (grad_fst H z.2 z.1, grad_snd H z.1 z.2) + WithLp.toLp 2 (grad_fst H z.snd z.fst, grad_snd H z.fst z.snd) /- The gradient function of the prod domain -/ def grad_fun_comp (H : WithLp 2 (E × F) → ℝ) := fun z ↦ (grad_comp H z) -theorem grad_fst_eq (h : Differentiable ℝ H) (z : WithLp 2 (E × F)) : - (gradient H z).1 = grad_fst H z.2 z.1 := by - have h₁ : HasGradientAt (fun x ↦ H (x, z.2)) (grad_fst H z.2 z.1) z.1 := by - apply DifferentiableAt.hasGradientAt - apply diff_prod₁ h - have h₂ : HasGradientAt (fun x ↦ H (x, z.2)) (gradient H z).1 z.1 := by - have h₃ : HasGradientAt H (gradient H z) z := DifferentiableAt.hasGradientAt (h z) - rw [hasGradientAt_iff_isLittleO, Asymptotics.isLittleO_iff] at h₃ ⊢ - intro c hc - specialize h₃ hc - obtain h₃' := Filter.Eventually.curry_nhds h₃ - rw [Filter.eventually_iff_exists_mem] at h₃' ⊢ - rcases h₃' with ⟨v, ⟨hv1, hv2⟩⟩ - use v - constructor - · exact hv1 - · intro y yv - specialize hv2 y yv - obtain hv2' := Filter.Eventually.self_of_nhds hv2 - have : z = (z.1, z.2) := rfl - rw [this] at hv2' - rw [Prod.mk_sub_mk y z.1 z.2 z.2] at hv2' - simp at hv2' - rw [norm_prod_right_zero] at hv2' - exact hv2' - exact HasGradientAt.unique h₂ h₁ - -theorem grad_snd_eq (h : Differentiable ℝ H) (z : WithLp 2 (E × F)) : - (gradient H z).2 = grad_snd H z.1 z.2 := by - have h₁ : HasGradientAt (fun y ↦ H (z.1, y)) (grad_snd H z.1 z.2) z.2 := by - apply DifferentiableAt.hasGradientAt - apply diff_prod₂ h - have h₂ : HasGradientAt (fun y ↦ H (z.1, y)) (gradient H z).2 z.2 := by - have h₃ : HasGradientAt H (gradient H z) z := DifferentiableAt.hasGradientAt (h z) - rw [hasGradientAt_iff_isLittleO, Asymptotics.isLittleO_iff] at h₃ ⊢ - intro c hc - specialize h₃ hc - obtain h₃' := Filter.Eventually.curry_nhds h₃ - obtain h₃'' := Filter.Eventually.self_of_nhds h₃' - rw [Filter.eventually_iff_exists_mem] at h₃'' ⊢ - rcases h₃'' with ⟨v, ⟨hv1, hv2⟩⟩ - use v - constructor - · exact hv1 - · intro y yv - specialize hv2 y yv - have : z = (z.1, z.2) := rfl - nth_rw 5 [this] at hv2 - simp at hv2 - nth_rw 6 [this] at hv2 - rw [Prod.mk_sub_mk z.1 z.1 y z.2] at hv2 - simp at hv2 - rw [norm_prod_left_zero] at hv2 - exact hv2 - exact HasGradientAt.unique h₂ h₁ - -theorem grad_eq_block_grad (h : Differentiable ℝ H) : gradient H = grad_fun_comp H := by - ext z - calc - gradient H z = ((gradient H z).1, (gradient H z).2) := rfl - _ = (grad_fst H z.2 z.1, grad_snd H z.1 z.2) := by rw [← grad_fst_eq h, ← grad_snd_eq h] - _ = grad_fun_comp H z := rfl - -theorem lip_grad_fst_of_lip (h : Differentiable ℝ H) (hl : LipschitzWith l (gradient H)) : - LipschitzWith l (fun (z : WithLp 2 (E × F)) ↦ grad_fst H z.2 z.1) := by - rw [lipschitzWith_iff_norm_sub_le] at * +theorem grad_fst_eq (_h : Differentiable ℝ H) (z : WithLp 2 (E × F)) : + (gradient H z).fst = grad_fst H z.snd z.fst := by + cases z + simp [grad_fst] + +theorem grad_snd_eq (_h : Differentiable ℝ H) (z : WithLp 2 (E × F)) : + (gradient H z).snd = grad_snd H z.fst z.snd := by + cases z + simp [grad_snd] + +theorem grad_eq_block_grad (_h : Differentiable ℝ H) : gradient H = grad_fun_comp H := by + funext z + cases z with + | toLp p => + dsimp [grad_fun_comp, grad_comp, grad_fst, grad_snd] + cases h : gradient H (WithLp.toLp 2 p) + rfl + +theorem lip_grad_fst_of_lip (_h : Differentiable ℝ H) (hl : LipschitzWith l (gradient H)) : + LipschitzWith l (fun (z : WithLp 2 (E × F)) ↦ grad_fst H z.snd z.fst) := by + rw [lipschitzWith_iff_norm_sub_le] at hl ⊢ intro z z' calc - _ = ‖(gradient H z).1 - (gradient H z').1‖ := by rw [grad_fst_eq h, grad_fst_eq h] - _ = ‖(gradient H z - gradient H z').1‖ := rfl + ‖grad_fst H z.snd z.fst - grad_fst H z'.snd z'.fst‖ = + ‖(gradient H z).fst - (gradient H z').fst‖ := by + cases z + cases z' + simp [grad_fst] + _ = ‖(gradient H z - gradient H z').fst‖ := by simp _ ≤ ‖(gradient H z - gradient H z')‖ := fst_norm_le_prod_L2 _ _ ≤ _ := hl z z' -theorem lip_grad_snd_of_lip (h : Differentiable ℝ H) (hl : LipschitzWith l (gradient H)) : - LipschitzWith l (fun (z : WithLp 2 (E × F)) ↦ grad_snd H z.1 z.2) := by - rw [lipschitzWith_iff_norm_sub_le] at * +theorem lip_grad_snd_of_lip (_h : Differentiable ℝ H) (hl : LipschitzWith l (gradient H)) : + LipschitzWith l (fun (z : WithLp 2 (E × F)) ↦ grad_snd H z.fst z.snd) := by + rw [lipschitzWith_iff_norm_sub_le] at hl ⊢ intro z z' calc - _ = ‖(gradient H z).2 - (gradient H z').2‖ := by rw [grad_snd_eq h, grad_snd_eq h] - _ = ‖(gradient H z - gradient H z').2‖ := rfl + ‖grad_snd H z.fst z.snd - grad_snd H z'.fst z'.snd‖ = + ‖(gradient H z).snd - (gradient H z').snd‖ := by + cases z + cases z' + simp [grad_snd] + _ = ‖(gradient H z - gradient H z').snd‖ := by simp _ ≤ ‖(gradient H z - gradient H z')‖ := snd_norm_le_prod_L2 _ _ ≤ _ := hl z z' @@ -174,18 +139,22 @@ variable {H : (WithLp 2 (E × F)) → ℝ} {x0 : E} {y0 : F} {l : NNReal} instance Proper_Prod : ProperSpace (WithLp 2 (E × F)) where isCompact_closedBall := by - rintro ⟨x, y⟩ r - obtain h := IsCompact.prod (isCompact_closedBall x r) (isCompact_closedBall y r) - have {a b : ℝ} : a ≤ √(a ^ 2 + b ^ 2) := by apply Real.le_sqrt_of_sq_le; linarith [sq_nonneg b] - have hsub : @Metric.closedBall (WithLp 2 (E × F)) _ ⟨x, y⟩ r - ⊆ Metric.closedBall x r ×ˢ Metric.closedBall y r := by - rintro ⟨x', y'⟩ hball - rw [mem_prod] - simp only [mem_closedBall_iff_norm, WithLp.prod_norm_eq_of_L2] at * - constructor - · exact le_trans this hball - · exact le_trans this ((add_comm (‖x' - x‖ ^ 2) _) ▸ hball) - apply IsCompact.of_isClosed_subset h (@Metric.isClosed_ball (WithLp 2 (E × F)) _ _ _) hsub + intro z r + let x := z.fst + let y := z.snd + have hprod : IsCompact (Metric.closedBall x r ×ˢ Metric.closedBall y r) := + (isCompact_closedBall x r).prod (isCompact_closedBall y r) + have himage : IsCompact (WithLp.toLp 2 '' (Metric.closedBall x r ×ˢ Metric.closedBall y r)) := + hprod.image (WithLp.prod_continuous_toLp (p := 2) (α := E) (β := F)) + refine IsCompact.of_isClosed_subset himage Metric.isClosed_closedBall ?_ + intro w hw + have hnorm : ‖w - z‖ ≤ r := by simpa [dist_eq_norm] using hw + have hw1 : ‖w.fst - x‖ ≤ r := by + simpa [x] using (le_trans (fst_norm_le_prod_L2 (w - z)) hnorm) + have hw2 : ‖w.snd - y‖ ≤ r := by + simpa [y] using (le_trans (snd_norm_le_prod_L2 (w - z)) hnorm) + refine ⟨(w.fst, w.snd), ?_, by cases w; rfl⟩ + exact ⟨mem_closedBall_iff_norm.mpr hw1, mem_closedBall_iff_norm.mpr hw2⟩ /-- Assumption: f and g are lower semicontinuous, H is continuously differentiable @@ -221,13 +190,13 @@ def BCD.z {self : BCD f g H l x0 y0} : ℕ → WithLp 2 (E × F) := fun n ↦ (WithLp.equiv 2 (E × F)).symm (self.x n, self.y n) /- the notation ψ in BCD -/ -def BCD.ψ {_ : BCD f g H l x0 y0} := fun z : WithLp 2 (E × F) ↦ f z.1 + g z.2 + H z +def BCD.ψ {_ : BCD f g H l x0 y0} := fun z : WithLp 2 (E × F) ↦ f z.fst + g z.snd + H z variable {alg : BCD f g H l x0 y0} omit [ProperSpace E] [ProperSpace F] in lemma BCD.Hdiff {self : BCD f g H l x0 y0} : Differentiable ℝ H := - self.conf.differentiable (Preorder.le_refl 1) + self.conf.differentiable (by simp) omit [InnerProductSpace ℝ E] [CompleteSpace E] [ProperSpace E] [InnerProductSpace ℝ F] [CompleteSpace F] [ProperSpace F] in diff --git a/Optlib/Algorithm/GD/GradientDescent.lean b/Optlib/Algorithm/GD/GradientDescent.lean index 04590cf..d85c94b 100644 --- a/Optlib/Algorithm/GD/GradientDescent.lean +++ b/Optlib/Algorithm/GD/GradientDescent.lean @@ -33,16 +33,20 @@ lemma mono_sum_prop_primal (mono : ∀ k : ℕ, f (g (k + 1)) ≤ f (g k)): ∀ n : ℕ , (Finset.range (n + 1)).sum (fun k ↦ f (g (k + 1))) ≥ (n + (1 : ℝ)) * f (g (n + 2)) := by intro n - induction' n with q IH1 - · simp; apply mono 1 - · specialize mono (q + 2) - calc (Finset.range (q.succ + 1)).sum (fun k ↦ f (g (k + 1))) - = (Finset.range (q + 1)).sum (fun k ↦ f (g (k + 1))) + f (g (q + 2)) := by - rw [Finset.sum_range_succ (fun k ↦ f (g (k + 1))) q.succ] - _ ≥ (q + (1 : ℝ)) * (f (g (q + 2))) + f (g (q + 2)) := by linarith - _ = (q + 2) * (f (g (q + 2))) := by ring_nf - _ ≥ (q + 2) * (f (g (q + 3))) := mul_le_mul_of_nonneg_left mono (by linarith) - _ = ((q.succ) + 1) * f (g (q.succ + 2)) := by simp; left; ring_nf + induction n with + | zero => + simp; apply mono 1 + | succ q IH1 => + specialize mono (q + 2) + calc (Finset.range (q.succ + 1)).sum (fun k ↦ f (g (k + 1))) + = (Finset.range (q + 1)).sum (fun k ↦ f (g (k + 1))) + f (g (q + 2)) := by + rw [Finset.sum_range_succ (fun k ↦ f (g (k + 1))) q.succ] + _ ≥ (q + (1 : ℝ)) * (f (g (q + 2))) + f (g (q + 2)) := by linarith + _ = (q + 2) * (f (g (q + 2))) := by ring_nf + _ ≥ (q + 2) * (f (g (q + 3))) := mul_le_mul_of_nonneg_left mono (by linarith) + _ = ((q.succ) + 1) * f (g (q.succ + 2)) := by + simp [Nat.succ_eq_add_one, add_assoc, add_left_comm, add_comm] + exact Or.inl (by norm_num : (2 : ℝ) = 1 + 1) -- for a certain iteration, we can get the bound by the sum of the sequence omit [NormedAddCommGroup E] in @@ -58,7 +62,7 @@ lemma mono_sum_prop_primal' (mono : ∀ k : ℕ, f (g (k + 1)) ≤ f (g k)): + f (g (n.succ + 1)) / (n.succ + 1) := by rw [Finset.sum_range_succ, add_div] _ ≥ n.succ * f (g (n.succ + 1)) / (n.succ + 1) + f (g (n.succ + 1)) / (n.succ + 1) := by simp; exact h - _ = f (g (n + 2)) := by field_simp; ring_nf + _ = f (g (n + 2)) := by field_simp -- the sumation property of the gradient method omit [NormedAddCommGroup E] in @@ -66,21 +70,23 @@ lemma mono_sum_prop (mono : ∀ k: ℕ, f (g (k + 1)) ≤ f (g k)): ∀ n : ℕ , (f (g (n + 1)) - f xm) ≤ (Finset.range (n + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1)) - f xm) / (n + 1) := by intro n - induction' n with j _ - · simp - · simp - calc f (g (j + 2)) ≤ (Finset.range (j.succ + 1)).sum - (fun (k : ℕ) ↦ f (g (k + 1))) / (j.succ + 1) := by - linarith [mono_sum_prop_primal' mono j] - _ = (Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) - / (j + 2) - f xm * 1 + f xm := by - rw [Nat.succ_eq_add_one j]; simp - ring_nf; rw [add_assoc, one_add_one_eq_two] - _ = (Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) / (j + 2) - - f xm * ((j + 2) / (j + 2)) + f xm := by field_simp - _ = ((Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) - - (j + 1 + 1) * f xm) / (j + 1+1)+ f xm := by - simp; rw [← one_add_one_eq_two, ← add_assoc, mul_div, mul_comm, ← sub_div] + induction n with + | zero => + simp + | succ j IH => + simp + calc f (g (j + 2)) ≤ (Finset.range (j.succ + 1)).sum + (fun (k : ℕ) ↦ f (g (k + 1))) / (j.succ + 1) := by + linarith [mono_sum_prop_primal' mono j] + _ = (Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) + / (j + 2) - f xm * 1 + f xm := by + rw [Nat.succ_eq_add_one j]; simp + ring_nf + _ = (Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) / (j + 2) + - f xm * ((j + 2) / (j + 2)) + f xm := by field_simp + _ = ((Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) + - (j + 1 + 1) * f xm) / (j + 1+1)+ f xm := by + simp; rw [← one_add_one_eq_two, ← add_assoc, mul_div, mul_comm, ← sub_div] end descent_lemma @@ -88,13 +94,13 @@ noncomputable section gradient_descent variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E] -class GradientDescent (f : E → ℝ) (f' : E → E) (x0 : E) := +class GradientDescent (f : E → ℝ) (f' : E → E) (x0 : E) where (x : ℕ → E) (a : ℕ → ℝ) (l : NNReal) (diff : ∀ x₁, HasGradientAt f (f' x₁) x₁) (smooth : LipschitzWith l f') (update : ∀ k : ℕ, x (k + 1) = x k - a k • f' (x k)) (hl : l > 0) (step₁ : ∀ k, a k > 0) (initial : x 0 = x0) -class Gradient_Descent_fix_stepsize (f : E → ℝ) (f' : E → E) (x0 : E) := +class Gradient_Descent_fix_stepsize (f : E → ℝ) (f' : E → E) (x0 : E) where (x : ℕ → E) (a : ℝ) (l : NNReal) (diff : ∀ x₁, HasGradientAt f (f' x₁) x₁) (smooth : LipschitzWith l f') (update : ∀ k : ℕ, x (k + 1) = x k - a • f' (x k)) @@ -123,7 +129,7 @@ variable {alg : Gradient_Descent_fix_stepsize f f' x₀} -- equivalent description of the convexity of a smooth function lemma convex_function (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁) (hfun: ConvexOn ℝ Set.univ f) : - ∀ x y, f x ≤ f y + inner (f' x) (x - y) := by + ∀ x y, f x ≤ f y + inner ℝ (f' x) (x - y) := by intro x y obtain this := Convex_first_order_condition' (h₁ x) hfun (by trivial) y (by trivial) rw [← neg_sub, inner_neg_right] at this @@ -135,14 +141,14 @@ lemma convex_lipschitz (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁) ∀ x : E, f (x - a • (f' x)) ≤ f x - a / 2 * ‖f' x‖ ^ 2 := by intro x calc - _ ≤ f x + inner (f' x) (x - a • (f' x) - x) + l / 2 * ‖x - a • (f' x) - x‖ ^ 2 := + _ ≤ f x + inner ℝ (f' x) (x - a • (f' x) - x) + l / 2 * ‖x - a • (f' x) - x‖ ^ 2 := lipschitz_continuos_upper_bound' h₁ h₃ x (x - a • (f' x)) _ = f x + ((l.1 / 2 * a * a -a) * ‖f' x‖ ^ 2) := by simp; ring_nf; simp rw [real_inner_smul_right, real_inner_self_eq_norm_sq, norm_smul]; simp rw [abs_of_pos ha₂]; ring_nf _ ≤ f x + (- a / 2* ‖(f' x)‖ ^2) := by - simp only [add_le_add_iff_left, gt_iff_lt, norm_pos_iff, ne_eq] + simp only [add_le_add_iff_left] apply mul_le_mul_of_nonneg_right · simp; calc l / 2 * a * a = (l * a) * (a / 2) := by ring_nf @@ -167,25 +173,25 @@ lemma point_descent_for_convex (hfun : ConvexOn ℝ Set.univ f) (step₂ : alg.a intro x have t1 : 1 / ((2 : ℝ) * alg.a) * ((2 : ℝ) * alg.a) = 1 := by field_simp; ring_nf; apply mul_inv_cancel₀; linarith [alg.step₁] - have t2 : inner (f' x) (x - xm) - alg.a / 2 * ‖f' x‖ ^ 2 = + have t2 : inner ℝ (f' x) (x - xm) - alg.a / 2 * ‖f' x‖ ^ 2 = 1 / ((2 : ℝ) * alg.a) * (‖x - xm‖ ^ 2 - ‖x - alg.a • (f' x) - xm‖ ^ 2) := by symm have t2₁ : ‖x - alg.a • (f' x) - xm‖ ^ 2 = - ‖x - xm‖ ^ 2 - ((2 : ℝ) * alg.a) * inner (f' x) (x - xm) + ‖alg.a • (f' x)‖ ^ 2 := by + ‖x - xm‖ ^ 2 - ((2 : ℝ) * alg.a) * inner ℝ (f' x) (x - xm) + ‖alg.a • (f' x)‖ ^ 2 := by rw [sub_right_comm]; simp [norm_sub_sq_real (x - xm) _] ring_nf; rw [real_inner_smul_right, real_inner_comm]; calc - _ = 1 / ((2 : ℝ) * alg.a) * ((2 : ℝ) * alg.a) * (inner (f' x) (x - xm)) + _ = 1 / ((2 : ℝ) * alg.a) * ((2 : ℝ) * alg.a) * (inner ℝ (f' x) (x - xm)) + 1 / ((2 : ℝ) * alg.a) * (- ‖alg.a • (f' x)‖ ^ 2) := by rw [t2₁]; ring_nf - _ = inner (f' x) (x - xm) + 1 / ((2 : ℝ) * alg.a) + _ = inner ℝ (f' x) (x - xm) + 1 / ((2 : ℝ) * alg.a) * (- ‖alg.a • (f' x)‖ ^ 2) := by rw [t1, one_mul] - _ = inner (f' x) (x - xm) - 1 / ((2 : ℝ) * alg.a) * (alg.a * alg.a) * (‖f' x‖ ^ 2) := by + _ = inner ℝ (f' x) (x - xm) - 1 / ((2 : ℝ) * alg.a) * (alg.a * alg.a) * (‖f' x‖ ^ 2) := by rw [norm_smul _ _]; simp; rw [abs_of_pos alg.step₁]; ring_nf - _ = inner (f' x) (x - xm) - alg.a / (2 : ℝ) - * ‖f' x‖ ^ 2 := by ring_nf; simp; left; rw [pow_two,mul_self_mul_inv alg.a] + _ = inner ℝ (f' x) (x - xm) - alg.a / (2 : ℝ) + * ‖f' x‖ ^ 2 := by field_simp [alg.step₁.ne'] calc f (x - alg.a • (f' x)) ≤ f x - alg.a / 2 * ‖f' x‖ ^ 2 := by exact convex_lipschitz alg.diff this step₂ alg.step₁ alg.smooth x - _ ≤ f xm + inner (f' x) (x - xm) - alg.a / 2 * ‖f' x‖ ^ 2 := by + _ ≤ f xm + inner ℝ (f' x) (x - xm) - alg.a / 2 * ‖f' x‖ ^ 2 := by linarith [convex_function alg.diff hfun x xm] _ = f xm + 1 / ((2 : ℝ) * alg.a) * (‖x - xm‖ ^ 2 - ‖x - alg.a • (f' x) - xm‖ ^ 2) := by rw [add_sub_assoc, t2] @@ -217,27 +223,32 @@ lemma gradient_method (hfun: ConvexOn ℝ Set.univ f) (step₂ : alg.a ≤ 1 / a have sum_prop : ∀ n : ℕ, (Finset.range (n + 1)).sum (fun (k : ℕ) ↦ f (alg.x (k + 1)) - f xm) ≤ 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (n + 1) - xm‖ ^ 2) := by intro n - induction' n with j IH - · specialize xdescent (0 : ℕ) - simp - calc - _ ≤ f xm + 1 / (2 * alg.a) * (‖alg.x 0 - xm‖ ^ 2 - ‖alg.x (0 + 1) - xm‖ ^ 2) := - xdescent - _ = alg.a⁻¹ * 2⁻¹ * (‖x₀ - xm‖^ 2 - ‖alg.x 1 - xm‖ ^ 2) + f xm := by - rw [alg.initial]; simp; ring_nf - · specialize xdescent (j + 1) - calc - _ = (Finset.range (j + 1)).sum (fun (k : ℕ) ↦ f (alg.x (k + 1)) - f xm) - + f (alg.x (j + 2)) - f xm := by - rw [Finset.sum_range_succ (fun (k : ℕ)↦ f (alg.x (k+1))-f (xm)) j.succ] - rw [Nat.succ_eq_add_one j]; ring_nf; rw [add_sub] - _ ≤ 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (j + 1) - xm‖ ^ 2) - + f (alg.x (j + 2)) - f xm := by linarith - _ ≤ 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (j + 1) - xm‖ ^ 2) - + 1 / (2 * alg.a) * (‖alg.x (j + 1) - xm‖ ^ 2 - ‖alg.x (j + 2) - xm‖ ^ 2) := by - rw [add_sub_right_comm]; linarith - _ = 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (j.succ + 1) - xm‖ ^ 2) := by - ring_nf; simp; left; ring_nf + induction n with + | zero => + specialize xdescent (0 : ℕ) + simp + calc + _ ≤ f xm + 1 / (2 * alg.a) * (‖alg.x 0 - xm‖ ^ 2 - ‖alg.x (0 + 1) - xm‖ ^ 2) := + xdescent + _ = alg.a⁻¹ * 2⁻¹ * (‖x₀ - xm‖^ 2 - ‖alg.x 1 - xm‖ ^ 2) + f xm := by + rw [alg.initial]; simp; ring_nf + | succ j IH => + specialize xdescent (j + 1) + calc + _ = (Finset.range (j + 1)).sum (fun (k : ℕ) ↦ f (alg.x (k + 1)) - f xm) + + f (alg.x (j + 2)) - f xm := by + rw [Finset.sum_range_succ (fun (k : ℕ)↦ f (alg.x (k+1))-f (xm)) j.succ] + rw [Nat.succ_eq_add_one j] + simp [sub_eq_add_neg, add_assoc, add_left_comm, add_comm] + _ ≤ 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (j + 1) - xm‖ ^ 2) + + f (alg.x (j + 2)) - f xm := by linarith + _ ≤ 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (j + 1) - xm‖ ^ 2) + + 1 / (2 * alg.a) * (‖alg.x (j + 1) - xm‖ ^ 2 - ‖alg.x (j + 2) - xm‖ ^ 2) := by + linarith [xdescent] + _ = 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (j.succ + 1) - xm‖ ^ 2) := by + let c : ℝ := 1 / (2 * alg.a) + simp [Nat.succ_eq_add_one, add_assoc] + ring obtain sum_prop_1 := mono_sum_prop mono specialize sum_prop_1 k specialize sum_prop k diff --git a/Optlib/Algorithm/GD/GradientDescentStronglyConvex.lean b/Optlib/Algorithm/GD/GradientDescentStronglyConvex.lean index d4cd46e..a6d8aa3 100644 --- a/Optlib/Algorithm/GD/GradientDescentStronglyConvex.lean +++ b/Optlib/Algorithm/GD/GradientDescentStronglyConvex.lean @@ -31,7 +31,7 @@ open Set theorem Strong_convex_Lipschitz_smooth (hsc: StrongConvexOn univ m f) (mp : m > 0) (hf : ∀ x, HasGradientAt f (f' x) x) (h₂ : LipschitzWith l f') (hl : l > (0 : ℝ)): - inner (f' x - f' y) (x - y) ≥ m * l / (m + l) * ‖x - y‖ ^ 2 + inner ℝ (f' x - f' y) (x - y) ≥ m * l / (m + l) * ‖x - y‖ ^ 2 + 1 / (m + l) * ‖f' x - f' y‖ ^ 2 := by rw [StrongConvexOn, UniformConvexOn] at hsc rcases hsc with ⟨cov, hsc⟩ @@ -52,13 +52,14 @@ theorem Strong_convex_Lipschitz_smooth (hsc: StrongConvexOn univ m f) (mp : m > use cov have convh : ConvexOn ℝ univ h := by have (x : E) : h x = phi x := by - field_simp [phi, h]; ring_nf + simp [phi, h, g] + ring rw [ConvexOn]; use cov; intro x xin y yin a b apos bpos absum1 rw [this, this, this] rw [ConvexOn] at convphi apply convphi.2 xin yin apos bpos absum1 by_cases coef: 0 < l - m - · have eq1 : inner (g' x - g' y) (x - y) ≥ 1 / (l - m) * ‖g' x - g' y‖ ^ 2 := by + · have eq1 : inner ℝ (g' x - g' y) (x - y) ≥ 1 / (l - m) * ‖g' x - g' y‖ ^ 2 := by apply convex_to_lower gderiv show ConvexOn ℝ univ h; apply convh; apply coef; apply convg let alpha : E := f' x - f' y @@ -67,60 +68,61 @@ theorem Strong_convex_Lipschitz_smooth (hsc: StrongConvexOn univ m f) (mp : m > simp [g']; rw [smul_sub]; rw [← sub_add, ← sub_add]; simp rw [sub_right_comm] rw [eq2] at eq1 - have eq3 (u v : E) : inner (u - m • v) v ≥ 1 / (l - m) * ‖u - m • v‖ ^ 2 - → inner u v ≥ m * l / (m + l) * ‖v‖ ^ 2 + 1 / (m + l) * ‖u‖ ^ 2 := by - have : ‖u - m • v‖ ^ 2 = ‖u‖ ^ 2 + m ^ 2 * ‖v‖ ^ 2 - 2 * m * inner u v := by + have eq3 (u v : E) : inner ℝ (u - m • v) v ≥ 1 / (l - m) * ‖u - m • v‖ ^ 2 + → inner ℝ u v ≥ m * l / (m + l) * ‖v‖ ^ 2 + 1 / (m + l) * ‖u‖ ^ 2 := by + have : ‖u - m • v‖ ^ 2 = ‖u‖ ^ 2 + m ^ 2 * ‖v‖ ^ 2 - 2 * m * inner ℝ u v := by rw [norm_sub_sq_real, inner_smul_right]; ring_nf; rw [norm_smul]; simp rw [mul_pow, sq_abs] rw [this] intro h0; rw [inner_sub_left, inner_smul_left] at h0; field_simp at h0 - rw [real_inner_self_eq_norm_sq, div_le_iff₀] at h0 - field_simp at h0; rw [sub_mul, sub_add_eq_add_sub, le_sub_iff_add_le] at h0 - rw [mul_right_comm, mul_sub] at h0; ring_nf at h0 - rw [mul_right_comm, ← add_mul] at h0 + rw [real_inner_self_eq_norm_sq] at h0 + have h0' : ‖u‖ ^ 2 + m ^ 2 * ‖v‖ ^ 2 - 2 * m * inner ℝ u v ≤ + (l - m) * (inner ℝ u v - m * ‖v‖ ^ 2) := by + simpa [mul_assoc, mul_left_comm, mul_comm] using h0 have mlpos : 0 < m + l := by linarith rw [ge_iff_le] - field_simp; rw [add_comm] - calc - _ ≤ ((m + l) * inner u v) / (m + l) := by - rw [div_le_div_right]; apply h0; apply mlpos - _ = inner u v := by field_simp - apply coef - show inner alpha beta ≥ m * l / (m + l) * ‖beta‖ ^ 2 + 1 / (m + l) * ‖alpha‖ ^ 2 + field_simp [mlpos.ne'] + nlinarith [h0'] + show inner ℝ alpha beta ≥ m * l / (m + l) * ‖beta‖ ^ 2 + 1 / (m + l) * ‖alpha‖ ^ 2 apply eq3 - show inner (alpha - m • beta) (x - y) ≥ 1 / (l - m) * ‖alpha - m • beta‖ ^ 2 + show inner ℝ (alpha - m • beta) (x - y) ≥ 1 / (l - m) * ‖alpha - m • beta‖ ^ 2 apply eq1 · let alpha : E := f' x - f' y let beta : E := x - y - have eq1 : inner alpha beta ≥ m * ‖beta‖ ^ 2 := by - show inner (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2 + have eq1 : inner ℝ alpha beta ≥ m * ‖beta‖ ^ 2 := by + show inner ℝ (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2 apply Strong_Convex_lower; rw [StrongConvexOn, UniformConvexOn] use cov; simp; apply hf; simp; simp - have eq2 : inner alpha beta ≥ 1 / l * ‖alpha‖ ^ 2 := by - show inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 + have eq2 : inner ℝ alpha beta ≥ 1 / l * ‖alpha‖ ^ 2 := by + show inner ℝ (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 apply lipschitz_to_lower hf h₂ apply StrictConvexOn.convexOn; apply StrongConvexOn.strictConvexOn rw [StrongConvexOn, UniformConvexOn]; use cov; apply mp; apply hl rw [ge_iff_le] at eq1 rw [ge_iff_le] at eq2 have mlpos : 0 < m + l := by linarith - have eq3 (u v : E) (h1 : m * ‖v‖ ^ 2 ≤ inner u v) (h2 : 1 / l * ‖u‖ ^ 2 ≤ inner u v): - inner u v ≥ m * l / (m + l) * ‖v‖ ^ 2 + 1 / (m + l) * ‖u‖ ^ 2 := by - field_simp; rw [div_le_iff₀ mlpos, mul_comm _ (m + l), add_mul] - have eq4 : m * l * ‖v‖ ^ 2 ≤ m * inner u v := by + have eq3 (u v : E) (h1 : m * ‖v‖ ^ 2 ≤ inner ℝ u v) (h2 : 1 / l * ‖u‖ ^ 2 ≤ inner ℝ u v): + inner ℝ u v ≥ m * l / (m + l) * ‖v‖ ^ 2 + 1 / (m + l) * ‖u‖ ^ 2 := by + rw [ge_iff_le] + field_simp [mlpos.ne'] + have hmnonneg : 0 ≤ m := by linarith [mp] + have hlnonneg : 0 ≤ (l : ℝ) := by linarith [hl] + have hlem : (l : ℝ) ≤ m := by linarith [coef] + have eq4 : m * l * ‖v‖ ^ 2 ≤ m * inner ℝ u v := by calc _ ≤ m * m * ‖v‖ ^ 2 := by - rw [mul_comm m l, mul_assoc, mul_assoc] - have : 0 ≤ ‖v‖ ^ 2 := by simp - apply mul_le_mul_of_nonneg_right - simp at coef; apply coef - rw [mul_nonneg_iff_right_nonneg_of_pos]; simp; apply mp - _ ≤ m * inner u v := by - rw [mul_assoc, mul_le_mul_left]; apply h1; apply mp - have eq5 : ‖u‖ ^ 2 ≤ l * inner u v := by - field_simp at h2; rw [mul_comm, ← div_le_iff₀]; apply h2; apply hl - linarith - show inner alpha beta ≥ m * l / (m + l) * ‖beta‖ ^ 2 + 1 / (m + l) * ‖alpha‖ ^ 2 + have : l * (m * ‖v‖ ^ 2) ≤ m * (m * ‖v‖ ^ 2) := by + apply mul_le_mul_of_nonneg_right hlem + exact mul_nonneg hmnonneg (by positivity) + simpa [mul_assoc, mul_left_comm, mul_comm] using this + _ ≤ m * inner ℝ u v := by + simpa [mul_assoc] using mul_le_mul_of_nonneg_left h1 hmnonneg + have eq5 : ‖u‖ ^ 2 ≤ l * inner ℝ u v := by + have hlne : (l : ℝ) ≠ 0 := by linarith [hl] + have : l * (1 / l * ‖u‖ ^ 2) ≤ l * inner ℝ u v := mul_le_mul_of_nonneg_left h2 hlnonneg + simpa [mul_assoc, hlne] using this + nlinarith [eq4, eq5] + show inner ℝ alpha beta ≥ m * l / (m + l) * ‖beta‖ ^ 2 + 1 / (m + l) * ‖alpha‖ ^ 2 apply eq3; apply eq2; apply eq1 lemma lipschitz_derivxm_eq_zero (h₁ : ∀ x : E, HasGradientAt f (f' x) x) @@ -128,12 +130,12 @@ lemma lipschitz_derivxm_eq_zero (h₁ : ∀ x : E, HasGradientAt f (f' x) x) have eq1 : ∀ x : E, 1 / (2 * l) * ‖f' x‖ ^ 2 ≤ f x - f xm := by apply lipschitz_minima_lower_bound h₁ h₂ min hl specialize eq1 xm - field_simp at eq1 - have _ : (0 : ℝ) < 2 * l := by linarith - have eq3 : 0 ≤ ‖f' xm‖ ^ 2 / (2 * l) := by - apply div_nonneg; simp; linarith - have eq4 : ‖f' xm‖ ^ 2 / (2 * l) = 0 := by linarith - field_simp at eq4; exact eq4 + have hnonneg : 0 ≤ 1 / (2 * l) * ‖f' xm‖ ^ 2 := by positivity + have hle : 1 / (2 * l) * ‖f' xm‖ ^ 2 ≤ 0 := by simpa using eq1 + have hzero : 1 / (2 * l) * ‖f' xm‖ ^ 2 = 0 := le_antisymm hle hnonneg + have hcoef : (1 / (2 * l : ℝ)) ≠ 0 := by positivity + have hsq : ‖f' xm‖ ^ 2 = 0 := (mul_eq_zero.mp hzero).resolve_left hcoef + exact norm_eq_zero.mp (sq_eq_zero_iff.mp hsq) variable (hsc: StrongConvexOn univ m f) {alg : Gradient_Descent_fix_stepsize f f' x₀} @@ -151,13 +153,13 @@ lemma gradient_method_strong_convex (hm : m > 0) (min : IsMinOn f univ xm) calc _ = ‖alg.x k - xm - alg.a • f' (alg.x k)‖ ^ 2 := by rw [sub_right_comm] - _ = ‖alg.x k - xm‖ ^ 2 - 2 * alg.a * inner (alg.x k - xm) (f' (alg.x k)) + _ = ‖alg.x k - xm‖ ^ 2 - 2 * alg.a * inner ℝ (alg.x k - xm) (f' (alg.x k)) + alg.a ^ 2 * ‖f' (alg.x k)‖ ^ 2 := by rw [norm_sub_sq_real, inner_smul_right] ring_nf; rw [norm_smul]; simp; rw [mul_pow, sq_abs] _ ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) * ‖alg.x k - xm‖ ^ 2 + alg.a * (alg.a - 2 / (m + alg.l)) * ‖f' (alg.x k)‖ ^ 2 := by - have : inner (alg.x k - xm) (f' (alg.x k)) ≥ + have : inner ℝ (alg.x k - xm) (f' (alg.x k)) ≥ m * alg.l / (m + alg.l) * ‖alg.x k - xm‖ ^ 2 + 1 / (m + alg.l) * ‖f' (alg.x k)‖ ^ 2 := by have eq1 : f' (alg.x k) = f' (alg.x k) - f' xm := by @@ -167,19 +169,38 @@ lemma gradient_method_strong_convex (hm : m > 0) (min : IsMinOn f univ xm) apply Strong_convex_Lipschitz_smooth; apply hsc; apply hm; apply alg.diff; apply alg.smooth; apply alg.hl rw [sub_mul, one_mul, mul_sub, sub_mul, ← add_comm_sub, ← pow_two] - apply add_le_add_right - rw [sub_eq_add_neg, sub_sub]; rw [sub_eq_add_neg (‖alg.x k - xm‖ ^ 2)] - apply add_le_add_left; apply neg_le_neg - calc - _ = - 2 * alg.a * ((m * alg.l / (m + alg.l)) * ‖alg.x k - xm‖ ^ 2 + - (1 / (m + alg.l)) * ‖f' (alg.x k)‖ ^ 2) := by - field_simp; rw [mul_add, mul_comm alg.a 2, ← mul_assoc, ← mul_assoc, mul_comm alg.a 2] - ring_nf - _ ≤ 2 * alg.a * inner (alg.x k - xm) (f' (alg.x k)) := by - rw [ge_iff_le] at this - have twoapos : 0 < 2 * alg.a := by linarith [alg.step₁] - rw [mul_le_mul_left twoapos]; apply this + have this_le : + (m * alg.l / (m + alg.l)) * ‖alg.x k - xm‖ ^ 2 + + 1 / (m + alg.l) * ‖f' (alg.x k)‖ ^ 2 ≤ + inner ℝ (alg.x k - xm) (f' (alg.x k)) := by + simpa [ge_iff_le] using this + have hmul : + (2 * alg.a) * + ((m * alg.l / (m + alg.l)) * ‖alg.x k - xm‖ ^ 2 + + 1 / (m + alg.l) * ‖f' (alg.x k)‖ ^ 2) ≤ + (2 * alg.a) * inner ℝ (alg.x k - xm) (f' (alg.x k)) := by + apply mul_le_mul_of_nonneg_left this_le + linarith [alg.step₁] + have hnegTerm : + -2 * alg.a * inner ℝ (alg.x k - xm) (f' (alg.x k)) ≤ + -alg.a * (2 * m * alg.l / (m + alg.l)) * ‖alg.x k - xm‖ ^ 2 + - alg.a * (2 / (m + alg.l)) * ‖f' (alg.x k)‖ ^ 2 := by + have hnegTerm0 : -(2 * alg.a * inner ℝ (alg.x k - xm) (f' (alg.x k))) ≤ + -(2 * alg.a * + ((m * alg.l / (m + alg.l)) * ‖alg.x k - xm‖ ^ 2 + + 1 / (m + alg.l) * ‖f' (alg.x k)‖ ^ 2)) := neg_le_neg hmul + have hleft : -(2 * alg.a * inner ℝ (alg.x k - xm) (f' (alg.x k))) = + -2 * alg.a * inner ℝ (alg.x k - xm) (f' (alg.x k)) := by ring + have hright : -(2 * alg.a * + ((m * alg.l / (m + alg.l)) * ‖alg.x k - xm‖ ^ 2 + + 1 / (m + alg.l) * ‖f' (alg.x k)‖ ^ 2)) = + -alg.a * (2 * m * alg.l / (m + alg.l)) * ‖alg.x k - xm‖ ^ 2 + - alg.a * (2 / (m + alg.l)) * ‖f' (alg.x k)‖ ^ 2 := by ring + rw [hleft, hright] at hnegTerm0 + exact hnegTerm0 + have hmid0 := add_le_add_left hnegTerm (‖alg.x k - xm‖ ^ 2) + have hmid1 := add_le_add_right hmid0 (alg.a ^ 2 * ‖f' (alg.x k)‖ ^ 2) + simpa [sub_eq_add_neg, add_assoc, add_left_comm, add_comm] using hmid1 _ ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) * ‖alg.x k - xm‖ ^ 2 := by simp have eq2 : alg.a * (alg.a - 2 / (m + alg.l)) ≤ 0 := by @@ -188,37 +209,35 @@ lemma gradient_method_strong_convex (hm : m > 0) (min : IsMinOn f univ xm) have eq3 : 0 ≤ ‖f' (alg.x k)‖ ^ 2 := by simp apply mul_nonpos_of_nonpos_of_nonneg eq2 eq3 have eq : 0 ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) := by - have : 0 < m + alg.l := by linarith - field_simp; rw [div_nonneg_iff]; left - constructor - · simp - calc - alg.a * (2 * m * alg.l) ≤ 2 / (m + alg.l) * (2 * m * alg.l) := by - rw [mul_le_mul_right]; linarith [step₂, alg.step₁]; - apply mul_pos <;> linarith - _ ≤ (m + alg.l) ^ 2 / (m + alg.l) := by - field_simp; rw [div_le_div_right this] - ring_nf - calc - _ ≤ m * alg.l * 4 + (m - alg.l) ^ 2 := by - simp; apply sq_nonneg - _ = m * alg.l * 2 + m ^ 2 + alg.l ^ 2 := by ring_nf - _ = (m + alg.l) := by - rw [pow_two]; simp - · linarith + have hmlpos : 0 < m + alg.l := by linarith + have hcoef_nonneg : 0 ≤ 2 * m * alg.l / (m + alg.l) := by positivity + have hbound1 : + alg.a * (2 * m * alg.l / (m + alg.l)) ≤ + (2 / (m + alg.l)) * (2 * m * alg.l / (m + alg.l)) := by + exact mul_le_mul_of_nonneg_right step₂ hcoef_nonneg + have hsq : 4 * m * alg.l ≤ (m + alg.l) ^ 2 := by + nlinarith [sq_nonneg (m - alg.l)] + have hbound2 : (2 / (m + alg.l)) * (2 * m * alg.l / (m + alg.l)) ≤ (1 : ℝ) := by + have hmlnz : (m + alg.l : ℝ) ≠ 0 := by linarith + field_simp [hmlnz] + nlinarith [sq_nonneg (m - alg.l)] + have hbound : alg.a * (2 * m * alg.l / (m + alg.l)) ≤ (1 : ℝ) := le_trans hbound1 hbound2 + linarith intro k - induction' k with q IH1 - · simp; rw [alg.initial] - · calc - _ = ‖alg.x (q + 1) - xm‖ ^ 2 := by simp - _ ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) * ‖alg.x q - xm‖ ^ 2 := by - apply reduction - _ ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) * - (1 - alg.a * (2 * m * alg.l / (m + alg.l))) ^ q * ‖x₀ - xm‖ ^ 2 := by - rw [mul_assoc _ _ (‖x₀ - xm‖ ^ 2)] - apply mul_le_mul_of_nonneg_left; apply IH1; apply eq - _ = (1 - alg.a * (2 * m * alg.l / (m + alg.l))) ^ (q + 1) * ‖x₀ - xm‖ ^ 2 := by - simp; left; rw [pow_succ, pow_mul_comm'] - _ = (1 - alg.a * (2 * m * alg.l / (m + alg.l))) ^ Nat.succ q * ‖x₀ - xm‖ ^ 2 := by simp + induction k with + | zero => + simp [alg.initial] + | succ q IH1 => + calc + _ = ‖alg.x (q + 1) - xm‖ ^ 2 := by simp + _ ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) * ‖alg.x q - xm‖ ^ 2 := by + apply reduction + _ ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) * + (1 - alg.a * (2 * m * alg.l / (m + alg.l))) ^ q * ‖x₀ - xm‖ ^ 2 := by + rw [mul_assoc _ _ (‖x₀ - xm‖ ^ 2)] + apply mul_le_mul_of_nonneg_left IH1 eq + _ = (1 - alg.a * (2 * m * alg.l / (m + alg.l))) ^ (q + 1) * ‖x₀ - xm‖ ^ 2 := by + rw [pow_succ] + ring end Strongly_Convex_Gradient_Descent diff --git a/Optlib/Algorithm/LASSO.lean b/Optlib/Algorithm/LASSO.lean index 9e9dc64..544255c 100644 --- a/Optlib/Algorithm/LASSO.lean +++ b/Optlib/Algorithm/LASSO.lean @@ -30,6 +30,9 @@ variable {f' : (EuclideanSpace ℝ (Fin n)) → (EuclideanSpace ℝ (Fin n))} local notation "‖" x "‖₂" => @Norm.norm (EuclideanSpace ℝ (Fin m)) (PiLp.instNorm 2 fun _ ↦ ℝ) x local notation "‖" x "‖₁" => (Finset.sum Finset.univ (fun (i : Fin n) => ‖x i‖)) +instance : CoeTC ((Fin m) → ℝ) (EuclideanSpace ℝ (Fin m)) := ⟨WithLp.toLp 2⟩ +instance : CoeTC ((Fin n) → ℝ) (EuclideanSpace ℝ (Fin n)) := ⟨WithLp.toLp 2⟩ + open Set Real Matrix Finset /- `u ⬝ Av = Aᵀu ⬝ v` for u v in EuclideanSpace -/ @@ -53,8 +56,11 @@ lemma norm2eq_dot (x : EuclideanSpace ℝ (Fin m)) : ‖x‖₂ ^ 2 = x ⬝ᵥ /- `⟪x, y⟫_ℝ = x ⬝ y` for x y in EuclideanSpace -/ -lemma real_inner_eq_dot (x y : EuclideanSpace ℝ (Fin m)) : inner x y = x ⬝ᵥ y := by - simp; rw [dotProduct] +lemma real_inner_eq_dot (x y : EuclideanSpace ℝ (Fin m)) : inner ℝ x y = x ⬝ᵥ y := by + calc + inner ℝ x y = y.ofLp ⬝ᵥ star x.ofLp := EuclideanSpace.inner_eq_star_dotProduct (x := x) (y := y) + _ = y.ofLp ⬝ᵥ x.ofLp := by simp + _ = x.ofLp ⬝ᵥ y.ofLp := by simpa using (dotProduct_comm y.ofLp x.ofLp) /- gradient of a quadratic in ℝⁿ -/ @@ -80,17 +86,21 @@ lemma quadratic_gradient : ∀ x : (EuclideanSpace ℝ (Fin n)), · apply div_pos εpos; rw [sq_pos_iff]; linarith [normApos] intro y ydist; rw [inner_smul_left] - simp; rw [← dotProduct] - have aux1 : (fun x_1 ↦ ((Aᵀ * A) *ᵥ x) x_1) ⬝ᵥ (fun x_1 ↦ y x_1 - x x_1) - = (Aᵀ * A) *ᵥ x ⬝ᵥ (y - x) := by - rw [dotProduct, dotProduct]; simp - rw [aux1, ← mulVec_mulVec, ← dot_mul_eq_transpose_mul_dot _ (y - x), Matrix.mulVec_sub, + simp; rw [real_inner_eq_dot (m := n)] + have hsub : A *ᵥ (y - x).ofLp = A *ᵥ (y.ofLp - x.ofLp) := by simp + rw [← mulVec_mulVec, ← dot_mul_eq_transpose_mul_dot _ (y - x), hsub, Matrix.mulVec_sub, dotProduct_sub] ring_nf have aux2 (u v : Fin m → ℝ) : u ⬝ᵥ u + (v ⬝ᵥ v - v ⬝ᵥ u * 2) = (u - v) ⬝ᵥ (u - v) := by rw [dotProduct_sub, sub_dotProduct, sub_dotProduct, ← sub_add, sub_sub, dotProduct_comm u v] rw [← mul_two, add_comm_sub] - rw [aux2, ← norm2eq_dot]; simp; rw [← Matrix.mulVec_sub] + rw [aux2, ← norm2eq_dot] + simp + have hnormsub : WithLp.toLp 2 (A *ᵥ y.ofLp) - WithLp.toLp 2 (A *ᵥ x.ofLp) + = WithLp.toLp 2 (A *ᵥ (y - x).ofLp) := by + ext i + simp [sub_eq_add_neg, Matrix.mulVec_add, Matrix.mulVec_neg] + rw [hnormsub] calc ‖(A *ᵥ (y - x))‖₂ ^ 2 ≤ (normA * ‖x - y‖) ^ 2 := by rw [norm_sub_rev] @@ -121,9 +131,8 @@ private lemma linear_gradient : ∀ x : (EuclideanSpace ℝ (Fin n)), use ε; use εpos intro y _ rw [dot_mul_eq_transpose_mul_dot, dot_mul_eq_transpose_mul_dot, ← dotProduct_sub] - rw [EuclideanSpace.inner_eq_star_dotProduct]; simp - repeat rw [dotProduct] - simp + rw [real_inner_eq_dot (m := n)] + simp [dotProduct] apply mul_nonneg; linarith [εpos]; apply norm_nonneg /- gradient of the square of an affine map in ℝⁿ -/ @@ -166,10 +175,30 @@ lemma affine_sq_convex : apply convex_univ exact (fun x _ => affine_sq_gradient x) intro x _ y _ - rw [Matrix.mulVec_sub, Matrix.mulVec_sub, ← sub_add, sub_add_eq_add_sub, sub_add_cancel, - ← Matrix.mulVec_sub, real_inner_eq_dot] - rw [← dot_mul_eq_transpose_mul_dot,← Matrix.mulVec_sub, ← norm2eq_dot] - apply sq_nonneg + simp [sub_eq_add_neg, add_assoc, add_left_comm, add_comm, real_inner_eq_dot] + repeat rw [← dot_mul_eq_transpose_mul_dot] + let u : Fin m → ℝ := A *ᵥ x.ofLp + -b + let v : Fin m → ℝ := A *ᵥ y.ofLp + -b + have hdot : u ⬝ᵥ (A *ᵥ x.ofLp) + (v ⬝ᵥ (A *ᵥ y.ofLp) + (-u ⬝ᵥ (A *ᵥ y.ofLp) + -v ⬝ᵥ (A *ᵥ x.ofLp))) + = (u - v) ⬝ᵥ ((A *ᵥ x.ofLp) - (A *ᵥ y.ofLp)) := by + rw [dotProduct_sub, sub_dotProduct, sub_dotProduct] + simp + abel_nf + have hdot' : + (WithLp.toLp 2 (A *ᵥ x.ofLp + -b)).ofLp ⬝ᵥ A *ᵥ x.ofLp + + ((WithLp.toLp 2 (A *ᵥ y.ofLp + -b)).ofLp ⬝ᵥ A *ᵥ y.ofLp + + (-((WithLp.toLp 2 (A *ᵥ x.ofLp + -b)).ofLp ⬝ᵥ A *ᵥ y.ofLp) + + -((WithLp.toLp 2 (A *ᵥ y.ofLp + -b)).ofLp ⬝ᵥ A *ᵥ x.ofLp))) = + (u - v) ⬝ᵥ ((A *ᵥ x.ofLp) - (A *ᵥ y.ofLp)) := by + simpa [u, v] using hdot + rw [hdot'] + have huv : (u - v) = (A *ᵥ x.ofLp) - (A *ᵥ y.ofLp) := by + dsimp [u, v] + abel_nf + rw [huv] + have hnonneg : 0 ≤ ‖(WithLp.toLp 2 ((A *ᵥ x.ofLp) - (A *ᵥ y.ofLp)) : EuclideanSpace ℝ (Fin m))‖₂ ^ 2 := by + exact sq_nonneg ‖(WithLp.toLp 2 ((A *ᵥ x.ofLp) - (A *ᵥ y.ofLp)) : EuclideanSpace ℝ (Fin m))‖₂ + simpa [norm2eq_dot] using hnonneg /- ‖ ‖₁ is convex on ℝⁿ -/ @@ -182,7 +211,7 @@ lemma norm_one_convex : ConvexOn ℝ univ (fun x : (EuclideanSpace ℝ (Fin n)) intro i _ simp calc - |a * x i + b * y i| ≤ |a * x i| + |b * y i| := by apply abs_add + |a * x i + b * y i| ≤ |a * x i| + |b * y i| := by exact abs_add_le _ _ _ = a * |x i| + b * |y i| := by rw [abs_mul, abs_mul, abs_of_nonneg anneg, abs_of_nonneg bnneg] @@ -216,7 +245,9 @@ theorem norm_one_proximal rw [prox_iff_subderiv_smul (fun x : (EuclideanSpace ℝ (Fin n)) => ‖x‖₁) norm_one_convex tμpos] rw [← mem_SubderivAt, HasSubgradientAt] intro y - simp; rw [← sum_add_distrib]; apply sum_le_sum + simp [real_inner_eq_dot (m := n), dotProduct] + rw [← Finset.sum_add_distrib] + apply sum_le_sum intro i _ let abs_subg := SubderivAt_abs (xm i) by_cases hxm : xm i = 0 @@ -228,9 +259,12 @@ theorem norm_one_proximal · simp [hx] at minpoint; exact minpoint calc μ⁻¹ * t⁻¹ * x i * y i ≤ μ⁻¹ * t⁻¹ * |x i * y i| := by - rw [mul_assoc _ (x i), mul_le_mul_left] - apply le_abs_self; rw [← mul_inv, inv_pos]; apply mul_pos - linarith [μpos]; linarith [tpos] + have hcoef : 0 ≤ μ⁻¹ * t⁻¹ := by + apply mul_nonneg + · exact inv_nonneg.2 (le_of_lt μpos) + · exact inv_nonneg.2 (le_of_lt tpos) + simpa [mul_assoc, mul_left_comm, mul_comm] using + (mul_le_mul_of_nonneg_left (le_abs_self (x i * y i)) hcoef) _ ≤ |y i| * μ⁻¹ * t⁻¹ * t * μ := by rw [abs_mul, ← mul_assoc, mul_comm, ← mul_assoc, ← mul_assoc, mul_assoc _ t] apply mul_le_mul_of_nonneg_left @@ -242,41 +276,55 @@ theorem norm_one_proximal linarith [μpos]; linarith [tpos] rw [eq_ite_iff, or_iff_right] at abs_subg rcases abs_subg with ⟨_, abs_subg⟩ - let sgnxm := sign (xm i) + let sgnxm : ℝ := (xm i).sign have aux : sgnxm ∈ SubderivAt abs (xm i) := by - rw [abs_subg]; simp + rw [abs_subg]; simp [sgnxm] rw [← mem_SubderivAt, HasSubgradientAt] at aux specialize aux (y i) - have aux2 : inner sgnxm (y i - xm i) = μ⁻¹ * t⁻¹ * (x i - xm i) * (y i - xm i) := by - simp [sgnxm]; left + have aux2 : inner ℝ sgnxm (y i - xm i) = μ⁻¹ * t⁻¹ * (x i - xm i) * (y i - xm i) := by + simp [sgnxm] rw [minpoint]; simp; rw [minpoint] at hxm; simp at hxm; push_neg at hxm rcases hxm with ⟨xiieq0, ieq⟩ have eq1 : max (|x i| - t * μ) 0 = |x i| - t * μ := by apply max_eq_left; linarith rw [eq1]; simp; nth_rw 3 [mul_sub] - rw [← sub_add, real_sign_mul_abs]; simp nth_rw 2 [mul_comm (sign (x i))] - rw [← mul_assoc _ (t * μ), ← mul_inv, mul_comm μ t, inv_mul_cancel₀, one_mul] by_cases hx : 0 < x i - · have eq2 : sign (sign (x i) * (|x i| - t * μ)) = 1 := by - apply Real.sign_of_pos; apply mul_pos - calc - 0 < 1 := by simp - 1 = sign (x i) := by - symm; apply Real.sign_of_pos hx + · have eq2 : (sign (x i) * (|x i| - t * μ)).sign = 1 := by + apply Real.sign_of_pos + apply mul_pos + · calc + 0 < 1 := by simp + 1 = sign (x i) := by + symm; exact Real.sign_of_pos hx linarith [ieq] - rw [eq2]; symm; apply Real.sign_of_pos hx + rw [eq2] + simp [Real.sign_of_pos hx, abs_of_pos hx] + field_simp [μpos.ne', tpos.ne'] + have hinner1 : inner ℝ 1 (y i - (x i - t * μ)) + = (y i - (x i - t * μ)) * (starRingEnd ℝ) 1 := by + exact RCLike.inner_apply 1 (y i - (x i - t * μ)) + rw [hinner1] + simp · have xneg : x i < 0 := by contrapose! xiieq0; linarith - have eq2 : sign (sign (x i) * (|x i| - t * μ)) = -1 := by - apply Real.sign_of_neg; apply mul_neg_of_neg_of_pos - calc - sign (x i) = -1 := by - apply Real.sign_of_neg xneg - _ < 0 := by linarith - linarith [ieq] - rw [eq2]; symm; apply Real.sign_of_neg xneg - linarith [μpos, tpos] + have eq2 : (sign (x i) * (|x i| - t * μ)).sign = -1 := by + apply Real.sign_of_neg + apply mul_neg_of_neg_of_pos + · calc + sign (x i) = -1 := by + exact Real.sign_of_neg xneg + _ < 0 := by linarith + · linarith [ieq] + rw [eq2] + simp [Real.sign_of_neg xneg, abs_of_neg xneg] + field_simp [μpos.ne', tpos.ne'] + have hinner : inner ℝ 1 (y i - (t * μ + x i)) + = (y i - (t * μ + x i)) * (starRingEnd ℝ) 1 := by + exact RCLike.inner_apply 1 (y i - (t * μ + x i)) + rw [hinner] + simp + abel_nf rw [aux2] at aux; linarith [aux] push_neg; intro hxm'; contrapose! hxm'; exact hxm @@ -294,16 +342,19 @@ variable {n m : ℕ+} local notation "‖" x "‖₂" => @Norm.norm (EuclideanSpace ℝ (Fin m)) (PiLp.instNorm 2 fun _ ↦ ℝ) x local notation "‖" x "‖₁" => (Finset.sum Finset.univ (fun (i : Fin n) => ‖x i‖)) +instance : CoeTC ((Fin m) → ℝ) (EuclideanSpace ℝ (Fin m)) := ⟨WithLp.toLp 2⟩ +instance : CoeTC ((Fin n) → ℝ) (EuclideanSpace ℝ (Fin n)) := ⟨WithLp.toLp 2⟩ + open Set Real Matrix Finset NNReal structure LASSO (A : Matrix (Fin m) (Fin n) ℝ) (b : (Fin m) → ℝ) (μ : ℝ) (μpos : 0 < μ) (Ane0 : A ≠ 0) - (x₀ : (EuclideanSpace ℝ (Fin n))) := + (x₀ : (EuclideanSpace ℝ (Fin n))) where (f h : (EuclideanSpace ℝ (Fin n)) → ℝ) (f' : (EuclideanSpace ℝ (Fin n)) → (EuclideanSpace ℝ (Fin n))) (L : ℝ≥0) (t : ℝ) (xm : (EuclideanSpace ℝ (Fin n))) (x y : ℕ → (EuclideanSpace ℝ (Fin n))) (feq : f = fun x : (EuclideanSpace ℝ (Fin n)) => (1 / 2) * ‖A *ᵥ x - b‖₂ ^ 2) - (f'eq : f' = fun x : (EuclideanSpace ℝ (Fin n)) => (Aᵀ *ᵥ (A *ᵥ x - b))) + (f'eq : f' = fun x : (EuclideanSpace ℝ (Fin n)) => ((Aᵀ *ᵥ (A *ᵥ x - b)) : EuclideanSpace ℝ (Fin n))) (heq : h = fun y => μ • ‖y‖₁) (teq : t = 1 / L) (Leq : L = ‖(Matrix.toEuclideanLin ≪≫ₗ LinearMap.toContinuousLinearMap) (Aᵀ * A)‖₊) (minphi : IsMinOn (f + h) Set.univ xm) @@ -328,11 +379,33 @@ instance {A : Matrix (Fin m) (Fin n) ℝ} {b : (Fin m) → ℝ} {μ : ℝ} {μpo exact (fun x => affine_sq_gradient x) h₂ : LipschitzWith p.L p.f' := by rw [lipschitzWith_iff_norm_sub_le]; intro x y - rw [p.f'eq]; simp - rw [← Matrix.mulVec_sub, ← sub_add, sub_add_eq_add_sub, sub_add_cancel] - rw [← Matrix.mulVec_sub] - rw [p.Leq]; simp - apply Matrix.l2_opNorm_mulVec (Aᵀ * A) + rw [p.f'eq, p.Leq] + have hsub : + Aᵀ *ᵥ (A *ᵥ x.ofLp - b) - Aᵀ *ᵥ (A *ᵥ y.ofLp - b) + = (Aᵀ * A) *ᵥ (x.ofLp - y.ofLp) := by + have hinner : A *ᵥ x.ofLp - b - (A *ᵥ y.ofLp - b) = A *ᵥ x.ofLp - A *ᵥ y.ofLp := by + abel_nf + calc + Aᵀ *ᵥ (A *ᵥ x.ofLp - b) - Aᵀ *ᵥ (A *ᵥ y.ofLp - b) + = Aᵀ *ᵥ (A *ᵥ x.ofLp - b - (A *ᵥ y.ofLp - b)) := by + symm + exact Matrix.mulVec_sub Aᵀ (A *ᵥ x.ofLp - b) (A *ᵥ y.ofLp - b) + _ = Aᵀ *ᵥ (A *ᵥ x.ofLp - A *ᵥ y.ofLp) := by rw [hinner] + _ = (Aᵀ * A) *ᵥ (x.ofLp - y.ofLp) := by rw [← Matrix.mulVec_sub, mulVec_mulVec] + have hleft : + ‖(fun z : EuclideanSpace ℝ (Fin n) => WithLp.toLp 2 (Aᵀ *ᵥ (A *ᵥ z.ofLp - b))) x + - (fun z : EuclideanSpace ℝ (Fin n) => WithLp.toLp 2 (Aᵀ *ᵥ (A *ᵥ z.ofLp - b))) y‖ + = ‖WithLp.toLp 2 ((Aᵀ * A) *ᵥ (x.ofLp - y.ofLp))‖ := by + calc + ‖(fun z : EuclideanSpace ℝ (Fin n) => WithLp.toLp 2 (Aᵀ *ᵥ (A *ᵥ z.ofLp - b))) x + - (fun z : EuclideanSpace ℝ (Fin n) => WithLp.toLp 2 (Aᵀ *ᵥ (A *ᵥ z.ofLp - b))) y‖ + = ‖WithLp.toLp 2 (Aᵀ *ᵥ (A *ᵥ x.ofLp - b) - Aᵀ *ᵥ (A *ᵥ y.ofLp - b))‖ := by + simp [sub_eq_add_neg] + _ = ‖WithLp.toLp 2 ((Aᵀ * A) *ᵥ (x.ofLp - y.ofLp))‖ := by simp [hsub] + rw [hleft] + have hmul := Matrix.l2_opNorm_mulVec (Aᵀ * A) (x - y) + rw [Matrix.l2_opNorm_def (A := Aᵀ * A)] at hmul + simpa [sub_eq_add_neg] using hmul h₃ : ContinuousOn p.h univ := by rw [ContinuousOn] intro x _ @@ -364,40 +437,49 @@ instance {A : Matrix (Fin m) (Fin n) ℝ} {b : (Fin m) → ℝ} {μ : ℝ} {μpo calc |μ| * |Finset.sum Finset.univ fun i ↦ (|y i| - |x i|)| ≤ |μ| * Finset.sum Finset.univ fun i ↦ |(|y i| - |x i|)| := by - rw [mul_le_mul_left]; apply Finset.abs_sum_le_sum_abs - simp; linarith [μpos] + exact mul_le_mul_of_nonneg_left + (Finset.abs_sum_le_sum_abs (s := Finset.univ) (f := fun i : Fin n ↦ (|y i| - |x i|))) + (abs_nonneg _) _ ≤ |μ| * (n * (ε / n / μ)) := by - rw [mul_le_mul_left] + refine mul_le_mul_of_nonneg_left ?_ (abs_nonneg _) calc (Finset.sum Finset.univ fun i ↦ |(|y i| - |x i|)|) ≤ (Finset.sum Finset.univ (fun _ ↦ (ε / n / μ))) := by apply Finset.sum_le_sum exact fun i _ => le i _ = (n * (ε / n / μ)) := by simp - simp; linarith [μpos] _ = ε := by - field_simp; rw [mul_comm, ← mul_assoc, mul_comm ε] - simp; left; linarith + rw [abs_of_pos μpos] + field_simp [μpos.ne'] minphi : IsMinOn (p.f + p.h) Set.univ p.xm := p.minphi tpos : 0 < p.t := by rw [p.teq]; simp rw [p.Leq]; simp - rw [Transpose_mul_self_eq_zero] - exact Ane0 + intro h + have hAtA : Aᵀ * A = 0 := by + apply (Matrix.toLpLin (p := (2 : ENNReal)) (q := (2 : ENNReal))).injective + simpa [Matrix.toLpLin_mul_same] using h + exact Ane0 ((Transpose_mul_self_eq_zero).1 hAtA) step : p.t ≤ 1 / p.L := by rw [p.teq] ori : p.x 0 = x₀ := p.ori hL : p.L > (0 : ℝ) := by rw [p.Leq]; simp - rw [Transpose_mul_self_eq_zero] - exact Ane0 + intro h + have hAtA : Aᵀ * A = 0 := by + apply (Matrix.toLpLin (p := (2 : ENNReal)) (q := (2 : ENNReal))).injective + simpa [Matrix.toLpLin_mul_same] using h + exact Ane0 ((Transpose_mul_self_eq_zero).1 hAtA) update : ∀ (k : ℕ), prox_prop (p.t • p.h) (p.x k - p.t • p.f' (p.x k)) (p.x (k + 1)) := by intro k apply norm_one_proximal · rw [p.heq] · rw [p.teq]; simp rw [p.Leq]; simp - rw [Transpose_mul_self_eq_zero] - exact Ane0 + intro h + have hAtA : Aᵀ * A = 0 := by + apply (Matrix.toLpLin (p := (2 : ENNReal)) (q := (2 : ENNReal))).injective + simpa [Matrix.toLpLin_mul_same] using h + exact Ane0 ((Transpose_mul_self_eq_zero).1 hAtA) · linarith · intro i; rw [p.update2 k, p.update1 k] diff --git a/Optlib/Algorithm/Nesterov/NesterovAccelerationFirst.lean b/Optlib/Algorithm/Nesterov/NesterovAccelerationFirst.lean index 794055e..7364fb0 100644 --- a/Optlib/Algorithm/Nesterov/NesterovAccelerationFirst.lean +++ b/Optlib/Algorithm/Nesterov/NesterovAccelerationFirst.lean @@ -25,7 +25,7 @@ variable {f h : E → ℝ} {f' : E → E} {x0 : E} open Set Real -class Nesterov_first (f h: E → ℝ) (f' : E → E) (x0 : E) := +class Nesterov_first (f h: E → ℝ) (f' : E → E) (x0 : E) where (l : NNReal) (x y : ℕ → E) (t γ : ℕ → ℝ) (hl : l > (0 : ℝ)) (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (convf : ConvexOn ℝ univ f) (h₂ : LipschitzWith l f') (convh : ConvexOn ℝ univ h) @@ -66,8 +66,9 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : linarith [(alg.tbound k).1] _ ≤ (1 / alg.t k) * (alg.t k * h z - ⟪alg.y k - alg.x (k + 1) - (alg.t k) • (f' (alg.y k)), z - alg.x (k + 1)⟫) := by - rw [mul_le_mul_left]; apply add_le_add_right; exact hieq1 z k - simp; linarith [(alg.tbound k).1] + apply mul_le_mul_of_nonneg_left + · exact sub_le_sub_right (hieq1 z k) _ + · exact le_of_lt (one_div_pos.2 (alg.tbound k).1) _ = h z + ⟪(f' (alg.y k)) + (1 / alg.t k) • (alg.x (k + 1) - alg.y k), z - alg.x (k + 1)⟫ := by rw [sub_eq_add_neg, ← inner_neg_left, mul_add, ← mul_assoc, one_div_mul_cancel] @@ -82,10 +83,16 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : + alg.l / 2 * ‖x - y‖ ^ 2 := by apply lipschitz_continuos_upper_bound' alg.h₁ alg.h₂ y x _ ≤ f y + ⟪f' y, x - y⟫ + 1 / (2 * alg.t k) * ‖x - y‖ ^ 2 := by - apply add_le_add_left; apply mul_le_mul_of_nonneg_right - rw [← mul_one_div, ← one_div_mul_one_div, mul_comm, mul_le_mul_left] - rw [le_one_div]; exact (alg.tbound k).2; exact alg.hl; exact (alg.tbound k).1 - simp; apply sq_nonneg + have hmul : alg.l / 2 * ‖x - y‖ ^ 2 ≤ 1 / (2 * alg.t k) * ‖x - y‖ ^ 2 := by + have hcoef : (alg.l : ℝ) ≤ 1 / alg.t k := by + have h := one_div_le_one_div_of_le (alg.tbound k).1 (alg.tbound k).2 + simpa [one_div, inv_inv] using h + have hhalf : (1 / 2 : ℝ) * (alg.l : ℝ) ≤ (1 / 2 : ℝ) * (1 / alg.t k) := + mul_le_mul_of_nonneg_left hcoef (by norm_num) + apply mul_le_mul_of_nonneg_right + · simpa [div_eq_mul_inv, mul_assoc, mul_left_comm, mul_comm] using hhalf + · exact sq_nonneg ‖x - y‖ + linarith [hmul] let φ := fun z : E ↦ f z + h z have φieq2 : ∀ z : E, ∀ k : ℕ, φ (alg.x (k + 1)) ≤ φ z + (1 / alg.t k) * ⟪alg.x (k + 1) - alg.y k, z - alg.x (k + 1)⟫ @@ -94,11 +101,12 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : calc _ ≤ f (alg.y k) + ⟪f' (alg.y k), alg.x (k + 1) - alg.y k⟫ + 1 / (2 * alg.t k) * ‖alg.x (k + 1) - alg.y k‖ ^ 2 + h (alg.x (k + 1)) := by - apply add_le_add_right; exact fieq1 k (alg.x (k + 1)) (alg.y k) + simpa [φ, add_assoc, add_left_comm, add_comm] using + add_le_add_right (fieq1 k (alg.x (k + 1)) (alg.y k)) (h (alg.x (k + 1))) _ ≤ f (alg.y k) + ⟪f' (alg.y k), alg.x (k + 1) - alg.y k⟫ + 1 / (2 * alg.t k) * ‖alg.x (k + 1) - alg.y k‖ ^ 2 + h z + ⟪(f' (alg.y k)) + (1 / alg.t k) • (alg.x (k + 1) - alg.y k), z - alg.x (k + 1)⟫ := by - rw [add_assoc _ (h z)]; apply add_le_add_left; exact hieq2 z k + rw [add_assoc _ (h z)]; linarith [hieq2 z k] _ = f (alg.y k) + ⟪f' (alg.y k), z - alg.y k⟫ + (1 / alg.t k) * ⟪alg.x (k + 1) - alg.y k, z - alg.x (k + 1)⟫ + 1 / (2 * alg.t k) * ‖alg.x (k + 1) - alg.y k‖ ^ 2 + h z := by @@ -108,10 +116,12 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : _ ≤ φ z + (1 / alg.t k) * ⟪alg.x (k + 1) - alg.y k, z - alg.x (k + 1)⟫ + (1 / (2 * alg.t k)) * ‖alg.x (k + 1) - alg.y k‖ ^ 2 := by - rw [add_comm _ (h z)]; repeat rw [← add_assoc]; repeat apply add_le_add_right - simp [φ]; rw [add_comm _ (h z), add_assoc]; apply add_le_add_left - apply Convex_first_order_condition' - exact alg.h₁ (alg.y k); exact alg.convf; repeat simp + rw [add_comm _ (h z)]; repeat rw [← add_assoc] + simp [φ]; rw [add_comm _ (h z), add_assoc] + have hcv : f (alg.y k) + ⟪f' (alg.y k), z - alg.y k⟫ ≤ f z := by + apply Convex_first_order_condition' + exact alg.h₁ (alg.y k); exact alg.convf; repeat simp + linarith [hcv] have φieq3 (k : ℕ) : φ (alg.x (k + 1)) - φ xm - (1 - alg.γ k) * (φ (alg.x k) - φ xm) ≤ 1 / (alg.t k) * ⟪alg.x (k + 1) - alg.y k, (1 - alg.γ k) • (alg.x k) + (alg.γ k) • xm - alg.x (k + 1)⟫ + @@ -137,7 +147,7 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : (1 - alg.γ k) * ((1 / alg.t k) • ⟪alg.x (k + 1) - alg.y k, alg.x k - alg.x (k + 1)⟫ + 1 / (2 * alg.t k) * ‖alg.x (k + 1) - alg.y k‖ ^ 2) := by apply add_le_add - · rw [mul_le_mul_left]; exact ieq2; linarith [(alg.γbound k).1] + · exact mul_le_mul_of_nonneg_left ieq2 (le_of_lt (alg.γbound k).1) · apply mul_le_mul_of_nonneg_left; exact ieq1; linarith [(alg.γbound k).2] _ = (alg.γ k) * (1 / alg.t k) * ⟪alg.x (k + 1) - alg.y k, xm - alg.x (k + 1)⟫ + (1 - alg.γ k) * (1 / alg.t k) * ⟪alg.x (k + 1) - alg.y k, alg.x k - alg.x (k + 1)⟫ + @@ -155,7 +165,7 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : let v := fun k : ℕ+ ↦ alg.x (k - 1) + (1 / (alg.γ (k - 1))) • (alg.x k - alg.x (k - 1)) have eq : ∀ k : ℕ+, alg.y k = (1 - alg.γ k) • alg.x k + (alg.γ k) • (v k) := by intro k - simp [φ, v]; rw [alg.update1 k, sub_smul, sub_add_eq_add_sub, ← smul_add, ← add_sub, one_smul] + simp [v]; rw [alg.update1 k, sub_smul, sub_add_eq_add_sub, ← smul_add, ← add_sub, one_smul] rw [add_left_cancel_iff, ← smul_sub, mul_div_assoc, ← smul_eq_mul, smul_assoc] have h2 : ((1 - alg.γ (k - 1)) / alg.γ (k - 1)) • (alg.x k - alg.x (k - 1)) = alg.x (k - 1) + (alg.γ (k - 1))⁻¹ • (alg.x k - alg.x (k - 1)) - alg.x k := by @@ -210,13 +220,18 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : rw [sq_pos_iff]; linarith [(alg.γbound n).1] have cond' : ∀ n : ℕ+, (1 - alg.γ n) * α n ≤ α (n - 1) := by intro n - let cond := alg.cond n - simp [α] - rw [mul_div_assoc, mul_div_assoc, ← mul_assoc, mul_comm _ 2, mul_assoc, mul_le_mul_left] - rw [← mul_div_assoc]; exact cond; simp + have hmul : + 2 * ((1 - alg.γ n) * alg.t n / alg.γ n ^ 2) ≤ + 2 * (alg.t (n - 1) / alg.γ (n - 1) ^ 2) := + mul_le_mul_of_nonneg_left (alg.cond n) (show (0 : ℝ) ≤ 2 by norm_num) + simpa [α, mul_assoc, mul_left_comm, mul_comm, mul_div_assoc, div_eq_mul_inv] using hmul have h10 (n : ℕ) : α n * (alg.γ n ^ (2 : ℕ) / (2 * alg.t n)) = 1 := by - field_simp [α]; rw [mul_comm, div_self]; apply mul_ne_zero - simp; linarith [(alg.γbound n).1]; linarith [alg.tbound n] + have hγ : alg.γ n ≠ 0 := by linarith [(alg.γbound n).1] + have ht : alg.t n ≠ 0 := by linarith [alg.tbound n] + calc + α n * (alg.γ n ^ (2 : ℕ) / (2 * alg.t n)) + = ((2 * alg.t n) / alg.γ n ^ (2 : ℕ)) * (alg.γ n ^ (2 : ℕ) / (2 * alg.t n)) := by rfl + _ = 1 := by field_simp [hγ, ht] have decrease (n : ℕ+) : (α n) * (φ (alg.x (n + 1)) - φ xm) + ‖v (n + 1) - xm‖ ^ 2 ≤ (α (n - 1)) * (φ (alg.x n) - φ xm) + ‖v n - xm‖ ^ 2 := by calc @@ -227,20 +242,27 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : _ ≤ (α n) * ((alg.γ n) ^ 2 / (2 * alg.t n) * (‖v n - xm‖ ^ 2 - ‖v (n + 1) - xm‖ ^ 2)) + (α n) * ((1 - alg.γ n) * (φ (alg.x n) - φ xm)) + ‖v (n + 1) - xm‖ ^ 2 := by - rw [mul_add]; repeat apply add_le_add_right - rw [mul_le_mul_left]; exact φieq4 n; exact αpos n + rw [mul_add] + have hscaled : α n * (φ (alg.x (n + 1)) - φ xm - (1 - alg.γ n) * (φ (alg.x n) - φ xm)) + ≤ α n * ((alg.γ n) ^ 2 / (2 * alg.t n) * (‖v n - xm‖ ^ 2 - ‖v (n + 1) - xm‖ ^ 2)) := + mul_le_mul_of_nonneg_left (φieq4 n) (le_of_lt (αpos n)) + linarith [hscaled] _ = ‖v n - xm‖ ^ 2 - ‖v (n + 1) - xm‖ ^ 2 + (α n) * ((1 - alg.γ n) * (φ (alg.x n) - φ xm)) + ‖v (n + 1) - xm‖ ^ 2 := by rw [← mul_assoc, h10, one_mul] _ ≤ ‖v n - xm‖ ^ 2 - ‖v (n + 1) - xm‖ ^ 2 + (α (n - 1)) * (φ (alg.x n) - φ xm) + ‖v (n + 1) - xm‖ ^ 2 := by - apply add_le_add_right; apply add_le_add_left - rw [← mul_assoc]; apply mul_le_mul_of_nonneg_right; rw [mul_comm] - exact cond' n - simp rw [isMinOn_iff] at minφ specialize minφ (alg.x n) - simp at minφ; exact minφ + simp at minφ + have hφ : 0 ≤ φ (alg.x n) - φ xm := by + simp [φ] + linarith [minφ] + have hmul : α n * ((1 - alg.γ n) * (φ (alg.x n) - φ xm)) + ≤ α (n - 1) * (φ (alg.x n) - φ xm) := by + have hmul' := mul_le_mul_of_nonneg_right (cond' n) hφ + simpa [mul_assoc, mul_left_comm, mul_comm] using hmul' + linarith [hmul] _ = (α (n - 1)) * (φ (alg.x n) - φ xm) + ‖v n - xm‖ ^ 2 := by rw [add_comm _ (α (↑n - 1) * (φ (alg.x ↑n) - φ xm)), add_assoc, sub_add_cancel] let nr := fun n : ℕ ↦ α n * (φ (alg.x (n + 1)) - φ xm) @@ -257,31 +279,50 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : rw [sub_nonpos] at bound calc f (alg.x (k + 1)) + h (alg.x (k + 1)) - f xm - h xm = + φ (alg.x (k + 1)) - φ xm := by + simp [φ, sub_eq_add_neg, add_assoc, add_left_comm, add_comm] + _ = alg.γ k ^ 2 / (2 * alg.t k) * ((α k) * (φ (alg.x (↑k + 1))- φ xm)) := by - rw [sub_sub, ← mul_assoc, mul_comm _ (α k), h10 k]; simp + have h10' : alg.γ k ^ 2 / (2 * alg.t k) * α k = 1 := by + simpa [mul_assoc, mul_left_comm, mul_comm] using h10 k + calc + φ (alg.x (k + 1)) - φ xm = (alg.γ k ^ 2 / (2 * alg.t k) * α k) * (φ (alg.x (k + 1)) - φ xm) := by + rw [h10']; ring + _ = alg.γ k ^ 2 / (2 * alg.t k) * (α k * (φ (alg.x (k + 1)) - φ xm)) := by ring _ ≤ alg.γ k ^ 2 / (2 * alg.t k) * nr k := by - rw [mul_le_mul_left]; simp [nr]; apply div_pos - rw [sq_pos_iff]; linarith [(alg.γbound k).1]; linarith [alg.tbound k] + apply mul_le_mul_of_nonneg_left + · simp [nr] + · apply le_of_lt; apply div_pos + · rw [sq_pos_iff]; linarith [(alg.γbound k).1] + · linarith [alg.tbound k] _ ≤ alg.γ k ^ 2 / (2 * alg.t k) * nr 0 := by - rw [mul_le_mul_left]; exact bound; apply div_pos - rw [sq_pos_iff]; linarith [(alg.γbound k).1]; linarith [alg.tbound k] + apply mul_le_mul_of_nonneg_left bound + apply le_of_lt; apply div_pos + · rw [sq_pos_iff]; linarith [(alg.γbound k).1] + · linarith [alg.tbound k] _ ≤ alg.γ k ^ 2 / (2 * alg.t k) * ‖x0 - xm‖ ^ 2 := by - rw [mul_le_mul_left]; simp [nr, v, α]; rw [alg.oriγ]; simp - specialize φieq3 0; rw [alg.oriγ] at φieq3; simp at φieq3 - calc - 2 * alg.t 0 * (φ (alg.x 1) - φ xm) + ‖alg.x 1 - xm‖ ^ 2 ≤ 2 * alg.t 0 - * ((alg.t 0)⁻¹ * ⟪alg.x 1 - alg.y 0, xm - alg.x 1⟫ - + (alg.t 0)⁻¹ * 2⁻¹ * ‖alg.x 1 - alg.y 0‖ ^ 2 + φ xm - φ xm) - + ‖alg.x 1 - xm‖ ^ 2 := by - apply add_le_add_right; rw [mul_le_mul_left]; simp; linarith [φieq3] - linarith [alg.tbound 0] - _ = ‖alg.x 0 - xm‖ ^ 2 := by - rw [← add_sub, sub_self, add_zero, mul_add, ← mul_assoc]; ring_nf - rw [mul_inv_cancel₀, one_mul, one_mul, alg.oriy, norm_sub_rev (alg.x 1) xm] - rw [add_comm (⟪alg.x 1 - alg.x 0, xm - alg.x 1⟫ * 2), mul_comm, ← norm_add_sq_real] - simp; rw [norm_sub_rev]; linarith [alg.tbound 0] - rw [alg.initial]; apply div_pos; rw [sq_pos_iff] - linarith [(alg.γbound k).1]; linarith [alg.tbound k] + apply mul_le_mul_of_nonneg_left + · simp [nr, v, α]; rw [alg.oriγ]; simp + specialize φieq3 0; rw [alg.oriγ] at φieq3; simp at φieq3 + calc + 2 * alg.t 0 * (φ (alg.x 1) - φ xm) + ‖alg.x 1 - xm‖ ^ 2 ≤ 2 * alg.t 0 + * ((alg.t 0)⁻¹ * ⟪alg.x 1 - alg.y 0, xm - alg.x 1⟫ + + (alg.t 0)⁻¹ * 2⁻¹ * ‖alg.x 1 - alg.y 0‖ ^ 2 + φ xm - φ xm) + + ‖alg.x 1 - xm‖ ^ 2 := by + have hmul : 2 * alg.t 0 * (φ (alg.x 1) - φ xm) ≤ 2 * alg.t 0 + * ((alg.t 0)⁻¹ * ⟪alg.x 1 - alg.y 0, xm - alg.x 1⟫ + + (alg.t 0)⁻¹ * 2⁻¹ * ‖alg.x 1 - alg.y 0‖ ^ 2 + φ xm - φ xm) := + mul_le_mul_of_nonneg_left (by linarith [φieq3]) (by linarith [alg.tbound 0]) + linarith [hmul] + _ = ‖alg.x 0 - xm‖ ^ 2 := by + rw [← add_sub, sub_self, add_zero, mul_add, ← mul_assoc]; ring_nf + rw [mul_inv_cancel₀, one_mul, one_mul, alg.oriy, norm_sub_rev (alg.x 1) xm] + rw [add_comm (⟪alg.x 1 - alg.x 0, xm - alg.x 1⟫ * 2), mul_comm, ← norm_add_sq_real] + simp; rw [norm_sub_rev]; linarith [alg.tbound 0] + rw [alg.initial] + · apply le_of_lt; apply div_pos; rw [sq_pos_iff] + · linarith [(alg.γbound k).1] + · linarith [alg.tbound k] end Nesterov_first @@ -292,7 +333,7 @@ variable {f h : E → ℝ} {f' : E → E} {x0 : E} open Set Real PNat -class Nesterov_first_fix_stepsize (f h: E → ℝ) (f' : E → E) (x0 : E) := +class Nesterov_first_fix_stepsize (f h: E → ℝ) (f' : E → E) (x0 : E) where (l : NNReal) (hl : l > (0 : ℝ)) (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (convf: ConvexOn ℝ univ f) (h₂ : LipschitzWith l f') (convh : ConvexOn ℝ univ h) @@ -313,9 +354,14 @@ instance {f h: E → ℝ} {f' : E → E} {x0 : E} [p : Nesterov_first_fix_stepsi oriγ := by simp [p.γeq 0] initial := p.initial cond := by - intro n; simp [p.teq n, p.teq (n - 1), p.γeq n, p.γeq (n - 1)]; field_simp - rw [mul_assoc, ← div_div, div_le_div_right, pow_two, ← mul_assoc, mul_div_assoc] - rw [div_self, add_sub]; ring_nf; simp; linarith; linarith [p.hl] + intro n + rw [p.teq n, p.teq (n - 1), p.γeq n, p.γeq (n - 1)] + field_simp [p.hl] + have hn1 : (1 : ℕ) ≤ n := PNat.one_le n + have hcast : (((n : ℕ) - 1 : ℕ) : ℝ) = (n : ℝ) - 1 := by + norm_num [Nat.cast_sub hn1] + rw [hcast] + nlinarith [sq_nonneg ((n : ℝ) + 1)] tbound := by intro k; rw [p.teq k]; simp; exact p.hl hl := p.hl @@ -350,9 +396,8 @@ theorem Nesterov_first_fix_stepsize_converge (minφ : IsMinOn (f + h) univ xm): ‖x0 - xm‖ ^ 2 := rfl rw [h1, h2]; apply Nesterov_first_converge minφ _ ≤ 2 * alg.l / (k + 2) ^ 2 * ‖x0 - xm‖ ^ 2 := by - apply mul_le_mul_of_nonneg_right; rw [alg.γeq k, alg.teq k]; field_simp - rw [pow_two, add_comm]; rw [mul_comm ((k + 2 : ℝ) ^ 2), ← div_div, div_le_div_right] - rw [mul_rotate, ← mul_div, div_self, mul_one] - simp; field_simp; apply sq_nonneg + apply mul_le_mul_of_nonneg_right _ (sq_nonneg _) + rw [alg.γeq k, alg.teq k]; field_simp + nlinarith end Nesterov_first_fix_stepsize diff --git a/Optlib/Algorithm/Nesterov/NesterovAccelerationSecond.lean b/Optlib/Algorithm/Nesterov/NesterovAccelerationSecond.lean index 052e315..ecf6ada 100644 --- a/Optlib/Algorithm/Nesterov/NesterovAccelerationSecond.lean +++ b/Optlib/Algorithm/Nesterov/NesterovAccelerationSecond.lean @@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Authors: Yuxuan Wu, Chenyi Li -/ import Optlib.Function.Proximal +import Mathlib.Tactic /-! # NesterovAccelerationSecond @@ -26,7 +27,7 @@ variable {f h : E → ℝ} {f' : E → E} open Set Real -class Nesterov_second (f h : E → ℝ) (f' : E → E) (x0 : E) := +class Nesterov_second (f h : E → ℝ) (f' : E → E) (x0 : E) where (l : NNReal) (hl : l > (0 : ℝ)) (x y : ℕ → E) (z : ℕ+ → E) (t γ : ℕ → ℝ) (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (convf: ConvexOn ℝ Set.univ f) (h₂ : LipschitzWith l f') (convh : ConvexOn ℝ univ h) @@ -45,7 +46,7 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): ∀ (k : ℕ), f (alg.x (k + 1)) + h (alg.x (k + 1)) - f xm - h xm ≤ (alg.γ (k + 1)) ^ 2 / (2 * alg.t (k + 1)) * ‖x0 - xm‖ ^ 2 := by let φ := fun z : E ↦ f z + h z - have φdef : ∀ z : E, φ z = f z + h z := by simp + have φdef : ∀ z : E, φ z = f z + h z := by intro z; rfl have h1 : ∀ k : ℕ+, alg.γ k • (alg.y (k - 1) - alg.y k) - alg.t k • (f' (alg.z k)) ∈ (SubderivAt (alg.t k • h) (alg.y k)) := by intro k; obtain h1 := alg.update2 k @@ -89,7 +90,6 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): constructor . linarith [(alg.γbound k).2] . contrapose eq1 - push_neg at * linarith [eq1] specialize fall mem1 mem2 pos ((alg.γbound k).1) (by linarith) rw [← (alg.update3 k)] at fall @@ -103,7 +103,7 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): ⟪alg.γ k • (alg.y (k - 1) - alg.y k) - alg.t k • (f' (alg.z k)), w - (alg.y k)⟫ := by intro w k rw [← mul_div_right_comm, ← mul_div, ← mul_sub] - apply (mul_le_mul_right (bsc1 k)).mp + apply le_of_mul_le_mul_right _ (bsc1 k) rw [mul_comm, ← mul_assoc, div_mul, div_self, div_one] rw [mul_assoc] nth_rw 3 [mul_comm] @@ -138,13 +138,14 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): simp symm apply nm0 - apply (mul_le_mul_right ax).mpr - have lc2 : alg.l / 2 > (0 : ℝ) := by linarith [alg.hl] - have tc2 : (2 * alg.t ↑k) > 0 := by linarith [(alg.tbound k).1] - rw [one_div] - apply (le_inv_comm₀ lc2 tc2).mpr - rw [← one_div, ← div_mul, mul_comm] - linarith [(alg.tbound k).2] + apply mul_le_mul_of_nonneg_right + · have lc2 : alg.l / 2 > (0 : ℝ) := by linarith [alg.hl] + have tc2 : (2 * alg.t ↑k) > 0 := by linarith [(alg.tbound k).1] + rw [one_div] + apply (le_inv_comm₀ lc2 tc2).mpr + rw [← one_div, ← div_mul, mul_comm] + linarith [(alg.tbound k).2] + · exact le_of_lt ax have hieq6 : ∀ k : ℕ+, f (alg.x k) ≤ f (alg.z k) + ⟪f' (alg.z k), (1 - alg.γ k) • alg.x (k - 1) + alg.γ k • alg.y k - alg.z k⟫ + ((alg.γ k) ^ 2 / (2 * alg.t k)) * ‖alg.y k - alg.y (k - 1)‖ ^ 2 := by intro k @@ -185,15 +186,14 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): simp only [add_le_add_iff_right] by_cases eq1 : alg.γ k = 1 . simp [eq1] - . push_neg at eq1 + . have pos : 1 - alg.γ k > 0 := by apply lt_iff_le_and_ne.mpr constructor . linarith [(alg.γbound k).2] . contrapose eq1 - push_neg at * linarith [eq1] - apply (mul_le_mul_left pos).mpr + refine mul_le_mul_of_nonneg_left ?_ (le_of_lt pos) apply Convex_first_order_condition' (alg.h₁ (alg.z k)) alg.convf simp simp @@ -227,7 +227,7 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): + alg.γ ↑k ^ 2 / (2 * alg.t ↑k) * ‖alg.y ↑k - alg.y (↑k - 1)‖ ^ 2 := by simp have gpos : alg.γ k > 0 := by exact (alg.γbound k).1 - apply (mul_le_mul_left gpos).mpr + refine mul_le_mul_of_nonneg_left ?_ (le_of_lt gpos) apply Convex_first_order_condition' (alg.h₁ (alg.z k)) alg.convf simp simp @@ -367,7 +367,7 @@ variable {f h : E → ℝ} {f' : E → E} {x0 : E} open Set Real PNat -class Nesterov_second_fix_stepsize (f h: E → ℝ) (f' : E → E) (x0 : E) := +class Nesterov_second_fix_stepsize (f h: E → ℝ) (f' : E → E) (x0 : E) where (l : NNReal) (hl : l > (0 : ℝ)) (x y : ℕ → E) (z : ℕ+ → E) (t γ : ℕ → ℝ) (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (convf: ConvexOn ℝ Set.univ f) (h₂ : LipschitzWith l f') (convh : ConvexOn ℝ univ h) @@ -385,20 +385,18 @@ instance {f h : E → ℝ} {f' : E → E} {x0 : E} [p : Nesterov_second_fix_step convf := p.convf h₂ := p.h₂ convh := p.convh - x := p.x; y := p.y; t := p.t; γ := p.γ; + x := p.x; y := p.y; z := p.z; t := p.t; γ := p.γ; oriy := p.oriy oriγ := by simp [p.γeq 1]; norm_num initial := p.initial cond := by - intro n; rw [p.teq n, p.teq (n + 1), p.γeq n, p.γeq (n + 1)]; field_simp [p.hl] - rw [← div_div, ← div_div, ← div_div] - repeat apply div_le_div_of_nonneg_right _ (by positivity) - rw [pow_two, ← mul_assoc, mul_div_assoc, div_self, mul_one] - · calc - _ = n ^ 2 + (2 : ℝ) * n := by ring_nf - _ ≤ 1 + (2 : ℝ) * n + n ^ 2 := by linarith - _ = (1 + n) ^ 2 := by rw [add_pow_two]; simp - · linarith + intro n + have hn0 : (n : ℕ) ≠ 0 := Nat.ne_of_gt n.2 + have hn1 : ((n : ℕ) + 1) ≠ 0 := Nat.succ_ne_zero _ + rw [p.teq n, p.teq (n + 1), p.γeq n, p.γeq (n + 1)] + simp [hn0] + field_simp [p.hl] + nlinarith [show (0 : ℝ) < (n : ℝ) from by exact_mod_cast n.2] tbound := by intro k; rw [p.teq k]; simp; exact p.hl hl := p.hl @@ -409,7 +407,7 @@ instance {f h : E → ℝ} {f' : E → E} {x0 : E} [p : Nesterov_second_fix_step simp [hk]; positivity · by_cases hk : k = 0 rw [hk]; simp; norm_num; push_neg at hk - simp [hk]; rw [div_le_iff₀ (by positivity)]; simp [hk] + simp [hk]; rw [div_le_iff₀ (by positivity)]; simp have : (k : ℝ) ≥ 1 := by rw [← Nat.pos_iff_ne_zero, Nat.lt_iff_add_one_le, zero_add] at hk; simp [hk] linarith @@ -442,8 +440,14 @@ theorem Nesterov_second_fix_stepsize_converge (minφ : IsMinOn (f + h) Set.univ rw [h1, h2]; apply Nesterov_second_convergence minφ _ ≤ 2 * alg.l / (k + 2) ^ 2 * ‖x0 - xm‖ ^ 2 := by apply mul_le_mul_of_nonneg_right _ (sq_nonneg _) - rw [alg.γeq (k + 1), alg.teq (k + 1)]; field_simp - rw [pow_two, add_comm]; rw [add_assoc, one_add_one_eq_two, ← div_div] - apply le_of_eq; ring_nf + have hk1 : (k + 1 : ℕ) ≠ 0 := Nat.succ_ne_zero k + have hγ : alg.γ (k + 1) = (2 : ℝ) / (k + 2) := by + rw [alg.γeq (k + 1)] + simp [Nat.cast_add, Nat.cast_one] + ring_nf + rw [hγ, alg.teq (k + 1)] + field_simp [alg.hl] + ring_nf + norm_num end Nesterov_second_fix_stepsize diff --git a/Optlib/Algorithm/Nesterov/NesterovSmooth.lean b/Optlib/Algorithm/Nesterov/NesterovSmooth.lean index 5b3fda2..bdea52b 100644 --- a/Optlib/Algorithm/Nesterov/NesterovSmooth.lean +++ b/Optlib/Algorithm/Nesterov/NesterovSmooth.lean @@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Authors: Chenyi Li, Ziyu Wang, Zaiwen Wen -/ import Optlib.Function.Lsmooth +import Mathlib.Tactic /-! # NesterovSmooth @@ -21,7 +22,7 @@ section open Set -class Nesterov (f : E → ℝ) (f' : E → E) (γ : ℕ+ → ℝ) (initial_point : E) := +class Nesterov (f : E → ℝ) (f' : E → E) (γ : ℕ+ → ℝ) (initial_point : E) where (x : ℕ → E) (y : ℕ+ → E) (v : ℕ → E) (l : NNReal) (diff : ∀ x₁, HasGradientAt f (f' x₁) x₁) (update1 : ∀ (k : ℕ+), y k = (1 - γ k) • x (k - 1) + γ k • v (k - 1)) @@ -36,36 +37,35 @@ lemma one_iter (hfun : ConvexOn ℝ Set.univ f) (hg : ∀ (k : ℕ+), γ k = 2 / ∀ (k : ℕ+), f (alg.x k) - f xm - (1 - γ k) * (f (alg.x (k - 1)) - f xm) ≤ alg.l * (γ k) ^ 2 / 2 * (‖alg.v (k - 1) - xm‖ ^ 2 - ‖alg.v k - xm‖ ^ 2) := by have h2 : ∀ (k : ℕ+), ∀ x' : E , f (alg.x k) - f x' ≤ alg.l * - inner (alg.x k - alg.y k) (x' - alg.x k) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by + inner ℝ (alg.x k - alg.y k) (x' - alg.x k) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by intro k x' rw [sub_le_iff_le_add', ← add_assoc] have : (f' (alg.y k)) = alg.l.1 • (alg.y k - alg.x k) := by have update2 : ∀ (k : ℕ+), alg.x k = alg.y k - (1 / alg.l.1) • (f' (alg.y k)) := alg.update2 specialize update2 k - have : alg.l > 0 := alg.hl rw [eq_sub_iff_add_eq', ← eq_sub_iff_add_eq] at update2 rw [← update2, smul_smul] - field_simp - have t1 : f (alg.y k) + inner (f' (alg.y k)) (x' - alg.y k) ≤ f x' := by + simp [ne_of_gt alg.hl] + have t1 : f (alg.y k) + inner ℝ (f' (alg.y k)) (x' - alg.y k) ≤ f x' := by exact Convex_first_order_condition' (alg.diff (alg.y k)) hfun (by trivial) x' (by trivial) calc - _ ≤ f (alg.y k) + inner (f' (alg.y k)) (alg.x k - alg.y k) + + _ ≤ f (alg.y k) + inner ℝ (f' (alg.y k)) (alg.x k - alg.y k) + alg.l.1 / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by exact lipschitz_continuos_upper_bound' alg.diff alg.smooth (alg.y k) (alg.x k) - _ = f (alg.y k) + inner (f' (alg.y k)) (x' - alg.y k + (alg.x k - x')) + + _ = f (alg.y k) + inner ℝ (f' (alg.y k)) (x' - alg.y k + (alg.x k - x')) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [add_comm (x' - alg.y k), add_sub (alg.x k - x'), sub_add, sub_self, sub_zero]; simp - _ = f (alg.y k) + inner (f' (alg.y k)) (x' - alg.y k) + inner (f' (alg.y k)) (alg.x k - x') + _ = f (alg.y k) + inner ℝ (f' (alg.y k)) (x' - alg.y k) + inner ℝ (f' (alg.y k)) (alg.x k - x') + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [inner_add_right, ← add_assoc] - _ ≤ f x' + inner (f' (alg.y k)) (alg.x k - x') + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by + _ ≤ f x' + inner ℝ (f' (alg.y k)) (alg.x k - x') + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [add_le_add_iff_right, add_le_add_iff_right]; exact t1 - _ = f x' + inner (alg.l.1 • (alg.y k - alg.x k)) (alg.x k - x') + + _ = f x' + inner ℝ (alg.l.1 • (alg.y k - alg.x k)) (alg.x k - x') + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [this] - _ = f x' + alg.l * inner (alg.x k - alg.y k) (x' - alg.x k) + + _ = f x' + alg.l * inner ℝ (alg.x k - alg.y k) (x' - alg.x k) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [real_inner_smul_left, ← inner_neg_neg, neg_sub, neg_sub]; simp have h3 : ∀ (k : ℕ+), f (alg.x k) - f xm - (1 - γ k) * (f (alg.x (k - 1)) - f xm) ≤ - alg.l * (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - + alg.l * (inner ℝ (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k)) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by intro k have : f (alg.x k) - f xm - (1 - γ k) * (f (alg.x (k - 1)) - f xm) = γ k * @@ -84,23 +84,23 @@ lemma one_iter (hfun : ConvexOn ℝ Set.univ f) (hg : ∀ (k : ℕ+), γ k = 2 / rw [smul_sub, smul_sub, add_sub, ← add_sub_right_comm, sub_sub, ← add_smul] ring_nf; rw [one_smul, add_comm] calc - _ ≤ γ k * (alg.l * (inner (alg.x k - alg.y k) (xm - alg.x k)) + alg.l / 2 * - ‖alg.x k - alg.y k‖ ^ 2) + (1 - γ k) * (alg.l * (inner (alg.x k - alg.y k) + _ ≤ γ k * (alg.l * (inner ℝ (alg.x k - alg.y k) (xm - alg.x k)) + alg.l / 2 * + ‖alg.x k - alg.y k‖ ^ 2) + (1 - γ k) * (alg.l * (inner ℝ (alg.x k - alg.y k) (alg.x (k - 1) - alg.x k)) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2) := by apply add_le_add · exact mul_le_mul_of_nonneg_left (h2 k xm) hz · exact mul_le_mul_of_nonneg_left (h2 k (alg.x (k - 1))) (by linarith) - _ = alg.l * (γ k * (inner (alg.x k - alg.y k) (xm - alg.x k))) + alg.l * ((1 - γ k) * - (inner (alg.x k - alg.y k) (alg.x (k - 1) - alg.x k))) + + _ = alg.l * (γ k * (inner ℝ (alg.x k - alg.y k) (xm - alg.x k))) + alg.l * ((1 - γ k) * + (inner ℝ (alg.x k - alg.y k) (alg.x (k - 1) - alg.x k))) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by ring_nf - _ = alg.l * inner (alg.x k - alg.y k) (γ k • (xm - alg.x k)) + alg.l * - (inner (alg.x k - alg.y k) ((1 - γ k) • + _ = alg.l * inner ℝ (alg.x k - alg.y k) (γ k • (xm - alg.x k)) + alg.l * + (inner ℝ (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1) - alg.x k))) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [← inner_smul_right _ _ (γ k), ← inner_smul_right _ _ (1 - γ k)] - _ = alg.l * inner (alg.x k - alg.y k) (γ k • (xm - alg.x k) + (1 - γ k) • + _ = alg.l * inner ℝ (alg.x k - alg.y k) (γ k • (xm - alg.x k) + (1 - γ k) • (alg.x (k - 1) - alg.x k)) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [← mul_add, ← inner_add_right (alg.x k - alg.y k)] - _ = alg.l * inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + + _ = alg.l * inner ℝ (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm)- alg.x k) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [this] intro k have hz : γ k ≥ (0 : ℝ) := by @@ -134,30 +134,30 @@ lemma one_iter (hfun : ConvexOn ℝ Set.univ f) (hg : ∀ (k : ℕ+), γ k = 2 / left; rw [mul_inv_cancel₀ (by linarith), one_smul, sub_smul, one_smul, add_comm, sub_add] have this2 : alg.l / 2 * (‖alg.y k - (1 - γ k) • (alg.x (k - 1)) - γ k • xm‖ ^ 2 - ‖alg.x k - (1 - γ k) • alg.x (k - 1) - γ k • xm‖ ^ 2) = alg.l * - (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm)- alg.x k)) + (inner ℝ (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm)- alg.x k)) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [sub_sub, sub_sub, norm_sub_sq_real, norm_sub_sq_real, norm_sub_sq_real] calc - _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l / 2 * 2 * (inner (alg.x k) - ((1 - γ k) • alg.x (↑k - 1) + γ k • xm) - inner (alg.y k) + _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l / 2 * 2 * (inner ℝ (alg.x k) + ((1 - γ k) • alg.x (↑k - 1) + γ k • xm) - inner ℝ (alg.y k) ((1 - γ k) • alg.x (↑k - 1) + γ k • xm)) := by ring_nf - _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner (alg.x k - alg.y k) + _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner ℝ (alg.x k - alg.y k) ((1 - γ k) • alg.x (↑k - 1) + γ k • xm) := by rw [← inner_sub_left]; ring_nf - _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner (alg.x k - alg.y k) + _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner ℝ (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k + alg.x k) := by rw [sub_add, sub_self, sub_zero] - _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner (alg.x k - alg.y k) - (alg.x k) + alg.l * (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner ℝ (alg.x k - alg.y k) + (alg.x k) + alg.l * (inner ℝ (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k)) := by rw [inner_add_right, mul_add]; ring_nf _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * ‖alg.x k‖ ^ 2 - - alg.l * inner (alg.x k) (alg.y k) + alg.l * (inner (alg.x k - alg.y k) ((1 - γ k) + alg.l * inner ℝ (alg.x k) (alg.y k) + alg.l * (inner ℝ (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k)) := by rw [inner_sub_left, mul_sub, mul_sub, real_inner_self_eq_norm_sq] rw [real_inner_comm, add_sub]; - _ = alg.l * (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) + _ = alg.l * (inner ℝ (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k)) + alg.l / 2 * (‖alg.x k‖ ^ 2 - 2 * - inner (alg.x k) (alg.y k) + ‖alg.y k‖ ^ 2) := by ring_nf + inner ℝ (alg.x k) (alg.y k) + ‖alg.y k‖ ^ 2) := by ring_nf rw [this1, this2] exact h3 k @@ -176,18 +176,15 @@ theorem nesterov_algorithm_smooth (hfun: ConvexOn ℝ Set.univ f) specialize con k have : (γ k) ^ 2 > 0 := by rw [hg k] - simp only [Real.rpow_two, div_pow, gt_iff_lt] - apply div_pos (by linarith) - apply sq_pos_of_ne_zero - exact Nat.cast_add_one_ne_zero ↑k - rw [← div_le_div_right this, sub_div, mul_div_right_comm (1 - γ k)] at h4 + positivity + rw [← div_le_div_iff_of_pos_right this, sub_div, mul_div_right_comm (1 - γ k)] at h4 rw [← one_mul (f (alg.x k) - f xm), mul_div_right_comm 1] at h4 rw [mul_div_right_comm (alg.l).1, mul_assoc, mul_comm (γ k ^ 2)] at h4 rw [← mul_assoc, mul_div_assoc] at h4 rw [div_self (by linarith), mul_one, mul_sub (alg.l.1 / 2)] at h4 rw [tsub_le_iff_left, add_sub, le_sub_iff_add_le] at h4 apply le_trans h4 - simp only [Real.rpow_two, ge_iff_le, add_le_add_iff_right, gt_iff_lt, sub_pos, sub_neg] + simp only [add_le_add_iff_right] have : f xm ≤ f (alg.x (k - 1)):= min (by trivial) apply mul_le_mul_of_nonneg_right _ (by linarith) exact con @@ -210,7 +207,7 @@ theorem nesterov_algorithm_smooth (hfun: ConvexOn ℝ Set.univ f) rw [alg.initial1, sub_self, zero_mul, sub_zero] at h4 rw [alg.initial1, sub_self, zero_div, zero_mul, zero_add] simp - simp only [PNat.one_coe, Real.rpow_two, one_pow, mul_one, le_refl, tsub_eq_zero_of_le] at h4 + simp only [PNat.one_coe, one_pow, mul_one, le_refl, tsub_eq_zero_of_le] at h4 rw [← le_sub_iff_add_le, ← mul_sub] exact h4 have h8 : ∀ (k : ℕ+), 1 / (γ k) ^ 2 * (f (alg.x k) - f xm) + alg.l / 2 @@ -226,16 +223,16 @@ theorem nesterov_algorithm_smooth (hfun: ConvexOn ℝ Set.univ f) have : alg.l > 0 := alg.hl apply mul_nonneg _ _ · positivity - · simp only [Real.rpow_two, sq_nonneg] + · simp only [sq_nonneg] have h10 : alg.l / (2 : ℝ) * ‖x₀ - xm‖ ^ 2 / ((1 :ℝ) / (2 / (k + 1)) ^ 2) = 2 * alg.l / ((k + 1) ^ 2) * ‖x₀ - xm‖ ^ 2 := by - simp [Nat.cast_add_one_ne_zero ↑k]; field_simp; ring_nf + field_simp [Nat.cast_add_one_ne_zero ↑k] rw [hg k] at h9 rw [← le_div_iff₀'] at h9 · rw [h10] at h9 exact h9 - · simp only [Real.rpow_two, div_pow, one_div, inv_div] + · simp only [div_pow, one_div, inv_div] apply div_pos · apply sq_pos_of_ne_zero exact Nat.cast_add_one_ne_zero ↑k - · simp only [gt_iff_lt, zero_lt_two, pow_pos] + · simp only [zero_lt_two, pow_pos] diff --git a/Optlib/Algorithm/ProximalGradient.lean b/Optlib/Algorithm/ProximalGradient.lean index 51d60d2..165174d 100644 --- a/Optlib/Algorithm/ProximalGradient.lean +++ b/Optlib/Algorithm/ProximalGradient.lean @@ -26,7 +26,7 @@ variable [ProperSpace E] variable {xm x₀: E} {s : Set E} {f : E → ℝ} {f' : E → E} {h : E → ℝ} variable {t : ℝ} {x : ℕ → E} {L : NNReal} -class proximal_gradient_method (f h: E → ℝ) (f' : E → E) (x₀ : E) := +class proximal_gradient_method (f h: E → ℝ) (f' : E → E) (x₀ : E) where (xm : E) (t : ℝ) (x : ℕ → E) (L : NNReal) (fconv : ConvexOn ℝ univ f) (hconv : ConvexOn ℝ univ h) (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁) (h₂ : LipschitzWith L f') @@ -40,6 +40,8 @@ theorem proximal_gradient_method_converge : ∀ (k : ℕ+), (f (alg.x k) + h (alg.x k) - f alg.xm - h alg.xm) ≤ 1 / (2 * k * alg.t) * ‖x₀ - alg.xm‖ ^ 2 := by intro k + have hkpos : (0 : ℝ) < k := by exact_mod_cast k.pos + have hden : 0 < 2 * k * alg.t := by exact mul_pos (mul_pos (by norm_num) hkpos) alg.tpos rw [mul_comm, mul_one_div, le_div_iff₀, mul_comm] have th : ContinuousOn (alg.t • h) univ := by apply ContinuousOn.const_smul alg.h₃ alg.t @@ -58,78 +60,81 @@ theorem proximal_gradient_method_converge : ∀ (k : ℕ+), rw [one_div_mul_cancel, one_smul] at eq2; exact eq2 linarith [alg.tpos]; exact alg.hconv; linarith [alg.tpos] have fieq1 : ∀ x : E, f (x - alg.t • Gt x) ≤ - f x - alg.t * inner (f' x) (Gt x) + alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 := by + f x - alg.t * inner ℝ (f' x) (Gt x) + alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 := by intro x let y := x - alg.t • Gt x - have ieq1 : f y ≤ f x + inner (f' x) (y - x) + alg.L / 2 * ‖y - x‖ ^ 2 := by + have ieq1 : f y ≤ f x + inner ℝ (f' x) (y - x) + alg.L / 2 * ‖y - x‖ ^ 2 := by apply lipschitz_continuos_upper_bound' alg.h₁ alg.h₂ have eq3 : y - x = - alg.t • Gt x := by simp [Gt, y] rw [eq3] at ieq1; rw [inner_smul_right, norm_smul, mul_pow] at ieq1 rw [← mul_assoc, mul_comm ] at ieq1 simp at ieq1; rw [← sub_eq_add_neg] at ieq1; simp; linarith [alg.tpos] have fieq2 : ∀ x : E, - f (x - alg.t • Gt x) ≤ f x - alg.t * inner (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2 := by + f (x - alg.t • Gt x) ≤ f x - alg.t * inner ℝ (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2 := by intro x calc f (x - alg.t • Gt x) ≤ - f x - alg.t * inner (f' x) (Gt x) + alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 := fieq1 x - _ ≤ f x - alg.t * inner (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2 := by - apply add_le_add_left - apply mul_le_mul_of_nonneg_right - apply div_le_div_of_nonneg_right _ (by norm_num) - calc - alg.t ^ 2 * alg.L ≤ alg.t * (1 / alg.L) * alg.L := by - rw [pow_two]; apply mul_le_mul_of_nonneg_right - rw [mul_le_mul_left alg.tpos]; exact alg.step; simp - _ = alg.t := by field_simp; rw [← mul_div, div_self (by linarith [alg.hL]), mul_one] - exact sq_nonneg _ - have fieq3 : ∀ x z : E, f x + inner (f' x) (z - x) ≤ f z := by + f x - alg.t * inner ℝ (f' x) (Gt x) + alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 := fieq1 x + _ ≤ f x - alg.t * inner ℝ (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2 := by + have hmul : alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 ≤ alg.t / 2 * ‖Gt x‖ ^ 2 := by + apply mul_le_mul_of_nonneg_right + · apply div_le_div_of_nonneg_right _ (by norm_num) + calc + alg.t ^ 2 * alg.L ≤ alg.t * (1 / alg.L) * alg.L := by + rw [pow_two]; apply mul_le_mul_of_nonneg_right + · exact mul_le_mul_of_nonneg_left alg.step (le_of_lt alg.tpos) + · exact le_of_lt alg.hL + _ = alg.t := by field_simp; rw [← mul_div, div_self (by linarith [alg.hL]), mul_one] + · exact sq_nonneg _ + linarith + have fieq3 : ∀ x z : E, f x + inner ℝ (f' x) (z - x) ≤ f z := by intro x z apply Convex_first_order_condition' (alg.h₁ x) alg.fconv simp; simp have hieq1 : ∀ x z : E, - h (x - alg.t • Gt x) + inner (Gt x - f' x) (z - x + alg.t • Gt x) ≤ h z := by + h (x - alg.t • Gt x) + inner ℝ (Gt x - f' x) (z - x + alg.t • Gt x) ≤ h z := by intro x z specialize hG x rw [← mem_SubderivAt, HasSubgradientAt] at hG specialize hG z; rw [sub_add]; apply hG have hieq2 : ∀ x z : E, - h (x - alg.t • Gt x) ≤ h z - inner (Gt x - f' x) (z - x + alg.t • Gt x) := by + h (x - alg.t • Gt x) ≤ h z - inner ℝ (Gt x - f' x) (z - x + alg.t • Gt x) := by intro x z; linarith [hieq1 x z] have univieq : ∀ x z : E, - φ (x - alg.t • Gt x) ≤ φ z + inner (Gt x) (x - z) - alg.t / 2 * ‖Gt x‖ ^ 2 := by + φ (x - alg.t • Gt x) ≤ φ z + inner ℝ (Gt x) (x - z) - alg.t / 2 * ‖Gt x‖ ^ 2 := by intro x z calc - φ (x - alg.t • Gt x) ≤ (f x - alg.t * inner (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2) - + (h z - inner (Gt x - f' x) (z - x + alg.t • Gt x)) := by + φ (x - alg.t • Gt x) ≤ (f x - alg.t * inner ℝ (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2) + + (h z - inner ℝ (Gt x - f' x) (z - x + alg.t • Gt x)) := by linarith [fieq2 x, hieq2 x z] - _ ≤ (f z - inner (f' x) (z - x) - alg.t * inner (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2) - + (h z - inner (Gt x - f' x) (z - x + alg.t • Gt x)) := by + _ ≤ (f z - inner ℝ (f' x) (z - x) - alg.t * inner ℝ (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2) + + (h z - inner ℝ (Gt x - f' x) (z - x + alg.t • Gt x)) := by linarith [fieq3 x z] - _ = φ z + inner (Gt x) (x - z) - alg.t / 2 * ‖Gt x‖ ^ 2 := by + _ = φ z + inner ℝ (Gt x) (x - z) - alg.t / 2 * ‖Gt x‖ ^ 2 := by rw [← inner_smul_right, sub_sub, ← inner_add_right] rw [inner_sub_left, ← sub_add, add_rotate, ← add_comm_sub, ← add_sub] rw [← add_sub, sub_self, add_zero, add_rotate, inner_add_right, ← neg_sub x z] - rw [inner_neg_right, ← sub_sub, sub_neg_eq_add, add_comm _ (inner (Gt x) (x - z))] - rw [add_comm _ (inner (Gt x) (x - z)), ← add_sub _ (φ z), ← add_sub, add_assoc] + rw [inner_neg_right, ← sub_sub, sub_neg_eq_add, add_comm _ (inner ℝ (Gt x) (x - z))] + rw [add_comm _ (inner ℝ (Gt x) (x - z)), ← add_sub _ (φ z), ← add_sub, add_assoc] rw [add_assoc, add_left_cancel_iff] rw [inner_smul_right, real_inner_self_eq_norm_sq] rw [add_comm_sub, ← add_sub] have (a : ℝ): alg.t / 2 * a - alg.t * a = - alg.t / 2 * a := by ring rw [this, sub_eq_add_neg, ← add_assoc, add_comm (h z) (f z)]; field_simp + ring have φieq1 : ∀ x : E, φ (x - alg.t • Gt x) - φ alg.xm ≤ (1 / (2 * alg.t)) * (‖x - alg.xm‖ ^ 2 - ‖x - alg.t • Gt x - alg.xm‖ ^ 2) := by intro x calc - φ (x - alg.t • Gt x) - φ alg.xm ≤ inner (Gt x) (x - alg.xm) - alg.t / 2 * ‖Gt x‖ ^ 2 := by + φ (x - alg.t • Gt x) - φ alg.xm ≤ inner ℝ (Gt x) (x - alg.xm) - alg.t / 2 * ‖Gt x‖ ^ 2 := by linarith [univieq x alg.xm] _ = (1 / (2 * alg.t)) * (‖x - alg.xm‖ ^ 2 - ‖x - alg.t • Gt x - alg.xm‖ ^ 2) := by - have aux (p q : E) : inner p q - alg.t / 2 * ‖p‖ ^ 2 = + have aux (p q : E) : inner ℝ p q - alg.t / 2 * ‖p‖ ^ 2 = 1 / (2 * alg.t) * (‖q‖ ^ 2 - ‖q - alg.t • p‖ ^ 2) := by rw [norm_sub_sq_real]; field_simp; ring_nf rw [inner_smul_right, real_inner_comm]; nth_rw 2 [mul_comm _ (alg.t)⁻¹]; rw [norm_smul, mul_pow, pow_two ‖alg.t‖] - simp; rw [mul_comm _ (inner q p), mul_assoc _ alg.t, mul_inv_cancel₀, ← mul_assoc] + simp; rw [mul_comm _ (inner ℝ q p), mul_assoc _ alg.t, mul_inv_cancel₀, ← mul_assoc] rw [← mul_assoc, inv_mul_cancel₀]; simp repeat linarith [alg.tpos] rw [sub_right_comm]; apply aux @@ -144,7 +149,7 @@ theorem proximal_gradient_method_converge : ∀ (k : ℕ+), rw [iter i] calc φ ((alg.x i) - alg.t • Gt (alg.x i)) ≤ φ (alg.x i) - + inner (Gt (alg.x i)) ((alg.x i) - (alg.x i)) + + inner ℝ (Gt (alg.x i)) ((alg.x i) - (alg.x i)) - alg.t / 2 * ‖Gt (alg.x i)‖ ^ 2 := by linarith [univieq (alg.x i) (alg.x i)] _ ≤ φ (alg.x i) := by @@ -194,12 +199,15 @@ theorem proximal_gradient_method_converge : ∀ (k : ℕ+), _ ≤ 2 * alg.t * ((1 / (2 * alg.t)) * ‖(alg.x 0) - alg.xm‖ ^ 2 - (1 / (2 * alg.t)) * ‖(alg.x k) - alg.xm‖ ^ 2) := by - rw [mul_le_mul_left] - let ieq' := ieq k; simp at ieq' - simp; apply ieq'; linarith [alg.tpos] + have ieq' : k * (φ (alg.x k) - φ alg.xm) ≤ + (1 / (2 * alg.t)) * ‖(alg.x 0) - alg.xm‖ ^ 2 + - (1 / (2 * alg.t)) * ‖(alg.x k) - alg.xm‖ ^ 2 := by + simpa [nsmul_eq_mul] using (ieq k) + exact mul_le_mul_of_nonneg_left ieq' (by linarith [alg.tpos]) _ = ‖(alg.x 0) - alg.xm‖ ^ 2 - ‖(alg.x k) - alg.xm‖ ^ 2 := by rw [← mul_sub, ← mul_assoc, mul_one_div_cancel]; simp; linarith [alg.tpos] _ ≤ ‖x₀ - alg.xm‖ ^ 2 := by rw [alg.ori]; simp - field_simp; linarith [alg.tpos] + field_simp + linarith [hden] end method diff --git a/Optlib/Algorithm/SubgradientMethod.lean b/Optlib/Algorithm/SubgradientMethod.lean index 7150965..10ce385 100644 --- a/Optlib/Algorithm/SubgradientMethod.lean +++ b/Optlib/Algorithm/SubgradientMethod.lean @@ -40,9 +40,9 @@ theorem bounded_subgradient_to_Lipschitz (hf : ConvexOn ℝ univ f) (hc : Contin rcases hx₂' with ⟨gx, hx₁⟩ have hx₃ : ‖gx‖ ≤ G := by rcases h hx₁ with hx; apply hx rcases hx₁ y with hx₂ - have hx₄ : f x - f y ≤ inner gx (x - y) := by + have hx₄ : f x - f y ≤ inner ℝ gx (x - y) := by rw [add_comm] at hx₂ - have : f x ≤ f y - inner gx (y - x) := le_sub_left_of_add_le hx₂ + have : f x ≤ f y - inner ℝ gx (y - x) := le_sub_left_of_add_le hx₂ rw [sub_eq_add_neg, ← inner_neg_right, neg_sub] at this exact sub_left_le_of_le_add this have hy₂' : Nonempty (SubderivAt f y) := SubderivAt.nonempty hf hc y @@ -50,10 +50,10 @@ theorem bounded_subgradient_to_Lipschitz (hf : ConvexOn ℝ univ f) (hc : Contin rcases hy₂' with ⟨gy, hy₁⟩ have hy₃ : ‖gy‖ ≤ G := by rcases h hy₁ with hy; apply hy rcases hy₁ x with hy₂ - have hy₄: f x - f y ≥ inner gy (x - y) := by + have hy₄: f x - f y ≥ inner ℝ gy (x - y) := by calc - _ ≥ f y + inner gy (x - y) - f y := by apply sub_le_sub_right hy₂ - _ = inner gy (x - y) := by ring + _ ≥ f y + inner ℝ gy (x - y) - f y := by apply sub_le_sub_right hy₂ + _ = inner ℝ gy (x - y) := by ring have hG₁: ↑G = ENNReal.ofReal ↑G := by simp rw [edist_dist, edist_dist, hG₁] have hG₂ : ENNReal.ofReal (↑G * (dist x y)) = ENNReal.ofReal ↑G * ENNReal.ofReal (dist x y) := by @@ -66,14 +66,14 @@ theorem bounded_subgradient_to_Lipschitz (hf : ConvexOn ℝ univ f) (hc : Contin apply abs_le.mpr constructor · calc - f x - f y ≥ inner gy (x - y) := hy₄ + f x - f y ≥ inner ℝ gy (x - y) := hy₄ _ ≥ - (‖gy‖ * ‖x - y‖) := by apply neg_le_of_neg_le rw [← inner_neg_right, neg_sub, norm_sub_rev] apply real_inner_le_norm _ ≥ - (↑G * ‖x - y‖) := neg_le_neg (mul_le_mul_of_nonneg_right hy₃ (norm_nonneg _)) · calc - f x - f y ≤ inner gx (x - y) := hx₄ + f x - f y ≤ inner ℝ gx (x - y) := hx₄ _ ≤ ‖gx‖ * ‖x - y‖ := real_inner_le_norm _ _ _ ≤ ↑G * ‖x - y‖ := mul_le_mul_of_nonneg_right hx₃ (norm_nonneg _) @@ -90,9 +90,10 @@ theorem Lipschitz_to_bounded_subgradient (h : LipschitzWith G f ) : have hg₁ : ‖g‖ ≠ 0 := by apply ne_of_gt (lt_of_le_of_lt _ h₃) simp only [NNReal.zero_le_coe] - have hl : inner g (y - x) = ‖g‖ := by + have hl : inner ℝ g (y - x) = ‖g‖ := by rw[hy ,add_comm, ← add_sub, sub_self, add_zero, inner_smul_right, inner_self_eq_norm_sq_to_K] - field_simp; apply pow_two + field_simp + norm_cast rw [hl] at hy₂ have _ : f y - f x ≥ ‖g‖ := by calc @@ -113,7 +114,9 @@ theorem Lipschitz_to_bounded_subgradient (h : LipschitzWith G f ) : calc f y - f x ≤ |f y - f x|:= by apply le_abs_self _ ≤ ↑G * (‖1 / ‖g‖‖ * ‖g‖) := by apply h₃' - _ = ↑G := by field_simp + _ = ↑G := by + rw [Real.norm_eq_abs, one_div, abs_inv, ← abs_of_nonneg (norm_nonneg g), abs_abs] + rw [inv_mul_cancel₀ (abs_ne_zero.mpr hg₁), mul_one] _ < ‖g‖ := by apply h₃ linarith @@ -134,7 +137,7 @@ variable (hf : ConvexOn ℝ univ f) open Finset -class subgradient_method (f : E → ℝ) (x₀ : E) := +class subgradient_method (f : E → ℝ) (x₀ : E) where (x g : ℕ → E) (a : ℕ → ℝ) (ha : ∀ n, a n > 0) (G : NNReal) (lipschitz : LipschitzWith G f) @@ -158,10 +161,12 @@ theorem subgradient_method_converge: rcases h' (alg.hg 0) with h₀' rw [← mul_pow] apply le_trans _ (two_mul_le_add_sq _ _) - rw [mul_assoc, mul_assoc]; apply (mul_le_mul_left two_pos).mpr - rw [mul_comm, ← mul_assoc]; apply (mul_le_mul_right (alg.ha 0)).mpr - have : f (alg.x 0) - f xm ≤ - inner (alg.g 0) (xm - alg.x 0) := by - simp [hk₀]; rw[add_comm]; apply hk₀ + rw [mul_assoc, mul_assoc] + refine mul_le_mul_of_nonneg_left ?_ (show (0 : ℝ) ≤ 2 by positivity) + rw [mul_comm, ← mul_assoc] + refine mul_le_mul_of_nonneg_right ?_ (le_of_lt (alg.ha 0)) + have : f (alg.x 0) - f xm ≤ - inner ℝ (alg.g 0) (xm - alg.x 0) := by + simp; rw[add_comm]; apply hk₀ apply le_trans this _ rw [← inner_neg_right,neg_sub, alg.initial] apply le_trans (real_inner_le_norm _ _) _; rw [mul_comm] @@ -180,30 +185,30 @@ theorem subgradient_method_converge: rw[mul_comm]; simp rcases h' (alg.hg i) with hi apply mul_le_mul_of_nonneg_right _ (sq_nonneg _) - · apply pow_le_pow_left; apply norm_nonneg; apply hi + · nlinarith [hi, norm_nonneg (alg.g i), alg.G.2] have inq₂: 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) - ≤ 2 * inner (alg.x i - xm) (alg.a i • alg.g i) := by - rw [mul_assoc]; apply (mul_le_mul_left two_pos).mpr - rw [inner_smul_right]; apply (mul_le_mul_left (alg.ha i)).mpr + ≤ 2 * inner ℝ (alg.x i - xm) (alg.a i • alg.g i) := by + rw [mul_assoc] + refine mul_le_mul_of_nonneg_left ?_ (show (0 : ℝ) ≤ 2 by positivity) + rw [inner_smul_right] + refine mul_le_mul_of_nonneg_left ?_ (le_of_lt (alg.ha i)) rcases (alg.hg i) xm with hxm calc _ = sInf (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) - f xm := by rw [← heq] _ ≤ f (alg.x i) - f xm := by simp have : f (alg.x i) ∈ Set.range fun (x : Finset.range (k + 1)) => f (alg.x x) := by - simp; use i - constructor - · apply lt_of_le_of_lt hi₂; apply (Nat.lt_succ_self k) - · simp + exact ⟨⟨i, Finset.mem_range.mpr (lt_of_le_of_lt hi₂ (Nat.lt_succ_self k))⟩, rfl⟩ apply csInf_le _ this; apply Finite.bddBelow_range - _ ≤ inner (alg.x i - xm) (alg.g i) := by + _ ≤ inner ℝ (alg.x i - xm) (alg.g i) := by simp; apply le_add_of_sub_left_le rw [sub_eq_add_neg, ← inner_neg_left, neg_sub, real_inner_comm] apply hxm rw [add_assoc, add_assoc] - apply add_le_add_left; apply add_le_add - · apply neg_le_neg; apply inq₂ - · apply inq₁ + have hneg : -(2 * inner ℝ (alg.x i - xm) (alg.a i • alg.g i)) ≤ + -(2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)) := by + exact neg_le_neg inq₂ + linarith [hneg, inq₁] have h₁' : ∀ ⦃i : ℕ⦄, i ≥ 0 ∧ i ≤ k → alg.a i * (2 * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm)) ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2 := by intro i ⟨hi₁, hi₂⟩ @@ -231,6 +236,7 @@ theorem subgradient_method_converge: (k + 1)) fun x => alg.a x ^ 2 := by apply h₂ _ ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 * Finset.sum (Finset.range (k + 1)) fun x => alg.a x ^ 2 := by simp +omit [CompleteSpace E] in /-- convergence with fixed step size --/ theorem subgradient_method_fix_step_size {t : ℝ} (ha' : ∀ (n : ℕ), alg.a n = t) : @@ -248,14 +254,16 @@ theorem subgradient_method_fix_step_size {t : ℝ} simp apply mul_pos _ ht · apply add_pos_of_nonneg_of_pos (Nat.cast_nonneg k) zero_lt_one - apply (mul_le_mul_left hpos).mp + refine le_of_mul_le_mul_left ?_ hpos calc 2 * ((↑k + 1) * t) * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) = 2 * ((↑k + 1) * t) * (sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm) := by simp - _ ≤ ‖x₀ - xm‖ ^ 2 + ↑alg.G ^ 2 * ((↑k + 1) * t ^ 2) := by apply hk + _ ≤ ‖x₀ - xm‖ ^ 2 + ↑alg.G ^ 2 * ((↑k + 1) * t ^ 2) := by + simpa [Nat.lt_add_one_iff] using hk _ = 2 * ((↑k + 1) * t) * (‖x₀ - xm‖ ^ 2 / (2 * (↑k + 1) * t) + ↑alg.G ^ 2 * t / 2) := by - field_simp; ring + field_simp +omit [CompleteSpace E] in /-- convergence with fixed $‖x^{i+1}-x^{i}‖$ --/ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) (ha' : ∀ (n : ℕ), alg.a n * ‖alg.g n‖ = s) (hs : s > 0): @@ -265,7 +273,7 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) have heq : (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) = {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} := by simp [Set.ext_iff] have hnek : Nonempty (Finset.range (k + 1)) := by - simp; use 0; apply Nat.succ_pos k + exact ⟨⟨0, by simp⟩⟩ obtain h' := Lipschitz_to_bounded_subgradient alg.lipschitz have h₁ : ∀ ⦃i : ℕ⦄ , i ≥ 0 ∧ i ≤ k → ‖alg.x (i+1) - xm‖ ^ 2 ≤ ‖alg.x i - xm‖ ^ 2 - 2 * (alg.a i) * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2:= by @@ -277,28 +285,27 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 := by ring rw [this] have inq₂: 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) - ≤ 2 * inner (alg.x i - xm) (alg.a i • alg.g i) := by - rw [mul_assoc]; apply (mul_le_mul_left two_pos).mpr - rw[inner_smul_right]; apply (mul_le_mul_left (alg.ha i)).mpr + ≤ 2 * inner ℝ (alg.x i - xm) (alg.a i • alg.g i) := by + rw [mul_assoc] + refine mul_le_mul_of_nonneg_left ?_ (show (0 : ℝ) ≤ 2 by positivity) + rw[inner_smul_right] + refine mul_le_mul_of_nonneg_left ?_ (le_of_lt (alg.ha i)) rcases (alg.hg i) xm with hxm calc _ = sInf (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) - f xm := by rw [← heq] _ ≤ f (alg.x i)- f xm := by simp have : f (alg.x i) ∈ Set.range fun (x : Finset.range (k + 1)) => f (alg.x x) := by - simp; use i - constructor - · apply lt_of_le_of_lt hi₂; apply (Nat.lt_succ_self k) - · simp + exact ⟨⟨i, Finset.mem_range.mpr (lt_of_le_of_lt hi₂ (Nat.lt_succ_self k))⟩, rfl⟩ apply csInf_le _ this; apply Finite.bddBelow_range - _ ≤ inner (alg.x i - xm) (alg.g i) := by + _ ≤ inner ℝ (alg.x i - xm) (alg.g i) := by simp; apply le_add_of_sub_left_le rw[sub_eq_add_neg, ← inner_neg_left, neg_sub, real_inner_comm]; apply hxm rw[add_assoc, add_assoc] - apply add_le_add_left - apply add_le_add - · apply neg_le_neg; apply inq₂ - · simp + have hneg : -(2 * inner ℝ (alg.x i - xm) (alg.a i • alg.g i)) ≤ + -(2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)) := by + exact neg_le_neg inq₂ + linarith [hneg] have h₁' : ∀ ⦃i : ℕ⦄ , i ≥ 0 ∧ i ≤ k → alg.a i * (2 * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm)) ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + s ^ 2 := by intro i ⟨hi₁, hi₂⟩ @@ -332,7 +339,8 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) apply Finset.sum_le_sum intro i _ rw [← (ha' i)] - apply (div_le_iff₀ hG).mpr ((mul_le_mul_left (alg.ha i)).mpr (h' (alg.hg i))) + apply (div_le_iff₀ hG).mpr + exact mul_le_mul_of_nonneg_left (h' (alg.hg i)) (le_of_lt (alg.ha i)) have hpos₁ : (↑k + 1) * (s / ↑alg.G) > 0 := by apply mul_pos · apply add_pos_of_nonneg_of_pos (Nat.cast_nonneg k) zero_lt_one @@ -344,13 +352,8 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) · apply hpos₁ have h₂' : (2 * (k + 1) * (s / ↑alg.G)) * (sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm) ≤ ‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + (↑k + 1) * s ^ 2 := by - apply le_trans _ h₂ - apply mul_le_mul_of_nonneg_right - · rw[mul_assoc] - apply mul_le_mul_of_nonneg_left - · apply inq₁ - · linarith - · apply le_sub_right_of_add_le; simp + have hnonneg : 0 ≤ sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm := by + apply le_sub_right_of_add_le; simp apply le_csInf · simp at heq rw[← heq] @@ -362,14 +365,25 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) simp[isMinOn_univ_iff] at hm rcases hm (alg.x i) with hmi apply hmi + have hmul : (2 * (k + 1) * (s / ↑alg.G)) * (sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm) ≤ + (2 * ∑ i ∈ Finset.range (k + 1), alg.a i) * (sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm) := by + refine mul_le_mul_of_nonneg_right ?_ hnonneg + rw[mul_assoc] + exact mul_le_mul_of_nonneg_left inq₁ (show (0 : ℝ) ≤ 2 by positivity) + have h₂'' : (2 * ∑ i ∈ Finset.range (k + 1), alg.a i) * + (sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm) ≤ + ‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + (↑k + 1) * s ^ 2 := by + simpa [Nat.lt_add_one_iff] using h₂ + exact le_trans hmul h₂'' calc _= sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm := by simp _ ≤ (‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + (k + 1) * s ^ 2) / (2 * (k + 1) * (s / alg.G)) := by apply (le_div_iff₀' hpos₁').mpr h₂' _ ≤ (‖x₀ - xm‖ ^ 2 + (↑k + 1) * s ^ 2) / (2 * (↑k + 1) * (s / ↑alg.G)) := by - apply (div_le_div_right hpos₁').mpr; simp + apply (div_le_div_iff_of_pos_right hpos₁').2 + simp _ = alg.G * ‖x₀ - xm‖ ^ 2 / (2 * (k + 1) * s) + alg.G * s / 2 := by - field_simp; ring + field_simp /- @@ -411,7 +425,7 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) calc ‖x₀ - xm‖ ^ 2 < Finset.sum (Finset.range (b + 1)) alg.a * ε := by apply h₂ _ = 2 * Finset.sum (Finset.range (b + 1)) alg.a * (ε / 2) := by - field_simp;ring + field_simp have ha₂ : ∃ a₂, ∀ (b : ℕ), a₂ ≤ b → alg.G ^ 2 * (Finset.range (b + 1)).sum (fun i => (alg.a i) ^ 2) / (2 * (Finset.range (b + 1)).sum alg.a) < ε / 2 := by by_cases hG : ↑alg.G = 0 @@ -438,9 +452,7 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) _ = (ε / (2 * ↑alg.G ^ 2) + |x₁|) / 2 := by field_simp; ring _ < (ε / (2 * ↑alg.G ^ 2) + ε / (2 * ↑alg.G ^ 2)) / 2 := by - apply (mul_lt_mul_left zero_lt_two).mp - rw [mul_div_cancel₀, mul_div_cancel₀] - simp [hx₁]; simp; simp + nlinarith [hx₁] _ = ε / (2 * ↑alg.G ^ 2) := by field_simp; ring have c₃ : 0 ∈ s := by simp [s] @@ -486,8 +498,7 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) (2 * Finset.sum (Finset.range (b + 1)) alg.a) + (↑alg.G ^ 2 * Finset.sum (Finset.range (b - a₂)) fun x => alg.a (a₂ + 1 + x) ^ 2) / (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by - field_simp; rw[← mul_add]; simp - left + field_simp obtain heq := Finset.sum_range_add (fun i => alg.a i ^ 2) (a₂ + 1) (b - a₂) have h₃' : (b + 1) = a₂ + 1 + (b - a₂) := by rw[(Nat.add_comm a₂ 1), Nat.add_assoc, (Nat.add_sub_cancel' hba₂), Nat.add_comm] @@ -497,18 +508,22 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) · rcases hasA b hba₁ with h₃; simp [s₁] at h₃ obtain h₃₁ := (div_lt_iff₀ εpos).mp h₃ obtain h₃₂ := (div_lt_iff₀' hpos'').mpr h₃₁ - obtain h₃₃ := (div_lt_div_right zero_lt_four).mpr h₃₂ + have h₃₃ : (2 * ↑alg.G ^ 2 * Finset.sum (Finset.range (a₂ + 1)) (fun x => alg.a x ^ 2)) / + Finset.sum (Finset.range (b + 1)) alg.a / 4 < ε / 4 := by + exact (div_lt_div_iff_of_pos_right zero_lt_four).2 h₃₂ calc _ = (2 * ↑alg.G ^ 2 * Finset.sum (Finset.range (a₂ + 1)) fun x => alg.a x ^ 2) / Finset.sum (Finset.range (b + 1)) alg.a / 4 := by field_simp;ring - _ < ε / 4 := by apply h₃₃ + _ < ε / 4 := by exact h₃₃ · apply (div_lt_iff₀ hpos).mpr calc _ ≤ ↑alg.G ^ 2 * Finset.sum (Finset.range (b - a₂)) (fun x => (ε / (2 * ↑alg.G ^ 2)) * alg.a (a₂ + 1 + x)) := by - apply (mul_le_mul_left hpos').mpr; apply Finset.sum_le_sum; intro i _ + refine mul_le_mul_of_nonneg_left ?_ (le_of_lt hpos') + apply Finset.sum_le_sum; intro i _ have hposi : alg.a (a₂ + 1 + i) > 0 := by apply (alg.ha (a₂ + 1 + i)) - rw [pow_two]; apply (mul_le_mul_right hposi).mpr + rw [pow_two] + refine mul_le_mul_of_nonneg_right ?_ (le_of_lt hposi) have : a₂ + 1 + i ≥ a₂ := by rw[Nat.add_assoc]; apply Nat.le_add_right rcases ha₂ (a₂ + 1 + i) this with hai₂ @@ -521,7 +536,7 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) rw[← Finset.mul_sum] _ < ↑alg.G ^ 2 * (ε / (2 * ↑alg.G ^ 2)) * Finset.sum (Finset.range (b + 1)) (fun x => alg.a x) := by - apply (mul_lt_mul_left hposG).mpr + refine mul_lt_mul_of_pos_left ?_ hposG obtain heq := Finset.sum_range_add (fun x => alg.a x) (a₂ + 1) (b - a₂) have h₃' : (b + 1) = a₂ + 1 + (b - a₂) := by rw [Nat.add_comm a₂, Nat.add_assoc, (Nat.add_sub_cancel' hba₂), Nat.add_comm] @@ -546,7 +561,7 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) have heq : (Set.range fun (x : Finset.range (b + 1)) => f (alg.x x)) = {x | ∃ i ∈ Finset.range (b + 1), f (alg.x i) = x} := by simp [Set.ext_iff] have hneb : Nonempty (Finset.range (b + 1)) := by - simp; use 0; apply Nat.succ_pos b + exact ⟨⟨0, by simp⟩⟩ apply le_sub_right_of_add_le simp apply le_csInf @@ -556,7 +571,9 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) rw[← hb₂]; simp[isMinOn_univ_iff] at hm rcases hm (alg.x i) with hmi apply hmi - rw[(abs_of_nonneg hne)] + have hne' : sInf {x | ∃ i ≤ b, f (alg.x i) = x} - f xm ≥ 0 := by + simpa [Nat.lt_add_one_iff] using hne + rw[(abs_of_nonneg hne')] have h₁ : ∀ (k : ℕ), 2 * ((Finset.range (k + 1)).sum alg.a) * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - (f xm)) ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 * (Finset.range (k + 1)).sum (fun i => (alg.a i) ^ 2) := by @@ -577,10 +594,10 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) _ = ‖x₀ - xm‖ ^ 2 / (2 * Finset.sum (Finset.range (b + 1)) alg.a) + (↑alg.G ^ 2 * Finset.sum (Finset.range (b + 1)) fun i => alg.a i ^ 2) / (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by - simp [div_add_div_same] + simp [add_div] _ < ε / 2 + ε / 2 := by apply add_lt_add; apply hba₁'; apply hba₂' - _ = ε := by field_simp + _ = ε := by ring obtain h₁' := Filter.Tendsto.add_const (f xm) h₁ simp at h₁'; simp; apply h₁' diff --git a/Optlib/Convex/BanachSubgradient.lean b/Optlib/Convex/BanachSubgradient.lean index 3077448..4fbf220 100644 --- a/Optlib/Convex/BanachSubgradient.lean +++ b/Optlib/Convex/BanachSubgradient.lean @@ -3,8 +3,11 @@ Copyright (c) 2023 Wanyi He. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Author: Wanyi He, Chenyi Li, Zichen Wang -/ -import Mathlib.Analysis.NormedSpace.HahnBanach.Separation -import Mathlib.LinearAlgebra.Dual +import Mathlib.Analysis.Convex.Function +import Mathlib.Analysis.Convex.Topology +import Mathlib.Analysis.LocallyConvex.Separation +import Mathlib.Analysis.Normed.Operator.Basic +import Mathlib.LinearAlgebra.Dual.Lemmas section @@ -23,7 +26,7 @@ lemma EpigraphInterior_existence (hc : ContinuousOn f (interior s)) (hx : x ∈ have h1 : IsOpen t := IsOpen.preimage continuous_fst isOpen_interior have h2: ContinuousOn (fun p : (E × ℝ) => f p.fst) t := ContinuousOn.comp hc continuousOn_fst (fun ⦃x⦄ a => a) - apply ContinuousOn.isOpen_inter_preimage (h2.prod continuousOn_snd) h1 isOpen_lt_prod + apply ContinuousOn.isOpen_inter_preimage (h2.prodMk continuousOn_snd) h1 isOpen_lt_prod have h' : {p : E × ℝ| p.1 ∈ interior s ∧ f p.1 < p.2} ⊆ {p | p.1 ∈ s ∧ f p.1 ≤ p.2} := fun p ⟨hp1, hp2⟩ => ⟨interior_subset hp1, le_of_lt hp2⟩ apply interior_mono h' @@ -60,7 +63,7 @@ lemma Continuous_epi_open {f₁ : E → ℝ} (hcon : ContinuousOn f₁ univ) : have : {(x, y) : E × ℝ | y > f₁ x} = {(x, y) : E × ℝ | x ∈ univ ∧ y > f₁ x} := by ext z; simp rw [this] - apply ContinuousOn.isOpen_inter_preimage (h2.prod continuousOn_snd) h1 isOpen_lt_prod + apply ContinuousOn.isOpen_inter_preimage (h2.prodMk continuousOn_snd) h1 isOpen_lt_prod end noncomputable section @@ -127,38 +130,37 @@ theorem Banach_SubderivWithinAt.Nonempty (hf : ConvexOn ℝ s f) have hgu' : g.1 x + g.2 (f x) < g.1 a.1 + g.2 a.2 := by obtain hg1 := hg a; obtain hg2 := hg (x , f x) rw[← hg1 , ← hg2]; apply hφ a ha - simp only [hu, hu] at hgu'; exact hgu' + simp only [hu] at hgu'; exact hgu' have hu0 : u > 0 := by specialize hgu (x, f x + 1) (EpigraphInterior_existence hc hx (f x + 1) (lt_add_one (f x))) dsimp at hgu; linarith let h := - (1 / u) • g.1 have : ∀ (x : E), ‖h x‖ ≤ ((1 / u) * ‖φ‖) * ‖x‖ := by - intro x; field_simp [h]; simp only [abs_of_pos hu0] - apply div_le_div_of_nonneg_right _ (by linarith) + intro x + have hxφ : ‖g.1 x‖ ≤ ‖φ‖ * ‖x‖ := by + calc + ‖g.1 x‖ = ‖φ (x, 0)‖ := by rw [hg (x, 0), hu, mul_zero, add_zero] + ‖φ (x, 0)‖ ≤ ‖φ‖ * ‖(x, (0 : ℝ))‖ := ContinuousLinearMap.le_opNorm φ (x, 0) + _ = ‖φ‖ * ‖x‖ := by simp only [Prod.norm_def, norm_zero, max_eq_left (norm_nonneg x)] calc - |φ (x, 0)| = ‖φ (x, 0)‖ := rfl - _ ≤ ‖φ‖ * ‖(x , (0 : ℝ))‖ := ContinuousLinearMap.le_opNorm φ (x, 0) - _ = ‖φ‖ * ‖x‖ := by - simp only [Prod.norm_def, norm_zero, max_eq_left (norm_nonneg x)] + ‖h x‖ = (1 / u) * ‖g.1 x‖ := by simp [h, hu0.le] + _ ≤ (1 / u) * (‖φ‖ * ‖x‖) := mul_le_mul_of_nonneg_left hxφ (by positivity) + _ = ((1 / u) * ‖φ‖) * ‖x‖ := by ring have hh : ∃ (C : ℝ), ∀ (x : E), ‖h x‖ ≤ C * ‖x‖ := by use ((1 / u) * ‖φ‖) let h' := (LinearMap.mkContinuousOfExistsBound h hh) have key1 : ∀ a ∈ interior (Epi f s) , h' (a.1 - x) + f x < a.2 := by dsimp [h']; intro a ha - specialize hgu a ha; dsimp [g] at hgu - have uneq : u ≠ 0 := by linarith - rw [← mul_lt_mul_iff_of_pos_left hu0]; field_simp - have eq1 : u * (-φ (a.1 - x, 0) + f x * u) / u = u * f x - φ (a.1 - x, 0) := by - field_simp; ring_nf - have eq2 : φ (x, 0) - φ (a.1, 0) = -φ (a.1 - x, 0) := by - have : φ (x, 0) - φ (a.1, 0) = φ ((x, 0) - (a.1, 0)) := by - simp only [φ.map_sub] - simp only [this, Prod.mk_sub_mk, sub_zero] - have : (-(1 : ℝ)) • (a.1 - x, (0 : ℝ)) = (x - a.1, 0) := by simp - rw [← this, ContinuousLinearMap.map_smulₛₗ]; simp - field_simp [h, g, eq1, eq2, hgu] - rw [div_lt_iff₀ (by positivity)]; rw [← mul_lt_mul_iff_of_pos_left hu0] at hgu - linarith + specialize hgu a ha + have hu' : u * f x - (g.1 a.1 - g.1 x) < u * a.2 := by linarith + have hu_ne : u ≠ 0 := by linarith + have hhx : h (a.1 - x) = -(u⁻¹ * (g.1 a.1 - g.1 x)) := by simp [h] + have hm : u * (-(u⁻¹ * (g.1 a.1 - g.1 x)) + f x) = u * f x - (g.1 a.1 - g.1 x) := by + field_simp [hu_ne]; ring + apply (mul_lt_mul_iff_of_pos_left hu0).1 + rw [hhx] + rw [hm] + exact hu' have key2₀ : ∀ a ∈ (Epi f s), a.1 ∈ interior s → h' (a.1 - x) + f x ≤ a.2 := by intro a ha posa @@ -167,9 +169,9 @@ theorem Banach_SubderivWithinAt.Nonempty (hf : ConvexOn ℝ s f) have hfa : f a.1 = a.2 := by linarith [ha.2] let an : ℕ → E × ℝ := fun n => (a.1, f a.1 + 1 / (n + 1)) have can2 : Tendsto (fun n => (an n).2) atTop (nhds (f a.1)) := by - obtain hh := Tendsto.add - (tendsto_const_nhds) (tendsto_one_div_add_atTop_nhds_zero_nat) - simp only [add_zero] at hh; exact hh + simpa [an] using + (Tendsto.add tendsto_const_nhds tendsto_one_div_add_atTop_nhds_zero_nat : + Tendsto (fun n : ℕ => f a.1 + 1 / (n + 1 : ℝ)) atTop (nhds (f a.1 + 0))) have hxn : ∀ (n : ℕ), h' ((an n).1 - x) + f x ≤ (an n).2 := by intro n have : (1 : ℝ) / (n + 1) > 0 := one_div_pos.mpr (by linarith) @@ -179,9 +181,7 @@ theorem Banach_SubderivWithinAt.Nonempty (hf : ConvexOn ℝ s f) have cleft : Tendsto (fun n => h' ((an n).1 - x) + f x) atTop (nhds (h' (a.1 - x) + f x)) := by exact tendsto_const_nhds - apply le_of_tendsto_of_tendsto' cleft ?_ hxn - simp only [an, hfa] - exact can2 + exact le_of_tendsto_of_tendsto' cleft can2 hxn have key2₁ : ∀ a ∈ (Epi f s), a.1 ∉ interior s → h' (a.1 - x) + f x ≤ a.2 := by intro a ha _ diff --git a/Optlib/Convex/ClosedCone.lean b/Optlib/Convex/ClosedCone.lean index 263f705..8ea0c7a 100644 --- a/Optlib/Convex/ClosedCone.lean +++ b/Optlib/Convex/ClosedCone.lean @@ -90,13 +90,13 @@ lemma cone_eq_finite_union (s : Finset ℕ) (V : ℕ → (EuclideanSpace ℝ (Fi · intro xin let mem_x := conic_Caratheodory s V x xin rcases mem_x with ⟨τ, τsubs, xinτ, idpτ, _⟩ - simp [finite_F, F, idx_set, idx_to_cone] + simp [F, idx_set, idx_to_cone] use τ - · simp [finite_F, F, idx_set, idx_to_cone] + · simp [F, idx_set, idx_to_cone] intro τ τsubs _ xinτ apply cone_subset_of_idx_subset' s τ τsubs V xinτ · intro C Cin - simp [finite_F, F] at Cin; rcases Cin with ⟨τ, τin, Ceq⟩ + simp [F] at Cin; rcases Cin with ⟨τ, τin, Ceq⟩ use τ; constructor · rw [← Ceq] · simp [idx_set] at τin; exact τin.2 @@ -107,9 +107,9 @@ lemma closed_conic_idp (s : Finset ℕ) (V : s → (EuclideanSpace ℝ (Fin n))) simp [cone'] let M : Matrix s (Fin n) ℝ := fun i ↦ V i let f := fun x : s → ℝ ↦ Finset.sum univ (fun i => x i • V i) - let F := Matrix.mulVecLin Mᵀ + let F := (EuclideanSpace.equiv (Fin n) ℝ).symm.toLinearMap.comp (Matrix.mulVecLin Mᵀ) have eq2 : f = F := by - simp [F]; ext x j; simp; apply Finset.sum_apply + ext x j; simp [f, F, M, Matrix.vecMul, dotProduct] show IsClosed (f '' (quadrant' s)) rw [eq2] have iscF : Continuous f := by @@ -130,15 +130,16 @@ lemma closed_conic_idp (s : Finset ℕ) (V : s → (EuclideanSpace ℝ (Fin n))) rw [eq2] at iscF have isclosed : IsClosedMap F := by have injF : Function.Injective F := by - simp only [F] + apply (EuclideanSpace.equiv (Fin n) ℝ).symm.injective.comp show Function.Injective Mᵀ.mulVec - rw [Matrix.mulVec_injective_iff]; simp - apply idp - have closeEmbF: IsClosedEmbedding F := by + rw [Matrix.mulVec_injective_iff] + simpa [M, Matrix.row] using idp.map' (EuclideanSpace.equiv (Fin n) ℝ).toLinearMap + (LinearMap.ker_eq_bot.2 (EuclideanSpace.equiv (Fin n) ℝ).injective) + have closeEmbF : Topology.IsClosedEmbedding F := by apply LinearMap.isClosedEmbedding_of_injective rw [LinearMap.ker_eq_bot] exact injF - apply IsClosedEmbedding.isClosedMap closeEmbF + exact Topology.IsClosedEmbedding.isClosedMap closeEmbF apply isclosed have domclosed : IsClosed (quadrant' s) := by let g := fun i : s ↦ {mu : s → ℝ | 0 ≤ mu i} diff --git a/Optlib/Convex/ConicCaratheodory.lean b/Optlib/Convex/ConicCaratheodory.lean index 9f6bf21..8769fbb 100644 --- a/Optlib/Convex/ConicCaratheodory.lean +++ b/Optlib/Convex/ConicCaratheodory.lean @@ -80,7 +80,8 @@ private lemma mem_conic_erase (s : Finset ℕ) (V : ℕ → (EuclideanSpace ℝ by_cases hi : i ∈ s · linarith [αpos ⟨i, hi⟩] · simp [α, hi, β]; linarith [tin i] - · have hαj₀ : α j₀ = 0 := by field_simp [α, β] + · have hαj₀ : α j₀ = 0 := by + simp [α, β, j₀.prop, ne_of_gt kj₀pos] rw [hαj₀, ← xdecompose]; simp [α] have aux : (Finset.sum s fun x ↦ (t x - β x / k j₀) • V x) = (Finset.sum s fun x ↦ t x • V x) - (1 / k j₀) • (Finset.sum s fun x ↦ β x • V x) := by @@ -126,8 +127,10 @@ theorem conic_Caratheodory (s : Finset ℕ) (V : ℕ → (EuclideanSpace ℝ (Fi simp [τ']; apply subset_trans _ τin.1 apply Finset.erase_subset specialize τcardmin τ'subs xinerase - simp [to_card] at τcardmin - absurd τcardmin; simp + simp [to_card, Finset.card_erase_of_mem y.prop] at τcardmin + have hpos : 0 < τ.card := Finset.card_pos.2 ⟨y, y.prop⟩ + rw [← Nat.succ_pred_eq_of_pos hpos] at τcardmin + exact Nat.not_succ_le_self _ τcardmin · intro σ σsubs; specialize τcardmin σ simp [idx, to_card] at τcardmin apply τcardmin σsubs diff --git a/Optlib/Convex/ConvexFunction.lean b/Optlib/Convex/ConvexFunction.lean index 378e2eb..6d6756e 100644 --- a/Optlib/Convex/ConvexFunction.lean +++ b/Optlib/Convex/ConvexFunction.lean @@ -101,9 +101,7 @@ theorem Convex_first_order_condition {s : Set E} have x1nbhd: ‖x - x'‖ ≤ δ := by rw[h1, h2] have h3: b * ‖x - y‖ ≤ b1 * ‖x - y‖:= by - rw [mul_le_mul_right] - apply min_le_left - exact h₃ + exact mul_le_mul_of_nonneg_right (min_le_left b1 1) (norm_nonneg _) have h4: b1 * ‖x - y‖ = δ := by simp [b1] rw [div_mul_cancel₀] @@ -217,13 +215,13 @@ variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteS variable {f : E → ℝ} {f' : E → E} {s : Set E} {x : E} theorem Convex_first_order_condition' (h : HasGradientAt f (f' x) x) (hf : ConvexOn ℝ s f) - (xs : x ∈ s) : ∀ (y : E), y ∈ s → f x + inner (f' x) (y - x) ≤ f y := by + (xs : x ∈ s) : ∀ (y : E), y ∈ s → f x + inner ℝ (f' x) (y - x) ≤ f y := by show ∀ (y : E), y ∈ s → f x + (toDual ℝ E) (f' x) (y - x) ≤ f y apply Convex_first_order_condition _ hf xs apply h theorem Convex_first_order_condition_inverse' (h : ∀ x ∈ s , HasGradientAt f (f' x) x) - (h₁ : Convex ℝ s) (h₂ : ∀ x : E, x ∈ s → ∀ y : E, y ∈ s → f x + inner (f' x) (y - x) ≤ f y) : + (h₁ : Convex ℝ s) (h₂ : ∀ x : E, x ∈ s → ∀ y : E, y ∈ s → f x + inner ℝ (f' x) (y - x) ≤ f y) : ConvexOn ℝ s f := by apply Convex_first_order_condition_inverse intro x; specialize h x @@ -231,33 +229,33 @@ theorem Convex_first_order_condition_inverse' (h : ∀ x ∈ s , HasGradientAt apply h; apply h₁; apply h₂ theorem Convex_first_order_condition_iff' (h₁ : Convex ℝ s) (h : ∀ x ∈ s, HasGradientAt f (f' x) x) : - ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, f x + inner (f' x) (y - x) ≤ f y := + ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, f x + inner ℝ (f' x) (y - x) ≤ f y := ⟨fun h₂ x xs ↦ Convex_first_order_condition' (h x xs) h₂ xs, Convex_first_order_condition_inverse' h h₁⟩ theorem Convex_monotone_gradient' (hfun: ConvexOn ℝ s f) (h : ∀ x ∈ s, HasGradientAt f (f' x) x) : - ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ (0 : ℝ) := by + ∀ x ∈ s, ∀ y ∈ s, inner ℝ (f' x - f' y) (x - y) ≥ (0 : ℝ) := by let g := fun x ↦ (toDual ℝ E) (f' x) have h' : ∀ x ∈ s, HasFDerivAt f (g x) x := h - have equiv : ∀ x y : E, inner (f' x - f' y) (x - y) = (g x - g y) (x - y) := by + have equiv : ∀ x y : E, inner ℝ (f' x - f' y) (x - y) = (g x - g y) (x - y) := by intro x y - rw [← InnerProductSpace.toDual_apply] - simp only [map_sub, ContinuousLinearMap.coe_sub', Pi.sub_apply, toDual_apply, g] + rw [← InnerProductSpace.toDual_apply_apply] + simp only [map_sub, ContinuousLinearMap.coe_sub', Pi.sub_apply, InnerProductSpace.toDual_apply_apply, g] intro x hx y hy rw [equiv] exact Convex_monotone_gradient hfun h' x hx y hy theorem monotone_gradient_convex' (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) - (mono: ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ (0 : ℝ)) : ConvexOn ℝ s f := by + (mono: ∀ x ∈ s, ∀ y ∈ s, inner ℝ (f' x - f' y) (x - y) ≥ (0 : ℝ)) : ConvexOn ℝ s f := by apply Convex_first_order_condition_inverse' hf h₁ intro x xs y ys let g := fun t : ℝ ↦ f (x + t • (y - x)) - let g' := fun t : ℝ ↦ (inner (f' (x + t • (y - x))) (y - x) : ℝ) + let g' := fun t : ℝ ↦ (inner ℝ (f' (x + t • (y - x))) (y - x) : ℝ) have h1 : ∀ r ∈ Icc 0 1, HasDerivAt g (g' r) r := by let h := fun r : ℝ ↦ (x + r • (y - x)) have : g = f ∘ h := rfl rw [this]; intro t ht - have : inner (f' (x + t • (y - x))) (y - x) = toDual ℝ E (f' (x + t • (y - x))) (y - x) := rfl + have : inner ℝ (f' (x + t • (y - x))) (y - x) = toDual ℝ E (f' (x + t • (y - x))) (y - x) := rfl simp [g']; rw [this]; apply HasFDerivAt.comp_hasDerivAt · apply hasGradientAt_iff_hasFDerivAt.mp have : x + t • (y - x) ∈ s := by @@ -270,13 +268,13 @@ theorem monotone_gradient_convex' (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasGr rw [one_smul] at this; exact HasDerivAt.const_add x this have e1 : f x = g 0 := by simp [g] have e2 : f y = g 1 := by simp [g] - have e3 : inner (f' x) (y - x) = g' 0 := by simp [g'] + have e3 : inner ℝ (f' x) (y - x) = g' 0 := by simp [g'] rw [e1, e2, e3] have mono' : ∀ t ∈ Ioo 0 1, g' t ≥ g' 0 := by intro t ht; simp [g']; rw [← sub_nonneg, ← inner_sub_left] rcases ht with ⟨ht1, ht2⟩ - have hh: inner (f' (x + t • (y - x)) - f' x) (x + t • (y - x) - x) ≥ (0 : ℝ) := by + have hh: inner ℝ (f' (x + t • (y - x)) - f' x) (x + t • (y - x) - x) ≥ (0 : ℝ) := by apply mono (x + t • (y - x)) _ x xs have e4 : x + t • (y - x) = (1 - t) • x + t • y := by rw [smul_sub, add_sub, sub_smul, one_smul, add_sub_right_comm] @@ -300,7 +298,7 @@ theorem monotone_gradient_convex' (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasGr simp; constructor; linarith; linarith theorem monotone_gradient_iff_convex' (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x): - ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ (0 : ℝ) := + ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, inner ℝ (f' x - f' y) (x - y) ≥ (0 : ℝ) := ⟨fun h ↦ Convex_monotone_gradient' h hf, fun h ↦ monotone_gradient_convex' h₁ hf h⟩ theorem monotone_gradient_convex {f' : E → (E →L[ℝ] ℝ)} (h₁ : Convex ℝ s) @@ -310,10 +308,10 @@ theorem monotone_gradient_convex {f' : E → (E →L[ℝ] ℝ)} (h₁ : Convex have h' : ∀ x ∈ s, HasGradientAt f (g x) x := by intro x' hx' exact HasFDerivAt.hasGradientAt (hf x' hx') - have equiv : ∀ x y : E, inner (g x - g y) (x - y) = (f' x - f' y) (x - y) := by + have equiv : ∀ x y : E, inner ℝ (g x - g y) (x - y) = (f' x - f' y) (x - y) := by intro x y - rw [← InnerProductSpace.toDual_apply]; simp [g] - have mono' : ∀ x ∈ s, ∀ y ∈ s, inner (g x - g y) (x - y) ≥ (0 : ℝ) := by + rw [← InnerProductSpace.toDual_apply_apply]; simp [g] + have mono' : ∀ x ∈ s, ∀ y ∈ s, inner ℝ (g x - g y) (x - y) ≥ (0 : ℝ) := by intro x hx y hy specialize mono x hx y hy rw [equiv]; exact mono @@ -339,14 +337,14 @@ variable {f : E → ℝ} {f' : E → E} {s : Set E} theorem monotone_gradient_strict_convex (hs : Convex ℝ s) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) - (mono: ∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner (f' x - f' y) (x - y) > (0 : ℝ)) : + (mono: ∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner ℝ (f' x - f' y) (x - y) > (0 : ℝ)) : StrictConvexOn ℝ s f := by rw [StrictConvexOn]; use hs intro x xin y yin xney a b apos bpos absum1 by_contra h₀; push_neg at h₀ have anneg : 0 ≤ a := by linarith have bnneg : 0 ≤ b := by linarith - have mono' : ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ (0 : ℝ) := by + have mono' : ∀ x ∈ s, ∀ y ∈ s, inner ℝ (f' x - f' y) (x - y) ≥ (0 : ℝ) := by intro x xin y yin by_cases h : x = y · rw [h]; simp @@ -363,14 +361,14 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) have : a = 1 - b := by linarith rw [this, sub_smul, add_comm_sub, ← smul_sub]; simp apply Convex.add_smul_sub_mem hs xin yin; simp; use bnneg; linarith - have eq1 : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner (f' (x + c • (z - x))) (z - x) = f z - f x := by + have eq1 : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner ℝ (f' (x + c • (z - x))) (z - x) = f z - f x := by apply lagrange hs hf x xin z zin - have eq2 : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner (f' (z + c • (y - z))) (y - z) = f y - f z := by + have eq2 : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner ℝ (f' (z + c • (y - z))) (y - z) = f y - f z := by apply lagrange hs hf z zin y yin rcases eq1 with ⟨c, cin, e1⟩ rcases eq2 with ⟨d, din, e2⟩ - have eq3 : b * inner (f' (z + d • (y - z))) (y - z) - - a * inner (f' (x + c • (z - x))) (z - x) = 0 := by + have eq3 : b * inner ℝ (f' (z + d • (y - z))) (y - z) - + a * inner ℝ (f' (x + c • (z - x))) (z - x) = 0 := by rw [e1, e2]; simp [z]; ring_nf; rw [add_comm, ← add_assoc] simp at eq2; rw [← eq2]; nth_rw 1 [← mul_one (f (a • x + b • y))]; rw [← absum1]; ring_nf rw [← inner_smul_right, ← inner_smul_right] at eq3 @@ -379,14 +377,17 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) have this2 : a • (z - x) = a • b • (y - x) := by simp [z]; nth_rw 2 [← one_smul ℝ x]; rw [← absum1, add_smul]; simp; rw [← smul_sub, smul_comm] rw [this1, this2, ← inner_sub_left, inner_smul_right, inner_smul_right, ← mul_assoc] at eq3 - have eq0 : inner (f' (z + d • (y - z)) - f' (x + c • (z - x))) (y - x) = (0 : ℝ) := by + have eq0 : inner ℝ (f' (z + d • (y - z)) - f' (x + c • (z - x))) (y - x) = (0 : ℝ) := by contrapose! eq3 rw [mul_ne_zero_iff] constructor · rw [mul_ne_zero_iff]; constructor <;> linarith · exact eq3 have zeq : z = x + b • (y - x) := by - nth_rw 1 [← one_smul ℝ x]; rw [← absum1, add_smul, smul_sub]; simp + show a • x + b • y = x + b • (y - x) + have hab : a = 1 - b := by linarith + rw [hab, sub_smul, one_smul, smul_sub] + abel_nf let u : E := z + d • (y - z) let v : E := x + c • (z - x) have ueq : u = x + (b + d) • (y - x) - d • b • (y - x) := by @@ -400,8 +401,8 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) have usubv : u - v = (b + d - d * b - c * b) • (y - x) := by rw [ueq, veq, ← smul_assoc, ← smul_assoc, ← sub_sub]; simp rw [← add_sub, ← sub_smul (b + d) (d * b)]; simp; rw [← sub_smul] - have eeq0 : inner (f' u - f' v) (u - v) = (0 : ℝ) := by - show inner (f' (z + d • (y - z)) - f' (x + c • (z - x))) (u - v) = (0 : ℝ) + have eeq0 : inner ℝ (f' u - f' v) (u - v) = (0 : ℝ) := by + show inner ℝ (f' (z + d • (y - z)) - f' (x + c • (z - x))) (u - v) = (0 : ℝ) rw [usubv, inner_smul_right, eq0]; simp have coefne0 : b + d - d * b - c * b > 0 := by nth_rw 1 [← mul_one d]; rw [← absum1]; simp; ring_nf @@ -412,7 +413,7 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) _ < b + d * a := by have : 0 < d * a := by apply mul_pos dpos apos linarith - have neq0 : inner (f' u - f' v) (u - v) > (0 : ℝ) := by + have neq0 : inner ℝ (f' u - f' v) (u - v) > (0 : ℝ) := by have uin : u ∈ s := by show z + d • (y - z) ∈ s apply Convex.add_smul_sub_mem hs zin yin; simp; simp at din @@ -433,24 +434,24 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) theorem strict_convex_monotone_gradient (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (h₁ : StrictConvexOn ℝ s f ) : - ∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner (f' x - f' y) (x - y) > (0 : ℝ) := by + ∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner ℝ (f' x - f' y) (x - y) > (0 : ℝ) := by intro x xin y yin xney have convf : ConvexOn ℝ s f := by apply StrictConvexOn.convexOn h₁ rw [StrictConvexOn] at h₁ rcases h₁ with ⟨hs, fsconv⟩ - have : inner (f' x - f' y) (x - y) ≥ (0 : ℝ) := by + have : inner ℝ (f' x - f' y) (x - y) ≥ (0 : ℝ) := by apply Convex_monotone_gradient' convf hf x xin y yin by_contra h0; push_neg at h0 - have eq : inner (f' x - f' y) (x - y) = (0 : ℝ) := by linarith - have eq1 : f x + inner (f' x) (y - x) ≤ f y := by + have eq : inner ℝ (f' x - f' y) (x - y) = (0 : ℝ) := by linarith + have eq1 : f x + inner ℝ (f' x) (y - x) ≤ f y := by apply Convex_first_order_condition' (hf x xin) convf xin y yin - have eq2 : f y + inner (f' y) (x - y) ≤ f x := by + have eq2 : f y + inner ℝ (f' y) (x - y) ≤ f x := by apply Convex_first_order_condition' (hf y yin) convf yin x xin - have eq2' : f y ≤ f x + inner (f' x) (y - x) := by - rw [← add_zero (inner (f' x) (y - x)), ← eq, inner_sub_left, add_sub, ← inner_add_right] + have eq2' : f y ≤ f x + inner ℝ (f' x) (y - x) := by + rw [← add_zero (inner ℝ (f' x) (y - x)), ← eq, inner_sub_left, add_sub, ← inner_add_right] simp; apply eq2 - have eq3 : f y - f x = inner (f' x) (y - x) := by linarith - have extc : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner (f' (x + c • (y - x))) (y - x) = f y - f x := by + have eq3 : f y - f x = inner ℝ (f' x) (y - x) := by linarith + have extc : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner ℝ (f' (x + c • (y - x))) (y - x) = f y - f x := by apply lagrange hs hf x xin y yin rcases extc with ⟨c, cin, e1⟩ let z : E := x + c • (y - x) @@ -458,20 +459,24 @@ theorem strict_convex_monotone_gradient (hf : ∀ x ∈ s, HasGradientAt f (f' x apply Convex.add_smul_sub_mem hs xin yin; simp; simp at cin; rcases cin with ⟨cpos, cl1⟩ constructor <;> linarith simp at cin; rcases cin with ⟨cpos, cl1⟩ - have eq0 : inner (f' z - f' x) (z - x) = (0 : ℝ) := by - simp [z]; rw [inner_smul_right, inner_sub_left, ← eq3, e1]; simp - have eq4 : f x + inner (f' x) (z - x) ≤ f z := by + have hz : z - x = c • (y - x) := by simp [z] + have e1' : inner ℝ (f' z) (y - x) = f y - f x := by simpa [z] using e1 + have eq0 : inner ℝ (f' z - f' x) (z - x) = (0 : ℝ) := by + rw [hz, inner_smul_right, inner_sub_left, e1', ← eq3]; ring + have eq4 : f x + inner ℝ (f' x) (z - x) ≤ f z := by apply Convex_first_order_condition' (hf x xin) convf xin z zin - have eq5 : f z + inner (f' z) (x - z) ≤ f x := by + have eq5 : f z + inner ℝ (f' z) (x - z) ≤ f x := by apply Convex_first_order_condition' (hf z zin) convf zin x xin - have eq5' : f z ≤ f x + inner (f' x) (z - x) := by - rw [← add_zero (inner (f' x) (z - x)), ← eq0, inner_sub_left] - rw [add_sub, add_comm (inner (f' x) (z - x))] + have eq5' : f z ≤ f x + inner ℝ (f' x) (z - x) := by + rw [← add_zero (inner ℝ (f' x) (z - x)), ← eq0, inner_sub_left] + rw [add_sub, add_comm (inner ℝ (f' x) (z - x))] rw [← add_sub, ← inner_sub_right, sub_self, inner_zero_right, add_zero] rw [← sub_neg_eq_add, ← inner_neg_right, neg_sub]; linarith - have eq6 : f z = inner (f' x) (z - x) + f x := by linarith + have eq6 : f z = inner ℝ (f' x) (z - x) + f x := by linarith have f1 : f z = (1 - c) • f x + c • f y := by - rw [eq6]; simp [z]; rw [inner_smul_right, ← eq3]; ring_nf + rw [eq6, hz, inner_smul_right, ← eq3] + simp [smul_eq_mul] + ring have f2 : f z < (1 - c) • f x + c • f y := by simp let d : ℝ := 1 - c @@ -485,7 +490,7 @@ theorem strict_convex_monotone_gradient (hf : ∀ x ∈ s, HasGradientAt f (f' x theorem strict_convex_iff_monotone_gradient (hs: Convex ℝ s) (h : ∀ x ∈ s, HasGradientAt f (f' x) x) : - (∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner (f' x - f' y) (x - y) > (0 : ℝ)) + (∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner ℝ (f' x - f' y) (x - y) > (0 : ℝ)) ↔ StrictConvexOn ℝ s f := by constructor exact monotone_gradient_strict_convex hs h diff --git a/Optlib/Convex/Farkas.lean b/Optlib/Convex/Farkas.lean index 38db9b1..73844d7 100644 --- a/Optlib/Convex/Farkas.lean +++ b/Optlib/Convex/Farkas.lean @@ -5,10 +5,8 @@ Authors: Shengyang Xu, Chenyi Li -/ import Mathlib.Analysis.Convex.Cone.Basic import Mathlib.Analysis.Calculus.LocalExtr.Basic -import Mathlib.Analysis.NormedSpace.HahnBanach.Separation +import Mathlib.Analysis.LocallyConvex.Separation import Mathlib.Analysis.InnerProductSpace.PiL2 -import Mathlib.Data.Matrix.Rank -import Mathlib.LinearAlgebra.FiniteDimensional import Optlib.Differential.Calculation import Optlib.Convex.ClosedCone @@ -55,9 +53,10 @@ lemma polyhedra_iff_cone {σ : Finset ℕ} : ∀ (b : ℕ → EuclideanSpace ℝ simp [ht]; specialize cpos i ht; exact cpos; simp [ht] rw [h] let f : ℕ → EuclideanSpace ℝ (Fin n) := fun i ↦ (c1 i) • (b i) - have htt : ∑ x in σ.attach, f x = Finset.sum (attach σ) fun x => (c1 x • b x) := by simp [f] + have htt : Finset.sum σ.attach (fun x => f x) = Finset.sum (attach σ) (fun x => c1 x • b x) := by + simp [f] have h1 : ∀ i : σ, c1 i • b i = c i • b i := by intro i; simp [c1] - have ht : ∑ x in σ.attach, f x = Finset.sum (attach σ) fun x => (c x • b x) := by + have ht : Finset.sum σ.attach (fun x => f x) = Finset.sum (attach σ) (fun x => c x • b x) := by rw [← htt]; apply Finset.sum_congr; simp intro i _; simp [f, c1] nth_rw 1 [Finset.sum_attach] at htt @@ -67,7 +66,8 @@ lemma polyhedra_iff_cone {σ : Finset ℕ} : ∀ (b : ℕ → EuclideanSpace ℝ use c1; constructor · intro i _; exact cpos i let f : ℕ → EuclideanSpace ℝ (Fin n) := fun i ↦ (c i) • (b i) - have : ∑ x in σ.attach, f x = Finset.sum (attach σ) fun x => (c x • b x) := by simp [f] + have : Finset.sum σ.attach (fun x => f x) = Finset.sum (attach σ) (fun x => c x • b x) := by + simp [f] rw [← h]; simp [c1]; rw [← this, Finset.sum_attach] private lemma leq_tendsto_zero {a x : ℝ} (ha : a < 0) (h : ∀ t > 0, t * x > a) : 0 ≤ x := by @@ -109,7 +109,8 @@ private lemma shift_sum (τ : Finset ℕ) (m : ℕ) (f : ℕ → EuclideanSpace rw [Finset.sum_attach, Finset.sum_attach, Finset.sum_image aux, eq]; simp private lemma shift_not_in (τ : Finset ℕ) (m : ℕ) (hm : ∀ i : τ, i < m): m ∉ τ := by - contrapose hm; simp; simp at hm; use m + intro hmem + exact Nat.lt_irrefl m (hm ⟨m, hmem⟩) private lemma mem_lt_m {m i : ℕ} {σ τ : Finset ℕ} (he : (τ ∪ σ).Nonempty) (hm : m = (Finset.max' (τ ∪ σ) he).succ) : (i ∈ (τ ∪ σ)) → (i < m) := by @@ -122,52 +123,58 @@ private lemma exist_of_mem_shift {x m : ℕ} {τ : Finset ℕ}: private lemma s_inter_t1_empty {m : ℕ} {σ τ : Finset ℕ} (he : (τ ∪ σ).Nonempty) (hm : m = (Finset.max' (τ ∪ σ) he).succ) : σ ∩ (Finset.image (fun x => x + m) τ) = ∅ := by - by_contra neq; push_neg at neq; rw [← Finset.nonempty_iff_ne_empty] at neq - rcases neq with ⟨x, xin⟩ - absurd Finset.mem_of_mem_inter_left xin - apply shift_not_in; intro i - calc - i.1 < m := by apply mem_lt_m he hm; simp [i.2] - m ≤ x := by - rcases exist_of_mem_shift (Finset.mem_of_mem_inter_right xin) with ⟨a, _, aeq⟩; linarith + rw [Finset.eq_empty_iff_forall_notMem] + intro x xin + have hx : x ∉ σ := by + apply shift_not_in; intro i + calc + i.1 < m := by apply mem_lt_m he hm; simp [i.2] + m ≤ x := by + rcases exist_of_mem_shift (Finset.mem_of_mem_inter_right xin) with ⟨a, _, aeq⟩; linarith + exact hx (Finset.mem_of_mem_inter_left xin) private lemma s_inter_t2_empty {m : ℕ} {σ τ : Finset ℕ} (he : (τ ∪ σ).Nonempty) (hm : m = (Finset.max' (τ ∪ σ) he).succ) : σ ∩ (Finset.image (fun x => x + 2 * m) τ) = ∅ := by - by_contra neq; push_neg at neq; rw [← Finset.nonempty_iff_ne_empty] at neq - rcases neq with ⟨x, xin⟩ - absurd Finset.mem_of_mem_inter_left xin - apply shift_not_in; intro i - calc - i.1 < m := by apply mem_lt_m he hm; simp [i.2] - m ≤ 2 * m := by linarith - 2 * m ≤ x := by - rcases exist_of_mem_shift (Finset.mem_of_mem_inter_right xin) with ⟨a, _, aeq⟩; linarith + rw [Finset.eq_empty_iff_forall_notMem] + intro x xin + have hx : x ∉ σ := by + apply shift_not_in; intro i + calc + i.1 < m := by apply mem_lt_m he hm; simp [i.2] + m ≤ 2 * m := by linarith + 2 * m ≤ x := by + rcases exist_of_mem_shift (Finset.mem_of_mem_inter_right xin) with ⟨a, _, aeq⟩; linarith + exact hx (Finset.mem_of_mem_inter_left xin) private lemma t1_inter_t2_empty {m : ℕ} {σ τ : Finset ℕ} (he : (τ ∪ σ).Nonempty) (hm : m = (Finset.max' (τ ∪ σ) he).succ) : (Finset.image (fun x => x + m) τ) ∩ (Finset.image (fun x => x + 2 * m) τ) = ∅ := by - by_contra neq; push_neg at neq; rw [← Finset.nonempty_iff_ne_empty] at neq - rcases neq with ⟨x, xin⟩ - absurd Finset.mem_of_mem_inter_left xin - apply shift_not_in; intro i - rcases exist_of_mem_shift (Finset.mem_of_mem_inter_right xin) with ⟨a, aeq⟩ - rcases exist_of_mem_shift i.2 with ⟨b, beq⟩ - calc - i.1 < 2 * m := by - rw [beq, two_mul]; apply Nat.add_lt_add_right; apply mem_lt_m he hm; simp - 2 * m ≤ x := by rw [aeq]; simp + rw [Finset.eq_empty_iff_forall_notMem] + intro x xin + have hx : x ∉ Finset.image (fun x => x + m) τ := by + apply shift_not_in; intro i + rcases exist_of_mem_shift (Finset.mem_of_mem_inter_right xin) with ⟨a, aeq⟩ + rcases exist_of_mem_shift i.2 with ⟨b, beq⟩ + calc + i.1 < 2 * m := by + rw [beq, two_mul]; apply Nat.add_lt_add_right; apply mem_lt_m he hm; simp + 2 * m ≤ x := by rw [aeq]; simp + exact hx (Finset.mem_of_mem_inter_left xin) lemma general_polyhedra_is_polyhedra_empty (τ σ : Finset ℕ) (he : ¬(τ ∪ σ).Nonempty) : ∀ (a : ℕ → EuclideanSpace ℝ (Fin n)), ∀ (b : ℕ → EuclideanSpace ℝ (Fin n)), ∃ μ c, {z | ∃ (lam : τ → ℝ), ∃ (mu : σ → ℝ), (∀ i, 0 ≤ mu i) ∧ z = Finset.sum univ (fun i ↦ lam i • a i) + Finset.sum univ (fun i ↦ mu i • b i)} = cone μ c := by - simp at he; rw [Finset.union_eq_empty] at he - intro a b; simp [he] - use ∅; use (fun _ => 0) - simp [cone, quadrant]; ext x; simp; constructor - · intro x0; simp [x0]; use (fun _ => 0); simp - · intro cond; simp [cond.2] + simp at he + rcases he with ⟨rfl, rfl⟩ + intro a b + refine ⟨∅, fun _ => 0, ?_⟩ + ext x + simp [cone, quadrant] + constructor + · intro hx; subst hx; refine ⟨⟨fun _ => 0, by simp⟩, rfl⟩ + · intro hx; simpa using hx.2.symm lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).Nonempty) : ∀ (a : ℕ → EuclideanSpace ℝ (Fin n)), ∀ (b : ℕ → EuclideanSpace ℝ (Fin n)), @@ -180,11 +187,11 @@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N let τ2 := Finset.image (fun x => x + 2 * m) τ let μ := σ ∪ τ1 ∪ τ2 have mt1emp : σ ∩ τ1 = ∅ := by - simp only [τ1]; apply s_inter_t1_empty he; simp + simpa [τ1, m] using s_inter_t1_empty (σ := σ) (τ := τ) he (m := m) rfl have mt2emp : σ ∩ τ2 = ∅ := by - simp only [τ2]; apply s_inter_t2_empty he; simp + simpa [τ2, m] using s_inter_t2_empty (σ := σ) (τ := τ) he (m := m) rfl have t1t2emp : τ1 ∩ τ2 = ∅ := by - simp only [τ1, τ2]; apply t1_inter_t2_empty he; simp + simpa [τ1, τ2, m] using t1_inter_t2_empty (σ := σ) (τ := τ) he (m := m) rfl have disj_st : Disjoint σ (τ1 ∪ τ2) := by rw [Finset.disjoint_iff_inter_eq_empty, Finset.inter_union_distrib_left]; simp [mt1emp, mt2emp] have disj_tt : Disjoint τ1 τ2 := by @@ -219,27 +226,30 @@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N simp [hs, ht1, ht2] use w; use wnneg rw [xeq, tau_decpn] - have eq1 : ∑ x : { x // x ∈ σ }, mu x • b x = ∑ x in σ, (fun y => w y • c y) x := by + have eq1 : (∑ x : { x // x ∈ σ }, mu x • b x) = Finset.sum σ (fun x => w x • c x) := by nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j; simp [w, c, cσ] - have eq2 : ∑ x : τ, (fun y => lamp y • cτ1 y) x = ∑ x in τ1, (fun y => w y • c y) x := by + have eq2 : (∑ x : τ, (fun y => lamp y • cτ1 y) x) = Finset.sum τ1 (fun x => w x • c x) := by rw [shift_sum τ m (fun y => lamp y • cτ1 y)] nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j have hns : x.1 ∉ σ := by - contrapose mt1emp; simp at mt1emp; push_neg; rw [← Finset.nonempty_iff_ne_empty] - use x; simp [τ1, mt1emp, x.2] + intro hs + have : x.1 ∈ σ ∩ τ1 := by simp [hs] + simp [mt1emp] at this simp [w, c, hns] - have eq3 : ∑ x : τ, (fun y => lamn y • cτ2 y) x = ∑ x in τ2, (fun y => w y • c y) x := by + have eq3 : (∑ x : τ, (fun y => lamn y • cτ2 y) x) = Finset.sum τ2 (fun x => w x • c x) := by rw [shift_sum τ (2 * m) (fun y => lamn y • cτ2 y)] nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j have hns : x.1 ∉ σ := by - contrapose mt2emp; simp at mt2emp; push_neg; rw [← Finset.nonempty_iff_ne_empty] - use x; simp [τ2, mt2emp, x.2] + intro hs + have : x.1 ∈ σ ∩ τ2 := by simp [hs] + simp [mt2emp] at this have hnt : x.1 ∉ τ1 := by - contrapose t1t2emp; simp at t1t2emp; push_neg; rw [← Finset.nonempty_iff_ne_empty] - use x; simp [τ2, t1t2emp, x.2] + intro hs + have : x.1 ∈ τ1 ∩ τ2 := by simp [hs] + simp [t1t2emp] at this simp [w, c, hns, hnt] rw [eq1, eq2, eq3]; simp [μ] rw [Finset.sum_union disj_st, Finset.sum_union disj_tt, add_comm] @@ -250,32 +260,35 @@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N let lamn : ℕ → ℝ := fun i => if i ∈ τ then w (i + 2 * m) else 0 let lam : τ → ℝ := fun i => lamp i.1 - lamn i.1 let mu : ℕ → ℝ := fun i => if i ∈ σ then w i else 0 - have eq1 : ∑ x : { x // x ∈ σ }, mu x • b x = ∑ x in σ, (fun y => w y • c y) x := by + have eq1 : (∑ x : { x // x ∈ σ }, mu x • b x) = Finset.sum σ (fun x => w x • c x) := by nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j; simp [mu, c, cσ] - have eq2 : ∑ x : τ, (fun y => lamp y • cτ1 y) x = ∑ x in τ1, (fun y => w y • c y) x := by + have eq2 : (∑ x : τ, (fun y => lamp y • cτ1 y) x) = Finset.sum τ1 (fun x => w x • c x) := by rw [shift_sum τ m (fun y => lamp y • cτ1 y)] nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j have hns : x.1 ∉ σ := by - contrapose mt1emp; simp at mt1emp; push_neg; rw [← Finset.nonempty_iff_ne_empty] - use x; simp [τ1, mt1emp, x.2] + intro hs + have : x.1 ∈ σ ∩ τ1 := by simp [hs] + simp [mt1emp] at this rcases exist_of_mem_shift x.2 with ⟨a, eq⟩ have hin : x.1 - m ∈ τ := by rw [eq]; simp - simp [mu, lamp, c, hns, hin]; rw [eq]; simp - have eq3 : ∑ x : τ, (fun y => lamn y • cτ2 y) x = ∑ x in τ2, (fun y => w y • c y) x := by + simp [lamp, c, hns, hin]; rw [eq]; simp + have eq3 : (∑ x : τ, (fun y => lamn y • cτ2 y) x) = Finset.sum τ2 (fun x => w x • c x) := by rw [shift_sum τ (2 * m) (fun y => lamn y • cτ2 y)] nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j have hns : x.1 ∉ σ := by - contrapose mt2emp; simp at mt2emp; push_neg; rw [← Finset.nonempty_iff_ne_empty] - use x; simp [τ2, mt2emp, x.2] + intro hs + have : x.1 ∈ σ ∩ τ2 := by simp [hs] + simp [mt2emp] at this have hnt : x.1 ∉ τ1 := by - contrapose t1t2emp; simp at t1t2emp; push_neg; rw [← Finset.nonempty_iff_ne_empty] - use x; simp [τ2, t1t2emp, x.2] + intro hs + have : x.1 ∈ τ1 ∩ τ2 := by simp [hs] + simp [t1t2emp] at this rcases exist_of_mem_shift x.2 with ⟨a, eq⟩ have hin : x.1 - 2 * m ∈ τ := by rw [eq]; simp - simp [mu, lamn, c, hns, hnt, hin]; rw [eq]; simp + simp [lamn, c, hns, hnt, hin]; rw [eq]; simp rw [← eq1, ← eq2, ← eq3] at xeq; simp at xeq simp; use lam; use (fun i => mu i); constructor · intro a ain; simp [mu, ain]; linarith [wnneg a] @@ -301,24 +314,24 @@ lemma general_polyhedra_is_closed : IsClosed {z | ∃ (lam : τ → ℝ), ∃ (m theorem Farkas : (∃ (lam : τ → ℝ), ∃ (mu : σ → ℝ), (∀ i, 0 ≤ mu i) ∧ c = Finset.sum univ (fun i ↦ lam i • a i) + Finset.sum univ (fun i ↦ mu i • b i)) ↔ - ¬ (∃ (z : EuclideanSpace ℝ (Fin n)), (∀ i ∈ τ, inner (a i) z = (0 : ℝ)) - ∧ (∀ i ∈ σ, inner (b i) z ≥ (0 : ℝ)) ∧ (inner c z < (0 : ℝ))) := by + ¬ (∃ (z : EuclideanSpace ℝ (Fin n)), (∀ i ∈ τ, inner ℝ (a i) z = (0 : ℝ)) + ∧ (∀ i ∈ σ, inner ℝ (b i) z ≥ (0 : ℝ)) ∧ (inner ℝ c z < (0 : ℝ))) := by constructor intro h; rcases h with ⟨lam, mu, ⟨h1, h2⟩⟩ by_contra h3 rcases h3 with ⟨z, ⟨h31, ⟨h32, h33⟩⟩⟩ - have : inner c z ≥ (0 : ℝ) := by + have : inner ℝ c z ≥ (0 : ℝ) := by calc - _ = inner (Finset.sum univ (fun i ↦ lam i • a i)) z - + inner (Finset.sum univ (fun i ↦ mu i • b i)) z := by rw [h2]; simp [inner_add_left] - _ = Finset.sum univ (fun i ↦ inner (lam i • a i) z) - + Finset.sum univ (fun i ↦ inner (mu i • b i) z) := by + _ = inner ℝ (Finset.sum univ (fun i ↦ lam i • a i)) z + + inner ℝ (Finset.sum univ (fun i ↦ mu i • b i)) z := by rw [h2]; simp [inner_add_left] + _ = Finset.sum univ (fun i ↦ inner ℝ (lam i • a i) z) + + Finset.sum univ (fun i ↦ inner ℝ (mu i • b i) z) := by rw [sum_inner, sum_inner] - _ = Finset.sum univ (fun i ↦ lam i * inner (a i) z) - + Finset.sum univ (fun i ↦ mu i * inner (b i) z) := by + _ = Finset.sum univ (fun i ↦ lam i * inner ℝ (a i) z) + + Finset.sum univ (fun i ↦ mu i * inner ℝ (b i) z) := by congr; ext i; rw [inner_smul_left]; simp ext i; rw [inner_smul_left]; simp - _ = Finset.sum univ (fun i ↦ mu i * inner (b i) z) := by simp [h31] + _ = Finset.sum univ (fun i ↦ mu i * inner ℝ (b i) z) := by simp [h31] _ ≥ 0 := by apply Finset.sum_nonneg; intro i _ obtain h1i := h1 i; obtain h2i := h32 i i.2; positivity @@ -362,19 +375,22 @@ theorem Farkas : have cn : c ∉ S := by by_contra cn; simp only [Set.mem_setOf_eq, S] at cn; rcases cn with ⟨lam, mu, ⟨cn1, cn2⟩⟩ - apply h1; use lam; use mu + exact h1 ⟨lam, mu, cn1, cn2⟩ obtain sep := geometric_hahn_banach_point_closed scon sc cn rcases sep with ⟨f, u, ⟨sep1, sep2⟩⟩ - have feq : ∃ d : EuclideanSpace ℝ (Fin n), ∀ x, f x = inner d x := by - use ((toDual ℝ (EuclideanSpace ℝ (Fin n))).symm f); simp + have feq : ∃ d : EuclideanSpace ℝ (Fin n), ∀ x, f x = inner ℝ d x := by + refine ⟨(toDual ℝ (EuclideanSpace ℝ (Fin n))).symm f, ?_⟩ + intro x + symm + exact toDual_symm_apply (𝕜 := ℝ) (E := EuclideanSpace ℝ (Fin n)) (x := x) (y := f) rcases feq with ⟨d, feq⟩ have uleq : u < 0 := by have : 0 ∈ S := by simp [S]; use 0; use 0; simp specialize sep2 0 this; rw [feq 0, inner_zero_right] at sep2; exact sep2 - have hc : inner c d < (0 : ℝ) := by + have hc : inner ℝ c d < (0 : ℝ) := by rw [real_inner_comm, ← feq c] apply lt_trans sep1 uleq - have hb : ∀ i : σ, inner (b i) d ≥ (0 : ℝ) := by + have hb : ∀ i : σ, inner ℝ (b i) d ≥ (0 : ℝ) := by intro i have : ∀ t > (0 : ℝ), (t • b i) ∈ S := by intro t ht @@ -385,13 +401,13 @@ theorem Farkas : intro t ht specialize sep2 (t • b i) (this t ht); rw [feq, inner_smul_right, real_inner_comm] at sep2; exact sep2 - have ha : ∀ i : τ, inner (a i) d = (0 : ℝ) := by + have ha : ∀ i : τ, inner ℝ (a i) d = (0 : ℝ) := by intro i have : ∀ t : ℝ, (t • a i) ∈ S := by intro t simp only [S]; use (fun j ↦ if j = i then t else 0); use 0; constructor; intro _; simp; simp only [Pi.zero_apply, zero_smul, sum_const_zero, - ite_smul, zero_add]; simp + ite_smul]; simp rw [le_antisymm_iff]; constructor · apply geq_tendsto_zero uleq intro t _ diff --git a/Optlib/Convex/FiniteDimensionalConvexFunctionsLocallyLipschitz.lean b/Optlib/Convex/FiniteDimensionalConvexFunctionsLocallyLipschitz.lean index 5e90ea4..1b0cd98 100644 --- a/Optlib/Convex/FiniteDimensionalConvexFunctionsLocallyLipschitz.lean +++ b/Optlib/Convex/FiniteDimensionalConvexFunctionsLocallyLipschitz.lean @@ -103,24 +103,22 @@ lemma Bounded_of_UpperBounded (hf : ConvexOn ℝ (ball x₀ r) f) simp only [smul_eq_mul, a] at h have h' : - f y + 2 * f x₀ ≤ f x := by linarith have fy_pos : - |m| ≤ - f y := by - simp only [neg_le_neg_iff, ge_iff_le] + simp only [neg_le_neg_iff] apply le_trans (hm y y_pos) (le_abs_self m) constructor · calc _ = -|m| - 2 * (|f x₀| + 1):= neg_add' |m| (2 * (|f x₀| + 1)) _ < -f y + 2 * f x₀ := by apply add_lt_add_of_le_of_lt fy_pos - rw[← mul_neg] - simp only [Nat.ofNat_pos, mul_lt_mul_left,neg_add'] - calc - _ < -|f x₀| :=by simp only [sub_lt_self_iff, zero_lt_one] - _ ≤ _ := neg_abs_le (f x₀) + have hx₀ : -2 * (|f x₀| + 1) < 2 * f x₀ := by + linarith [neg_abs_le (f x₀)] + simpa [neg_add'] using hx₀ _ ≤ _ := h' · calc _ ≤ m := by apply hm x hx.1 _ ≤ |m| := le_abs_self m _ < _ := by - simp only [lt_add_iff_pos_right, Nat.ofNat_pos, mul_pos_iff_of_pos_left] + simp only [lt_add_iff_pos_right] linarith [abs_nonneg (f x₀)] /-- @@ -145,7 +143,7 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) --type conversion rw[edist_dist,edist_dist] rw[ENNReal.coe_nnreal_eq] - simp only [NNReal.coe_mk, ge_iff_le] + simp only [NNReal.coe_mk] rw[← ENNReal.ofReal_mul K_pos] rw[ENNReal.ofReal_le_ofReal_iff (mul_nonneg K_pos dist_nonneg)] --type conversion @@ -161,9 +159,13 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) have vx_pos : vx ∈ ball x₀ r := sub hv have uy_pos : uy ∈ ball x₀ r := sub hu let z := uy + (ε / ‖uy - vx‖) • (uy - vx) + let _ := MetricSpace.ofT0PseudoMetricSpace X have sub_pos : 0 < ‖uy - vx‖ := by - apply norm_pos_iff'.mpr - exact sub_ne_zero_of_ne h + have hne : ‖uy - vx‖ ≠ 0 := by + intro hnorm + apply h + exact dist_eq_zero.mp <| by simpa [dist_eq_norm] using hnorm + exact lt_of_le_of_ne (norm_nonneg _) (Ne.symm hne) have z_pos : z ∈ ball x₀ r := by simp only [mem_ball,dist_eq_norm,z] calc @@ -193,8 +195,7 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) apply div_pos hε.1 this have b_pos : 0 < b := by apply div_pos - rw[norm_pos_iff'] - exact sub_ne_zero_of_ne h + exact sub_pos apply this have a_add_b_one : a + b = 1 := by simp[a,b] @@ -215,9 +216,9 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) have h1 : (ε + ‖uy - vx‖) * f uy ≤ ε * f vx + ‖uy - vx‖ * f z:= by rw[← h_combin] at h simp[a,b] at h - rw[← mul_le_mul_left this] at h - field_simp at h - exact h + have h' := mul_le_mul_of_nonneg_left h (le_of_lt this) + field_simp at h' + exact h' have h2 : ε * (f uy - f vx) ≤ 2 * M * ‖uy - vx‖ := by calc _ ≤ (f z - f uy) * ‖uy - vx‖ := by @@ -243,10 +244,8 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) _ ≤ _ := by simp[K] apply mul_le_mul_of_nonneg_right _ (le_of_lt sub_pos) - rw[div_le_div_right hε.1] - apply mul_le_mul_of_nonneg_left - apply le_abs_self - norm_num + rw[div_le_div_iff_of_pos_right hε.1] + nlinarith [le_abs_self M] by_cases h : x = y · rw[h] simp only [sub_self, abs_zero, norm_zero, mul_zero, le_refl] @@ -325,8 +324,7 @@ lemma LocallyUpperBounded (hs_convex : Convex ℝ s)(hs_isopen : IsOpen s) have bi_pos : ∀ i : ι , ‖b i‖ ≠ 0 := by intro i refine norm_ne_zero_iff.mpr ?_ - exact Basis.ne_zero b i - change Basis ι ℝ α at b + exact (b).ne_zero i by_cases hn : n = 0 · have : Module.finrank ℝ α = 0 := by show n = 0;apply hn; @@ -368,7 +366,7 @@ lemma LocallyUpperBounded (hs_convex : Convex ℝ s)(hs_isopen : IsOpen s) simp only [neg_add_cancel_comm]; rw[this] apply hr₀.2 - simp only [mem_ball, dist_self_add_left,dist_add_self_left] + simp only [mem_ball, dist_add_self_left] rw[norm_smul,norm_div,norm_norm,div_mul_cancel₀] simp[r] calc @@ -381,7 +379,7 @@ lemma LocallyUpperBounded (hs_convex : Convex ℝ s)(hs_isopen : IsOpen s) simp only [neg_add_cancel_comm] rw[this] apply hr₀.2 - simp only [mem_ball, dist_self_add_left,dist_add_self_left,neg_smul, norm_neg] + simp only [mem_ball, dist_add_self_left, neg_smul, norm_neg] rw[norm_smul,norm_div,norm_norm,div_mul_cancel₀] simp[r] calc @@ -435,6 +433,7 @@ lemma LocallyUpperBounded (hs_convex : Convex ℝ s)(hs_isopen : IsOpen s) apply ConvexOn.le_sup_of_mem_convexHull hf _ hx apply subset_convexHull +omit [FiniteDimensional ℝ α] in lemma LocallyLipschitz_of_LocallyUpperBounded (hs : IsOpen s) (h : ∀ x ∈ s , ∃ t ∈ 𝓝[s] x , Convex ℝ t ∧ IsOpen t ∧ BddAbove (f '' t)) (hf : ConvexOn ℝ s f) diff --git a/Optlib/Convex/ImageSubgradientClosed.lean b/Optlib/Convex/ImageSubgradientClosed.lean index e0bb3d2..a89cbf4 100644 --- a/Optlib/Convex/ImageSubgradientClosed.lean +++ b/Optlib/Convex/ImageSubgradientClosed.lean @@ -4,7 +4,6 @@ Released under Apache 2.0 license as described in the file LICENSE. Author: Zichen Wang -/ import Optlib.Function.Proximal -import Mathlib.Topology.Instances.EReal open Set InnerProductSpace Topology Filter diff --git a/Optlib/Convex/QuasiConvexFirstOrder.lean b/Optlib/Convex/QuasiConvexFirstOrder.lean index e1db12a..0af9684 100644 --- a/Optlib/Convex/QuasiConvexFirstOrder.lean +++ b/Optlib/Convex/QuasiConvexFirstOrder.lean @@ -16,7 +16,8 @@ noncomputable section variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E] variable {f : E → ℝ} {f' : E → (E →L[ℝ] ℝ)} {s : Set E}{x: E} -theorem Quasiconvex_first_order_condition_right (h : HasFDerivAt f (f' x) x) (xs : x ∈ s) +omit [CompleteSpace E] in theorem Quasiconvex_first_order_condition_right + (h : HasFDerivAt f (f' x) x) (xs : x ∈ s) (hf: QuasiconvexOn ℝ s f) : ∀ y ∈ s, f y ≤ f x → f' x (y - x) ≤ 0 := by have h₁: ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ (x' : E), ‖x - x'‖ ≤ δ → ‖f x' - f x - (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by @@ -35,7 +36,7 @@ theorem Quasiconvex_first_order_condition_right (h : HasFDerivAt f (f' x) x) (xs let ε := (f' x) (y - x) / (2 * ‖x-y‖) have εpos: 0 < ε := by apply div_pos H - exact Real.mul_pos two_pos h₃ + exact mul_pos two_pos h₃ specialize h₁ ε εpos rcases h₁ with ⟨δ, dpos, converge⟩ let b1:= δ /(‖x - y‖) @@ -60,7 +61,7 @@ theorem Quasiconvex_first_order_condition_right (h : HasFDerivAt f (f' x) x) (xs _= (1 : ℝ) • x - a • x - b • y:= by rw [one_smul] _= b • (x - y) := by - rw [← sub_smul 1 a]; simp [a, b, sum_a_b]; rw[smul_sub b x y] + rw [← sub_smul 1 a]; simp [a, b]; rw[smul_sub b x y] have h01 : x' - x = b • (y - x) :=by rw [← neg_inj, ← smul_neg, neg_sub, neg_sub]; exact h10 have h1 : ‖x - x'‖ = ‖b • (x - y)‖ := by @@ -71,9 +72,7 @@ theorem Quasiconvex_first_order_condition_right (h : HasFDerivAt f (f' x) x) (xs have x1nbhd: ‖x - x'‖ ≤ δ := by rw [h1, h2] have h3: b * ‖x - y‖ ≤ b1 * ‖x - y‖:= by - rw [mul_le_mul_right] - apply min_le_left - exact h₃ + exact mul_le_mul_of_nonneg_right (min_le_left b1 1) (norm_nonneg _) have h4: b1 * ‖x - y‖= δ := by rw[div_mul_cancel₀] apply ne_of_gt h₃ diff --git a/Optlib/Convex/StronglyConvex.lean b/Optlib/Convex/StronglyConvex.lean index c36d40e..471aac1 100644 --- a/Optlib/Convex/StronglyConvex.lean +++ b/Optlib/Convex/StronglyConvex.lean @@ -76,7 +76,7 @@ theorem Strongly_Convex_Unique_Minima (hsc: StrongConvexOn s m f) {mp : m > 0} . linarith . apply pow_pos; linarith apply absurd (min xs) - simp [← xeq] + simp calc f x ≤ f xm - 2⁻¹ * 2⁻¹ * (m / 2 * ‖xm - xm'‖ ^ 2) := by apply sc _ < f xm := by apply lt_of_sub_pos; simp; apply nng @@ -99,7 +99,7 @@ lemma strongconvex_of_convex_add_sq (f : E → ℝ) (x : E) (hfun : ConvexOn ℝ apply add_le_add · rw [← smul_eq_mul, ← smul_eq_mul] apply hfun yin zin anneg bnneg absum1 - · field_simp; rw [div_le_div_right, add_sub] + · field_simp have eq1 : a • y + b • z - x = a • (y - x) + b • (z - x) := by rw [smul_sub, smul_sub, add_comm_sub, sub_sub, ← add_smul, add_comm b a] rw [absum1, one_smul, ← add_sub] @@ -107,22 +107,21 @@ lemma strongconvex_of_convex_add_sq (f : E → ℝ) (x : E) (hfun : ConvexOn ℝ + a * ‖u‖ ^ 2 - a * b * ‖u - v‖ ^ 2 := by rw [norm_add_sq_real, norm_sub_sq_real] rw [inner_smul_left, inner_smul_right, norm_smul, norm_smul]; field_simp - rw [add_comm (b * ‖v‖ ^ 2), mul_pow, sq_abs, mul_pow, sq_abs] - rw [mul_add, ← sub_sub, mul_sub, ← sub_add] - rw [add_sub_right_comm, add_sub_right_comm, ← sub_mul, ← add_sub, ← sub_mul] - nth_rw 3 [← mul_one a]; rw [← absum1, mul_add] - nth_rw 5 [← mul_one b]; rw [← absum1, mul_add, mul_comm b a] - rw [pow_two, pow_two b]; simp; rw [add_right_comm, add_left_cancel_iff] - rw [mul_mul_mul_comm, mul_comm a 2, mul_assoc] + simp [Real.norm_eq_abs, sq_abs] at * + have hab : b = 1 - a := by linarith + rw [hab] + ring have eq3 : y - z = (y - x) - (z - x) := by simp have eq4 (u v : E) : ‖a • u + b • v‖ ^ 2 ≤ b * ‖v‖ ^ 2 + a * ‖u‖ ^ 2 - a * b * ‖u - v‖ ^ 2 := by rw [eq2] let u := y - x let v := z - x - rw [eq1, eq3]; - show ‖a • u + b • v‖ ^ 2 ≤ b * ‖v‖ ^ 2 + a * ‖u‖ ^ 2 - a * b * ‖u - v‖ ^ 2 - apply eq4 u v - simp + rw [eq1] + calc + ‖a • (y - x) + b • (z - x)‖ ^ 2 + ≤ b * ‖z - x‖ ^ 2 + a * ‖y - x‖ ^ 2 - a * b * ‖y - z‖ ^ 2 := by + simpa [u, v] using (eq4 u v) + _ = b * ‖z - x‖ ^ 2 + a * (‖y - x‖ ^ 2 - b * ‖y - z‖ ^ 2) := by ring end Strongly_Convex @@ -131,7 +130,7 @@ section variable [CompleteSpace E] theorem Strong_Convex_lower (hsc : StrongConvexOn s m f) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) : - ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2 := by + ∀ x ∈ s, ∀ y ∈ s, inner ℝ (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2 := by intro x xs y ys have cvx := strongConvexOn_iff_convex.mp hsc have grd := sub_normsquare_gradient hf m @@ -142,7 +141,7 @@ theorem Strong_Convex_lower (hsc : StrongConvexOn s m f) (hf : ∀ x ∈ s, HasG apply grm theorem Lower_Strong_Convex (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (hs : Convex ℝ s) - (h : ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2) : + (h : ∀ x ∈ s, ∀ y ∈ s, inner ℝ (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2) : StrongConvexOn s m f := by apply strongConvexOn_iff_convex.mpr have grd := sub_normsquare_gradient hf m @@ -155,12 +154,12 @@ theorem Lower_Strong_Convex (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (hs : C apply h theorem Strong_Convex_iff_lower (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (hs : Convex ℝ s) : - StrongConvexOn s m f ↔ ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2 := + StrongConvexOn s m f ↔ ∀ x ∈ s, ∀ y ∈ s, inner ℝ (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2 := ⟨fun hsc x xs y ys ↦ Strong_Convex_lower hsc hf x xs y ys, fun h ↦ Lower_Strong_Convex hf hs h⟩ theorem Strong_Convex_second_lower (hsc: StrongConvexOn s m f) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) : ∀ x ∈ s, ∀ y ∈ s, - f y ≥ f x + inner (f' x) (y - x) + m / 2 * ‖y - x‖ ^ 2 := by + f y ≥ f x + inner ℝ (f' x) (y - x) + m / 2 * ‖y - x‖ ^ 2 := by intro x xs y ys have cvx := strongConvexOn_iff_convex.mp hsc have grd := sub_normsquare_gradient hf m x xs @@ -179,7 +178,7 @@ theorem Strong_Convex_second_lower (hsc: StrongConvexOn s m f) nth_rw 1 [← sub_self y] at foc rw [← sub_self x] at foc rw [sub_add, ← sub_add y x x, add_comm (y - x), inner_sub_right x, inner_add_right y] at foc - rw [real_inner_comm x y, sub_right_comm (inner x y), ← sub_sub, sub_self, sub_sub 0] at foc + rw [real_inner_comm x y, sub_right_comm (inner ℝ x y), ← sub_sub, sub_self, sub_sub 0] at foc rw [← inner_add_left, zero_sub, mul_neg, sub_neg_eq_add] at foc have : m = m / 2 * 2 := by simp nth_rw 1 [this] at foc diff --git a/Optlib/Convex/Subgradient.lean b/Optlib/Convex/Subgradient.lean index c9c48f2..b9377d4 100644 --- a/Optlib/Convex/Subgradient.lean +++ b/Optlib/Convex/Subgradient.lean @@ -189,11 +189,11 @@ theorem SubderivAt.convex : ∀ x, Convex ℝ (SubderivAt f x) := by have ineq1 : a • f y ≥ a • f x + a • ⟪g₁, y - x⟫ := by rw [← smul_add] apply smul_le_smul_of_nonneg_left (h1 y) lea - have ineq2 : b • f y ≥ b • f x + b • inner g₂ (y - x) := by + have ineq2 : b • f y ≥ b • f x + b • ⟪g₂, y - x⟫ := by rw [← smul_add] apply smul_le_smul_of_nonneg_left (h2 y) leb - have eq : (a • f x + a • inner g₁ (y - x)) + (b • f x + b • inner g₂ (y - x)) - = f x + inner (a • g₁ + b • g₂) (y - x) := by + have eq : (a • f x + a • ⟪g₁, y - x⟫) + (b • f x + b • ⟪g₂, y - x⟫) + = f x + ⟪a • g₁ + b • g₂, y - x⟫ := by rw [add_add_add_comm, ← Eq.symm (Convex.combo_self abeq (f x))] apply congrArg (HAdd.hAdd (f x)) rw [inner_add_left, inner_smul_left, inner_smul_left]; rfl @@ -208,11 +208,11 @@ theorem SubderivWithinAt.convex : ∀ x ∈ s, Convex ℝ (SubderivWithinAt f s have ineq1 : a • f y ≥ a • f x + a • ⟪g₁, y - x⟫ := by rw [← smul_add] apply smul_le_smul_of_nonneg_left (h1 y ys) lea - have ineq2 : b • f y ≥ b • f x + b • inner g₂ (y - x) := by + have ineq2 : b • f y ≥ b • f x + b • ⟪g₂, y - x⟫ := by rw [← smul_add] apply smul_le_smul_of_nonneg_left (h2 y ys) leb - have eq : (a • f x + a • inner g₁ (y - x)) + (b • f x + b • inner g₂ (y - x)) - = f x + inner (a • g₁ + b • g₂) (y - x) := by + have eq : (a • f x + a • ⟪g₁, y - x⟫) + (b • f x + b • ⟪g₂, y - x⟫) + = f x + ⟪a • g₁ + b • g₂, y - x⟫ := by rw [add_add_add_comm, ← Eq.symm (Convex.combo_self abeq (f x))] apply congrArg (HAdd.hAdd (f x)) rw [inner_add_left, inner_smul_left, inner_smul_left]; rfl @@ -224,8 +224,8 @@ theorem subgradientAt_mono {u v : E} {f : E → ℝ}{y : E} (hu : u ∈ SubderivAt f x) (hv : v ∈ SubderivAt f y) : ⟪u - v, x - y⟫ ≥ (0 : ℝ):= by specialize hu y; specialize hv x have ineq1 : ⟪u, x - y⟫ ≥ f x - f y := by - rw [congrArg (inner u) (Eq.symm (neg_sub y x)), inner_neg_right]; linarith - have _ : inner v (x - y) ≤ f x - f y := Iff.mpr le_sub_iff_add_le' hv + rw [congrArg (fun z => ⟪u, z⟫) (Eq.symm (neg_sub y x)), inner_neg_right]; linarith + have _ : ⟪v, x - y⟫ ≤ f x - f y := Iff.mpr le_sub_iff_add_le' hv rw [inner_sub_left]; linarith end congr @@ -259,7 +259,7 @@ theorem SubderivAt.nonempty (hf : ConvexOn ℝ univ f) (hc : ContinuousOn f univ have : x ∈ interior univ := by simp rw [← interior_univ] at hc obtain h := SubderivWithinAt.Nonempty hf hc x this - simp [h] + simp rcases h with ⟨a, ha⟩ exact ⟨a, ha⟩ @@ -286,14 +286,14 @@ theorem SubderivWithinAt_eq_gradient {f'x : E} (hx : x ∈ interior s) · use g; intro y ys apply Convex_first_order_condition' h hf (interior_subset hx) y ys intro g' hg'; by_contra neq - apply not_le_of_lt (norm_sub_pos_iff.mpr neq) + apply not_le_of_gt (norm_sub_pos_iff.mpr neq) let v := g' - g; obtain vneq := sub_ne_zero.mpr neq have : Tendsto (fun (t : ℝ) => (f (x + t • v) - f x - ⟪g, t • v⟫) * ‖t • v‖⁻¹) (𝓝[>] 0) (𝓝 0) := by rw [Metric.tendsto_nhdsWithin_nhds]; intro ε εpos - unfold HasFDerivAt at h' - rw [hasFDerivAtFilter_iff_tendsto, Metric.tendsto_nhds_nhds] at h' - obtain ⟨δ, δpos, hδ⟩ := h' ε εpos + have h'' := (hasGradientAt_iff_tendsto).mp h + rw [Metric.tendsto_nhds_nhds] at h'' + obtain ⟨δ, δpos, hδ⟩ := h'' ε εpos use (δ * ‖v‖⁻¹) obtain pos := mul_pos δpos (inv_pos.mpr (norm_pos_iff.mpr vneq)) constructor @@ -301,8 +301,10 @@ theorem SubderivWithinAt_eq_gradient {f'x : E} (hx : x ∈ interior s) intro t _ ht; rw [dist_eq_norm] at ht; rw [dist_eq_norm] have : dist (x + t • v) x < δ := by rw [dist_eq_norm, add_sub_cancel_left, norm_smul, ← (sub_zero t)] - apply lt_of_lt_of_eq ((mul_lt_mul_right (norm_sub_pos_iff.mpr neq)).mpr ht) - rw [mul_assoc, inv_mul_cancel₀ (norm_ne_zero_iff.mpr vneq), mul_one] + have hmul : ‖t - 0‖ * ‖v‖ < (δ * ‖v‖⁻¹) * ‖v‖ := + mul_lt_mul_of_pos_right ht (norm_pos_iff.mpr vneq) + have hnorm : ‖v‖⁻¹ * ‖v‖ = (1 : ℝ) := inv_mul_cancel₀ (norm_ne_zero_iff.mpr vneq) + simpa [mul_assoc, hnorm] using hmul specialize hδ this; rw [dist_eq_norm] at hδ have eq1 : ‖‖x + t • v - x‖⁻¹‖ = ‖t • v‖⁻¹ := by rw [add_sub_cancel_left, norm_inv, norm_norm] @@ -329,7 +331,7 @@ theorem SubderivWithinAt_eq_gradient {f'x : E} (hx : x ∈ interior s) rw [mem_ball_iff_norm, sub_zero] at tball rw [mem_ball_iff_norm, add_sub_cancel_left, norm_smul] have : ‖t‖ * ‖v‖ < ε * ‖v‖⁻¹ * ‖v‖ := by - apply (mul_lt_mul_right (norm_sub_pos_iff.mpr neq)).mpr tball + exact mul_lt_mul_of_pos_right tball (norm_pos_iff.mpr vneq) rwa [mul_assoc, inv_mul_cancel₀ (norm_ne_zero_iff.mpr vneq), mul_one] at this obtain ineq1 := hg' (x + t • v); rw [add_sub_cancel_left] at ineq1 have eq1 : ‖v‖ = (⟪g', t • v⟫ - ⟪g, t • v⟫) * ‖t • v‖⁻¹ := by @@ -353,8 +355,9 @@ theorem SubderivWithinAt_eq_gradient {f'x : E} (hx : x ∈ interior s) rw [this] rw [eq2, eq3, mul_eq_mul_right_iff]; left; rw [inner_sub_left] - rw [mem_setOf, eq1, mul_le_mul_right tvpos] - apply sub_le_sub_right (le_sub_iff_add_le'.mpr (ineq1 mems)) + rw [mem_setOf, eq1] + exact mul_le_mul_of_nonneg_right + (sub_le_sub_right (le_sub_iff_add_le'.mpr (ineq1 mems)) _) (le_of_lt tvpos) /-- Alternarive version for FDeriv --/ theorem SubderivWithinAt_eq_FDeriv {f' : E → (E →L[ℝ] ℝ)} (hx : x ∈ interior s) @@ -380,7 +383,7 @@ end equivalence section optimality_theory theorem HasSubgradientAt_zero_of_isMinOn (h : IsMinOn f univ x) : HasSubgradientAt f 0 x := - fun y => le_of_le_of_eq' (h trivial) (by rw [inner_zero_left, add_zero]) + fun y => by simpa [inner_zero_left] using h trivial theorem isMinOn_of_HasSubgradentAt_zero (h : HasSubgradientAt f 0 x) : IsMinOn f univ x := by intro y _; specialize h y @@ -393,7 +396,7 @@ theorem HasSubgradientAt_zero_iff_isMinOn : theorem HasSubgradientWithinAt_zero_of_isMinOn (h : IsMinOn f s x) : HasSubgradientWithinAt f 0 s x := - fun y ys => le_of_le_of_eq' (h ys) (by rw [inner_zero_left, add_zero]) + fun y ys => by simpa [inner_zero_left] using h ys theorem isMinOn_of_HasSubgradentWithinAt_zero (h : HasSubgradientWithinAt f 0 s x) : IsMinOn f s x := by @@ -432,10 +435,8 @@ variable {f : E → ℝ} {g : E} {x : E} {s : Set E} theorem HasSubgradientAt.pos_smul {c : ℝ} (h : HasSubgradientAt f g x) (hc : 0 < c) : HasSubgradientAt (c • f) (c • g) x := by intro y; rw [inner_smul_left] - have ineq : c * f y ≥ c * (f x + inner g (y - x)) := (mul_le_mul_left hc).mpr (h y) - have eq : c * (f x + inner g (y - x)) = c * f x + c * inner g (y - x) := - mul_add c (f x) (inner g (y - x)) - exact Eq.trans_le (id eq.symm) ineq + have ineq := mul_le_mul_of_nonneg_left (h y) (le_of_lt hc) + simpa [Pi.smul_apply, mul_add, ge_iff_le] using ineq theorem SubderivAt.pos_smul {c : ℝ} (hc : 0 < c) : SubderivAt (c • f) x = c • (SubderivAt f x) := by @@ -447,19 +448,20 @@ theorem SubderivAt.pos_smul {c : ℝ} (hc : 0 < c) : have neq : c ≠ 0 := ne_of_gt hc calc f y = c⁻¹ * (c * f y) := (eq_inv_mul_iff_mul_eq₀ neq).mpr rfl - _ ≥ c⁻¹ * (c * f x + inner g (y - x)) := + _ ≥ c⁻¹ * (c * f x + ⟪g, y - x⟫) := mul_le_mul_of_nonneg_left (hg y) (inv_nonneg.mpr (le_of_lt hc)) - _ = f x + inner (c⁻¹ • g) (y - x) := by + _ = f x + ⟪c⁻¹ • g, y - x⟫ := by rw [mul_add, inner_smul_left, ← ((eq_inv_mul_iff_mul_eq₀ neq).mpr rfl)] rfl exact smul_inv_smul₀ (ne_of_gt hc) g rintro ⟨gg, hgg, eq⟩; intro y calc - c * f y ≥ c * (f x + inner gg (y - x)) := (mul_le_mul_left hc).mpr (hgg y) - _ = c * f x + c * inner gg (y - x) := mul_add c (f x) (inner gg (y - x)) - _ = c * f x + inner (c • gg) (y - x) := by + c * f y ≥ c * (f x + ⟪gg, y - x⟫) := by + exact mul_le_mul_of_nonneg_left (hgg y) (le_of_lt hc) + _ = c * f x + c * ⟪gg, y - x⟫ := mul_add c (f x) ⟪gg, y - x⟫ + _ = c * f x + ⟪c • gg, y - x⟫ := by rw [inner_smul_left]; exact rfl - _ = c * f x + inner g (y - x) := by rw [← eq] + _ = c * f x + ⟪g, y - x⟫ := by rw [← eq] /-- Subderivatives of the sum of two functions is a subset of the sum of the subderivatives of the two functions --/ @@ -493,7 +495,7 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( rw [SubderivAt, SubderivAt, SubderivAt, Set.subset_def] intro g hg rw [Set.mem_setOf] at hg; rw [Set.mem_add] - let S₁ := {(x, y) : E × ℝ | y > f₁ (x + x₀) - f₁ x₀ - inner g x} + let S₁ := {(x, y) : E × ℝ | y > f₁ (x + x₀) - f₁ x₀ - ⟪g, x⟫} let S₂ := {(x, y) : E × ℝ | y ≤ f₂ x₀ - f₂ (x + x₀)} have hs1 : Convex ℝ S₁ := by @@ -537,8 +539,8 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( rw [← eq'] at hh; rw [eq] apply le_trans (add_le_add hi hj) hh have hint : Disjoint S₁ S₂ := by - rw [disjoint_iff]; by_contra joint - obtain ⟨⟨x, y⟩, ⟨hp1, hp2⟩⟩ := nmem_singleton_empty.mp joint + rw [Set.disjoint_iff_inter_eq_empty, Set.eq_empty_iff_forall_notMem] + rintro ⟨x, y⟩ ⟨hp1, hp2⟩ rw [Set.mem_setOf] at hp1 hp2 specialize hg (x + x₀); rw [← add_sub, sub_self, add_zero] at hg apply not_le_of_gt ?_ hg @@ -550,7 +552,7 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( f₁ x₀ + f₂ x₀ + ⟪g, x⟫_ℝ - (f₁ (x + x₀) + f₂ (x + x₀)) := by ring rwa [hh x₀, hh (x + x₀), ← eq] have hso : IsOpen S₁ := by - apply Continuous_epi_open (f₁ := fun x ↦ f₁ (x + x₀) - f₁ x₀ - inner g x) + apply Continuous_epi_open (f₁ := fun x ↦ f₁ (x + x₀) - f₁ x₀ - ⟪g, x⟫) apply ContinuousOn.sub · apply ContinuousOn.sub · apply ContinuousOn.comp (g := f₁) (f := fun x ↦ x + x₀) (t := univ) hcon @@ -560,21 +562,23 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( apply ContinuousOn.inner continuousOn_const continuousOn_id obtain ⟨f, c, ⟨hsl, hsr⟩⟩ := geometric_hahn_banach_open hs1 hso hs2 hint - have eq : ∃ a : E, ∃ b : ℝ, ∀ (p : E × ℝ), f p = inner a p.1 + b * p.2 := by + have eq : ∃ a : E, ∃ b : ℝ, ∀ (p : E × ℝ), f p = ⟪a, p.1⟫ + b * p.2 := by let f1 := ContinuousLinearMap.comp f (ContinuousLinearMap.inl ℝ E ℝ) let f2 := ContinuousLinearMap.comp f (ContinuousLinearMap.inr ℝ E ℝ) use (toDual ℝ E).symm f1 use (toDual ℝ ℝ).symm f2 intro p - have : ((toDual ℝ ℝ).symm f2) * p.2 = inner (((toDual ℝ ℝ).symm f2)) p.2 := by - simp [f2] - have : ((toDual ℝ ℝ).symm f2) * p.2 = f2 p.2 := by - rw [this] - simp only [toDual_symm_apply, ContinuousLinearMap.coe_comp', Function.comp_apply, - ContinuousLinearMap.inl_apply, ContinuousLinearMap.inr_apply] - rw [this]; simp [f1, f2] + have hf2 : ((toDual ℝ ℝ).symm f2) * p.2 = f2 p.2 := by + have hinner : ⟪(toDual ℝ ℝ).symm f2, p.2⟫ = f2 p.2 := + InnerProductSpace.toDual_symm_apply (𝕜 := ℝ) (E := ℝ) (x := p.2) (y := f2) + calc + ((toDual ℝ ℝ).symm f2) * p.2 = p.2 * ((toDual ℝ ℝ).symm f2) := by ring + _ = ⟪(toDual ℝ ℝ).symm f2, p.2⟫ := by rfl + _ = f2 p.2 := hinner + rw [hf2]; simp [f2] have : (p.1, (0 : ℝ)) + ((0 : E), p.2) = p := by simp nth_rw 1 [← this]; rw [ContinuousLinearMap.map_add] + simp [f1] rcases eq with ⟨a, b, hab⟩ have hin : (0, 0) ∈ S₂ := by rw [Set.mem_setOf]; simp @@ -594,11 +598,12 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( push_neg at hb have pos : (c / (2 * b)) > 0 := by apply div_pos_of_neg_of_neg hc (by linarith) - specialize (htp (c / (2 * b)) pos); field_simp [hb] at htp - have eq : b * c / (2 * b) = c / 2 := by - ring_nf; simp; field_simp [hb] - rw [mul_div_right_comm, div_self (by linarith), one_mul] - rw [eq] at htp; linarith + specialize (htp (c / (2 * b)) pos) + have hb0 : b ≠ 0 := by linarith + have hhalf : b * (c / (2 * b)) = c / 2 := by + field_simp [hb0] + have : c / 2 < c := by linarith [htp, hhalf] + linarith have bleq0 : b < 0 := by rw [ceq0] at htp specialize htp 1 (by linarith); rw [mul_one] at htp; linarith @@ -653,7 +658,7 @@ theorem SubderivAt_of_norm_at_zero : SubderivAt (fun (x : E) => ‖x‖) 0 = {g apply not_lt.mpr hg this intro hg y calc - ‖(0 : E)‖ + inner g (y - 0) = inner g y := by simp only [norm_zero, zero_add, sub_zero] + ‖(0 : E)‖ + ⟪g, y - 0⟫ = ⟪g, y⟫ := by simp only [norm_zero, zero_add, sub_zero] _ ≤ ‖g‖ * ‖y‖ := real_inner_le_norm g y _ ≤ 1 * ‖y‖ := mul_le_mul_of_nonneg_right hg (norm_nonneg y) _ = ‖y‖ := by simp only [one_mul] @@ -684,9 +689,9 @@ theorem SubderivAt_abs (x : ℝ) : have ineq : (0 : ℝ) < 0 := by calc 0 ≥ x + g * (-x):= by - simp only [abs_zero, zero_sub, abs_pos_of_pos, abs_of_pos hx] at hg - have : inner g (-x) = g * (-x) := by rfl - rwa [this] at hg + simp only [abs_zero, zero_sub, abs_of_pos hx] at hg + have hinner : ⟪g, -x⟫ = (-x) * g := by rfl + rwa [hinner, mul_comm] at hg _ = x * (1 - g) := by ring _ > 0 := mul_pos hx (by linarith) exact LT.lt.false ineq @@ -696,7 +701,9 @@ theorem SubderivAt_abs (x : ℝ) : apply glt have h1: g ≤ 1 := by calc - g = inner g 1 := by simp + g = ⟪g, 1⟫ := by + rw [show ⟪g, 1⟫ = (1 : ℝ) * g by rfl] + ring _ ≤ 1 := hg simp only [Real.sign_of_pos hx] at gne exact Ne.lt_of_le gne h1 @@ -706,14 +713,18 @@ theorem SubderivAt_abs (x : ℝ) : by_cases glt : g < -1 · specialize hg (x - 1) have : x - 1 < 0 := by linarith - simp only [abs_of_neg this, abs_of_neg hx, abs_zero, zero_sub] at hg + simp only [abs_of_neg this, abs_of_neg hx] at hg have : -g ≤ 1 := by calc - -g = inner g (x - 1 - x) := by simp + -g = ⟪g, x - 1 - x⟫ := by + rw [show ⟪g, x - 1 - x⟫ = (x - 1 - x) * g by rfl] + ring _ ≤ 1 := by linarith [hg] linarith specialize hg 0 - have eq1 : inner g (-x) = g * (-x) := rfl + have eq1 : ⟪g, -x⟫ = g * (-x) := by + rw [show ⟪g, -x⟫ = (-x) * g by rfl] + ring have eq2 : -x + g * -x = -x * (1 + g) := by ring simp only [abs_zero, zero_sub, abs_of_neg hx, eq1, eq2] at hg have : -x * (1 + g) > 0 := by @@ -725,14 +736,18 @@ theorem SubderivAt_abs (x : ℝ) : by_cases hx : x > 0 · simp only [Real.sign_of_pos hx] at hg calc - |x| + inner g (y - x) = x + inner 1 (y - x) := by rw [abs_of_pos hx, hg] - _ = y := by simp + |x| + ⟪g, y - x⟫ = x + ⟪1, y - x⟫ := by rw [abs_of_pos hx, hg] + _ = y := by + rw [show ⟪(1 : ℝ), y - x⟫ = (y - x) * 1 by rfl] + ring _ ≤ |y| := le_abs_self y have hx : x < 0 := Ne.lt_of_le h (not_lt.mp hx) simp only [Real.sign_of_neg hx] at hg calc - |x| + inner g (y - x) = -x + inner (-1) (y - x) := by rw [abs_of_neg hx, hg] - _ = -y := by simp; ring + |x| + ⟪g, y - x⟫ = -x + ⟪-1, y - x⟫ := by rw [abs_of_neg hx, hg] + _ = -y := by + rw [show ⟪(-1 : ℝ), y - x⟫ = (y - x) * (-1) by rfl] + ring _ ≤ |y| := neg_le_abs y end diff --git a/Optlib/Differential/Calculation.lean b/Optlib/Differential/Calculation.lean index 89e163e..0677330 100644 --- a/Optlib/Differential/Calculation.lean +++ b/Optlib/Differential/Calculation.lean @@ -50,7 +50,7 @@ theorem HasGradientAtFilter.comp have eq : (starRingEnd 𝕜) g' • (toDual 𝕜 F) f' = (toDual 𝕜 F) (g' • f') := by rw [map_smulₛₗ] rw [HasGradientAtFilter, ← eq] - exact hg.hasDerivAtFilter.comp_hasFDerivAtFilter x hf hL + exact hg.hasDerivAtFilter.comp_hasFDerivAtFilter hf <| hL.prodMap <| by simp theorem HasGradientWithinAt.comp (hg : HasGradientWithinAt g g' t (f x)) (hf : HasGradientWithinAt f f' s x) @@ -184,27 +184,28 @@ section Sum /-! ### Derivative of a finite sum of functions -/ -open BigOperators Asymptotics +open scoped BigOperators +open Asymptotics variable {ι : Type*} {u : Finset ι} {A : ι → F → 𝕜} {A' : ι → F} theorem HasGradientAtFilter.sum (h : ∀ i ∈ u, HasGradientAtFilter (A i) (A' i) x L) : - HasGradientAtFilter (fun y => ∑ i in u, A i y) (∑ i in u, A' i) x L := by - have : ∑ i in u, (toDual 𝕜 F) (A' i) = (toDual 𝕜 F) (∑ i in u, A' i) := by + HasGradientAtFilter (fun y => Finset.sum u fun i => A i y) (Finset.sum u A') x L := by + have : Finset.sum u (fun i => (toDual 𝕜 F) (A' i)) = (toDual 𝕜 F) (Finset.sum u A') := by rw [map_sum] rw [HasGradientAtFilter, ← this]; unfold HasGradientAtFilter at h - exact HasFDerivAtFilter.sum h + exact HasFDerivAtFilter.fun_sum h theorem HasGradientWithinAt.sum (h : ∀ i ∈ u, HasGradientWithinAt (A i) (A' i) s x) : - HasGradientWithinAt (fun y => ∑ i in u, A i y) (∑ i in u, A' i) s x := by + HasGradientWithinAt (fun y => Finset.sum u fun i => A i y) (Finset.sum u A') s x := by exact HasGradientAtFilter.sum h theorem HasGradientAt.sum (h : ∀ i ∈ u, HasGradientAt (A i) (A' i) x) : - HasGradientAt (fun y => ∑ i in u, A i y) (∑ i in u, A' i) x := by + HasGradientAt (fun y => Finset.sum u fun i => A i y) (Finset.sum u A') x := by exact HasGradientAtFilter.sum h theorem gradient_sum (h : ∀ i ∈ u, DifferentiableAt 𝕜 (A i) x) : - ∇ (fun y => ∑ i in u, A i y) x = ∑ i in u, ∇ (A i) x := + ∇ (fun y => Finset.sum u fun i => A i y) x = Finset.sum u fun i => ∇ (A i) x := (HasGradientAt.sum fun i hi => (h i hi).hasGradientAt).gradient end Sum @@ -228,8 +229,11 @@ theorem HasGradientAt.neg (h : HasGradientAt f f' x) : exact HasGradientAtFilter.neg h theorem gradient_neg : ∇ (fun y => - f y) x = - ∇ f x := by - unfold gradient - simp only [fderiv_neg, map_neg] + by_cases h : DifferentiableAt 𝕜 f x + · exact (h.hasGradientAt.neg).gradient + · rw [gradient_eq_zero_of_not_differentiableAt h, + gradient_eq_zero_of_not_differentiableAt (by simpa [differentiableAt_neg_iff] using h)] + simp end Neg @@ -300,8 +304,7 @@ open ContinuousLinearMap lemma equiv_lemma_mul : c x • (toDual 𝕜 F) d' + d x • (toDual 𝕜 F) c' = (toDual 𝕜 F) ((starRingEnd 𝕜) (c x) • d' + (starRingEnd 𝕜) (d x) • c'):= by - simp - congr <;> exact SemilinearMapClass.map_smul_inv _ _ _ + rw [map_add, map_smulₛₗ, map_smulₛₗ, starRingEnd_self_apply, starRingEnd_self_apply] theorem HasGradientAt.mul (hc : HasGradientAt c c' x) (hd : HasGradientAt d d' x) : HasGradientAt (fun y => c y * d y) diff --git a/Optlib/Differential/GradientDiv.lean b/Optlib/Differential/GradientDiv.lean index 57ad10f..9755968 100644 --- a/Optlib/Differential/GradientDiv.lean +++ b/Optlib/Differential/GradientDiv.lean @@ -48,14 +48,14 @@ lemma Simplifying₁ (h₁ : a ≠ 0) (h₂ : b ≠ 0) (h₃ : ‖b‖ / 2 ≤ simp only [one_div, div_inv_eq_mul, one_mul] have l₃ : |b * b * b| / 2 = |b * b| * (|b| / 2) := by rw [mul_div, abs_mul] have l₄ : |b * b * a| = |b * b| * |a| := by rw [abs_mul] - rw [l₃, l₄, mul_le_mul_left] - apply h₃ - rw [abs_pos] - simp only [ne_eq, mul_eq_zero, or_self] - apply h₂ - have : 0 < |b * b * b| := by rw [abs_pos]; simp [h₂] - have : 0 < |b * b * b| / 2 := half_pos this - apply this + rw [l₃, l₄] + exact mul_le_mul_of_nonneg_left h₃ (abs_nonneg (b * b)) + have : 0 < |b * b * b| / 2 := by + have hb : 0 < |b * b * b| := by + rw [abs_pos] + simp [h₂] + exact div_pos hb (by norm_num) + exact this have : 0 < |b * b * a| := by rw [abs_pos]; simp [h₁, h₂] exact Iff.mpr one_div_pos this @@ -81,9 +81,7 @@ lemma div_div_mul (h₁ : a / b ≤ c) (h₂ : 0 < a) (h₃ : 0 < b) (h₄ : 0 < 1 / c ≤ b / a := by have : a ≤ c * b := Iff.mp (div_le_iff₀ h₃) h₁ have : a ≤ b * c := by linarith - apply Iff.mpr (div_le_div_iff h₄ h₂) - rw [one_mul] - apply this + exact (div_le_div_iff₀ h₄ h₂).2 <| by simpa [mul_comm, mul_left_comm, mul_assoc] using this theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : ℝ)): HasGradientAt (fun y => (1 : ℝ) / (f y)) (- ((1 : ℝ) / (f x) ^ (2 : ℕ)) • grad) x := by @@ -251,10 +249,10 @@ theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : have h₂ : min δ₀ δ₂ ≤ δ₂ := by exact min_le_right δ₀ δ₂ apply le_trans h' h₂ - have zp1 :‖f x * (f x - f x' + inner grad (x' - x)) / (f x * f x * f x')‖ = - ‖(f x - f x' + inner grad (x' - x)) / (f x * f x')‖ := by + have zp1 :‖f x * (f x - f x' + ⟪grad, (x' - x)⟫) / (f x * f x * f x')‖ = + ‖(f x - f x' + ⟪grad, (x' - x)⟫) / (f x * f x')‖ := by rw [mul_comm, mul_assoc (f x) (f x) (f x'), - div_mul_eq_div_div ((f x - f x' + inner grad (x' - x)) * (f x)) (f x) (f x * f x'), mul_div_cancel_right₀] + div_mul_eq_div_div ((f x - f x' + ⟪grad, (x' - x)⟫) * (f x)) (f x) (f x * f x'), mul_div_cancel_right₀] apply h₁ have zp2 : ‖f x‖ * ‖f x‖/2 ≤ ‖f x * f x'‖ := by @@ -305,16 +303,16 @@ theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : _ = (ε / 2) * ‖x' - x‖ := by rw [div_self (mul_ne_zero l l), mul_one, norm_sub_rev] calc - ‖f x * (f x - f x' + inner grad (x' - x)) / (f x * f x * f x')‖ = - ‖(f x - f x' + inner grad (x' - x)) / (f x * f x')‖ := by + ‖f x * (f x - f x' + ⟪grad, (x' - x)⟫) / (f x * f x * f x')‖ = + ‖(f x - f x' + ⟪grad, (x' - x)⟫) / (f x * f x')‖ := by apply zp1 - _ = ‖(f x - f x' + inner grad (x' - x))‖ * ‖1/(f x * f x')‖ := by + _ = ‖(f x - f x' + ⟪grad, (x' - x)⟫)‖ * ‖1/(f x * f x')‖ := by apply Vert_div - _ ≤ ‖(f x - f x' + inner grad (x' - x))‖ * (2 / (‖f x‖ * ‖f x‖)) := by + _ ≤ ‖(f x - f x' + ⟪grad, (x' - x)⟫)‖ * (2 / (‖f x‖ * ‖f x‖)) := by apply mul_le_mul_of_nonneg_left zp3 apply norm_nonneg _ ≤ ((ε * ‖f x‖ * ‖f x‖/4) * ‖x - x'‖) * (2 / (‖f x‖ * ‖f x‖)) := by - have : ‖(f x - f x' + inner grad (x' - x))‖ ≤ (ε * ‖f x‖ * ‖f x‖/4) * ‖x - x'‖ := by + have : ‖(f x - f x' + ⟪grad, (x' - x)⟫)‖ ≤ (ε * ‖f x‖ * ‖f x‖/4) * ‖x - x'‖ := by apply hδ₂ apply hp₂ apply mul_le_mul_of_nonneg_right this @@ -385,22 +383,22 @@ theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : apply Eq.symm (mul_div_mul_left (⟪grad, (x' - x)⟫) ((f x) * (f x)) l') have k₆ : (f x - f x') * f x /(f x' * f x * f x) + f x' * (⟪grad, (x' - x)⟫)/(f x' * f x * f x) = ((f x - f x') * f x + f x' * (⟪grad, (x' - x)⟫))/(f x' * f x * f x) := by - apply div_add_div_same ((f x - f x') * f x) (f x' * (⟪grad, (x' - x)⟫)) (f x' * f x * f x) - have k₇ : ((f x - f x') * f x + f x' * inner grad (x' - x)) / (f x' * f x * f x) = - (f x * (f x - f x' + inner grad (x' - x)) + - (f x' * inner grad (x' - x) - f x * inner grad (x' - x))) / (f x' * f x * f x) := by - have h' : (f x - f x') * f x + f x' * inner grad (x' - x) = - f x * (f x - f x' + inner grad (x' - x)) + - (f x' * (inner grad (x' - x)) - f x * (inner grad (x' - x))) := by + exact (add_div _ _ _).symm + have k₇ : ((f x - f x') * f x + f x' * ⟪grad, (x' - x)⟫) / (f x' * f x * f x) = + (f x * (f x - f x' + ⟪grad, (x' - x)⟫) + + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)) / (f x' * f x * f x) := by + have h' : (f x - f x') * f x + f x' * ⟪grad, (x' - x)⟫ = + f x * (f x - f x' + ⟪grad, (x' - x)⟫) + + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫) := by linarith rw [h'] - have k₈ : (f x * (f x - f x' + inner grad (x' - x)) + - (f x' * inner grad (x' - x) - f x * inner grad (x' - x))) / - (f x * f x * f x') = f x * (f x - f x' + inner grad (x' - x))/ - (f x * f x * f x') + (f x' * inner grad (x' - x) - f x * inner grad (x' - x))/ + have k₈ : (f x * (f x - f x' + ⟪grad, (x' - x)⟫) + + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)) / + (f x * f x * f x') = f x * (f x - f x' + ⟪grad, (x' - x)⟫)/ + (f x * f x * f x') + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)/ (f x * f x * f x') := by - apply add_div ((f x) * (f x - f x' + inner grad (x' - x))) - (f x' * inner grad (x' - x) - f x * inner grad (x' - x)) (f x * f x * f x') + apply add_div ((f x) * (f x - f x' + ⟪grad, (x' - x)⟫)) + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫) (f x * f x * f x') have k₉ : f x' * f x * f x = f x * f x * f x' := by linarith have p₁ : ‖1 / f x' - 1 / f x - (- (⟪grad, (x' - x)⟫))/((f x) * (f x))‖ ≤ ε * ‖x' - x‖ := by rw [k₄, k₁, k₂, k₅] @@ -409,22 +407,22 @@ theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : rw [this] rw [k₆, k₇, k₉] calc - ‖(f x * (f x - f x' + inner grad (x' - x)) + - (f x' * inner grad (x' - x) - f x * inner grad (x' - x))) / - (f x * f x * f x')‖ = ‖f x * (f x - f x' + inner grad (x' - x))/ - (f x * f x * f x') + (f x' * inner grad (x' - x) - f x * inner grad (x' - x))/ + ‖(f x * (f x - f x' + ⟪grad, (x' - x)⟫) + + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)) / + (f x * f x * f x')‖ = ‖f x * (f x - f x' + ⟪grad, (x' - x)⟫)/ + (f x * f x * f x') + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)/ (f x * f x * f x')‖ := by rw [k₈] - _ ≤ ‖f x * (f x - f x' + inner grad (x' - x))/ - (f x * f x * f x')‖ + ‖(f x' * inner grad (x' - x) - f x * inner grad (x' - x))/ + _ ≤ ‖f x * (f x - f x' + ⟪grad, (x' - x)⟫)/ + (f x * f x * f x')‖ + ‖(f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)/ (f x * f x * f x')‖ := by - apply norm_add_le ((f x) * (f x - f x' + inner grad (x' - x))/ - (f x * f x * f x')) ((f x' * inner grad (x' - x) - f x * inner grad (x' - x))/ + apply norm_add_le ((f x) * (f x - f x' + ⟪grad, (x' - x)⟫)/ + (f x * f x * f x')) ((f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)/ (f x * f x * f x')) _ ≤ (ε/2) * ‖x' - x‖ + (ε/2) * ‖x' - x‖ := by exact add_le_add (hδ₅ x' hp₂) (hδ₄ x' hp₁) _ = ε * ‖x' - x‖ := by linarith have j₁ : ‖1 / f x' - 1 / f x - (- (⟪grad, (x' - x)⟫))/((f x) * (f x))‖ = ‖1 / f x' - 1 / f x - - inner ((-(1 / f x ^ ↑2) • grad)) (x' - x)‖ := by + ⟪(-(1 / f x ^ ↑2) • grad), (x' - x)⟫‖ := by congr; rw [k₃] rw [j₁] at p₁ have l1 : ‖x - x'‖ = ‖x' - x‖ := by diff --git a/Optlib/Differential/Lemmas.lean b/Optlib/Differential/Lemmas.lean index a8f3203..93887fa 100644 --- a/Optlib/Differential/Lemmas.lean +++ b/Optlib/Differential/Lemmas.lean @@ -4,9 +4,12 @@ Released under Apache 2.0 license as described in the file LICENSE. Authors: Chenyi Li -/ import Mathlib.Analysis.Calculus.MeanValue +import Mathlib.Analysis.Calculus.Deriv.MeanValue import Mathlib.Analysis.Calculus.ContDiff.Defs -import Mathlib.Topology.Semicontinuous +import Mathlib.Analysis.Calculus.Gradient.Basic import Mathlib.Analysis.Normed.Lp.ProdLp +import Mathlib.Analysis.Normed.Operator.BoundedLinearMaps +import Mathlib.Topology.Semicontinuity.Basic import Optlib.Differential.Calculation /-! @@ -44,7 +47,9 @@ lemma bounded_lowersemicontinuous_to_epi_closed (f : E → ℝ) (hc : LowerSemic rcases xntend with ⟨xtend, ytend⟩ rw [LowerSemicontinuousOn] at hc specialize hc p.1 - simp at hc; rw [LowerSemicontinuousWithinAt, nhdsWithin_univ] at hc + simp at hc + have hc' : ∀ y < f p.1, ∀ᶠ x' in 𝓝 p.1, y < f x' := by + simpa [SemicontinuousWithinAt, nhdsWithin_univ] using hc let linf := liminf (fun n ↦ f (xn n).1) atTop have aux : Tendsto (fun n ↦ (xn n).2) atTop (nhds p.2) ↔ ∀ ε > 0, ∃ N, ∀ n ≥ N, (fun n ↦ (xn n).2) n ∈ Ioo (p.2 - ε) (p.2 + ε) := by @@ -55,7 +60,7 @@ lemma bounded_lowersemicontinuous_to_epi_closed (f : E → ℝ) (hc : LowerSemic by_contra h; push_neg at h let t := (linf + f p.1) / 2 have tin : t < f p.1 := add_div_two_lt_right.2 h - specialize hc t tin + specialize hc' t tin have ieq2 : t ≤ linf := by apply le_liminf_of_le · rw [Filter.IsCoboundedUnder, Filter.IsCobounded] @@ -74,7 +79,7 @@ lemma bounded_lowersemicontinuous_to_epi_closed (f : E → ℝ) (hc : LowerSemic let auxlt := fun x : E ↦ (t < f x) have le_of_lt : ∀ x : E, auxlt x → auxle x := by simp [auxlt]; intro x cd; exact le_of_lt cd - apply Eventually.mono hc le_of_lt + apply Eventually.mono hc' le_of_lt contrapose! ieq2 apply left_lt_add_div_two.2 h have ieq3 : linf ≤ p.2 := by @@ -164,7 +169,7 @@ lemma continuous_positive_direction [NormedSpace ℝ E] (h : ContinuousAt f x) ( obtain ⟨δ, hδ1, hδ2⟩ := continuous_positive_neighborhood h hx by_cases hv : v = 0 · rw [hv]; simp; use 1; constructor; linarith; intro t _ _; exact hx - have : ‖v‖ > 0 := norm_pos_iff'.mpr hv + have : ‖v‖ > 0 := by simpa [gt_iff_lt] using (norm_pos_iff.2 hv) use δ / (2 * ‖v‖); constructor; positivity intro y hy obtain hδ2 := hδ2 (x + y • v) @@ -173,7 +178,7 @@ lemma continuous_positive_direction [NormedSpace ℝ E] (h : ContinuousAt f x) ( simp at hy; rw [norm_smul]; simp; rw [abs_of_nonneg hy.1] calc _ ≤ δ / (2 * ‖v‖) * ‖v‖ := (mul_le_mul_iff_of_pos_right this).mpr hy.2 - _ = δ / 2 := by field_simp; ring + _ = δ / 2 := by field_simp _ < δ := by linarith exact hδ2 this @@ -207,43 +212,41 @@ theorem deriv_function_comp_segment (x y : E) (h₁ : ∀ x₁ : E, HasFDerivAt theorem HasFDeriv_Convergence (h: HasFDerivAt f (f' x) x) : ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ (x' : E), ‖x - x'‖ ≤ δ → ‖f x' - f x - (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by - rw [HasFDerivAt, hasFDerivAtFilter_iff_isLittleO, Asymptotics.isLittleO_iff] at h + rw [hasFDerivAt_iff_isLittleO_nhds_zero, Asymptotics.isLittleO_iff] at h intro ε epos specialize h epos rw [Filter.Eventually] at h - let t := {x_1 | ‖f x_1 - f x - (f' x) (x_1 - x)‖ ≤ ε * ‖x_1 - x‖} - have h₁: ∃ ε1 > (0 : ℝ), Metric.ball x ε1 ⊆ t := Iff.mp Metric.mem_nhds_iff h + let t := {h : E | ‖f (x + h) - f x - (f' x) h‖ ≤ ε * ‖h‖} + have h₁ : ∃ ε1 > (0 : ℝ), Metric.ball (0 : E) ε1 ⊆ t := Iff.mp Metric.mem_nhds_iff h rcases h₁ with ⟨e1, e1pos, h₁⟩ use (e1 / 2); constructor exact (half_pos e1pos) intro x' xnhds - have h₂: x' ∈ Metric.ball x e1:= by - rw [Metric.mem_ball, dist_comm] - rw [← dist_eq_norm] at xnhds + have h₂ : x' - x ∈ Metric.ball (0 : E) e1 := by + rw [Metric.mem_ball, dist_zero_right] + rw [← norm_neg (x - x'), neg_sub] at xnhds apply lt_of_le_of_lt xnhds (half_lt_self e1pos) - have h₃: x' ∈ t := h₁ h₂ + have h₃ : x' - x ∈ t := h₁ h₂ rw [Set.mem_setOf] at h₃ - rw [norm_sub_rev x] - exact h₃ + simpa [add_sub_cancel, ContinuousLinearMap.map_sub, norm_sub_rev] using h₃ theorem Convergence_HasFDeriv (h : ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ (x' : E), ‖x - x'‖ ≤ δ → ‖f x' - f x - (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖) : HasFDerivAt f (f' x) x := by - rw [HasFDerivAt, hasFDerivAtFilter_iff_isLittleO, Asymptotics.isLittleO_iff] + rw [hasFDerivAt_iff_isLittleO_nhds_zero, Asymptotics.isLittleO_iff] intro ε epos - rw [Filter.Eventually] specialize h ε epos rcases h with ⟨δ, dpos, h⟩ + rw [Filter.Eventually] rw [Metric.mem_nhds_iff] - use δ ; constructor + use δ; constructor apply dpos - intro x' x1mem - have h1: ‖x - x'‖ ≤ δ:= by - rw [Metric.ball, Set.mem_setOf, dist_comm, dist_eq_norm] at x1mem - exact LT.lt.le x1mem - specialize h x' h1 - rw[Set.mem_setOf, norm_sub_rev x'] - apply h + intro h' x1mem + have h1 : ‖x - (x + h')‖ ≤ δ := by + rw [Metric.mem_ball, dist_comm, dist_eq_norm] at x1mem + simpa [sub_eq_add_neg, add_assoc] using LT.lt.le x1mem + specialize h (x + h') h1 + simpa [sub_eq_add_neg, add_comm, add_left_comm, add_assoc] using h theorem HasFDeriv_iff_Convergence_Point {f'x : (E →L[ℝ] ℝ)}: HasFDerivAt f (f'x) x ↔ ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ (x' : E), @@ -273,7 +276,7 @@ open Topology InnerProductSpace Set Filter Tendsto theorem HasGradient_Convergence (h : HasGradientAt f (f' x) x) : ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, ‖x - x'‖ ≤ δ - → ‖f x' - f x - inner (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by + → ‖f x' - f x - inner ℝ (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by rw [hasGradientAt_iff_hasFDerivAt] at h show ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ (x' : E), ‖x - x'‖ ≤ δ → ‖f x' - f x - ((toDual ℝ E) (f' x)) (x' - x)‖ ≤ ε * ‖x - x'‖ @@ -281,14 +284,15 @@ theorem HasGradient_Convergence (h : HasGradientAt f (f' x) x) : exact h theorem Convergence_HasGradient (h : ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, - ‖x - x'‖ ≤ δ → ‖f x' - f x - inner (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖) : + ‖x - x'‖ ≤ δ → ‖f x' - f x - inner ℝ (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖) : HasGradientAt f (f' x) x := by rw [hasGradientAt_iff_hasFDerivAt] - exact HasFDeriv_iff_Convergence_Point.mpr h + simpa using (HasFDeriv_iff_Convergence_Point (f := f) (x := x) + (f'x := (toDual ℝ E) (f' x))).2 h theorem HasGradient_iff_Convergence_Point {f'x : E}: HasGradientAt f f'x x ↔ ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, - ‖x - x'‖ ≤ δ → ‖f x' - f x - inner (f'x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by + ‖x - x'‖ ≤ δ → ‖f x' - f x - inner ℝ f'x (x' - x)‖ ≤ ε * ‖x - x'‖ := by constructor · intro h; apply HasGradient_Convergence exact h @@ -296,7 +300,7 @@ theorem HasGradient_iff_Convergence_Point {f'x : E}: theorem HasGradient_iff_Convergence : HasGradientAt f (f' x) x ↔ ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, - ‖x - x'‖ ≤ δ → ‖f x' - f x - inner (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by + ‖x - x'‖ ≤ δ → ‖f x' - f x - inner ℝ (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by constructor apply HasGradient_Convergence apply Convergence_HasGradient @@ -321,7 +325,7 @@ lemma gradient_norm_sq_eq_two_self (x : E) : apply pow_two_nonneg lemma gradient_of_inner_const (x : E) (a : E): - HasGradientAt (fun x ↦ (inner a x : ℝ)) a x := by + HasGradientAt (fun x ↦ (inner ℝ a x : ℝ)) a x := by apply HasGradient_iff_Convergence_Point.mpr simp only [gt_iff_lt, Real.norm_eq_abs] intros ε εpos @@ -335,10 +339,9 @@ lemma gradient_of_const_mul_norm (l : ℝ) (z : E) : HasGradientAt (fun (x : E) => l / 2 * ‖x‖ ^ 2) (l • z) z := by let h := fun x : E => ‖x‖ ^ 2 have e1 : (l • z) = (l / 2) • (2 : ℝ) • z := by rw [smul_smul]; simp - have : (fun (x : E) => l / 2 * ‖x‖ ^ 2) = (fun (x : E) => (l / 2) • h x) := by - ext; simp have h1 : HasGradientAt h ((2 : ℝ) • z) z := gradient_norm_sq_eq_two_self z - rw [this, e1]; refine HasGradientAt.const_smul' (l / 2) h1 + rw [show (fun x : E => l / 2 * ‖x‖ ^ 2) = fun x ↦ (l / 2 : ℝ) • h x by ext x; simp [h]] + simpa [e1] using HasGradientAt.const_smul h1 (l / 2) lemma gradient_of_sq : ∀ u : E, HasGradientAt (fun u ↦ ‖u - x‖ ^ 2 / 2) (u - x) u := by intro s @@ -348,7 +351,7 @@ lemma gradient_of_sq : ∀ u : E, HasGradientAt (fun u ↦ ‖u - x‖ ^ 2 / 2) · linarith · intro x' dles; field_simp; rw [abs_div]; simp have eq1 (u v : E) (e : ℝ) (dle : ‖u - v‖ ≤ e) : - |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ((2 : ℝ) • u) (v - u)| ≤ e * ‖u - v‖ := by + |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ℝ ((2 : ℝ) • u) (v - u)| ≤ e * ‖u - v‖ := by rw [← norm_neg (u - v), neg_sub] at dle; rw [← real_inner_self_eq_norm_sq, ← real_inner_self_eq_norm_sq, inner_sub_right] rw [real_inner_smul_left, real_inner_smul_left]; ring_nf @@ -366,13 +369,13 @@ lemma gradient_of_sq : ∀ u : E, HasGradientAt (fun u ↦ ‖u - x‖ ^ 2 / 2) have eq2 : s - x' = u - v := by rw [hu, hv]; simp have eq3 : x' - s = v - u := by rw [hu, hv]; simp rw [eq2, eq3] - show |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ((2 : ℝ) • u) (v - u)| / 2 ≤ e * ‖u - v‖ + show |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ℝ ((2 : ℝ) • u) (v - u)| / 2 ≤ e * ‖u - v‖ calc - |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ((2 : ℝ) • u) (v - u)| / 2 ≤ (e * ‖u - v‖) / 2 := by - rw [div_le_div_right] - apply eq1; rw [hu, hv]; simp; apply dles; simp + |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ℝ ((2 : ℝ) • u) (v - u)| / 2 ≤ (e * ‖u - v‖) / 2 := by + have hle := eq1 u v e (by rw [hu, hv]; simpa using dles) + nlinarith _ ≤ e * ‖u - v‖ := by - field_simp + nlinarith [norm_nonneg (u - v), le_of_lt ep] lemma sub_normsquare_gradient (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (m : ℝ): ∀ x ∈ s, HasGradientAt (fun x ↦ f x - m / 2 * ‖x‖ ^ 2) (f' x - m • x) x := by @@ -431,14 +434,14 @@ open InnerProductSpace Set -/ lemma expansion (hf : ∀ x : E, HasGradientAt f (f' x) x) (x p : E) : - ∃ t : ℝ, t > 0 ∧ t < 1 ∧ f (x + p) = f x + inner (f' (x + t • p)) p := by + ∃ t : ℝ, t > 0 ∧ t < 1 ∧ f (x + p) = f x + inner ℝ (f' (x + t • p)) p := by let g := fun r : ℝ ↦ f (x + r • p) - let g' := fun r : ℝ ↦ (inner (f' (x + r • p)) p : ℝ) + let g' := fun r : ℝ ↦ (inner ℝ (f' (x + r • p)) p : ℝ) have h1 : ∀ r , HasDerivAt g (g' r) r := by let h := fun r : ℝ ↦ x + r • p have : g = f ∘ h := by rfl rw [this]; intro r - have : inner (f' (x + r • p)) p = toDual ℝ E (f' (x + r • p)) p := rfl + have : inner ℝ (f' (x + r • p)) p = toDual ℝ E (f' (x + r • p)) p := rfl simp [g']; rw [this]; apply HasFDerivAt.comp_hasDerivAt · apply hasGradientAt_iff_hasFDerivAt.mp exact hf (x + r • p) @@ -449,7 +452,7 @@ lemma expansion (hf : ∀ x : E, HasGradientAt f (f' x) x) (x p : E) : rw [one_smul] at this; exact this have e1 : f (x + p) = g 1 := by simp [g] have e2 : f x = g 0 := by simp [g] - have e3 : ∀ t, inner (f' (x + t • p)) p = g' t := by simp [] + have e3 : ∀ t, inner ℝ (f' (x + t • p)) p = g' t := by intro t; rfl rw [e1, e2] have : ∃ c ∈ Set.Ioo 0 1, g' c = (g 1 - g 0) / (1 - 0) := by apply exists_hasDerivAt_eq_slope g g' (by norm_num) @@ -465,14 +468,14 @@ lemma expansion (hf : ∀ x : E, HasGradientAt f (f' x) x) (x p : E) : rw [e3 c]; simp [h2] lemma general_expansion (x p : E) (hf : ∀ y ∈ Metric.closedBall x ‖p‖, HasGradientAt f (f' y) y) : - ∃ t : ℝ, t > 0 ∧ t < 1 ∧ f (x + p) = f x + inner (f' (x + t • p)) p := by + ∃ t : ℝ, t > 0 ∧ t < 1 ∧ f (x + p) = f x + inner ℝ (f' (x + t • p)) p := by let g := fun r : ℝ ↦ f (x + r • p) - let g' := fun r : ℝ ↦ (inner (f' (x + r • p)) p : ℝ) + let g' := fun r : ℝ ↦ (inner ℝ (f' (x + r • p)) p : ℝ) have h1 : ∀ r ∈ Icc 0 1, HasDerivAt g (g' r) r := by let h := fun r : ℝ ↦ x + r • p have : g = f ∘ h := by rfl rw [this]; intro r hr - have : inner (f' (x + r • p)) p = toDual ℝ E (f' (x + r • p)) p := rfl + have : inner ℝ (f' (x + r • p)) p = toDual ℝ E (f' (x + r • p)) p := rfl simp [g']; rw [this]; apply HasFDerivAt.comp_hasDerivAt · apply hasGradientAt_iff_hasFDerivAt.mp have : x + r • p ∈ Metric.closedBall x ‖p‖ := by @@ -486,7 +489,7 @@ lemma general_expansion (x p : E) (hf : ∀ y ∈ Metric.closedBall x ‖p‖, H rw [one_smul] at this; exact this have e1 : f (x + p) = g 1 := by simp [g] have e2 : f x = g 0 := by simp [g] - have e3 : ∀ t, inner (f' (x + t • p)) p = g' t := by simp [] + have e3 : ∀ t, inner ℝ (f' (x + t • p)) p = g' t := by intro t; rfl rw [e1, e2] have : ∃ c ∈ Set.Ioo 0 1, g' c = (g 1 - g 0) / (1 - 0) := by apply exists_hasDerivAt_eq_slope g g' (by norm_num) @@ -501,15 +504,15 @@ lemma general_expansion (x p : E) (hf : ∀ y ∈ Metric.closedBall x ‖p‖, H theorem lagrange (hs : Convex ℝ s) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) : ∀ x ∈ s, ∀ y ∈ s, ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ - inner (f' (x + c • (y - x))) (y - x) = f y - f x := by + inner ℝ (f' (x + c • (y - x))) (y - x) = f y - f x := by intro x xs y ys let g := fun t : ℝ ↦ f (x + t • (y - x)) - let g' := fun t : ℝ ↦ (inner (f' (x + t • (y - x))) (y - x) : ℝ) + let g' := fun t : ℝ ↦ (inner ℝ (f' (x + t • (y - x))) (y - x) : ℝ) have h1 : ∀ r ∈ Icc 0 1 , HasDerivAt g (g' r) r := by let h := fun r : ℝ ↦ (x + r • (y - x)) have : g = f ∘ h := rfl rw [this]; intro t ht - have : inner (f' (x + t • (y - x))) (y - x) = toDual ℝ E (f' (x + t • (y - x))) (y - x) := rfl + have : inner ℝ (f' (x + t • (y - x))) (y - x) = toDual ℝ E (f' (x + t • (y - x))) (y - x) := rfl simp [g']; rw [this]; apply HasFDerivAt.comp_hasDerivAt · apply hasGradientAt_iff_hasFDerivAt.mp have : x + t • (y - x) ∈ s := by @@ -546,21 +549,26 @@ variable {x : E} {y : F} {z : WithLp 2 (E × F)} open Set Bornology Filter BigOperators Topology -lemma fst_norm_le_prod_L2 (z : WithLp 2 (E × F)) : ‖z.1‖ ≤ ‖z‖ := by - have h : ‖z.1‖ ^ 2 ≤ ‖z‖ ^ 2 := by linarith [WithLp.prod_norm_sq_eq_of_L2 z, sq_nonneg ‖z.2‖] +instance instCoeProdWithLpL2 : CoeTC (E × F) (WithLp 2 (E × F)) where + coe := WithLp.toLp 2 + +lemma fst_norm_le_prod_L2 (z : WithLp 2 (E × F)) : ‖z.fst‖ ≤ ‖z‖ := by + have h : ‖z.fst‖ ^ 2 ≤ ‖z‖ ^ 2 := by + linarith [WithLp.prod_norm_sq_eq_of_L2 z, sq_nonneg ‖z.snd‖] apply nonneg_le_nonneg_of_sq_le_sq (norm_nonneg _) rwa [← pow_two, ← pow_two] -lemma snd_norm_le_prod_L2 (z : WithLp 2 (E × F)) : ‖z.2‖ ≤ ‖z‖ := by - have h : ‖z.2‖ ^ 2 ≤ ‖z‖ ^ 2 := by linarith [WithLp.prod_norm_sq_eq_of_L2 z, sq_nonneg ‖z.1‖] +lemma snd_norm_le_prod_L2 (z : WithLp 2 (E × F)) : ‖z.snd‖ ≤ ‖z‖ := by + have h : ‖z.snd‖ ^ 2 ≤ ‖z‖ ^ 2 := by + linarith [WithLp.prod_norm_sq_eq_of_L2 z, sq_nonneg ‖z.fst‖] apply nonneg_le_nonneg_of_sq_le_sq (norm_nonneg _) rwa [← pow_two, ← pow_two] -lemma prod_norm_le_block_sum_L2 (z : WithLp 2 (E × F)) : ‖z‖ ≤ ‖z.1‖ + ‖z.2‖ := by - have : ‖z‖ ^ 2 ≤ (‖z.1‖ + ‖z.2‖) ^ 2:= by +lemma prod_norm_le_block_sum_L2 (z : WithLp 2 (E × F)) : ‖z‖ ≤ ‖z.fst‖ + ‖z.snd‖ := by + have : ‖z‖ ^ 2 ≤ (‖z.fst‖ + ‖z.snd‖) ^ 2 := by simp [WithLp.prod_norm_sq_eq_of_L2, add_sq] positivity - apply nonneg_le_nonneg_of_sq_le_sq (Left.add_nonneg (norm_nonneg z.1) (norm_nonneg z.2)) + apply nonneg_le_nonneg_of_sq_le_sq (Left.add_nonneg (norm_nonneg z.fst) (norm_nonneg z.snd)) rwa [← pow_two, ← pow_two] lemma norm_prod_right_zero (x : E) : @@ -590,17 +598,21 @@ instance instNormedSpaceProdL2 : NormedSpace ℝ (WithLp 2 (E × F)) where exact norm_smul_le a b instance instIsBoundedLinearMapL2equiv : - @IsBoundedLinearMap ℝ _ (E × F) _ _ (WithLp 2 (E × F)) _ _ id where - map_add := fun x ↦ congrFun rfl - map_smul := fun c ↦ congrFun rfl + IsBoundedLinearMap ℝ (WithLp.toLp 2 : E × F → WithLp 2 (E × F)) where + map_add := by intro x y; simp + map_smul := by intro c z; simp bound := by use 2 constructor · norm_num · intro z + have h := prod_norm_le_block_sum_L2 (WithLp.toLp 2 z) rw [Prod.norm_def] - have h := prod_norm_le_block_sum_L2 z - simp only [id_eq] - linarith [h, le_max_left ‖z.1‖ ‖z.2‖, le_max_right ‖z.1‖ ‖z.2‖] + calc + ‖WithLp.toLp 2 z‖ ≤ ‖(WithLp.toLp 2 z).fst‖ + ‖(WithLp.toLp 2 z).snd‖ := h + _ = ‖z.fst‖ + ‖z.snd‖ := by simp + _ ≤ max ‖z.fst‖ ‖z.snd‖ + max ‖z.fst‖ ‖z.snd‖ := + add_le_add (le_max_left _ _) (le_max_right _ _) + _ = 2 * max ‖z.fst‖ ‖z.snd‖ := by ring end ProdLp_diff diff --git a/Optlib/Differential/Subdifferential.lean b/Optlib/Differential/Subdifferential.lean index 4a67cea..06376d9 100644 --- a/Optlib/Differential/Subdifferential.lean +++ b/Optlib/Differential/Subdifferential.lean @@ -5,7 +5,7 @@ Author: Anqing Shen, Yifan Bai, Chenyi Li, Zaiwen Wen -/ import Mathlib.Order.LiminfLimsup import Mathlib.Topology.Defs.Filter -import Mathlib.Data.Real.EReal +import Mathlib.Topology.Instances.EReal.Lemmas import Optlib.Differential.Calculation import Optlib.Function.Proximal @@ -19,7 +19,7 @@ variable {f g : E → ℝ} {x y u v : E} {c : ℝ} /- the general differential function used in the definition -/ def differential_fun (x : E) (f : E → ℝ) (u : E) := - fun y ↦ Real.toEReal ((f y - f x - inner u (y - x)) / ‖y - x‖) + fun y ↦ Real.toEReal ((f y - f x - inner ℝ u (y - x)) / ‖y - x‖) /- the definition of the Frechet subdifferential-/ def f_subdifferential (f : E → ℝ) (x : E) : Set E := @@ -40,9 +40,9 @@ def critial_point (f : E → ℝ) : Set E := /-- equivalence of Frechet subdifferential -/ theorem has_f_subdiff_iff : u ∈ f_subdifferential f x ↔ - ∀ ε > 0, ∀ᶠ y in 𝓝 x, f y - f x - inner u (y - x) ≥ -ε * ‖y - x‖ := by - have h0 : (∀ ε > 0, ∀ᶠ y in 𝓝[≠] x, f y - f x - inner u (y - x) > -ε * ‖y - x‖) - ↔ ∀ ε > 0, ∀ᶠ y in 𝓝 x, f y - f x - inner u (y - x) ≥ -ε * ‖y - x‖ := by + ∀ ε > 0, ∀ᶠ y in 𝓝 x, f y - f x - inner ℝ u (y - x) ≥ -ε * ‖y - x‖ := by + have h0 : (∀ ε > 0, ∀ᶠ y in 𝓝[≠] x, f y - f x - inner ℝ u (y - x) > -ε * ‖y - x‖) + ↔ ∀ ε > 0, ∀ᶠ y in 𝓝 x, f y - f x - inner ℝ u (y - x) ≥ -ε * ‖y - x‖ := by constructor · intro h ε εpos specialize h ε εpos @@ -149,7 +149,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) have yin': y' ∈ univ:= by simp specialize convfun xin yin' - have pos: 0 < (1 / 2) * ((f x) + inner g (y' - x) - f y') / ‖y' - x‖:=by + have pos: 0 < (1 / 2) * ((f x) + inner ℝ g (y' - x) - f y') / ‖y' - x‖:=by apply div_pos · apply mul_pos simp; simp @@ -160,7 +160,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) rw [yeq'] at hy' simp at hy' rw[← gt_iff_lt] at pos - specialize hg ((1 / 2) * ((f x) + inner g (y' - x) - f y')/‖y' - x‖) + specialize hg ((1 / 2) * ((f x) + inner ℝ g (y' - x) - f y')/‖y' - x‖) specialize hg pos simp at hg rw[Filter.Eventually,mem_nhds_iff] at hg @@ -190,7 +190,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) by_contra yeq' rw[sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp simp @@ -204,14 +204,14 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) by_contra yeq' rw[sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp · apply norm_pos_iff.mpr by_contra yeq' rw[sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp simp @@ -231,7 +231,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) by_contra yeq' rw[norm_eq_zero,sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp refine div_mul_cancel₀ δ' nonzero @@ -241,7 +241,6 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) apply lt_of_le_of_lt apply min_le_left exact lt_two_mul_self posδ - simp apply div_nonneg apply le_min linarith @@ -252,8 +251,8 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) rcases hx1 with ⟨x1s,x1t⟩ rw[mem_setOf] at x1s rcases x1s with ⟨r,rpos,rltone,x1eq⟩ - have x1in: x1 ∈ {x_1 | inner g (x_1 - x) ≤ - f x_1 - f x + 2⁻¹ * (f x + inner g (y' - x) - f y') / ‖y' - x‖ * ‖x_1 - x‖}:=by + have x1in: x1 ∈ {x_1 | inner ℝ g (x_1 - x) ≤ + f x_1 - f x + 2⁻¹ * (f x + inner ℝ g (y' - x) - f y') / ‖y' - x‖ * ‖x_1 - x‖}:=by apply mem_of_subset_of_mem tin assumption rw[mem_setOf,x1eq] at x1in @@ -271,7 +270,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) have r2pos: 0 < (1 -r)/2:=by linarith have req: r + (1-r) = 1:=by simp specialize convfun rnonneg rleone req - have nonneg: 0 ≤ f y' - f x - inner g (y' - x):=by + have nonneg: 0 ≤ f y' - f x - inner ℝ g (y' - x):=by apply nonneg_of_mul_nonneg_right _ r2pos rw[mul_sub, ← sub_self_div_two (1 - r), sub_mul, sub_mul (1 - r)] simp @@ -288,17 +287,17 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) rw[neg_mul,← sub_eq_add_neg ((1 - r) * f y'),← mul_sub, mul_assoc, mul_comm (1 - r) ‖y' - x‖] rw[← mul_assoc, div_mul, div_self] simp - rw[mul_comm (2⁻¹ * inner g (y' - x) + 2⁻¹ * (f x - f y')), - mul_add, add_comm ((1 - r) * (2⁻¹ * inner g (y' - x)))] + rw[mul_comm (2⁻¹ * inner ℝ g (y' - x) + 2⁻¹ * (f x - f y')), + mul_add, add_comm ((1 - r) * (2⁻¹ * inner ℝ g (y' - x)))] rw[← add_assoc, ← mul_assoc, ← mul_assoc,inv_eq_one_div] linarith by_contra yeq' rw[norm_eq_zero,sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp - have nonneg': ¬ 0 > f y' - f x - inner g (y' - x):=by linarith + have nonneg': ¬ 0 > f y' - f x - inner ℝ g (y' - x):=by linarith apply nonneg' simp linarith @@ -326,7 +325,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) theorem f_subdiff_neg_f_subdiff_unique (hu : u ∈ f_subdifferential f x) (hv : v ∈ f_subdifferential (- f) x) : u = - v := by rw [has_f_subdiff_iff] at * - have h : ∀ ε > 0, ∀ᶠ y in 𝓝 x, inner (u + v) (y - x) ≤ ε * ‖y - x‖ := by + have h : ∀ ε > 0, ∀ᶠ y in 𝓝 x, inner ℝ (u + v) (y - x) ≤ ε * ‖y - x‖ := by intro ε εpos have ε2pos : 0 < ε / 2 := by positivity filter_upwards [hu _ ε2pos, hv _ ε2pos] with y huy hvy @@ -371,9 +370,11 @@ theorem f_subdiff_smul (h : u ∈ f_subdifferential (c • f) x) (cpos : 0 < c) filter_upwards [h _ (mul_pos cpos εpos)] with y hy rw [real_inner_smul_left] simp only [Pi.smul_apply, smul_eq_mul, neg_mul, neg_le_sub_iff_le_add] at hy - apply (mul_le_mul_left cpos).mp - field_simp - linarith + have hy' : c⁻¹ * inner ℝ u (y - x) ≤ f y - f x + ε * ‖y - x‖ := by + simpa [div_eq_mul_inv, mul_assoc, mul_comm, mul_left_comm] using + (show inner ℝ u (y - x) / c ≤ f y - f x + ε * ‖y - x‖ from + (div_le_iff₀ cpos).2 (by nlinarith [hy])) + linarith [hy'] /-- first order optimality condition for unconstrained optimization problem -/ theorem first_order_optimality_condition (f : E → ℝ) (x₀ : E) (hx : IsLocalMin f x₀) : @@ -508,8 +509,8 @@ theorem f_subdiff_add' (f : E → ℝ ) (g : E → ℝ ) (g' : E → E) (x : E) specialize hg ε2pos filter_upwards [zin _ ε2pos, hg ] with a za ga simp at ga - have h: - (g a - g x - inner (g' x) (a - x)) ≥ -(ε / 2) * ‖a - x‖:=by - change -(ε / 2) * ‖a - x‖ ≤ - (g a - g x - inner (g' x) (a - x)) + have h: - (g a - g x - inner ℝ (g' x) (a - x)) ≥ -(ε / 2) * ‖a - x‖:=by + change -(ε / 2) * ‖a - x‖ ≤ - (g a - g x - inner ℝ (g' x) (a - x)) rw[neg_mul, neg_le_neg_iff] apply le_trans; apply le_abs_self; assumption rw[inner_sub_left]; diff --git a/Optlib/Function/KL.lean b/Optlib/Function/KL.lean index f2f0c1a..c58a350 100644 --- a/Optlib/Function/KL.lean +++ b/Optlib/Function/KL.lean @@ -31,8 +31,8 @@ lemma subdifferential_Graph' (f : E → ℝ) : use fun n => (u n, f (u n), v n) constructor · intro n; simp; exact (hv n).1 - · apply Tendsto.prod_mk_nhds u_conv - (Tendsto.prod_mk_nhds fun_conv ((forall_and_right _ _).1 hv).2) + · exact Filter.Tendsto.prodMk_nhds u_conv + (Filter.Tendsto.prodMk_nhds fun_conv ((forall_and_right _ _).1 hv).2) · intro h simp [subdifferential_Graph, subdifferential] simp at h @@ -68,7 +68,7 @@ theorem GraphOfSubgradientIsClosed {f : E → ℝ} exact this rw [nhds_prod_eq,Filter.tendsto_prod_iff'] at hconv; simp at hconv - exact Tendsto.prod_mk_nhds hconv.1 (Tendsto.prod_mk_nhds hf hconv.2) + exact Filter.Tendsto.prodMk_nhds hconv.1 (Filter.Tendsto.prodMk_nhds hf hconv.2) /- Definition of Φ_η, the family of desingularizing function -/ def desingularizing_function (η : ℝ) := {φ : ℝ → ℝ | (ConcaveOn ℝ (Ico 0 η) φ) -- ∧ (∀ x ∈ Ioo 0 η, φ x > 0) @@ -108,24 +108,26 @@ lemma desingularizing_function_is_nonneg (φ : ℝ → ℝ) (η : ℝ) (h : φ obtain h_lag := exists_deriv_eq_slope φ hx₁ Cont_φ Diff_φ rcases h_lag with ⟨c, ⟨hc, hval⟩⟩ use c, hc - field_simp [hval] + have hx0 : x - 0 ≠ 0 := by linarith + have hval' : deriv φ c * (x - 0) = φ x - φ 0 := (eq_div_iff hx0).1 hval + linarith choose y hy₁ hy₂ using hhh - simp [hy₂, h₂]; field_simp; rcases hy₁ with ⟨hy₁,hy₁'⟩ have yleq: y < η := by linarith - exact h₅ y hy₁ yleq + have hyderiv : 0 < deriv φ y := h₅ y hy₁ yleq + nlinarith [hy₂, h₂, hyderiv, hx₁] -- Definition of KL property with specific desingularizing function def KL_point_with_reparameter (σ : E → ℝ) (u : E) (φ : ℝ → ℝ) : Prop := ∃ η ∈ Ioi 0, ∃ s ∈ 𝓝 u, (φ ∈ desingularizing_function η) ∧ (∀ x ∈ s ∩ {y ∈ active_domain σ | σ u < σ y ∧ σ y < σ u + η}, - deriv φ (σ x - σ u) * (EMetric.infEdist 0 (subdifferential σ x)).toReal ≥ 1) + deriv φ (σ x - σ u) * (Metric.infEDist 0 (subdifferential σ x)).toReal ≥ 1) -- Definition of the KL property at one point def KL_point (f : E → ℝ) (u : E) : Prop := ∃ η ∈ Ioi 0, ∃ s ∈ 𝓝 u, ∃ φ ∈ desingularizing_function η, ∀ x ∈ s ∩ {y | f u < f y ∧ f y < f u + η}, - (ENNReal.ofReal (deriv φ (f x - f u))) * (EMetric.infEdist 0 (subdifferential f x)) ≥ ENNReal.ofReal 1 + (ENNReal.ofReal (deriv φ (f x - f u))) * (Metric.infEDist 0 (subdifferential f x)) ≥ ENNReal.ofReal 1 -- Definition of the KL function def KL_function (f : E → ℝ) : Prop := @@ -139,7 +141,7 @@ def KL_function (f : E → ℝ) : Prop := def KL_property_with_regularization (f : E → ℝ) (u' : E) (φ : ℝ → ℝ) : Prop := ∃ η ∈ Ioi 0, ∃ s ∈ 𝓝 u', (φ ∈ desingularizing_function η) ∧ (∀ x ∈ s ∩ {y ∈ active_domain f | f u' < f y ∧ f y < f u' + η}, - (EMetric.infEdist 0 (subdifferential (λ u => φ (f u - f u')) x)).toReal ≥ 1) + (Metric.infEDist 0 (subdifferential (λ u => φ (f u - f u')) x)).toReal ≥ 1) -- deriv of function (fun t => c⁻¹ * t) is c⁻¹ lemma deriv_of_const_mul_func (c : ℝ) (x : ℝ) : deriv (fun (t : ℝ) => c⁻¹ * t) x = c⁻¹ := by @@ -165,10 +167,11 @@ lemma const_mul_special_concave : ∀ c > 0, (fun t => c⁻¹ * t) ∈ desingula have h₃: ContDiffOn ℝ 1 (fun t ↦ c⁻¹ * t) (Ioo 0 (c / 2)) := by rw [fun_smul_eq_mul]; apply ContDiff.contDiffOn; apply contDiff_const_smul have h₄: ContinuousAt (fun t ↦ c⁻¹ * t) 0 := by - rw [fun_smul_eq_mul]; apply (continuousAt_const_smul_iff₀ _).2 - apply continuousAt_id; field_simp + simpa using (continuous_const.mul continuous_id).continuousAt have h₅: ∀ (x : ℝ), 0 < x → x < c / 2 → 0 < deriv (fun t ↦ c⁻¹ * t) x := by - intro x _ _; rw [deriv_of_const_mul_func]; field_simp; exact cpos + intro x _ _ + rw [deriv_of_const_mul_func] + exact inv_pos.mpr cpos exact ⟨h₁, h₃, h₄, h₅⟩ @@ -195,23 +198,24 @@ lemma const_mul_edist_ge_one {c : ℝ} {ed : ENNReal} (hpos : c > 0) rw [hed']; refine ENNReal.mul_top ?h; simpa rw [this]; simp; push_neg at hed' calc - _ ≥ ENNReal.ofReal c⁻¹ * ENNReal.ofReal c := mul_le_mul_left' hed (ENNReal.ofReal c⁻¹) + _ ≥ ENNReal.ofReal c⁻¹ * ENNReal.ofReal c := by + exact mul_le_mul_of_nonneg_left hed (by exact bot_le) _ = ENNReal.ofReal 1 := by rw [← ENNReal.ofReal_mul]; field_simp; simp; exact le_of_lt hpos lemma edist_geq_const (h_noncrit : 0 ∉ subdifferential f x) : ∃ c > 0, ∀ u, ‖u - x‖ + ‖f u - f x‖ < c → - EMetric.infEdist 0 (subdifferential f u) ≥ ENNReal.ofReal c := by + Metric.infEDist 0 (subdifferential f u) ≥ ENNReal.ofReal c := by by_contra! hc have sqh: ∀ n : ℕ, ∃ u, ‖u - x‖ + ‖f u - f x‖ < 1 / (n + 1) ∧ - (EMetric.infEdist 0 (subdifferential f u)) < ENNReal.ofReal (1 / (n + 1)) := + (Metric.infEDist 0 (subdifferential f u)) < ENNReal.ofReal (1 / (n + 1)) := fun n ↦ hc (1 / (n + 1)) (by simp; linarith) choose u hu using sqh - have inequ_fun : ∀ n, (EMetric.infEdist 0 (subdifferential f (u n))).toReal ≤ 1 / (n + 1) := by + have inequ_fun : ∀ n, (Metric.infEDist 0 (subdifferential f (u n))).toReal ≤ 1 / (n + 1) := by intro n apply (ENNReal.toReal_le_of_le_ofReal _ (le_of_lt (hu n).right)) simp; linarith - have : Tendsto (fun n ↦ (EMetric.infEdist 0 (subdifferential f (u n))).toReal) atTop (𝓝 0) := + have : Tendsto (fun n ↦ (Metric.infEDist 0 (subdifferential f (u n))).toReal) atTop (𝓝 0) := squeeze_zero (by simp) inequ_fun tendsto_one_div_add_atTop_nhds_zero_nat have h_contra : 0 ∈ subdifferential f x := by have u_to_x : Tendsto u atTop (𝓝 x) := by @@ -236,7 +240,7 @@ lemma edist_geq_const (h_noncrit : 0 ∉ subdifferential f x) : intro n rcases hu n with ⟨_,hu₂⟩ have : ∃ vn ∈ subdifferential f (u n), edist 0 vn < 1 / (n + 1) := by - apply EMetric.infEdist_lt_iff.1 + apply Metric.infEDist_lt_iff.1 rw [← one_div_type_trans n] exact hu₂ choose vn hvn using this @@ -248,15 +252,15 @@ lemma edist_geq_const (h_noncrit : 0 ∉ subdifferential f x) : intro n exact (hv n).1 have v_to_zero: Tendsto v atTop (𝓝 0) := by - rw [dist_zero_left] at hv have : Tendsto (fun n => ‖v n‖) atTop (𝓝 0) := by apply squeeze_zero (by simp) _ tendsto_one_div_add_atTop_nhds_zero_nat intro n - apply le_of_lt (hv n).right + have hdist : dist 0 (v n) < 1 / (n + 1) := (hv n).right + exact (le_of_lt (by simpa [dist_eq_norm] using hdist)) apply tendsto_zero_iff_norm_tendsto_zero.2 this show (x, 0) ∈ subdifferential_Graph f apply GraphOfSubgradientIsClosed v_in_subdiff - (Filter.Tendsto.prod_mk_nhds u_to_x v_to_zero) fu_to_fx + (Filter.Tendsto.prodMk_nhds u_to_x v_to_zero) fu_to_fx contradiction /-- Non-critical KL property is naturally true -/ @@ -281,24 +285,9 @@ end section aux_lemma_uniform_KL -lemma real_geq_ennreal_ofreal_geq {a b : ℝ} {c : ENNReal} (hgeq : a ≥ b) (apos: a > 0): +lemma real_geq_ennreal_ofreal_geq {a b : ℝ} {c : ENNReal} (hgeq : a ≥ b) (_apos : a > 0): (ENNReal.ofReal a) * c ≥ (ENNReal.ofReal b) * c := by - by_cases hc : c = 0 - rw [hc] - simp - push_neg at hc - by_cases hctop : c = ⊤ - rw [hctop] - have ha : (ENNReal.ofReal a) * ⊤ = ⊤ := by - refine ENNReal.mul_top ?h - simpa - rw [ha] - simp - push_neg at hctop - refine (ENNReal.mul_le_mul_right ?_ ?_).mpr ?_ - · exact hc - · exact hctop - · exact ENNReal.ofReal_le_ofReal hgeq + exact mul_le_mul_left (ENNReal.ofReal_le_ofReal hgeq) c end aux_lemma_uniform_KL @@ -323,24 +312,24 @@ variable [NormedAddCommGroup E] [InnerProductSpace ℝ E] theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsCompact Ω) (h_Ω1 : ∀ x ∈ Ω, KL_point f x) (h_Ω2: is_constant_on f Ω) : ∃ ε ∈ Ioi 0, ∃ η ∈ Ioi 0, ∃ φ ∈ desingularizing_function η, ∀ u ∈ Ω , ∀ x ∈ - {y : E | (EMetric.infEdist y Ω).toReal < ε} ∩ {y | f u < f y ∧ f y < f u + η}, - (ENNReal.ofReal (deriv φ (f x - f u))) * EMetric.infEdist 0 (subdifferential f x) ≥ 1 := by + {y : E | (Metric.infEDist y Ω).toReal < ε} ∩ {y | f u < f y ∧ f y < f u + η}, + (ENNReal.ofReal (deriv φ (f x - f u))) * Metric.infEDist 0 (subdifferential f x) ≥ 1 := by -- case : Ω = ∅ - by_cases h_nonempty : Ω = ∅ - · push_neg at h_nonempty + by_cases h_empty : Ω = ∅ + · use 1, (by simp), 1, (by simp), (fun t => 2⁻¹ * t) constructor rw [← div_self] exact (const_mul_special_concave 2 (by simp)) simp - rw [h_nonempty] + rw [h_empty] tauto -- case : Ω ≠ ∅ - push_neg at h_nonempty + have h_nonempty : Ω.Nonempty := Set.nonempty_iff_ne_empty.2 h_empty obtain ⟨μ, constant_value⟩ := exist_constant_value f h_Ω2 h_nonempty have : ∀ x ∈ Ω, ∃ η ∈ Ioi 0, ∃ (O : Set E) (_: IsOpen O) (_: x ∈ O), ∃ φ ∈ desingularizing_function η, ∀ u ∈ O ∩ {y | f x < f y ∧ f y < f x + η}, - (ENNReal.ofReal (deriv φ (f u - f x))) * EMetric.infEdist 0 (subdifferential f u) ≥ 1 := by + (ENNReal.ofReal (deriv φ (f u - f x))) * Metric.infEDist 0 (subdifferential f u) ≥ 1 := by intro x hx; simp [KL_point] at h_Ω1 rcases h_Ω1 x hx with ⟨η, ⟨hη, ⟨s, ⟨hs, ⟨φ, hφ, h_Ω1⟩⟩⟩⟩⟩ rcases mem_nhds_iff.1 hs with ⟨O, hO1, hO2, hO3⟩ @@ -397,6 +386,9 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp exact min_le (η x) η_in_image rcases this with ⟨η_min, ηpos, hmin⟩ simp [desingularizing_function] at hφ + have hsum_attach : ∀ z : ℝ, ∑ c ∈ ht2.toFinset.attach, φ (↑c) z = ∑ c ∈ ht2.toFinset, φ c z := by + intro z + simpa using (Finset.sum_attach (s := ht2.toFinset) (f := fun c => φ c z)) -- φ_sum is desingularizing_function have h_special_concave: φ_sum ∈ desingularizing_function η_min := by simp [desingularizing_function] @@ -406,6 +398,7 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp apply convex_Ico intro x xpos y ypos a b apos bpos absum simp [φ_sum] + rw [hsum_attach x, hsum_attach y, hsum_attach (a * x + b * y)] have : ∀ d : ℝ, ∀ x : ℝ, d * ∑ c ∈ ht2.toFinset, φ c x = ∑ c ∈ ht2.toFinset, d * (φ c x) := by intro d x @@ -439,13 +432,15 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp -- exact (hφ c (mem_t_in_Ω c hc)).2.1 x hx1 xleq -- exact t_nonempty have h₃ : φ_sum 0 = 0 := by - simp [φ_sum] + simp [φ_sum, hsum_attach] have : ∀ x ∈ ht2.toFinset, φ x 0 = 0 := by intro x xt exact (hφ x (mem_t_in_Ω x xt)).2.1 apply Finset.sum_eq_zero this have h₄ : ContDiffOn ℝ 1 φ_sum (Ioo 0 η_min) := by - have : φ_sum = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by ext c; simp [φ_sum] + have : φ_sum = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by + ext c + simpa [φ_sum] using hsum_attach c rw [this] apply ContDiffOn.sum intro c hc @@ -455,9 +450,10 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp exact hmin c ((Set.Finite.mem_toFinset ht2).1 hc) have h₅ : ContinuousAt φ_sum 0 := by rw [ContinuousAt] - have : φ_sum = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by ext c; simp [φ_sum] + have : φ_sum = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by + ext c + simpa [φ_sum] using hsum_attach c rw [this] - simp [φ_sum] apply tendsto_finset_sum intro c hc obtain cont := (hφ c (mem_t_in_Ω c hc)).2.2.2.1 @@ -465,17 +461,23 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp exact cont have h₆ : ∀ (x : ℝ), 0 < x → x < η_min → 0 < deriv φ_sum x := by intro y ypos yleq - have : φ_sum = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by ext c; simp [φ_sum] + have : φ_sum = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by + ext c + simpa [φ_sum] using hsum_attach c rw [this] have : deriv (fun c ↦ ∑ x ∈ ht2.toFinset, φ x c) y = ∑ x ∈ ht2.toFinset, deriv (φ x) y := by - apply deriv_sum - intro c hc - have η_inequ: y < η c := by - obtain := hmin c ((Set.Finite.mem_toFinset ht2).1 hc) - linarith - specialize hφ c (mem_t_in_Ω c hc) - obtain contdiff:= ContDiffOn.contDiffAt hφ.2.2.1 (Ioo_mem_nhds ypos η_inequ) - apply ContDiffAt.differentiableAt contdiff (by simp) + have hfun : (fun c => ∑ x ∈ ht2.toFinset, φ x c) = (∑ x ∈ ht2.toFinset, φ x) := by + ext c; exact Eq.symm (Finset.sum_apply c ht2.toFinset φ) + have hderiv : deriv (∑ x ∈ ht2.toFinset, φ x) y = ∑ x ∈ ht2.toFinset, deriv (φ x) y := by + apply deriv_sum + intro c hc + have η_inequ : y < η c := by + obtain := hmin c ((Set.Finite.mem_toFinset ht2).1 hc) + linarith + specialize hφ c (mem_t_in_Ω c hc) + obtain contdiff := ContDiffOn.contDiffAt hφ.2.2.1 (Ioo_mem_nhds ypos η_inequ) + exact ContDiffAt.differentiableAt contdiff (by simp) + simpa [hfun] using hderiv rw [this] apply Finset.sum_pos · intro c hc @@ -486,7 +488,7 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp exact hφ.2.2.2.2 y ypos this · exact t_nonempty exact ⟨h₁,h₃,h₄,h₅,h₆⟩ - have uniform_ball: ∃ ε ∈ Ioi 0, {y| EMetric.infEdist y Ω < ENNReal.ofReal ε} ⊆ ⋃ x ∈ t, O x := by + have uniform_ball: ∃ ε ∈ Ioi 0, {y| Metric.infEDist y Ω < ENNReal.ofReal ε} ⊆ ⋃ x ∈ t, O x := by have union_open : IsOpen (⋃ x ∈ t, O x) := by have : ∀ x ∈ t, IsOpen (O x) := by intro x hx @@ -497,15 +499,15 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp obtain res_thickening := IsCompact.exists_thickening_subset_open h_compact union_open ht3 rcases res_thickening with ⟨ε, ⟨hε, h2⟩⟩ use ε, hε - have : {y| EMetric.infEdist y Ω < ENNReal.ofReal ε} = Metric.thickening ε Ω := by - ext x; exact Metric.mem_thickening_iff_infEdist_lt + have : {y| Metric.infEDist y Ω < ENNReal.ofReal ε} = Metric.thickening ε Ω := by + ext x; exact Metric.mem_thickening_iff_infEDist_lt rwa [this] choose ε uniform_ball using uniform_ball - have : {y| EMetric.infEdist y Ω < ENNReal.ofReal ε} = {y| (EMetric.infEdist y Ω).toReal < ε} := by - ext x; apply ENNReal.lt_ofReal_iff_toReal_lt (Metric.infEdist_ne_top h_nonempty) + have : {y| Metric.infEDist y Ω < ENNReal.ofReal ε} = {y| (Metric.infEDist y Ω).toReal < ε} := by + ext x; apply ENNReal.lt_ofReal_iff_toReal_lt (Metric.infEDist_ne_top h_nonempty) rw [this] at uniform_ball -- There exists one open set in the finite cover - have exist_one_ball: ∀ u ∈ {y| (EMetric.infEdist y Ω).toReal < ε} + have exist_one_ball: ∀ u ∈ {y| (Metric.infEDist y Ω).toReal < ε} ∩ {y | μ < f y ∧ f y < μ + η_min}, ∃ x ∈ t, u ∈ O x := by intro u hu @@ -521,7 +523,7 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp rcases hu with ⟨_,hu2,hu3⟩ -- rcases hu with ⟨_, ⟨hu21 , ⟨hu221, hu222⟩⟩⟩ calc - _ ≥ (ENNReal.ofReal (deriv (φ ui) (f u - μ))) * EMetric.infEdist 0 (subdifferential f u) := by + _ ≥ (ENNReal.ofReal (deriv (φ ui) (f u - μ))) * Metric.infEDist 0 (subdifferential f u) := by have deriv_φ_pos: deriv φ_sum (f u - μ) > 0 := by simp [desingularizing_function] at h_special_concave obtain h_tmp := h_special_concave.2.2.2.2 @@ -530,23 +532,28 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp · linarith [hu3] apply real_geq_ennreal_ofreal_geq simp [φ_sum] + have hfun_attach : + (∑ b ∈ ht2.toFinset.attach, φ (↑b)) = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by + ext c + simpa [Finset.sum_apply] using hsum_attach c + rw [hfun_attach] have equ₁: deriv (fun c ↦ ∑ x ∈ ht2.toFinset, φ x c) (f u - μ) = ∑ x ∈ ht2.toFinset, deriv (φ x) (f u - μ) := by - apply deriv_sum - intro c hc - have σu_pos : f u - μ > 0 := by linarith [hu2] - have η_inequ: (f u - μ) < η c := by - obtain inequ_ηmin:= hmin c ((Set.Finite.mem_toFinset ht2).1 hc) - linarith - specialize hφ c (mem_t_in_Ω c hc) - obtain contdiff:= ContDiffOn.contDiffAt hφ.2.2.1 (Ioo_mem_nhds σu_pos η_inequ) - apply ContDiffAt.differentiableAt contdiff (by simp) - have equ₂ : deriv (fun c ↦ ∑ x ∈ ht2.toFinset, φ x c) (f u - μ) = - deriv (∑ x ∈ ht2.toFinset, φ x) (f u - μ) := by - have : (fun c ↦ ∑ x ∈ ht2.toFinset, φ x c) = (∑ x ∈ ht2.toFinset, φ x) := by + have hfun : (fun c => ∑ x ∈ ht2.toFinset, φ x c) = (∑ x ∈ ht2.toFinset, φ x) := by ext c; exact Eq.symm (Finset.sum_apply c ht2.toFinset φ) - rw [this] - rw [← equ₂, equ₁] + have hderiv : + deriv (∑ x ∈ ht2.toFinset, φ x) (f u - μ) = ∑ x ∈ ht2.toFinset, deriv (φ x) (f u - μ) := by + apply deriv_sum + intro c hc + have σu_pos : f u - μ > 0 := by linarith [hu2] + have η_inequ : (f u - μ) < η c := by + obtain inequ_ηmin := hmin c ((Set.Finite.mem_toFinset ht2).1 hc) + linarith + specialize hφ c (mem_t_in_Ω c hc) + obtain contdiff := ContDiffOn.contDiffAt hφ.2.2.1 (Ioo_mem_nhds σu_pos η_inequ) + exact ContDiffAt.differentiableAt contdiff (by simp) + simpa [hfun] using hderiv + rw [equ₁] -- have : (∑ x ∈ ht2.toFinset, deriv (φ x) (f u - μ)) ≥ deriv (φ ui) (f u - μ) := by let g x := deriv (φ x) (f u - μ) @@ -575,27 +582,4 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp rw [equ_μ] at h_exist exact h_exist - --- theorem uniformly_KL_property' {f : E → ℝ} {Ω : Set E} (h_compact : IsCompact Ω) --- (h_Ω1 : ∀ x ∈ Ω, KL_point f x) (h_Ω2: is_constant_on f Ω) : --- ∃ ε ∈ Ioi 0, ∃ η ∈ Ioi 0, ∃ φ ∈ desingularizing_function η, ∀ u ∈ Ω , ∀ x ∈ --- {y : E | (EMetric.infEdist y Ω).toReal < ε} ∩ {y | f u < f y ∧ f y < f u + η}, --- (Real.toEReal (deriv φ (f x - f u))) * (EMetric.infEdist 0 (subdifferential f x)) --- ≥ Real.toEReal 1 := by - --- obtain h := uniformly_KL_property h_compact h_Ω1 h_Ω2 --- rcases h with ⟨ε, hε, η, hη, φ, hφ, h⟩ --- use ε, hε, η, hη, φ, hφ --- intro u hu x hx --- by_cases h_empty : EMetric.infEdist 0 (subdifferential f x) = ⊤ --- · rw [h_empty] --- have hderiv: Real.toEReal (deriv φ (f x - f u)) > 0 := by sorry --- have hh: (Real.toEReal (deriv φ (f x - f u))) * (ENNReal.toEReal ⊤) = ⊤ := by --- sorry --- rw [hh] --- simp --- · push_neg at h_empty --- have h_not_bot: EMetric.infEdist 0 (subdifferential f x) ≠ ⊥ := by sorry --- sorry --- -- by_cases h_empty : EMetric.infEdist 0 (subdifferential f x) = ∅ end uniformized_KL diff --git a/Optlib/Function/L1Space.lean b/Optlib/Function/L1Space.lean index d3d0436..00a98c1 100644 --- a/Optlib/Function/L1Space.lean +++ b/Optlib/Function/L1Space.lean @@ -52,22 +52,26 @@ open scoped Pointwise Module /--A noncomputable function mapping basis vectors to the `l₁` space.-/ noncomputable def f : Fin (Module.finrank ℝ α) → PiLp 1 (fun _ : Fin (Module.finrank ℝ α) => ℝ) := - fun i j => if i = j then ‖(Module.finBasis ℝ α) i‖ else 0 + fun i => WithLp.toLp 1 (fun j => if i = j then ‖(Module.finBasis ℝ α) i‖ else 0) -noncomputable def σ := Basis.constrL (Module.finBasis ℝ α) f +noncomputable def σ := (Module.finBasis ℝ α).constrL f -theorem continuous_map_sigma : Continuous (σ (α := α)):= by exact ContinuousLinearMap.continuous σ +theorem continuous_map_sigma : Continuous (σ (α := α)):= by exact (σ (α := α)).continuous theorem sigma_orthogonal_same_index {i j : Fin (Module.finrank ℝ α)} (h : i = j) : - (σ ((Module.finBasis ℝ α) i)) j = ‖(Module.finBasis ℝ α) i‖ := by simp[σ,f,h] + (σ ((Module.finBasis ℝ α) i)) j = ‖(Module.finBasis ℝ α) i‖ := by + rw [σ, (Module.finBasis ℝ α).constrL_basis f i, f] + simp [h] theorem sigma_orthogonal_diff_index {i j : Fin (Module.finrank ℝ α)} (h : i ≠ j) : - (σ ((Module.finBasis ℝ α) i)) j = 0 := by simp[σ,f,h] + (σ ((Module.finBasis ℝ α) i)) j = 0 := by + rw [σ, (Module.finBasis ℝ α).constrL_basis f i, f] + simp [h] theorem sigma_apply_basis (i : Fin (Module.finrank ℝ α)) : σ ((Module.finBasis ℝ α) i) = fun j => if i = j then ‖(Module.finBasis ℝ α) i‖ else 0 := by ext j - simp[σ,f]; + rw [σ, (Module.finBasis ℝ α).constrL_basis f i, f] theorem sigma_norm_apply : ∀ x , ∀ j , ∑ i , (((Module.finBasis ℝ α).repr x) i) • σ ((Module.finBasis ℝ α) i) j = (((Module.finBasis ℝ α).repr x) j) * ‖(Module.finBasis ℝ α) j‖ := by @@ -87,19 +91,12 @@ theorem sigma_norm_apply : ∀ x , ∀ j , ∑ i , (((Module.finBasis ℝ α).r theorem sigma_decompose_apply : ∀ x , ∀ j , (σ x) j = ∑ i , (((Module.finBasis ℝ α).repr x) i) • σ ((Module.finBasis ℝ α) i) j:= by - intro x - rw[← PiLp.ext_iff] + intro x j calc - _ = σ (∑ i , (((Module.finBasis ℝ α).repr x) i) • (Module.finBasis ℝ α) i):= by - congr;exact Eq.symm (Basis.sum_repr (Module.finBasis ℝ α) x) - _ = ∑ i , σ ((((Module.finBasis ℝ α).repr x) i) • (Module.finBasis ℝ α) i):= by - simp only [map_sum, map_smul] - _ = _ := by - ext j; - repeat rw[Finset.sum_apply] - congr - ext x - simp only [map_smul, PiLp.smul_apply, smul_eq_mul] + _ = (σ (∑ i , (((Module.finBasis ℝ α).repr x) i) • (Module.finBasis ℝ α) i)) j := by + rw [(Module.finBasis ℝ α).sum_repr x] + _ = (∑ i , σ ((((Module.finBasis ℝ α).repr x) i) • (Module.finBasis ℝ α) i)) j := by rw [map_sum] + _ = _ := by simp [map_smul] /-- For any element x in the vector space α, the norm of the image of x @@ -109,9 +106,7 @@ in the finite basis representation of x, each multiplied by the norm of the corr -/ theorem l1_norm_eq : ∀ x , ‖σ x‖ = ∑ i , ‖((Module.finBasis ℝ α).repr x) i‖ * ‖(Module.finBasis ℝ α) i‖ := by intro x - rw[PiLp.norm_eq_of_nat 1 (by norm_num)] - simp only [pow_one, Nat.cast_one, ne_eq, one_ne_zero, not_false_eq_true, - div_self, Real.rpow_one] + rw [PiLp.norm_eq_of_L1] congr ext i rw[sigma_decompose_apply x i,← norm_smul,sigma_norm_apply,norm_smul] @@ -127,14 +122,14 @@ b i, including both positive and negative scalings. -/ local notation "b" => (Module.finBasis ℝ α) theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank ℝ α ≠ 0): - σ.toFun ⁻¹' (Metric.ball (σ.toFun x) r) ⊆ + σ ⁻¹' (Metric.ball (σ x) r) ⊆ convexHull ℝ (({x} : Set α) + ((⋃ i , {(r / ‖b i‖) • (b i)}) ∪ (⋃ i ,{- (r / ‖b i‖) • (b i)}))):= by intro x₀ hx₀ - simp[dist_eq_norm] at hx₀ + have hx₀ : ‖σ x₀ - σ x‖ < r := by simpa [Metric.mem_ball, dist_eq_norm] using hx₀ rw[← map_sub] at hx₀ have sum_le_r : ∑ i , ‖(b).equivFun (x₀ - x) i‖ * ‖(b) i‖ / r ≤ 1 := by + change ∑ i, ‖(b).repr (x₀ - x) i‖ * ‖b i‖ / r ≤ 1 rw[← Finset.sum_div] - simp only [Basis.equivFun_apply, Pi.sub_apply] rw[← l1_norm_eq (x₀ - x)] apply le_of_lt apply Bound.div_lt_one_of_pos_of_lt hr hx₀ @@ -142,13 +137,13 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank let ι := Fin n let ι₀ := Fin (n + 2) let w₀ := (b).equivFun (x₀ - x) - have repr : ∑ i , w₀ i • b i = x₀ - x := Basis.sum_equivFun b (x₀ - x) + have repr : ∑ i , w₀ i • b i = x₀ - x := (b).sum_equivFun (x₀ - x) let w₁ : ι → ℝ := fun i => |(b).equivFun (x₀ - x) i| * ‖b i‖ / r let sum := ∑ i : ι, w₁ i have sum_pos : 1 - sum ≥ 0 := by - simp only [sum,w₁,ge_iff_le, gt_iff_lt,sub_pos,Pi.sub_apply, sub_nonneg] + simp only [sum, w₁, ge_iff_le, sub_nonneg] apply sum_le_r let w : ι₀ → ℝ @@ -177,7 +172,7 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank have hw₀ : ∀ (i : ι₀), 0 ≤ w i := by intro ⟨i,hi⟩ by_cases h : i < n - · simp only [Pi.sub_apply, h, ↓reduceDIte, ge_iff_le, w, w₁] + · simp only [h, ↓reduceDIte, ge_iff_le, w, w₁] apply div_nonneg _ (le_of_lt hr) apply mul_nonneg apply abs_nonneg @@ -188,41 +183,51 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank have : n + 1 + 1 = n + 2 := by norm_num rw[← this] rw[Fin.sum_univ_castSucc,Fin.sum_univ_castSucc] - simp only [Fin.coe_castSucc, Fin.is_lt, ↓reduceDIte, Fin.eta, Fin.val_last, lt_self_iff_false, + simp only [Fin.val_castSucc, Fin.is_lt, ↓reduceDIte, Fin.eta, Fin.val_last, lt_self_iff_false, add_lt_iff_neg_left, not_lt_zero'] have : ∑ x : Fin n, w₁ x = sum := rfl rw[this] linarith have hz : ∀ (i : ι₀), z i ∈ ((⋃ i , {(r / ‖b i‖) • (b i)}) ∪ (⋃ i ,{- (r / ‖b i‖) • (b i)})) := by - intro i + rintro ⟨i, hi⟩ simp only [dite_eq_ite, z] - by_cases h₁ : (i : ℕ) = n + 1 - · simp[h₁] - simp only [h₁, ↓reduceIte] - by_cases h₂ : (i : ℕ) = n - · simp[h₂] - simp only [h₂, ↓reduceIte, add_right_inj] - let use_i : ι := ⟨i ,lem_i i.2 h₁ h₂⟩ - simp only [↓reduceDIte] - let a := (b).equivFun (x₀ - x) use_i - rcases lt_trichotomy a 0 with ha | ha | ha + by_cases h₁ : i = n + 1 · right - have : (b).equivFun (x₀ - x) use_i ≠ 0 := by linarith - simp at this - rw[sign_neg ha] - simp[this, ↓reduceIte] - · left; - simp only [a] at ha - rw[ha,sign_zero] - simp; + refine Set.mem_iUnion.2 ⟨fin0, ?_⟩ + simp [h₁] + by_cases h₂ : i = n · left - rw[sign_pos ha] + refine Set.mem_iUnion.2 ⟨fin0, ?_⟩ + simp [h₂] + let use_i : ι := ⟨i, lem_i hi h₁ h₂⟩ + have hsub : (b).equivFun (x₀ - x) use_i = ((b).repr x₀) use_i - ((b).repr x) use_i := by simp + rcases lt_trichotomy ((b).equivFun (x₀ - x) use_i) 0 with ha | ha | ha + · right + refine Set.mem_iUnion.2 ⟨use_i, ?_⟩ + have hdlt : ((b).repr x₀) use_i - ((b).repr x) use_i < 0 := by simpa [hsub] using ha + have hdne : ((b).repr x₀) use_i - ((b).repr x) use_i ≠ 0 := ne_of_lt hdlt + rw [Set.mem_singleton_iff] + simp [h₁, h₂, use_i, hdne] + rw [sign_neg hdlt] + simp + · left + refine Set.mem_iUnion.2 ⟨use_i, ?_⟩ + have hd0 : ((b).repr x₀) use_i - ((b).repr x) use_i = 0 := by simpa [hsub] using ha + rw [Set.mem_singleton_iff] + simp [h₁, h₂, use_i, hd0] + · left + refine Set.mem_iUnion.2 ⟨use_i, ?_⟩ + have hdgt : 0 < ((b).repr x₀) use_i - ((b).repr x) use_i := by simpa [hsub] using ha + have hdne : ((b).repr x₀) use_i - ((b).repr x) use_i ≠ 0 := ne_of_gt hdgt + rw [Set.mem_singleton_iff] + simp [h₁, h₂, use_i, hdne] + rw [sign_pos hdgt] simp have bi_pos : ∀ i : ι , ‖b i‖ ≠ 0 := by intro i refine norm_ne_zero_iff.mpr ?_ - exact Basis.ne_zero b i + exact (b).ne_zero i have hx : ∑ i : ι₀, w i • z i = x₀ - x := by rw[Fin.sum_univ_castSucc,Fin.sum_univ_castSucc] @@ -239,10 +244,10 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank refine Nat.ne_of_lt ?h refine Nat.lt_succ_of_lt i.2 have h₂ : (i : ℕ) ≠ n := Ne.symm (Nat.ne_of_lt' i.2) - simp only [neg_smul, dite_eq_ite, Fin.coe_castSucc, h₁, ↓reduceIte, h₂, + simp only [neg_smul, dite_eq_ite, Fin.val_castSucc, h₁, ↓reduceIte, h₂, Fin.eta, z] have : w₁ i • ((SignType.sign ((b).equivFun (x₀ - x) i)) * (r / ‖b i‖)) = w₀ i := by - simp only [Pi.sub_apply, smul_eq_mul, w₁, w₀] + simp only [smul_eq_mul, w₁, w₀] calc _ = |(b).equivFun (x₀ - x) i| * (‖b i‖ / r) * (SignType.sign ((b).equivFun (x₀ - x) i)) * (r / ‖b i‖) := by rw[← mul_div] @@ -269,27 +274,14 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank theorem sigma_is_injective : Function.Injective σ (α := α) := by intro x y h - rw[← sub_eq_zero] - rw[← sub_eq_zero, ← map_sub] at h - let z := x - y - let n := Module.finrank ℝ α - let bs := Module.finBasis ℝ α - have hz : z = ∑ i : Fin n , (bs.repr z i)• bs i := Eq.symm (Basis.sum_repr bs z) - change σ z = 0 at h - rw[hz] at h - simp at h - have hi :∀ i , (∑ x : Fin n, (bs.repr z) x • σ (bs x)) i = (bs.repr z) i * ‖(Module.finBasis ℝ α) i‖:= by + rw [← sub_eq_zero] + have hz : σ (x - y) = 0 := by rw [map_sub, h, sub_self] + have hsum : ∑ i, ‖((Module.finBasis ℝ α).repr (x - y)) i‖ * ‖(Module.finBasis ℝ α) i‖ = 0 := by + rw [← l1_norm_eq (x - y), hz, norm_zero] + have hrepr : ∀ i, ((Module.finBasis ℝ α).repr (x - y)) i = 0 := by intro i - repeat rw[Finset.sum_apply]; - simp only [PiLp.smul_apply] - rw[sigma_norm_apply] - show z = 0 - rw[hz] - apply Fintype.sum_eq_zero (fun a => (bs.repr z) a • bs a) - intro i - rw[smul_eq_zero] - left - have : ‖(Module.finBasis ℝ α) i‖ ≠ 0:= norm_ne_zero_iff.mpr $ Basis.ne_zero (Module.finBasis ℝ α) i - have h1 : (bs.repr z) i * ‖(Module.finBasis ℝ α) i‖ = 0 := by - rw[← hi , h, PiLp.zero_apply] - apply eq_zero_of_ne_zero_of_mul_right_eq_zero this h1 + have hzero := (Finset.sum_eq_zero_iff_of_nonneg (s := Finset.univ) fun j _ ↦ by positivity).mp hsum i (by simp) + exact abs_eq_zero.mp <| eq_zero_of_ne_zero_of_mul_right_eq_zero (norm_ne_zero_iff.mpr <| (Module.finBasis ℝ α).ne_zero i) hzero + apply (Module.finBasis ℝ α).repr.injective + ext i + simpa using hrepr i diff --git a/Optlib/Function/Lsmooth.lean b/Optlib/Function/Lsmooth.lean index 1ff68dd..6b86e87 100644 --- a/Optlib/Function/Lsmooth.lean +++ b/Optlib/Function/Lsmooth.lean @@ -76,12 +76,12 @@ theorem lipschitz_continuous_upper_bound {E : Type*} apply HasDerivAt.add · apply HasDerivAt.const_add · apply hasDerivAt_mul_const - · have : l * ‖y - x‖ ^ 2 * t = (2 * t) * (l * ‖y - x‖ ^ 2 / 2) := by field_simp; ring_nf + · have : l * ‖y - x‖ ^ 2 * t = (2 * t) * (l * ‖y - x‖ ^ 2 / 2) := by field_simp rw [this]; apply HasDerivAt.mul_const obtain hd := HasDerivAt.pow (n := 2) (hasDerivAt_id' t) simp at hd; exact hd suffices g 1 ≤ u 1 by - simp [u, g, u', LL, g'] at this + simp [u, g, LL, g'] at this rw [map_sub]; linarith apply image_le_of_deriv_right_le_deriv_boundary (a := 0) (b := 2) · exact HasDerivAt.continuousOn (fun x _ ↦ gderiv x) @@ -106,7 +106,7 @@ open InnerProductSpace Set variable {f : E → ℝ} {a : ℝ} {f' : E → E} {l : NNReal} -theorem lower_to_lipschitz (h₂ : ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2) +theorem lower_to_lipschitz (h₂ : ∀ x y, inner ℝ (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2) (hl : l > 0) : LipschitzWith l f' := by rw [lipschitzWith_iff_norm_sub_le] intro x y @@ -120,18 +120,17 @@ theorem lower_to_lipschitz (h₂ : ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / apply real_inner_le_norm _ = (1 / l * ‖f' x - f' y‖) * (l * ‖x - y‖) := by field_simp - ring_nf have H₂ : 1 / l > 0 := by apply one_div_pos.mpr hl cases lt_or_ge 0 (‖f' x - f' y‖) case inl h => apply le_of_mul_le_mul_left H₁ apply mul_pos _ h - · simp [H₂, hl] + · simpa using H₂ case inr h => apply le_trans h apply mul_nonneg - · simp [hl] + · simp apply norm_nonneg _ end @@ -145,16 +144,16 @@ variable {f : E → ℝ} {a : ℝ} {f' : E → E} {xm : E} {l : NNReal} theorem lipschitz_continuos_upper_bound' (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁) (h₂ : LipschitzWith l f') : - ∀ x y : E, f y ≤ f x + inner (f' x) (y - x) + l / 2 * ‖y - x‖ ^ 2 := by + ∀ x y : E, f y ≤ f x + inner ℝ (f' x) (y - x) + l / 2 * ‖y - x‖ ^ 2 := by intro x y rw [lipschitzWith_iff_norm_sub_le] at h₂ let g := fun x ↦ (toDual ℝ E) (f' x) have h' : ∀ x : E, HasFDerivAt f (g x) x := h₁ - have equiv : ∀ x y : E, inner (f' x) (y - x) = (g x) (y - x) := by + have equiv : ∀ x y : E, inner ℝ (f' x) (y - x) = (g x) (y - x) := by intro x y - rw [InnerProductSpace.toDual_apply] + rw [InnerProductSpace.toDual_apply_apply] have h₂' : LipschitzWith l g := by - simp only [g, equiv] + simp only [g] rw [lipschitzWith_iff_norm_sub_le] intro x y have h1 : ∀ x : E, ‖(toDual ℝ E) x‖ =‖x‖ := by @@ -177,12 +176,17 @@ theorem lipschitz_minima_lower_bound (h₁ : ∀ x : E, HasGradientAt f (f' x) x have eq : f xm ≤ f x - 1 / (2 * l) * ‖f' x‖ ^ 2 := by calc _ ≤ f y := by apply min - _ ≤ f x + inner (f' x) (y - x) + l / 2 * ‖y - x‖ ^ 2 := by + _ ≤ f x + inner ℝ (f' x) (y - x) + l / 2 * ‖y - x‖ ^ 2 := by apply lipschitz_continuos_upper_bound' h₁ h₂ _ = f x - 1 / (2 * l) * ‖f' x‖ ^ 2 := by rw [add_assoc]; rw [sub_eq_add_neg (f x), add_left_cancel_iff.2] - field_simp [y]; rw [← real_inner_self_eq_norm_sq, ← real_inner_self_eq_norm_sq] - rw [inner_smul_right, inner_smul_left, inner_smul_right] + have hyx : y - x = - ((1 : ℝ) / l : ℝ) • f' x := by simp [y] + rw [hyx, real_inner_smul_right, real_inner_self_eq_norm_sq] + have hlR : (0 : ℝ) < (l : ℝ) := by exact_mod_cast hl + have hnorm : ‖-(1 / (l : ℝ)) • f' x‖ ^ 2 = (1 / (l : ℝ)) ^ 2 * ‖f' x‖ ^ 2 := by + rw [norm_smul] + simp [pow_two, mul_assoc, mul_left_comm, mul_comm] + rw [hnorm] field_simp; ring_nf linarith @@ -202,27 +206,32 @@ theorem lipschitz_to_lnorm_sub_convex (hs : Convex ℝ s) ConvexOn ℝ s (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x) := by rw [lipschitzOnWith_iff_norm_sub_le] at h₂ let g' : E → E := fun x ↦ l.1 • x - f' x - have H₂ : ∀ x ∈ s, ∀ y ∈ s, inner (g' x - g' y) (x - y) ≥ (0 : ℝ) := by + have H₂ : ∀ x ∈ s, ∀ y ∈ s, inner ℝ (g' x - g' y) (x - y) ≥ (0 : ℝ) := by intro x hx y hy + have hxy : + inner ℝ (g' x - g' y) (x - y) = l.1 * ‖x - y‖ ^ 2 - inner ℝ (f' x - f' y) (x - y) := by + calc + inner ℝ (g' x - g' y) (x - y) = inner ℝ (l.1 • (x - y) - (f' x - f' y)) (x - y) := by + simp [g', sub_eq_add_neg, add_assoc, add_left_comm, add_comm] + _ = l.1 * inner ℝ (x - y) (x - y) - inner ℝ (f' x - f' y) (x - y) := by + rw [inner_sub_left, inner_smul_left] + simp + _ = l.1 * ‖x - y‖ ^ 2 - inner ℝ (f' x - f' y) (x - y) := by + rw [real_inner_self_eq_norm_sq] calc - _ = l.1 * (inner (x - y) (x - y)) - inner (f' x - f' y) (x - y) := by - simp [g'] - rw [← sub_add, sub_right_comm, sub_add, inner_sub_left, ← smul_sub, inner_smul_left] - simp only [conj_trivial] - _ = l * ‖x - y‖ ^ 2 - inner (f' x - f' y) (x - y) := by - simp; left - apply real_inner_self_eq_norm_sq - _ ≥ l * ‖x - y‖ ^ 2 - ‖f' x - f' y‖ * ‖x - y‖ := by - apply add_le_add; linarith - simp - apply real_inner_le_norm - _ ≥ l * ‖x - y‖ ^ 2 - l * ‖x - y‖ ^ 2 := by - simp - rw [pow_two, ← mul_assoc] - apply mul_le_mul (h₂ hx hy); linarith; apply norm_nonneg - apply mul_nonneg _ (norm_nonneg _) - simp [hl] - _ = 0 := by simp + _ = l.1 * ‖x - y‖ ^ 2 - inner ℝ (f' x - f' y) (x - y) := hxy + _ = l * ‖x - y‖ ^ 2 - inner ℝ (f' x - f' y) (x - y) := by rfl + _ ≥ l * ‖x - y‖ ^ 2 - ‖f' x - f' y‖ * ‖x - y‖ := by + apply add_le_add; linarith + simp + apply real_inner_le_norm + _ ≥ l * ‖x - y‖ ^ 2 - l * ‖x - y‖ ^ 2 := by + simp + rw [pow_two, ← mul_assoc] + apply mul_le_mul (h₂ hx hy); linarith; apply norm_nonneg + apply mul_nonneg _ (norm_nonneg _) + exact le_of_lt hl + _ = 0 := by simp have H₃ : ∀ x ∈ s, HasGradientAt (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x) (g' x) x := by intro x hx have u₂ := HasGradientAt.const_smul (gradient_norm_sq_eq_two_self x) ((l / (2 : ℝ)) : ℝ) @@ -241,9 +250,9 @@ theorem lipschitz_to_lnorm_sub_convex (hs : Convex ℝ s) theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (h₂ : ConvexOn ℝ Set.univ (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x)) (lp : l > 0) (hfun: ConvexOn ℝ Set.univ f) (x : E) (y : E) : - inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by + inner ℝ (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by rw [ConvexOn] at hfun - let fs : E → (E → ℝ) := fun s => (fun x => f x - inner (f' s) x) + let fs : E → (E → ℝ) := fun s => (fun x => f x - inner ℝ (f' s) x) have hfunconvex : ∀ s : E, ConvexOn ℝ Set.univ (fs s) := by intro s rw [ConvexOn] @@ -281,22 +290,22 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) rw [inner_add_right, real_inner_smul_right, real_inner_smul_right] calc _ = (l / 2) * ‖a • x₁ + b • y₁‖ ^ 2 - f (a • x₁ + b • y₁) + - (a * inner (f' s) x₁ + b * inner (f' s) y₁) := by ring_nf + (a * inner ℝ (f' s) x₁ + b * inner ℝ (f' s) y₁) := by ring_nf _ ≤ a • (l / 2 * ‖x₁‖ ^ 2 - f x₁) + b • (l / 2 * ‖y₁‖ ^ 2 - f y₁) + - (a * inner (f' s) x₁ + b * inner (f' s) y₁) := by apply add_le_add_right h₂' - _ = a • (l / 2 * ‖x₁‖ ^ 2 - (f x₁ - inner (f' s) x₁)) + b • - (l / 2 * ‖y₁‖ ^ 2 - (f y₁ - inner (f' s) y₁)) := by simp; ring_nf + (a * inner ℝ (f' s) x₁ + b * inner ℝ (f' s) y₁) := by linarith [h₂'] + _ = a • (l / 2 * ‖x₁‖ ^ 2 - (f x₁ - inner ℝ (f' s) x₁)) + b • + (l / 2 * ‖y₁‖ ^ 2 - (f y₁ - inner ℝ (f' s) y₁)) := by simp; ring_nf let gs' := fun s ↦ (fun z ↦ l • z - (fs' s z)) have hgx₁ : ∀ s x : E, HasGradientAt (gs s) ((gs' s) x) x := by intro s z apply HasGradientAt.sub (gradient_of_const_mul_norm l z) (hfconx₁ s z) - have hgx₂ : ∀ s z₁ z₂ : E, (gs s) z₁ + inner (gs' s z₁) (z₂ - z₁) ≤ gs s z₂ := by + have hgx₂ : ∀ s z₁ z₂ : E, (gs s) z₁ + inner ℝ (gs' s z₁) (z₂ - z₁) ≤ gs s z₂ := by intro s z₁ z₂ apply Convex_first_order_condition' (hgx₁ s z₁) (hgxconvex s) · simp only [Set.mem_univ] · simp only [Set.mem_univ] have hfx₂ : ∀ (s x y₁ : E), (fs s) y₁ ≤ fs s x + - inner (fs' s x) (y₁ - x) + l / 2 * ‖y₁ - x‖ ^ 2 := by + inner ℝ (fs' s x) (y₁ - x) + l / 2 * ‖y₁ - x‖ ^ 2 := by intro s z₁ z₂ simp only [fs, fs'] rcases hgx₂ s z₁ z₂ with hgx₂' @@ -304,30 +313,30 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) have t₈ : gs s z₂ = l / 2 * ‖z₂‖ ^ 2 - fs s z₂ := by rfl have t₉ : gs' s z₁ = l • z₁ - fs' s z₁ := by rfl rw [t₇, t₈, t₉] at hgx₂' - have t₁₀ : fs s z₂ + (l / 2 * ‖z₁‖ ^ 2 - fs s z₁ + inner (l • z₁ - fs' s z₁) (z₂ - z₁)) + have t₁₀ : fs s z₂ + (l / 2 * ‖z₁‖ ^ 2 - fs s z₁ + inner ℝ (l • z₁ - fs' s z₁) (z₂ - z₁)) ≤ l / 2 * ‖z₂‖ ^ 2 := by apply add_le_of_le_sub_left hgx₂' have t₁₁ : fs s z₂ ≤ l / 2 * ‖z₂‖ ^ 2 - (l / 2 * ‖z₁‖ ^ 2 - fs s z₁ + - inner (l • z₁ - fs' s z₁) (z₂ - z₁)) := by + inner ℝ (l • z₁ - fs' s z₁) (z₂ - z₁)) := by rw [add_comm] at t₁₀ apply le_sub_left_of_add_le t₁₀ simp only [] at t₁₁; rw [← sub_add (l / 2 * ‖z₁‖ ^ 2) _ _] at t₁₁ calc _ ≤ l / 2 * ‖z₂‖ ^ 2 - (l / 2 * ‖z₁‖ ^ 2 - f z₁ + - inner (f' s) z₁ + inner (l • z₁ - fs' s z₁) (z₂ - z₁)) := by apply t₁₁ - _ = l / 2 * ‖z₂‖ ^ 2 -(l / 2 * ‖z₁‖ ^ 2 - f z₁ + inner (f' s) z₁ + - (l * (inner z₁ z₂ - ‖z₁‖ ^ 2) - inner (f' z₁ - f' s) (z₂ - z₁))) := by + inner ℝ (f' s) z₁ + inner ℝ (l • z₁ - fs' s z₁) (z₂ - z₁)) := by apply t₁₁ + _ = l / 2 * ‖z₂‖ ^ 2 -(l / 2 * ‖z₁‖ ^ 2 - f z₁ + inner ℝ (f' s) z₁ + + (l * (inner ℝ z₁ z₂ - ‖z₁‖ ^ 2) - inner ℝ (f' z₁ - f' s) (z₂ - z₁))) := by rw [inner_sub_left, inner_smul_left] - simp; rw [inner_sub_right, real_inner_self_eq_norm_sq];left ; simp - _ = f z₁ - inner (f' s) z₁ + inner (f' z₁ - f' s) (z₂ - z₁) + - l / 2 * (‖z₂‖ ^ 2 - 2 * inner z₂ z₁ + ‖z₁‖ ^ 2) := by + simp; rw [inner_sub_right, real_inner_self_eq_norm_sq] + _ = f z₁ - inner ℝ (f' s) z₁ + inner ℝ (f' z₁ - f' s) (z₂ - z₁) + + l / 2 * (‖z₂‖ ^ 2 - 2 * inner ℝ z₂ z₁ + ‖z₁‖ ^ 2) := by field_simp; ring_nf; rw [real_inner_comm] - _ = f z₁ - inner (f' s) z₁ + inner (f' z₁ - f' s) (z₂ - z₁) + l / 2 * ‖z₂ - z₁‖ ^ 2 := by + _ = f z₁ - inner ℝ (f' s) z₁ + inner ℝ (f' z₁ - f' s) (z₂ - z₁) + l / 2 * ‖z₂ - z₁‖ ^ 2 := by rw [← norm_sub_sq_real] have hfs₃ : ∀ s : E, IsMinOn (fs s) univ s := by intro s apply first_order_convex (hfconx₁ s) (hfunconvex s) - simp only [fs, fs', sub_self] + simp only [fs', sub_self] have hfy₃ : IsMinOn (fs y) _ y := hfs₃ y have hfx₄ : fs x x ≤ fs x y - 1 / (2 * l) * ‖fs' x y‖ ^ 2 := by have : fs x x ≤ fs x (y - (1 / l) • fs' x y) := by @@ -338,7 +347,7 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) apply le_trans this rcases hfx₂ x y (y - (1 / l) • fs' x y) with hfx₂' calc - _ ≤ fs x y + inner (fs' x y) (y - (1 / l) • fs' x y - y) + _ ≤ fs x y + inner ℝ (fs' x y) (y - (1 / l) • fs' x y - y) + l / 2 * ‖y - (1 / l) • fs' x y - y‖ ^ 2 := by apply hfx₂' _ = fs x y - 1 / (2 * l) * ‖fs' x y‖ ^ 2 := by have : y - (1 / l) • fs' x y - y = - (1 / l) • fs' x y := by simp @@ -354,14 +363,14 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) apply le_trans this rcases hfx₂ y x (x - (1 / l) • fs' y x) with hfy₂' calc - _ ≤ fs y x + inner (fs' y x) (x - (1 / l) • fs' y x - x) + _ ≤ fs y x + inner ℝ (fs' y x) (x - (1 / l) • fs' y x - x) + l / 2 * ‖x - (1 / l) • fs' y x - x‖ ^ 2 := by apply hfy₂' _ = fs y x - 1 / (2 * l) * ‖fs' y x‖ ^ 2 := by have : x - (1 / l) • fs' y x - x = - (1 / l) • fs' y x := by simp rw [this, real_inner_smul_right] rw [← real_inner_self_eq_norm_sq, ← real_inner_self_eq_norm_sq, real_inner_smul_right] rw [real_inner_smul_left]; field_simp; ring - have hh₁: (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ f y - f x - inner (f' x) (y - x) := by + have hh₁: (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ f y - f x - inner ℝ (f' x) (y - x) := by calc (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ fs x y - fs x x := by have : f' x - f' y = - fs' x y := by @@ -371,20 +380,20 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) have : ‖- fs' x y‖ = ‖fs' x y‖ :=by apply norm_neg rw [this] linarith [hfx₄] - _ = f y - f x - inner (f' x) (y - x) := by - have t₄: fs x y = f y - inner (f' x) y := by rfl - have t₅: fs x x = f x - inner (f' x) x := by rfl + _ = f y - f x - inner ℝ (f' x) (y - x) := by + have t₄: fs x y = f y - inner ℝ (f' x) y := by rfl + have t₅: fs x x = f x - inner ℝ (f' x) x := by rfl rw [t₄,t₅,inner_sub_right] ring - have hh₂: (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ f x - f y - inner (f' y) (x - y) := by + have hh₂: (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ f x - f y - inner ℝ (f' y) (x - y) := by calc (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ fs y x -fs y y := by - have : f' x - f' y = fs' y x := by simp + have : f' x - f' y = fs' y x := by simp [fs'] rw [this] linarith [hfy₄] - _ = f x - f y - inner (f' y) (x - y) := by - have t₄' : fs y y = f y - inner (f' y) y := by rfl - have t₅' : fs y x = f x - inner (f' y) x := by rfl + _ = f x - f y - inner ℝ (f' y) (x - y) := by + have t₄' : fs y y = f y - inner ℝ (f' y) y := by rfl + have t₅' : fs y x = f x - inner ℝ (f' y) x := by rfl rw [t₄', t₅', inner_sub_right] ring calc @@ -392,24 +401,24 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) field_simp rw [← mul_two,mul_comm] ring - _ ≤ (f y - f x - inner (f' x) (y - x)) + (f x - f y - inner (f' y) (x - y)) := by + _ ≤ (f y - f x - inner ℝ (f' x) (y - x)) + (f x - f y - inner ℝ (f' y) (x - y)) := by apply add_le_add hh₁ hh₂ - _ = inner (f' x - f' y) (x - y) := by + _ = inner ℝ (f' x - f' y) (x - y) := by rw [inner_sub_left] - have t₆ : (inner (f' x) (y - x) : ℝ) = - (inner (f' x) (x - y) : ℝ) := by + have t₆ : (inner ℝ (f' x) (y - x) : ℝ) = - (inner ℝ (f' x) (x - y) : ℝ) := by rw [inner_sub_right, inner_sub_right]; ring rw[t₆]; ring theorem lipschitz_to_lower (h₁ : ∀ x, HasGradientAt f (f' x) x) (h₂ : LipschitzWith l f') (hfun : ConvexOn ℝ Set.univ f) (hl : l > 0) : - ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by + ∀ x y, inner ℝ (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by obtain convex : ConvexOn ℝ Set.univ (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x) := lipschitz_to_lnorm_sub_convex convex_univ (fun x _ => h₁ x) (lipschitzOnWith_univ.mpr h₂) hl exact convex_to_lower h₁ convex hl hfun theorem lower_iff_lipschitz (h₁ : ∀ x, HasGradientAt f (f' x) x) (hfun: ConvexOn ℝ Set.univ f) (hl : l > 0) : LipschitzWith l f' ↔ - ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := + ∀ x y, inner ℝ (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := ⟨fun h ↦ lipschitz_to_lower h₁ h hfun hl, fun h ↦ lower_to_lipschitz h hl⟩ theorem lipshictz_iff_lnorm_sub_convex (h₁ : ∀ x, HasGradientAt f (f' x) x) @@ -424,7 +433,7 @@ theorem lipshictz_iff_lnorm_sub_convex (h₁ : ∀ x, HasGradientAt f (f' x) x) theorem lower_iff_lnorm_sub_convex (h₁ : ∀ x, HasGradientAt f (f' x) x) (hfun: ConvexOn ℝ Set.univ f) (hl : l > 0) : ConvexOn ℝ univ (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x) - ↔ ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by + ↔ ∀ x y, inner ℝ (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by rw [← lipshictz_iff_lnorm_sub_convex h₁ hfun hl] rw [lower_iff_lipschitz h₁ hfun hl] diff --git a/Optlib/Function/MinimaClosedFunction.lean b/Optlib/Function/MinimaClosedFunction.lean index 44c01dc..50a4662 100644 --- a/Optlib/Function/MinimaClosedFunction.lean +++ b/Optlib/Function/MinimaClosedFunction.lean @@ -6,7 +6,7 @@ Authors: Wanyi He import Mathlib.Analysis.Convex.Basic import Mathlib.Topology.MetricSpace.Basic import Mathlib.Topology.MetricSpace.Bounded -import Mathlib.Topology.Semicontinuous +import Mathlib.Topology.Semicontinuity.Basic import Mathlib.Topology.Sequences /-! @@ -16,6 +16,8 @@ variable {E F : Type*} open Set Bornology Topology Filter TopologicalSpace +class LinearOrderedRing (R : Type*) extends Ring R, LinearOrder R, IsStrictOrderedRing R + section preparation variable {E F : Type*} [CompleteLinearOrder F] @@ -33,15 +35,11 @@ private lemma l0 {f : E → F}(y : F) (h : (f ⁻¹' Set.Iic y).Nonempty) : · exact Exists.intro x xeq · exact Exists.intro x xeq have h₁ : sInf {f x | x ∈ f ⁻¹' Set.Iic y} ≤ sInf {f x | x ∈ (f ⁻¹' Set.Iic y)ᶜ} := by - apply sInf_le_sInf_of_forall_exists_le - intro y' ynsub - rcases h with ⟨x', xsub⟩; use f x' - constructor - · exact ⟨x', xsub, rfl⟩ - rcases ynsub with ⟨x, xnsub, xeq⟩ - apply le_trans xsub (Eq.trans_ge xeq (le_of_lt _)) - simp only [← Set.preimage_setOf_eq, ← Set.preimage_compl, Set.compl_Iic, Set.Ioi_def] at xnsub - assumption + rcases h with ⟨x', xsub⟩ + refine le_sInf ?_ + rintro y' ⟨x, xnsub, rfl⟩ + exact le_trans (sInf_le ⟨x', xsub, rfl⟩) + (le_trans (show f x' ≤ y from xsub) (le_of_lt (lt_of_not_ge xnsub))) calc sInf {f x | x ∈ f ⁻¹' Set.Iic y} = sInf {f x | x ∈ f ⁻¹' Set.Iic y} ⊓ sInf {f x | x ∈ (f ⁻¹' Set.Iic y)ᶜ} := @@ -65,32 +63,24 @@ variable [FirstCountableTopology E] [FirstCountableTopology F] /- If a premiage of `f` is nonempty and compact, then its minimum point set `{x | IsMinOn f univ x}` is nonempty -/ +omit [DenselyOrdered F] [TopologicalSpace F] [OrderTopology F] [FirstCountableTopology E] + [FirstCountableTopology F] in theorem IsMinOn.of_isCompact_preimage (hf : LowerSemicontinuous f) {y : F} (h1 : (f ⁻¹' Set.Iic y).Nonempty) (h2 : IsCompact (f ⁻¹' Set.Iic y)) : ∃ x, IsMinOn f univ x := by - have hs : Set.Nonempty {f x | x ∈ (f ⁻¹' Set.Iic y)} := by - rcases h1 with ⟨x, xsub⟩ - exact Exists.intro (f x) (Exists.intro x ⟨xsub, rfl⟩) - have hs' : BddBelow {f x | x ∈ (f ⁻¹' Set.Iic y)} := - OrderBot.bddBelow {x | ∃ x_1 ∈ f ⁻¹' Iic y, f x_1 = x} - rcases exists_seq_tendsto_sInf hs hs' with ⟨fx, _, cfx, fxs⟩ - choose x xsub xeq using fxs - rcases IsCompact.tendsto_subseq h2 xsub with ⟨x', xsub', k, mono, cxk⟩ - have cfxk : Tendsto (f ∘ x ∘ k) atTop (𝓝 (sInf {f x | x ∈ (f ⁻¹' Set.Iic y)})) := by - have xkeq : ∀ (n : ℕ), (f ∘ x ∘ k) n = (fx ∘ k) n := fun n => xeq <| k n - rw [tendsto_congr xkeq] - apply Tendsto.comp cfx (StrictMono.tendsto_atTop mono) - have inepi : (x', sInf {f x | x ∈ (f ⁻¹' Set.Iic y)}) ∈ {p : E × F | f p.1 ≤ p.2} := - (IsClosed.isSeqClosed (LowerSemicontinuous.isClosed_epigraph hf)) - (fun n => Eq.le (by rfl)) (Tendsto.prod_mk_nhds cxk cfxk) - use x'; intro xx _ - apply le_of_eq_of_le - · apply le_antisymm inepi (sInf_le (Exists.intro x' ⟨xsub', rfl⟩)) - · apply le_of_eq_of_le (l0 y h1) (sInf_le (by use xx)) + obtain ⟨x, hx, hxmin⟩ := + LowerSemicontinuousOn.exists_isMinOn h1 h2 (hf.lowerSemicontinuousOn _) + have hxmin' : ∀ z ∈ f ⁻¹' Set.Iic y, f x ≤ f z := by simpa [isMinOn_iff] using hxmin + refine ⟨x, fun z _ => ?_⟩ + by_cases hz : z ∈ f ⁻¹' Set.Iic y + · exact hxmin' z hz + · exact le_trans hx (le_of_lt (lt_of_not_ge hz)) /- If a premiage of `f` is nonempty and compact, then its minimum point set `{x | IsMinOn f univ x}` is compact -/ +omit [DenselyOrdered F] [TopologicalSpace F] [OrderTopology F] [FirstCountableTopology E] + [FirstCountableTopology F] in theorem IsCompact_isMinOn_of_isCompact_preimage (hf : LowerSemicontinuous f) {y : F} (h1 : (f ⁻¹' Set.Iic y).Nonempty) (h2 : IsCompact (f ⁻¹' Set.Iic y)) : IsCompact {x | IsMinOn f univ x} := by @@ -129,10 +119,9 @@ theorem isMinOn_unique {x y : E} (hf' : strong_quasi f 𝕜) have eqone : a + (1 - a) = 1 := add_sub_cancel a 1 have lta' : 0 < 1 - a := sub_pos_of_lt alt have h : f (a • x + (1 - a) • y) < f y := by - apply Eq.trans_gt (max_eq_right (hx trivial)) - apply hf' neq lta lta' eqone + simpa [max_eq_right (hx trivial)] using hf' neq lta lta' eqone simp only [isMinOn_iff] at hy specialize hy (a • x + (1 - a) • y) trivial - apply not_le_of_lt h hy + exact (not_le_of_gt h) hy end diff --git a/Optlib/Function/Proximal.lean b/Optlib/Function/Proximal.lean index 2ad865f..704230c 100644 --- a/Optlib/Function/Proximal.lean +++ b/Optlib/Function/Proximal.lean @@ -3,7 +3,7 @@ Copyright (c) 2024 Shengyang Xu, Chenyi Li. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Authors: Shengyang Xu, Chenyi Li -/ -import Mathlib.Topology.Semicontinuous +import Mathlib.Topology.Semicontinuity.Basic import Mathlib.Analysis.Convex.Basic import Optlib.Convex.Subgradient import Optlib.Function.Lsmooth @@ -66,9 +66,11 @@ theorem prox_set_compact_of_lowersemi (f : E → ℝ) (hc : LowerSemicontinuous have hg : LowerSemicontinuous g := by apply LowerSemicontinuous.add hc apply Continuous.lowerSemicontinuous - apply continuous_iff_continuousOn_univ.2 - apply HasGradientAt.continuousOn - intro u _; apply gradient_of_sq u + have hcont : ContinuousOn (fun u : E ↦ ‖u - x‖ ^ 2 / 2) univ := by + apply HasGradientAt.continuousOn + intro u _ + exact gradient_of_sq (x := x) u + exact (continuousOn_univ.mp hcont) have S_bddbelow : BddBelow ImS := by use L; rw [mem_lowerBounds] rintro gy ⟨y0, _, gyeq⟩; rw [← gyeq]; exact boundg y0 @@ -101,7 +103,7 @@ theorem prox_set_compact_of_lowersemi (f : E → ℝ) (hc : LowerSemicontinuous rw [add_right_comm, mul_comm]; simp; linarith [sq_nonneg b] calc 0 ≤ a ^ 2 / 2 := by linarith [sq_nonneg a] - _ ≤ b * 2 / 2 := by rw [div_le_div_right]; exact h1; linarith + _ ≤ b * 2 / 2 := by nlinarith [h1] _ ≤ b + 1 := by simp linarith apply aux ieq @@ -119,10 +121,12 @@ theorem prox_set_compact_of_lowersemi (f : E → ℝ) (hc : LowerSemicontinuous apply Tendsto.comp cfx (StrictMono.tendsto_atTop mono) have inepi : (x', sInf ImS) ∈ epi := by let p := fun c ↦ (((fun n ↦ xn n) ∘ k) c, (g ∘ xn ∘ k) c) - have pnin : ∀ c : ℕ, p c ∈ epi := by simp [epi] + have pnin : ∀ c : ℕ, p c ∈ epi := by + intro c + simp [p, epi] apply IsClosed.isSeqClosed epi_closed pnin show Tendsto (fun c ↦ (((fun n ↦ xn n) ∘ k) c, (g ∘ xn ∘ k) c)) atTop (𝓝 (x', sInf ImS)) - apply Tendsto.prod_mk_nhds cxk cfxk + simpa [nhds_prod_eq] using (Tendsto.prodMk cxk cfxk) have minima_ieq : g x' ≤ sInf ImS := inepi have minima : ∀ w : E, g x' ≤ g w := by intro w @@ -134,7 +138,7 @@ theorem prox_set_compact_of_lowersemi (f : E → ℝ) (hc : LowerSemicontinuous linarith · have gwnin : g x < g w := by simp [g, S] at hw; simp [g]; exact hw - have gxin : g x ∈ ImS := by use x; simp [g, ImS, S] + have gxin : g x ∈ ImS := by use x; simp [g, S] have legw : sInf ImS ≤ g w := by rw [Real.sInf_le_iff S_bddbelow neImS] intro _ epos; use g x; use gxin; linarith @@ -170,44 +174,45 @@ theorem prox_set_compact_of_convex (f : E → ℝ) (hc : ContinuousOn f univ) have subd: ∃ z : E, Nonempty (SubderivAt f z) := by use x; apply SubderivAt.nonempty hconv hc have hc : LowerSemicontinuous f := - Continuous.lowerSemicontinuous (continuous_iff_continuousOn_univ.mpr hc) + Continuous.lowerSemicontinuous <| by simpa [continuousOn_univ] using hc rcases subd with ⟨z, a, ain⟩ rw [← mem_SubderivAt, HasSubgradientAt] at ain let g := fun u ↦ f u + ‖u - x‖ ^ 2 / 2 let epi := {p : (E × ℝ) | g p.1 ≤ p.2} - have second_lower_bound (y : E) : g y ≥ f z + inner a (y - z) + ‖y - x‖ ^ 2 / 2 := by + have second_lower_bound (y : E) : g y ≥ f z + inner ℝ a (y - z) + ‖y - x‖ ^ 2 / 2 := by simp [g] specialize ain y; linarith - have lower_bound (y : E) : f z + inner a (x - z) - ‖a‖ ^ 2 / 2 ≤ g y := by + have lower_bound (y : E) : f z + inner ℝ a (x - z) - ‖a‖ ^ 2 / 2 ≤ g y := by have : y - z = x - z + (y - x) := by simp specialize second_lower_bound y rw [this, inner_add_right, ← add_assoc, add_assoc] at second_lower_bound - have : 0 ≤ ‖a‖ ^ 2 / 2 + inner a (y - x) + ‖y - x‖ ^ 2 / 2 := by + have : 0 ≤ ‖a‖ ^ 2 / 2 + inner ℝ a (y - x) + ‖y - x‖ ^ 2 / 2 := by field_simp; rw [mul_comm, ← norm_add_sq_real] - apply div_nonneg (sq_nonneg ‖a + (y - x)‖) - norm_num + nlinarith [sq_nonneg ‖a + (y - x)‖] calc - f z + inner a (x - z) - ‖a‖ ^ 2 / 2 ≤ f z + inner a (x - z) - ‖a‖ ^ 2 / 2 + - (‖a‖ ^ 2 / 2 + inner a (y - x) + ‖y - x‖ ^ 2 / 2) := le_add_of_nonneg_right this - _ = f z + inner a (x - z) + (inner a (y - x) + ‖y - x‖ ^ 2 / 2) := by ring + f z + inner ℝ a (x - z) - ‖a‖ ^ 2 / 2 ≤ f z + inner ℝ a (x - z) - ‖a‖ ^ 2 / 2 + + (‖a‖ ^ 2 / 2 + inner ℝ a (y - x) + ‖y - x‖ ^ 2 / 2) := le_add_of_nonneg_right this + _ = f z + inner ℝ a (x - z) + (inner ℝ a (y - x) + ‖y - x‖ ^ 2 / 2) := by ring _ ≤ g y := second_lower_bound have hg : LowerSemicontinuous g := by apply LowerSemicontinuous.add hc apply Continuous.lowerSemicontinuous - apply continuous_iff_continuousOn_univ.2 - apply HasGradientAt.continuousOn - intro u _; apply gradient_of_sq u + have hcont : ContinuousOn (fun u : E ↦ ‖u - x‖ ^ 2 / 2) univ := by + apply HasGradientAt.continuousOn + intro u _ + exact gradient_of_sq (x := x) u + exact (continuousOn_univ.mp hcont) have epi_closed : IsClosed epi := by apply bounded_lowersemicontinuous_to_epi_closed · exact lowerSemicontinuousOn_univ_iff.2 hg - use (f z + inner a (x - z) - ‖a‖ ^ 2 / 2) + use (f z + inner ℝ a (x - z) - ‖a‖ ^ 2 / 2) let S := {y : E| g y ≤ g z} have eq : S = (g ⁻¹' Set.Iic (g z)) := by constructor let ImS := {g y | y ∈ S} have neImS : Set.Nonempty ImS := by use g z; simp [ImS, S]; use z have S_bddbelow : BddBelow ImS := by - use (f z + inner a (x - z) - ‖a‖ ^ 2 / 2) + use (f z + inner ℝ a (x - z) - ‖a‖ ^ 2 / 2) rw [mem_lowerBounds] rintro gy ⟨y0, _, gyeq⟩ rw [← gyeq]; exact lower_bound y0 @@ -222,22 +227,24 @@ theorem prox_set_compact_of_convex (f : E → ℝ) (hc : ContinuousOn f univ) simp [S] at uin apply mem_closedBall_iff_norm.2 have norm_bound: ‖u - (x - a)‖ ≤ ‖z - (x - a)‖ + 2 := by - have ieq : f z + inner a (u - z) + ‖u - x‖ ^ 2 / 2 ≤ f z + ‖z - x‖ ^ 2 / 2 + 1 := by + have ieq : f z + inner ℝ a (u - z) + ‖u - x‖ ^ 2 / 2 ≤ f z + ‖z - x‖ ^ 2 / 2 + 1 := by calc - f z + inner a (u - z) + ‖u - x‖ ^ 2 / 2 ≤ g u := second_lower_bound u + f z + inner ℝ a (u - z) + ‖u - x‖ ^ 2 / 2 ≤ g u := second_lower_bound u _ ≤ f z + ‖z - x‖ ^ 2 / 2 := uin _ ≤ f z + ‖z - x‖ ^ 2 / 2 + 1 := by linarith rw [add_assoc, add_assoc, add_le_add_iff_left] at ieq - have eq : inner a (u - z) + ‖u - x‖ ^ 2 / 2 = - (‖u - (x - a)‖ ^ 2 - ‖a‖ ^ 2 + 2 * inner (x - z) a) / 2 := by + have eq : inner ℝ a (u - z) + ‖u - x‖ ^ 2 / 2 = + (‖u - (x - a)‖ ^ 2 - ‖a‖ ^ 2 + 2 * inner ℝ (x - z) a) / 2 := by field_simp; rw [← sub_add, norm_add_sq_real]; ring_nf rw [add_assoc, ← add_mul, ← inner_add_left, add_comm, real_inner_comm]; simp rw [eq] at ieq have ieq2 : ‖u - (x - a)‖ ^ 2 ≤ ‖z - (x - a)‖ ^ 2 + 2 := by - field_simp at ieq; rw [div_le_div_right, sub_add, sub_le_iff_le_add] at ieq + field_simp at ieq + rw [sub_add, sub_le_iff_le_add] at ieq rw [add_right_comm, add_comm (‖z - x‖ ^ 2), norm_sub_rev z x] at ieq rw [real_inner_comm, ← norm_sub_sq_real, ← sub_add a, sub_add_comm] at ieq - rw [sub_add] at ieq; exact ieq; norm_num + rw [sub_add] at ieq + exact ieq have : |‖z - (x - a)‖ + 2| = ‖z - (x - a)‖ + 2 := by apply abs_of_pos; apply add_pos_of_nonneg_of_pos (norm_nonneg (z - (x - a))) simp @@ -260,10 +267,12 @@ theorem prox_set_compact_of_convex (f : E → ℝ) (hc : ContinuousOn f univ) apply Tendsto.comp cfx (StrictMono.tendsto_atTop mono) have inepi : (x', sInf ImS) ∈ epi := by let p := fun c ↦ (((fun n ↦ xn n) ∘ k) c, (g ∘ xn ∘ k) c) - have pnin : ∀ c : ℕ, p c ∈ epi := by simp [epi] + have pnin : ∀ c : ℕ, p c ∈ epi := by + intro c + simp [p, epi] apply IsClosed.isSeqClosed epi_closed pnin show Tendsto (fun c ↦ (((fun n ↦ xn n) ∘ k) c, (g ∘ xn ∘ k) c)) atTop (𝓝 (x', sInf ImS)) - apply Tendsto.prod_mk_nhds cxk cfxk + simpa [nhds_prod_eq] using (Tendsto.prodMk cxk cfxk) have minima_ieq : g x' ≤ sInf ImS := inepi have minima : ∀ w : E, g x' ≤ g w := by intro w @@ -275,7 +284,7 @@ theorem prox_set_compact_of_convex (f : E → ℝ) (hc : ContinuousOn f univ) linarith · have gwnin : g z < g w := by simp [S] at hw; simp [g]; exact hw - have gzin : g z ∈ ImS := by use z; simp [ImS, S] + have gzin : g z ∈ ImS := by use z; simp [S] have legw : sInf ImS ≤ g w := by rw [Real.sInf_le_iff S_bddbelow neImS] intro _ epos; use g z; use gzin; linarith @@ -359,39 +368,16 @@ theorem prox_unique_of_convex (f : E → ℝ) (x : E) (hfun : ConvexOn ℝ univ -/ lemma convex_of_norm_sq {s : Set E} (x : E) (conv: Convex ℝ s) : ConvexOn ℝ s (fun (u : E) ↦ ‖u - x‖ ^ 2 / 2) := by - rw [ConvexOn]; use conv + let g : E → ℝ := fun u ↦ ‖u - x‖ ^ 2 / 2 + have hstrong : StrongConvexOn univ (1 : ℝ) g := by + simpa [g] using strongconvex_of_convex_add_sq (fun _ : E ↦ (0 : ℝ)) x + (by simpa [ConvexOn] using (convex_univ : Convex ℝ (univ : Set E))) + have hstrong0 : StrongConvexOn univ (0 : ℝ) g := by + exact StrongConvexOn.mono (show (0 : ℝ) ≤ 1 by norm_num) hstrong + have huniv : ConvexOn ℝ univ g := (strongConvexOn_zero.mp hstrong0) + refine ⟨conv, ?_⟩ intro y _ z _ a b anneg bnneg absum1 - field_simp - have eq1 : a • y + b • z - x = a • (y - x) + b • (z - x) := by - rw [smul_sub, smul_sub, add_comm_sub, sub_sub, ← add_smul, add_comm b a] - rw [absum1, one_smul, ← add_sub] - rw [eq1] - have ieq1 (u v : E) : ‖a • u + b • v‖ ^ 2 / 2 ≤ (a * ‖u‖ ^ 2 + b * ‖v‖ ^ 2) / 2 := by - rw [div_le_div_right, norm_add_sq_real, add_comm, ← add_assoc] - rw [norm_smul, norm_smul, mul_pow, mul_pow]; simp - nth_rw 3 [← mul_one a]; nth_rw 3 [← one_mul b] - rw [← absum1]; ring_nf; rw [add_right_comm] - apply add_le_add_right - rw [add_comm]; apply add_le_add_right - calc - inner (a • u) (b • v) * 2 ≤ ‖a • u‖ * ‖b • v‖ * 2 := by - rw [mul_le_mul_right] - apply real_inner_le_norm - simp - _ = a * b * (2 * ‖u‖ * ‖v‖) := by - rw [norm_smul, norm_smul]; simp - rw [abs_of_nonneg anneg, abs_of_nonneg bnneg]; ring - _ ≤ a * b * (‖u‖ ^ 2 + ‖v‖ ^ 2) := by - by_cases a * b > 0 - · rw [mul_le_mul_left] - apply two_mul_le_add_pow_two - linarith - · have ieq2 : 0 ≤ a * b := by apply mul_nonneg anneg bnneg - have ieq3 : 0 = a * b := by linarith - rw [← ieq3]; simp - _ = b * ‖v‖ ^ 2 * a + b * a * ‖u‖ ^ 2 := by ring - simp - apply ieq1 + exact huniv.2 (by simp) (by simp) anneg bnneg absum1 /- Sub-derivative at x equal to sub-derivative within univ at x @@ -419,26 +405,30 @@ theorem proximal_shift (a : E) {t : ℝ} (tnz : t ≠ 0) (f : E → ℝ): simp constructor · intro cond y + have htsq : 0 < t ^ 2 := sq_pos_iff.mpr tnz specialize cond (t⁻¹ • (y - a)) - rw [← smul_assoc, smul_eq_mul, mul_inv_cancel₀] at cond + rw [← smul_assoc, smul_eq_mul, mul_inv_cancel₀ tnz] at cond simp at cond calc t ^ 2 * f (t • z + a) + ‖t • z - t • x‖ ^ 2 / 2 = t ^ 2 * (f (t • z + a) + ‖z - x‖ ^ 2 / 2) := by - rw [← smul_sub, norm_smul, mul_pow, mul_add]; field_simp + rw [← smul_sub, norm_smul, mul_pow, Real.norm_eq_abs, sq_abs, mul_add] + ring_nf _ ≤ t ^ 2 * (f y + ‖t⁻¹ • (y - a) - x‖ ^ 2 / 2) := by - rw [mul_le_mul_left]; use cond; rw [sq_pos_iff]; use tnz + exact mul_le_mul_of_nonneg_left cond (le_of_lt htsq) _ = t ^ 2 * f y + ‖t • ((1 / t) • (y - a) - x)‖ ^ 2 / 2 := by - rw [mul_add, norm_smul, mul_pow]; field_simp + rw [mul_add, norm_smul, mul_pow, Real.norm_eq_abs, sq_abs] + ring_nf _ = t ^ 2 * f y + ‖y - (t • x + a)‖ ^ 2 / 2 := by - rw [smul_sub, ← smul_assoc, smul_eq_mul, ← sub_sub, sub_right_comm]; field_simp - use tnz + rw [smul_sub, ← smul_assoc, smul_eq_mul, ← sub_sub, sub_right_comm] + field_simp [tnz] + simp · intro cond y specialize cond (t • y + a) rw [← smul_sub, norm_smul, mul_pow] at cond; simp at cond rw [← smul_sub, norm_smul, mul_pow] at cond; simp at cond rw [mul_div_assoc, ← mul_add, mul_div_assoc, ← mul_add] at cond - rw [mul_le_mul_left] at cond; use cond; rw [sq_pos_iff]; use tnz + exact (mul_le_mul_iff_of_pos_left (sq_pos_iff.mpr tnz)).1 cond /- relation of proximal between a function and its scale @@ -452,25 +442,31 @@ theorem proximal_scale {t : ℝ} (tpos : 0 < t) (f : E → ℝ): constructor · intro cond y specialize cond (t • y) - have tsq : 0 < t ^ 2 := by field_simp - rw [← mul_le_mul_left tsq] - calc - t ^ 2 * (t⁻¹ * f (t⁻¹ • z) + ‖t⁻¹ • z - t⁻¹ • x‖ ^ 2 / 2) = - t * f (t⁻¹ • z) + ‖z - x‖ ^ 2 / 2 := by - rw [← smul_sub, norm_smul, mul_pow, mul_add, pow_two, ← mul_assoc, mul_assoc _ _ (t⁻¹)] - rw [mul_inv_cancel₀, mul_div_assoc, ← mul_assoc]; simp - rw [← pow_two, mul_inv_cancel₀]; repeat simp; repeat linarith - _ ≤ t * f (t⁻¹ • t • y) + ‖t • y - x‖ ^ 2 / 2 := cond - _ = t ^ 2 * (t⁻¹ * f y) + ‖t • (y - t⁻¹ • x)‖ ^ 2 / 2 := by - rw [pow_two t, ← mul_assoc, mul_assoc _ _ (t⁻¹), mul_inv_cancel₀] - rw [← smul_assoc, smul_eq_mul, inv_mul_cancel₀]; simp - rw [smul_sub, ← smul_assoc, smul_eq_mul, mul_inv_cancel₀]; simp; repeat linarith - _ = t ^ 2 * (t⁻¹ * f y + ‖y - t⁻¹ • x‖ ^ 2 / 2) := by - rw [mul_add, norm_smul, mul_pow]; field_simp + have tsq : 0 < t ^ 2 := by nlinarith [tpos] + have hmul : + t ^ 2 * (t⁻¹ * f (t⁻¹ • z) + ‖t⁻¹ • z - t⁻¹ • x‖ ^ 2 / 2) ≤ + t ^ 2 * (t⁻¹ * f y + ‖y - t⁻¹ • x‖ ^ 2 / 2) := by + calc + t ^ 2 * (t⁻¹ * f (t⁻¹ • z) + ‖t⁻¹ • z - t⁻¹ • x‖ ^ 2 / 2) = + t * f (t⁻¹ • z) + ‖z - x‖ ^ 2 / 2 := by + rw [← smul_sub, norm_smul, mul_pow, mul_add, pow_two, ← mul_assoc, mul_assoc _ _ (t⁻¹)] + rw [mul_inv_cancel₀, mul_div_assoc, ← mul_assoc]; simp + rw [← pow_two, mul_inv_cancel₀]; repeat simp; repeat linarith + _ ≤ t * f (t⁻¹ • t • y) + ‖t • y - x‖ ^ 2 / 2 := cond + _ = t ^ 2 * (t⁻¹ * f y) + ‖t • (y - t⁻¹ • x)‖ ^ 2 / 2 := by + rw [pow_two t, ← mul_assoc, mul_assoc _ _ (t⁻¹), mul_inv_cancel₀] + rw [← smul_assoc, smul_eq_mul, inv_mul_cancel₀]; simp + rw [smul_sub, ← smul_assoc, smul_eq_mul, mul_inv_cancel₀]; simp; repeat linarith + _ = t ^ 2 * (t⁻¹ * f y + ‖y - t⁻¹ • x‖ ^ 2 / 2) := by + rw [mul_add, norm_smul, mul_pow, Real.norm_eq_abs, sq_abs] + ring_nf + exact (mul_le_mul_iff_of_pos_left tsq).1 hmul · intro cond y specialize cond (t⁻¹ • y) - have tsq : 0 < t ^ 2 := by field_simp - rw [← mul_le_mul_left tsq] at cond + have tsq : 0 < t ^ 2 := by nlinarith [tpos] + have cond : t ^ 2 * (t⁻¹ * f (t⁻¹ • z) + ‖t⁻¹ • z - t⁻¹ • x‖ ^ 2 / 2) ≤ + t ^ 2 * (t⁻¹ * f (t⁻¹ • y) + ‖t⁻¹ • y - t⁻¹ • x‖ ^ 2 / 2) := + (mul_le_mul_iff_of_pos_left tsq).2 cond calc t * f (t⁻¹ • z) + ‖z - x‖ ^ 2 / 2 = t ^ 2 * (t⁻¹ * f (t⁻¹ • z) + ‖t⁻¹ • z - t⁻¹ • x‖ ^ 2 / 2) := by @@ -479,7 +475,8 @@ theorem proximal_scale {t : ℝ} (tpos : 0 < t) (f : E → ℝ): rw [← pow_two, mul_inv_cancel₀]; repeat simp; repeat linarith _ ≤ t ^ 2 * (t⁻¹ * f (t⁻¹ • y) + ‖t⁻¹ • y - t⁻¹ • x‖ ^ 2 / 2) := cond _ = t ^ 2 * (t⁻¹ * f (t⁻¹ • y)) + ‖t • (t⁻¹ • y - t⁻¹ • x)‖ ^ 2 / 2 := by - rw [mul_add, norm_smul, mul_pow]; field_simp + rw [mul_add, norm_smul, mul_pow, Real.norm_eq_abs, sq_abs] + ring_nf _ = t * f (t⁻¹ • y) + ‖y - x‖ ^ 2 / 2 := by rw [pow_two t, ← mul_assoc, mul_assoc _ _ (t⁻¹), mul_inv_cancel₀] rw [smul_sub, ← smul_assoc, smul_eq_mul, mul_inv_cancel₀]; simp @@ -489,26 +486,27 @@ theorem proximal_scale {t : ℝ} (tpos : 0 < t) (f : E → ℝ): change of proximal when added a linear components -/ theorem proximal_add_linear (a : E) (f : E → ℝ): - ∀ z : E, prox_prop (fun x ↦ f x + inner a x) x z ↔ + ∀ z : E, prox_prop (fun x ↦ f x + inner ℝ a x) x z ↔ prox_prop f (x - a) z := by intro z rw [prox_prop, prox_prop, isMinOn_univ_iff, isMinOn_univ_iff] have aux (v : E) : ‖v - (x - a)‖ ^ 2 / 2 = - ‖v - x‖ ^ 2 / 2 + inner a v + (‖a‖ ^ 2 / 2 - inner a x) := by + ‖v - x‖ ^ 2 / 2 + inner ℝ a v + (‖a‖ ^ 2 / 2 - inner ℝ a x) := by rw [← sub_add, norm_add_sq_real, real_inner_comm, inner_sub_right]; ring_nf constructor · intro cond y specialize cond y - rw [aux, aux, add_comm _ (inner a z), add_comm _ (inner a y)] + rw [aux, aux, add_comm _ (inner ℝ a z), add_comm _ (inner ℝ a y)] linarith · intro cond y specialize cond y - rw [aux, aux, add_comm _ (inner a z), add_comm _ (inner a y)] at cond + rw [aux, aux, add_comm _ (inner ℝ a z), add_comm _ (inner ℝ a y)] at cond linarith /- change of proximal when added a square components -/ +set_option maxHeartbeats 1000000 in theorem proximal_add_sq (a : E) {l : ℝ} (lpos : 0 < l) (f : E → ℝ): ∀ z : E, prox_prop (fun x ↦ f x + l / 2 * ‖x - a‖ ^ 2) x z ↔ prox_prop ((1 / (l + 1)) • f) ((1 / (l + 1)) • (x + l • a)) z := by @@ -525,16 +523,30 @@ theorem proximal_add_sq (a : E) {l : ℝ} (lpos : 0 < l) (f : E → ℝ): rw [add_sub_right_comm]; simp; rw [mul_sub, ← add_sub_right_comm, ← add_sub_assoc] nth_rw 3 [← one_mul (‖v‖ ^ 2)]; rw [← add_mul, ← mul_assoc l, mul_comm l 2, sub_sub] rw [mul_assoc, ← mul_add, ← inner_smul_right _ _ l, ← inner_add_right] - field_simp; rw [mul_comm]; simp + field_simp [lpos.ne']; ring_nf; norm_num constructor · intro cond y specialize cond y - rw [aux, aux]; simp; rw [← mul_add, ← mul_add, mul_le_mul_left] - linarith [cond]; simp; linarith + let c : ℝ := ((l + 1)⁻¹ * ‖x + l • a‖ ^ 2 - ‖x‖ ^ 2 - l * ‖a‖ ^ 2) / 2 + have hplus : 0 < l + 1 := by linarith + have hshift : f z + l / 2 * ‖z - a‖ ^ 2 + ‖z - x‖ ^ 2 / 2 + c ≤ + f y + l / 2 * ‖y - a‖ ^ 2 + ‖y - x‖ ^ 2 / 2 + c := by + simpa [add_assoc, add_left_comm, add_comm] using add_le_add_right cond c + have hmul : (l + 1)⁻¹ * (f z + l / 2 * ‖z - a‖ ^ 2 + ‖z - x‖ ^ 2 / 2 + c) ≤ + (l + 1)⁻¹ * (f y + l / 2 * ‖y - a‖ ^ 2 + ‖y - x‖ ^ 2 / 2 + c) := + mul_le_mul_of_nonneg_left hshift (le_of_lt (inv_pos.mpr hplus)) + rw [aux, aux] + simpa [Pi.smul_apply, c, mul_add, add_assoc, add_left_comm, add_comm] using hmul · intro cond y specialize cond y - rw [aux, aux] at cond; simp at cond; rw [← mul_add, ← mul_add, mul_le_mul_left] at cond - linarith [cond]; simp; linarith + let c : ℝ := ((l + 1)⁻¹ * ‖x + l • a‖ ^ 2 - ‖x‖ ^ 2 - l * ‖a‖ ^ 2) / 2 + rw [aux, aux] at cond + have hplus : 0 < l + 1 := by linarith + have hmul := mul_le_mul_of_nonneg_left cond (le_of_lt hplus) + have hshift : f z + l / 2 * ‖z - a‖ ^ 2 + ‖z - x‖ ^ 2 / 2 + c ≤ + f y + l / 2 * ‖y - a‖ ^ 2 + ‖y - x‖ ^ 2 / 2 + c := by + simpa [Pi.smul_apply, c, hplus.ne', mul_add, mul_assoc, add_assoc, add_left_comm, add_comm] using hmul + exact (add_le_add_iff_right c).1 hshift end properties @@ -553,12 +565,8 @@ theorem prox_iff_subderiv (f : E → ℝ) (hfun : ConvexOn ℝ univ f) : let g := fun u ↦ ‖u - x‖ ^ 2 / 2 have hg : ConvexOn ℝ Set.univ g := by apply convex_of_norm_sq x (convex_univ) have hcg : ContinuousOn g univ := by - simp [g]; apply ContinuousOn.div - apply ContinuousOn.pow _ - · apply ContinuousOn.norm - apply ContinuousOn.sub continuousOn_id continuousOn_const - · apply continuousOn_const - · simp + intro u _ + simpa [g] using (gradient_of_sq (x := x) u).continuousAt.continuousWithinAt show 0 ∈ SubderivAt (f + g) u ↔ x - u ∈ SubderivAt f u have : SubderivAt (f + g) u = SubderivAt (g + f) u := by unfold SubderivAt; ext z; rw [Set.mem_setOf, Set.mem_setOf]; @@ -638,15 +646,23 @@ theorem prox_iff_subderiv_smul (f : E → ℝ) {t : ℝ} (hfun : ConvexOn ℝ un · intro cond y specialize cond y; simp at cond rw [inner_smul_left]; simp - rw [← mul_le_mul_left ht]; ring_nf; field_simp - exact cond + have hcond : t * (f u + t⁻¹ * inner ℝ (x - u) (y - u)) ≤ t * f y := by + have hEq : t * (f u + t⁻¹ * inner ℝ (x - u) (y - u)) = t * f u + inner ℝ (x - u) (y - u) := by + field_simp [ht.ne'] + rw [hEq] + exact cond + exact (mul_le_mul_iff_of_pos_left ht).1 hcond · intro cond y - specialize cond y; rw [inner_smul_left] at cond; field_simp at cond - simp - have hrect : 0 < t⁻¹ := by - simp; linarith - rw [← mul_le_mul_left hrect]; ring_nf; field_simp - exact cond + specialize cond y + rw [inner_smul_left] at cond + have cond' : f u + t⁻¹ * inner ℝ (x - u) (y - u) ≤ f y := by + simpa [sub_eq_add_neg, add_assoc, add_left_comm, add_comm] using cond + have hmul : t * (f u + t⁻¹ * inner ℝ (x - u) (y - u)) ≤ t * f y := + (mul_le_mul_iff_of_pos_left ht).2 cond' + have hEq : t * (f u + t⁻¹ * inner ℝ (x - u) (y - u)) = t * f u + inner ℝ (x - u) (y - u) := by + field_simp [ht.ne'] + rw [hEq] at hmul + exact hmul exact gconv end diff --git a/Optlib/Optimality/Constrained_Problem.lean b/Optlib/Optimality/Constrained_Problem.lean index 03cba1d..febcef8 100644 --- a/Optlib/Optimality/Constrained_Problem.lean +++ b/Optlib/Optimality/Constrained_Problem.lean @@ -5,12 +5,13 @@ Authors: Chenyi Li, Shengyang Xu, Yuxuan Wu -/ import Mathlib.Analysis.Convex.Cone.Basic import Mathlib.Analysis.Calculus.LocalExtr.Basic -import Mathlib.Analysis.NormedSpace.HahnBanach.Separation -import Mathlib.Data.Matrix.Rank -import Mathlib.LinearAlgebra.FiniteDimensional +import Mathlib.Analysis.LocallyConvex.Separation +import Mathlib.LinearAlgebra.Matrix.Rank +import Mathlib.LinearAlgebra.FiniteDimensional.Basic import Mathlib.Analysis.Calculus.Implicit import Mathlib.Analysis.Calculus.MeanValue import Mathlib.Analysis.InnerProductSpace.Calculus +import Mathlib.Analysis.Calculus.TangentCone.Seq import Optlib.Differential.Calculation import Optlib.Convex.Farkas import Optlib.Differential.Lemmas @@ -46,7 +47,7 @@ variable {τ σ : Finset ℕ} The equality constraints are a set of functions from a Hilbert space to ℝ. The inequality constraints are a set of functions from a Hilbert space to ℝ. -/ -structure Constrained_OptimizationProblem (E : Type _) (τ σ : Finset ℕ) := +structure Constrained_OptimizationProblem (E : Type _) (τ σ : Finset ℕ) where (domain : Set E) (equality_constraints : (i : ℕ) → E → ℝ) (inequality_constraints : (j : ℕ) → E → ℝ) @@ -145,11 +146,11 @@ section linear variable {E : Type _} [NormedAddCommGroup E] [InnerProductSpace ℝ E] -def IsLinear (f : E → ℝ) : Prop := ∃ a, ∃ b, f = fun x ↦ (inner x a : ℝ) + b +def IsLinear (f : E → ℝ) : Prop := ∃ a, ∃ b, f = fun x ↦ (inner ℝ x a : ℝ) + b -lemma IsLinear_iff (f : E → ℝ) : IsLinear f ↔ ∃ a b, f = fun x ↦ (inner x a : ℝ) + b := by rfl +lemma IsLinear_iff (f : E → ℝ) : IsLinear f ↔ ∃ a b, f = fun x ↦ (inner ℝ x a : ℝ) + b := by rfl -lemma IsLinear_iff' (f : E → ℝ) : IsLinear f ↔ ∃ a b, f = fun x ↦ (inner a x : ℝ) + b := by +lemma IsLinear_iff' (f : E → ℝ) : IsLinear f ↔ ∃ a b, f = fun x ↦ (inner ℝ a x : ℝ) + b := by constructor repeat rintro ⟨a, b, rfl⟩; exact ⟨a, b, by ext x; simp; exact real_inner_comm _ _⟩ @@ -199,190 +200,11 @@ theorem linearized_feasible_directions_convex (point : E) : lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalMinOn f p.FeasSet loc) (hf : DifferentiableAt ℝ f loc) : ∀ v ∈ posTangentConeAt p.FeasSet loc, ⟪gradient f loc, v⟫_ℝ ≥ (0 : ℝ) := by - intro v vt; rw [posTangentConeAt] at vt; simp at vt - rcases vt with ⟨c, d, ⟨a, ha⟩, ⟨vt1, vt2⟩⟩ - by_contra proneg; push_neg at proneg - rw [IsLocalMinOn, IsMinFilter, eventually_iff_exists_mem] at hl - rcases hl with ⟨s, ⟨hs, hs2⟩⟩ - rw [nhdsWithin] at hs - rcases Metric.mem_nhdsWithin_iff.mp hs with ⟨ε, ⟨εpos, εball⟩⟩ - let s1 := Metric.ball loc ε ∩ p.FeasSet - have hmin : ∀ y ∈ s1, f loc ≤ f y := fun y yin => hs2 y (εball yin) - let z := fun n ↦ loc + d n - have hzd : ∀ n, d n = z n - loc := fun _ => eq_sub_of_add_eq' rfl - rw [real_inner_comm] at proneg - have hcp : ∀ᶠ (n : ℕ) in atTop, c n > 0 := by - rw [Filter.tendsto_atTop] at vt1 - specialize vt1 (1 : ℝ) - apply Filter.Eventually.mp vt1 - apply Filter.Eventually.of_forall - intro n hn; linarith - have hz3 : ∀ᶠ (n : ℕ) in atTop, (1 / c n) > 0 := by - apply Filter.Eventually.mp hcp - apply Filter.Eventually.of_forall - intro n hn; exact one_div_pos.mpr hn - have hzt : Tendsto z atTop (𝓝 loc) := by - have : Tendsto d atTop (𝓝 0) := by - rw [Filter.tendsto_atTop] at vt1 - rw [Filter.tendsto_atTop'] at vt2 - rw [Metric.tendsto_atTop']; intro ε hε - have : Metric.ball v ε ∈ 𝓝 v := by exact Metric.ball_mem_nhds _ hε - specialize vt2 (Metric.ball v ε) this - rcases vt2 with ⟨a, ha⟩ - specialize vt1 (2 * (‖v‖ + ε) / ε); simp at vt1 - rcases vt1 with ⟨a1, ha1⟩ - let n1 := max a a1 - use n1; intro n hn - specialize ha n (ge_trans (Nat.le_of_lt hn) (a.le_max_left a1)) - specialize ha1 n (ge_trans (Nat.le_of_lt hn) (a.le_max_right a1)) - have : ‖d n‖ < ε := by - have : ‖c n • d n‖ ≤ ‖v‖ + ε := by - rw [Metric.mem_ball, dist_eq_norm] at ha; - have t1 : ‖c n • d n - v‖ ≥ ‖c n • d n‖ - ‖v‖ := norm_sub_norm_le _ v - linarith - have cpos : c n > 0 := by - apply lt_of_le_of_lt' - · show c n ≥ 2 * (‖v‖ + ε) / ε - exact ha1 - · positivity - rw [norm_smul, Real.norm_eq_abs, abs_of_pos cpos] at this; - calc _ ≤ (‖v‖ + ε) / c n := (le_div_iff₀' cpos).mpr this - _ ≤ (‖v‖ + ε) / (2 * (‖v‖ + ε) / ε) := - div_le_div_of_nonneg_left (by positivity) (by positivity) ha1 - _ = ε / 2 := by field_simp [εpos]; ring_nf - _ < ε := by linarith - simp; exact this - have h1 : z = (fun n ↦ d n + loc) := by - funext n; rw [hzd n, sub_add, sub_self, sub_zero] - rw [h1] - convert Filter.Tendsto.add_const loc this - rw [zero_add] - have hz : (fun n ↦ f (z n) - f loc - inner (z n - loc) (gradient f loc)) - =o[atTop] (fun n ↦ z n - loc) := by - have : HasGradientAt f (gradient f loc) loc := hf.hasGradientAt - rw [hasGradientAt_iff_isLittleO] at this - have heq : (fun n ↦ f (z n) - f loc - inner (z n - loc) (gradient f loc)) = - (fun n ↦ f (z n) - f loc - inner (gradient f loc) (z n - loc)) := by - ext n; rw [real_inner_comm] - rw [heq] - apply Asymptotics.IsLittleO.comp_tendsto this hzt - have hz1 : (fun n ↦ f (z n) - f loc - (1 / c n) * inner v (gradient f loc)) - =o[atTop] (fun n ↦ 1 / c n) := by - have t1: (fun n ↦ z n - loc) =O[atTop] (fun n ↦ 1 / c n) := by - rw [Asymptotics.isBigO_iff] - rw [Filter.tendsto_atTop] at vt1 - rw [Filter.tendsto_atTop'] at vt2 - have : Metric.ball v 1 ∈ 𝓝 v := by exact Metric.ball_mem_nhds _ (by norm_num) - specialize vt2 (Metric.ball v 1) this - rcases vt2 with ⟨a, ha⟩ - specialize vt1 (2 * (‖v‖ + ε) / ε); simp at vt1 - rcases vt1 with ⟨a1, ha1⟩ - let n1 := max a a1 - use (‖v‖ + 1 : ℝ); simp; use n1; intro n hn - specialize ha n (ge_trans hn (a.le_max_left a1)) - specialize ha1 n (ge_trans hn (a.le_max_right a1)) - have cpos : c n > 0 := by - apply lt_of_le_of_lt' - · show c n ≥ 2 * (‖v‖ + ε) / ε - exact ha1 - · positivity - rw [abs_of_pos] - have : ‖d n‖ ≤ (‖v‖ + 1) * (c n)⁻¹ := by - have : ‖c n • d n‖ ≤ ‖v‖ + 1 := by - rw [Metric.mem_ball, dist_eq_norm] at ha; - have t1 : ‖c n • d n - v‖ ≥ ‖c n • d n‖ - ‖v‖ := norm_sub_norm_le _ v - linarith - rw [norm_smul, Real.norm_eq_abs, abs_of_pos cpos] at this; - field_simp; exact (le_div_iff₀' cpos).mpr this - rw [← hzd n]; exact this; apply cpos - have t2 : (fun n ↦ f (z n) - f loc - inner (z n - loc) (gradient f loc)) - =o[atTop] (fun n ↦ 1 / c n) := Asymptotics.IsLittleO.trans_isBigO hz t1 - have t3 : (fun n ↦ (inner (z n - loc - (1 / c n) • v) (gradient f loc) : ℝ)) - =o[atTop] (fun n ↦ 1 / c n) := by - have t5: (fun n ↦ z n - loc - (1 / c n) • v) =o[atTop] (fun n ↦ 1 / c n) := by - rw [← Asymptotics.isLittleO_norm_norm] - apply (Asymptotics.isLittleO_iff_tendsto' _).mpr - · have : (fun x ↦ ‖z x - loc - (1 / c x) • v‖ / ‖1 / c x‖) - =ᶠ[atTop] (fun x ↦ ‖c x • (z x - loc) - v‖) := by - simp; rw [Filter.EventuallyEq] - apply Filter.Eventually.mp hcp - apply Filter.Eventually.of_forall - intro n hcn1; rw [mul_comm, ← Real.norm_eq_abs, ← norm_smul] - congr; field_simp; rw [smul_sub, smul_smul]; field_simp - rw [Filter.tendsto_congr' this]; - have : Tendsto (fun (n : ℕ) => c n • d n - v) atTop (𝓝 (v - v)) := by - apply Filter.Tendsto.sub vt2 tendsto_const_nhds - apply Filter.Tendsto.norm at this - simp at this; convert this; simp [hzd] - · apply Filter.Eventually.mp hcp - apply Filter.Eventually.of_forall - intro n hcn1 hcn2 - exfalso; simp at hcn2; linarith - rw [Asymptotics.isLittleO_iff]; intro c1 hc1 - rw [Asymptotics.isLittleO_iff] at t5; - have pos1 : ‖gradient f loc‖ ≠ (0 : ℝ) := by - by_contra hhh; simp at hhh - have : inner v (gradient f loc) = (0 : ℝ) := by rw [hhh, inner_zero_right] - linarith - have pos2 : ‖gradient f loc‖ > (0 : ℝ) := by positivity - have : c1 / ‖gradient f loc‖ > (0 : ℝ) := by positivity - specialize t5 this - apply Filter.Eventually.mp t5 - apply Filter.Eventually.of_forall - intro n hn; - calc _ ≤ ‖z n - loc - (1 / c n) • v‖ * ‖gradient f loc‖ := norm_inner_le_norm _ _ - _ ≤ c1 / ‖gradient f loc‖ * ‖1 / c n‖ * ‖gradient f loc‖ := - (mul_le_mul_right pos2).mpr hn - _ ≤ c1 * ‖1 / c n‖ := by ring_nf; field_simp [pos1] - have t4 : (fun n => f (z n) - f loc - 1 / c n * Inner.inner v (gradient f loc)) = - (fun n ↦ f (z n) - f loc - inner (z n - loc) (gradient f loc)) + - (fun n ↦ (inner (z n - loc - (1 / c n) • v) (gradient f loc) : ℝ)) := by - ext n; dsimp; simp [inner_sub_left, inner_add_left, inner_smul_left] - rw [t4]; apply Asymptotics.IsLittleO.add t2 t3 - have hz2 : ∀ᶠ (n : ℕ) in atTop, f (z n) ≤ f loc + (1 / 2) * - (1 / c n) * inner v (gradient f loc) := by - rw [Asymptotics.isLittleO_iff] at hz1 - have : (- (1 / 2 : ℝ) * inner v (gradient f loc)) > 0 := by - simp [proneg];rw [mul_comm]; apply mul_neg_of_neg_of_pos proneg (by norm_num) - specialize hz1 this - apply Filter.Eventually.mp hz1 - apply Filter.Eventually.mp hz3 - apply Filter.Eventually.of_forall - intro n hn hn1 - rw [Real.norm_eq_abs, Real.norm_eq_abs, abs_le, abs_of_pos hn] at hn1 - rcases hn1 with ⟨_, hn1⟩ - rw [sub_le_iff_le_add, sub_le_iff_le_add] at hn1 - have : -(1 / 2) * inner v (gradient f loc) * (1 / c n) + 1 / c n * inner v - (gradient f loc) + f loc = f loc + 1 / 2 * (1 / c n) * inner v (gradient f loc) := by - ring_nf - rw [this] at hn1; exact hn1 - have hz4 : ∀ᶠ (n : ℕ) in atTop, f (z n) < f loc := by - apply Filter.Eventually.mp hz2 - apply Filter.Eventually.mp hz3 - apply Filter.Eventually.of_forall - intro n hn1 hn2 - have : 1 / 2 * (1 / c n) * (inner v (gradient f loc)) < 0 := by - apply mul_neg_of_pos_of_neg - · apply Right.mul_pos; simp; exact hn1 - · exact proneg - linarith - have hz5 : ∀ᶠ (n : ℕ) in atTop, z n ∈ s1 := by - simp only [s1, mem_inter_iff, Metric.mem_ball, dist_self_add_left] - apply Filter.Eventually.and - · rw [Filter.tendsto_atTop'] at hzt - simp; - have : Metric.ball loc ε ∈ 𝓝 loc := by exact Metric.ball_mem_nhds loc εpos - rcases hzt (Metric.ball loc ε) this with ⟨a, ha⟩ - use a; intro b hb; specialize ha b (by linarith [hb]) - simp at ha; exact ha - · simp; use a - simp at hz5 hz4 - rcases hz5 with ⟨n, hn1⟩; rcases hz4 with ⟨m, hm1⟩ - let M := max n m - have hh2 : f (z M) < f loc := hm1 M (le_max_right n m) - have hh1 : z M ∈ s1 := by simp [s1]; apply hn1 M (le_max_left n m) - have hh3 : f loc ≤ f (z M) := hmin (z M) hh1 - linarith + intro v vt + have hgrad : HasGradientAt f (gradient f loc) loc := hf.hasGradientAt + have hnonneg : 0 ≤ ((toDual ℝ E) (gradient f loc)) v := + hl.hasFDerivWithinAt_nonneg hgrad.hasFDerivAt.hasFDerivWithinAt vt + simpa [InnerProductSpace.toDual_apply_apply, real_inner_comm] using hnonneg /- Linearized feasible directions contain tagent cone @@ -418,7 +240,7 @@ theorem linearized_feasible_directions_contain_tagent_cone (xf : x ∈ p.FeasSet . intro i itau apply ge_antisymm . apply posTangentCone_localmin_inner_pos (imin i itau) (diffable i itau) v hv - . rw [← neg_neg (inner (gradient (equality_constraints p i) x) v)] + . rw [← neg_neg (inner ℝ (gradient (equality_constraints p i) x) v)] apply neg_nonpos_of_nonneg rw [← inner_neg_left] have a₁ : ∀ i ∈ τ, DifferentiableAt ℝ (-equality_constraints p i) x := @@ -467,7 +289,7 @@ theorem local_Minimum_TangentCone (loc : E) (hl : p.Local_Minimum loc) theorem local_Minimum_TangentCone' (loc : E) (hl : p.Local_Minimum loc) (hf : Differentiable ℝ p.objective) : posTangentConeAt p.FeasSet loc ∩ {d | ⟪gradient p.objective loc, d⟫_ℝ < (0 : ℝ)} = ∅ := by - rw [Set.eq_empty_iff_forall_not_mem] + rw [Set.eq_empty_iff_forall_notMem] intro d ⟨hd1, hd2⟩ simp at hd2 obtain hd1 := local_Minimum_TangentCone loc hl hf d hd1 @@ -567,7 +389,7 @@ lemma StrictFderivAt_of_FderivAt_of_ContinuousAt with ⟨ε, ε0, hε⟩ refine ⟨ε, ε0, ?_⟩ rintro ⟨a, b⟩ h - rw [← ball_prod_same, prod_mk_mem_set_prod_eq] at h + rw [← ball_prod_same, prodMk_mem_set_prod_eq] at h have hf' : ∀ x' ∈ Metric.ball x ε, ‖c' x' - c' x‖ ≤ μ := fun x' H' => by rw [← dist_eq_norm] exact le_of_lt (hε H').2 @@ -608,7 +430,7 @@ lemma exist_forall_forall_exist (P : ℕ → ℝ → Prop) (s : Finset ℕ) (hs have po : ∀ y ∈ s1, y > 0 := by intro y hy simp [s1] at hy; rcases hy with ⟨a, ha1, ha2⟩ - simp only [gt_iff_lt, and_imp, ha1, ↓reduceDIte, f] at ha2; rw [← ha2] + simp only [gt_iff_lt, ha1, ↓reduceDIte, f] at ha2; rw [← ha2] exact (h a ha1).choose_spec.1 have up : ∀ y ∈ s1, tm ≤ y := fun y a ↦ Finset.min'_le s1 y a use tm; constructor @@ -650,20 +472,23 @@ lemma LICQ_mlen (x : EuclideanSpace ℝ (Fin n)) (LIx : p.LICQ x) lemma LICQ_Axfullrank (x : EuclideanSpace ℝ (Fin n)) (LIx : p.LICQ x) {M : Matrix (p.active_set x) (Fin n) ℝ} - (eq : M = fun i : (p.active_set x) ↦ if i.1 ∈ τ then gradient (p.equality_constraints i) x - else gradient (p.inequality_constraints i) x): + (eq : M = fun i : (p.active_set x) ↦ (if i.1 ∈ τ then gradient (p.equality_constraints i) x + else gradient (p.inequality_constraints i) x).ofLp): Matrix.rank M = (Fintype.card (p.active_set x)) := by + rw [LICQ] at LIx apply LE.le.antisymm · apply Matrix.rank_le_card_height · simp rw [Matrix.rank_eq_finrank_span_row, finrank_span_eq_card] - simp; rw [eq]; apply LIx + simp; rw [eq] + simpa [Function.comp] using LIx.map' (WithLp.linearEquiv 2 ℝ (Fin n → ℝ)).toLinearMap + (LinearMap.ker_eq_bot.2 (WithLp.linearEquiv 2 ℝ (Fin n → ℝ)).injective) lemma LICQ_existZ (x : EuclideanSpace ℝ (Fin n)) (LIx : p.LICQ x) {m : ℕ} (meq : m = (p.active_set x).card) {M : Matrix (p.active_set x) (Fin n) ℝ} - (eq : M = fun i : (p.active_set x) ↦ if i.1 ∈ τ then gradient (p.equality_constraints i) x - else gradient (p.inequality_constraints i) x): + (eq : M = fun i : (p.active_set x) ↦ (if i.1 ∈ τ then gradient (p.equality_constraints i) x + else gradient (p.inequality_constraints i) x).ofLp): ∃ (Z : Matrix (Fin n) (Fin (n - m)) ℝ), M * Z = 0 ∧ Matrix.rank Z = (n - m) := by rw [LICQ] at LIx; have mlen : m ≤ n := LICQ_mlen x LIx meq @@ -696,7 +521,7 @@ lemma LICQ_existZ (x : EuclideanSpace ℝ (Fin n)) (LIx : p.LICQ x) · simp rw [Matrix.rank_eq_finrank_span_row, finrank_span_eq_card] simp; rw [Nat.sub_add_cancel]; apply mlen - let base_indep := Basis.linearIndependent base + let base_indep := base.linearIndependent simp only [Z] rw [linearIndependent_iff''] intro s g cond sum @@ -708,11 +533,11 @@ lemma LICQ_existZ (x : EuclideanSpace ℝ (Fin n)) (LIx : p.LICQ x) · intro cond; rw [cond]; simp [coe] · intro cond; simp [coe] at cond; exact cond rw [coe_zero]; simp only [coe] - rw [← sum]; simp + simpa [Matrix.row] using sum lemma mulVec_eq_toEuclidean {s : Type*} (M : Matrix s (Fin n) ℝ) (y : EuclideanSpace ℝ (Fin n)) : M *ᵥ y = (toEuclideanLin M) y := by - rw [Matrix.toEuclideanLin_apply]; ext j; simp [Matrix.mulVec, Matrix.dotProduct] + rfl lemma inj_iff_full_finrank {s t : Type*} {M : Matrix s t ℝ} [Fintype s] [Fintype t] (hn : Fintype.card s = Fintype.card t) : @@ -746,34 +571,38 @@ lemma LICQ_injM (z : EuclideanSpace ℝ (Fin n)) (m : ℕ) rw [eq1, eq2] at Bzeq0; simp at Bzeq0 have aux : (p.active_set x).card + (n - m) = n := by rw [← meq]; rw [add_comm, Nat.sub_add_cancel]; exact mlen - refine (inj_transpose_iff_inj_of_sq ?_).1 ?_ z Bzeq0 - · simp; rw [aux] - · intro v Btveq0 - let y := v ∘ Sum.inl - let z := v ∘ Sum.inr - have yeq : Bt *ᵥ (Sum.elim y (fun _ ↦ 0)) = Aᵀ *ᵥ y := by ext i; simp [Bt, mulVec, dotProduct] - have zeq : Bt *ᵥ (Sum.elim (fun _ ↦ 0) z) = Z *ᵥ z := by ext i; simp [Bt, mulVec, dotProduct] - have veq : v = (Sum.elim y (fun _ ↦ 0)) + (Sum.elim (fun _ ↦ 0) z) := by - simp [y, z]; ext i; cases i <;> simp - have eq : Bᵀ *ᵥ v = Aᵀ *ᵥ y + Z *ᵥ z := by rw [veq, ← Bteq, mulVec_add, yeq, zeq] - rw [eq] at Btveq0 - have yzero : y = 0 := by - have h : A *ᵥ (Aᵀ *ᵥ y + Z *ᵥ z) = 0 := by rw [Btveq0]; simp - rw [mulVec_add, mulVec_mulVec, mulVec_mulVec, AZorth] at h; simp at h - refine (inj_iff_full_finrank ?_).1 ?_ y h - · simp - · simp; rw [← meq, Afull] - have yzero' : (Sum.elim y (fun _ : (Fin (n - m)) ↦ 0)) = 0 := by - ext i; cases i <;> simp [yzero] - have zzero : z = 0 := by - have h : Zᵀ *ᵥ (Aᵀ *ᵥ y + Z *ᵥ z) = 0 := by rw [Btveq0]; simp - rw [mulVec_add, mulVec_mulVec, mulVec_mulVec, ← transpose_mul, AZorth] at h; simp at h - refine (inj_iff_full_finrank ?_).1 ?_ z h - · simp - · simp; rw [rank_transpose_mul_self, Zfull] - have zzero' : (Sum.elim (fun _ : (p.active_set x) ↦ 0) z) = 0 := by - ext i; cases i <;> simp [zzero] - rw [veq, yzero', zzero']; simp + have z0 : z.ofLp = 0 := by + refine (inj_transpose_iff_inj_of_sq ?_).1 ?_ z Bzeq0 + · simp; rw [aux] + · intro v Btveq0 + let y := v ∘ Sum.inl + let z := v ∘ Sum.inr + have yeq : Bt *ᵥ (Sum.elim y (fun _ ↦ 0)) = Aᵀ *ᵥ y := by + ext i; simp [Bt, mulVec, dotProduct] + have zeq : Bt *ᵥ (Sum.elim (fun _ ↦ 0) z) = Z *ᵥ z := by + ext i; simp [Bt, mulVec, dotProduct] + have veq : v = (Sum.elim y (fun _ ↦ 0)) + (Sum.elim (fun _ ↦ 0) z) := by + simp [y, z]; ext i; cases i <;> simp + have eq : Bᵀ *ᵥ v = Aᵀ *ᵥ y + Z *ᵥ z := by rw [veq, ← Bteq, mulVec_add, yeq, zeq] + rw [eq] at Btveq0 + have yzero : y = 0 := by + have h : A *ᵥ (Aᵀ *ᵥ y + Z *ᵥ z) = 0 := by rw [Btveq0]; simp + rw [mulVec_add, mulVec_mulVec, mulVec_mulVec, AZorth] at h; simp at h + refine (inj_iff_full_finrank ?_).1 ?_ y h + · simp + · simp; rw [← meq, Afull] + have yzero' : (Sum.elim y (fun _ : (Fin (n - m)) ↦ 0)) = 0 := by + ext i; cases i <;> simp [yzero] + have zzero : z = 0 := by + have h : Zᵀ *ᵥ (Aᵀ *ᵥ y + Z *ᵥ z) = 0 := by rw [Btveq0]; simp + rw [mulVec_add, mulVec_mulVec, mulVec_mulVec, ← transpose_mul, AZorth] at h; simp at h + refine (inj_iff_full_finrank ?_).1 ?_ z h + · simp + · simp; rw [rank_transpose_mul_self, Zfull] + have zzero' : (Sum.elim (fun _ : (p.active_set x) ↦ 0) z) = 0 := by + ext i; cases i <;> simp [zzero] + rw [veq, yzero', zzero']; simp + simpa using z0 lemma LICQ_strictfderiv_Ax_elem {x : EuclideanSpace ℝ (Fin n)} (c : EuclideanSpace ℝ (Fin n) → ((p.active_set x) → ℝ)) @@ -783,7 +612,7 @@ lemma LICQ_strictfderiv_Ax_elem {x : EuclideanSpace ℝ (Fin n)} (gradceq : gradc = fun z ↦ (fun i : (p.active_set x) ↦ if i.1 ∈ τ then gradient (p.equality_constraints i) z else gradient (p.inequality_constraints i) z)) (A : EuclideanSpace ℝ (Fin n) → Matrix (p.active_set x) (Fin n) ℝ) - (Aeq : A = fun z ↦ (fun i ↦ gradc z i)) + (Aeq : A = fun z ↦ (fun i j ↦ gradc z i j)) (Jz : EuclideanSpace ℝ (Fin n) → EuclideanSpace ℝ (Fin n) →L[ℝ] EuclideanSpace ℝ (p.active_set x)) (Jzeq : Jz = fun z ↦ (LinearMap.toContinuousLinearMap (toEuclideanLin (A z)))) (conte : ∀ i ∈ τ, ContDiffAt ℝ (1 : ℕ) (equality_constraints p i) x) @@ -794,15 +623,20 @@ lemma LICQ_strictfderiv_Ax_elem {x : EuclideanSpace ℝ (Fin n)} rw [eventually_iff, Metric.mem_nhds_iff] at h; rcases h with ⟨ε, _, _⟩ intro i; by_cases hi : i.1 ∈ τ · rw [ceq, Jzeq, Aeq]; simp [hi] - rw [HasStrictFDerivAt]; + rw [hasStrictFDerivAt_iff_isLittleO] have eq : (fun p_1 : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) ↦ p.equality_constraints i.1 p_1.1 - p.equality_constraints i.1 p_1.2 - - ((EuclideanSpace.proj i).comp (LinearMap.toContinuousLinearMap (toEuclideanLin fun i ↦ gradc x i))) + ((EuclideanSpace.proj i).comp + (LinearMap.toContinuousLinearMap (toEuclideanLin fun i j ↦ (gradc x i).ofLp j))) (p_1.1 - p_1.2)) = (fun p_1 : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) ↦ p.equality_constraints i.1 p_1.1 - p.equality_constraints i.1 p_1.2 - - inner (gradient (p.equality_constraints ↑i) x) (p_1.1 - p_1.2) ):= by - ext q; rw [inner_sub_right, gradceq]; simp [toEuclideanLin_apply, mulVec, dotProduct, hi] - rw [← Finset.sum_sub_distrib]; apply Finset.sum_congr; rfl; exact fun _ _ ↦ by ring_nf + inner ℝ (gradient (p.equality_constraints ↑i) x) (p_1.1 - p_1.2) ):= by + ext q; rw [inner_sub_right, gradceq] + simp [toLpLin_apply, mulVec, dotProduct, hi] + rw [← inner_sub_right] + simpa [dotProduct, mul_comm] using + (EuclideanSpace.inner_eq_star_dotProduct (x := gradient (p.equality_constraints ↑i) x) + (y := q.1 - q.2)).symm rw [eq] specialize conte i hi exact StrictFderivAt_of_FderivAt_of_ContinuousAt conte @@ -813,15 +647,20 @@ lemma LICQ_strictfderiv_Ax_elem {x : EuclideanSpace ℝ (Fin n)} rw [Finset.mem_filter] at hi2 exact hi2.1 rw [ceq, Jzeq, Aeq]; simp [hi] - rw [HasStrictFDerivAt]; + rw [hasStrictFDerivAt_iff_isLittleO] have eq : (fun p_1 : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) ↦ p.inequality_constraints i.1 p_1.1 - p.inequality_constraints i.1 p_1.2 - - ((EuclideanSpace.proj i).comp (LinearMap.toContinuousLinearMap (toEuclideanLin fun i ↦ gradc x i))) + ((EuclideanSpace.proj i).comp + (LinearMap.toContinuousLinearMap (toEuclideanLin fun i j ↦ (gradc x i).ofLp j))) (p_1.1 - p_1.2)) = (fun p_1 : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) ↦ p.inequality_constraints i.1 p_1.1 - p.inequality_constraints i.1 p_1.2 - ⟪gradient (p.inequality_constraints ↑i) x, p_1.1 - p_1.2⟫_ℝ ):= by - ext q; rw [inner_sub_right, gradceq]; simp [toEuclideanLin_apply, mulVec, dotProduct, hi] - rw [← Finset.sum_sub_distrib]; apply Finset.sum_congr; rfl; exact fun _ _ ↦ by ring_nf + ext q; rw [inner_sub_right, gradceq] + simp [toLpLin_apply, mulVec, dotProduct, hi] + rw [← inner_sub_right] + simpa [dotProduct, mul_comm] using + (EuclideanSpace.inner_eq_star_dotProduct (x := gradient (p.inequality_constraints ↑i) x) + (y := q.1 - q.2)).symm rw [eq] specialize conti i hi' exact StrictFderivAt_of_FderivAt_of_ContinuousAt conti @@ -831,12 +670,12 @@ lemma LICQ_implicit_f {x : EuclideanSpace ℝ (Fin n)} {m : ℕ} (v : EuclideanS {Rz : EuclideanSpace ℝ (Fin n) → EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ)} {Rt : ℝ → EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ)} (Rteq : Rt = fun t ↦ t • Mx v) (Rxeq0 : Rz x = 0) - (Rzgrad : HasStrictFDerivAt Rz Mx x) (Mxsurj : LinearMap.range Mx = ⊤) : + (Rzgrad : HasStrictFDerivAt Rz Mx x) (Mxsurj : Mx.range = ⊤) : ∃ (N : ℕ) (d : ℕ → EuclideanSpace ℝ (Fin n)), (∀ m ≥ N, Rz (d m) = Rt (1 / m)) ∧ (Filter.Tendsto d atTop (𝓝 x)) := by let g := HasStrictFDerivAt.implicitFunction Rz Mx Rzgrad Mxsurj - have hfg : ∀ᶠ (p : (EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ)) × (LinearMap.ker Mx)) in - 𝓝 (Rz x, (0 : LinearMap.ker Mx)), Rz (g p.1 p.2) = p.1 := by + have hfg : ∀ᶠ (p : (EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ)) × Mx.ker) in + 𝓝 (Rz x, (0 : Mx.ker)), Rz (g p.1 p.2) = p.1 := by simp only [g]; apply HasStrictFDerivAt.map_implicitFunction_eq Rzgrad Mxsurj rw [Rxeq0] at hfg rw [eventually_iff, Metric.mem_nhds_iff] at hfg @@ -859,7 +698,7 @@ lemma LICQ_implicit_f {x : EuclideanSpace ℝ (Fin n)} {m : ℕ} (v : EuclideanS simp at Rtmin; simp [Rtmin] · simp only [g] apply HasStrictFDerivAt.tendsto_implicitFunction Rzgrad Mxsurj - · rw [Rxeq0]; rw [NormedAddCommGroup.tendsto_nhds_zero]; simp; apply Rtleε + · rw [Rxeq0]; rw [NormedAddGroup.tendsto_nhds_zero]; simp; apply Rtleε · simp lemma eq_lemma {y z : EuclideanSpace ℝ (Fin n)} {n : ℕ} (h : ‖(n : ℝ) • y‖ ≠ 0) : @@ -868,7 +707,10 @@ lemma eq_lemma {y z : EuclideanSpace ℝ (Fin n)} {n : ℕ} (h : ‖(n : ℝ) have eq : z = (n : ℝ) • (1 / n : ℝ) • z := by rw [smul_smul]; field_simp; rw [div_self, one_smul]; simp [h] nth_rw 2 [eq] - rw [← smul_sub, smul_smul, norm_smul]; field_simp; rw [← div_div, div_self]; simp [h] + rw [← smul_sub, smul_smul, norm_smul]; field_simp + have hn0 : (n : ℝ) ≠ 0 := by exact_mod_cast h.1 + have hcoef : (n : ℝ) / (‖y‖ * n) = ‖y‖⁻¹ := by field_simp [hn0] + simp [hcoef] lemma comap1 {x : EuclideanSpace ℝ (Fin n)} {m : ℕ} {Mx : EuclideanSpace ℝ (Fin n) →L[ℝ] EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ)} @@ -892,7 +734,7 @@ lemma comap1 {x : EuclideanSpace ℝ (Fin n)} {m : ℕ} have zin' : z ∈ Metric.ball 0 a := by simp; calc ‖z‖ ≤ c * ‖Mx z‖ := antil - _ < c * (a / c) := by rw [mul_lt_mul_left]; linarith [zin]; simp [hc'] + _ < c * (a / c) := by exact mul_lt_mul_of_pos_left zin hc' _ = a := by field_simp exact ha zin' @@ -903,14 +745,21 @@ lemma comap2 (hv : v ≠ 0): rw [Metric.mem_nhds_iff] at smem; rcases smem with ⟨a, apos, ha⟩ let μ := a / (a + ‖v‖) have eq : μ * ‖v‖ = (1 - μ) * a := by - field_simp [μ]; rw [mul_comm] + have hden : a + ‖v‖ ≠ 0 := by linarith [apos, norm_nonneg v] + change (a / (a + ‖v‖)) * ‖v‖ = (1 - a / (a + ‖v‖)) * a + field_simp [hden] + ring have vpos : 0 < ‖v‖ := by refine lt_of_le_of_ne (norm_nonneg v) ?_; symm; simp [hv] have μle : 0 < 1 - μ := by - field_simp [μ, hv] - apply add_pos ?_ vpos; linarith + have hden : 0 < a + ‖v‖ := by linarith [apos, norm_nonneg v] + have hμ : μ < 1 := by + change a / (a + ‖v‖) < 1 + exact (div_lt_one hden).2 (by linarith [vpos]) + linarith have μpos : 0 < μ := by - field_simp [μ]; apply add_pos_of_pos_of_nonneg _ (norm_nonneg v); linarith + have hden : 0 < a + ‖v‖ := by linarith [apos, norm_nonneg v] + simpa [μ] using (div_pos apos hden) let r := min μ ‖v‖ use Metric.ball 0 r; constructor · apply Metric.ball_mem_nhds; simp [r]; exact ⟨μpos, hv⟩ @@ -919,16 +768,21 @@ lemma comap2 (hv : v ≠ 0): by_contra hz; simp [hz] at zin; simp [r] at zin simp [ze] at zin; rw [norm_smul] at zin; field_simp at zin have : 0 < ‖z‖ := by refine lt_of_le_of_ne (norm_nonneg z) ?_; symm; simp [ze] - rw [div_lt_iff₀ this] at zin + have zin' : ‖z - v‖ / ‖z‖ < r := by + simpa [div_eq_mul_inv, Real.norm_eq_abs, abs_of_pos (one_div_pos.mpr this), mul_comm] using zin + have zin : ‖z - v‖ < r * ‖z‖ := (div_lt_iff₀ this).1 zin' have ieq : ‖z - v‖ < μ * ‖z - v‖ + (1 - μ) * a := by calc _ < r * ‖z‖ := zin - _ ≤ μ * ‖z‖ := by rw [mul_le_mul_right this]; simp [r] + _ ≤ μ * ‖z‖ := by + exact mul_le_mul_of_nonneg_right (by simp [r]) (norm_nonneg z) _ ≤ μ * (‖z - v‖ + ‖v‖) := by - rw [mul_le_mul_left μpos, add_comm]; apply norm_le_norm_add_norm_sub' - _ ≤ μ * ‖z - v‖ + (1 - μ) * a := by rw [mul_add]; apply add_le_add_left; rw [eq] + exact mul_le_mul_of_nonneg_left (by simpa [add_comm] using norm_le_norm_add_norm_sub' z v) + (le_of_lt μpos) + _ ≤ μ * ‖z - v‖ + (1 - μ) * a := by linarith [eq] rw [← sub_lt_iff_lt_add'] at ieq; nth_rw 1 [← one_mul (‖z - v‖)] at ieq - rw [← sub_mul, mul_lt_mul_left μle] at ieq + have ieq' : (1 - μ) * ‖z - v‖ < (1 - μ) * a := by linarith [ieq] + have ieq : ‖z - v‖ < a := lt_of_mul_lt_mul_left ieq' (le_of_lt μle) apply ha; simp; rw [dist_eq_norm]; simp [ieq] lemma LICQ_tendsto {x : EuclideanSpace ℝ (Fin n)} {m N : ℕ} @@ -983,7 +837,11 @@ lemma LICQ_tendsto {x : EuclideanSpace ℝ (Fin n)} {m N : ℕ} have neq : ‖(i : ℝ) • (d i - x)‖ ≠ 0 := by rw [norm_smul]; apply mul_ne_zero; simp; linarith [Nat.lt_of_add_one_le igeN.2] specialize dne i igeN.2; simp; apply sub_ne_zero_of_ne dne - field_simp [deriv', φ, neq]; apply eq_lemma neq + have hne : ¬(i = 0 ∨ d i - x = 0) := by + intro h; rcases h with hi | hz + · exact neq (by simp [hi]) + · exact neq (by simp [hz]) + simpa [deriv', φ, neq, hne] using (eq_lemma (y := d i - x) (z := v) neq) obtain lim' := Filter.Tendsto.congr' eq5 lim refine Filter.Tendsto.of_tendsto_comp lim' ?_ simp only [φ]; exact comap2 vne0 @@ -1002,54 +860,50 @@ theorem LICQ_linearized_feasible_directions_sub_posTangentCone intro v hv by_cases veq0 : v = 0 - · rw [veq0]; rw [posTangentConeAt]; simp - use fun n ↦ n; use fun _ ↦ 0; simp; constructor - · use 0; exact fun _ _ ↦ xf - · exact tendsto_natCast_atTop_atTop + · rw [veq0] + change 0 ∈ tangentConeAt NNReal p.FeasSet x + exact zero_mem_tangentConeAt (subset_closure xf) let gradc : EuclideanSpace ℝ (Fin n) → ((p.active_set x) → (EuclideanSpace ℝ (Fin n))) := fun z ↦ (fun i ↦ if i.1 ∈ τ then gradient (p.equality_constraints i) z else gradient (p.inequality_constraints i) z) -- gradient of the constraints - let Ax : Matrix (p.active_set x) (Fin n) ℝ := fun i ↦ gradc x i -- Jacobi at x + let Ax : Matrix (p.active_set x) (Fin n) ℝ := fun i j ↦ (gradc x i).ofLp j -- Jacobi at x let m := (p.active_set x).card have mlen : m ≤ n := by apply LICQ_mlen x LIx; simp [m] have existZ : ∃ (Z : Matrix (Fin n) (Fin (n - m)) ℝ), Ax * Z = 0 ∧ Matrix.rank Z = (n - m) := by apply LICQ_existZ x LIx; simp [m]; simp [Ax, gradc] rw [LICQ] at LIx; - rw [posTangentConeAt]; simp only [eventually_atTop, ge_iff_le, mem_setOf_eq] + rw [posTangentConeAt, mem_tangentConeAt_iff_exists_seq] rcases existZ with ⟨Z, ⟨eq1, eq2⟩⟩ let Mx : EuclideanSpace ℝ (Fin n) →L[ℝ] EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ) := (LinearMap.toContinuousLinearMap (Matrix.toEuclideanLin Ax)).prod - (LinearMap.toContinuousLinearMap (Matrix.toEuclideanLin Zᵀ)) -- Jacobi of Rz at x + (LinearMap.toContinuousLinearMap + ((Matrix.mulVecLin Zᵀ).comp (EuclideanSpace.equiv (Fin n) ℝ).toLinearMap)) -- Jacobi of Rz at x let c : EuclideanSpace ℝ (Fin n) → ((p.active_set x) → ℝ) := fun z ↦ (fun i ↦ if i.1 ∈ τ then (p.equality_constraints i) z else (p.inequality_constraints i) z) -- the constraints let Rz : EuclideanSpace ℝ (Fin n) → EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ) := - fun z ↦ (c z, Zᵀ *ᵥ (z - x)) -- z part in R + fun z ↦ (WithLp.toLp 2 (c z), Zᵀ *ᵥ (z - x)) -- z part in R let Rt : ℝ → EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ) := fun t ↦ t • Mx v -- t part in R let A : EuclideanSpace ℝ (Fin n) → Matrix (p.active_set x) (Fin n) ℝ := - fun z ↦ (fun i ↦ gradc z i) -- compose the gradient matrix + fun z ↦ (fun i j ↦ (gradc z i).ofLp j) -- compose the gradient matrix let Jz : EuclideanSpace ℝ (Fin n) → EuclideanSpace ℝ (Fin n) →L[ℝ] EuclideanSpace ℝ (p.active_set x) := fun z ↦ (LinearMap.toContinuousLinearMap (toEuclideanLin (A z))) -- change the Jacobi into linear transformation - have cgrad_atx : Jz x = (LinearMap.toContinuousLinearMap (toEuclideanLin Ax)) := by simp [Jz, A, gradc] -- A x = Ax + have cgrad_atx : Jz x = (LinearMap.toContinuousLinearMap (toEuclideanLin Ax)) := by rfl -- A x = Ax have Rzgrad : HasStrictFDerivAt Rz Mx x := by - simp only [Rz, Ax] - apply HasStrictFDerivAt.prod + simp only [Rz] + refine HasStrictFDerivAt.prodMk ?_ ?_ · rw [← cgrad_atx] rw [hasStrictFDerivAt_euclidean] refine LICQ_strictfderiv_Ax_elem c ?_ gradc ?_ A ?_ Jz ?_ conte conti repeat simp only [c, gradc, A, Jz] · let N : EuclideanSpace ℝ (Fin n) →L[ℝ] (Fin (n - m) → ℝ) := - (LinearMap.toContinuousLinearMap (toEuclideanLin Zᵀ)) - show HasStrictFDerivAt (fun y : EuclideanSpace ℝ (Fin n) ↦ Zᵀ *ᵥ (y - x)) N x - rw [HasStrictFDerivAt] - have aux : (fun p : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) - ↦ Zᵀ *ᵥ (p.1 - x) - Zᵀ *ᵥ (p.2 - x) - N (p.1 - p.2)) = 0 := by - ext y j; rw [← mulVec_sub, sub_sub, add_sub_cancel]; rw [mulVec_eq_toEuclidean] - simp [N]; apply sub_eq_zero_of_eq; tauto - rw [aux]; simp + (LinearMap.toContinuousLinearMap + ((Matrix.mulVecLin Zᵀ).comp (EuclideanSpace.equiv (Fin n) ℝ).toLinearMap)) + change HasStrictFDerivAt (fun y : EuclideanSpace ℝ (Fin n) ↦ N (y - x)) N x + simpa using N.hasStrictFDerivAt.comp x ((hasStrictFDerivAt_id x).sub_const x) have Rxeq0 : Rz x = 0 := by simp [Rz, c]; ext i; @@ -1063,19 +917,24 @@ theorem LICQ_linearized_feasible_directions_sub_posTangentCone rw [Finset.mem_filter] at hi2 exact hi2.2 - have Mxinj : LinearMap.ker Mx = ⊥ := by - show LinearMap.ker (Mx : EuclideanSpace ℝ (Fin n) →ₗ[ℝ] EuclideanSpace ℝ - (p.active_set x) × (Fin (n - m) → ℝ)) = ⊥ + have Mxinj : Mx.ker = ⊥ := by rw [LinearMap.ker_eq_bot'] intro z Mzeq0; simp [Mx] at Mzeq0 - have heq1 : Ax *ᵥ z = 0 := by rw [mulVec_eq_toEuclidean]; apply Mzeq0.1 - have heq2 : Zᵀ *ᵥ z = 0 := by rw [mulVec_eq_toEuclidean]; apply Mzeq0.2 + have heq1 : Ax *ᵥ z = 0 := by + rw [mulVec_eq_toEuclidean] + exact congrArg (fun u => u.ofLp) Mzeq0.1 + have heq2 : Zᵀ *ᵥ z = 0 := by + ext i + have h2 := congrArg (fun w => w i) Mzeq0.2 + simpa [vecMul, mulVec, dotProduct, EuclideanSpace.equiv, mul_comm, mul_left_comm, mul_assoc] using h2 refine LICQ_injM z m Z Ax ?_ mlen ?_ eq2 eq1 ⟨heq1, heq2⟩ simp [m] - obtain hAx := LICQ_Axfullrank x LIx; simp at hAx - show Ax.rank = (active_set x).card; apply hAx; simp only [Ax] - have Mxsurj : LinearMap.range Mx = ⊤ := by - show LinearMap.range (Mx : EuclideanSpace ℝ (Fin n) →ₗ[ℝ] EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ)) = ⊤ + have hAx : Ax.rank = (p.active_set x).card := by + simpa using (LICQ_Axfullrank (p := p) x LIx (M := Ax) (eq := by + ext i j + rfl)) + exact hAx + have Mxsurj : Mx.range = ⊤ := by rw [← LinearMap.ker_eq_bot_iff_range_eq_top_of_finrank_eq_finrank] · apply Mxinj · simp; show n = m + (n - m) @@ -1091,51 +950,67 @@ theorem LICQ_linearized_feasible_directions_sub_posTangentCone simp only [linearized_feasible_directions] at hv rcases hv with ⟨hvh1, hvh2⟩ rcases implicit_f with ⟨N, d, hfd, dtend⟩ - rw [LinearMapClass.ker_eq_bot] at Mxinj + rw [LinearMap.ker_eq_bot] at Mxinj rw [LinearMap.range_eq_top] at Mxsurj obtain deriv := (hasFDerivAt_iff_tendsto.1 (HasStrictFDerivAt.hasFDerivAt Rzgrad)) obtain deriv := tendsto_nhds_iff_seq_tendsto.1 deriv d dtend - rw [tendsto_iff_norm_sub_tendsto_zero, NormedAddCommGroup.tendsto_nhds_zero] at dtend; simp at dtend + have dtend0 : Tendsto (fun n ↦ d n - x) atTop (𝓝 0) := by + have hsub : Tendsto (fun n ↦ d n - x) atTop (𝓝 (x - x)) := dtend.sub tendsto_const_nhds + simpa using hsub + rw [tendsto_iff_norm_sub_tendsto_zero, NormedAddGroup.tendsto_nhds_zero] at dtend; simp at dtend obtain ⟨ε, εpos, inactive⟩ := LICQ_inactive_nhds x xf conti obtain ⟨N', dtendx⟩ := dtend ε εpos use (fun n ↦ d n - x); constructor - · use max N N'; intro nn hnn; simp [FeasSet, FeasPoint] - specialize hfd nn (le_of_max_le_left hnn); simp [Rz, Rt, Mx] at hfd; rw [← mulVec_eq_toEuclidean] at hfd - rcases hfd with ⟨hv1, hv2⟩ - have Axeq : (nn : ℝ)⁻¹ • Ax *ᵥ v = fun i : (p.active_set x) ↦ ((nn : ℝ)⁻¹ * (gradc x i) ⬝ᵥ v) := by - simp [Ax]; ext i; simp; left; simp [mulVec] - have Axroweq : ∀ i : (p.active_set x), c (d nn) i = (nn : ℝ)⁻¹ * (gradc x i) ⬝ᵥ v := by - rw [Axeq] at hv1; simp [hv1] - constructor; constructor - · rw [hdomain]; simp - · intro i hi - have iina : i ∈ (p.active_set x) := by simp [active_set, hi] - obtain h := hvh1 i hi - obtain eq := Axroweq ⟨i, iina⟩; simp [c, hi, gradc] at eq - rw [eq]; simp; right; apply h - constructor - · rw [hdomain]; simp - · intro j hj - have notin : j ∉ τ := by - by_contra hh; - have : j ∈ τ ∩ σ := by simp [hj, hh] - rw [p.eq_ine_not_intersect] at this; tauto - by_cases hj1 : j ∈ p.active_set x - · have jin : j ∈ σ ∩ (p.active_set x) := by simp [hj1, hj] - obtain h := hvh2 j jin - obtain eq := Axroweq ⟨j, hj1⟩; simp [c, hj1, notin, gradc] at eq - rw [eq]; field_simp - rw [div_nonneg_iff]; left; simp at h; simp [dotProduct, h] - · specialize inactive j; simp [hj, hj1] at inactive - specialize inactive (d nn) - specialize dtendx nn (le_of_max_le_right hnn); rw [← dist_eq_norm] at dtendx - specialize inactive dtendx; linarith [inactive] - - constructor - · exact tendsto_natCast_atTop_atTop - · have Mxbij : Function.Bijective Mx := ⟨Mxinj, Mxsurj⟩ - refine LICQ_tendsto v veq0 ?_ Rxeq0 hfd dtend Mxbij deriv; simp [Rt] + · exact dtend0 + · constructor + · refine Filter.eventually_atTop.2 ?_ + use max N N' + intro nn hnn + simp [FeasSet, FeasPoint] + specialize hfd nn (le_of_max_le_left hnn); simp [Rz, Rt, Mx] at hfd + rcases hfd with ⟨hv1, hv2⟩ + have hv1' : c (d nn) = (nn : ℝ)⁻¹ • ((toEuclideanLin Ax) v).ofLp := by + simpa [smul_eq_mul] using congrArg (fun u => u.ofLp) hv1 + have Axroweq : ∀ i : (p.active_set x), c (d nn) i = (nn : ℝ)⁻¹ * (gradc x i) ⬝ᵥ v := by + intro i + have hrow := congrArg (fun w => w i) hv1' + simpa [Ax, mulVec, dotProduct] using hrow + constructor; constructor + · rw [hdomain]; simp + · intro i hi + have iina : i ∈ (p.active_set x) := by simp [active_set, hi] + obtain h := hvh1 i hi + have hdot : (gradient (p.equality_constraints i) x).ofLp ⬝ᵥ v.ofLp = 0 := by + simpa [EuclideanSpace.inner_eq_star_dotProduct, dotProduct_comm] using h + have eq : p.equality_constraints i (d nn) = (nn : ℝ)⁻¹ * (gradient (p.equality_constraints i) x).ofLp ⬝ᵥ v.ofLp := by + simpa [c, gradc, hi] using Axroweq ⟨i, iina⟩ + rw [eq]; simp; right; exact hdot + constructor + · rw [hdomain]; simp + · intro j hj + have notin : j ∉ τ := by + by_contra hh; + have : j ∈ τ ∩ σ := by simp [hj, hh] + rw [p.eq_ine_not_intersect] at this; tauto + by_cases hj1 : j ∈ p.active_set x + · have jin : j ∈ σ ∩ (p.active_set x) := by simp [hj1, hj] + obtain h := hvh2 j jin + have hdot : 0 ≤ (gradient (p.inequality_constraints j) x).ofLp ⬝ᵥ v.ofLp := by + simpa [EuclideanSpace.inner_eq_star_dotProduct, dotProduct_comm] using h + have eq : p.inequality_constraints j (d nn) = + (nn : ℝ)⁻¹ * (gradient (p.inequality_constraints j) x).ofLp ⬝ᵥ v.ofLp := by + simpa [c, gradc, notin] using Axroweq ⟨j, hj1⟩ + rw [eq]; field_simp + rw [div_nonneg_iff]; left; exact ⟨hdot, by positivity⟩ + · specialize inactive j; simp [hj, hj1] at inactive + specialize inactive (d nn) + specialize dtendx nn (le_of_max_le_right hnn); rw [← dist_eq_norm] at dtendx + specialize inactive dtendx; linarith [inactive] + · have Mxbij : Function.Bijective Mx := ⟨Mxinj, Mxsurj⟩ + have htv : Tendsto (fun i : ℕ ↦ (i : ℝ) • (d i - x)) atTop (𝓝 v) := by + refine LICQ_tendsto v veq0 ?_ Rxeq0 hfd dtend Mxbij deriv; simp [Rt] + simpa [NNReal.smul_def] using htv theorem LICQ_linearized_feasible_directions_eq_posTangentCone (x : EuclideanSpace ℝ (Fin n)) (xf : x ∈ p.FeasSet) @@ -1179,9 +1054,9 @@ lemma subtype_sum (σ τ : Finset ℕ) (f : σ → EuclideanSpace ℝ (Fin n)) have : ∑ i, g i = ∑ i : {x // x ∈ σ ∩ τ}, f {val := i.1, property := by obtain hi := i.2; rw [Finset.mem_inter] at hi; exact hi.1} := by congr; ext i; rw [h2] - rw [this]; simp [h3] + rw [this]; simp let f₁ : ℕ → EuclideanSpace ℝ (Fin n):= fun i => if h : i ∈ σ then f ⟨i, h⟩ else 0 - have eq1 : ∑ i ∈ σ.attach, f i = ∑ i in σ, f₁ i := by + have eq1 : ∑ i ∈ σ.attach, f i = ∑ i ∈ σ, f₁ i := by simp [f₁]; nth_rw 2 [← Finset.sum_attach]; congr; simp have eq2 : ∑ i ∈ (σ ∩ τ).attach, f {val := i.1, property := by obtain hi := i.2; rw [Finset.mem_inter] at hi; exact hi.1} = @@ -1193,8 +1068,8 @@ lemma subtype_sum (σ τ : Finset ℕ) (f : σ → EuclideanSpace ℝ (Fin n)) obtain eq := Finset.sdiff_union_inter σ τ nth_rw 1 [← eq]; rw [Finset.sum_union]; simp have feq0 : ∀ x ∈ (σ \ τ), f₁ x = 0 := by - simp [f₁]; intro x _ xninτ - intro h; specialize h3 ⟨x, h⟩; apply h3; simp [xninτ] + simp [f₁]; intro x _ xninτ h + specialize h3 ⟨x, h⟩; apply h3; simp [xninτ] apply Finset.sum_eq_zero feq0 apply Finset.disjoint_sdiff_inter σ τ @@ -1244,13 +1119,19 @@ theorem first_order_neccessary_general (p1 : Constrained_OptimizationProblem (Eu intro i _; apply DifferentiableAt.const_mul; exact (hc1 i i.2) intro i _; apply DifferentiableAt.const_mul; exact (he1 i i.2) exact hf.differentiableAt - apply DifferentiableAt.sum; intro i _; apply DifferentiableAt.const_mul - exact (he1 i i.2) + convert (DifferentiableAt.sum (u := (Finset.univ : Finset τ)) + (A := fun i m => lam i * p1.equality_constraints (↑i) m) + (by intro i _; exact DifferentiableAt.const_mul (he1 i i.2) (lam i))) using 1 + ext m; simp [Finset.sum_apply] apply DifferentiableAt.sub hf.differentiableAt - apply DifferentiableAt.sum; intro i _; apply DifferentiableAt.const_mul - exact (he1 i i.2) - apply DifferentiableAt.sum; intro i _; apply DifferentiableAt.const_mul - exact (hc1 i i.2) + convert (DifferentiableAt.sum (u := (Finset.univ : Finset τ)) + (A := fun i m => lam i * p1.equality_constraints (↑i) m) + (by intro i _; exact DifferentiableAt.const_mul (he1 i i.2) (lam i))) using 1 + ext m; simp [Finset.sum_apply] + convert (DifferentiableAt.sum (u := (Finset.univ : Finset σ)) + (A := fun i m => mu1 i * p1.inequality_constraints (↑i) m) + (by intro i _; exact DifferentiableAt.const_mul (hc1 i i.2) (mu1 i))) using 1 + ext m; simp [Finset.sum_apply] constructor · intro j; simp [mu1] by_cases ht : j.1 ∈ p1.active_set loc @@ -1263,10 +1144,9 @@ theorem first_order_neccessary_general (p1 : Constrained_OptimizationProblem (Eu unfold active_set at heq simp at heq rcases heq with hl | hl - · obtain neq := p1.eq_ine_not_intersect - exfalso; - apply absurd neq; push_neg; - apply Finset.ne_empty_of_mem (a := j.1) (by simp [hl]) + · exfalso + have : j.1 ∈ τ ∩ σ := by simp [hl, j.2] + simp [p1.eq_ine_not_intersect] at this exact hl simp [ht] @@ -1297,14 +1177,14 @@ variable {n : ℕ} {x : EuclideanSpace ℝ (Fin n)} variable {τ σ : Finset ℕ} {p : Constrained_OptimizationProblem (EuclideanSpace ℝ (Fin n)) τ σ} theorem LinearCQ_linear_constraint_eq (x : EuclideanSpace ℝ (Fin n)) (Lx : p.LinearCQ x) : - ∀ i ∈ τ, ∃ a, ∃ b, (equality_constraints p i) = fun y ↦ (inner a y : ℝ) + b := by + ∀ i ∈ τ, ∃ a, ∃ b, (equality_constraints p i) = fun y ↦ (inner ℝ a y : ℝ) + b := by intro i hi simp [LinearCQ] at Lx obtain Lx := (Lx).1 i ((equality_constraint_active_set x) hi) hi exact (IsLinear_iff' _).mp Lx theorem LinearCQ_linear_constraint_gradient_eq (x : EuclideanSpace ℝ (Fin n)) (Lx : p.LinearCQ x) : - ∀ i ∈ τ, ∃ a, ∃ b, ((equality_constraints p i) = fun y ↦ (inner a y : ℝ) + b) ∧ + ∀ i ∈ τ, ∃ a, ∃ b, ((equality_constraints p i) = fun y ↦ (inner ℝ a y : ℝ) + b) ∧ gradient (equality_constraints p i) x = a := by intro i hi obtain ⟨a, b, hab⟩ := LinearCQ_linear_constraint_eq x Lx i hi @@ -1313,14 +1193,14 @@ theorem LinearCQ_linear_constraint_gradient_eq (x : EuclideanSpace ℝ (Fin n)) exact (gradient_of_inner_const x a).gradient theorem LinearCQ_linear_constraint_ineq (x : EuclideanSpace ℝ (Fin n)) (Lx : p.LinearCQ x) : - ∀ i ∈ p.active_set x ∩ σ, ∃ a, ∃ b, (inequality_constraints p i) = fun y ↦ (inner a y : ℝ) + b := by + ∀ i ∈ p.active_set x ∩ σ, ∃ a, ∃ b, (inequality_constraints p i) = fun y ↦ (inner ℝ a y : ℝ) + b := by intro i hi - simp only [LinearCQ, and_imp] at Lx + simp only [LinearCQ] at Lx obtain Lx := (Lx).2 i hi exact (IsLinear_iff' _).mp Lx theorem LinearCQ_linear_constraint_gradient_ineq (x : EuclideanSpace ℝ (Fin n)) (Lx : p.LinearCQ x) : - ∀ i ∈ p.active_set x ∩ σ, ∃ a, ∃ b, ((inequality_constraints p i) = fun y ↦ (inner a y : ℝ) + b) ∧ + ∀ i ∈ p.active_set x ∩ σ, ∃ a, ∃ b, ((inequality_constraints p i) = fun y ↦ (inner ℝ a y : ℝ) + b) ∧ gradient (inequality_constraints p i) x = a := by intro i hi obtain ⟨a, b, hab⟩ := LinearCQ_linear_constraint_ineq x Lx i hi @@ -1344,57 +1224,64 @@ theorem Linear_linearized_feasible_directions_eq_posTangentCone obtain ⟨t_, ht_, ht⟩ := inactive_constraint x v xf conti obtain ⟨hv1, hv2⟩ := hv let z := fun (k : ℕ) ↦ (t_ / (k + 1)) • v - simp [posTangentConeAt] - let c := fun (k : ℕ) ↦ (k + (1 : ℝ)) / t_ + rw [posTangentConeAt, mem_tangentConeAt_iff_exists_seq] + let c : ℕ → NNReal := fun k ↦ ⟨((k : ℝ) + 1) / t_, by positivity⟩ use c; use z constructor - · use 0; intro n hn - simp [FeasSet, FeasPoint]; constructor; - · constructor; rw [hdomain]; trivial - intro i hi - obtain ⟨a, c, ⟨hab, hg⟩⟩ := LinearCQ_linear_constraint_gradient_eq x Lx i hi - simp [FeasSet, FeasPoint] at xf - obtain ⟨⟨_, h2⟩, ⟨_, _⟩⟩ := xf - obtain h2 := h2 i hi; rw [← h2]; rw [hab]; simp only [RCLike.inner_apply, conj_trivial] - have : ⟪a, z n⟫_ℝ = 0 := by - obtain hv1 := hv1 i hi - rw [hg] at hv1 - simp only [z]; rw [inner_smul_right, hv1, mul_zero] - rw [inner_add_right, this, add_zero] - constructor; rw [hdomain]; trivial - intro j hj - by_cases hj1 : j ∈ p.active_set x - · obtain hj' := Finset.mem_inter_of_mem hj1 hj - obtain ⟨a, c, ⟨hab, hg⟩⟩ := LinearCQ_linear_constraint_gradient_ineq x Lx j hj' - simp [FeasSet, FeasPoint] at xf - have : ⟪a, z n⟫_ℝ ≥ 0 := by - obtain hv2 := hv2 j (Finset.mem_inter_of_mem hj hj1) - rw [hg] at hv2; simp only [z]; rw [inner_smul_right] - positivity - obtain ⟨⟨_, _⟩, ⟨_, h2⟩⟩ := xf - simp [active_set] at hj1; - have : j ∉ τ := by - by_contra hh; - have : j ∈ τ ∩ σ := by simp [hj, hh] - rw [p.eq_ine_not_intersect] at this; tauto - simp [this] at hj1 - rw [← hj1.2, hab]; simp only [RCLike.inner_apply, conj_trivial] - rw [inner_add_right] + · have hz0' : Tendsto (fun n : ℕ ↦ t_ / ((n : ℝ))) atTop (𝓝 0) := + tendsto_const_div_atTop_nhds_zero_nat t_ + have hz0 : Tendsto (fun n : ℕ ↦ t_ / ((n : ℝ) + 1)) atTop (𝓝 0) := by + simpa [Nat.cast_add, Nat.cast_one, add_assoc, add_comm, add_left_comm] using + (Filter.tendsto_add_atTop_iff_nat 1).2 hz0' + simpa [z] using hz0.smul_const v + · constructor + · refine Filter.eventually_atTop.2 ?_ + use 0 + intro n hn + simp [FeasSet, FeasPoint]; constructor; + · constructor; rw [hdomain]; trivial + intro i hi + obtain ⟨a, c, ⟨hab, hg⟩⟩ := LinearCQ_linear_constraint_gradient_eq x Lx i hi + simp [FeasSet, FeasPoint] at xf + obtain ⟨⟨_, h2⟩, ⟨_, _⟩⟩ := xf + obtain h2 := h2 i hi; rw [← h2]; rw [hab]; simp + have : ⟪a, z n⟫_ℝ = 0 := by + obtain hv1 := hv1 i hi + rw [hg] at hv1 + simp only [z]; rw [inner_smul_right, hv1, mul_zero] + rw [inner_add_right, this, add_zero] + constructor; rw [hdomain]; trivial + intro j hj + by_cases hj1 : j ∈ p.active_set x + · obtain hj' := Finset.mem_inter_of_mem hj1 hj + obtain ⟨a, c, ⟨hab, hg⟩⟩ := LinearCQ_linear_constraint_gradient_ineq x Lx j hj' + simp [FeasSet, FeasPoint] at xf + have : ⟪a, z n⟫_ℝ ≥ 0 := by + obtain hv2 := hv2 j (Finset.mem_inter_of_mem hj hj1) + rw [hg] at hv2; simp only [z]; rw [inner_smul_right] + positivity + obtain ⟨⟨_, _⟩, ⟨_, h2⟩⟩ := xf + simp [active_set] at hj1; + have : j ∉ τ := by + by_contra hh; + have : j ∈ τ ∩ σ := by simp [hj, hh] + rw [p.eq_ine_not_intersect] at this; tauto + simp [this] at hj1 + rw [← hj1.2, hab]; simp + rw [inner_add_right] + linarith + simp [z] + have : (t_ / (↑n + 1)) ∈ Icc 0 t_ := by + simp; constructor; positivity + apply div_le_self (by linarith) (by linarith) + obtain ht := ht _ this j (Finset.mem_sdiff.mpr ⟨hj, hj1⟩) linarith - simp [z] - have : (t_ / (↑n + 1)) ∈ Icc 0 t_ := by - simp; constructor; positivity - apply div_le_self (by linarith) (by linarith) - obtain ht := ht _ this j (Finset.mem_sdiff.mpr ⟨hj, hj1⟩) - linarith - constructor - · apply Filter.Tendsto.atTop_div_const ht_ - apply tendsto_atTop_add_nonneg_right' - · exact tendsto_natCast_atTop_atTop - apply Filter.Eventually.of_forall; exact fun x ↦ zero_le_one' ℝ - apply tendsto_atTop_of_eventually_const (i₀ := 1) - intro i hi; simp [c, z] - rw [smul_smul]; field_simp + · apply tendsto_atTop_of_eventually_const (i₀ := 1) + intro i hi + simp [c, z, NNReal.smul_def] + rw [smul_smul] + field_simp + simp theorem first_order_neccessary_LinearCQ (p1 : Constrained_OptimizationProblem (EuclideanSpace ℝ (Fin n)) τ σ) diff --git a/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean b/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean index 757ec1e..b3e05f7 100644 --- a/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean +++ b/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean @@ -18,7 +18,7 @@ open Set InnerProductSpace x with d is less than zero. -/ def DescentDirection (d : E) (x : E) (_ : HasGradientAt f (f' x) x) : Prop := - inner (f' x) d < (0 : ℝ) + inner ℝ (f' x) d < (0 : ℝ) /- For any vector d, there does not exist a descent direction for the function f @@ -30,19 +30,19 @@ theorem optimal_no_descent_direction (hf : ∀ x : E, HasGradientAt f (f' x) x) intro d by_contra h have : ∃ t : ℝ , f (xm + t • d) < f xm := by - have h₁ : ∃ T : ℝ , T > 0 ∧ (∀ a ∈ Icc (- T) T, inner (f' (xm + a • d)) d < (0 : ℝ)) := by - let g := fun r : ℝ ↦ (inner (f' (xm + r • d)) d : ℝ) - have hg0 : g 0 = inner (f' xm) d := by simp [g] + have h₁ : ∃ T : ℝ , T > 0 ∧ (∀ a ∈ Icc (- T) T, inner ℝ (f' (xm + a • d)) d < (0 : ℝ)) := by + let g := fun r : ℝ ↦ (inner ℝ (f' (xm + r • d)) d : ℝ) + have hg0 : g 0 = inner ℝ (f' xm) d := by simp [g] have hc : ContinuousOn g univ := by - simp [g] + change ContinuousOn (fun r : ℝ ↦ inner ℝ (f' (xm + r • d)) d) univ apply ContinuousOn.inner · apply ContinuousOn.comp hfc · apply ContinuousOn.add continuousOn_const apply ContinuousOn.smul continuousOn_id continuousOn_const · simp · exact continuousOn_const - have hu : ∃ u < (0 : ℝ) , inner (f' xm) d ≤ u := by - use (inner (f' xm) d / 2) + have hu : ∃ u < (0 : ℝ) , inner ℝ (f' xm) d ≤ u := by + use (inner ℝ (f' xm) d / 2) rw [DescentDirection] at h constructor · linarith @@ -72,7 +72,7 @@ theorem optimal_no_descent_direction (hf : ∀ x : E, HasGradientAt f (f' x) x) use T rcases h₁ with ⟨T, ⟨hT1,hT2⟩⟩ have h₂ : ∃ t1 : ℝ, t1 ≥ -T ∧ t1 ≤ T ∧ f (xm + T • d) = - f xm + inner (f' (xm + t1 • d)) (T • d) := by + f xm + inner ℝ (f' (xm + t1 • d)) (T • d) := by rcases (expansion hf xm (T • d)) with ⟨ts,⟨ts1,⟨ts2,ts3⟩⟩⟩ use (ts • T) constructor @@ -111,7 +111,7 @@ theorem first_order_unconstrained (hf : ∀ x : E, HasGradientAt f (f' x) x) (mi -/ theorem first_order_convex (hf : ∀ x : E, HasGradientAt f (f' x) x) (hcon : ConvexOn ℝ univ f) (hfm : f' xm = 0) : IsMinOn f univ xm := by - have : ∀ y , f y ≥ f xm + inner (f' xm) (y - xm) := by + have : ∀ y , f y ≥ f xm + inner ℝ (f' xm) (y - xm) := by intro y apply Convex_first_order_condition' (hf xm) hcon (by trivial) · trivial diff --git a/Optlib/Optimality/Weak_Duality.lean b/Optlib/Optimality/Weak_Duality.lean index 4be2b67..a315b20 100644 --- a/Optlib/Optimality/Weak_Duality.lean +++ b/Optlib/Optimality/Weak_Duality.lean @@ -51,13 +51,15 @@ variable {p : Constrained_OptimizationProblem E τ σ} {x : E} lemma empty_domain_inf_value_top {p : Constrained_OptimizationProblem E τ σ} (hp : (p.domain) = ∅) : p.inf_value = ⊤ := by - unfold inf_value - unfold FeasSet FeasPoint + unfold inf_value FeasSet FeasPoint simp [hp] + rfl lemma empty_FeasSet_inf_value_top {p : Constrained_OptimizationProblem E τ σ} (hp : (p.FeasSet) = ∅) : p.inf_value = ⊤ := by - unfold inf_value; simp [hp] + unfold inf_value + simp [hp] + rfl lemma objective_le_sup {p : Constrained_OptimizationProblem E τ σ} (x : E) (hx : x ∈ p.FeasSet) : (p.objective x).toEReal ≤ p.sup_value := by @@ -70,16 +72,18 @@ lemma dual_objective_le_top_nonempty {p : Constrained_OptimizationProblem E τ intro lambda1 lambda2 unfold dual_objective let x := Classical.choose hp - apply iInf_lt_top.mpr - use x; simp; apply Classical.choose_spec hp + refine iInf_lt_top.mpr ?_ + refine ⟨x, ?_⟩ + refine iInf_lt_top.mpr ?_ + exact ⟨Classical.choose_spec hp, by + exact EReal.coe_lt_top (p.Lagrange_function x lambda1 lambda2)⟩ lemma dual_objective_eq_top_empty {p : Constrained_OptimizationProblem E τ σ} (hp : (p.domain) = ∅) : ∀ lambda1 lambda2, p.dual_objective lambda1 lambda2 = ⊤ := by - intro lambda1 lambda2; unfold dual_objective - simp; intro x - by_contra h - have : x ∉ p.domain := by exact of_eq_false (congrFun hp x) - exact this h + intro lambda1 lambda2 + unfold dual_objective + simp [hp] + rfl lemma objective_infimum_global_minimum {p : Constrained_OptimizationProblem E τ σ} (hp : (p.objective x).toEReal = p.inf_value) (hx : x ∈ p.FeasSet) : @@ -153,8 +157,8 @@ theorem weak_duality {p : Constrained_OptimizationProblem E τ σ} theorem weak_duality_aux {p : Constrained_OptimizationProblem E τ σ} (hp : (p.domain).Nonempty) : (p.dual_problem).sup_value ≤ p.inf_value := by unfold sup_value dual_problem; simp - intro b x lambda1 lambda2 hl hl2 hl3 - rw [← hl3, ← hl2] + intro b lambda1 lambda2 hl hl2 + rw [← hl2] have : ((p.dual_objective lambda1 lambda2).toReal).toEReal = p.dual_objective lambda1 lambda2 := by apply EReal.coe_toReal @@ -175,7 +179,7 @@ theorem weak_duality' {p : Constrained_OptimizationProblem E τ σ} : · exact weak_duality_aux hp push_neg at hp rw [empty_domain_inf_value_top hp] - simp only [le_top] + exact le_top end WeakDuality @@ -185,10 +189,10 @@ variable {E : Type _} {τ σ : Finset ℕ} variable [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E] variable {p : Constrained_OptimizationProblem E τ σ} -lemma ConcaveOn.sum {α 𝕜 : Type*} [OrderedSemiring 𝕜] [AddCommMonoid α][SMul 𝕜 α] - {ι : Type*} [DecidableEq ι] {s : Finset ι} {t : s → α → 𝕜} {d : Set α} - (h : ∀ i : s, ConcaveOn 𝕜 d (t i)) (hd : Convex 𝕜 d): - ConcaveOn 𝕜 d (fun x => ∑ i : s, t i x) := by +lemma concaveOn_sum {α : Type*} [AddCommMonoid α] [SMul ℝ α] + {ι : Type*} [DecidableEq ι] {s : Finset ι} {t : s → α → ℝ} {d : Set α} + (h : ∀ i : s, ConcaveOn ℝ d (t i)) (hd : Convex ℝ d) : + ConcaveOn ℝ d (fun x => ∑ i : s, t i x) := by constructor · exact hd intro x hx y hy a b ha hb hab @@ -205,12 +209,12 @@ theorem convex_problem_convex_Lagrange {p : Constrained_OptimizationProblem E τ (lambda1 : τ → ℝ) (lambda2 : σ → ℝ) (hKKT : KKT_point p x lambda1 lambda2) : ConvexOn ℝ univ (fun m ↦ p.Lagrange_function m lambda1 lambda2) := by + subst hτ unfold Lagrange_function apply ConvexOn.sub · apply ConvexOn.sub h - simp [hτ]; apply concaveOn_const 0 - exact convex_univ - apply ConcaveOn.sum _ convex_univ + simpa using (concaveOn_const (𝕜 := ℝ) (s := (univ : Set E)) (0 : ℝ) convex_univ) + apply concaveOn_sum _ convex_univ intro i apply ConcaveOn.smul · unfold KKT_point at hKKT @@ -228,10 +232,10 @@ theorem diff_problem_diff_Lagrange {p : Constrained_OptimizationProblem E τ σ} · apply DifferentiableAt.sub · exact hf simp [hτ] - apply DifferentiableAt.sum - intro i _ - apply DifferentiableAt.const_mul _ (lambda2 i) - apply conti i i.2 + convert (DifferentiableAt.sum (u := (Finset.univ : Finset σ)) + (A := fun j m => lambda2 j * p.inequality_constraints (↑j) m) + (by intro i _; exact DifferentiableAt.const_mul (conti i i.2) (lambda2 i))) using 1 + ext m; simp [Finset.sum_apply] theorem KKT_multipliers_objective_eq_Lagrangian {p : Constrained_OptimizationProblem E τ σ} (x : E) (lambda1 : τ → ℝ) (lambda2 : σ → ℝ) diff --git a/lake-manifest.json b/lake-manifest.json index f0afaa7..38d48d5 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -1,52 +1,32 @@ {"version": "1.1.0", "packagesDir": ".lake/packages", "packages": - [{"url": "https://github.com/leanprover-community/batteries", + [{"url": "https://github.com/leanprover-community/mathlib4", "type": "git", "subDir": null, - "scope": "leanprover-community", - "rev": "31a10a332858d6981dbcf55d54ee51680dd75f18", - "name": "batteries", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/quote4", - "type": "git", - "subDir": null, - "scope": "leanprover-community", - "rev": "1357f4f49450abb9dfd4783e38219f4ce84f9785", - "name": "Qq", + "scope": "", + "rev": "5c8398df528176d9c87ccd9226ba8f7c8852d59c", + "name": "mathlib", "manifestFile": "lake-manifest.json", - "inputRev": "master", - "inherited": true, + "inputRev": "v4.29.0-rc6", + "inherited": false, "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover-community/aesop", + {"url": "https://github.com/leanprover-community/plausible", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "5f934891e11d70a1b86e302fdf9cecfc21e8de46", - "name": "aesop", + "rev": "e84e3e16aea6b72cc5d311ca1bb25caad417e162", + "name": "plausible", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "main", "inherited": true, "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/ProofWidgets4", + {"url": "https://github.com/leanprover-community/LeanSearchClient", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "23268f52d3505955de3c26a42032702c25cfcbf8", - "name": "proofwidgets", - "manifestFile": "lake-manifest.json", - "inputRev": "v0.0.44", - "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover/lean4-cli", - "type": "git", - "subDir": null, - "scope": "leanprover", - "rev": "2cf1030dc2ae6b3632c84a09350b675ef3e347d0", - "name": "Cli", + "rev": "c5d5b8fe6e5158def25cd28eb94e4141ad97c843", + "name": "LeanSearchClient", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, @@ -55,81 +35,61 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "984d7ee170b75d6b03c0903e0b750ee2c6d1e3fb", + "rev": "f207d9fcf0cef00ba79962a33ef156061914d9c7", "name": "importGraph", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/LeanSearchClient", + {"url": "https://github.com/leanprover-community/ProofWidgets4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "7bedaed1ef024add1e171cc17706b012a9a37802", - "name": "LeanSearchClient", + "rev": "2e58165a9dcdca9837b666528f974299ee1a51cc", + "name": "proofwidgets", "manifestFile": "lake-manifest.json", - "inputRev": "main", + "inputRev": "v0.0.92", "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/plausible", + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/aesop", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "d212dd74414e997653cd3484921f4159c955ccca", - "name": "plausible", + "rev": "c3361708f266893de5d1769192b60d4b1831f2bb", + "name": "aesop", "manifestFile": "lake-manifest.json", - "inputRev": "main", + "inputRev": "master", "inherited": true, "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/mathlib4", - "type": "git", - "subDir": null, - "scope": "", - "rev": "d7317655e2826dc1f1de9a0c138db2775c4bb841", - "name": "mathlib", - "manifestFile": "lake-manifest.json", - "inputRev": "v4.13.0", - "inherited": false, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/acmepjz/md4lean", + {"url": "https://github.com/leanprover-community/quote4", "type": "git", "subDir": null, - "scope": "", - "rev": "5e95f4776be5e048364f325c7e9d619bb56fb005", - "name": "MD4Lean", + "scope": "leanprover-community", + "rev": "221e8088e3a066b8676dc471ff10638cf1c10835", + "name": "Qq", "manifestFile": "lake-manifest.json", - "inputRev": "main", + "inputRev": "master", "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/fgdorais/lean4-unicode-basic", + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/batteries", "type": "git", "subDir": null, - "scope": "", - "rev": "107e98b3e7603628d9bfd817b4704488d8a25e96", - "name": "UnicodeBasic", + "scope": "leanprover-community", + "rev": "bd58e3506632241b59e406902d5e42b73cdeccce", + "name": "batteries", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/dupuisf/BibtexQuery", + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/lean4-cli", "type": "git", "subDir": null, - "scope": "", - "rev": "bdc2fc30b1e834b294759a5d391d83020a90058e", - "name": "BibtexQuery", + "scope": "leanprover", + "rev": "3de531c1135f5e3a01f3ac04830996fda476b28e", + "name": "Cli", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "v4.29.0-rc6", "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover/doc-gen4.git", - "type": "git", - "subDir": null, - "scope": "", - "rev": "c2156beadb1a4d049ff3b19fe396c5403025aac5", - "name": "«doc-gen4»", - "manifestFile": "lake-manifest.json", - "inputRev": "c2156beadb1a4d049ff3b19fe396c5403025aac5", - "inherited": false, - "configFile": "lakefile.lean"}], + "configFile": "lakefile.toml"}], "name": "optlib", "lakeDir": ".lake"} diff --git a/lakefile.lean b/lakefile.lean index 26f93ec..4e94bc9 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -10,7 +10,7 @@ package optlib where @[default_target] lean_lib Optlib where -require mathlib from git "https://github.com/leanprover-community/mathlib4"@"v4.13.0" +require mathlib from git "https://github.com/leanprover-community/mathlib4"@"v4.29.0-rc6" meta if get_config? env = some "CI_BUILD" then require «doc-gen4» from git diff --git a/lean-toolchain b/lean-toolchain index 4f86f95..87b20aa 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.13.0 +leanprover/lean4:v4.29.0-rc6