-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathOptimalityConditionOfUnconstrainedProblem.lean
More file actions
134 lines (127 loc) · 5.17 KB
/
OptimalityConditionOfUnconstrainedProblem.lean
File metadata and controls
134 lines (127 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/-
Copyright (c) 2023 Chenyi Li. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Chenyi Li, Zaiwen Wen
-/
import Mathlib.Analysis.Convex.Basic
import Mathlib.Analysis.Calculus.MeanValue
import Mathlib.Analysis.InnerProductSpace.Positive
import Optlib.Convex.ConvexFunction
variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E]
variable {x xm : E} {f : E → ℝ} {f' : E → E} {s : Set E}
open Set InnerProductSpace
/-
A vector d is considered a descent direction at a point x if the inner product of the gradient at
x with d is less than zero.
-/
def DescentDirection (d : E) (x : E) (_ : HasGradientAt f (f' x) x) : Prop :=
inner (f' x) d < (0 : ℝ)
/-
For any vector d, there does not exist a descent direction for the function f
at the minimum point xm.
-/
theorem optimal_no_descent_direction (hf : ∀ x : E, HasGradientAt f (f' x) x)
(min : IsMinOn f univ xm) (hfc : ContinuousOn f' univ) :
∀ d : E, ¬ (DescentDirection d xm (hf xm)) := by
intro d
by_contra h
have : ∃ t : ℝ , f (xm + t • d) < f xm := by
have h₁ : ∃ T : ℝ , T > 0 ∧ (∀ a ∈ Icc (- T) T, inner (f' (xm + a • d)) d < (0 : ℝ)) := by
let g := fun r : ℝ ↦ (inner (f' (xm + r • d)) d : ℝ)
have hg0 : g 0 = inner (f' xm) d := by simp [g]
have hc : ContinuousOn g univ := by
simp [g]
apply ContinuousOn.inner
· apply ContinuousOn.comp hfc
· apply ContinuousOn.add continuousOn_const
apply ContinuousOn.smul continuousOn_id continuousOn_const
· simp
· exact continuousOn_const
have hu : ∃ u < (0 : ℝ) , inner (f' xm) d ≤ u := by
use (inner (f' xm) d / 2)
rw [DescentDirection] at h
constructor
· linarith
· rw [le_div_iff₀ (by norm_num)]; linarith
rcases hu with ⟨u, ⟨hu1, hu2⟩⟩
rw [← hg0] at hu2
have hc' : ∃ T, T > 0 ∧ (∀ a ∈ Icc (- T) T, g a < 0) := by
have : univ ∈ nhds (0 : ℝ) := by simp
rcases continuous (ContinuousOn.continuousAt hc this) with h1
specialize h1 (- u / 2) (by linarith)
rcases h1 with ⟨T, ⟨hT1, hT2⟩⟩
use T / 2; constructor
· linarith
· intro a ha
have : ‖a - 0‖ < T := by
simp; rw [abs_lt]; simp at ha
rcases ha with ⟨ha1, ha2⟩
constructor; linarith; linarith
specialize hT2 a this
simp at hT2
rw [abs_lt] at hT2
rcases hT2 with ⟨_, hs2⟩
rw [sub_lt_iff_lt_add] at hs2
apply lt_trans hs2
· linarith
rcases hc' with ⟨T, ⟨hT1, hT2⟩⟩
use T
rcases h₁ with ⟨T, ⟨hT1,hT2⟩⟩
have h₂ : ∃ t1 : ℝ, t1 ≥ -T ∧ t1 ≤ T ∧ f (xm + T • d) =
f xm + inner (f' (xm + t1 • d)) (T • d) := by
rcases (expansion hf xm (T • d)) with ⟨ts,⟨ts1,⟨ts2,ts3⟩⟩⟩
use (ts • T)
constructor
· simp; apply le_trans
· show -T ≤ 0
linarith
· simp [hT1]; linarith
· constructor
· simp [hT1]; linarith
· rw [smul_assoc]; exact ts3
rcases h₂ with ⟨t1, ⟨ht1, ⟨ht2, ht3⟩⟩⟩
use T; rw [ht3]; simp; rw [inner_smul_right]
simp at ht1
exact mul_neg_of_pos_of_neg hT1 (hT2 t1 ⟨ht1, ht2⟩)
rcases this with ⟨t, ht⟩
have : f (xm + t • d) ≥ f xm := min trivial
linarith
/-
Suppose we have a function f defined on a set E, such that for every point x in E,
f has a gradient f'(x) at x. Let xm be a point in E where f attains its minimum on the entire set.
Assume that f' is continuous on the entire set, then the gradient of f at xm equals 0.
-/
theorem first_order_unconstrained (hf : ∀ x : E, HasGradientAt f (f' x) x) (min : IsMinOn f univ xm)
(hfc : ContinuousOn f' univ) : f' xm = 0 := by
by_contra h
have h1: DescentDirection (-f' xm) xm (hf xm) := by
rw [DescentDirection, inner_neg_right, Left.neg_neg_iff]
rw [real_inner_self_eq_norm_sq]
simp [h]
exact (optimal_no_descent_direction hf min hfc (- f' xm)) h1
/-
If a function f is convex and has a gradient at every point, and if xm is a
point where the gradient is zero, then xm is a minimum point for f.
-/
theorem first_order_convex (hf : ∀ x : E, HasGradientAt f (f' x) x) (hcon : ConvexOn ℝ univ f)
(hfm : f' xm = 0) : IsMinOn f univ xm := by
have : ∀ y , f y ≥ f xm + inner (f' xm) (y - xm) := by
intro y
apply Convex_first_order_condition' (hf xm) hcon (by trivial)
· trivial
intro y
dsimp; specialize this y
rw [hfm, inner_zero_left, add_zero] at this
exact fun _ => this
/-
If f has a gradient at every point and f is convex, and the derivative of f is continuous
then a point xm is a minimum point of f if and only if the derivative of f at xm is equal to zero.
-/
theorem first_order_convex_iff (hf : ∀ x : E, HasGradientAt f (f' x) x) (hcon : ConvexOn ℝ univ f)
(hfc : ContinuousOn f' univ) :
IsMinOn f univ xm ↔ f' xm = 0 := by
constructor
· intro hmin
exact first_order_unconstrained hf hmin hfc
· intro hgrad
apply first_order_convex hf hcon hgrad