optlib/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean at main · optsuite/optlib · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/-
Copyright (c) 2023 Chenyi Li. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Chenyi Li, Zaiwen Wen
-/
import Mathlib.Analysis.Convex.Basic
import Mathlib.Analysis.Calculus.MeanValue
import Mathlib.Analysis.InnerProductSpace.Positive
import Optlib.Convex.ConvexFunction

variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E]
variable {x xm : E} {f : E → ℝ} {f' : E → E} {s : Set E}

open Set InnerProductSpace

/-
  A vector d is considered a descent direction at a point x if the inner product of the gradient at
  x with d is less than zero.
-/
def DescentDirection (d : E) (x : E) (_ : HasGradientAt f (f' x) x) : Prop :=
  inner (f' x) d < (0 : ℝ)

/-
  For any vector d, there does not exist a descent direction for the function f
  at the minimum point xm.
-/
theorem optimal_no_descent_direction (hf : ∀ x : E, HasGradientAt f (f' x) x)
    (min : IsMinOn f univ xm) (hfc : ContinuousOn f' univ) :
    ∀ d : E, ¬ (DescentDirection d xm (hf xm)) := by
  intro d
  by_contra h
  have : ∃ t : ℝ , f (xm + t • d) < f xm := by
    have h₁ : ∃ T : ℝ , T > 0 ∧ (∀ a ∈ Icc (- T) T, inner (f' (xm + a • d)) d < (0 : ℝ)) := by
      let g := fun r : ℝ ↦ (inner (f' (xm + r • d)) d : ℝ)
      have hg0 : g 0 = inner (f' xm) d := by simp [g]
      have hc : ContinuousOn g univ := by
        simp [g]
        apply ContinuousOn.inner
        · apply ContinuousOn.comp hfc
          · apply ContinuousOn.add continuousOn_const
            apply ContinuousOn.smul continuousOn_id continuousOn_const
          · simp
        · exact continuousOn_const
      have hu : ∃ u < (0 : ℝ) , inner (f' xm) d ≤  u := by
        use (inner (f' xm) d / 2)
        rw [DescentDirection] at h
        constructor
        · linarith
        · rw [le_div_iff₀ (by norm_num)]; linarith
      rcases hu with ⟨u, ⟨hu1, hu2⟩⟩
      rw [← hg0] at hu2
      have hc' : ∃ T, T > 0 ∧ (∀ a ∈ Icc (- T) T, g a < 0) := by
        have : univ ∈ nhds (0 : ℝ) := by simp
        rcases continuous (ContinuousOn.continuousAt hc this) with h1
        specialize h1 (- u / 2) (by linarith)
        rcases h1 with ⟨T, ⟨hT1, hT2⟩⟩
        use T / 2; constructor
        · linarith
        · intro a ha
          have : ‖a - 0‖ < T := by
            simp; rw [abs_lt]; simp at ha
            rcases ha with ⟨ha1, ha2⟩
            constructor; linarith; linarith
          specialize hT2 a this
          simp at hT2
          rw [abs_lt] at hT2
          rcases hT2 with ⟨_, hs2⟩
          rw [sub_lt_iff_lt_add] at hs2
          apply lt_trans hs2
          · linarith
      rcases hc' with ⟨T, ⟨hT1, hT2⟩⟩
      use T
    rcases h₁ with ⟨T, ⟨hT1,hT2⟩⟩
    have h₂ : ∃ t1 : ℝ, t1 ≥ -T ∧ t1 ≤ T ∧ f (xm + T • d) =
        f xm + inner (f' (xm + t1 • d)) (T • d) := by
      rcases (expansion hf xm (T • d)) with ⟨ts,⟨ts1,⟨ts2,ts3⟩⟩⟩
      use (ts • T)
      constructor
      · simp; apply le_trans
        · show -T ≤ 0
          linarith
        · simp [hT1]; linarith
      · constructor
        · simp [hT1]; linarith
        · rw [smul_assoc]; exact ts3
    rcases h₂ with ⟨t1, ⟨ht1, ⟨ht2, ht3⟩⟩⟩
    use T; rw [ht3]; simp; rw [inner_smul_right]
    simp at ht1
    exact mul_neg_of_pos_of_neg hT1 (hT2 t1 ⟨ht1, ht2⟩)
  rcases this with ⟨t, ht⟩
  have : f (xm + t • d) ≥ f xm := min trivial
  linarith

/-
  Suppose we have a function f defined on a set E, such that for every point x in E,
  f has a gradient f'(x) at x. Let xm be a point in E where f attains its minimum on the entire set.
  Assume that f' is continuous on the entire set, then the gradient of f at xm equals 0.
-/
theorem first_order_unconstrained (hf : ∀ x : E, HasGradientAt f (f' x) x) (min : IsMinOn f univ xm)
    (hfc : ContinuousOn f' univ) : f' xm = 0 := by
  by_contra h
  have h1: DescentDirection (-f' xm) xm (hf xm) := by
    rw [DescentDirection, inner_neg_right, Left.neg_neg_iff]
    rw [real_inner_self_eq_norm_sq]
    simp [h]
  exact (optimal_no_descent_direction hf min hfc (- f' xm)) h1

/-
  If a function f is convex and has a gradient at every point, and if xm is a
  point where the gradient is zero, then xm is a minimum point for f.
-/
theorem first_order_convex (hf : ∀ x : E, HasGradientAt f (f' x) x) (hcon : ConvexOn ℝ univ f)
    (hfm : f' xm = 0) : IsMinOn f univ xm := by
  have : ∀ y , f y ≥ f xm + inner (f' xm) (y - xm) := by
    intro y
    apply Convex_first_order_condition' (hf xm) hcon (by trivial)
    · trivial
  intro y
  dsimp; specialize this y
  rw [hfm, inner_zero_left, add_zero] at this
  exact fun _ => this

/-
  If f has a gradient at every point and f is convex, and the derivative of f is continuous
  then a point xm is a minimum point of f if and only if the derivative of f at xm is equal to zero.
-/
theorem first_order_convex_iff (hf : ∀ x : E, HasGradientAt f (f' x) x) (hcon : ConvexOn ℝ univ f)
    (hfc : ContinuousOn f' univ) :
    IsMinOn f univ xm ↔ f' xm = 0 := by
  constructor
  · intro hmin
    exact first_order_unconstrained hf hmin hfc
  · intro hgrad
    apply first_order_convex hf hcon hgrad