-
Notifications
You must be signed in to change notification settings - Fork 2
/
linesearch.py
166 lines (159 loc) · 4.58 KB
/
linesearch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import numpy as np
from numpy.linalg import norm
def linesearch(obj, g, x, p, maxiter=200, alpha=1, c1=1e-4, c2=0.9):
"""
INPUTS:
f: a function handle of objective value
g: starting gradient gk
x: starting x
p: direction p
maxiter: maximum iteration of Armijo line-search
alpha: initial step-size
c1: parameter of Armijo line-search
c2: parameter of strong Wolfe condition
OUTPUTS:
x: results
alpha: proper step-size
T: iterations
"""
T = 0
fa = obj(x + alpha*p, 'f')
fk = obj(x, 'f')
while fa > fk+alpha*c1*np.matmul(g.T,p) and T < maxiter:
alpha *= 0.5
fa = obj(x + alpha*p, 'f')
T += 1
# if alpha < 1E-6:
# alpha = 0 # Kill small alpha
x += alpha*p
return x, alpha, T
def linesearchgrad(obj, H, x, p, maxiter=200, alpha=1, c1=1e-4):
"""
INPUTS:
g: a function handle of gradient
H: Hessian Hk vector product
x: starting x
p: direction p
maxiter: maximum iteration of Armijo line-search
alpha: initial step-size
c1: parameter of Armijo line-search
OUTPUTS:
x: results
alpha: proper step-size
T: iterations
"""
T = 0
ga = obj(x + alpha*p, 'g')
gk = obj(x, 'g')
while norm(ga)**2 > norm(gk)**2 + 2*alpha*c1*np.matmul(
p.T, Ax(H, gk)) and T < maxiter:
alpha *= 0.5
ga = obj(x + alpha*p, 'g')
T += 1
# if alpha < 1E-6:
# alpha = 0 # Kill small alpha
x += alpha*p
return x, alpha, T
def linesearchzoom(obj, x, p, maxiter=200, c1=1e-4, c2=0.9, alpha0=1):
"""
All vector are column vectors.
INPUTS:
fg: a function handle of both objective function and its gradient
x: starting x
p: direction p
maxiter: maximum iteration of line search with strong Wolfe
c1: parameter of Armijo line-search
c2: parameter of strong Wolfe condition
alpha0: initial step-size
OUTPUTS:
x: results
alpha: proper step-size
T: iterations
"""
#phi(x) = f(x), phi'(x) = g(x).T.dot(p)
itrs = 0
itrs2 = 0
a1 = 0
# a2 = infun(0, amax)
a2 = alpha0
f0, g0 = obj(x, 'fg')
fb = f0 # f preview
while itrs < maxiter:
fa, ga = obj(x + a2*p, 'fg')
g0p = g0.T.dot(p)
if fa > f0 + a2*c1*g0p or (fa >= fb and itrs > 0):
alpha, itrs2 = zoomf(a1, a2, obj, f0, fa, g0p, x, p, c1, c2, itrs, maxiter)
break
gap = ga.T.dot(p)
if abs(gap) <= -c2*g0p:
alpha = a2
itrs2 = 0
break
if gap >= 0:
alpha, itrs2 = zoomf(a2, a1, obj, f0, fa, g0p, x, p, c1, c2, itrs, maxiter)
# alpha, itrs2 = zoomf(a1, a2, fg, f0, fa, g0p, x, p, c1, c2, itrs, maxiter)
break
a2 = a2*2
fb = fa
itrs += 1
itrs += itrs2
# if itrs >= maxiter:
# alpha = 0
return alpha, itrs
def zoomf(a1, a2, obj, f0, fa, g0p, x, p, c1, c2, itrs, maxiter):
itrs2 = 0
while (itrs2 + itrs) < maxiter :
#quadratic
itrs2 += 1
# lower bound
fa1, ga1 = obj(x + a1*p, 'fg')
ga1p = ga1.T.dot(p)
# upper bound
fa2, ga2 = obj(x + a2*p, 'fg')
ga2p = ga2.T.dot(p)
aj = cubicInterp(a1, a2, fa1, fa2, ga1p, ga2p)
a_mid = (a1 + a2)/2
if not inside(aj, a1, a_mid):
aj = a_mid
faj, gaj = obj(x + aj*p, 'fg')
if faj > f0 + aj*c1*g0p or faj >= fa1:
a2 = aj
else:
gajp = gaj.T.dot(p)
if np.abs(gajp) <= -c2*g0p:
break
if gajp*(a2 - a1) >= 0:
a2 = a1
a1 = aj
return aj, itrs2
def cubicInterp(x1, x2, f1, f2, g1, g2):
"""
find minimizer of the Hermite-cubic polynomial interpolating a
function of one variable, at the two points x1 and x2, using the
function (f1 and f2) and derivative (g1 and g2).
"""
# Nocedal and Wright Eqn (3.59)
d1 = g1 + g2 - 3*(f1 - f2)/(x1 - x2)
d2 = np.sign(x2 - x1)*np.sqrt(d1**2 - g1*g2)
xmin = x2 - (x2 - x1)*(g2 + d2 - d1)/(g2 - g1 + 2*d2);
return xmin
def inside(x, a, b):
"""
test x \in (a, b) or not
"""
l = 0
if not np.isreal(x):
return l
if a <= b:
if x >= a and x <= b:
l = 1
else:
if x >= b and x <= a:
l = 1
return l
def Ax(A, x):
if callable(A):
Ax = A(x)
else:
Ax =A.dot(x)
return Ax