bfgs_linesearch.hpp
Go to the documentation of this file.
1 #ifndef STAN_OPTIMIZATION_BFGS_LINESEARCH_HPP
2 #define STAN_OPTIMIZATION_BFGS_LINESEARCH_HPP
3 
4 #include <boost/math/special_functions/fpclassify.hpp>
5 #include <algorithm>
6 #include <cmath>
7 #include <cstdlib>
8 #include <string>
9 #include <limits>
10 
11 namespace stan {
12  namespace optimization {
13  /**
14  * Find the minima in an interval [loX, hiX] of a cubic function which
15  * interpolates the points, function values and gradients provided.
16  *
17  * Implicitly, this function constructs an interpolating polynomial
18  * g(x) = a_3 x^3 + a_2 x^2 + a_1 x + a_0
19  * such that g(0) = 0, g(x1) = f1, g'(0) = df0, g'(x1) = df1 where
20  * g'(x) = 3 a_3 x^2 + 2 a_2 x + a_1
21  * is the derivative of g(x). It then computes the roots of g'(x) and
22  * finds the minimal value of g(x) on the interval [loX,hiX] including
23  * the end points.
24  *
25  * This function implements the full parameter version of CubicInterp().
26  *
27  * @param df0 First derivative value, f'(x0)
28  * @param x1 Second point
29  * @param f1 Second function value, f(x1)
30  * @param df1 Second derivative value, f'(x1)
31  * @param loX Lower bound on the interval of solutions
32  * @param hiX Upper bound on the interval of solutions
33  **/
34  template<typename Scalar>
35  Scalar CubicInterp(const Scalar &df0,
36  const Scalar &x1, const Scalar &f1, const Scalar &df1,
37  const Scalar &loX, const Scalar &hiX) {
38  const Scalar c3((-12*f1 + 6*x1*(df0 + df1))/(x1*x1*x1));
39  const Scalar c2(-(4*df0 + 2*df1)/x1 + 6*f1/(x1*x1));
40  const Scalar &c1(df0);
41 
42  const Scalar t_s = std::sqrt(c2*c2 - 2.0*c1*c3);
43  const Scalar s1 = - (c2 + t_s)/c3;
44  const Scalar s2 = - (c2 - t_s)/c3;
45 
46  Scalar tmpF;
47  Scalar minF, minX;
48 
49  // Check value at lower bound
50  minF = loX*(loX*(loX*c3/3.0 + c2)/2.0 + c1);
51  minX = loX;
52 
53  // Check value at upper bound
54  tmpF = hiX*(hiX*(hiX*c3/3.0 + c2)/2.0 + c1);
55  if (tmpF < minF) {
56  minF = tmpF;
57  minX = hiX;
58  }
59 
60  // Check value of first root
61  if (loX < s1 && s1 < hiX) {
62  tmpF = s1*(s1*(s1*c3/3.0 + c2)/2.0 + c1);
63  if (tmpF < minF) {
64  minF = tmpF;
65  minX = s1;
66  }
67  }
68 
69  // Check value of second root
70  if (loX < s2 && s2 < hiX) {
71  tmpF = s2*(s2*(s2*c3/3.0 + c2)/2.0 + c1);
72  if (tmpF < minF) {
73  minF = tmpF;
74  minX = s2;
75  }
76  }
77 
78  return minX;
79  }
80 
81  /**
82  * Find the minima in an interval [loX, hiX] of a cubic function which
83  * interpolates the points, function values and gradients provided.
84  *
85  * Implicitly, this function constructs an interpolating polynomial
86  * g(x) = a_3 x^3 + a_2 x^2 + a_1 x + a_0
87  * such that g(x0) = f0, g(x1) = f1, g'(x0) = df0, g'(x1) = df1 where
88  * g'(x) = 3 a_3 x^2 + 2 a_2 x + a_1
89  * is the derivative of g(x). It then computes the roots of g'(x) and
90  * finds the minimal value of g(x) on the interval [loX,hiX] including
91  * the end points.
92  *
93  * @param x0 First point
94  * @param f0 First function value, f(x0)
95  * @param df0 First derivative value, f'(x0)
96  * @param x1 Second point
97  * @param f1 Second function value, f(x1)
98  * @param df1 Second derivative value, f'(x1)
99  * @param loX Lower bound on the interval of solutions
100  * @param hiX Upper bound on the interval of solutions
101  **/
102  template<typename Scalar>
103  Scalar CubicInterp(const Scalar &x0, const Scalar &f0, const Scalar &df0,
104  const Scalar &x1, const Scalar &f1, const Scalar &df1,
105  const Scalar &loX, const Scalar &hiX) {
106  return x0 + CubicInterp(df0, x1-x0, f1-f0, df1, loX-x0, hiX-x0);
107  }
108 
109  /**
110  * An internal utility function for implementing WolfeLineSearch()
111  **/
112  template<typename FunctorType, typename Scalar, typename XType>
113  int WolfLSZoom(Scalar &alpha, XType &newX, Scalar &newF, XType &newDF,
114  FunctorType &func,
115  const XType &x, const Scalar &f, const Scalar &dfp,
116  const Scalar &c1dfp, const Scalar &c2dfp, const XType &p,
117  Scalar alo, Scalar aloF, Scalar aloDFp,
118  Scalar ahi, Scalar ahiF, Scalar ahiDFp,
119  const Scalar &min_range) {
120  Scalar d1, d2, newDFp;
121  int itNum(0);
122 
123  while (1) {
124  itNum++;
125 
126  if (std::fabs(alo-ahi) < min_range)
127  return 1;
128 
129  if (itNum%5 == 0) {
130  alpha = 0.5*(alo+ahi);
131  } else {
132  // Perform cubic interpolation to determine next point to try
133  d1 = aloDFp + ahiDFp - 3*(aloF-ahiF)/(alo-ahi);
134  d2 = std::sqrt(d1*d1 - aloDFp*ahiDFp);
135  if (ahi < alo)
136  d2 = -d2;
137  alpha = ahi
138  - (ahi - alo) * (ahiDFp + d2 - d1) / (ahiDFp - aloDFp + 2*d2);
139  if (!boost::math::isfinite(alpha) ||
140  alpha < std::min(alo, ahi) + 0.01 * std::fabs(alo - ahi) ||
141  alpha > std::max(alo, ahi) - 0.01 * std::fabs(alo - ahi))
142  alpha = 0.5 * (alo + ahi);
143  }
144 
145  newX = x + alpha * p;
146  while (func(newX, newF, newDF)) {
147  alpha = 0.5 * (alpha + std::min(alo, ahi));
148  if (std::fabs(std::min(alo, ahi) - alpha) < min_range)
149  return 1;
150  newX = x + alpha * p;
151  }
152  newDFp = newDF.dot(p);
153  if (newF > (f + alpha * c1dfp) || newF >= aloF) {
154  ahi = alpha;
155  ahiF = newF;
156  ahiDFp = newDFp;
157  } else {
158  if (std::fabs(newDFp) <= -c2dfp)
159  break;
160  if (newDFp*(ahi-alo) >= 0) {
161  ahi = alo;
162  ahiF = aloF;
163  ahiDFp = aloDFp;
164  }
165  alo = alpha;
166  aloF = newF;
167  aloDFp = newDFp;
168  }
169  }
170  return 0;
171  }
172 
173  /**
174  * Perform a line search which finds an approximate solution to:
175  * \f[
176  * \min_\alpha f(x_0 + \alpha p)
177  * \f]
178  * satisfying the strong Wolfe conditions:
179  * 1) \f$ f(x_0 + \alpha p) \leq f(x_0) + c_1 \alpha p^T g(x_0) \f$
180  * 2) \f$ \vert p^T g(x_0 + \alpha p) \vert \leq c_2 \vert p^T g(x_0) \vert \f$
181  * where \f$g(x) = \frac{\partial f}{\partial x}\f$ is the gradient of f(x).
182  *
183  * @tparam FunctorType A type which supports being called as
184  * ret = func(x,f,g)
185  * where x is the input point, f and g are the function value and
186  * gradient at x and ret is non-zero if function evaluation fails.
187  *
188  * @param func Function which is being minimized.
189  *
190  * @param alpha First value of \f$ \alpha \f$ to try. Upon return this
191  * contains the final value of the \f$ \alpha \f$.
192  *
193  * @param x1 Final point, equal to \f$ x_0 + \alpha p \f$.
194  *
195  * @param f1 Final point function value, equal to \f$ f(x_0 + \alpha p) \f$.
196  *
197  * @param gradx1 Final point gradient, equal to \f$ g(x_0 + \alpha p) \f$.
198  *
199  * @param p Search direction. It is assumed to be a descent direction such
200  * that \f$ p^T g(x_0) < 0 \f$.
201  *
202  * @param x0 Value of starting point, \f$ x_0 \f$.
203  *
204  * @param f0 Value of function at starting point, \f$ f(x_0) \f$.
205  *
206  * @param gradx0 Value of function gradient at starting point,
207  * \f$ g(x_0) \f$.
208  *
209  * @param c1 Parameter of the Wolfe conditions. \f$ 0 < c_1 < c_2 < 1 \f$
210  * Typically c1 = 1e-4.
211  *
212  * @param c2 Parameter of the Wolfe conditions. \f$ 0 < c_1 < c_2 < 1 \f$
213  * Typically c2 = 0.9.
214  *
215  * @param minAlpha Smallest allowable step-size.
216  *
217  * @return Returns zero on success, non-zero otherwise.
218  **/
219  template<typename FunctorType, typename Scalar, typename XType>
220  int WolfeLineSearch(FunctorType &func,
221  Scalar &alpha,
222  XType &x1, Scalar &f1, XType &gradx1,
223  const XType &p,
224  const XType &x0, const Scalar &f0, const XType &gradx0,
225  const Scalar &c1, const Scalar &c2,
226  const Scalar &minAlpha) {
227  const Scalar dfp(gradx0.dot(p));
228  const Scalar c1dfp(c1*dfp);
229  const Scalar c2dfp(c2*dfp);
230 
231  Scalar alpha0(minAlpha);
232  Scalar alpha1(alpha);
233 
234  Scalar prevF(f0);
235  XType prevDF(gradx0);
236  Scalar prevDFp(dfp);
237  Scalar newDFp;
238 
239  int retCode = 0, nits = 0, ret;
240 
241  while (1) {
242  x1.noalias() = x0 + alpha1 * p;
243  ret = func(x1, f1, gradx1);
244  if (ret != 0) {
245  alpha1 = 0.5 * (alpha0 + alpha1);
246  continue;
247  }
248  newDFp = gradx1.dot(p);
249  if ((f1 > f0 + alpha * c1dfp) || (f1 >= prevF && nits > 0)) {
250  retCode = WolfLSZoom(alpha, x1, f1, gradx1,
251  func,
252  x0, f0, dfp,
253  c1dfp, c2dfp, p,
254  alpha0, prevF, prevDFp,
255  alpha1, f1, newDFp,
256  1e-16);
257  break;
258  }
259  if (std::fabs(newDFp) <= -c2dfp) {
260  alpha = alpha1;
261  break;
262  }
263  if (newDFp >= 0) {
264  retCode = WolfLSZoom(alpha, x1, f1, gradx1,
265  func,
266  x0, f0, dfp,
267  c1dfp, c2dfp, p,
268  alpha1, f1, newDFp,
269  alpha0, prevF, prevDFp,
270  1e-16);
271  break;
272  }
273 
274  alpha0 = alpha1;
275  prevF = f1;
276  std::swap(prevDF, gradx1);
277  prevDFp = newDFp;
278 
279  alpha1 *= 10.0;
280 
281  nits++;
282  }
283  return retCode;
284  }
285  }
286 }
287 
288 #endif
T max(const caf::Proxy< T > &a, T b)
bool isfinite(const stan::math::var &v)
fvar< T > fabs(const fvar< T > &x)
Definition: fabs.hpp:15
Float_t x1[n_points_granero]
Definition: compare.C:5
const char * p
Definition: xmltok.h:285
T sqrt(T number)
Definition: d0nt_math.hpp:156
int WolfeLineSearch(FunctorType &func, Scalar &alpha, XType &x1, Scalar &f1, XType &gradx1, const XType &p, const XType &x0, const Scalar &f0, const XType &gradx0, const Scalar &c1, const Scalar &c2, const Scalar &minAlpha)
void swap(art::HLTGlobalStatus &lhs, art::HLTGlobalStatus &rhs)
c2
Definition: demo5.py:33
Float_t f1
double func(double x, double y)
Scalar CubicInterp(const Scalar &df0, const Scalar &x1, const Scalar &f1, const Scalar &df1, const Scalar &loX, const Scalar &hiX)
int WolfLSZoom(Scalar &alpha, XType &newX, Scalar &newF, XType &newDF, FunctorType &func, const XType &x, const Scalar &f, const Scalar &dfp, const Scalar &c1dfp, const Scalar &c2dfp, const XType &p, Scalar alo, Scalar aloF, Scalar aloDFp, Scalar ahi, Scalar ahiF, Scalar ahiDFp, const Scalar &min_range)
c1
Definition: demo5.py:24
T min(const caf::Proxy< T > &a, T b)
Float_t e
Definition: plot.C:35