stan::optimization Namespace Reference

Classes

class  BFGSLineSearch
 
class  BFGSMinimizer
 
class  BFGSUpdate_HInv
 
class  ConvergenceOptions
 
class  LBFGSUpdate
 
class  LSOptions
 
class  mock_lbfgs_update
 
class  ModelAdaptor
 

Typedefs

typedef Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic > matrix_d
 
typedef Eigen::Matrix< double, Eigen::Dynamic, 1 > vector_d
 

Enumerations

enum  TerminationCondition {
  TERM_SUCCESS = 0, TERM_ABSX = 10, TERM_ABSF = 20, TERM_RELF = 21,
  TERM_ABSGRAD = 30, TERM_RELGRAD = 31, TERM_MAXIT = 40, TERM_LSFAIL = -1
}
 

Functions

template<typename Scalar >
Scalar CubicInterp (const Scalar &df0, const Scalar &x1, const Scalar &f1, const Scalar &df1, const Scalar &loX, const Scalar &hiX)
 
template<typename Scalar >
Scalar CubicInterp (const Scalar &x0, const Scalar &f0, const Scalar &df0, const Scalar &x1, const Scalar &f1, const Scalar &df1, const Scalar &loX, const Scalar &hiX)
 
template<typename FunctorType , typename Scalar , typename XType >
int WolfLSZoom (Scalar &alpha, XType &newX, Scalar &newF, XType &newDF, FunctorType &func, const XType &x, const Scalar &f, const Scalar &dfp, const Scalar &c1dfp, const Scalar &c2dfp, const XType &p, Scalar alo, Scalar aloF, Scalar aloDFp, Scalar ahi, Scalar ahiF, Scalar ahiDFp, const Scalar &min_range)
 
template<typename FunctorType , typename Scalar , typename XType >
int WolfeLineSearch (FunctorType &func, Scalar &alpha, XType &x1, Scalar &f1, XType &gradx1, const XType &p, const XType &x0, const Scalar &f0, const XType &gradx0, const Scalar &c1, const Scalar &c2, const Scalar &minAlpha)
 
void make_negative_definite_and_solve (matrix_d &H, vector_d &g)
 
template<typename M >
double newton_step (M &model, std::vector< double > &params_r, std::vector< int > &params_i, std::ostream *output_stream=0)
 

Typedef Documentation

typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> stan::optimization::matrix_d

Definition at line 14 of file newton.hpp.

typedef Eigen::Matrix<double, Eigen::Dynamic, 1> stan::optimization::vector_d

Definition at line 15 of file newton.hpp.

Enumeration Type Documentation

enum stan::optimization::TerminationCondition

Enumerator
TERM_SUCCESS 
TERM_ABSX 
TERM_ABSF 
TERM_RELF 
TERM_ABSGRAD 
TERM_RELGRAD 
TERM_MAXIT 
TERM_LSFAIL 

Definition at line 20 of file bfgs.hpp.

Function Documentation

template<typename Scalar >
Scalar stan::optimization::CubicInterp ( const Scalar &  df0,
const Scalar &  x1,
const Scalar &  f1,
const Scalar &  df1,
const Scalar &  loX,
const Scalar &  hiX 
)

Find the minimum over an interval [loX, hiX] of a cubic function that interpolates the points, function values, and gradients provided.

Implicitly, this function constructs an interpolating polynomial $ g(x) = a_3 x^3 + a_2 x^2 + a_1 x + a_0 $ such that $ g(0) = 0 $, $ g(x_1) = f_1 $, $ g'(0) = df_0 $, and $ g'(x_1) = df_1 $, where $ g'(x) = 3 a_3 x^2 + 2 a_2 x + a_1 $ is the derivative of $ g(x) $. It then computes the roots of $ g'(x) $ and finds the minimal value of $ g(x) $ on the interval $ [loX, hiX] $, including the endpoints.

This overload assumes $ x_0 = 0 $ and $ f_0 = 0 $; the full-parameter overload of CubicInterp() is implemented by shifting its inputs and delegating to this one.

Parameters
  df0   First derivative value, f'(0)
  x1    Second point
  f1    Second function value, f(x1)
  df1   Second derivative value, f'(x1)
  loX   Lower bound on the interval of solutions
  hiX   Upper bound on the interval of solutions

Returns
  The location of the minimum of the interpolating cubic on [loX, hiX].

Definition at line 35 of file bfgs_linesearch.hpp.

References std::sqrt().

Referenced by CubicInterp(), stan::optimization::BFGSMinimizer< ModelAdaptor< M >, QNUpdateType, Scalar, DimAtCompile >::step(), and TEST().

{
  const Scalar c3((-12 * f1 + 6 * x1 * (df0 + df1)) / (x1 * x1 * x1));
  const Scalar c2(-(4 * df0 + 2 * df1) / x1 + 6 * f1 / (x1 * x1));
  const Scalar &c1(df0);

  const Scalar t_s = std::sqrt(c2 * c2 - 2.0 * c1 * c3);
  const Scalar s1 = -(c2 + t_s) / c3;
  const Scalar s2 = -(c2 - t_s) / c3;

  Scalar tmpF;
  Scalar minF, minX;

  // Check value at lower bound
  minF = loX * (loX * (loX * c3 / 3.0 + c2) / 2.0 + c1);
  minX = loX;

  // Check value at upper bound
  tmpF = hiX * (hiX * (hiX * c3 / 3.0 + c2) / 2.0 + c1);
  if (tmpF < minF) {
    minF = tmpF;
    minX = hiX;
  }

  // Check value of first root
  if (loX < s1 && s1 < hiX) {
    tmpF = s1 * (s1 * (s1 * c3 / 3.0 + c2) / 2.0 + c1);
    if (tmpF < minF) {
      minF = tmpF;
      minX = s1;
    }
  }

  // Check value of second root
  if (loX < s2 && s2 < hiX) {
    tmpF = s2 * (s2 * (s2 * c3 / 3.0 + c2) / 2.0 + c1);
    if (tmpF < minF) {
      minF = tmpF;
      minX = s2;
    }
  }

  return minX;
}
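For example, interpolating $ g(x) = x^3 - 3 x $ from its value and derivative data at 0 and 2 recovers the interior minimizer $ x = 1 $. A minimal sketch (the include path stan/optimization/bfgs_linesearch.hpp is inferred from the definition note above):

#include <iostream>
#include <stan/optimization/bfgs_linesearch.hpp>

int main() {
  // g(x) = x^3 - 3x: g(0) = 0, g'(0) = -3, g(2) = 2, g'(2) = 9.
  // The interpolating cubic is exact here, so the minimizer on [0, 2]
  // is the stationary point x = 1.
  double xmin = stan::optimization::CubicInterp(-3.0, 2.0, 2.0, 9.0, 0.0, 2.0);
  std::cout << xmin << "\n";  // prints 1
}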
template<typename Scalar >
Scalar stan::optimization::CubicInterp ( const Scalar &  x0,
const Scalar &  f0,
const Scalar &  df0,
const Scalar &  x1,
const Scalar &  f1,
const Scalar &  df1,
const Scalar &  loX,
const Scalar &  hiX 
)

Find the minimum over an interval [loX, hiX] of a cubic function that interpolates the points, function values, and gradients provided.

Implicitly, this function constructs an interpolating polynomial $ g(x) = a_3 x^3 + a_2 x^2 + a_1 x + a_0 $ such that $ g(x_0) = f_0 $, $ g(x_1) = f_1 $, $ g'(x_0) = df_0 $, and $ g'(x_1) = df_1 $, where $ g'(x) = 3 a_3 x^2 + 2 a_2 x + a_1 $ is the derivative of $ g(x) $. It then computes the roots of $ g'(x) $ and finds the minimal value of $ g(x) $ on the interval $ [loX, hiX] $, including the endpoints.

Parameters
  x0    First point
  f0    First function value, f(x0)
  df0   First derivative value, f'(x0)
  x1    Second point
  f1    Second function value, f(x1)
  df1   Second derivative value, f'(x1)
  loX   Lower bound on the interval of solutions
  hiX   Upper bound on the interval of solutions

Returns
  The location of the minimum of the interpolating cubic on [loX, hiX].

Definition at line 103 of file bfgs_linesearch.hpp.

References CubicInterp().

{
  return x0 + CubicInterp(df0, x1 - x0, f1 - f0, df1, loX - x0, hiX - x0);
}
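Since this overload only shifts the problem to the origin, translating the previous example by 5 shifts the answer by 5 as well. Continuing the previous sketch's main() (same assumed include path):

// h(x) = (x - 5)^3 - 3 (x - 5), so h(5) = 0, h'(5) = -3, h(7) = 2, h'(7) = 9.
double shifted = stan::optimization::CubicInterp(5.0, 0.0, -3.0,
                                                 7.0, 2.0, 9.0,
                                                 5.0, 7.0);
std::cout << shifted << "\n";  // prints 6, i.e. 5 + the unshifted answer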
void stan::optimization::make_negative_definite_and_solve ( matrix_d &  H,
vector_d &  g 
)
inline

Definition at line 20 of file newton.hpp.

References fabs().

Referenced by newton_step().

{
  Eigen::SelfAdjointEigenSolver<matrix_d> solver(H);
  matrix_d eigenvectors = solver.eigenvectors();
  vector_d eigenvalues = solver.eigenvalues();
  vector_d eigenprojections = eigenvectors.transpose() * g;
  // Work in the eigenbasis of H: replacing each eigenvalue lambda_i by
  // -|lambda_i| makes the system negative definite, and on exit g holds
  // the solution of that modified system.
  for (int i = 0; i < g.size(); i++) {
    eigenprojections[i] = -eigenprojections[i] / fabs(eigenvalues[i]);
  }
  g = eigenvectors * eigenprojections;
}
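A small sketch of the effect on an indefinite Hessian (the include path stan/optimization/newton.hpp is inferred from the definition notes; Eigen is required). Each component of g is rescaled by $ -1 / \vert \lambda_i \vert $ in the eigenbasis, giving a usable direction even though H is not negative definite:

#include <iostream>
#include <Eigen/Dense>
#include <stan/optimization/newton.hpp>

int main() {
  stan::optimization::matrix_d H(2, 2);
  H << 2, 0,
       0, -1;                           // indefinite: eigenvalues 2 and -1
  stan::optimization::vector_d g(2);
  g << 4, 3;
  stan::optimization::make_negative_definite_and_solve(H, g);
  // H is diagonal, so each component becomes -g_i / |lambda_i|.
  std::cout << g.transpose() << "\n";   // prints -2 -3
}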
template<typename M >
double stan::optimization::newton_step ( M &  model,
std::vector< double > &  params_r,
std::vector< int > &  params_i,
std::ostream *  output_stream = 0 
)

Definition at line 32 of file newton.hpp.

References stan::model::grad_hess_log_prob(), stan::model::log_prob_grad(), and make_negative_definite_and_solve().

Referenced by stan::services::optimize::newton().

{
  std::vector<double> gradient;
  std::vector<double> hessian;

  double f0
    = stan::model::grad_hess_log_prob<true, false>(model,
                                                   params_r, params_i,
                                                   gradient, hessian);
  matrix_d H(params_r.size(), params_r.size());
  for (size_t i = 0; i < hessian.size(); i++) {
    H(i) = hessian[i];
  }
  vector_d g(params_r.size());
  for (size_t i = 0; i < gradient.size(); i++)
    g(i) = gradient[i];
  make_negative_definite_and_solve(H, g);
  // H.ldlt().solveInPlace(g);

  std::vector<double> new_params_r(params_r.size());
  double step_size = 2;
  double min_step_size = 1e-50;
  double f1 = -1e100;

  // Backtrack: halve the step until the log probability does not decrease.
  while (f1 < f0) {
    step_size *= 0.5;
    if (step_size < min_step_size)
      return f0;

    for (size_t i = 0; i < params_r.size(); i++)
      new_params_r[i] = params_r[i] - step_size * g[i];
    try {
      f1 = stan::model::log_prob_grad<true, false>(model,
                                                   new_params_r,
                                                   params_i, gradient);
    } catch (std::exception& e) {
      // FIXME: this is not a good way to handle a general exception
      f1 = -1e100;
    }
  }
  for (size_t i = 0; i < params_r.size(); i++)
    params_r[i] = new_params_r[i];

  return f1;
}
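Calling newton_step() requires a full Stan model supporting grad_hess_log_prob(), so no runnable call is shown here. The sketch below isolates only the step-halving logic on the toy objective $ f(x) = -x^2 $; everything in it is illustrative rather than part of the Stan API:

#include <iostream>

int main() {
  // Maximize f(x) = -x^2 starting from x = 3, halving the step until the
  // objective stops decreasing, as the loop in newton_step() does.
  double x = 3.0;
  double f0 = -x * x;
  // Hessian is -2 and gradient is -2x, so the solved direction
  // -grad / |lambda| (what make_negative_definite_and_solve produces)
  // is x itself.
  double g = x;
  double step_size = 2.0;
  double f1 = -1e100;
  while (f1 < f0) {
    step_size *= 0.5;
    double x_new = x - step_size * g;
    f1 = -x_new * x_new;
    if (f1 >= f0) x = x_new;
  }
  std::cout << x << "\n";  // prints 0, the maximizer
}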
template<typename FunctorType , typename Scalar , typename XType >
int stan::optimization::WolfeLineSearch ( FunctorType &  func,
Scalar &  alpha,
XType &  x1,
Scalar &  f1,
XType &  gradx1,
const XType &  p,
const XType &  x0,
const Scalar &  f0,
const XType &  gradx0,
const Scalar &  c1,
const Scalar &  c2,
const Scalar &  minAlpha 
)

Perform a line search which finds an approximate solution to:

\[ \min_\alpha f(x_0 + \alpha p) \]

satisfying the strong Wolfe conditions:

1) $ f(x_0 + \alpha p) \leq f(x_0) + c_1 \alpha p^T g(x_0) $ (sufficient decrease),
2) $ \vert p^T g(x_0 + \alpha p) \vert \leq c_2 \vert p^T g(x_0) \vert $ (curvature),

where $ g(x) = \frac{\partial f}{\partial x} $ is the gradient of f(x).

Template Parameters
  FunctorType   A type which supports being called as ret = func(x, f, g),
                where x is the input point, f and g are the function value
                and gradient at x, and ret is non-zero if the function
                evaluation fails.

Parameters
  func       Function which is being minimized.
  alpha      First value of $ \alpha $ to try. Upon return, contains the
             final value of $ \alpha $.
  x1         Final point, equal to $ x_0 + \alpha p $.
  f1         Final function value, equal to $ f(x_0 + \alpha p) $.
  gradx1     Final gradient, equal to $ g(x_0 + \alpha p) $.
  p          Search direction. It is assumed to be a descent direction such
             that $ p^T g(x_0) < 0 $.
  x0         Starting point, $ x_0 $.
  f0         Function value at the starting point, $ f(x_0) $.
  gradx0     Function gradient at the starting point, $ g(x_0) $.
  c1         Parameter of the Wolfe conditions, $ 0 < c_1 < c_2 < 1 $;
             typically c1 = 1e-4.
  c2         Parameter of the Wolfe conditions, $ 0 < c_1 < c_2 < 1 $;
             typically c2 = 0.9.
  minAlpha   Smallest allowable step size.

Returns
  Zero on success, non-zero otherwise.

Definition at line 220 of file bfgs_linesearch.hpp.

References std::fabs(), std::swap(), and WolfLSZoom().

Referenced by stan::optimization::BFGSMinimizer< ModelAdaptor< M >, QNUpdateType, Scalar, DimAtCompile >::step(), and TEST().

{
  const Scalar dfp(gradx0.dot(p));
  const Scalar c1dfp(c1 * dfp);
  const Scalar c2dfp(c2 * dfp);

  Scalar alpha0(minAlpha);
  Scalar alpha1(alpha);

  Scalar prevF(f0);
  XType prevDF(gradx0);
  Scalar prevDFp(dfp);
  Scalar newDFp;

  int retCode = 0, nits = 0, ret;

  while (1) {
    x1.noalias() = x0 + alpha1 * p;
    ret = func(x1, f1, gradx1);
    if (ret != 0) {
      // Evaluation failed; back off toward the last good step.
      alpha1 = 0.5 * (alpha0 + alpha1);
      continue;
    }
    newDFp = gradx1.dot(p);
    if ((f1 > f0 + alpha * c1dfp) || (f1 >= prevF && nits > 0)) {
      // Sufficient decrease violated: zoom between alpha0 and alpha1.
      retCode = WolfLSZoom(alpha, x1, f1, gradx1,
                           func,
                           x0, f0, dfp,
                           c1dfp, c2dfp, p,
                           alpha0, prevF, prevDFp,
                           alpha1, f1, newDFp,
                           1e-16);
      break;
    }
    if (std::fabs(newDFp) <= -c2dfp) {
      // Both Wolfe conditions hold.
      alpha = alpha1;
      break;
    }
    if (newDFp >= 0) {
      // Directional derivative changed sign: zoom with the bracket reversed.
      retCode = WolfLSZoom(alpha, x1, f1, gradx1,
                           func,
                           x0, f0, dfp,
                           c1dfp, c2dfp, p,
                           alpha1, f1, newDFp,
                           alpha0, prevF, prevDFp,
                           1e-16);
      break;
    }

    alpha0 = alpha1;
    prevF = f1;
    std::swap(prevDF, gradx1);
    prevDFp = newDFp;

    alpha1 *= 10.0;

    nits++;
  }
  return retCode;
}
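A minimal usage sketch (the include path stan/optimization/bfgs_linesearch.hpp is assumed as above; Eigen supplies the vector type): minimize $ f(x) = \frac{1}{2} \Vert x \Vert^2 $ from $ x_0 = (1) $ along $ p = (-1) $. The unit step already satisfies both strong Wolfe conditions, so the search returns immediately:

#include <iostream>
#include <Eigen/Dense>
#include <stan/optimization/bfgs_linesearch.hpp>

// Functor contract: ret = func(x, f, g); non-zero ret means failure.
struct QuadFunctor {
  int operator()(const Eigen::VectorXd& x, double& f, Eigen::VectorXd& g) {
    f = 0.5 * x.squaredNorm();
    g = x;
    return 0;
  }
};

int main() {
  QuadFunctor func;
  Eigen::VectorXd x0(1), gradx0(1), p(1), x1(1), gradx1(1);
  x0 << 1.0;
  gradx0 << 1.0;  // gradient at x0
  p << -1.0;      // descent direction: p^T gradx0 = -1 < 0
  double f0 = 0.5, f1 = 0.0, alpha = 1.0;

  int ret = stan::optimization::WolfeLineSearch(
      func, alpha, x1, f1, gradx1, p, x0, f0, gradx0,
      1e-4, 0.9, 1e-16);
  std::cout << ret << " " << alpha << " " << f1 << "\n";  // prints 0 1 0
}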
template<typename FunctorType , typename Scalar , typename XType >
int stan::optimization::WolfLSZoom ( Scalar &  alpha,
XType &  newX,
Scalar &  newF,
XType &  newDF,
FunctorType &  func,
const XType &  x,
const Scalar &  f,
const Scalar &  dfp,
const Scalar &  c1dfp,
const Scalar &  c2dfp,
const XType &  p,
Scalar  alo,
Scalar  aloF,
Scalar  aloDFp,
Scalar  ahi,
Scalar  ahiF,
Scalar  ahiDFp,
const Scalar &  min_range 
)

An internal utility function used to implement WolfeLineSearch(): it maintains and shrinks a bracket [alo, ahi] known to contain step lengths satisfying the strong Wolfe conditions, alternating cubic interpolation with bisection until a satisfying step is found or the bracket collapses below min_range.

Definition at line 113 of file bfgs_linesearch.hpp.

References std::fabs(), boost::math::isfinite(), std::min(), std::max(), and std::sqrt().

Referenced by TEST(), and WolfeLineSearch().

{
  Scalar d1, d2, newDFp;
  int itNum(0);

  while (1) {
    itNum++;

    if (std::fabs(alo - ahi) < min_range)
      return 1;

    if (itNum % 5 == 0) {
      // Periodically bisect to guarantee the bracket shrinks.
      alpha = 0.5 * (alo + ahi);
    } else {
      // Perform cubic interpolation to determine next point to try
      d1 = aloDFp + ahiDFp - 3 * (aloF - ahiF) / (alo - ahi);
      d2 = std::sqrt(d1 * d1 - aloDFp * ahiDFp);
      if (ahi < alo)
        d2 = -d2;
      alpha = ahi
        - (ahi - alo) * (ahiDFp + d2 - d1) / (ahiDFp - aloDFp + 2 * d2);
      // Fall back to bisection if the trial point is not finite or lands
      // too close to either end of the bracket.
      if (!boost::math::isfinite(alpha) ||
          alpha < std::min(alo, ahi) + 0.01 * std::fabs(alo - ahi) ||
          alpha > std::max(alo, ahi) - 0.01 * std::fabs(alo - ahi))
        alpha = 0.5 * (alo + ahi);
    }

    newX = x + alpha * p;
    while (func(newX, newF, newDF)) {
      // Evaluation failed; retreat toward the low end of the bracket.
      alpha = 0.5 * (alpha + std::min(alo, ahi));
      if (std::fabs(std::min(alo, ahi) - alpha) < min_range)
        return 1;
      newX = x + alpha * p;
    }
    newDFp = newDF.dot(p);
    if (newF > (f + alpha * c1dfp) || newF >= aloF) {
      // Sufficient decrease fails: the new point becomes the high end.
      ahi = alpha;
      ahiF = newF;
      ahiDFp = newDFp;
    } else {
      if (std::fabs(newDFp) <= -c2dfp)
        break;  // both Wolfe conditions hold
      if (newDFp * (ahi - alo) >= 0) {
        ahi = alo;
        ahiF = aloF;
        ahiDFp = aloDFp;
      }
      alo = alpha;
      aloF = newF;
      aloDFp = newDFp;
    }
  }
  return 0;
}
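The interpolation branch in the listing is the standard closed-form minimizer of the cubic fitted to the bracket end points $ (a_{lo}, f_{lo}, f'_{lo}) $ and $ (a_{hi}, f_{hi}, f'_{hi}) $; restated from the code in mathematical notation,

\[ d_1 = f'_{lo} + f'_{hi} - 3 \, \frac{f_{lo} - f_{hi}}{a_{lo} - a_{hi}}, \qquad d_2 = \operatorname{sign}(a_{hi} - a_{lo}) \sqrt{d_1^2 - f'_{lo} f'_{hi}}, \]

\[ \alpha = a_{hi} - (a_{hi} - a_{lo}) \, \frac{f'_{hi} + d_2 - d_1}{f'_{hi} - f'_{lo} + 2 d_2}. \]

Every fifth iteration the code bisects instead, which guarantees that the bracket keeps shrinking even when the cubic steps stagnate near an end point.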