algebra_solver.hpp
Go to the documentation of this file.
1 #ifndef STAN_MATH_REV_MAT_FUNCTOR_ALGEBRA_SOLVER_HPP
2 #define STAN_MATH_REV_MAT_FUNCTOR_ALGEBRA_SOLVER_HPP
3 
6 #include <stan/math/rev/core.hpp>
13 #include <unsupported/Eigen/NonLinearOptimization>
14 #include <iostream>
15 #include <string>
16 #include <vector>
17 
18 namespace stan {
19 namespace math {
20 
21 /**
22  * The vari class for the algebraic solver. We compute the Jacobian of
23  * the solutions with respect to the parameters using the implicit
24  * function theorem. The call to Jacobian() occurs outside the call to
25  * chain() -- this prevents malloc issues.
26  */
27 template <typename Fs, typename F, typename T, typename Fx>
28 struct algebra_solver_vari : public vari {
29  /** vector of parameters */
30  vari** y_;
31  /** number of parameters */
32  int y_size_;
33  /** number of unknowns */
34  int x_size_;
35  /** vector of solution */
37  /** Jacobian of the solution w.r.t parameters */
38  double* Jx_y_;
39 
40  algebra_solver_vari(const Fs& fs, const F& f, const Eigen::VectorXd& x,
41  const Eigen::Matrix<T, Eigen::Dynamic, 1>& y,
42  const std::vector<double>& dat,
43  const std::vector<int>& dat_int,
44  const Eigen::VectorXd& theta_dbl, Fx& fx,
45  std::ostream* msgs)
46  : vari(theta_dbl(0)),
47  y_(ChainableStack::instance().memalloc_.alloc_array<vari*>(y.size())),
48  y_size_(y.size()),
49  x_size_(x.size()),
50  theta_(
51  ChainableStack::instance().memalloc_.alloc_array<vari*>(x_size_)),
52  Jx_y_(ChainableStack::instance().memalloc_.alloc_array<double>(
53  x_size_ * y_size_)) {
54  using Eigen::Map;
55  using Eigen::MatrixXd;
56  for (int i = 0; i < y.size(); ++i)
57  y_[i] = y(i).vi_;
58 
59  theta_[0] = this;
60  for (int i = 1; i < x.size(); ++i)
61  theta_[i] = new vari(theta_dbl(i), false);
62 
63  // Compute the Jacobian and store in array, using the
64  // implicit function theorem, i.e. Jx_y = Jf_y / Jf_x
66  Map<MatrixXd>(&Jx_y_[0], x_size_, y_size_)
67  = -mdivide_left(fx.get_jacobian(theta_dbl),
68  f_y(fs, f, theta_dbl, value_of(y), dat, dat_int, msgs)
69  .get_jacobian(value_of(y)));
70  }
71 
72  void chain() {
73  for (int j = 0; j < y_size_; j++)
74  for (int i = 0; i < x_size_; i++)
75  y_[j]->adj_ += theta_[i]->adj_ * Jx_y_[j * x_size_ + i];
76  }
77 };
78 
79 /**
80  * Return the solution to the specified system of algebraic
81  * equations given an initial guess, and parameters and data,
82  * which get passed into the algebraic system. The user can
83  * also specify the relative tolerance (xtol in Eigen's code),
84  * the function tolerance, and the maximum number of steps
85  * (maxfev in Eigen's code).
86  *
87  * Throw an exception if the norm of f(x), where f is the
88  * output of the algebraic system and x the proposed solution,
89  * is greater than the function tolerance. We here use the
90  * norm as a metric to measure how far we are from the origin (0).
91  *
92  * @tparam F type of equation system function.
93  * @tparam T type of initial guess vector.
94  * @param[in] f Functor that evaluates the system of equations.
95  * @param[in] x Vector of starting values.
96  * @param[in] y parameter vector for the equation system. The function
97  * is overloaded to treat y as a vector of doubles or of a
98  * a template type T.
99  * @param[in] dat continuous data vector for the equation system.
100  * @param[in] dat_int integer data vector for the equation system.
101  * @param[in, out] msgs the print stream for warning messages.
102  * @param[in] relative_tolerance determines the convergence criteria
103  * for the solution.
104  * @param[in] function_tolerance determines whether roots are acceptable.
105  * @param[in] max_num_steps maximum number of function evaluations.
106  * @return theta Vector of solutions to the system of equations.
107  * @throw <code>std::invalid_argument</code> if x has size zero.
108  * @throw <code>std::invalid_argument</code> if x has non-finite elements.
109  * @throw <code>std::invalid_argument</code> if y has non-finite elements.
110  * @throw <code>std::invalid_argument</code> if dat has non-finite elements.
111  * @throw <code>std::invalid_argument</code> if dat_int has non-finite elements.
112  * @throw <code>std::invalid_argument</code> if relative_tolerance is strictly
113  * negative.
114  * @throw <code>std::invalid_argument</code> if function_tolerance is strictly
115  * negative.
116  * @throw <code>std::invalid_argument</code> if max_num_steps is not positive.
117  * @throw <code>boost::math::evaluation_error</code> (which is a subclass of
118  * <code>std::runtime_error</code>) if solver exceeds max_num_steps.
119  * @throw <code>boost::math::evaluation_error</code> (which is a subclass of
120  * <code>std::runtime_error</code>) if the norm of the solution exceeds the
121  * function tolerance.
122  */
123 template <typename F, typename T>
124 Eigen::VectorXd algebra_solver(
125  const F& f, const Eigen::Matrix<T, Eigen::Dynamic, 1>& x,
126  const Eigen::VectorXd& y, const std::vector<double>& dat,
127  const std::vector<int>& dat_int, std::ostream* msgs = nullptr,
128  double relative_tolerance = 1e-10, double function_tolerance = 1e-6,
129  long int max_num_steps = 1e+3) { // NOLINT(runtime/int)
130  check_nonzero_size("algebra_solver", "initial guess", x);
131  for (int i = 0; i < x.size(); i++)
132  check_finite("algebra_solver", "initial guess", x(i));
133  for (int i = 0; i < y.size(); i++)
134  check_finite("algebra_solver", "parameter vector", y(i));
135  for (double i : dat)
136  check_finite("algebra_solver", "continuous data", i);
137  for (int x : dat_int)
138  check_finite("algebra_solver", "integer data", x);
139 
140  if (relative_tolerance < 0)
141  invalid_argument("algebra_solver", "relative_tolerance,",
142  relative_tolerance, "",
143  ", must be greater than or equal to 0");
144  if (function_tolerance < 0)
145  invalid_argument("algebra_solver", "function_tolerance,",
146  function_tolerance, "",
147  ", must be greater than or equal to 0");
148  if (max_num_steps <= 0)
149  invalid_argument("algebra_solver", "max_num_steps,", max_num_steps, "",
150  ", must be greater than 0");
151 
152  // Create functor for algebraic system
155  Fx fx(Fs(), f, value_of(x), y, dat, dat_int, msgs);
156  Eigen::HybridNonLinearSolver<Fx> solver(fx);
157 
158  // Check dimension unknowns equals dimension of system output
159  check_matching_sizes("algebra_solver", "the algebraic system's output",
160  fx.get_value(value_of(x)), "the vector of unknowns, x,",
161  x);
162 
163  // Compute theta_dbl
164  Eigen::VectorXd theta_dbl = value_of(x);
165  solver.parameters.xtol = relative_tolerance;
166  solver.parameters.maxfev = max_num_steps;
167  solver.solve(theta_dbl);
168 
169  // Check if the max number of steps has been exceeded
170  if (solver.nfev >= max_num_steps) {
171  std::ostringstream message;
172  message << "algebra_solver: max number of iterations: " << max_num_steps
173  << " exceeded.";
174  throw boost::math::evaluation_error(message.str());
175  }
176 
177  // Check solution is a root
178  double system_norm = fx.get_value(theta_dbl).stableNorm();
179  if (system_norm > function_tolerance) {
180  std::ostringstream message2;
181  message2 << "algebra_solver: the norm of the algebraic function is: "
182  << system_norm << " but should be lower than the function "
183  << "tolerance: " << function_tolerance << ". Consider "
184  << "decreasing the relative tolerance and increasing the "
185  << "max_num_steps.";
186  throw boost::math::evaluation_error(message2.str());
187  }
188 
189  return theta_dbl;
190 }
191 
192 /**
193  * Return the solution to the specified system of algebraic
194  * equations given an initial guess, and parameters and data,
195  * which get passed into the algebraic system. The user can
196  * also specify the relative tolerance (xtol in Eigen's code),
197  * the function tolerance, and the maximum number of steps
198  * (maxfev in Eigen's code).
199  *
200  * Overload the previous definition to handle the case where y
201  * is a vector of parameters (var). The overload calls the
202  * algebraic solver defined above and builds a vari object on
203  * top, using the algebra_solver_vari class.
204  *
205  * @tparam F type of equation system function.
206  * @tparam T1 Type of elements in x vector.
207  * @tparam T2 Type of elements in y vector.
208  * @param[in] f Functor that evaluates the system of equations.
209  * @param[in] x Vector of starting values (initial guess).
210  * @param[in] y parameter vector for the equation system.
211  * @param[in] dat continuous data vector for the equation system.
212  * @param[in] dat_int integer data vector for the equation system.
213  * @param[in, out] msgs the print stream for warning messages.
214  * @param[in] relative_tolerance determines the convergence criteria
215  * for the solution.
216  * @param[in] function_tolerance determines whether roots are acceptable.
217  * @param[in] max_num_steps maximum number of function evaluations.
218  * @return theta Vector of solutions to the system of equations.
219  * @throw <code>std::invalid_argument</code> if x has size zero.
220  * @throw <code>std::invalid_argument</code> if x has non-finite elements.
221  * @throw <code>std::invalid_argument</code> if y has non-finite elements.
222  * @throw <code>std::invalid_argument</code> if dat has non-finite elements.
223  * @throw <code>std::invalid_argument</code> if dat_int has non-finite elements.
224  * @throw <code>std::invalid_argument</code> if relative_tolerance is strictly
225  * negative.
226  * @throw <code>std::invalid_argument</code> if function_tolerance is strictly
227  * negative.
228  * @throw <code>std::invalid_argument</code> if max_num_steps is not positive.
229  * @throw <code>boost::math::evaluation_error</code> (which is a subclass of
230  * <code>std::runtime_error</code>) if solver exceeds max_num_steps.
231  * @throw <code>boost::math::evaluation_error</code> (which is a subclass of
232  * <code>std::runtime_error</code>) if the norm of the solution exceeds the
233  * function tolerance.
234  */
235 template <typename F, typename T1, typename T2>
236 Eigen::Matrix<T2, Eigen::Dynamic, 1> algebra_solver(
237  const F& f, const Eigen::Matrix<T1, Eigen::Dynamic, 1>& x,
238  const Eigen::Matrix<T2, Eigen::Dynamic, 1>& y,
239  const std::vector<double>& dat, const std::vector<int>& dat_int,
240  std::ostream* msgs = nullptr, double relative_tolerance = 1e-10,
241  double function_tolerance = 1e-6,
242  long int max_num_steps = 1e+3) { // NOLINT(runtime/int)
243  Eigen::VectorXd theta_dbl
244  = algebra_solver(f, x, value_of(y), dat, dat_int, 0, relative_tolerance,
245  function_tolerance, max_num_steps);
246 
248 
249  // TODO(charlesm93): a similar object gets constructed inside
250  // the call to algebra_solver. Cache the previous result
251  // and use it here (if possible).
254  Fx fx(Fs(), f, value_of(x), value_of(y), dat, dat_int, msgs);
255 
256  // Construct vari
258  = new algebra_solver_vari<Fy, F, T2, Fx>(Fy(), f, value_of(x), y, dat,
259  dat_int, theta_dbl, fx, msgs);
260  Eigen::Matrix<var, Eigen::Dynamic, 1> theta(x.size());
261  theta(0) = var(vi0->theta_[0]);
262  for (int i = 1; i < x.size(); ++i)
263  theta(i) = var(vi0->theta_[i]);
264 
265  return theta;
266 }
267 } // namespace math
268 } // namespace stan
269 
270 #endif
Eigen::Matrix< fvar< T >, R1, C2 > mdivide_left(const Eigen::Matrix< fvar< T >, R1, C1 > &A, const Eigen::Matrix< fvar< T >, R2, C2 > &b)
#define F(x, y, z)
void check_finite(const char *function, const char *name, const T_y &y)
void check_nonzero_size(const char *function, const char *name, const T_y &y)
T value_of(const fvar< T > &v)
Definition: value_of.hpp:16
Eigen::VectorXd algebra_solver(const F &f, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &x, const Eigen::VectorXd &y, const std::vector< double > &dat, const std::vector< int > &dat_int, std::ostream *msgs=nullptr, double relative_tolerance=1e-10, double function_tolerance=1e-6, long int max_num_steps=1e+3)
algebra_solver_vari(const Fs &fs, const F &f, const Eigen::VectorXd &x, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &y, const std::vector< double > &dat, const std::vector< int > &dat_int, const Eigen::VectorXd &theta_dbl, Fx &fx, std::ostream *msgs)
friend class var
Definition: vari.hpp:32
const double j
Definition: BetheBloch.cxx:29
void invalid_argument(const char *function, const char *name, const T &y, const char *msg1, const char *msg2)
double e()
Definition: constants.hpp:89
vari(double x)
Definition: vari.hpp:58
int size(const std::vector< T > &x)
Definition: size.hpp:17
void check_matching_sizes(const char *function, const char *name1, const T_y1 &y1, const char *name2, const T_y2 &y2)
double adj_
Definition: vari.hpp:44