1 #ifndef STAN_VARIATIONAL_ADVI_HPP 2 #define STAN_VARIATIONAL_ADVI_HPP 13 #include <boost/circular_buffer.hpp> 14 #include <boost/lexical_cast.hpp> 25 namespace variational {
38 template <
class Model,
class Q,
class BaseRNG>
57 Eigen::VectorXd& cont_params,
59 int n_monte_carlo_grad,
60 int n_monte_carlo_elbo,
62 int n_posterior_samples)
70 static const char*
function =
"stan::variational::advi";
72 "Number of Monte Carlo samples for gradients",
75 "Number of Monte Carlo samples for ELBO",
78 "Evaluate ELBO at every eval_elbo iteration",
81 "Number of posterior samples for output",
102 static const char*
function =
103 "stan::variational::advi::calc_ELBO";
106 int dim = variational.dimension();
107 Eigen::VectorXd zeta(dim);
109 int n_dropped_evaluations = 0;
111 variational.sample(
rng_, zeta);
113 std::stringstream
ss;
114 double log_prob =
model_.template log_prob<false, true>(zeta, &
ss);
115 if (ss.str().length() > 0)
121 ++n_dropped_evaluations;
122 if (n_dropped_evaluations >= n_monte_carlo_elbo_) {
123 const char*
name =
"The number of dropped evaluations";
124 const char* msg1 =
"has reached its maximum amount (";
125 const char* msg2 =
"). Your model may be either severely " 126 "ill-conditioned or misspecified.";
133 elbo += variational.entropy();
148 static const char*
function =
149 "stan::variational::advi::calc_ELBO_grad";
152 "Dimension of elbo_grad",
153 elbo_grad.dimension(),
154 "Dimension of variational q",
155 variational.dimension());
157 "Dimension of variational q",
158 variational.dimension(),
159 "Dimension of variables in model",
162 variational.calc_grad(elbo_grad,
180 int adapt_iterations,
183 static const char*
function =
"stan::variational::advi::adapt_eta";
186 "Number of adaptation iterations",
189 logger.
info(
"Begin eta adaptation.");
192 const int eta_sequence_size = 5;
193 double eta_sequence[eta_sequence_size] = {100, 10, 1, 0.1, 0.01};
200 elbo_init =
calc_ELBO(variational, logger);
202 const char*
name =
"Cannot compute ELBO using the initial " 203 "variational distribution.";
204 const char* msg1 =
"Your model may be either " 205 "severely ill-conditioned or misspecified.";
210 Q elbo_grad = Q(
model_.num_params_r());
213 Q history_grad_squared = Q(
model_.num_params_r());
215 double pre_factor = 0.9;
216 double post_factor = 0.1;
218 double eta_best = 0.0;
222 bool do_more_tuning =
true;
223 int eta_sequence_index = 0;
224 while (do_more_tuning) {
226 eta = eta_sequence[eta_sequence_index];
228 int print_progress_m;
229 for (
int iter_tune = 1; iter_tune <= adapt_iterations; ++iter_tune) {
230 print_progress_m = eta_sequence_index
231 * adapt_iterations + iter_tune;
234 adapt_iterations * eta_sequence_size,
235 adapt_iterations,
true,
"",
"", logger);
242 elbo_grad.set_to_zero();
246 if (iter_tune == 1) {
247 history_grad_squared += elbo_grad.square();
249 history_grad_squared = pre_factor * history_grad_squared
250 + post_factor * elbo_grad.square();
252 eta_scaled = eta /
sqrt(static_cast<double>(iter_tune));
254 variational += eta_scaled * elbo_grad
255 / (tau + history_grad_squared.sqrt());
268 if (elbo < elbo_best && elbo_best > elbo_init) {
269 std::stringstream
ss;
271 <<
" Found best value [eta = " << eta_best
273 if (eta_sequence_index < eta_sequence_size - 1)
274 ss << (
" earlier than expected.");
279 do_more_tuning =
false;
281 if (eta_sequence_index < eta_sequence_size - 1) {
288 if (elbo > elbo_init) {
289 std::stringstream
ss;
291 <<
" Found best value [eta = " << eta_best
296 do_more_tuning =
false;
298 const char*
name =
"All proposed step-sizes";
299 const char* msg1 =
"failed. Your model may be either " 300 "severely ill-conditioned or misspecified.";
305 history_grad_squared.set_to_zero();
307 ++eta_sequence_index;
332 static const char*
function =
333 "stan::variational::advi::stochastic_gradient_ascent";
337 "Relative objective function tolerance",
340 "Maximum iterations",
344 Q elbo_grad = Q(
model_.num_params_r());
347 Q history_grad_squared = Q(
model_.num_params_r());
349 double pre_factor = 0.9;
350 double post_factor = 0.1;
365 boost::circular_buffer<double> elbo_diff(cb_size);
367 logger.
info(
"Begin stochastic gradient ascent.");
375 clock_t
start = clock();
380 bool do_more_iterations =
true;
381 for (
int iter_counter = 1; do_more_iterations; ++iter_counter) {
386 if (iter_counter == 1) {
387 history_grad_squared += elbo_grad.square();
389 history_grad_squared = pre_factor * history_grad_squared
390 + post_factor * elbo_grad.square();
392 eta_scaled = eta /
sqrt(static_cast<double>(iter_counter));
395 variational += eta_scaled * elbo_grad
396 / (tau + history_grad_squared.sqrt());
402 if (elbo > elbo_best)
405 elbo_diff.push_back(delta_elbo);
406 delta_elbo_ave = std::accumulate(elbo_diff.begin(),
407 elbo_diff.end(), 0.0)
408 /
static_cast<double>(elbo_diff.size());
410 std::stringstream
ss;
412 << std::setw(4) << iter_counter
414 << std::setw(15) << std::fixed << std::setprecision(3)
417 << std::setw(16) << std::fixed << std::setprecision(3)
420 << std::setw(15) << std::fixed << std::setprecision(3)
424 delta_t =
static_cast<double>(end -
start) / CLOCKS_PER_SEC;
426 std::vector<double> print_vector;
427 print_vector.clear();
428 print_vector.push_back(iter_counter);
429 print_vector.push_back(delta_t);
430 print_vector.push_back(elbo);
431 diagnostic_writer(print_vector);
433 if (delta_elbo_ave < tol_rel_obj) {
434 ss <<
" MEAN ELBO CONVERGED";
435 do_more_iterations =
false;
438 if (delta_elbo_med < tol_rel_obj) {
439 ss <<
" MEDIAN ELBO CONVERGED";
440 do_more_iterations =
false;
444 if (delta_elbo_med > 0.5 || delta_elbo_ave > 0.5) {
445 ss <<
" MAY BE DIVERGING... INSPECT ELBO";
451 if (do_more_iterations ==
false &&
453 logger.
info(
"Informational Message: The ELBO at a previous " 454 "iteration is larger than the ELBO upon " 456 logger.
info(
"This variational approximation may not " 457 "have converged to a good optimum.");
461 if (iter_counter == max_iterations) {
462 logger.
info(
"Informational Message: The maximum number of " 463 "iterations is reached! The algorithm may not have " 465 logger.
info(
"This variational approximation is not " 466 "guaranteed to be meaningful.");
467 do_more_iterations =
false;
485 int run(
double eta,
bool adapt_engaged,
int adapt_iterations,
486 double tol_rel_obj,
int max_iterations,
491 diagnostic_writer(
"iter,time_in_seconds,ELBO");
497 eta =
adapt_eta(variational, adapt_iterations, logger);
498 parameter_writer(
"Stepsize adaptation complete.");
499 std::stringstream
ss;
500 ss <<
"eta = " << eta;
501 parameter_writer(ss.str());
505 tol_rel_obj, max_iterations,
506 logger, diagnostic_writer);
513 std::vector<int> disc_vector;
514 std::vector<double>
values;
516 std::stringstream
msg;
517 model_.write_array(
rng_, cont_vector, disc_vector, values,
519 if (msg.str().length() > 0)
521 values.insert(values.begin(), 0);
522 parameter_writer(values);
526 std::stringstream
ss;
527 ss <<
"Drawing a sample of size " 529 <<
" from the approximate posterior... ";
537 std::stringstream msg2;
538 model_.write_array(
rng_, cont_vector, disc_vector, values,
540 if (msg2.str().length() > 0)
542 values.insert(values.begin(), 0);
543 parameter_writer(values);
545 logger.
info(
"COMPLETED.");
560 std::vector<double>
v;
561 for (boost::circular_buffer<double>::const_iterator
i = cb.begin();
562 i != cb.end(); ++
i) {
566 size_t n = v.size() / 2;
567 std::nth_element(v.begin(), v.begin()+
n, v.end());
void stochastic_gradient_ascent(Q &variational, double eta, double tol_rel_obj, int max_iterations, callbacks::logger &logger, callbacks::writer &diagnostic_writer) const
T max(const caf::Proxy< T > &a, T b)
double adapt_eta(Q &variational, int adapt_iterations, callbacks::logger &logger) const
void check_finite(const char *function, const char *name, const T_y &y)
fvar< T > fabs(const fvar< T > &x)
int run(double eta, bool adapt_engaged, int adapt_iterations, double tol_rel_obj, int max_iterations, callbacks::logger &logger, callbacks::writer ¶meter_writer, callbacks::writer &diagnostic_writer) const
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
rosenbrock_model_namespace::rosenbrock_model Model
void print_progress(int m, int start, int finish, int refresh, bool tune, const std::string &prefix, const std::string &suffix, callbacks::logger &logger)
advi(Model &m, Eigen::VectorXd &cont_params, BaseRNG &rng, int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo, int n_posterior_samples)
double calc_ELBO(const Q &variational, callbacks::logger &logger) const
void domain_error(const char *function, const char *name, const T &y, const char *msg1, const char *msg2)
virtual void info(const std::string &message)
double rel_difference(double prev, double curr) const
void check_positive(const char *function, const char *name, const T_y &y)
double circ_buff_median(const boost::circular_buffer< double > &cb) const
T max(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
Eigen::VectorXd & cont_params_
void calc_ELBO_grad(const Q &variational, Q &elbo_grad, callbacks::logger &logger) const