normal_meanfield.hpp
Go to the documentation of this file.
1 #ifndef STAN_VARIATIONAL_NORMAL_MEANFIELD_HPP
2 #define STAN_VARIATIONAL_NORMAL_MEANFIELD_HPP
3 
5 #include <stan/math/prim/mat.hpp>
8 #include <algorithm>
9 #include <ostream>
10 #include <vector>
11 
12 namespace stan {
13 
14  namespace variational {
15 
16  /**
17  * Variational family approximation with mean-field (diagonal
18  * covariance) multivariate normal distribution.
19  */
20  class normal_meanfield : public base_family {
21  private:
22  /**
23  * Mean vector.
24  */
25  Eigen::VectorXd mu_;
26 
27  /**
28  * Log standard deviation (log scale) vector.
29  */
30  Eigen::VectorXd omega_;
31 
32  /**
33  * Dimensionality of distribution.
34  */
35  const int dimension_;
36 
37  public:
38  /**
39  * Construct a variational distribution of the specified
40  * dimensionality with a zero mean and zero log standard
41  * deviation (unit standard deviation).
42  *
43  * @param[in] dimension Dimensionality of distribution.
44  */
45  explicit normal_meanfield(size_t dimension)
46  : mu_(Eigen::VectorXd::Zero(dimension)),
47  omega_(Eigen::VectorXd::Zero(dimension)),
48  dimension_(dimension) {
49  }
50 
51  /**
52  * Construct a variational distribution with the specified mean
53  * vector and zero log standard deviation (unit standard
54  * deviation).
55  *
56  * @param[in] cont_params Mean vector.
57  */
58  explicit normal_meanfield(const Eigen::VectorXd& cont_params)
59  : mu_(cont_params),
60  omega_(Eigen::VectorXd::Zero(cont_params.size())),
61  dimension_(cont_params.size()) {
62  }
63 
64  /**
65  * Construct a variational distribution with the specified mean
66  * and log standard deviation vectors.
67  *
68  * @param[in] mu Mean vector.
69  * @param[in] omega Log standard deviation vector.
70  * @throw std::domain_error If the sizes of mean and log
71  * standard deviation vectors are different, or if either
72  * contains a not-a-number value.
73  */
74  normal_meanfield(const Eigen::VectorXd& mu,
75  const Eigen::VectorXd& omega)
76  : mu_(mu), omega_(omega), dimension_(mu.size()) {
77  static const char* function =
78  "stan::variational::normal_meanfield";
80  "Dimension of mean vector", dimension_,
81  "Dimension of log std vector", omega_.size() );
82  stan::math::check_not_nan(function, "Mean vector", mu_);
83  stan::math::check_not_nan(function, "Log std vector", omega_);
84  }
85 
86  /**
87  * Return the dimensionality of the approximation.
88  */
89  int dimension() const { return dimension_; }
90 
91  /**
92  * Return the mean vector.
93  */
94  const Eigen::VectorXd& mu() const { return mu_; }
95 
96  /**
97  * Return the log standard deviation vector.
98  */
99  const Eigen::VectorXd& omega() const { return omega_; }
100 
101  /**
102  * Set the mean vector to the specified value.
103  *
104  * @param[in] mu Mean vector.
105  * @throw std::domain_error If the mean vector's size does not
106  * match this approximation's dimensionality, or if it contains
107  * not-a-number values.
108  */
109  void set_mu(const Eigen::VectorXd& mu) {
110  static const char* function =
111  "stan::variational::normal_meanfield::set_mu";
112 
114  "Dimension of input vector", mu.size(),
115  "Dimension of current vector", dimension_);
116  stan::math::check_not_nan(function, "Input vector", mu);
117  mu_ = mu;
118  }
119 
120  /**
121  * Set the log standard deviation vector to the specified
122  * value.
123  *
124  * @param[in] omega Log standard deviation vector.
125  * @throw std::domain_error If the log standard deviation
126  * vector's size does not match this approximation's
127  * dimensionality, or if it contains not-a-number values.
128  */
129  void set_omega(const Eigen::VectorXd& omega) {
130  static const char* function =
131  "stan::variational::normal_meanfield::set_omega";
132 
134  "Dimension of input vector", omega.size(),
135  "Dimension of current vector", dimension_);
136  stan::math::check_not_nan(function, "Input vector", omega);
137  omega_ = omega;
138  }
139 
140  /**
141  * Sets the mean and log standard deviation vector for this
142  * approximation to zero.
143  */
144  void set_to_zero() {
145  mu_ = Eigen::VectorXd::Zero(dimension_);
146  omega_ = Eigen::VectorXd::Zero(dimension_);
147  }
148 
149  /**
150  * Return a new mean field approximation resulting from squaring
151  * the entries in the mean and log standard deviation. The new
152  * approximation does not hold any references to this
153  * approximation.
154  */
156  return normal_meanfield(Eigen::VectorXd(mu_.array().square()),
157  Eigen::VectorXd(omega_.array().square()));
158  }
159 
160  /**
161  * Return a new mean field approximation resulting from taking
162  * the square root of the entries in the mean and log standard
163  * deviation. The new approximation does not hold any
164  * references to this approximation.
165  *
166  * <b>Warning:</b> No checks are carried out to ensure the
167  * entries are non-negative before taking square roots, so
168  * not-a-number values may result.
169  */
171  return normal_meanfield(Eigen::VectorXd(mu_.array().sqrt()),
172  Eigen::VectorXd(omega_.array().sqrt()));
173  }
174 
175  /**
176  * Return this approximation after setting its mean vector and
177  * Cholesky factor for covariance to the values given by the
178  * specified approximation.
179  *
180  * @param[in] rhs Approximation from which to gather the mean
181  * and log standard deviation vectors.
182  * @return This approximation after assignment.
183  * @throw std::domain_error If the dimensionality of the specified
184  * approximation does not match this approximation's dimensionality.
185  */
187  static const char* function =
188  "stan::variational::normal_meanfield::operator=";
190  "Dimension of lhs", dimension_,
191  "Dimension of rhs", rhs.dimension());
192  mu_ = rhs.mu();
193  omega_ = rhs.omega();
194  return *this;
195  }
196 
197  /**
198  * Add the mean and Cholesky factor of the covariance matrix of
199  * the specified approximation to this approximation.
200  *
201  * @param[in] rhs Approximation from which to gather the mean
202  * and log standard deviation vectors.
203  * @return This approximation after adding the specified
204  * approximation.
205  * @throw std::domain_error If the size of the specified
206  * approximation does not match the size of this approximation.
207  */
209  static const char* function =
210  "stan::variational::normal_meanfield::operator+=";
212  "Dimension of lhs", dimension_,
213  "Dimension of rhs", rhs.dimension());
214  mu_ += rhs.mu();
215  omega_ += rhs.omega();
216  return *this;
217  }
218 
219  /**
220  * Return this approximation after elementwise division by the
221  * specified approximation's mean and log standard deviation
222  * vectors.
223  *
224  * @param[in] rhs Approximation from which to gather the mean
225  * and log standard deviation vectors.
226  * @return This approximation after elementwise division by the
227  * specified approximation.
228  * @throw std::domain_error If the dimensionality of the specified
229  * approximation does not match this approximation's dimensionality.
230  */
231  inline
233  static const char* function =
234  "stan::variational::normal_meanfield::operator/=";
236  "Dimension of lhs", dimension_,
237  "Dimension of rhs", rhs.dimension());
238  mu_.array() /= rhs.mu().array();
239  omega_.array() /= rhs.omega().array();
240  return *this;
241  }
242 
243  /**
244  * Return this approximation after adding the specified scalar
245  * to each entry in the mean and log standard deviation vectors.
246  *
247  * <b>Warning:</b> No finiteness check is made on the scalar, so
248  * it may introduce NaNs.
249  *
250  * @param[in] scalar Scalar to add.
251  * @return This approximation after elementwise addition of the
252  * specified scalar.
253  */
254  normal_meanfield& operator+=(double scalar) {
255  mu_.array() += scalar;
256  omega_.array() += scalar;
257  return *this;
258  }
259 
260  /**
261  * Return this approximation after multiplying by the specified
262  * scalar to each entry in the mean and log standard deviation
263  * vectors.
264  *
265  * <b>Warning:</b> No finiteness check is made on the scalar, so
266  * it may introduce NaNs.
267  *
268  * @param[in] scalar Scalar to add.
269  * @return This approximation after elementwise addition of the
270  * specified scalar.
271  */
272  normal_meanfield& operator*=(double scalar) {
273  mu_ *= scalar;
274  omega_ *= scalar;
275  return *this;
276  }
277 
278  /**
279  * Returns the mean vector for this approximation.
280  *
281  * See: <code>mu()</code>.
282  *
283  * @return Mean vector for this approximation.
284  */
285  const Eigen::VectorXd& mean() const {
286  return mu();
287  }
288 
289  /**
290  * Return the entropy of the approximation.
291  *
292  * <p>The entropy is defined by
293  * 0.5 * dim * (1+log2pi) + 0.5 * log det diag(sigma^2)
294  * = 0.5 * dim * (1+log2pi) + sum(log(sigma))
295  * = 0.5 * dim * (1+log2pi) + sum(omega)
296  *
297  * @return Entropy of this approximation.
298  */
299  double entropy() const {
300  return 0.5 * static_cast<double>(dimension_) *
301  (1.0 + stan::math::LOG_TWO_PI) + omega_.sum();
302  }
303 
304  /**
305  * Return the transform of the sepcified vector using the
306  * Cholesky factor and mean vector.
307  *
308  * The transform is defined by
309  * S^{-1}(eta) = sigma * eta + mu = exp(omega) * eta + mu.
310  *
311  * @param[in] eta Vector to transform.
312  * @throw std::domain_error If the specified vector's size does
313  * not match the dimensionality of this approximation.
314  * @return Transformed vector.
315  */
316  Eigen::VectorXd transform(const Eigen::VectorXd& eta) const {
317  static const char* function =
318  "stan::variational::normal_meanfield::transform";
320  "Dimension of mean vector", dimension_,
321  "Dimension of input vector", eta.size() );
322  stan::math::check_not_nan(function, "Input vector", eta);
323  // exp(omega) * eta + mu
324  return eta.array().cwiseProduct(omega_.array().exp()) + mu_.array();
325  }
326 
327  /**
328  * Assign a draw from this mean field approximation to the
329  * specified vector using the specified random number generator.
330  *
331  * @tparam BaseRNG Class of random number generator.
332  * @param[in] rng Base random number generator.
333  * @param[in,out] eta Vector to which the draw is assigned; must
334  * already be properly sized.
335  * @throws std::range_error If the index is out of range.
336  */
337  template <class BaseRNG>
338  void sample(BaseRNG& rng, Eigen::VectorXd& eta) const {
339  // Draw from standard normal and transform to real-coordinate space
340  for (int d = 0; d < dimension_; ++d)
341  eta(d) = stan::math::normal_rng(0, 1, rng);
342  eta = transform(eta);
343  }
344 
345  /**
346  * Calculates the "blackbox" gradient with respect to both the
347  * location vector (mu) and the log-std vector (omega) in
348  * parallel. It uses the same gradient computed from a set of
349  * Monte Carlo samples.
350  *
351  * @tparam M Model class.
352  * @tparam BaseRNG Class of base random number generator.
353  * @param[in] elbo_grad Parameters to store "blackbox" gradient
354  * @param[in] m Model.
355  * @param[in] cont_params Continuous parameters.
356  * @param[in] n_monte_carlo_grad Number of samples for gradient
357  * computation.
358  * @param[in,out] rng Random number generator.
359  * @param[in,out] logger logger for messages
360  * @throw std::domain_error If the number of divergent
361  * iterations exceeds its specified bounds.
362  */
363  template <class M, class BaseRNG>
364  void calc_grad(normal_meanfield& elbo_grad,
365  M& m,
366  Eigen::VectorXd& cont_params,
367  int n_monte_carlo_grad,
368  BaseRNG& rng,
369  callbacks::logger& logger)
370  const {
371  static const char* function =
372  "stan::variational::normal_meanfield::calc_grad";
373 
375  "Dimension of elbo_grad", elbo_grad.dimension(),
376  "Dimension of variational q", dimension_);
378  "Dimension of variational q", dimension_,
379  "Dimension of variables in model", cont_params.size());
380 
381  Eigen::VectorXd mu_grad = Eigen::VectorXd::Zero(dimension_);
382  Eigen::VectorXd omega_grad = Eigen::VectorXd::Zero(dimension_);
383  double tmp_lp = 0.0;
384  Eigen::VectorXd tmp_mu_grad = Eigen::VectorXd::Zero(dimension_);
385  Eigen::VectorXd eta = Eigen::VectorXd::Zero(dimension_);
386  Eigen::VectorXd zeta = Eigen::VectorXd::Zero(dimension_);
387 
388  // Naive Monte Carlo integration
389  static const int n_retries = 10;
390  for (int i = 0, n_monte_carlo_drop = 0; i < n_monte_carlo_grad; ) {
391  // Draw from standard normal and transform to real-coordinate space
392  for (int d = 0; d < dimension_; ++d)
393  eta(d) = stan::math::normal_rng(0, 1, rng);
394  zeta = transform(eta);
395  try {
396  std::stringstream ss;
397  stan::model::gradient(m, zeta, tmp_lp, tmp_mu_grad, &ss);
398  if (ss.str().length() > 0)
399  logger.info(ss);
400  stan::math::check_finite(function, "Gradient of mu", tmp_mu_grad);
401  mu_grad += tmp_mu_grad;
402  omega_grad.array() += tmp_mu_grad.array().cwiseProduct(eta.array());
403  ++i;
404  } catch (const std::exception& e) {
405  ++n_monte_carlo_drop;
406  if (n_monte_carlo_drop >= n_retries * n_monte_carlo_grad) {
407  const char* name = "The number of dropped evaluations";
408  const char* msg1 = "has reached its maximum amount (";
409  int y = n_retries * n_monte_carlo_grad;
410  const char* msg2 = "). Your model may be either severely "
411  "ill-conditioned or misspecified.";
412  stan::math::domain_error(function, name, y, msg1, msg2);
413  }
414  }
415  }
416  mu_grad /= static_cast<double>(n_monte_carlo_grad);
417  omega_grad /= static_cast<double>(n_monte_carlo_grad);
418 
419  omega_grad.array()
420  = omega_grad.array().cwiseProduct(omega_.array().exp());
421 
422  omega_grad.array() += 1.0; // add entropy gradient (unit)
423 
424  elbo_grad.set_mu(mu_grad);
425  elbo_grad.set_omega(omega_grad);
426  }
427  };
428 
429  /**
430  * Return a new approximation resulting from adding the mean and
431  * log standard deviation of the specified approximations.
432  *
433  * @param[in] lhs First approximation.
434  * @param[in] rhs Second approximation.
435  * @return Sum of the specified approximations.
436  * @throw std::domain_error If the dimensionalities do not match.
437  */
438  inline
440  const normal_meanfield& rhs) {
441  return lhs += rhs;
442  }
443 
444  /**
445  * Return a new approximation resulting from elementwise division of
446  * of the first specified approximation by the second.
447  *
448  * @param[in] lhs First approximation.
449  * @param[in] rhs Second approximation.
450  * @return Elementwise division of the specified approximations.
451  * @throw std::domain_error If the dimensionalities do not match.
452  */
453  inline
455  const normal_meanfield& rhs) {
456  return lhs /= rhs;
457  }
458 
459  /**
460  * Return a new approximation resulting from elementwise addition
461  * of the specified scalar to the mean and log standard deviation
462  * entries of the specified approximation.
463  *
464  * @param[in] scalar Scalar value
465  * @param[in] rhs Approximation.
466  * @return Addition of scalar to specified approximation.
467  */
468  inline
470  return rhs += scalar;
471  }
472 
473  /**
474  * Return a new approximation resulting from elementwise
475  * multiplication of the specified scalar to the mean and log
476  * standard deviation vectors of the specified approximation.
477  *
478  * @param[in] scalar Scalar value
479  * @param[in] rhs Approximation.
480  * @return Multiplication of scalar by the specified approximation.
481  */
482  inline
484  return rhs *= scalar;
485  }
486 
487  }
488 }
489 #endif
const XML_Char * name
Definition: expat.h:151
void set_omega(const Eigen::VectorXd &omega)
void check_finite(const char *function, const char *name, const T_y &y)
const Eigen::VectorXd & mean() const
base_family operator/(base_family lhs, const base_family &rhs)
base_family operator*(double scalar, base_family rhs)
normal_meanfield & operator/=(const normal_meanfield &rhs)
Float_t ss
Definition: plot.C:24
Definition: StanUtils.h:6
::xsd::cxx::tree::exception< char > exception
Definition: Database.h:225
normal_meanfield & operator=(const normal_meanfield &rhs)
normal_meanfield & operator+=(double scalar)
const Eigen::VectorXd & omega() const
void gradient(const M &model, const Eigen::Matrix< double, Eigen::Dynamic, 1 > &x, double &f, Eigen::Matrix< double, Eigen::Dynamic, 1 > &grad_f, std::ostream *msgs=0)
Definition: gradient.hpp:15
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
normal_meanfield & operator+=(const normal_meanfield &rhs)
const double LOG_TWO_PI
Definition: constants.hpp:167
VectorBuilder< true, double, T_loc, T_scale >::type normal_rng(const T_loc &mu, const T_scale &sigma, RNG &rng)
Definition: normal_rng.hpp:35
base_family operator+(base_family lhs, const base_family &rhs)
normal_meanfield(const Eigen::VectorXd &mu, const Eigen::VectorXd &omega)
void check_not_nan(const char *function, const char *name, const T_y &y)
Float_t d
Definition: plot.C:236
const Eigen::VectorXd & mu() const
void domain_error(const char *function, const char *name, const T &y, const char *msg1, const char *msg2)
virtual void info(const std::string &message)
Definition: logger.hpp:47
normal_meanfield(const Eigen::VectorXd &cont_params)
void Zero()
void sample(BaseRNG &rng, Eigen::VectorXd &eta) const
Eigen::VectorXd transform(const Eigen::VectorXd &eta) const
Float_t e
Definition: plot.C:35
void set_mu(const Eigen::VectorXd &mu)
void calc_grad(normal_meanfield &elbo_grad, M &m, Eigen::VectorXd &cont_params, int n_monte_carlo_grad, BaseRNG &rng, callbacks::logger &logger) const
normal_meanfield & operator*=(double scalar)