normal_fullrank.hpp
Go to the documentation of this file.
1 #ifndef STAN_VARIATIONAL_NORMAL_FULLRANK_HPP
2 #define STAN_VARIATIONAL_NORMAL_FULLRANK_HPP
3 
5 #include <stan/math/prim/mat.hpp>
8 #include <algorithm>
9 #include <ostream>
10 #include <vector>
11 
12 namespace stan {
13 
14  namespace variational {
15 
16  /**
17  * Variational family approximation with full-rank multivariate
18  * normal distribution.
19  */
20  class normal_fullrank : public base_family {
21  private:
22  /**
23  * Mean vector.
24  */
25  Eigen::VectorXd mu_;
26 
27  /**
28  * Cholesky factor of covariance:
29  * Sigma = L_chol * L_chol.transpose()
30  */
31  Eigen::MatrixXd L_chol_;
32 
33  /**
34  * Dimensionality of distribution.
35  */
36  const int dimension_;
37 
38  /**
39  * Raise a domain exception if the specified vector contains
40  * not-a-number values.
41  *
42  * @param[in] mu Mean vector.
43  * @throw std::domain_error If the mean vector contains NaN
44  * values or does not match this distribution's dimensionality.
45  */
46  void validate_mean(const char* function,
47  const Eigen::VectorXd& mu) {
48  stan::math::check_not_nan(function, "Mean vector", mu);
50  "Dimension of input vector", mu.size(),
51  "Dimension of current vector", dimension_);
52  }
53 
54  /**
55  * Raise a domain exception if the specified matrix is not
56  * square, not lower triangular, or contains not-a-number
57  * values.
58  *
59  * <b>Warning:</b> This function does not check that the
60  * Cholesky factor is positive definite.
61  *
62  * @param[in] L_chol Cholesky factor for covariance matrix.
63  * @throw std::domain_error If the specified matrix is not
64  * square, is not lower triangular, if its size does not match
65  * the dimensionality of this approximation, or if it contains
66  * not-a-number values.
67  */
68  void validate_cholesky_factor(const char* function,
69  const Eigen::MatrixXd& L_chol) {
70  stan::math::check_square(function, "Cholesky factor", L_chol);
72  "Cholesky factor", L_chol);
74  "Dimension of mean vector", dimension_,
75  "Dimension of Cholesky factor", L_chol.rows());
76  stan::math::check_not_nan(function, "Cholesky factor", L_chol);
77  }
78 
79 
80  public:
81  /**
82  * Construct a variational distribution of the specified
83  * dimensionality with a zero mean and Cholesky factor of a zero
84  * covariance matrix.
85  *
86  * @param[in] dimension Dimensionality of distribution.
87  */
88  explicit normal_fullrank(size_t dimension)
89  : mu_(Eigen::VectorXd::Zero(dimension)),
90  L_chol_(Eigen::MatrixXd::Zero(dimension, dimension)),
91  dimension_(dimension) {
92  }
93 
94 
95  /**
96  * Construct a variational distribution with specified mean vector
97  * and Cholesky factor for identity covariance.
98  *
99  * @param[in] cont_params Mean vector.
100  */
101  explicit normal_fullrank(const Eigen::VectorXd& cont_params)
102  : mu_(cont_params),
103  L_chol_(Eigen::MatrixXd::Identity(cont_params.size(),
104  cont_params.size())),
105  dimension_(cont_params.size()) {
106  }
107 
108  /**
109  * Construct a variational distribution with specified mean and
110  * Cholesky factor for covariance.
111  *
112  * <b>Warning</b>: Positive-definiteness is not enforced for the
113  * Cholesky factor.
114  *
115  * @param[in] mu Mean vector.
116  * @param[in] L_chol Cholesky factor of covariance.
117  * @throws std::domain_error If the Cholesky factor is not
118  * square or not lower triangular, if the mean and Cholesky factor
119  * have different dimensionality, or if any of the elements is
120  * not-a-number.
121  */
122  normal_fullrank(const Eigen::VectorXd& mu,
123  const Eigen::MatrixXd& L_chol)
124  : mu_(mu), L_chol_(L_chol), dimension_(mu.size()) {
125  static const char* function = "stan::variational::normal_fullrank";
126  validate_mean(function, mu);
127  validate_cholesky_factor(function, L_chol);
128  }
129 
130  /**
131  * Return the dimensionality of the approximation.
132  */
133  int dimension() const { return dimension_; }
134 
135  /**
136  * Return the mean vector.
137  */
138  const Eigen::VectorXd& mu() const { return mu_; }
139 
140  /**
141  * Return the Cholesky factor of the covariance matrix.
142  */
143  const Eigen::MatrixXd& L_chol() const { return L_chol_; }
144 
145  /**
146  * Set the mean vector to the specified value.
147  *
148  * @param[in] mu Mean vector.
149  * @throw std::domain_error If the size of the specified mean
150  * vector does not match the stored dimension of this approximation.
151  */
152  void set_mu(const Eigen::VectorXd& mu) {
153  static const char* function = "stan::variational::set_mu";
154  validate_mean(function, mu);
155  mu_ = mu;
156  }
157 
158  /**
159  * Set the Cholesky factor to the specified value.
160  *
161  * @param[in] L_chol Cholesky factor of covariance matrix.
162  * @throw std::domain_error If the specified matrix is not
163  * square, is not lower triangular, if its size does not match
164  * the dimensionality of this approximation, or if it contains
165  * not-a-number values.
166  */
167  void set_L_chol(const Eigen::MatrixXd& L_chol) {
168  static const char* function = "stan::variational::set_L_chol";
169  validate_cholesky_factor(function, L_chol);
170  L_chol_ = L_chol;
171  }
172 
173  /**
174  * Set the mean vector and Cholesky factor for the covariance
175  * matrix to zero.
176  */
177  void set_to_zero() {
178  mu_ = Eigen::VectorXd::Zero(dimension_);
179  L_chol_ = Eigen::MatrixXd::Zero(dimension_, dimension_);
180  }
181 
182  /**
183  * Return a new full rank approximation resulting from squaring
184  * the entries in the mean and Cholesky factor for the
185  * covariance matrix. The new approximation does not hold
186  * any references to this approximation.
187  */
189  return normal_fullrank(Eigen::VectorXd(mu_.array().square()),
190  Eigen::MatrixXd(L_chol_.array().square()));
191  }
192 
193  /**
194  * Return a new full rank approximation resulting from taking
195  * the square root of the entries in the mean and Cholesky
196  * factor for the covariance matrix. The new approximation does
197  * not hold any references to this approximation.
198  *
199  * <b>Warning:</b> No checks are carried out to ensure the
200  * entries are non-negative before taking square roots, so
201  * not-a-number values may result.
202  */
204  return normal_fullrank(Eigen::VectorXd(mu_.array().sqrt()),
205  Eigen::MatrixXd(L_chol_.array().sqrt()));
206  }
207 
208  /**
209  * Return this approximation after setting its mean vector and
210  * Cholesky factor for covariance to the values given by the
211  * specified approximation.
212  *
213  * @param[in] rhs Approximation from which to gather the mean and
214  * covariance.
215  * @return This approximation after assignment.
216  * @throw std::domain_error If the dimensionality of the specified
217  * approximation does not match this approximation's dimensionality.
218  */
220  static const char* function =
221  "stan::variational::normal_fullrank::operator=";
223  "Dimension of lhs", dimension_,
224  "Dimension of rhs", rhs.dimension());
225  mu_ = rhs.mu();
226  L_chol_ = rhs.L_chol();
227  return *this;
228  }
229 
230  /**
231  * Add the mean and Cholesky factor of the covariance matrix of
232  * the specified approximation to this approximation.
233  *
234  * @param[in] rhs Approximation from which to gather the mean and
235  * covariance.
236  * @return This approximation after adding the specified
237  * approximation.
238  * @throw std::domain_error If the dimensionality of the specified
239  * approximation does not match this approximation's dimensionality.
240  */
242  static const char* function =
243  "stan::variational::normal_fullrank::operator+=";
245  "Dimension of lhs", dimension_,
246  "Dimension of rhs", rhs.dimension());
247  mu_ += rhs.mu();
248  L_chol_ += rhs.L_chol();
249  return *this;
250  }
251 
252  /**
253  * Return this approximation after elementwise division by the
254  * specified approximation's mean and Cholesky factor for
255  * covariance.
256  *
257  * @param[in] rhs Approximation from which to gather the mean and
258  * covariance.
259  * @return This approximation after elementwise division by the
260  * specified approximation.
261  * @throw std::domain_error If the dimensionality of the specified
262  * approximation does not match this approximation's dimensionality.
263  */
264  inline
266  static const char* function =
267  "stan::variational::normal_fullrank::operator/=";
268 
270  "Dimension of lhs", dimension_,
271  "Dimension of rhs", rhs.dimension());
272 
273  mu_.array() /= rhs.mu().array();
274  L_chol_.array() /= rhs.L_chol().array();
275  return *this;
276  }
277 
278  /**
279  * Return this approximation after adding the specified scalar
280  * to each entry in the mean and cholesky factor for covariance.
281  *
282  * <b>Warning:</b> No finiteness check is made on the scalar, so
283  * it may introduce NaNs.
284  *
285  * @param[in] scalar Scalar to add.
286  * @return This approximation after elementwise addition of the
287  * specified scalar.
288  */
289  normal_fullrank& operator+=(double scalar) {
290  mu_.array() += scalar;
291  L_chol_.array() += scalar;
292  return *this;
293  }
294 
295  /**
296  * Return this approximation after multiplying by the specified
297  * scalar to each entry in the mean and cholesky factor for
298  * covariance.
299  *
300  * <b>Warning:</b> No finiteness check is made on the scalar, so
301  * it may introduce NaNs.
302  *
303  * @param[in] scalar Scalar to add.
304  * @return This approximation after elementwise addition of the
305  * specified scalar.
306  */
307  normal_fullrank& operator*=(double scalar) {
308  mu_ *= scalar;
309  L_chol_ *= scalar;
310  return *this;
311  }
312 
313  /**
314  * Returns the mean vector for this approximation.
315  *
316  * See: <code>mu()</code>.
317  *
318  * @return Mean vector for this approximation.
319  */
320  const Eigen::VectorXd& mean() const {
321  return mu();
322  }
323 
324  /**
325  * Return the entropy of this approximation.
326  *
327  * <p>The entropy is defined by
328  * 0.5 * dim * (1+log2pi) + 0.5 * log det (L^T L)
329  * = 0.5 * dim * (1+log2pi) + sum(log(abs(diag(L)))).
330  *
331  * @return Entropy of this approximation
332  */
333  double entropy() const {
334  static double mult = 0.5 * (1.0 + stan::math::LOG_TWO_PI);
335  double result = mult * dimension_;
336  for (int d = 0; d < dimension_; ++d) {
337  double tmp = fabs(L_chol_(d, d));
338  if (tmp != 0.0) result += log(tmp);
339  }
340  return result;
341  }
342 
343  /**
344  * Return the transform of the sepcified vector using the
345  * Cholesky factor and mean vector.
346  *
347  * The transform is defined by
348  * S^{-1}(eta) = L_chol * eta + mu.
349  *
350  * @param[in] eta Vector to transform.
351  * @throw std::domain_error If the specified vector's size does
352  * not match the dimensionality of this approximation.
353  * @return Transformed vector.
354  */
355  Eigen::VectorXd transform(const Eigen::VectorXd& eta) const {
356  static const char* function =
357  "stan::variational::normal_fullrank::transform";
359  "Dimension of input vector", eta.size(),
360  "Dimension of mean vector", dimension_);
361  stan::math::check_not_nan(function, "Input vector", eta);
362 
363  return (L_chol_ * eta) + mu_;
364  }
365 
366  /**
367  * Set the specified vector to a draw from this variational
368  * approximation using the specified random number generator.
369  *
370  * @tparam BaseRNG Class of random number generator.
371  * @param[in,out] rng Base random number generator.
372  * @param[out] eta Random draw.
373  * @return A sample from the variational distribution.
374  */
375  template <class BaseRNG>
376  void sample(BaseRNG& rng, Eigen::VectorXd& eta) const {
377  for (int d = 0; d < dimension_; ++d)
378  eta(d) = stan::math::normal_rng(0, 1, rng);
379  eta = transform(eta);
380  }
381 
382  /**
383  * Calculates the "blackbox" gradient with respect to BOTH the
384  * location vector (mu) and the cholesky factor of the scale
385  * matrix (L_chol) in parallel. It uses the same gradient
386  * computed from a set of Monte Carlo samples
387  *
388  * @tparam M Model class.
389  * @tparam BaseRNG Class of base random number generator.
390  * @param[in] elbo_grad Approximation to store "blackbox" gradient.
391  * @param[in] m Model.
392  * @param[in] cont_params Continuous parameters.
393  * @param[in] n_monte_carlo_grad Sample size for gradient computation.
394  * @param[in,out] rng Random number generator.
395  * @param[in,out] logger logger for messages
396  * @throw std::domain_error If the number of divergent
397  * iterations exceeds its specified bounds.
398  */
399  template <class M, class BaseRNG>
400  void calc_grad(normal_fullrank& elbo_grad,
401  M& m,
402  Eigen::VectorXd& cont_params,
403  int n_monte_carlo_grad,
404  BaseRNG& rng,
405  callbacks::logger& logger)
406  const {
407  static const char* function =
408  "stan::variational::normal_fullrank::calc_grad";
410  "Dimension of elbo_grad", elbo_grad.dimension(),
411  "Dimension of variational q", dimension_);
413  "Dimension of variational q", dimension_,
414  "Dimension of variables in model", cont_params.size());
415 
416  Eigen::VectorXd mu_grad = Eigen::VectorXd::Zero(dimension_);
417  Eigen::MatrixXd L_grad = Eigen::MatrixXd::Zero(dimension_, dimension_);
418  double tmp_lp = 0.0;
419  Eigen::VectorXd tmp_mu_grad = Eigen::VectorXd::Zero(dimension_);
420  Eigen::VectorXd eta = Eigen::VectorXd::Zero(dimension_);
421  Eigen::VectorXd zeta = Eigen::VectorXd::Zero(dimension_);
422 
423  // Naive Monte Carlo integration
424  static const int n_retries = 10;
425  for (int i = 0, n_monte_carlo_drop = 0; i < n_monte_carlo_grad; ) {
426  // Draw from standard normal and transform to real-coordinate space
427  for (int d = 0; d < dimension_; ++d) {
428  eta(d) = stan::math::normal_rng(0, 1, rng);
429  }
430  zeta = transform(eta);
431  try {
432  std::stringstream ss;
433  stan::model::gradient(m, zeta, tmp_lp, tmp_mu_grad, &ss);
434  if (ss.str().length() > 0)
435  logger.info(ss);
436  stan::math::check_finite(function, "Gradient of mu", tmp_mu_grad);
437 
438  mu_grad += tmp_mu_grad;
439  for (int ii = 0; ii < dimension_; ++ii) {
440  for (int jj = 0; jj <= ii; ++jj) {
441  L_grad(ii, jj) += tmp_mu_grad(ii) * eta(jj);
442  }
443  }
444  ++i;
445  } catch (const std::exception& e) {
446  ++n_monte_carlo_drop;
447  if (n_monte_carlo_drop >= n_retries * n_monte_carlo_grad) {
448  const char* name = "The number of dropped evaluations";
449  const char* msg1 = "has reached its maximum amount (";
450  int y = n_retries * n_monte_carlo_grad;
451  const char* msg2 = "). Your model may be either severely "
452  "ill-conditioned or misspecified.";
453  stan::math::domain_error(function, name, y, msg1, msg2);
454  }
455  }
456  }
457  mu_grad /= static_cast<double>(n_monte_carlo_grad);
458  L_grad /= static_cast<double>(n_monte_carlo_grad);
459 
460  // Add gradient of entropy term
461  L_grad.diagonal().array() += L_chol_.diagonal().array().inverse();
462 
463  elbo_grad.set_mu(mu_grad);
464  elbo_grad.set_L_chol(L_grad);
465  }
466  };
467 
468  /**
469  * Return a new approximation resulting from adding the mean and
470  * covariance matrix Cholesky factor of the specified
471  * approximations.
472  *
473  * @param[in] lhs First approximation.
474  * @param[in] rhs Second approximation.
475  * @return Sum of the specified approximations.
476  * @throw std::domain_error If the dimensionalities do not match.
477  */
478  inline
480  return lhs += rhs;
481  }
482 
483  /**
484  * Return a new approximation resulting from elementwise division of
485  * of the first specified approximation by the second.
486  *
487  * @param[in] lhs First approximation.
488  * @param[in] rhs Second approximation.
489  * @return Elementwise division of the specified approximations.
490  * @throw std::domain_error If the dimensionalities do not match.
491  */
492  inline
494  return lhs /= rhs;
495  }
496 
497  /**
498  * Return a new approximation resulting from elementwise addition
499  * of the specified scalar to the mean and Cholesky factor of
500  * covariance entries for the specified approximation.
501  *
502  * @param[in] scalar Scalar value
503  * @param[in] rhs Approximation.
504  * @return Addition of scalar to specified approximation.
505  */
506  inline
508  return rhs += scalar;
509  }
510 
511  /**
512  * Return a new approximation resulting from elementwise
513  * multiplication of the specified scalar to the mean and Cholesky
514  * factor of covariance entries for the specified approximation.
515  *
516  * @param[in] scalar Scalar value
517  * @param[in] rhs Approximation.
518  * @return Multiplication of scalar by the specified approximation.
519  */
520  inline
522  return rhs *= scalar;
523  }
524 
525  }
526 }
527 #endif
const Eigen::VectorXd & mean() const
const XML_Char * name
Definition: expat.h:151
void check_finite(const char *function, const char *name, const T_y &y)
void check_lower_triangular(const char *function, const char *name, const Eigen::Matrix< T_y, Eigen::Dynamic, Eigen::Dynamic > &y)
Eigen::VectorXd transform(const Eigen::VectorXd &eta) const
normal_fullrank & operator/=(const normal_fullrank &rhs)
normal_fullrank & operator+=(const normal_fullrank &rhs)
fvar< T > fabs(const fvar< T > &x)
Definition: fabs.hpp:15
base_family operator/(base_family lhs, const base_family &rhs)
base_family operator*(double scalar, base_family rhs)
void validate_mean(const char *function, const Eigen::VectorXd &mu)
Float_t ss
Definition: plot.C:24
Definition: StanUtils.h:6
::xsd::cxx::tree::exception< char > exception
Definition: Database.h:225
Float_t tmp
Definition: plot.C:36
normal_fullrank(const Eigen::VectorXd &cont_params)
void gradient(const M &model, const Eigen::Matrix< double, Eigen::Dynamic, 1 > &x, double &f, Eigen::Matrix< double, Eigen::Dynamic, 1 > &grad_f, std::ostream *msgs=0)
Definition: gradient.hpp:15
void check_size_match(const char *function, const char *name_i, T_size1 i, const char *name_j, T_size2 j)
const Eigen::VectorXd & mu() const
const double LOG_TWO_PI
Definition: constants.hpp:167
VectorBuilder< true, double, T_loc, T_scale >::type normal_rng(const T_loc &mu, const T_scale &sigma, RNG &rng)
Definition: normal_rng.hpp:35
base_family operator+(base_family lhs, const base_family &rhs)
void validate_cholesky_factor(const char *function, const Eigen::MatrixXd &L_chol)
void check_not_nan(const char *function, const char *name, const T_y &y)
Float_t d
Definition: plot.C:236
void calc_grad(normal_fullrank &elbo_grad, M &m, Eigen::VectorXd &cont_params, int n_monte_carlo_grad, BaseRNG &rng, callbacks::logger &logger) const
void sample(BaseRNG &rng, Eigen::VectorXd &eta) const
normal_fullrank & operator=(const normal_fullrank &rhs)
void set_mu(const Eigen::VectorXd &mu)
void domain_error(const char *function, const char *name, const T &y, const char *msg1, const char *msg2)
void check_square(const char *function, const char *name, const Eigen::Matrix< T_y, Eigen::Dynamic, Eigen::Dynamic > &y)
virtual void info(const std::string &message)
Definition: logger.hpp:47
void set_L_chol(const Eigen::MatrixXd &L_chol)
void Zero()
normal_fullrank(const Eigen::VectorXd &mu, const Eigen::MatrixXd &L_chol)
normal_fullrank & operator+=(double scalar)
const Eigen::MatrixXd & L_chol() const
Float_t e
Definition: plot.C:35
normal_fullrank & operator*=(double scalar)