stan_csv_reader.hpp
Go to the documentation of this file.
1 #ifndef STAN_IO_STAN_CSV_READER_HPP
2 #define STAN_IO_STAN_CSV_READER_HPP
3 
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#include <Eigen/Dense>
#include <algorithm>
#include <istream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
11 
12 namespace stan {
13  namespace io {
14 
15  // FIXME: should consolidate with the options from
16  // the command line in stan::lang
21 
25  size_t chain_id;
26  size_t seed;
28  size_t num_samples;
29  size_t num_warmup;
31  size_t thin;
35 
37  : stan_version_major(0), stan_version_minor(0), stan_version_patch(0),
38  model(""), data(""), init(""),
39  chain_id(1), seed(0), random_seed(false),
40  num_samples(0), num_warmup(0), save_warmup(false), thin(0),
41  append_samples(false),
42  algorithm(""), engine("") {}
43  };
44 
46  double step_size;
47  Eigen::MatrixXd metric;
48 
50  : step_size(0), metric(0, 0) {}
51  };
52 
/**
 * Elapsed-time totals read from the timing comments of a Stan CSV
 * file. Both values start at zero.
 */
struct stan_csv_timing {
  double warmup;    // accumulated time reported on "(Warm-up)" lines
  double sampling;  // accumulated time reported on "(Sampling)" lines

  stan_csv_timing()
    : warmup(0), sampling(0) {}
};
60 
61  struct stan_csv {
63  Eigen::Matrix<std::string, Eigen::Dynamic, 1> header;
65  Eigen::MatrixXd samples;
67  };
68 
69  /**
70  * Reads from a Stan output csv file.
71  */
73  public:
76 
77  static bool read_metadata(std::istream& in, stan_csv_metadata& metadata,
78  std::ostream* out) {
79  std::stringstream ss;
81 
82  if (in.peek() != '#')
83  return false;
84  while (in.peek() == '#') {
85  std::getline(in, line);
86  ss << line << '\n';
87  }
88  ss.seekg(std::ios_base::beg);
89 
90  char comment;
91  std::string lhs;
92 
95 
96  while (ss.good()) {
97  ss >> comment;
98  std::getline(ss, lhs);
99 
100  size_t equal = lhs.find("=");
101  if (equal != std::string::npos) {
102  name = lhs.substr(0, equal);
103  boost::trim(name);
104  value = lhs.substr(equal + 1, lhs.size());
105  boost::trim(value);
106  boost::replace_first(value, " (Default)", "");
107  } else {
108  if (lhs.compare(" data") == 0) {
109  ss >> comment;
110  std::getline(ss, lhs);
111 
112  size_t equal = lhs.find("=");
113  if (equal != std::string::npos) {
114  name = lhs.substr(0, equal);
115  boost::trim(name);
116  value = lhs.substr(equal + 2, lhs.size());
117  boost::replace_first(value, " (Default)", "");
118  }
119 
120  if (name.compare("file") == 0)
121  metadata.data = value;
122 
123  continue;
124  }
125  }
126 
127  if (name.compare("stan_version_major") == 0) {
128  metadata.stan_version_major = boost::lexical_cast<int>(value);
129  } else if (name.compare("stan_version_minor") == 0) {
130  metadata.stan_version_minor = boost::lexical_cast<int>(value);
131  } else if (name.compare("stan_version_patch") == 0) {
132  metadata.stan_version_patch = boost::lexical_cast<int>(value);
133  } else if (name.compare("model") == 0) {
134  metadata.model = value;
135  } else if (name.compare("num_samples") == 0) {
136  metadata.num_samples = boost::lexical_cast<int>(value);
137  } else if (name.compare("num_warmup") == 0) {
138  metadata.num_warmup = boost::lexical_cast<int>(value);
139  } else if (name.compare("save_warmup") == 0) {
140  metadata.save_warmup = boost::lexical_cast<bool>(value);
141  } else if (name.compare("thin") == 0) {
142  metadata.thin = boost::lexical_cast<int>(value);
143  } else if (name.compare("chain_id") == 0) {
144  metadata.chain_id = boost::lexical_cast<int>(value);
145  } else if (name.compare("init") == 0) {
146  metadata.init = value;
147  boost::trim(metadata.init);
148  } else if (name.compare("seed") == 0) {
149  metadata.seed = boost::lexical_cast<unsigned int>(value);
150  metadata.random_seed = false;
151  } else if (name.compare("append_samples") == 0) {
152  metadata.append_samples = boost::lexical_cast<bool>(value);
153  } else if (name.compare("algorithm") == 0) {
154  metadata.algorithm = value;
155  } else if (name.compare("engine") == 0) {
156  metadata.engine = value;
157  }
158  }
159  if (ss.good() == true)
160  return false;
161 
162  return true;
163  } // read_metadata
164 
165  static bool
166  read_header(std::istream& in,
167  Eigen::Matrix<std::string, Eigen::Dynamic, 1>& header,
168  std::ostream* out) {
170 
171  if (in.peek() != 'l')
172  return false;
173 
174  std::getline(in, line);
175  std::stringstream ss(line);
176 
177  header.resize(std::count(line.begin(), line.end(), ',') + 1);
178  int idx = 0;
179  while (ss.good()) {
181  std::getline(ss, token, ',');
182  boost::trim(token);
183 
184  int pos = token.find('.');
185  if (pos > 0) {
186  token.replace(pos, 1, "[");
187  std::replace(token.begin(), token.end(), '.', ',');
188  token += "]";
189  }
190  header(idx++) = token;
191  }
192  return true;
193  }
194 
195  static bool read_adaptation(std::istream& in,
196  stan_csv_adaptation& adaptation,
197  std::ostream* out) {
198  std::stringstream ss;
200  int lines = 0;
201 
202  if (in.peek() != '#' || in.good() == false)
203  return false;
204 
205  while (in.peek() == '#') {
206  std::getline(in, line);
207  ss << line << std::endl;
208  lines++;
209  }
210  ss.seekg(std::ios_base::beg);
211 
212  if (lines < 4)
213  return false;
214 
215  char comment; // Buffer for comment indicator, #
216 
217  // Skip first two lines
218  std::getline(ss, line);
219 
220  // Stepsize
221  std::getline(ss, line, '=');
222  boost::trim(line);
223  ss >> adaptation.step_size;
224 
225  // Metric parameters
226  std::getline(ss, line);
227  std::getline(ss, line);
228  std::getline(ss, line);
229 
230  int rows = lines - 3;
231  int cols = std::count(line.begin(), line.end(), ',') + 1;
232  adaptation.metric.resize(rows, cols);
233 
234  for (int row = 0; row < rows; row++) {
235  std::stringstream line_ss;
236  line_ss.str(line);
237  line_ss >> comment;
238 
239  for (int col = 0; col < cols; col++) {
241  std::getline(line_ss, token, ',');
242  boost::trim(token);
243  adaptation.metric(row, col) = boost::lexical_cast<double>(token);
244  }
245  std::getline(ss, line); // Read in next line
246  }
247 
248  if (ss.good())
249  return false;
250  else
251  return true;
252  }
253 
254  static bool read_samples(std::istream& in, Eigen::MatrixXd& samples,
255  stan_csv_timing& timing, std::ostream* out) {
256  std::stringstream ss;
258 
259  int rows = 0;
260  int cols = -1;
261 
262  if (in.peek() == '#' || in.good() == false)
263  return false;
264 
265  while (in.good()) {
266  bool comment_line = (in.peek() == '#');
267  bool empty_line = (in.peek() == '\n');
268 
269  std::getline(in, line);
270 
271  if (empty_line)
272  continue;
273  if (!line.length())
274  break;
275 
276  if (comment_line) {
277  if (line.find("(Warm-up)") != std::string::npos) {
278  int left = 17;
279  int right = line.find(" seconds");
280  timing.warmup
281  += boost::lexical_cast<double>(line.substr(left, right - left));
282  } else if (line.find("(Sampling)") != std::string::npos) {
283  int left = 17;
284  int right = line.find(" seconds");
285  timing.sampling
286  += boost::lexical_cast<double>(line.substr(left, right - left));
287  }
288  } else {
289  ss << line << '\n';
290  int current_cols = std::count(line.begin(), line.end(), ',') + 1;
291  if (cols == -1) {
292  cols = current_cols;
293  } else if (cols != current_cols) {
294  if (out)
295  *out << "Error: expected " << cols << " columns, but found "
296  << current_cols << " instead for row " << rows + 1
297  << std::endl;
298  return false;
299  }
300  rows++;
301  }
302 
303  in.peek();
304  }
305 
306  ss.seekg(std::ios_base::beg);
307 
308  if (rows > 0) {
309  samples.resize(rows, cols);
310  for (int row = 0; row < rows; row++) {
311  std::getline(ss, line);
312  std::stringstream ls(line);
313  for (int col = 0; col < cols; col++) {
314  std::getline(ls, line, ',');
315  boost::trim(line);
316  samples(row, col) = boost::lexical_cast<double>(line);
317  }
318  }
319  }
320  return true;
321  }
322 
323  /**
324  * Parses the file.
325  *
326  * @param[in] in input stream to parse
327  * @param[out] out output stream to send messages
328  */
329  static stan_csv parse(std::istream& in, std::ostream* out) {
330  stan_csv data;
331 
332  if (!read_metadata(in, data.metadata, out)) {
333  if (out)
334  *out << "Warning: non-fatal error reading metadata" << std::endl;
335  }
336 
337  if (!read_header(in, data.header, out)) {
338  if (out)
339  *out << "Error: error reading header" << std::endl;
341  ("Error with header of input file in parse");
342  }
343 
344  if (!read_adaptation(in, data.adaptation, out)) {
345  if (out)
346  *out << "Warning: non-fatal error reading adapation data"
347  << std::endl;
348  }
349 
350  data.timing.warmup = 0;
351  data.timing.sampling = 0;
352 
353  if (!read_samples(in, data.samples, data.timing, out)) {
354  if (out)
355  *out << "Warning: non-fatal error reading samples" << std::endl;
356  }
357 
358  return data;
359  }
360  };
361 
362  } // io
363 
364 } // stan
365 
366 #endif
const XML_Char * name
Definition: expat.h:151
constexpr auto const & right(const_AssnsIter< L, R, D, Dir > const &a, const_AssnsIter< L, R, D, Dir > const &b)
Definition: AssnsIter.h:112
Eigen::MatrixXd samples
static bool read_metadata(std::istream &in, stan_csv_metadata &metadata, std::ostream *out)
static bool read_adaptation(std::istream &in, stan_csv_adaptation &adaptation, std::ostream *out)
bool equal(T *first, T *second)
Float_t ss
Definition: plot.C:24
string trim(string in)
Definition: rootgINukeVal.C:65
stan_csv_adaptation adaptation
const XML_Char int const XML_Char * value
Definition: expat.h:331
const int cols[3]
Int_t col[ntarg]
Definition: Style.C:29
void invalid_argument(const char *function, const char *name, const T &y, const char *msg1, const char *msg2)
constexpr auto const & left(const_AssnsIter< L, R, D, Dir > const &a, const_AssnsIter< L, R, D, Dir > const &b)
Definition: AssnsIter.h:104
ifstream in
Definition: comparison.C:7
static bool read_header(std::istream &in, Eigen::Matrix< std::string, Eigen::Dynamic, 1 > &header, std::ostream *out)
static stan_csv parse(std::istream &in, std::ostream *out)
def ls(target="")
Definition: g4zmq.py:69
stan_csv_metadata metadata
::xsd::cxx::tree::token< char, normalized_string > token
Definition: Database.h:156
static bool read_samples(std::istream &in, Eigen::MatrixXd &samples, stan_csv_timing &timing, std::ostream *out)
Eigen::Matrix< std::string, Eigen::Dynamic, 1 > header
stan_csv_timing timing
enum BeamMode string