MCMCSamples.cxx
Go to the documentation of this file.
1 #include <limits>
2 #include <string>
3 #include <vector>
4 
5 #include "TGraph.h"
6 #include "TH1D.h"
7 #include "TParameter.h"
8 
9 #include "CAFAna/Core/IFitVar.h"
10 #include "CAFAna/Core/ISyst.h"
11 #include "CAFAna/Core/Utilities.h"
12 
15 #include "CAFAna/Fit/MCMCSamples.h"
16 
17 #include "OscLib/OscCalcAnalytic.h"
18 
19 #include "Utilities/func/MathUtil.h"
20 
21 // an internal tool only
22 namespace
23 {
24  class BranchStatusResetter
25  {
26  public:
27  BranchStatusResetter(TTree * tree)
28  : fTree(tree)
29  {}
30 
31  ~BranchStatusResetter()
32  {
33  if (fTree)
34  fTree->SetBranchStatus("*", false);
35  }
36  private:
37  TTree * fTree;
38  };
39 }
40 
41 namespace ana
42 {
43  const std::string MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME = "logprob";
44 
45  //----------------------------------------------------------------------
46  MCMCSamples::MCMCSamples(const std::vector<const IFitVar *> &vars,
47  const std::vector<const ana::ISyst *> &systs)
48  : fOffset(0),
49  fVars(vars),
50  fSysts(systs),
51  fBestFitFound(false),
52  fSamples(std::make_unique<TTree>("samples", "MCMC samples"))
53  {}
54 
55  //----------------------------------------------------------------------
57  const std::vector<std::string>& diagBranchNames,
58  const std::vector<const IFitVar *> &vars,
59  const std::vector<const ana::ISyst *> &systs,
60  std::unique_ptr<TTree> &tree,
61  const Hyperparameters &hyperParams,
62  double samplingTime)
63  : fOffset(offset),
64  fDiagBranches(diagBranchNames),
65  fVars(vars),
66  fSysts(systs),
67  fBestFitFound(false),
68  fSamples(std::move(tree)),
69  fHyperparams(hyperParams),
70  fSamplingTime(samplingTime)
71  {
72  // note: SetupTree() takes care of fDiagnosticVals and fEntryVals
73  SetupTree();
74  }
75 
76  //----------------------------------------------------------------------
78  : fOffset(std::move(other.fOffset)),
80  fVars(std::move(other.fVars)),
81  fSysts(std::move(other.fSysts)),
84  fSamples(std::move(other.fSamples)),
85  fEntryLL(std::move(other.fEntryLL)),
90  {
91  // note: SetupTree() takes care of fDiagnosticVals and fEntryVals
92  SetupTree();
93 
94  // almost certainly we don't want to be writing back to whatever file this may have come from ...
95  fSamples->SetDirectory(nullptr);
96  }
97 
98  //----------------------------------------------------------------------
100  {
101  fOffset = std::move(other.fOffset);
102 
103  fVars = std::move(other.fVars);
104  fSysts = std::move(other.fSysts);
105 
106  fBestFitSampleIdx = std::move(other.fBestFitSampleIdx);
107  fBestFitFound = std::move(other.fBestFitFound);
108 
109  fSamples = std::move(other.fSamples);
110 
111  fEntryLL = std::move(other.fEntryLL);
112  fDiagBranches = std::move(other.fDiagBranches);
113 
114  fDiagnosticVals = std::move(other.fDiagnosticVals);
115  fEntryVals = std::move(other.fEntryVals);
116  fHyperparams = std::move(other.fHyperparams);
117  fSamplingTime = std::move(other.fSamplingTime);
118 
119  // note: SetupTree() takes care of fDiagnosticVals and fEntryVals
120  SetupTree();
121 
122  return *this;
123  }
124 
125  //----------------------------------------------------------------------
126  void MCMCSamples::AddSample(const std::vector<double> &sample)
127  {
128 // std::cout << "\nGot sample: ";
129 // for (const auto & s: sample)
130 // std::cout << s << " ";
131 // std::cout << std::endl;
132 // std::cout << "sample size: " << sample.size() << std::endl;
133 // std::cout << " target size = LL (1) + diagVals (" << fDiagnosticVals.size() << ") + entryVals (" << fEntryVals.size() << ")" << std::endl;
134  fEntryLL = sample[0];
135  for (std::size_t idx = 1; idx < fOffset && idx < sample.size(); idx++)
136  {
137  assert(idx-1 < fDiagnosticVals.size());
138  fDiagnosticVals[idx - 1] = sample[idx];
139  }
140  for (std::size_t targetIdx = 0, sourceIdx = fOffset; sourceIdx < sample.size(); targetIdx++, sourceIdx++)
141  {
142  assert(targetIdx < fEntryVals.size());
143  double val = sample[sourceIdx];
144  // if it's a Var, allow it to rewrite the value if it wants to
145  // (e.g., dcp is pinned back to (0, 2pi))
146  if (targetIdx < fVars.size())
147  {
148  static osc::OscCalcAnalytic calc;
149  fVars[targetIdx]->SetValue(&calc, val);
150  val = fVars[targetIdx]->GetValue(&calc);
151  }
152  fEntryVals[targetIdx] = val;
153  }
154  fSamples->Fill();
155 // fSamples->Scan("*");
156  }
157 
158  //----------------------------------------------------------------------
160  {
161  // if we don't have any samples at all, then just swap
162  if (!fSamples || (fDiagBranches.size() + fVars.size() + fSysts.size()) == 0)
163  {
164  this->fSamples.reset(other.fSamples->CloneTree());
165  this->fDiagBranches = other.fDiagBranches;
166  this->fVars = other.fVars;
167  this->fSysts = other.fSysts;
168  //SetupTree();
169 
170  // make sure we extract this guy out of his old directory, if any,
171  // otherwise he wants to write back to that file
172  this->fSamples->LoadBaskets();
173  this->fSamples->SetDirectory(nullptr);
174 
175  return;
176  }
177 
178  // first, check that the branches (etc.) all match.
179  // if they don't, throw an exception
180  bool branchesSame = true;
181  branchesSame = branchesSame && fDiagBranches == other.fDiagBranches;
182  branchesSame = branchesSame && fVars == other.fVars;
183  branchesSame = branchesSame && fSysts == other.fSysts;
184  if (!branchesSame)
185  throw std::runtime_error("MCMCSamples::AdoptSamples(): branches are not the same!");
186 
187 
188  // then, take the entries from its TTree and and them to ours using TTree::Merge().
189  // clear the other tree once done.
190  BranchStatusResetter bsr(fSamples.get()); // turn all branches off when done
191  fSamples->SetBranchStatus("*", true);
192  other.fSamples->SetBranchStatus("*", true);
193  TList otherTreeList;
194  otherTreeList.SetOwner(false);
195  otherTreeList.Add(other.fSamples.get());
196  fSamples->Merge(&otherTreeList);
197 
198  otherTreeList.Clear();
199  other.fSamples.reset(nullptr);
200 
201  // clear the rest of the other MCMCSamples so it isn't left in an intermediate state
202  other.fDiagBranches.clear();
203  other.fVars.clear();
204  other.fSysts.clear();
205  other.fOffset = 0;
206  other.fBestFitSampleIdx = 0;
207  other.fBestFitFound = false;
208  other.fEntryLL = 0;
209  other.fDiagnosticVals.clear();
210  other.fEntryVals.clear();
211 
212  // finally, the best fit point isn't necessarily the same any more, so force recalculation the next time it's needed
213  fBestFitFound = false;
214  }
215 
216  //----------------------------------------------------------------------
217  std::size_t MCMCSamples::BestFitSampleIdx() const
218  {
219  if (fBestFitFound)
220  return fBestFitSampleIdx;
221 
222  double maxLL = -std::numeric_limits<double>::infinity();
223  auto branch = fSamples->GetBranch(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str());
224  branch->SetStatus(true);
225  for (int idx = 0; idx < fSamples->GetEntries(); idx++)
226  {
227  branch->GetEntry(idx);
228  if (fEntryLL > maxLL)
229  {
231  maxLL = fEntryLL;
232  fBestFitFound = true;
233  }
234  }
235  return fBestFitSampleIdx;
236  }
237 
238  //----------------------------------------------------------------------
239  std::size_t MCMCSamples::DiagOffset(const std::string &diagName) const
240  {
241  auto itr = std::find(fDiagBranches.begin(), fDiagBranches.end(), diagName);
242  assert(itr != fDiagBranches.end() && ("Unknown diagnostic branch: " + diagName).c_str());
243  return std::distance(fDiagBranches.begin(), itr);
244  }
245 
246 
247  //----------------------------------------------------------------------
248  std::unique_ptr<MCMCSamples> MCMCSamples::LoadFrom(TDirectory *dir,
249  const std::string& name)
250  {
251  dir = dir->GetDirectory(name.c_str()); // switch to subdir
252  assert(dir);
253  DontAddDirectory guard;
254 
255  TObjString *tag = (TObjString *) dir->Get("type");
256  assert(tag);
257  assert(tag->GetString() == "MCMCSamples");
258 
259  auto offset = dynamic_cast<TParameter<int>*>(dir->Get("offset"));
260 
261  auto diagBranchNames = dynamic_cast<TList*>(dir->Get("diagBranchNames"));
262  std::vector<std::string> diagBranches;
263  for (const TObject * brNameObj : *diagBranchNames)
264  {
265  auto str = dynamic_cast<const TObjString*>(brNameObj);
266  if (str)
267  diagBranches.emplace_back(str->GetName());
268  }
269 
270  auto fitVarNames = dynamic_cast<TList*>(dir->Get("fitVarNames"));
271  std::vector<const IFitVar*> fitVars;
272  for (const TObject * varNmObj : *fitVarNames)
273  {
274  auto str = dynamic_cast<const TObjString*>(varNmObj);
275  if (str)
276  fitVars.emplace_back(Registry<IFitVar>::ShortNameToPtr(str->GetName()));
277  }
278 
279  auto systNames = dynamic_cast<TList*>(dir->Get("systNames"));
280  std::vector<const ISyst*> systs;
281  for (const TObject * systNmObj : *systNames)
282  {
283  auto str = dynamic_cast<const TObjString*>(systNmObj);
284  if (str)
285  systs.emplace_back(Registry<ISyst>::ShortNameToPtr(str->GetName()));
286  }
287 
288  auto samplesPtr = dynamic_cast<TTree*>(dir->Get("samples"));
289  std::unique_ptr<TTree> samples(samplesPtr);
290  if (samples->GetCurrentFile())
291  {
292  samples->SetBranchStatus("*", true);
293  samples->LoadBaskets(); // read the entire TTree into memory
294  samples->SetDirectory(nullptr); // disassociate it from the file it came from so that when the file is closed it persists
295  }
296 
297  // these may not exist (hyperparameters are not merged when hadd'ing samples)
298  double stepSize = std::numeric_limits<double>::signaling_NaN();
299  std::unique_ptr<TMatrixD> invMetric;
300  auto hyperParamsDir = dynamic_cast<TDirectory *>(dir->Get("hyperparams"));
301  if (hyperParamsDir)
302  {
303  if (auto key = hyperParamsDir->FindKey("stepsize"))
304  stepSize = key->ReadObject<TParameter<double>>()->GetVal();
305  if (auto key = hyperParamsDir->FindKey("inv_metric"))
306  invMetric = std::unique_ptr<TMatrixD>(key->ReadObject<TMatrixD>());
307  }
308  Hyperparameters hyperparams{stepSize, std::move(invMetric)};
309 
310  // ditto
311  double samplingTime = std::numeric_limits<double>::signaling_NaN();
312  auto samplingTimeDir = dynamic_cast<TDirectory*>(dir->Get("samplingTime"));
313  if (!samplingTimeDir) // for backwards compatibility. might be stored at top level
314  samplingTimeDir = dir;
315  if (auto key = samplingTimeDir->FindKey("samplingTime"))
316  samplingTime = key->ReadObject<TParameter<double>>()->GetVal();
317 
318  delete dir;
319 
320  return std::unique_ptr<MCMCSamples>(new MCMCSamples(offset->GetVal(),
321  diagBranches,
322  fitVars,
323  systs,
324  samples,
325  hyperparams,
326  samplingTime));
327  }
328 
329  //----------------------------------------------------------------------
331  {
332  assert(std::find(fVars.begin(), fVars.end(), var) != fVars.end());
333  return Bayesian1DMarginal(*this, var, marginalMode);
334  }
335 
336  //----------------------------------------------------------------------
338  {
339  assert(std::find(fSysts.begin(), fSysts.end(), syst) != fSysts.end());
340  return Bayesian1DMarginal(*this, syst, marginalMode);
341  }
342 
343  //----------------------------------------------------------------------
344  template <typename T>
345  double MCMCSamples::MaxValue(const T *var) const
346  {
348  "MCMCSamples::MaxValue() can only be used with IFitVars and ISysts");
349  double max = -std::numeric_limits<double>::infinity();
350  std::size_t offset = VarOffset(var);
351  for (int idx = 0; idx < fSamples->GetEntries(); idx++)
352  {
353  double val = SampleValue(idx, var->ShortName(), offset);
354  if (val > max)
355  max = val;
356  }
357  return max;
358  }
359  // explicit instantiation of the correct types
360  template double MCMCSamples::MaxValue(const IConstrainedFitVar *) const;
361  template double MCMCSamples::MaxValue(const IFitVar *) const;
362  template double MCMCSamples::MaxValue(const ISyst *) const;
363 
364  //----------------------------------------------------------------------
365  template <typename T>
366  double MCMCSamples::MinValue(const T *var) const
367  {
369  "MCMCSamples::MinValue() can only be used with IFitVars and ISysts");
370  double min = std::numeric_limits<double>::infinity();
371  std::size_t offset = VarOffset(var);
372  for (int idx = 0; idx < fSamples->GetEntries(); idx++)
373  {
374  double val = SampleValue(idx, var->ShortName(), offset);
375  if (val < min)
376  min = val;
377  }
378  return min;
379  }
380  // explicit instantiation of the correct types
381  template double MCMCSamples::MinValue(const IConstrainedFitVar *) const;
382  template double MCMCSamples::MinValue(const IFitVar *) const;
383  template double MCMCSamples::MinValue(const ISyst *) const;
384 
385  //----------------------------------------------------------------------
386  void MCMCSamples::ParseDiagnosticBranches(const std::vector<std::string>& names)
387  {
388  std::size_t idx = 1;
389  fDiagnosticVals.resize(names.size() - (fVars.size() + fSysts.size() + 1));
390  auto firstFitVar = fVars.empty() ? fSysts[0]->ShortName() : fVars[0]->ShortName();
391  for (; idx < names.size(); idx++)
392  {
393  if (names[idx] == firstFitVar)
394  break;
395  fDiagBranches.push_back(names[idx]);
396  }
397  assert(idx < names.size() && "Did not find fit variables in Stan output!");
398  assert (names.size() - idx >= fVars.size() + fSysts.size() && "Stan output didn't contain all FitVars & Systs!");
399  }
400 
401  //----------------------------------------------------------------------
402  std::pair<std::size_t, double> MCMCSamples::QuantileLL(double quantile) const
403  {
404  auto LLs = SortedLLs();
405  return LLs[std::size_t((1 - quantile) * LLs.size())];
406  }
407 
408  //----------------------------------------------------------------------
409  std::pair<std::size_t, double> MCMCSamples::QuantileLL(double quantile,
410  std::vector<std::pair<std::size_t, double>> &LLs) const
411  {
412  if (LLs.empty())
413  LLs = SortedLLs(); // I think this should be efficient since std::vector has a move assignment operator
414  return LLs[std::size_t((1 - quantile) * (LLs.size()-1))]; // -1 so that we don't fall off the edge at Q=1.0
415  }
416 
417  //----------------------------------------------------------------------
418  std::map<double, std::pair<std::size_t, double>> MCMCSamples::QuantileLL(const std::vector<double> &quantiles) const
419  {
420  std::map<double, std::pair<std::size_t, double> > ret;
421  std::vector<std::pair<std::size_t, double> > LLs;
422  for (const auto & quantile : quantiles)
423  ret[quantile] = QuantileLL(quantile, LLs);
424 
425  return ret;
426  }
427 
428  //----------------------------------------------------------------------
430  {
431  BranchStatusResetter bsr(fSamples.get()); // turn branches off when done
432  // these diagnostics adapted from CmdStan's diagnose.cpp
433  auto treeDepthBr = fSamples->GetBranch("treedepth__");
434  if (treeDepthBr)
435  {
436  treeDepthBr->SetStatus(true);
437  unsigned int numMax = 0;
438  auto brOffset = DiagOffset(treeDepthBr->GetName());
439  for (int idx = 0; idx < treeDepthBr->GetEntries(); idx++)
440  {
441  treeDepthBr->GetEntry(idx);
442  if (fDiagnosticVals[brOffset] >= cfg.max_depth)
443  numMax++;
444  }
445  if (numMax > 0)
446  {
447  std::cout << numMax << " of " << treeDepthBr->GetEntries() << " ("
448  << std::setprecision(2)
449  << 100 * static_cast<double>(numMax) / treeDepthBr->GetEntries()
450  << "%) transitions hit the maximum treedepth limit of "
451  << cfg.max_depth << ", or 2^" << cfg.max_depth << " leapfrog steps."
452  << " Trajectories that are prematurely terminated due to this"
453  << " limit will result in slow exploration and you should"
454  << " increase the limit to ensure optimal performance."
455  << std::endl << std::endl;
456 
457  }
458  }
459 
460  auto divergentBr = fSamples->GetBranch("divergent__");
461  if (divergentBr)
462  {
463  divergentBr->SetStatus(true);
464  unsigned int numDiv = 0;
465  auto brOffset = DiagOffset(divergentBr->GetName());
466  for (int idx = 0; idx < divergentBr->GetEntries(); idx++)
467  {
468  divergentBr->GetEntry(idx);
469  if (fDiagnosticVals[brOffset] >0)
470  numDiv++;
471  }
472  if (numDiv > 0)
473  std::cout << numDiv << " of " << divergentBr->GetEntries() << " ("
474  << std::setprecision(2)
475  << 100 * static_cast<double>(numDiv) / divergentBr->GetEntries()
476  << "%) transitions ended with a divergence. These divergent"
477  << " transitions indicate that HMC is not fully able to"
478  << " explore the posterior distribution. Try rerunning with"
479  << " adapt_delta set to a larger value and see if the"
480  << " divergences vanish. If increasing adapt delta towards"
481  << " 1 does not remove the divergences then you will likely"
482  << " need to reparameterize your model."
483  << std::endl << std::endl;
484  }
485 
486  auto energyBr = fSamples->GetBranch("energy__");
487  if (energyBr)
488  {
489  double delta_e_sq_mean = 0;
490  double e_mean = 0;
491  double e_var = 0;
492 
493  double e_sample = 0;
494  double e_sample_prev = 0;
495 
496  auto brOffset = DiagOffset(energyBr->GetName());
497  for (int idx = 0; idx < energyBr->GetEntries(); idx++)
498  {
499  energyBr->SetStatus(true);
500  energyBr->GetEntry(idx);
501  e_sample = fDiagnosticVals[brOffset];
502 
503  double delta_e_sq = util::sqr(e_sample - e_sample_prev);
504  double d = delta_e_sq - delta_e_sq_mean;
505  delta_e_sq_mean += d / (idx + 1);
506 
507  d = e_sample - e_mean;
508  e_mean += d / (idx + 2);
509  e_var += d * (e_sample - e_mean);
510 
511  e_sample_prev = e_sample;
512  }
513 
514  e_var /= static_cast<double>(energyBr->GetEntries() - 1);
515 
516  double e_bfmi = delta_e_sq_mean / e_var;
517 
518  const double e_bfmi_threshold = 0.3;
519  if (e_bfmi < e_bfmi_threshold)
520  std::cout << "The E-BFMI, " << e_bfmi << ", is below the nominal"
521  << " threshold of " << e_bfmi_threshold << " which suggests"
522  << " that HMC may have trouble exploring the target"
523  << " distribution. You should consider any"
524  << " reparameterizations if possible."
525  << std::endl << std::endl;
526  }
527 
528  // unfortunately the remaining diagnostics really require a stan::mcmc::chains<> object.
529  // this presents an annoying issue: the TTree storage is necessary for use with RooNDKeysPDF,
530  // and is much more convenient for persistence.
531  // even more unfortunately, these are the most important diagnostics.
532  // Should we build a stan::mcmc::chains from it?
533  // todo: decide how to implement remaining diagnostics
534 /*
535  else if (chains.param_name(i).find("__") == std::string::npos) {
536 
537  double n_eff = chains.effective_sample_size(i);
538  if (n_eff / num_samples < 0.001)
539  bad_n_eff_names.push_back(chains.param_name(i));
540 
541  double split_rhat = chains.split_potential_scale_reduction(i);
542  if (split_rhat > 1.1)
543  bad_rhat_names.push_back(chains.param_name(i));
544  }
545  }
546 
547  if (bad_n_eff_names.size() > 0) {
548  std::cout << "The following parameters had fewer than 0.001 effective"
549  << " samples per transition:" << std::endl;
550  std::cout << " ";
551  for (size_t n = 0; n < bad_n_eff_names.size() - 1; ++n)
552  std::cout << bad_n_eff_names.at(n) << ", ";
553  std::cout << bad_n_eff_names.back() << std::endl;
554 
555  std::cout << "Such low values indicate that the effective sample size"
556  << " estimators may be biased high and actual performance"
557  << " may be substantially lower than quoted."
558  << std::endl << std::endl;
559  }
560 
561  if (bad_rhat_names.size() > 0) {
562  std::cout << "The following parameters had split R-hat less than 1.1:"
563  << std::endl;
564  std::cout << " ";
565  for (size_t n = 0; n < bad_rhat_names.size() - 1; ++n)
566  std::cout << bad_rhat_names.at(n) << ", ";
567  std::cout << bad_rhat_names.back() << std::endl;
568 
569  std::cout << "Such high values indicate incomplete mixing and biased"
570  << "estimation. You should consider regularization your model"
571  << " with additional prior information or looking for a more"
572  << " effective parameterization."
573  << std::endl << std::endl;
574  }
575 */
576 
577  }
578  //----------------------------------------------------------------------
579  MCMCSample MCMCSamples::Sample(std::size_t idx) const
580  {
581  BranchStatusResetter bsr(fSamples.get()); // turn branches off when done
582 
583  fSamples->SetBranchStatus("*", true);
584  fSamples->GetEntry(idx);
586  }
587 
588  //----------------------------------------------------------------------
589  double MCMCSamples::SampleValue(std::size_t rowIdx, const std::string & branchName, std::size_t varIdx) const
590  {
591  BranchStatusResetter bsr(fSamples.get()); // turn branches off when done
592 
593  fSamples->SetBranchStatus(branchName.c_str(), true);
594  auto branch = fSamples->GetBranch(branchName.c_str());
595  branch->SetStatus(true);
596  branch->GetEntry(rowIdx);
597  return fEntryVals[varIdx];
598  }
599 
600  //----------------------------------------------------------------------
601  void MCMCSamples::SampleValues(std::size_t idx,
602  const std::vector<const IFitVar *> &vars,
603  std::map<const IFitVar *, double> &varVals) const
604  {
605  for (const auto &var : vars)
606  varVals[var] = SampleValue(var, idx);
607  }
608 
609  //----------------------------------------------------------------------
610  void MCMCSamples::SampleValues(std::size_t idx,
611  const std::vector<const IFitVar *> &vars,
612  std::map<const IFitVar *, double> &varVals,
613  const std::vector<const ana::ISyst *> &systs,
614  std::map<const ana::ISyst *, double> &systVals) const
615  {
616  SampleValues(idx, vars, varVals);
617  for (const auto &syst : systs)
618  systVals[syst] = SampleValue(syst, idx);
619  }
620 
621 
622  //----------------------------------------------------------------------
623  void MCMCSamples::SaveTo(TDirectory *dir, const std::string& name) const
624  {
625  TDirectory * oldDir = gDirectory;
626  dir = dir->mkdir(name.c_str()); // switch to subdir
627  dir->cd();
628 
629  TObjString("MCMCSamples").Write("type");
630 
631  TParameter<int> offset("offset", fOffset);
632  offset.Write();
633 
634  // note that these are being sorted so that the list ordering is deterministic
635  TList diagBranchNames;
636  diagBranchNames.SetOwner();
637  std::vector<std::string> branchNames(fDiagBranches);
638  std::sort(branchNames.begin(), branchNames.end());
639  for (const auto & brName : branchNames)
640  diagBranchNames.AddLast(new TObjString(brName.c_str()));
641  diagBranchNames.Write("diagBranchNames", TObject::kSingleKey);
642 
643  branchNames.clear();
644  std::transform(fVars.begin(), fVars.end(), std::back_inserter(branchNames),
645  [](const IFitVar* var){return var->ShortName();});
646  TList fitVarNames;
647  fitVarNames.SetOwner();
648  for (const auto & brName : branchNames)
649  fitVarNames.AddLast(new TObjString(brName.c_str()));
650  fitVarNames.Write("fitVarNames", TObject::kSingleKey);
651 
652  branchNames.clear();
653  std::transform(fSysts.begin(), fSysts.end(), std::back_inserter(branchNames),
654  [](const ISyst* s){return s->ShortName();});
655  TList systNames;
656  systNames.SetOwner();
657  for (const auto & brName : branchNames)
658  systNames.AddLast(new TObjString(brName.c_str()));
659  systNames.Write("systNames", TObject::kSingleKey);
660 
661  // re-enable all the branches before writing...
662  fSamples->SetBranchStatus("*", true);
663  fSamples->Write("samples");
664  // don't let it get attached to the file that's about to be closed
665  fSamples->LoadBaskets();
666  fSamples->SetDirectory(nullptr);
667  auto hyperdir = dir->mkdir("hyperparams");
668  hyperdir->cd();
669  TParameter<double> stepSize("stepsize", fHyperparams.stepSize);
670  stepSize.Write();
672  fHyperparams.invMetric->Write("inv_metric");
673  TObjString("Hyperparameters").Write("type"); // so that hadd_cafana can be taught to skip over them
674  dir->cd();
675  hyperdir->Write();
676 
677  dir->cd();
678  auto timedir = dir->mkdir("samplingTime");
679  timedir->cd();
680  TObjString("SamplingTime").Write("type"); // so that hadd_cafana can be taught to skip over them
681  TParameter<double>("samplingTime", fSamplingTime).Write();
682  dir->cd();
683  timedir->Write();
684 
685  dir->Write();
686  delete dir;
687 
688  if (oldDir)
689  oldDir->cd();
690  }
691 
692  //----------------------------------------------------------------------
693  void MCMCSamples::SetNames(const std::vector<std::string> &names)
694  {
695  assert (fOffset == 0 && "MCMCSamples::SetNames() was called after tree was already set up!");
696 
698  SetupTree();
699  }
700 
701  //----------------------------------------------------------------------
703  {
704  auto CreateOrSetAddress = [](TTree * tree, const std::string& brName, double* address)
705  {
706  auto name = brName.c_str();
707  TBranch * br = tree->GetBranch(name);
708  if (br)
709  br->SetAddress(address);
710  else
711  tree->Branch(name, address);
712  return br;
713  };
714 
715  CreateOrSetAddress(fSamples.get(), MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME, &fEntryLL);
716 
717  fDiagnosticVals.clear();
718  fDiagnosticVals.resize(fDiagBranches.size(), std::numeric_limits<double>::signaling_NaN());
719  assert(!fVars.empty() || !fSysts.empty());
720  auto firstFitVar = fVars.empty() ? fSysts[0]->ShortName() : fVars[0]->ShortName();
721  for (std::size_t valIdx = 0; valIdx < fDiagBranches.size(); valIdx++)
722  CreateOrSetAddress(fSamples.get(), fDiagBranches[valIdx], &fDiagnosticVals[valIdx]);
723 
724  fOffset = 1 + fDiagBranches.size(); // + 1 for the LL at the beginning
725 
726  fEntryVals.clear();
727  fEntryVals.resize(fVars.size() + fSysts.size(), std::numeric_limits<double>::signaling_NaN());
728  std::size_t valIdx = 0;
729  for (const auto & var : fVars)
730  CreateOrSetAddress(fSamples.get(), var->ShortName(), &fEntryVals[valIdx++]);
731  for (const auto &syst : fSysts)
732  CreateOrSetAddress(fSamples.get(), syst->ShortName(), &fEntryVals[valIdx++]);
733 
734  // disable the 'autosave' and 'autoflush' mechanisms
735  // since there's nowhere to write to anyway
736  // (this tree is memory-only unless SaveTo()'d).
737  fSamples->SetAutoFlush(0);
738  fSamples->SetAutoSave(0);
739  }
740 
741  //----------------------------------------------------------------------
742  std::vector<std::pair<std::size_t, double>> MCMCSamples::SortedLLs() const
743  {
744  std::vector<std::pair<std::size_t, double>> LLs;
745  LLs.reserve(fSamples->GetEntries());
746  auto branch = fSamples->GetBranch(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str());
747  fSamples->SetBranchStatus("*", false);
748  fSamples->SetBranchStatus(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str(), true);
749  for (int idx = 0; idx < fSamples->GetEntries(); idx++)
750  {
751  branch->GetEntry(idx);
752  LLs.push_back(std::make_pair(idx, fEntryLL));
753  }
754 
755  std::sort(LLs.begin(), LLs.end(),
756  [](const std::pair<std::size_t, double> & a, const std::pair<std::size_t, double> & b)
757  {
758  return a.second < b.second;
759  }
760  );
761 
762  return std::move(LLs);
763  }
764 
765  //----------------------------------------------------------------------
766  const TTree *MCMCSamples::ToTTree() const
767  {
768  // presumably user doesn't want the tree with some branches missing...
769  fSamples->SetBranchStatus("*", true);
770  return fSamples.get();
771  }
772 
773  //----------------------------------------------------------------------
774  template<typename SystOrVar>
775  TGraph MCMCSamples::Trace(const SystOrVar *fittable) const
776  {
777  static_assert((std::is_base_of_v<IFitVar, SystOrVar> || std::is_same_v<SystOrVar, ISyst>),
778  "MCMCSamples::Trace() can only be used with Systs or Vars");
779 
780  TGraph ret(fSamples->GetEntries());
781  for (int sample = 0; sample < fSamples->GetEntries(); sample++)
782  {
783  double val = this->SampleValue(fittable, sample);
784  ret.SetPoint(sample, sample+1, val);
785  }
786 
787  return ret;
788  }
789  template TGraph MCMCSamples::Trace(const IFitVar *fittable) const;
790  template TGraph MCMCSamples::Trace(const ISyst *fittable) const;
791 
792  //----------------------------------------------------------------------
793  std::size_t MCMCSamples::VarOffset(const IFitVar *var) const
794  {
795  auto itr = std::find(fVars.begin(), fVars.end(), var);
796  assert(itr != fVars.end() && ("Var was not fitted: " + var->ShortName()).c_str());
797  return std::distance(fVars.begin(), itr);
798  }
799 
800  //----------------------------------------------------------------------
801  std::size_t MCMCSamples::VarOffset(const ana::ISyst *syst) const
802  {
803  auto itr = std::find(fSysts.begin(), fSysts.end(), syst);
804  assert(itr != fSysts.end() && ("Syst was not fitted: " + syst->ShortName()).c_str());
805  return fVars.size() + std::distance(fSysts.begin(), itr);
806  }
807 
808 
809 
810 }
std::size_t fBestFitSampleIdx
Definition: MCMCSamples.h:261
const XML_Char * name
Definition: expat.h:151
std::unique_ptr< TMatrixD > invMetric
Definition: MCMCSamples.h:41
std::vector< double > quantiles(TH1D *h)
Definition: absCal.cxx:528
std::size_t VarOffset(const IFitVar *var) const
Where in fEntryVals is the given var?
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
double MaxValue(const T *var) const
Max value of this var/syst in the set.
std::vector< SystGroupDef > systs
Definition: syst_header.h:385
const TTree * ToTTree() const
Get a TTree with the MCMC samples in it.
virtual const std::string & ShortName() const final
The name printed out to the screen.
Definition: ISyst.h:27
Configuration parameters for the Stan MCMC fitter, bundled up together for easy storage and parameter...
Definition: StanConfig.h:6
std::vector< const ana::IFitVar * > fVars
Definition: MCMCSamples.h:258
double SampleValue(const IFitVar *var, std::size_t idx) const
Get the value of Var var for sample number .
Definition: MCMCSamples.h:169
std::pair< std::size_t, double > QuantileLL(double quantile) const
std::pair< Spectrum *, CheatDecomp * > make_pair(SpectrumLoaderBase &loader_data, SpectrumLoaderBase &loader_mc, HistAxis *axis, Cut *cut, const SystShifts &shift, const Var &wei)
Definition: DataMCLoad.C:336
std::size_t fOffset
Definition: MCMCSamples.h:256
unsigned distance(const T &t1, const T &t2)
osc::OscCalcDumb calc
T sqr(T x)
More efficient square function than pow(x,2)
Definition: MathUtil.h:23
Encapsulate code to systematically shift a caf::SRProxy.
Definition: ISyst.h:14
void AddSample(const std::vector< double > &sample)
MCMCSample Sample(std::size_t idx) const
MCMCSamples(const std::vector< const IFitVar * > &vars={}, const std::vector< const ana::ISyst * > &systs={})
Definition: MCMCSamples.cxx:46
void SampleValues(std::size_t idx, const std::vector< const ana::IFitVar * > &vars, std::map< const ana::IFitVar *, double > &varVals) const
Get the values of FitVars vars for sample number idx.
std::vector< const ana::ISyst * > fSysts
Definition: MCMCSamples.h:259
const XML_Char * s
Definition: expat.h:262
void SaveTo(TDirectory *dir, const std::string &name) const
Save this guy to a file so we don&#39;t have to rerun the MCMC.
MarginalMode
How to construct marginal distributions.
std::vector< std::pair< std::size_t, double > > SortedLLs() const
static std::unique_ptr< MCMCSamples > LoadFrom(TDirectory *dir, const std::string &name)
Load from file.
void RunDiagnostics(const StanConfig &cfg) const
Do some checks on the post-fit samples.
const XML_Char int const XML_Char * value
Definition: expat.h:331
std::unique_ptr< TTree > fSamples
Definition: MCMCSamples.h:265
const double a
double MinValue(const T *var) const
Min value of this var/syst in the set.
Float_t d
Definition: plot.C:236
const std::map< std::pair< std::string, std::string >, Variable > vars
TGraph Trace(const SystOrVar *fittable) const
Plot the trace of a Var or Syst vs. the MCMC sample number.
Hyperparameters fHyperparams
Hyperparameters deduced after adaptation, or manually set.
Definition: MCMCSamples.h:269
unsigned int quantile(std::vector< std::vector< double >> v, float E, float hadEfrac)
Definition: Toy_analyses.C:480
OStream cout
Definition: OStream.cxx:6
static float min(const float a, const float b, const float c)
Definition: absgeo.cxx:45
double SampleLL(std::size_t idx) const
Get the LL for sample number .
Definition: MCMCSamples.h:162
void AdoptSamples(MCMCSamples &&other)
Add the samples from other into this MCMCSamples. Warning: other will be cleared! ...
int max_depth
Depth of the binary tree used by the HMC when taking its leapfrog steps. Note: the number of leapfrog...
Definition: StanConfig.h:70
const std::string & ShortName() const
Definition: IFitVar.h:37
TDirectory * dir
Definition: macro.C:5
static const std::string LOGLIKELIHOOD_BRANCH_NAME
Definition: MCMCSamples.h:37
std::map< std::string, std::string > systNames
Definition: PlotUnfolding.C:32
std::vector< double > fDiagnosticVals
Definition: MCMCSamples.h:268
const hit & b
Definition: hits.cxx:21
assert(nhit_max >=nhit_nbins)
Bayesian1DMarginal MarginalizeTo(const IFitVar *var, BayesianMarginal::MarginalMode marginalMode=BayesianMarginal::MarginalMode::kHistogram) const
Marginalize over all other variables to obtain a 1D profile in var.
Interface definition for fittable variables.
Definition: IFitVar.h:17
std::size_t DiagOffset(const std::string &diagName) const
Where in fDiagnosticVals is the given diagnostic?
double fSamplingTime
how long did we spend sampling?
Definition: MCMCSamples.h:270
double T
Definition: Xdiff_gwt.C:5
std::vector< std::string > fDiagBranches
Definition: MCMCSamples.h:257
void ParseDiagnosticBranches(const std::vector< std::string > &names)
Set up the storage tree based on the branch names given us by Stan.
Prevent histograms being added to the current directory.
Definition: UtilsExt.h:46
T max(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
Definition: statistics.h:68
MCMCSamples & operator=(MCMCSamples &&other)
Move assignment: need to call SetupTree(), so the default move assignment operator won&#39;t work...
Definition: MCMCSamples.cxx:99
void SetNames(const std::vector< std::string > &names)
std::vector< double > fEntryVals
Definition: MCMCSamples.h:267
std::size_t BestFitSampleIdx() const
Retrieve the index of the best-fit sample (lowest LL), which can then be used with the SampleLL() or ...
void SetupTree()
Hook up the storage tree to our internal variables.
A single MCMC sample.
Definition: MCMCSample.h:14
gm Write()
enum BeamMode string