MCMCSamples.cxx
Go to the documentation of this file.
1 #include <limits>
2 #include <string>
3 #include <vector>
4 
5 #include "TH1D.h"
6 #include "TParameter.h"
7 
8 #include "CAFAna/Core/IFitVar.h"
9 #include "CAFAna/Core/ISyst.h"
10 #include "CAFAna/Core/Utilities.h"
11 
14 #include "CAFAna/Fit/MCMCSamples.h"
15 
16 #include "OscLib/OscCalcAnalytic.h"
17 
18 #include "Utilities/func/MathUtil.h"
19 
20 // an internal tool only
21 namespace
22 {
23  class BranchStatusResetter
24  {
25  public:
26  BranchStatusResetter(TTree * tree)
27  : fTree(tree)
28  {}
29 
30  ~BranchStatusResetter()
31  {
32  if (fTree)
33  fTree->SetBranchStatus("*", false);
34  }
35  private:
36  TTree * fTree;
37  };
38 }
39 
40 namespace ana
41 {
42  const std::string MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME = "logprob";
43 
44  //----------------------------------------------------------------------
45  MCMCSamples::MCMCSamples(const std::vector<const IFitVar *> &vars,
46  const std::vector<const ana::ISyst *> &systs)
47  : fOffset(0),
48  fVars(vars),
49  fSysts(systs),
50  fBestFitFound(false),
51  fSamples(std::make_unique<TTree>("samples", "MCMC samples"))
52  {}
53 
54  //----------------------------------------------------------------------
56  const std::vector<std::string>& diagBranchNames,
57  const std::vector<const IFitVar *> &vars,
58  const std::vector<const ana::ISyst *> &systs,
59  std::unique_ptr<TTree> &tree,
60  const Hyperparameters &hyperParams,
61  double samplingTime)
62  : fOffset(offset),
63  fDiagBranches(diagBranchNames),
64  fVars(vars),
65  fSysts(systs),
66  fBestFitFound(false),
67  fSamples(std::move(tree)),
68  fHyperparams(hyperParams),
69  fSamplingTime(samplingTime)
70  {
71  // note: SetupTree() takes care of fDiagnosticVals and fEntryVals
72  SetupTree();
73  }
74 
75  //----------------------------------------------------------------------
77  : fOffset(std::move(other.fOffset)),
79  fVars(std::move(other.fVars)),
80  fSysts(std::move(other.fSysts)),
83  fSamples(std::move(other.fSamples)),
84  fEntryLL(std::move(other.fEntryLL)),
89  {
90  // note: SetupTree() takes care of fDiagnosticVals and fEntryVals
91  SetupTree();
92 
93  // almost certainly we don't want to be writing back to whatever file this may have come from ...
94  fSamples->SetDirectory(nullptr);
95  }
96 
97  //----------------------------------------------------------------------
99  {
100  fOffset = std::move(other.fOffset);
101 
102  fVars = std::move(other.fVars);
103  fSysts = std::move(other.fSysts);
104 
105  fBestFitSampleIdx = std::move(other.fBestFitSampleIdx);
106  fBestFitFound = std::move(other.fBestFitFound);
107 
108  fSamples = std::move(other.fSamples);
109 
110  fEntryLL = std::move(other.fEntryLL);
111  fDiagBranches = std::move(other.fDiagBranches);
112 
113  fDiagnosticVals = std::move(other.fDiagnosticVals);
114  fEntryVals = std::move(other.fEntryVals);
115  fHyperparams = std::move(other.fHyperparams);
116  fSamplingTime = std::move(other.fSamplingTime);
117 
118  // note: SetupTree() takes care of fDiagnosticVals and fEntryVals
119  SetupTree();
120 
121  return *this;
122  }
123 
124  //----------------------------------------------------------------------
125  void MCMCSamples::AddSample(const std::vector<double> &sample)
126  {
127 // std::cout << "\nGot sample: ";
128 // for (const auto & s: sample)
129 // std::cout << s << " ";
130 // std::cout << std::endl;
131 // std::cout << "sample size: " << sample.size() << std::endl;
132 // std::cout << " target size = LL (1) + diagVals (" << fDiagnosticVals.size() << ") + entryVals (" << fEntryVals.size() << ")" << std::endl;
133  fEntryLL = sample[0];
134  for (std::size_t idx = 1; idx < fOffset && idx < sample.size(); idx++)
135  {
136  assert(idx-1 < fDiagnosticVals.size());
137  fDiagnosticVals[idx - 1] = sample[idx];
138  }
139  for (std::size_t targetIdx = 0, sourceIdx = fOffset; sourceIdx < sample.size(); targetIdx++, sourceIdx++)
140  {
141  assert(targetIdx < fEntryVals.size());
142  double val = sample[sourceIdx];
143  // if it's a Var, allow it to rewrite the value if it wants to
144  // (e.g., dcp is pinned back to (0, 2pi))
145  if (targetIdx < fVars.size())
146  {
147  static osc::OscCalcAnalytic calc;
148  fVars[targetIdx]->SetValue(&calc, val);
149  val = fVars[targetIdx]->GetValue(&calc);
150  }
151  fEntryVals[targetIdx] = val;
152  }
153  fSamples->Fill();
154 // fSamples->Scan("*");
155  }
156 
157  //----------------------------------------------------------------------
159  {
160  // if we don't have any samples at all, then just swap
161  if (!fSamples || (fDiagBranches.size() + fVars.size() + fSysts.size()) == 0)
162  {
163  this->fSamples.reset(other.fSamples->CloneTree());
164  this->fDiagBranches = other.fDiagBranches;
165  this->fVars = other.fVars;
166  this->fSysts = other.fSysts;
167  //SetupTree();
168 
169  // make sure we extract this guy out of his old directory, if any,
170  // otherwise he wants to write back to that file
171  this->fSamples->LoadBaskets();
172  this->fSamples->SetDirectory(nullptr);
173 
174  return;
175  }
176 
177  // first, check that the branches (etc.) all match.
178  // if they don't, throw an exception
179  bool branchesSame = true;
180  branchesSame = branchesSame && fDiagBranches == other.fDiagBranches;
181  branchesSame = branchesSame && fVars == other.fVars;
182  branchesSame = branchesSame && fSysts == other.fSysts;
183  if (!branchesSame)
184  throw std::runtime_error("MCMCSamples::AdoptSamples(): branches are not the same!");
185 
186 
187  // then, take the entries from its TTree and and them to ours using TTree::Merge().
188  // clear the other tree once done.
189  BranchStatusResetter bsr(fSamples.get()); // turn all branches off when done
190  fSamples->SetBranchStatus("*", true);
191  other.fSamples->SetBranchStatus("*", true);
192  TList otherTreeList;
193  otherTreeList.SetOwner(false);
194  otherTreeList.Add(other.fSamples.get());
195  fSamples->Merge(&otherTreeList);
196 
197  otherTreeList.Clear();
198  other.fSamples.reset(nullptr);
199 
200  // clear the rest of the other MCMCSamples so it isn't left in an intermediate state
201  other.fDiagBranches.clear();
202  other.fVars.clear();
203  other.fSysts.clear();
204  other.fOffset = 0;
205  other.fBestFitSampleIdx = 0;
206  other.fBestFitFound = false;
207  other.fEntryLL = 0;
208  other.fDiagnosticVals.clear();
209  other.fEntryVals.clear();
210 
211  // finally, the best fit point isn't necessarily the same any more, so force recalculation the next time it's needed
212  fBestFitFound = false;
213  }
214 
215  //----------------------------------------------------------------------
216  std::size_t MCMCSamples::BestFitSampleIdx() const
217  {
218  if (fBestFitFound)
219  return fBestFitSampleIdx;
220 
221  double maxLL = -std::numeric_limits<double>::infinity();
222  auto branch = fSamples->GetBranch(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str());
223  branch->SetStatus(true);
224  for (int idx = 0; idx < fSamples->GetEntries(); idx++)
225  {
226  branch->GetEntry(idx);
227  if (fEntryLL > maxLL)
228  {
230  maxLL = fEntryLL;
231  fBestFitFound = true;
232  }
233  }
234  return fBestFitSampleIdx;
235  }
236 
237  //----------------------------------------------------------------------
238  std::size_t MCMCSamples::DiagOffset(const std::string &diagName) const
239  {
240  auto itr = std::find(fDiagBranches.begin(), fDiagBranches.end(), diagName);
241  assert(itr != fDiagBranches.end() && ("Unknown diagnostic branch: " + diagName).c_str());
242  return std::distance(fDiagBranches.begin(), itr);
243  }
244 
245 
246  //----------------------------------------------------------------------
247  std::unique_ptr<MCMCSamples> MCMCSamples::LoadFrom(TDirectory *dir,
248  const std::string& name)
249  {
250  dir = dir->GetDirectory(name.c_str()); // switch to subdir
251  assert(dir);
252  DontAddDirectory guard;
253 
254  TObjString *tag = (TObjString *) dir->Get("type");
255  assert(tag);
256  assert(tag->GetString() == "MCMCSamples");
257 
258  auto offset = dynamic_cast<TParameter<int>*>(dir->Get("offset"));
259 
260  auto diagBranchNames = dynamic_cast<TList*>(dir->Get("diagBranchNames"));
261  std::vector<std::string> diagBranches;
262  for (const TObject * brNameObj : *diagBranchNames)
263  {
264  auto str = dynamic_cast<const TObjString*>(brNameObj);
265  if (str)
266  diagBranches.emplace_back(str->GetName());
267  }
268 
269  auto fitVarNames = dynamic_cast<TList*>(dir->Get("fitVarNames"));
270  std::vector<const IFitVar*> fitVars;
271  for (const TObject * varNmObj : *fitVarNames)
272  {
273  auto str = dynamic_cast<const TObjString*>(varNmObj);
274  if (str)
275  fitVars.emplace_back(Registry<IFitVar>::ShortNameToPtr(str->GetName()));
276  }
277 
278  auto systNames = dynamic_cast<TList*>(dir->Get("systNames"));
279  std::vector<const ISyst*> systs;
280  for (const TObject * systNmObj : *systNames)
281  {
282  auto str = dynamic_cast<const TObjString*>(systNmObj);
283  if (str)
284  systs.emplace_back(Registry<ISyst>::ShortNameToPtr(str->GetName()));
285  }
286 
287  auto samplesPtr = dynamic_cast<TTree*>(dir->Get("samples"));
288  std::unique_ptr<TTree> samples(samplesPtr);
289  if (samples->GetCurrentFile())
290  {
291  samples->SetBranchStatus("*", true);
292  samples->LoadBaskets(); // read the entire TTree into memory
293  samples->SetDirectory(nullptr); // disassociate it from the file it came from so that when the file is closed it persists
294  }
295 
296  // these may not exist (hyperparameters are not merged when hadd'ing samples)
297  double stepSize = std::numeric_limits<double>::signaling_NaN();
298  std::unique_ptr<TMatrixD> invMetric;
299  auto hyperParamsDir = dynamic_cast<TDirectory *>(dir->Get("hyperparams"));
300  if (hyperParamsDir)
301  {
302  if (auto key = hyperParamsDir->FindKey("stepsize"))
303  stepSize = key->ReadObject<TParameter<double>>()->GetVal();
304  if (auto key = hyperParamsDir->FindKey("inv_metric"))
305  invMetric = std::unique_ptr<TMatrixD>(key->ReadObject<TMatrixD>());
306  }
307  Hyperparameters hyperparams{stepSize, std::move(invMetric)};
308 
309  // ditto
310  double samplingTime = std::numeric_limits<double>::signaling_NaN();
311  auto samplingTimeDir = dynamic_cast<TDirectory*>(dir->Get("samplingTime"));
312  if (!samplingTimeDir) // for backwards compatibility. might be stored at top level
313  samplingTimeDir = dir;
314  if (auto key = samplingTimeDir->FindKey("samplingTime"))
315  samplingTime = key->ReadObject<TParameter<double>>()->GetVal();
316 
317  delete dir;
318 
319  return std::unique_ptr<MCMCSamples>(new MCMCSamples(offset->GetVal(),
320  diagBranches,
321  fitVars,
322  systs,
323  samples,
324  hyperparams,
325  samplingTime));
326  }
327 
328  //----------------------------------------------------------------------
330  {
331  assert(std::find(fVars.begin(), fVars.end(), var) != fVars.end());
332  return Bayesian1DMarginal(*this, var, marginalMode);
333  }
334 
335  //----------------------------------------------------------------------
337  {
338  assert(std::find(fSysts.begin(), fSysts.end(), syst) != fSysts.end());
339  return Bayesian1DMarginal(*this, syst, marginalMode);
340  }
341 
342  //----------------------------------------------------------------------
343  template <typename T>
344  double MCMCSamples::MaxValue(const T *var) const
345  {
347  "MCMCSamples::MaxValue() can only be used with IFitVars and ISysts");
348  double max = -std::numeric_limits<double>::infinity();
349  std::size_t offset = VarOffset(var);
350  for (int idx = 0; idx < fSamples->GetEntries(); idx++)
351  {
352  double val = SampleValue(idx, var->ShortName(), offset);
353  if (val > max)
354  max = val;
355  }
356  return max;
357  }
358  // explicit instantiation of the correct types
359  template double MCMCSamples::MaxValue(const IConstrainedFitVar *) const;
360  template double MCMCSamples::MaxValue(const IFitVar *) const;
361  template double MCMCSamples::MaxValue(const ISyst *) const;
362 
363  //----------------------------------------------------------------------
364  template <typename T>
365  double MCMCSamples::MinValue(const T *var) const
366  {
368  "MCMCSamples::MinValue() can only be used with IFitVars and ISysts");
369  double min = std::numeric_limits<double>::infinity();
370  std::size_t offset = VarOffset(var);
371  for (int idx = 0; idx < fSamples->GetEntries(); idx++)
372  {
373  double val = SampleValue(idx, var->ShortName(), offset);
374  if (val < min)
375  min = val;
376  }
377  return min;
378  }
379  // explicit instantiation of the correct types
380  template double MCMCSamples::MinValue(const IConstrainedFitVar *) const;
381  template double MCMCSamples::MinValue(const IFitVar *) const;
382  template double MCMCSamples::MinValue(const ISyst *) const;
383 
384  //----------------------------------------------------------------------
385  void MCMCSamples::ParseDiagnosticBranches(const std::vector<std::string>& names)
386  {
387  std::size_t idx = 1;
388  fDiagnosticVals.resize(names.size() - (fVars.size() + fSysts.size() + 1));
389  auto firstFitVar = fVars.empty() ? fSysts[0]->ShortName() : fVars[0]->ShortName();
390  for (; idx < names.size(); idx++)
391  {
392  if (names[idx] == firstFitVar)
393  break;
394  fDiagBranches.push_back(names[idx]);
395  }
396  assert(idx < names.size() && "Did not find fit variables in Stan output!");
397  assert (names.size() - idx >= fVars.size() + fSysts.size() && "Stan output didn't contain all FitVars & Systs!");
398  }
399 
400  //----------------------------------------------------------------------
401  std::pair<std::size_t, double> MCMCSamples::QuantileLL(double quantile) const
402  {
403  auto LLs = SortedLLs();
404  return LLs[std::size_t((1 - quantile) * LLs.size())];
405  }
406 
407  //----------------------------------------------------------------------
408  std::pair<std::size_t, double> MCMCSamples::QuantileLL(double quantile,
409  std::vector<std::pair<std::size_t, double>> &LLs) const
410  {
411  if (LLs.empty())
412  LLs = SortedLLs(); // I think this should be efficient since std::vector has a move assignment operator
413  return LLs[std::size_t((1 - quantile) * (LLs.size()-1))]; // -1 so that we don't fall off the edge at Q=1.0
414  }
415 
416  //----------------------------------------------------------------------
417  std::map<double, std::pair<std::size_t, double>> MCMCSamples::QuantileLL(const std::vector<double> &quantiles) const
418  {
419  std::map<double, std::pair<std::size_t, double> > ret;
420  std::vector<std::pair<std::size_t, double> > LLs;
421  for (const auto & quantile : quantiles)
422  ret[quantile] = QuantileLL(quantile, LLs);
423 
424  return ret;
425  }
426 
427  //----------------------------------------------------------------------
429  {
430  BranchStatusResetter bsr(fSamples.get()); // turn branches off when done
431  // these diagnostics adapted from CmdStan's diagnose.cpp
432  auto treeDepthBr = fSamples->GetBranch("treedepth__");
433  if (treeDepthBr)
434  {
435  treeDepthBr->SetStatus(true);
436  unsigned int numMax = 0;
437  auto brOffset = DiagOffset(treeDepthBr->GetName());
438  for (int idx = 0; idx < treeDepthBr->GetEntries(); idx++)
439  {
440  treeDepthBr->GetEntry(idx);
441  if (fDiagnosticVals[brOffset] >= cfg.max_depth)
442  numMax++;
443  }
444  if (numMax > 0)
445  {
446  std::cout << numMax << " of " << treeDepthBr->GetEntries() << " ("
447  << std::setprecision(2)
448  << 100 * static_cast<double>(numMax) / treeDepthBr->GetEntries()
449  << "%) transitions hit the maximum treedepth limit of "
450  << cfg.max_depth << ", or 2^" << cfg.max_depth << " leapfrog steps."
451  << " Trajectories that are prematurely terminated due to this"
452  << " limit will result in slow exploration and you should"
453  << " increase the limit to ensure optimal performance."
454  << std::endl << std::endl;
455 
456  }
457  }
458 
459  auto divergentBr = fSamples->GetBranch("divergent__");
460  if (divergentBr)
461  {
462  divergentBr->SetStatus(true);
463  unsigned int numDiv = 0;
464  auto brOffset = DiagOffset(divergentBr->GetName());
465  for (int idx = 0; idx < divergentBr->GetEntries(); idx++)
466  {
467  divergentBr->GetEntry(idx);
468  if (fDiagnosticVals[brOffset] >0)
469  numDiv++;
470  }
471  if (numDiv > 0)
472  std::cout << numDiv << " of " << divergentBr->GetEntries() << " ("
473  << std::setprecision(2)
474  << 100 * static_cast<double>(numDiv) / divergentBr->GetEntries()
475  << "%) transitions ended with a divergence. These divergent"
476  << " transitions indicate that HMC is not fully able to"
477  << " explore the posterior distribution. Try rerunning with"
478  << " adapt_delta set to a larger value and see if the"
479  << " divergences vanish. If increasing adapt delta towards"
480  << " 1 does not remove the divergences then you will likely"
481  << " need to reparameterize your model."
482  << std::endl << std::endl;
483  }
484 
485  auto energyBr = fSamples->GetBranch("energy__");
486  if (energyBr)
487  {
488  double delta_e_sq_mean = 0;
489  double e_mean = 0;
490  double e_var = 0;
491 
492  double e_sample = 0;
493  double e_sample_prev = 0;
494 
495  auto brOffset = DiagOffset(energyBr->GetName());
496  for (int idx = 0; idx < energyBr->GetEntries(); idx++)
497  {
498  energyBr->SetStatus(true);
499  energyBr->GetEntry(idx);
500  e_sample = fDiagnosticVals[brOffset];
501 
502  double delta_e_sq = util::sqr(e_sample - e_sample_prev);
503  double d = delta_e_sq - delta_e_sq_mean;
504  delta_e_sq_mean += d / (idx + 1);
505 
506  d = e_sample - e_mean;
507  e_mean += d / (idx + 2);
508  e_var += d * (e_sample - e_mean);
509 
510  e_sample_prev = e_sample;
511  }
512 
513  e_var /= static_cast<double>(energyBr->GetEntries() - 1);
514 
515  double e_bfmi = delta_e_sq_mean / e_var;
516 
517  const double e_bfmi_threshold = 0.3;
518  if (e_bfmi < e_bfmi_threshold)
519  std::cout << "The E-BFMI, " << e_bfmi << ", is below the nominal"
520  << " threshold of " << e_bfmi_threshold << " which suggests"
521  << " that HMC may have trouble exploring the target"
522  << " distribution. You should consider any"
523  << " reparameterizations if possible."
524  << std::endl << std::endl;
525  }
526 
527  // unfortunately the remaining diagnostics really require a stan::mcmc::chains<> object.
528  // this presents an annoying issue: the TTree storage is necessary for use with RooNDKeysPDF,
529  // and is much more convenient for persistence.
530  // even more unfortunately, these are the most important diagnostics.
531  // Should we build a stan::mcmc::chains from it?
532  // todo: decide how to implement remaining diagnostics
533 /*
534  else if (chains.param_name(i).find("__") == std::string::npos) {
535 
536  double n_eff = chains.effective_sample_size(i);
537  if (n_eff / num_samples < 0.001)
538  bad_n_eff_names.push_back(chains.param_name(i));
539 
540  double split_rhat = chains.split_potential_scale_reduction(i);
541  if (split_rhat > 1.1)
542  bad_rhat_names.push_back(chains.param_name(i));
543  }
544  }
545 
546  if (bad_n_eff_names.size() > 0) {
547  std::cout << "The following parameters had fewer than 0.001 effective"
548  << " samples per transition:" << std::endl;
549  std::cout << " ";
550  for (size_t n = 0; n < bad_n_eff_names.size() - 1; ++n)
551  std::cout << bad_n_eff_names.at(n) << ", ";
552  std::cout << bad_n_eff_names.back() << std::endl;
553 
554  std::cout << "Such low values indicate that the effective sample size"
555  << " estimators may be biased high and actual performance"
556  << " may be substantially lower than quoted."
557  << std::endl << std::endl;
558  }
559 
560  if (bad_rhat_names.size() > 0) {
561  std::cout << "The following parameters had split R-hat less than 1.1:"
562  << std::endl;
563  std::cout << " ";
564  for (size_t n = 0; n < bad_rhat_names.size() - 1; ++n)
565  std::cout << bad_rhat_names.at(n) << ", ";
566  std::cout << bad_rhat_names.back() << std::endl;
567 
568  std::cout << "Such high values indicate incomplete mixing and biased"
569  << "estimation. You should consider regularization your model"
570  << " with additional prior information or looking for a more"
571  << " effective parameterization."
572  << std::endl << std::endl;
573  }
574 */
575 
576  }
577  //----------------------------------------------------------------------
578  MCMCSample MCMCSamples::Sample(std::size_t idx) const
579  {
580  BranchStatusResetter bsr(fSamples.get()); // turn branches off when done
581 
582  fSamples->SetBranchStatus("*", true);
583  fSamples->GetEntry(idx);
585  }
586 
587  //----------------------------------------------------------------------
588  double MCMCSamples::SampleValue(std::size_t rowIdx, const std::string & branchName, std::size_t varIdx) const
589  {
590  BranchStatusResetter bsr(fSamples.get()); // turn branches off when done
591 
592  fSamples->SetBranchStatus(branchName.c_str(), true);
593  auto branch = fSamples->GetBranch(branchName.c_str());
594  branch->SetStatus(true);
595  branch->GetEntry(rowIdx);
596  return fEntryVals[varIdx];
597  }
598 
599  //----------------------------------------------------------------------
600  void MCMCSamples::SampleValues(std::size_t idx,
601  const std::vector<const IFitVar *> &vars,
602  std::map<const IFitVar *, double> &varVals) const
603  {
604  for (const auto &var : vars)
605  varVals[var] = SampleValue(var, idx);
606  }
607 
608  //----------------------------------------------------------------------
609  void MCMCSamples::SampleValues(std::size_t idx,
610  const std::vector<const IFitVar *> &vars,
611  std::map<const IFitVar *, double> &varVals,
612  const std::vector<const ana::ISyst *> &systs,
613  std::map<const ana::ISyst *, double> &systVals) const
614  {
615  SampleValues(idx, vars, varVals);
616  for (const auto &syst : systs)
617  systVals[syst] = SampleValue(syst, idx);
618  }
619 
620 
621  //----------------------------------------------------------------------
622  void MCMCSamples::SaveTo(TDirectory *dir, const std::string& name) const
623  {
624  TDirectory * oldDir = gDirectory;
625  dir = dir->mkdir(name.c_str()); // switch to subdir
626  dir->cd();
627 
628  TObjString("MCMCSamples").Write("type");
629 
630  TParameter<int> offset("offset", fOffset);
631  offset.Write();
632 
633  TList diagBranchNames;
634  diagBranchNames.SetOwner();
635  for (const auto & brName : fDiagBranches)
636  diagBranchNames.AddLast(new TObjString(brName.c_str()));
637  diagBranchNames.Write("diagBranchNames", TObject::kSingleKey);
638 
639  TList fitVarNames;
640  fitVarNames.SetOwner();
641  for (const auto & var : fVars)
642  fitVarNames.AddLast(new TObjString(var->ShortName().c_str()));
643  fitVarNames.Write("fitVarNames", TObject::kSingleKey);
644 
645  TList systNames;
646  systNames.SetOwner();
647  for (const auto & syst : fSysts)
648  systNames.AddLast(new TObjString(syst->ShortName().c_str()));
649  systNames.Write("systNames", TObject::kSingleKey);
650 
651  // re-enable all the branches before writing...
652  fSamples->SetBranchStatus("*", true);
653  fSamples->Write("samples");
654  // don't let it get attached to the file that's about to be closed
655  fSamples->LoadBaskets();
656  fSamples->SetDirectory(nullptr);
657  auto hyperdir = dir->mkdir("hyperparams");
658  hyperdir->cd();
659  TParameter<double> stepSize("stepsize", fHyperparams.stepSize);
660  stepSize.Write();
662  fHyperparams.invMetric->Write("inv_metric");
663  TObjString("Hyperparameters").Write("type"); // so that hadd_cafana can be taught to skip over them
664  dir->cd();
665  hyperdir->Write();
666 
667  dir->cd();
668  auto timedir = dir->mkdir("samplingTime");
669  timedir->cd();
670  TObjString("SamplingTime").Write("type"); // so that hadd_cafana can be taught to skip over them
671  TParameter<double>("samplingTime", fSamplingTime).Write();
672  dir->cd();
673  timedir->Write();
674 
675  dir->Write();
676  delete dir;
677 
678  if (oldDir)
679  oldDir->cd();
680  }
681 
682  //----------------------------------------------------------------------
683  void MCMCSamples::SetNames(const std::vector<std::string> &names)
684  {
685  assert (fOffset == 0 && "MCMCSamples::SetNames() was called after tree was already set up!");
686 
688  SetupTree();
689  }
690 
691  //----------------------------------------------------------------------
693  {
694  auto CreateOrSetAddress = [](TTree * tree, const std::string& brName, double* address)
695  {
696  auto name = brName.c_str();
697  TBranch * br = tree->GetBranch(name);
698  if (br)
699  br->SetAddress(address);
700  else
701  tree->Branch(name, address);
702  return br;
703  };
704 
705  CreateOrSetAddress(fSamples.get(), MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME, &fEntryLL);
706 
707  fDiagnosticVals.clear();
708  fDiagnosticVals.resize(fDiagBranches.size(), std::numeric_limits<double>::signaling_NaN());
709  assert(!fVars.empty() || !fSysts.empty());
710  auto firstFitVar = fVars.empty() ? fSysts[0]->ShortName() : fVars[0]->ShortName();
711  for (std::size_t valIdx = 0; valIdx < fDiagBranches.size(); valIdx++)
712  CreateOrSetAddress(fSamples.get(), fDiagBranches[valIdx], &fDiagnosticVals[valIdx]);
713 
714  fOffset = 1 + fDiagBranches.size(); // + 1 for the LL at the beginning
715 
716  fEntryVals.clear();
717  fEntryVals.resize(fVars.size() + fSysts.size(), std::numeric_limits<double>::signaling_NaN());
718  std::size_t valIdx = 0;
719  for (const auto & var : fVars)
720  CreateOrSetAddress(fSamples.get(), var->ShortName(), &fEntryVals[valIdx++]);
721  for (const auto &syst : fSysts)
722  CreateOrSetAddress(fSamples.get(), syst->ShortName(), &fEntryVals[valIdx++]);
723 
724  // disable the 'autosave' and 'autoflush' mechanisms
725  // since there's nowhere to write to anyway
726  // (this tree is memory-only unless SaveTo()'d).
727  fSamples->SetAutoFlush(0);
728  fSamples->SetAutoSave(0);
729  }
730 
731  //----------------------------------------------------------------------
732  std::vector<std::pair<std::size_t, double>> MCMCSamples::SortedLLs() const
733  {
734  std::vector<std::pair<std::size_t, double>> LLs;
735  LLs.reserve(fSamples->GetEntries());
736  auto branch = fSamples->GetBranch(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str());
737  fSamples->SetBranchStatus("*", false);
738  fSamples->SetBranchStatus(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str(), true);
739  for (int idx = 0; idx < fSamples->GetEntries(); idx++)
740  {
741  branch->GetEntry(idx);
742  LLs.push_back(std::make_pair(idx, fEntryLL));
743  }
744 
745  std::sort(LLs.begin(), LLs.end(),
746  [](const std::pair<std::size_t, double> & a, const std::pair<std::size_t, double> & b)
747  {
748  return a.second < b.second;
749  }
750  );
751 
752  return std::move(LLs);
753  }
754 
755  //----------------------------------------------------------------------
756  const TTree *MCMCSamples::ToTTree() const
757  {
758  // presumably user doesn't want the tree with some branches missing...
759  fSamples->SetBranchStatus("*", true);
760  return fSamples.get();
761  }
762 
763  //----------------------------------------------------------------------
764  std::size_t MCMCSamples::VarOffset(const IFitVar *var) const
765  {
766  auto itr = std::find(fVars.begin(), fVars.end(), var);
767  assert(itr != fVars.end() && ("Var was not fitted: " + var->ShortName()).c_str());
768  return std::distance(fVars.begin(), itr);
769  }
770 
771  //----------------------------------------------------------------------
772  std::size_t MCMCSamples::VarOffset(const ana::ISyst *syst) const
773  {
774  auto itr = std::find(fSysts.begin(), fSysts.end(), syst);
775  assert(itr != fSysts.end() && ("Syst was not fitted: " + syst->ShortName()).c_str());
776  return fVars.size() + std::distance(fSysts.begin(), itr);
777  }
778 
779 
780 
781 }
std::size_t fBestFitSampleIdx
Definition: MCMCSamples.h:257
const XML_Char * name
Definition: expat.h:151
std::unique_ptr< TMatrixD > invMetric
Definition: MCMCSamples.h:41
std::vector< double > quantiles(TH1D *h)
Definition: absCal.cxx:528
std::size_t VarOffset(const IFitVar *var) const
Where in fEntryVals is the given var?
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
double MaxValue(const T *var) const
Max value of this var/syst in the set.
std::vector< SystGroupDef > systs
Definition: syst_header.h:385
const TTree * ToTTree() const
Get a TTree with the MCMC samples in it.
virtual const std::string & ShortName() const final
The name printed out to the screen.
Definition: ISyst.h:27
Configuration parameters for the Stan MCMC fitter, bundled up together for easy storage and parameter...
Definition: StanConfig.h:6
std::vector< const ana::IFitVar * > fVars
Definition: MCMCSamples.h:254
double SampleValue(const IFitVar *var, std::size_t idx) const
Get the value of Var var for sample number .
Definition: MCMCSamples.h:169
std::pair< std::size_t, double > QuantileLL(double quantile) const
std::pair< Spectrum *, CheatDecomp * > make_pair(SpectrumLoaderBase &loader_data, SpectrumLoaderBase &loader_mc, HistAxis *axis, Cut *cut, const SystShifts &shift, const Var &wei)
Definition: DataMCLoad.C:336
std::size_t fOffset
Definition: MCMCSamples.h:252
unsigned distance(const T &t1, const T &t2)
osc::OscCalcDumb calc
T sqr(T x)
More efficient square function than pow(x,2)
Definition: MathUtil.h:23
Encapsulate code to systematically shift a caf::SRProxy.
Definition: ISyst.h:14
void AddSample(const std::vector< double > &sample)
MCMCSample Sample(std::size_t idx) const
MCMCSamples(const std::vector< const IFitVar * > &vars={}, const std::vector< const ana::ISyst * > &systs={})
Definition: MCMCSamples.cxx:45
void SampleValues(std::size_t idx, const std::vector< const ana::IFitVar * > &vars, std::map< const ana::IFitVar *, double > &varVals) const
Get the values of FitVars vars for sample number idx.
std::vector< const ana::ISyst * > fSysts
Definition: MCMCSamples.h:255
void SaveTo(TDirectory *dir, const std::string &name) const
Save this guy to a file so we don&#39;t have to rerun the MCMC.
MarginalMode
How to construct marginal distributions.
std::vector< std::pair< std::size_t, double > > SortedLLs() const
static std::unique_ptr< MCMCSamples > LoadFrom(TDirectory *dir, const std::string &name)
Load from file.
void RunDiagnostics(const StanConfig &cfg) const
Do some checks on the post-fit samples.
const XML_Char int const XML_Char * value
Definition: expat.h:331
std::unique_ptr< TTree > fSamples
Definition: MCMCSamples.h:261
const double a
double MinValue(const T *var) const
Min value of this var/syst in the set.
Float_t d
Definition: plot.C:236
const std::map< std::pair< std::string, std::string >, Variable > vars
Hyperparameters fHyperparams
Hyperparameters deduced after adaptation, or manually set.
Definition: MCMCSamples.h:265
unsigned int quantile(std::vector< std::vector< double >> v, float E, float hadEfrac)
Definition: Toy_analyses.C:480
OStream cout
Definition: OStream.cxx:6
static float min(const float a, const float b, const float c)
Definition: absgeo.cxx:45
double SampleLL(std::size_t idx) const
Get the LL for sample number .
Definition: MCMCSamples.h:162
::xsd::cxx::tree::string< char, simple_type > string
Definition: Database.h:154
void AdoptSamples(MCMCSamples &&other)
Add the samples from other into this MCMCSamples. Warning: other will be cleared! ...
int max_depth
Depth of the binary tree used by the HMC when taking its leapfrog steps. Note: the number of leapfrog...
Definition: StanConfig.h:70
const std::string & ShortName() const
Definition: IFitVar.h:36
TDirectory * dir
Definition: macro.C:5
static const std::string LOGLIKELIHOOD_BRANCH_NAME
Definition: MCMCSamples.h:37
std::map< std::string, std::string > systNames
Definition: PlotUnfolding.C:32
std::vector< double > fDiagnosticVals
Definition: MCMCSamples.h:264
const hit & b
Definition: hits.cxx:21
assert(nhit_max >=nhit_nbins)
Bayesian1DMarginal MarginalizeTo(const IFitVar *var, BayesianMarginal::MarginalMode marginalMode=BayesianMarginal::MarginalMode::kHistogram) const
Marginalize over all other variables to obtain a 1D profile in var.
Interface definition for fittable variables.
Definition: IFitVar.h:16
std::size_t DiagOffset(const std::string &diagName) const
Where in fDiagnosticVals is the given diagnostic?
double fSamplingTime
how long did we spend sampling?
Definition: MCMCSamples.h:266
double T
Definition: Xdiff_gwt.C:5
std::vector< std::string > fDiagBranches
Definition: MCMCSamples.h:253
void ParseDiagnosticBranches(const std::vector< std::string > &names)
Set up the storage tree based on the branch names given us by Stan.
Prevent histograms being added to the current directory.
Definition: Utilities.h:62
T max(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
Definition: statistics.h:68
MCMCSamples & operator=(MCMCSamples &&other)
Move assignment: need to call SetupTree(), so the default move assignment operator won&#39;t work...
Definition: MCMCSamples.cxx:98
void SetNames(const std::vector< std::string > &names)
std::vector< double > fEntryVals
Definition: MCMCSamples.h:263
std::size_t BestFitSampleIdx() const
Retrieve the index of the best-fit sample (lowest LL), which can then be used with the SampleLL() or ...
void SetupTree()
Hook up the storage tree to our internal variables.
A single MCMC sample.
Definition: MCMCSample.h:14
gm Write()