7 #include "TParameter.h" 19 #include "Utilities/func/MathUtil.h" 24 class BranchStatusResetter
27 BranchStatusResetter(TTree *
tree)
31 ~BranchStatusResetter()
34 fTree->SetBranchStatus(
"*",
false);
43 const std::string MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME =
"logprob";
46 MCMCSamples::MCMCSamples(
const std::vector<const IFitVar *> &
vars,
47 const std::vector<const ana::ISyst *> &
systs)
52 fSamples(
std::make_unique<TTree>(
"samples",
"MCMC samples"))
57 const std::vector<std::string>& diagBranchNames,
58 const std::vector<const IFitVar *> &
vars,
59 const std::vector<const ana::ISyst *> &
systs,
60 std::unique_ptr<TTree> &
tree,
140 for (std::size_t targetIdx = 0, sourceIdx =
fOffset; sourceIdx < sample.size(); targetIdx++, sourceIdx++)
143 double val = sample[sourceIdx];
146 if (targetIdx <
fVars.size())
149 fVars[targetIdx]->SetValue(&calc, val);
150 val =
fVars[targetIdx]->GetValue(&calc);
173 this->
fSamples->SetDirectory(
nullptr);
180 bool branchesSame =
true;
182 branchesSame = branchesSame &&
fVars ==
other.fVars;
183 branchesSame = branchesSame &&
fSysts ==
other.fSysts;
185 throw std::runtime_error(
"MCMCSamples::AdoptSamples(): branches are not the same!");
190 BranchStatusResetter bsr(
fSamples.get());
191 fSamples->SetBranchStatus(
"*",
true);
192 other.fSamples->SetBranchStatus(
"*",
true);
194 otherTreeList.SetOwner(
false);
195 otherTreeList.Add(
other.fSamples.get());
198 otherTreeList.Clear();
199 other.fSamples.reset(
nullptr);
202 other.fDiagBranches.clear();
204 other.fSysts.clear();
206 other.fBestFitSampleIdx = 0;
207 other.fBestFitFound =
false;
209 other.fDiagnosticVals.clear();
210 other.fEntryVals.clear();
222 double maxLL = -std::numeric_limits<double>::infinity();
223 auto branch =
fSamples->GetBranch(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str());
251 dir = dir->GetDirectory(name.c_str());
255 TObjString *
tag = (TObjString *) dir->Get(
"type");
257 assert(tag->GetString() ==
"MCMCSamples");
259 auto offset =
dynamic_cast<TParameter<int>*
>(dir->Get(
"offset"));
261 auto diagBranchNames =
dynamic_cast<TList*
>(dir->Get(
"diagBranchNames"));
262 std::vector<std::string> diagBranches;
263 for (
const TObject * brNameObj : *diagBranchNames)
265 auto str =
dynamic_cast<const TObjString*
>(brNameObj);
267 diagBranches.emplace_back(
str->GetName());
270 auto fitVarNames =
dynamic_cast<TList*
>(dir->Get(
"fitVarNames"));
271 std::vector<const IFitVar*> fitVars;
272 for (
const TObject * varNmObj : *fitVarNames)
274 auto str =
dynamic_cast<const TObjString*
>(varNmObj);
279 auto systNames =
dynamic_cast<TList*
>(dir->Get(
"systNames"));
280 std::vector<const ISyst*>
systs;
281 for (
const TObject * systNmObj : *
systNames)
283 auto str =
dynamic_cast<const TObjString*
>(systNmObj);
288 auto samplesPtr =
dynamic_cast<TTree*
>(dir->Get(
"samples"));
289 std::unique_ptr<TTree> samples(samplesPtr);
290 if (samples->GetCurrentFile())
292 samples->SetBranchStatus(
"*",
true);
293 samples->LoadBaskets();
294 samples->SetDirectory(
nullptr);
298 double stepSize = std::numeric_limits<double>::signaling_NaN();
299 std::unique_ptr<TMatrixD> invMetric;
300 auto hyperParamsDir =
dynamic_cast<TDirectory *
>(dir->Get(
"hyperparams"));
303 if (
auto key = hyperParamsDir->FindKey(
"stepsize"))
304 stepSize =
key->ReadObject<TParameter<double>>()->GetVal();
305 if (
auto key = hyperParamsDir->FindKey(
"inv_metric"))
306 invMetric = std::unique_ptr<TMatrixD>(
key->ReadObject<
TMatrixD>());
311 double samplingTime = std::numeric_limits<double>::signaling_NaN();
312 auto samplingTimeDir =
dynamic_cast<TDirectory*
>(dir->Get(
"samplingTime"));
313 if (!samplingTimeDir)
314 samplingTimeDir =
dir;
315 if (
auto key = samplingTimeDir->FindKey(
"samplingTime"))
316 samplingTime =
key->ReadObject<TParameter<double>>()->GetVal();
320 return std::unique_ptr<MCMCSamples>(
new MCMCSamples(offset->GetVal(),
344 template <
typename T>
348 "MCMCSamples::MaxValue() can only be used with IFitVars and ISysts");
349 double max = -std::numeric_limits<double>::infinity();
365 template <
typename T>
369 "MCMCSamples::MinValue() can only be used with IFitVars and ISysts");
370 double min = std::numeric_limits<double>::infinity();
390 auto firstFitVar =
fVars.empty() ?
fSysts[0]->ShortName() :
fVars[0]->ShortName();
391 for (; idx < names.size(); idx++)
393 if (names[idx] == firstFitVar)
397 assert(idx < names.size() &&
"Did not find fit variables in Stan output!");
398 assert (names.size() - idx >=
fVars.size() +
fSysts.size() &&
"Stan output didn't contain all FitVars & Systs!");
405 return LLs[std::size_t((1 - quantile) * LLs.size())];
410 std::vector<std::pair<std::size_t, double>> &LLs)
const 414 return LLs[std::size_t((1 - quantile) * (LLs.size()-1))];
420 std::map<double, std::pair<std::size_t, double> >
ret;
421 std::vector<std::pair<std::size_t, double> > LLs;
422 for (
const auto &
quantile : quantiles)
431 BranchStatusResetter bsr(
fSamples.get());
433 auto treeDepthBr =
fSamples->GetBranch(
"treedepth__");
436 treeDepthBr->SetStatus(
true);
437 unsigned int numMax = 0;
438 auto brOffset =
DiagOffset(treeDepthBr->GetName());
439 for (
int idx = 0;
idx < treeDepthBr->GetEntries();
idx++)
441 treeDepthBr->GetEntry(
idx);
447 std::cout << numMax <<
" of " << treeDepthBr->GetEntries() <<
" (" 448 << std::setprecision(2)
449 << 100 *
static_cast<double>(numMax) / treeDepthBr->GetEntries()
450 <<
"%) transitions hit the maximum treedepth limit of " 452 <<
" Trajectories that are prematurely terminated due to this" 453 <<
" limit will result in slow exploration and you should" 454 <<
" increase the limit to ensure optimal performance." 460 auto divergentBr =
fSamples->GetBranch(
"divergent__");
463 divergentBr->SetStatus(
true);
464 unsigned int numDiv = 0;
465 auto brOffset =
DiagOffset(divergentBr->GetName());
466 for (
int idx = 0;
idx < divergentBr->GetEntries();
idx++)
468 divergentBr->GetEntry(
idx);
473 std::cout << numDiv <<
" of " << divergentBr->GetEntries() <<
" (" 474 << std::setprecision(2)
475 << 100 *
static_cast<double>(numDiv) / divergentBr->GetEntries()
476 <<
"%) transitions ended with a divergence. These divergent" 477 <<
" transitions indicate that HMC is not fully able to" 478 <<
" explore the posterior distribution. Try rerunning with" 479 <<
" adapt_delta set to a larger value and see if the" 480 <<
" divergences vanish. If increasing adapt delta towards" 481 <<
" 1 does not remove the divergences then you will likely" 482 <<
" need to reparameterize your model." 486 auto energyBr =
fSamples->GetBranch(
"energy__");
489 double delta_e_sq_mean = 0;
494 double e_sample_prev = 0;
496 auto brOffset =
DiagOffset(energyBr->GetName());
497 for (
int idx = 0;
idx < energyBr->GetEntries();
idx++)
499 energyBr->SetStatus(
true);
500 energyBr->GetEntry(
idx);
503 double delta_e_sq =
util::sqr(e_sample - e_sample_prev);
504 double d = delta_e_sq - delta_e_sq_mean;
505 delta_e_sq_mean += d / (
idx + 1);
507 d = e_sample - e_mean;
508 e_mean += d / (
idx + 2);
509 e_var += d * (e_sample - e_mean);
511 e_sample_prev = e_sample;
514 e_var /=
static_cast<double>(energyBr->GetEntries() - 1);
516 double e_bfmi = delta_e_sq_mean / e_var;
518 const double e_bfmi_threshold = 0.3;
519 if (e_bfmi < e_bfmi_threshold)
520 std::cout <<
"The E-BFMI, " << e_bfmi <<
", is below the nominal" 521 <<
" threshold of " << e_bfmi_threshold <<
" which suggests" 522 <<
" that HMC may have trouble exploring the target" 523 <<
" distribution. You should consider any" 524 <<
" reparameterizations if possible." 581 BranchStatusResetter bsr(
fSamples.get());
583 fSamples->SetBranchStatus(
"*",
true);
591 BranchStatusResetter bsr(
fSamples.get());
593 fSamples->SetBranchStatus(branchName.c_str(),
true);
602 const std::vector<const IFitVar *> &
vars,
603 std::map<const IFitVar *, double> &varVals)
const 605 for (
const auto &
var : vars)
611 const std::vector<const IFitVar *> &vars,
612 std::map<const IFitVar *, double> &varVals,
613 const std::vector<const ana::ISyst *> &
systs,
614 std::map<const ana::ISyst *, double> &systVals)
const 617 for (
const auto &syst : systs)
625 TDirectory * oldDir = gDirectory;
626 dir = dir->mkdir(name.c_str());
629 TObjString(
"MCMCSamples").Write(
"type");
635 TList diagBranchNames;
636 diagBranchNames.SetOwner();
638 std::sort(branchNames.begin(), branchNames.end());
639 for (
const auto & brName : branchNames)
640 diagBranchNames.AddLast(
new TObjString(brName.c_str()));
641 diagBranchNames.Write(
"diagBranchNames", TObject::kSingleKey);
647 fitVarNames.SetOwner();
648 for (
const auto & brName : branchNames)
649 fitVarNames.AddLast(
new TObjString(brName.c_str()));
650 fitVarNames.Write(
"fitVarNames", TObject::kSingleKey);
654 [](
const ISyst*
s){
return s->ShortName();});
656 systNames.SetOwner();
657 for (
const auto & brName : branchNames)
658 systNames.AddLast(
new TObjString(brName.c_str()));
659 systNames.Write(
"systNames", TObject::kSingleKey);
662 fSamples->SetBranchStatus(
"*",
true);
667 auto hyperdir = dir->mkdir(
"hyperparams");
673 TObjString(
"Hyperparameters").Write(
"type");
678 auto timedir = dir->mkdir(
"samplingTime");
680 TObjString(
"SamplingTime").Write(
"type");
695 assert (
fOffset == 0 &&
"MCMCSamples::SetNames() was called after tree was already set up!");
706 auto name = brName.c_str();
707 TBranch *
br = tree->GetBranch(
name);
720 auto firstFitVar =
fVars.empty() ?
fSysts[0]->ShortName() :
fVars[0]->ShortName();
721 for (std::size_t valIdx = 0; valIdx <
fDiagBranches.size(); valIdx++)
728 std::size_t valIdx = 0;
731 for (
const auto &syst :
fSysts)
744 std::vector<std::pair<std::size_t, double>> LLs;
745 LLs.reserve(
fSamples->GetEntries());
746 auto branch =
fSamples->GetBranch(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str());
747 fSamples->SetBranchStatus(
"*",
false);
748 fSamples->SetBranchStatus(MCMCSamples::LOGLIKELIHOOD_BRANCH_NAME.c_str(),
true);
749 for (
int idx = 0; idx <
fSamples->GetEntries(); idx++)
755 std::sort(LLs.begin(), LLs.end(),
756 [](
const std::pair<std::size_t, double> &
a,
const std::pair<std::size_t, double> &
b)
758 return a.second <
b.second;
762 return std::move(LLs);
769 fSamples->SetBranchStatus(
"*",
true);
774 template<
typename SystOrVar>
777 static_assert((std::is_base_of_v<IFitVar, SystOrVar> || std::is_same_v<SystOrVar, ISyst>),
778 "MCMCSamples::Trace() can only be used with Systs or Vars");
781 for (
int sample = 0; sample <
fSamples->GetEntries(); sample++)
784 ret.SetPoint(sample, sample+1, val);
803 auto itr = std::find(
fSysts.begin(),
fSysts.end(), syst);
std::size_t fBestFitSampleIdx
std::unique_ptr< TMatrixD > invMetric
std::vector< double > quantiles(TH1D *h)
std::size_t VarOffset(const IFitVar *var) const
Where in fEntryVals is the given var?
Cuts and Vars for the 2020 FD DiF Study.
double MaxValue(const T *var) const
Max value of this var/syst in the set.
const TTree * ToTTree() const
Get a TTree with the MCMC samples in it.
virtual const std::string & ShortName() const final
The name printed out to the screen.
Configuration parameters for the Stan MCMC fitter, bundled up together for easy storage and parameter...
std::vector< const ana::IFitVar * > fVars
double SampleValue(const IFitVar *var, std::size_t idx) const
Get the value of Var var for sample number .
std::pair< std::size_t, double > QuantileLL(double quantile) const
std::pair< Spectrum *, CheatDecomp * > make_pair(SpectrumLoaderBase &loader_data, SpectrumLoaderBase &loader_mc, HistAxis *axis, Cut *cut, const SystShifts &shift, const Var &wei)
unsigned distance(const T &t1, const T &t2)
T sqr(T x)
More efficient square function than pow(x,2)
Encapsulate code to systematically shift a caf::SRProxy.
void AddSample(const std::vector< double > &sample)
MCMCSample Sample(std::size_t idx) const
MCMCSamples(const std::vector< const IFitVar * > &vars={}, const std::vector< const ana::ISyst * > &systs={})
void SampleValues(std::size_t idx, const std::vector< const ana::IFitVar * > &vars, std::map< const ana::IFitVar *, double > &varVals) const
Get the values of FitVars vars for sample number idx.
std::vector< const ana::ISyst * > fSysts
void SaveTo(TDirectory *dir, const std::string &name) const
Save this guy to a file so we don't have to rerun the MCMC.
MarginalMode
How to construct marginal distributions.
std::vector< std::pair< std::size_t, double > > SortedLLs() const
static std::unique_ptr< MCMCSamples > LoadFrom(TDirectory *dir, const std::string &name)
Load from file.
void RunDiagnostics(const StanConfig &cfg) const
Do some checks on the post-fit samples.
const XML_Char int const XML_Char * value
std::unique_ptr< TTree > fSamples
double MinValue(const T *var) const
Min value of this var/syst in the set.
TGraph Trace(const SystOrVar *fittable) const
Plot the trace of a Var or Syst vs. the MCMC sample number.
Hyperparameters fHyperparams
Hyperparameters deduced after adaptation, or manually set.
unsigned int quantile(std::vector< std::vector< double >> v, float E, float hadEfrac)
static float min(const float a, const float b, const float c)
double SampleLL(std::size_t idx) const
Get the LL for sample number .
void AdoptSamples(MCMCSamples &&other)
Add the samples from other into this MCMCSamples. Warning: other will be cleared! ...
int max_depth
Depth of the binary tree used by the HMC when taking its leapfrog steps. Note: the number of leapfrog...
const std::string & ShortName() const
static const std::string LOGLIKELIHOOD_BRANCH_NAME
std::map< std::string, std::string > systNames
std::vector< double > fDiagnosticVals
assert(nhit_max >=nhit_nbins)
Bayesian1DMarginal MarginalizeTo(const IFitVar *var, BayesianMarginal::MarginalMode marginalMode=BayesianMarginal::MarginalMode::kHistogram) const
Marginalize over all other variables to obtain a 1D profile in var.
Interface definition for fittable variables.
std::size_t DiagOffset(const std::string &diagName) const
Where in fDiagnosticVals is the given diagnostic?
double fSamplingTime
how long did we spend sampling?
std::vector< std::string > fDiagBranches
void ParseDiagnosticBranches(const std::vector< std::string > &names)
Set up the storage tree based on the branch names given us by Stan.
Prevent histograms being added to the current directory.
T max(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
MCMCSamples & operator=(MCMCSamples &&other)
Move assignment: need to call SetupTree(), so the default move assignment operator won't work...
void SetNames(const std::vector< std::string > &names)
std::vector< double > fEntryVals
std::size_t BestFitSampleIdx() const
Retrieve the index of the best-fit sample (lowest LL), which can then be used with the SampleLL() or ...
void SetupTree()
Hook up the storage tree to our internal variables.