MCMCSamples.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <vector>
4 
5 #include "TMatrixD.h"
6 #include "TTree.h"
7 
8 #include "CAFAna/Core/IFitVar.h"
9 #include "CAFAna/Core/ISyst.h"
11 #include "CAFAna/Fit/BayesianMarginal.h" // for MarginalMode enum
13 #include "CAFAna/Fit/MCMCSample.h"
14 #include "CAFAna/Fit/StanConfig.h"
15 
16 namespace ana
17 {
18  // forward declaration
19  class Bayesian1DMarginal;
20  class BayesianSurface;
21 
22  /// Storage for a list of MCMC samples.
23  ///
24  /// The internal storage is as a TTree (which simplifies persistence);
25  /// for efficiency and simplicity reasons the tree branches are simply doubles
26  /// (rather than MCMCSample objects), but MCMCSample objects can be obtained
27  /// using the Sample() method.
28  ///
29  /// \param varOffset The offset to the first Var value. (Previous values are the LL and internal fitter vars.)
30  /// \param vars The ana::Vars passed to the fitter
31  /// \param systs The ana::ISysts passed to the fitter
33  {
34  public:
35  /// Name of branch in internal TTree corresponding to the log-likelihood.
36  /// Used in several places, so centralized here.
39  {
40  double stepSize;
41  std::unique_ptr<TMatrixD> invMetric; // use TMatrix rather than Eigen for peristability. unique_ptr so it can be sized later
42 
43  explicit Hyperparameters(double stepsize = std::numeric_limits<double>::signaling_NaN(),
44  std::unique_ptr<TMatrixD> && invmetric = nullptr)
45  : stepSize(stepsize), invMetric(std::move(invmetric))
46  {}
47 
49  : stepSize(h.stepSize), invMetric(h.invMetric ? std::make_unique<TMatrixD>(*h.invMetric) : nullptr)
50  {}
51 
53  {
54  stepSize = std::move(other.stepSize);
55  invMetric = std::move(other.invMetric);
56  return *this;
57  }
58 
59  };
60 
61  /// Build the MCMCSamples object.
62  ///
63  /// \param vars Vars you want fitted by the MCMC
64  /// \param systs Systematics that should be marginalized
65  MCMCSamples(const std::vector<const IFitVar *> &vars = {},
66  const std::vector<const ana::ISyst *> &systs = {});
67 
68  /// Move constructor
70 
71  /// No copying for now (not interested in getting the memory semantics right)
72  MCMCSamples(const MCMCSamples&) = delete;
73 
74  /// Move assignment: need to call SetupTree(), so the default move assignment operator won't work
75  MCMCSamples &operator=(MCMCSamples &&other);
76 
77  /// Copying still forbidden
78  MCMCSamples & operator=(const MCMCSamples&) = delete;
79 
80  void AddSample(const std::vector<double> &sample);
81 
82  /// Add the samples from \ref other into this MCMCSamples. Warning: \ref other will be cleared!
83  void AdoptSamples(MCMCSamples&& other);
84 
85  /// Retrieve the index of the best-fit sample (lowest LL), which can then be used with the SampleLL() or SampleValue() methods
86  std::size_t BestFitSampleIdx() const;
87 
88  /// Discard any samples
89  void Clear() { fSamples->Clear(); }
90  const Hyperparameters & Hyperparams() const { return fHyperparams; }
91 
92 
93  /// Marginalize over all other variables to obtain a 1D profile in \a var
96 
97  /// Marginalize over all other variables to obtain a 1D profile in \a syst
100 
101  /// Marginalize over all other variables to obtain a 2D surface in \a x and \a y
102  template <typename SystOrVar1, typename SystOrVar2>
103  BayesianSurface MarginalizeTo(const SystOrVar1 * x, int nbinsx, double xmin, double xmax,
104  const SystOrVar2 * y, int nbinsy, double ymin, double ymax,
106  {
107  static_assert((std::is_base_of_v<IFitVar, SystOrVar1> || std::is_same_v<SystOrVar1, ISyst>)
108  && (std::is_base_of_v<IFitVar, SystOrVar2> || std::is_same_v<SystOrVar2, ISyst>),
109  "MCMCSamples::MarginalizeTo() can only be used with Systs or Vars");
110  if constexpr(std::is_base_of_v<IFitVar, SystOrVar1>)
111  assert(std::find(fVars.begin(), fVars.end(), x) != fVars.end());
112  else
113  assert(std::find(fSysts.begin(), fSysts.end(), x) != fSysts.end());
114 
115  if constexpr(std::is_base_of_v<IFitVar, SystOrVar2>)
116  assert(std::find(fVars.begin(), fVars.end(), y) != fVars.end());
117  else
118  assert(std::find(fSysts.begin(), fSysts.end(), y) != fSysts.end());
119  return BayesianSurface(*this,
120  x, nbinsx, xmin, xmax,
121  y, nbinsy, ymin, ymax,
122  marginalMode);
123  }
124 
125 
126  /// Max value of this var/syst in the set
127  template <typename T>
128  double MaxValue(const T *var) const;
129 
130  /// Min value of this var/syst in the set
131  template <typename T>
132  double MinValue(const T *var) const;
133 
134  /// How many samples do we have?
135  std::size_t NumSamples() const { return fSamples->GetEntries(); };
136 
137  /// Determine the LL at given quantile
138  ///
139  /// \param quantile e.g. 90% smallest LL of set of samples --> 0.9
140  /// \return the LL at given quantile
141  ///
142  /// Note that the necessary ordered list is constructed and destroyed each time this method is called,
143  /// so if you know you need multiple quantiles, use one of the other signatures.
144  std::pair<std::size_t, double> QuantileLL(double quantile) const;
145 
146  /// Like QuantileLL(double) except that the LL vector is returned to you.
147  /// If you pass a non-empty vector, that vector is assumed to already contain
148  /// the sorted LL list (facilitating re-use).
149  std::pair<std::size_t, double> QuantileLL(double quantile, std::vector<std::pair<std::size_t, double>>& LLs) const;
150 
151  /// Like QuantileLL(double) except requesting multiple LLs at once
152  std::map<double, std::pair<std::size_t, double>> QuantileLL(const std::vector<double>& quantiles) const;
153 
154  /// Do some checks on the post-fit samples
155  void RunDiagnostics(const StanConfig & cfg) const;
156 
157  /// The entire sample at index \idx.
158  /// If you just want one value prefer SampleLL() or SampleValue() (less overhead).
159  MCMCSample Sample(std::size_t idx) const;
160 
161  /// Get the LL for sample number \idx
162  double SampleLL(std::size_t idx) const
163  {
164  fSamples->GetEntry(idx);
165  return fEntryLL;
166  }
167 
168  /// Get the value of Var \a var for sample number \idx
169  double SampleValue(const IFitVar *var, std::size_t idx) const
170  {
171  return SampleValue(idx, var->ShortName(), VarOffset(var));
172  }
173 
174  /// Get the value of Syst \a syst for sample number \idx
175  double SampleValue(const ana::ISyst *syst, std::size_t idx) const
176  {
177  return SampleValue(idx, syst->ShortName(), VarOffset(syst));
178  }
179 
180  /// Get the values of FitVars \a vars for sample number \a idx
181  void SampleValues(std::size_t idx,
182  const std::vector<const ana::IFitVar *> &vars,
183  std::map<const ana::IFitVar *, double> &varVals) const;
184 
185  /// Get the values of FitVars \a vars and ISysts \a systs for sample number \a idx
186  void SampleValues(std::size_t idx,
187  const std::vector<const ana::IFitVar *> &vars,
188  std::map<const ana::IFitVar *, double> &varVals,
189  const std::vector<const ana::ISyst *> &systs, std::map<const ana::ISyst *, double> &systVals) const;
190  double SamplingTime() const { return fSamplingTime; }
191 
192  void SetHyperparams(double stepSize, const TMatrixD& invMassMatrix)
193  {
194  fHyperparams = Hyperparameters(stepSize, std::make_unique<TMatrixD>(invMassMatrix));
195  }
196 
198  {
199  fHyperparams = std::move(h);
200  };
201 
202  void SetNames(const std::vector<std::string>& names);
203  void SetSamplingTime(double s) { fSamplingTime = s; }
204 
205  /// Get a TTree with the MCMC samples in it
206  const TTree *ToTTree() const;
207 
208  /// Which Systs are sampled in these samples?
209  const std::vector<const ana::ISyst *> &Systs() const { return fSysts; }
210 
211  /// Which Vars are sampled in these samples?
212  const std::vector<const IFitVar *> & Vars() const { return fVars; }
213 
214  /// Save this guy to a file so we don't have to rerun the MCMC
215  void SaveTo(TDirectory * dir, const std::string& name) const;
216 
217  /// Load from file
218  static std::unique_ptr<MCMCSamples> LoadFrom(TDirectory * dir, const std::string& name);
219 
220  private:
221  /// Internal-use constructor needed for LoadFrom()
222  MCMCSamples(std::size_t offset,
223  const std::vector<std::string>& diagBranchNames,
224  const std::vector<const IFitVar *> &vars,
225  const std::vector<const ana::ISyst *> &systs,
226  std::unique_ptr<TTree> &tree,
227  const Hyperparameters &hyperParams,
228  double samplingTime);
229 
230  /// Where in fDiagnosticVals is the given diagnostic?
231  std::size_t DiagOffset(const std::string& diagName) const;
232 
233  /// Internal method for getting the value of a var when you know where it is in fEntryVals
234  double SampleValue(std::size_t rowIdx, const std::string & branchName, std::size_t varIdx) const;
235 
236  /// Set up the storage tree based on the branch names given us by Stan
237  void ParseDiagnosticBranches(const std::vector<std::string>& names);
238 
239  /// Hook up the storage tree to our internal variables
240  void SetupTree();
241 
242  /// Construct a sorted vector of the LLs of all the samples (ordered lowest to highest)
243  ///
244  /// \return The ordered vector
245  std::vector<std::pair<std::size_t, double>> SortedLLs() const;
246 
247  /// Where in fEntryVals is the given var?
248  std::size_t VarOffset(const IFitVar *var) const;
249  /// Where in fEntryVals is the given syst?
250  std::size_t VarOffset(const ana::ISyst * syst) const;
251 
252  std::size_t fOffset; //< number of branches read out before the fitted parameters start
253  std::vector<std::string> fDiagBranches;
254  std::vector<const ana::IFitVar *> fVars;
255  std::vector<const ana::ISyst *> fSysts;
256 
257  mutable std::size_t fBestFitSampleIdx;
258  mutable bool fBestFitFound;
259 
260  // would prefer to keep this on the stack, but no move assignment...
261  std::unique_ptr<TTree> fSamples;
262  double fEntryLL; // for use reading the TTree
263  std::vector<double> fEntryVals; // ditto
264  std::vector<double> fDiagnosticVals;
265  mutable Hyperparameters fHyperparams; ///< Hyperparameters deduced after adaptation, or manually set
266  double fSamplingTime; ///< how long did we spend sampling?
267  };
268 
269 }
std::size_t fBestFitSampleIdx
Definition: MCMCSamples.h:257
const XML_Char * name
Definition: expat.h:151
std::unique_ptr< TMatrixD > invMetric
Definition: MCMCSamples.h:41
std::vector< double > quantiles(TH1D *h)
Definition: absCal.cxx:528
std::size_t NumSamples() const
How many samples do we have?
Definition: MCMCSamples.h:135
std::size_t VarOffset(const IFitVar *var) const
Where in fEntryVals is the given var?
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
double MaxValue(const T *var) const
Max value of this var/syst in the set.
std::map< std::string, double > xmax
std::vector< SystGroupDef > systs
Definition: syst_header.h:385
const TTree * ToTTree() const
Get a TTree with the MCMC samples in it.
virtual const std::string & ShortName() const final
The name printed out to the screen.
Definition: ISyst.h:27
void Clear()
Discard any samples.
Definition: MCMCSamples.h:89
double SampleValue(const ana::ISyst *syst, std::size_t idx) const
Get the value of Syst syst for sample number .
Definition: MCMCSamples.h:175
Configuration parameters for the Stan MCMC fitter, bundled up together for easy storage and parameter...
Definition: StanConfig.h:6
std::vector< const ana::IFitVar * > fVars
Definition: MCMCSamples.h:254
double SampleValue(const IFitVar *var, std::size_t idx) const
Get the value of Var var for sample number .
Definition: MCMCSamples.h:169
void SetSamplingTime(double s)
Definition: MCMCSamples.h:203
std::pair< std::size_t, double > QuantileLL(double quantile) const
const std::vector< const IFitVar * > & Vars() const
Which Vars are sampled in these samples?
Definition: MCMCSamples.h:212
std::size_t fOffset
Definition: MCMCSamples.h:252
Encapsulate code to systematically shift a caf::SRProxy.
Definition: ISyst.h:14
Hyperparameters(const Hyperparameters &h)
Definition: MCMCSamples.h:48
void AddSample(const std::vector< double > &sample)
MCMCSample Sample(std::size_t idx) const
void SetHyperparams(double stepSize, const TMatrixD &invMassMatrix)
Definition: MCMCSamples.h:192
MCMCSamples(const std::vector< const IFitVar * > &vars={}, const std::vector< const ana::ISyst * > &systs={})
Definition: MCMCSamples.cxx:45
void SampleValues(std::size_t idx, const std::vector< const ana::IFitVar * > &vars, std::map< const ana::IFitVar *, double > &varVals) const
Get the values of FitVars vars for sample number idx.
Double_t ymax
Definition: plot.C:25
std::vector< const ana::ISyst * > fSysts
Definition: MCMCSamples.h:255
const XML_Char * s
Definition: expat.h:262
void SaveTo(TDirectory *dir, const std::string &name) const
Save this guy to a file so we don&#39;t have to rerun the MCMC.
MarginalMode
How to construct marginal distributions.
std::vector< std::pair< std::size_t, double > > SortedLLs() const
static std::unique_ptr< MCMCSamples > LoadFrom(TDirectory *dir, const std::string &name)
Load from file.
void RunDiagnostics(const StanConfig &cfg) const
Do some checks on the post-fit samples.
std::unique_ptr< TTree > fSamples
Definition: MCMCSamples.h:261
double MinValue(const T *var) const
Min value of this var/syst in the set.
const std::map< std::pair< std::string, std::string >, Variable > vars
Hyperparameters fHyperparams
Hyperparameters deduced after adaptation, or manually set.
Definition: MCMCSamples.h:265
unsigned int quantile(std::vector< std::vector< double >> v, float E, float hadEfrac)
Definition: Toy_analyses.C:480
const Hyperparameters & Hyperparams() const
Definition: MCMCSamples.h:90
const std::vector< const ana::ISyst * > & Systs() const
Which Systs are sampled in these samples?
Definition: MCMCSamples.h:209
void SetHyperparams(Hyperparameters h)
Definition: MCMCSamples.h:197
double SampleLL(std::size_t idx) const
Get the LL for sample number .
Definition: MCMCSamples.h:162
Hyperparameters & operator=(Hyperparameters &&other)
Definition: MCMCSamples.h:52
double SamplingTime() const
Definition: MCMCSamples.h:190
void AdoptSamples(MCMCSamples &&other)
Add the samples from other into this MCMCSamples. Warning: other will be cleared! ...
const std::string & ShortName() const
Definition: IFitVar.h:36
Int_t nbinsx
Definition: plot.C:23
TDirectory * dir
Definition: macro.C:5
BayesianSurface MarginalizeTo(const SystOrVar1 *x, int nbinsx, double xmin, double xmax, const SystOrVar2 *y, int nbinsy, double ymin, double ymax, BayesianMarginal::MarginalMode marginalMode=BayesianMarginal::MarginalMode::kHistogram) const
Marginalize over all other variables to obtain a 2D surface in x and y.
Definition: MCMCSamples.h:103
static const std::string LOGLIKELIHOOD_BRANCH_NAME
Definition: MCMCSamples.h:37
std::vector< double > fDiagnosticVals
Definition: MCMCSamples.h:264
assert(nhit_max >=nhit_nbins)
Bayesian1DMarginal MarginalizeTo(const IFitVar *var, BayesianMarginal::MarginalMode marginalMode=BayesianMarginal::MarginalMode::kHistogram) const
Marginalize over all other variables to obtain a 1D profile in var.
Interface definition for fittable variables.
Definition: IFitVar.h:16
std::size_t DiagOffset(const std::string &diagName) const
Where in fDiagnosticVals is the given diagnostic?
Double_t ymin
Definition: plot.C:24
double fSamplingTime
how long did we spend sampling?
Definition: MCMCSamples.h:266
double T
Definition: Xdiff_gwt.C:5
std::vector< std::string > fDiagBranches
Definition: MCMCSamples.h:253
void ParseDiagnosticBranches(const std::vector< std::string > &names)
Set up the storage tree based on the branch names given us by Stan.
Hyperparameters(double stepsize=std::numeric_limits< double >::signaling_NaN(), std::unique_ptr< TMatrixD > &&invmetric=nullptr)
Definition: MCMCSamples.h:43
void SetNames(const std::vector< std::string > &names)
std::vector< double > fEntryVals
Definition: MCMCSamples.h:263
std::size_t BestFitSampleIdx() const
Retrieve the index of the best-fit sample (lowest LL), which can then be used with the SampleLL() or ...
void SetupTree()
Hook up the storage tree to our internal variables.
A single MCMC sample.
Definition: MCMCSample.h:14
enum BeamMode string