FrequentistSurface.cxx
Go to the documentation of this file.
2 
3 #include <iostream>
4 #include <functional>
5 
6 #include "TCanvas.h"
7 #include "TGraph.h"
8 #include "TH2F.h"
9 #include "TObjArray.h"
10 #include "TPad.h"
11 #include "TROOT.h"
12 #include "TStyle.h"
13 #include "TKey.h"
14 #include "TVectorD.h"
15 #include "TObjString.h"
16 //#include "TCollection.h"
17 
18 #include "CAFAna/Core/Var.h"
22 #include "CAFAna/Core/IFitVar.h"
23 #include "CAFAna/Core/Progress.h"
24 #include "CAFAna/Core/ThreadPool.h"
25 #include "CAFAna/Core/Utilities.h"
26 
27 #include "OscLib/IOscCalc.h"
28 
29 namespace ana
30 {
31  //----------------------------------------------------------------------
34  const FitAxis& xax,
35  const FitAxis& yax,
36  const std::vector<const IFitVar*>& profVars,
37  const std::vector<const ISyst*>& profSysts,
38  const SeedList& seedPts,
39  const std::vector<SystShifts>& systSeedPts,
40  bool parallel,
42  : fParallel(parallel), fFitOpts(opts)
43  {
44  fLogX = xax.islog;
45  fLogY = yax.islog;
46 
47  CreateHistograms(xax, yax, profVars, profSysts);
48 
49  for(const IFitVar* v: profVars) fSeedValues.push_back(v->GetValue( calc));
50 
51  FillSurface(expt, calc, xax.var, yax.var, profVars, profSysts, seedPts, systSeedPts);
52 
53  FindMinimum(expt, calc, xax.var, yax.var, profVars, profSysts, seedPts, systSeedPts);
54  }
55 
56  //---------------------------------------------------------------------
58  {
59  }
60 
61  //---------------------------------------------------------------------
63  CreateHistograms(const FitAxis& xax, const FitAxis& yax,
64  const std::vector<const IFitVar*>& profVars,
65  const std::vector<const ISyst*>& profSysts)
66  {
67  fHist = ExpandedHistogram(";"+xax.var->LatexName()+";"+yax.var->LatexName(),
68  xax.nbins, xax.min, xax.max, xax.islog,
69  yax.nbins, yax.min, yax.max, yax.islog);
70 
71  for(unsigned int i = 0; i < profVars.size()+profSysts.size(); ++i){
73  if(i < profVars.size())
74  title = profVars[i]->LatexName();
75  else
76  title = profSysts[i-profVars.size()]->LatexName();
77 
78  fProfHists.push_back(ExpandedHistogram(title+";"+xax.var->LatexName()+";"+yax.var->LatexName(),
79  xax.nbins, xax.min, xax.max, xax.islog,
80  yax.nbins, yax.min, yax.max, yax.islog));
81  }
82 
83  }
84 
85  //---------------------------------------------------------------------
87  ProgressBarTitle(const IFitVar* xvar, const IFitVar* yvar,
88  const std::vector<const IFitVar*>& profVars,
89  const std::vector<const ISyst*>& profSysts) const
90  {
91  std::string title = "Filling surface for "+yvar->ShortName()+" vs "+xvar->ShortName();
92 
93  if(!profVars.empty() || !profSysts.empty()){
94  title += " (profiling ";
95 
96  for(const IFitVar* v: profVars) title += v->ShortName() + ", ";
97  for(const ISyst* s: profSysts) title += s->ShortName() + ", ";
98 
99  // Always have one superfluous ", " at the end
100  title.resize(title.size()-2);
101  title += ")";
102  }
103 
104  return title;
105  }
106 
107  //---------------------------------------------------------------------
110  const IFitVar *xvar, const IFitVar *yvar,
111  const std::vector<const IFitVar *> &profVars,
112  const std::vector<const ISyst *> &profSysts,
113  const SeedList& seedPts,
114  const std::vector<SystShifts> &systSeedPts)
115  {
116  // Nothing created during surface filling belongs in a
117  // directory. Unfortunately the local guards in Spectrum etc are racey when
118  // run in parallel. But this should cover the whole lot safely.
119  DontAddDirectory guard;
120 
121  const std::string progTitle = ProgressBarTitle(xvar, yvar, profVars, profSysts);
122 
123  Progress *prog = 0;
124  // Difficult to keep a progress bar properly up to date when threaded
125  if (!fParallel) prog = new Progress(progTitle);
126  ThreadPool *pool = 0;
127 
128  if(fParallel){
129  // A hack/workaround needed for parallel running:
130  //
131  // Give all the constituents of the Prediction a chance to do lazy
132  // initialization, before they race themselves trying to do it in
133  // parallel.
134  expt->ChiSq(calc);
135 
136  pool = new ThreadPool;
137  pool->ShowProgress(progTitle + TString::Format(" using %d threads", pool->NThreads()).Data());
138  }
139 
140  const int Nx = fHist->GetNbinsX();
141  const int Ny = fHist->GetNbinsY();
142 
143  // Fill bins in "random" order so that the progress bar is accurate
144  // straight away instead of possibly being misled by whatever atypical
145  // points we start with. This step is a prime which guarantees we get every
146  // cell.
147  int step = 7919;
148  // Very unlikely (Nx or Ny is a multiple of step), but just to be safe.
149  if ((Nx * Ny) % step == 0) step = 1;
150 
151  int bin = 0;
152  int neval = 0;
153 
154  do
155  {
156  const int x = bin % Nx + 1;
157  const int y = bin / Nx + 1;
158 
159  const double xv = BinCenterX(x);
160  const double yv = BinCenterY(y);
161 
162  if (xvar->Penalty(xv, calc) > 1e-10)
163  {
164  std::cerr << "Warning! " << xvar->ShortName() << " = " << xv
165  << " has penalty of " << xvar->Penalty(xv, calc)
166  << " that could have been applied in surface. "
167  << "This should never happen." << std::endl;
168  }
169  if (yvar->Penalty(yv, calc) > 1e-10)
170  {
171  std::cerr << "Warning! " << yvar->ShortName() << " = " << yv
172  << " has penalty of " << yvar->Penalty(yv, calc)
173  << " that could have been applied in surface. "
174  << "This should never happen." << std::endl;
175  }
176 
177  ThreadPool::func_t task = [=](){
178  FillSurfacePoint(expt, calc,
179  xvar, xv, yvar, yv,
180  profVars, profSysts, seedPts, systSeedPts);
181  };
182 
183  if(fParallel){
184  pool->AddTask(task);
185  }
186  else{
187  task(); // Just do it straight away
188  ++neval;
189  prog->SetProgress(neval / double(Nx * Ny));
190  }
191 
192  bin = (bin + step) % (Nx * Ny);
193  } while (bin != 0);
194 
195 
196  if(fParallel){
197  pool->Finish();
198  delete pool;
199  }
200  else{
201  prog->Done();
202  delete prog;
203  }
204  }
205 
206  //----------------------------------------------------------------------
209  const IFitVar* xvar, double x,
210  const IFitVar* yvar, double y,
211  const std::vector<const IFitVar*>& profVars,
212  const std::vector<const ISyst*>& profSysts,
213  const SeedList& seedPts,
214  const std::vector<SystShifts>& systSeedPts)
215  {
216  if(fParallel){
217  // Need to take our own copy so that we don't get overwritten by someone
218  // else's changes.
219  calc = calc->Copy();
220  }
221 
222  xvar->SetValue(calc, x);
223  yvar->SetValue(calc, y);
224 
225  //Make sure that the profiled values of fitvars do not persist between steps.
226  for(int i = 0; i < (int)fSeedValues.size(); ++i) profVars[i]->SetValue( calc, fSeedValues[i] );
227 
228  double chi;
229  if(profVars.empty() && profSysts.empty()){
230  chi = expt->ChiSq(calc);
231  }
232  else{
233  MinuitFitter fitter(expt, profVars, profSysts);
234  fitter.SetFitOpts(fFitOpts);
235  SystShifts bestSysts;
236  chi = fitter.Fit(calc, bestSysts, seedPts, systSeedPts, MinuitFitter::kQuiet)->EvalMetricVal();
237 
238  for(unsigned int i = 0; i < profVars.size(); ++i){
239  fProfHists[i]->Fill(x, y, profVars[i]->GetValue(calc));
240  }
241  for(unsigned int j = 0; j < profSysts.size(); ++j){
242  fProfHists[j+profVars.size()]->Fill(x, y, bestSysts.GetShift(profSysts[j]));
243  }
244  }
245 
246  fHist->Fill(x, y, chi);
247 
248  if(fParallel) delete calc;
249 
250  return chi;
251  }
252 
253 
254  //---------------------------------------------------------------------
257  const IFitVar* xvar, const IFitVar* yvar,
258  const std::vector<const IFitVar*>& profVars,
259  const std::vector<const ISyst*>& profSysts,
260  const SeedList& seedPts,
261  const std::vector<SystShifts>& systSeedPts)
262  {
263  // Location of the best minimum found from filled surface
264  double minchi = 1e10;
265  int minx = fHist->GetNbinsX()/2;
266  int miny = fHist->GetNbinsY()/2;
267  for(int x = 1; x <= fHist->GetNbinsX(); ++x){
268  for(int y = 1; y <= fHist->GetNbinsY(); ++y){
269  const double chi = fHist->GetBinContent(x, y);
270  if(chi < minchi){
271  minchi = chi;
272  minx = x;
273  miny = y;
274  }
275  }
276  }
277 
278  std::vector<const IFitVar*> allVars = {xvar, yvar};
279  allVars.insert(allVars.end(), profVars.begin(), profVars.end());
280  MinuitFitter fit(expt, allVars, profSysts);
281  fit.SetFitOpts(fFitOpts);
282  // Seed from best grid point
283  xvar->SetValue(calc, BinCenterX(minx));
284  yvar->SetValue(calc, BinCenterY(miny));
285  for(int i = 0; i < (int)fSeedValues.size(); ++i) profVars[i]->SetValue( calc, fSeedValues[i] );
286  SystShifts systSeed = SystShifts::Nominal();
287  fBestLikelihood = fit.Fit(calc, systSeed, seedPts)->EvalMetricVal();
288  fBestFitX = xvar->GetValue(calc);
289  fBestFitY = yvar->GetValue(calc);
290 
291  for(int x = 0; x < fHist->GetNbinsX()+2; ++x){
292  for(int y = 0; y < fHist->GetNbinsY()+2; ++y){
293  fHist->SetBinContent(x, y, fHist->GetBinContent(x, y)-fBestLikelihood);
294  }
295  }
296 
297  fHist->SetMinimum(0);
298  }
299 
300  //----------------------------------------------------------------------
301  void FrequentistSurface::SaveTo(TDirectory* dir, const std::string& name) const
302  {
303  TDirectory *tmp = gDirectory;
304 
305  dir = dir->mkdir(name.c_str()); // switch to subdir
306  dir->cd();
307 
308  TObjString("FrequentistSurface").Write("type");
309 
311 
312  TDirectory *profDir = dir->mkdir("profHists");
313  int idx = 0;
315  {
316  profDir->cd();
317  it->Write(TString::Format("hist%d", idx++));
318  }
319 
320  dir->Write();
321  delete dir;
322 
323  tmp->cd();
324  }
325 
326 //----------------------------------------------------------------------
327  std::unique_ptr<FrequentistSurface> FrequentistSurface::LoadFrom(TDirectory* dir, const std::string& name)
328  {
329  dir = dir->GetDirectory(name.c_str()); // switch to subdir
330  assert(dir);
331 
332  DontAddDirectory guard;
333 
334  TObjString *tag = (TObjString *) dir->Get("type");
335  assert(tag);
336  assert(tag->GetString() == "FrequentistSurface" || tag->GetString() == "Surface");
337  delete tag;
338 
339  std::unique_ptr<FrequentistSurface> surf(new FrequentistSurface);
340  ISurface::FillSurfObj(*surf, dir);
341 
342  for (std::size_t idx = 0; idx < surf->fSeedValues.size(); ++idx)
343  {
344  // Search for old "marg" name here too for backwards compatibility
345  TH2 *h = (TH2 *) dir->Get(TString::Format("profHists/hist%lu", idx));
346  if (h)
347  surf->fProfHists.push_back(h);
348  else
349  surf->fProfHists.push_back((TH2 *) dir->Get(TString::Format("margHists/hist%lu", idx)));
350  }
351 
352  const TObjString* logx = (TObjString*)dir->Get("logx");
353  const TObjString* logy = (TObjString*)dir->Get("logy");
354  // Tolerate missing log tags for backwards compatibility
355  surf->fLogX = (logx && logx->GetString() == "yes");
356  surf->fLogY = (logy && logy->GetString() == "yes");
357 
358  delete dir;
359 
360  return surf;
361  }
362 
363 
364  // See eg the statistics section of the PDG
365  TH2* Gaussian68Percent2D(const FrequentistSurface& s){return Flat(2.30, s);}
366  TH2* Gaussian90Percent2D(const FrequentistSurface& s){return Flat(4.61, s);}
367  TH2* Gaussian95Percent2D(const FrequentistSurface& s){return Flat(5.99, s);}
368  TH2* Gaussian2Sigma2D (const FrequentistSurface& s){return Flat(6.18, s);}
369  TH2* Gaussian99Percent2D(const FrequentistSurface& s){return Flat(9.21, s);}
370  TH2* Gaussian3Sigma2D (const FrequentistSurface& s){return Flat(11.83, s);}
371 
372  TH2* Gaussian68Percent1D(const FrequentistSurface& s){return Flat(1.00, s);}
373  TH2* Gaussian90Percent1D(const FrequentistSurface& s){return Flat(2.71, s);}
374  TH2* Gaussian95Percent1D(const FrequentistSurface& s){return Flat(3.84, s);}
375  TH2* Gaussian2Sigma1D (const FrequentistSurface& s){return Flat(4.00, s);}
376  TH2* Gaussian99Percent1D(const FrequentistSurface& s){return Flat(6.63, s);}
377  TH2* Gaussian3Sigma1D (const FrequentistSurface& s){return Flat(9.00, s);}
378 
379 
380 } // namespace
const XML_Char * name
Definition: expat.h:151
void Finish()
Wait for all threads to complete before returning.
Definition: ThreadPool.cxx:50
const std::string & LatexName() const
Definition: IFitVar.h:37
virtual double ChiSq(osc::IOscCalcAdjustable *osc, const SystShifts &syst=SystShifts::Nominal()) const
Definition: IExperiment.h:18
TH2 * Gaussian90Percent1D(const FrequentistSurface &s)
Up-value surface for 90% confidence in 1D in gaussian approximation.
virtual _IOscCalcAdjustable< T > * Copy() const =0
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
set< int >::iterator it
MinuitFitter::FitOpts fFitOpts
double BinCenterX(int bin) const
Definition: ISurface.cxx:244
double FillSurfacePoint(const IExperiment *expt, osc::IOscCalcAdjustable *calc, const IFitVar *xvar, double x, const IFitVar *yvar, double y, const std::vector< const IFitVar * > &profVars, const std::vector< const ISyst * > &profSysts, const SeedList &seedPts, const std::vector< SystShifts > &systSeedPts)
std::function< void(void)> func_t
The type of the user&#39;s worker functions.
Definition: ThreadPool.h:33
Simple record of shifts applied to systematic parameters.
Definition: SystShifts.h:20
TH2 * Gaussian68Percent2D(const FrequentistSurface &s)
Up-value surface for 68% confidence in 2D in gaussian approximation.
const IFitVar * var
Definition: FitAxis.h:17
double BinCenterY(int bin) const
Definition: ISurface.cxx:251
OStream cerr
Definition: OStream.cxx:7
TH2 * Gaussian2Sigma2D(const FrequentistSurface &s)
Up-value surface for 2 sigma confidence in 2D in gaussian approximation.
Float_t tmp
Definition: plot.C:36
TH2 * Gaussian2Sigma1D(const FrequentistSurface &s)
Up-value surface for 2 sigma confidence in 1D in gaussian approximation.
static SystShifts Nominal()
Definition: SystShifts.h:34
osc::OscCalcDumb calc
std::string ProgressBarTitle(const IFitVar *xvar, const IFitVar *yvar, const std::vector< const IFitVar * > &profVars, const std::vector< const ISyst * > &profSysts) const
Encapsulate code to systematically shift a caf::SRProxy.
Definition: ISyst.h:14
int NThreads() const
Definition: ThreadPool.h:40
bool logy
TH2F * ExpandedHistogram(const std::string &title, int nbinsx, double xmin, double xmax, bool xlog, int nbinsy, double ymin, double ymax, bool ylog)
Internal helper for Surface and FCSurface.
Definition: Utilities.cxx:153
std::vector< TH2 * > fProfHists
Log-likelihood scan across two parameters.
const XML_Char * s
Definition: expat.h:262
TH2 * Gaussian95Percent2D(const FrequentistSurface &s)
Up-value surface for 95% confidence in 2D in gaussian approximation.
expt
Definition: demo5.py:34
TH2F * fHist
Definition: ISurface.h:66
double min
Definition: FitAxis.h:19
void FindMinimum(const IExperiment *expt, osc::IOscCalcAdjustable *calc, const IFitVar *xvar, const IFitVar *yvar, const std::vector< const IFitVar * > &profVars, const std::vector< const ISyst * > &profSysts, const SeedList &seedPts, const std::vector< SystShifts > &systSeedPts)
virtual std::unique_ptr< IFitSummary > Fit(osc::IOscCalcAdjustable *seed, SystShifts &bestSysts=junkShifts, const SeedList &seedPts=SeedList(), const std::vector< SystShifts > &systSeedPts={}, Verbosity verb=kVerbose) const
Master fitting method. Depends on FitHelper and FitHelperSeeded.
Definition: IFitter.cxx:69
bool fLogX
Definition: ISurface.h:67
virtual double Penalty(double, osc::IOscCalcAdjustable *) const
Definition: IFitVar.h:34
TH2 * Gaussian3Sigma2D(const FrequentistSurface &s)
Up-value surface for 3 sigma confidence in 2D in gaussian approximation.
bool islog
Definition: FitAxis.h:21
int nbins
Definition: FitAxis.h:18
T GetShift(const ISyst *syst) const
TH2 * Gaussian99Percent2D(const FrequentistSurface &s)
Up-value surface for 99% confidence in 2D in gaussian approximation.
double max
Definition: FitAxis.h:20
double fBestFitY
Definition: ISurface.h:65
TH2 * Gaussian3Sigma1D(const FrequentistSurface &s)
Up-value surface for 3 sigma confidence in 1D in gaussian approximation.
const double j
Definition: BetheBloch.cxx:29
void SaveToHelper(TDirectory *dir) const
dir should already be the appropriate sub-directory
Definition: ISurface.cxx:189
float bin[41]
Definition: plottest35.C:14
void AddTask(const func_t &func)
Definition: ThreadPool.cxx:89
bool fLogY
Definition: ISurface.h:67
std::vector< double > fSeedValues
Definition: ISurface.h:68
bool logx
::xsd::cxx::tree::string< char, simple_type > string
Definition: Database.h:154
void SetProgress(double frac)
Update the progress fraction between zero and one.
Definition: Progress.cxx:41
const std::string & ShortName() const
Definition: IFitVar.h:36
TDirectory * dir
Definition: macro.C:5
double fBestFitX
Definition: ISurface.h:64
virtual void SetValue(osc::IOscCalcAdjustable *osc, double val) const =0
TH2 * Gaussian90Percent2D(const FrequentistSurface &s)
Up-value surface for 90% confidence in 2D in gaussian approximation.
void SaveTo(TDirectory *dir, const std::string &name) const
Base class defining interface for experiments.
Definition: IExperiment.h:14
A very simple thread pool for use by Surface.
Definition: ThreadPool.h:18
assert(nhit_max >=nhit_nbins)
A simple ascii-art progress bar.
Definition: Progress.h:9
void ShowProgress(const std::string &title)
Definition: ThreadPool.cxx:43
Interface definition for fittable variables.
Definition: IFitVar.h:16
void Format(TGraph *gr, int lcol, int lsty, int lwid, int mcol, int msty, double msiz)
Definition: Style.cxx:154
TH2 * Gaussian99Percent1D(const FrequentistSurface &s)
Up-value surface for 99% confidence in 1D in gaussian approximation.
static void FillSurfObj(ISurface &surf, TDirectory *dir)
Definition: ISurface.cxx:214
void CreateHistograms(const FitAxis &xax, const FitAxis &yax, const std::vector< const IFitVar * > &profVars, const std::vector< const ISyst * > &profSysts)
TH2 * Flat(double level, const ISurface &s)
Helper function for the gaussian approximation surfaces.
Definition: ISurface.cxx:232
Prevent histograms being added to the current directory.
Definition: UtilsExt.h:46
Float_t e
Definition: plot.C:35
TH2 * Gaussian95Percent1D(const FrequentistSurface &s)
Up-value surface for 95% confidence in 1D in gaussian approximation.
void Done()
Call this when action is completed.
Definition: Progress.cxx:90
static std::unique_ptr< FrequentistSurface > LoadFrom(TDirectory *dir, const std::string &name)
double fBestLikelihood
Definition: ISurface.h:63
TH2 * Gaussian68Percent1D(const FrequentistSurface &s)
Up-value surface for 68% confidence in 1D in gaussian approximation.
Collect information describing the axis of a fit variable.
Definition: FitAxis.h:10
surf
Definition: demo5.py:35
void SetFitOpts(FitOpts opts)
virtual double GetValue(const osc::IOscCalcAdjustable *osc) const =0
virtual void FillSurface(const IExperiment *expt, osc::IOscCalcAdjustable *calc, const IFitVar *xvar, const IFitVar *yvar, const std::vector< const IFitVar * > &profVars, const std::vector< const ISyst * > &profSysts, const SeedList &seedPts, const std::vector< SystShifts > &systSeedPts)
Perform MINUIT fits in one or two dimensions.
Definition: MinuitFitter.h:17