test_stanfit_systpulls.C
Go to the documentation of this file.
1 /*
2  * test_stanfit_systpulls.C:
3  * Test the use of the MCMC tool Stan for fitting in CAFAna.
4  * This test specifically examines the systematics fitting
5  * with a series of pull studies.
6  *
7  * Original author: J. Wolcott <jwolcott@fnal.gov>
8  * Date: November 2018
9  */
10 
11 #include "TCanvas.h"
12 #include "TGraph.h"
13 #include "TMarker.h"
14 #include "TROOT.h"
15 
16 #include "CAFAna/Analysis/Plots.h"
18 #include "CAFAna/Core/ISyst.h"
20 #include "CAFAna/Core/Spectrum.h"
21 #include "CAFAna/Core/SystShifts.h"
26 #include "CAFAna/Fit/StanConfig.h"
27 #include "CAFAna/Fit/StanFitter.h"
28 #include "CAFAna/Fit/MCMCSamples.h"
32 #include "CAFAna/Systs/DummySystStorage.h"
34 #include "CAFAna/Vars/HistAxes.h"
37 
38 #include "OscLib/OscCalcPMNSOpt.h"
39 
40 #include "Utilities/func/MathUtil.h"
41 
42 using namespace ana;
43 
44 namespace test
45 {
46  double POT = 10e20;
47 // double POT = 1e23;
48  const std::vector<std::string> SYSTS_TO_CHECK
49  {
50  "AbsMuEScale2017",
51  "Calibration",
52  "MECEnuShapeNu",
53 // "ppfx_hadp_beam_pc00",
54  "RelativeCalib",
55 // "RPAShapeRES2018", // shape of likelihood means sampling gets stuck and is quite slow
56  "RPAShapesupp2018",
57  };
58 
59  // ---------------------------------------------
61  {
62  calc.SetL(810);
63  calc.SetRho(2.75);
64  calc.SetDmsq21(7.6e-5);
65  calc.SetDmsq32(2.5e-3);
66  calc.SetTh12(asin(sqrt(.87))/2);
67  calc.SetTh13(asin(sqrt(.10))/2);
68  calc.SetTh23(TMath::Pi() / 4);
69  calc.SetdCP(0);
70  }
71 
72  // ---------------------------------------------
73 }
74 
75 /////////////////////////////////////////////
76 /////////////////////////////////////////////
77 
78 
80 {
83 
84  std::vector<const ISyst*> systs;
85  for (const auto & syst : test::SYSTS_TO_CHECK)
86  systs.push_back(dynamic_cast<const ISyst*>(Registry::ShortNameToPtr(syst)));
87  // let's use the 2018 numu predictions with no extrapolation for simplicity.
88  auto cvmfs_dir = std::getenv("NUMUDATA_DIR");
89  if (!cvmfs_dir)
90  {
91  std::cerr << "Couldn't find UPS dir for numu prediction!" << std::endl;
92  return;
93  }
94  auto fname = std::string(cvmfs_dir) + "ana2018/Predictions/pred_fakeNDData_numuconcat_fhc__numu2018.root";
95  PredictionSystJoint2018 pred(kNuMuNoExtrap, &calc, "fhc", -1, systs, fname);
96 
97  Spectrum nominal = pred.Predict(&calc);
98 
99  // loop over the systs.
100  // make a fake data spectrum and fit to it for each one.
101  // store the best fits...
102  std::map<const ISyst*, double> bestFits;
103  for (const auto & syst : systs)
104  {
105  std::cout << "\n\nTesting syst: " << syst->ShortName() << std::endl;
106  std::cout << "==================================" << std::endl;
107  std::cout << std::endl;
108 
109  // do a +2-sigma test.
110  SystShifts shifts(syst, +2);
111  Spectrum spec_pred = pred.PredictSyst(&calc, shifts);
112  Spectrum fakeData = spec_pred.FakeData(test::POT);
113  SingleSampleExperiment expt(&pred, fakeData);
114 
115  shifts.SetShift(syst, 0); // seed the fit at 0
116 
117  TCanvas c;
118  Spectrum CV = pred.Predict(&calc);
119  SpectrumStan shiftedStan = pred.PredictSyst(&calc, shifts);
120  DataMCComparison(fakeData, CV);
121  spec_pred.ToTH1(fakeData.POT(), kBlue)->Draw("hist same");
122  std::cout << " Before fitting, LL between spectra is "
123  << LogLikelihood(shiftedStan.ToBins(fakeData.POT()), fakeData.ToTH1(fakeData.POT())) / -2.
124  << std::endl;
125  osc::OscCalcPMNSOptStan stanCalc;
126  osc::CopyParams(&calc, &stanCalc);
127  std::cout << " Experiment object says it's " << expt.LogLikelihood(&stanCalc, shifts) << std::endl;
128  c.SaveAs(Form("/nova/ana/users/jwolcott/scratch/test_stanfit_syst_%s_prefit.png", syst->ShortName().c_str()));
129 
130  StanFitter fitter(&expt, {}, {syst});
132  config.num_warmup = 1000;
133  config.num_samples = 10000;
134 // config.verbosity = StanConfig::Verbosity::kEverything;
135  fitter.SetStanConfig(config);
136 
137  TGraph g;
138  for (int pullStep = -50; pullStep <= 50; pullStep++)
139  {
140  double pull = pullStep * 0.1;
141  shifts.ResetToNominal();
142  shifts.SetShift(syst, stan::math::var(pull)); // explicitly seed with Stan version to bypass double-to-Stan conversion warning
143  double ll = util::GetValAs<double>(expt.LogLikelihood(&stanCalc, shifts));
144 // std::cout << "for sigma = " << pull << ", LL = " << ll << std::endl;
145  g.SetPoint(g.GetN(), pull, ll);
146  }
147  c.Clear();
148  g.Draw("apl");
149  g.SetTitle(Form(";%s pull (#sigma); LL", syst->ShortName().c_str()));
150  c.SaveAs(Form("/nova/ana/users/jwolcott/scratch/test_stanfit_syst_%s_LLcurve.png", syst->ShortName().c_str()));
151 
152  shifts.ResetToNominal();
153  shifts.SetShift(syst, 0);
154  fitter.Fit(&calc, shifts);
155 // fitter.TestGradients(&calc, shifts);
156 // continue;
157 
158  TFile outF(Form("/nova/ana/users/jwolcott/scratch/samples_%s.root", syst->ShortName().c_str()), "recreate");
159  fitter.GetSamples().SaveTo(&outF, "samples");
160  outF.Close();
161 
162  c.Clear();
163  auto marg_syst = fitter.GetSamples().MarginalizeTo(syst);
164  auto bin_marg_ll = Binning::Simple(600, -6, 6);
165  auto h_marg_syst = marg_syst.ToTH1(bin_marg_ll);
166  h_marg_syst.SetTitle(Form(";%s pull (#sigma);LL", syst->ShortName().c_str()));
167  h_marg_syst.Draw();
168  c.SaveAs(Form("/nova/ana/users/jwolcott/scratch/test_stanfit_syst_%s_marg.png", syst->ShortName().c_str()));
169 
170  c.Clear();
171  double bestSyst = fitter.GetSamples().SampleValue(syst, fitter.GetSamples().BestFitSampleIdx());
172  std::cout << "Best fit for syst " << syst->ShortName() << " is at pull = " << bestSyst << std::endl;
173  bestFits[syst] = bestSyst;
174  shifts.SetShift(syst, bestSyst);
175  DataMCComparison(fakeData, pred.PredictSyst(&calc, shifts));
176  nominal.ToTH1(nominal.POT(), kGray)->Draw("hist same");
177  c.SaveAs(Form("/nova/ana/users/jwolcott/scratch/test_stanfit_syst_%s_bestfitpred.png", syst->ShortName().c_str()));
178 
179  } // for (systName)
180 
181  // make a plot illustrating that everything worked
182  TH1D h_bestFits("bestfits", ";Systematic;Best fit pull (#sigma)", bestFits.size(), 0, bestFits.size());
183  std::size_t systIdx = 0;
184  for (const auto & systPair : bestFits)
185  {
186  h_bestFits.SetBinContent(++systIdx, systPair.second);
187  h_bestFits.GetXaxis()->SetBinLabel(systIdx, systPair.first->ShortName().c_str());
188  }
189  TCanvas c;
190  h_bestFits.SetMarkerStyle(kFullCircle);
191  h_bestFits.Draw("p");
192  c.SaveAs("/nova/ana/users/jwolcott/scratch/test_stanfit_syst_bestfits.png");
193 }
tree Draw("slc.nhit")
virtual void SetL(double L)=0
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
std::vector< SystGroupDef > systs
Definition: syst_header.h:385
TH1D * ToTH1(double exposure, Color_t col=kBlack, Style_t style=kSolid, EExposureType expotype=kPOT, EBinType bintype=kBinContent) const
Histogram made from this Spectrum, scaled to some exposure.
Definition: Spectrum.cxx:148
virtual void SetDmsq21(const T &dmsq21)=0
Loads shifted spectra from files.
Simple record of shifts applied to systematic parameters.
Definition: SystShifts.h:20
T sqrt(T number)
Definition: d0nt_math.hpp:156
Configuration parameters for the Stan MCMC fitter, bundled up together for easy storage and parameter...
Definition: StanConfig.h:6
Spectrum PredictSyst(osc::IOscCalc *calc, const SystShifts &shift) const override
virtual void SetTh13(const T &th13)=0
OStream cerr
Definition: OStream.cxx:7
osc::OscCalcDumb calc
double LogLikelihood(const Eigen::ArrayXd &ea, const Eigen::ArrayXd &oa, bool useOverflow)
The log-likelihood formula from the PDG.
Definition: EigenUtils.cxx:36
virtual void SetDmsq32(const T &dmsq32)=0
Representation of a spectrum in any variable, with associated POT.
Definition: Spectrum.h:40
expt
Definition: demo5.py:34
Spectrum FakeData(double pot) const
Synonymous with AsimovData(). Retained for compatibility.
Definition: Spectrum.cxx:349
std::string getenv(std::string const &name)
void CopyParams(const osc::_IOscCalcAdjustable< T > *inCalc, osc::_IOscCalcAdjustable< U > *outCalc)
Copy parameters from one calculator to another, irrespective of their type.
Definition: IOscCalc.cxx:62
Optimized version of OscCalcPMNS.
Definition: StanTypedefs.h:31
void ResetCalculator(osc::IOscCalcAdjustable &calc)
double POT() const
Definition: Spectrum.h:227
OStream cout
Definition: OStream.cxx:6
std::vector< double > POT
Spectrum Predict(osc::IOscCalc *calc) const override
virtual void SetRho(double rho)=0
const std::vector< std::string > SYSTS_TO_CHECK
void ResetToNominal()
Definition: SystShifts.cxx:144
void test_stanfit_systpulls()
virtual void SetTh23(const T &th23)=0
TH1 * DataMCComparison(const Spectrum &data, const Spectrum &mc, EBinType bintype)
Definition: Plots.cxx:35
virtual stan::math::var LogLikelihood(osc::IOscCalcAdjustableStan *osc, const SystShifts &syst=kNoShift) const override
void ResetCalculator(osc::IOscCalcAdjustable &calc)
Float_t e
Definition: plot.C:35
enum BeamMode kBlue
static Binning Simple(int n, double lo, double hi, const std::vector< std::string > &labels={})
Definition: Binning.cxx:107
void SetShift(const ISyst *syst, double shift, bool force=false)
Definition: SystShifts.cxx:81
Fitter type that bolts the Stan fitting tools onto CAFAna.
Definition: StanFitter.h:126
static const T * ShortNameToPtr(const std::string &s, bool allowFail=false)
Definition: Registry.cxx:60
virtual void SetTh12(const T &th12)=0
virtual void SetdCP(const T &dCP)=0
T asin(T number)
Definition: d0nt_math.hpp:60
Compare a single data spectrum to the MC + cosmics expectation.
enum BeamMode string