test_stanfit_withsysts.C
Go to the documentation of this file.
1 /*
2  * test_stanfit_withsysts.C:
3  * Test the use of the MCMC tool Stan for fitting in CAFAna.
4  * This version tests an actual neutrino oscillation fit,
5  * including with systematics.
6  *
7  * Original author: J. Wolcott <jwolcott@fnal.gov>
8  * Date: January 2019
9  */
10 
11 #include "TCanvas.h"
12 #include "TGraph.h"
13 #include "TLegend.h"
14 #include "TMarker.h"
15 
16 #include "CAFAna/Analysis/Plots.h"
20 #include "CAFAna/Core/SystShifts.h"
25 #include "CAFAna/Fit/Priors.h"
26 #include "CAFAna/Fit/StanConfig.h"
27 #include "CAFAna/Fit/StanFitter.h"
29 #include "CAFAna/Vars/FitVars.h"
30 #include "CAFAna/Vars/HistAxes.h"
34 
35 #include "OscLib/OscCalcDMP.h"
36 
37 #include "Utilities/func/MathUtil.h"
38 
39 using namespace ana;
40 
41 namespace test
42 {
43  double POT = 1e25; // use a ridiculous exposure so that the systs really control the fitting
44  const std::string SAVED_SAMPLES_FILE = "samples_systs.root";
45 
46  double MOCKDATA_TH23 = TMath::Pi()/4; // 0.72; // 0.72 radians --> 41 degrees
47  double MOCKDATA_DM32 = 0.0025;
48 
49  const std::vector<std::string> SYSTS_TO_THROW
50  {
51  "AbsMuEScale2017",
52  "Calibration",
53  "MECEnuShapeNu",
54  "RelativeCalib",
55  "RPAShapesupp2018",
56  };
57 
58  // ---------------------------------------------
60  {
61  calc.SetL(810);
62  calc.SetRho(2.75);
63  calc.SetDmsq21(7.6e-5);
64  calc.SetDmsq32(2.5e-3);
65  calc.SetTh12(asin(sqrt(.87))/2);
66  calc.SetTh13(asin(sqrt(.10))/2);
67  calc.SetTh23(TMath::Pi() / 4);
68  calc.SetdCP(0);
69  }
70 
71  // ---------------------------------------------
72 
73  std::unique_ptr<IPrediction> GetNumuPrediction(osc::IOscCalcAdjustable & calc,
74  const SystShifts & systs,
75  bool doExtrap)
76  {
77  auto cvmfs_dir = std::getenv("NUMUDATA_DIR");
78  if (!cvmfs_dir)
79  {
80  std::cerr << "Couldn't find UPS dir for numu prediction!" << std::endl;
81  return nullptr;
82  }
83  auto fname = std::string(cvmfs_dir) + "lib/ana2018/Predictions/pred_fakeNDData_numuconcat_fhc__numu2018.root";
84  return std::make_unique<PredictionSystJoint2018>(doExtrap ? kNuMu : kNuMuNoExtrap,
85  &calc, "fhc", -1, systs.ActiveSysts(), fname);
86  }
87 
89  {
90  if (!dir.empty())
91  file = dir + "/" + file;
92 
93  return file;
94  }
95 }
96 
97 void test_stanfit_withsysts(bool loadSamplesFromFile=true,
98  bool saveSamplesToFile=false,
99  bool drawPlots=true,
100  bool extrapolation=false,
101  std::string dirPrefix=".",
102  std::string samplesFilename=test::SAVED_SAMPLES_FILE,
103  std::string preds="")
104 {
105  assert ((loadSamplesFromFile != saveSamplesToFile) || !saveSamplesToFile);
106 
107  auto TFText = [](bool val, std::string trueText) { return val ? trueText : "FALSE"; };
108  std::cout << std::endl;
109  std::cout << "Loading MCMC samples from file: " << TFText(loadSamplesFromFile, test::FullFilename(dirPrefix, samplesFilename)) << std::endl;
110  std::cout << "Saving MCMC samples to file: " << TFText(saveSamplesToFile, test::FullFilename(dirPrefix, samplesFilename)) << std::endl;
111  if (preds.empty())
112  std::cout << "Using 2018 CV fake data extrapolated numu predictions." << std::endl;
113  else
114  std::cout << "Loading extrapolated numu predictions from file: " << preds << std::endl;
115 
116  std::cout << std::endl;
117 
118  std::unique_ptr<IPrediction> pred;
119  std::unique_ptr<osc::IOscCalcAdjustable> calc;
120  std::unique_ptr<osc::IOscCalcAdjustable> calcTruth;
121  std::unique_ptr<Spectrum> fakeData;
122  std::unique_ptr<SystShifts> systTruePulls;
123  std::unique_ptr<SystShifts> shifts;
124 
125  // build my fit variables...
128  std::vector<const IFitVar*> fitVars{&fitSsqTh23_UniformPriorSsqTh23, &fitDmSq32Scaled_UniformPrior};
129 
130  std::unique_ptr<MCMCSamples> samples;
131  if (!loadSamplesFromFile)
132  {
133  calc = std::make_unique<osc::OscCalcDMP>();
134  std::vector<const ISyst *> systs;
135 
136  // for extrapolation, we need to use predictions
137  // extrapolated from ND fake data using the same underlying pulls
138  if (extrapolation)
139  {
140  TFile predFile(preds.c_str());
141  if (predFile.IsZombie())
142  {
143  std::cerr << "Need a prediction file (based on ND fake data w/ syst pulls) to do extrapolation." << std::endl;
144  exit(1);
145  }
146  TDirectory * predDir = dynamic_cast<TDirectory*>(predFile.Get("pred"));
147  TDirectory * calcDir = dynamic_cast<TDirectory*>(predFile.Get("calcTruth"));
148  TDirectory * systDir = dynamic_cast<TDirectory*>(predFile.Get("systTruth"));
149  if (!calcDir || !systDir)
150  {
151  std::cerr << "Prediction file must have oscillation calculator & SystShifts used as seeds stored in it." << std::endl;
152  exit(1);
153  }
154  pred = ana::LoadFrom<IPrediction>(predDir);
155  calc = ana::LoadFrom<osc::IOscCalcAdjustable>(calcDir);
156  shifts = ana::LoadFrom<SystShifts>(systDir);
157  for (const auto & syst : shifts->ActiveSysts())
158  systs.push_back(syst);
159  }
160  else
161  {
162  // pick a few test values for some mock data...
163  test::ResetCalculator(*calc);
164  calc->SetTh23(test::MOCKDATA_TH23);
165  calc->SetDmsq32(test::MOCKDATA_DM32);
166  calcTruth = std::make_unique<osc::OscCalcDMP>();
167  *calcTruth = *calc;
168 
169  // choose +/- 1, 2, 3 sigma pulls at random
170  TRandom3 rnd;
171  rnd.SetSeed(20190531);
172  if (extrapolation)
173  shifts = std::make_unique<GaussianPriorSystShifts>();
174  else
175  shifts = std::make_unique<SystShifts>();
176  for (const auto &systName : test::SYSTS_TO_THROW) {
177  systs.emplace_back(Registry<ISyst>::ShortNameToPtr(systName));
178  shifts->SetShift(systs.back(), int(rnd.Uniform(1, 4)) * (rnd.Uniform() < 0.5 ? -1 : 1));
179  }
180 
181  pred = test::GetNumuPrediction(*calc, *shifts, extrapolation);
182  }
183  systTruePulls = shifts->Copy();
184  Spectrum spec_pred = pred->PredictSyst(calc.get(), *shifts);
185  fakeData = std::make_unique<Spectrum>(spec_pred.FakeData(test::POT));
186  SingleSampleExperiment expt(pred.get(), *fakeData);
187 
188  // now put the calc back to normal and reset the systs to nominal
189  test::ResetCalculator(*calc);
190  shifts->ResetToNominal();
191 
192  StanConfig cfg;
193  cfg.num_warmup = 500;
194  cfg.num_samples = 1000;
195  cfg.max_depth = 15;
197 // cfg.verbosity = StanConfig::Verbosity::kEverything;
198  StanFitter fitter(&expt, fitVars, systs);
199  fitter.SetStanConfig(cfg);
200  fitter.Fit(calc.get(), *shifts);
201 
202  samples = fitter.ExtractSamples();
203 
204  if (saveSamplesToFile)
205  {
206  TFile outF(test::FullFilename(dirPrefix, samplesFilename).c_str(), "recreate");
207  fakeData->SaveTo(&outF, "fakedata");
208  samples->SaveTo(&outF, "samples");
209  SaveTo(&static_cast<osc::IOscCalc&>(*calcTruth), outF, "calcTruth");
210  systTruePulls->SaveTo(&outF, "systTruth");
211  }
212  }
213  else
214  {
215  TFile inf(test::FullFilename(dirPrefix, samplesFilename).c_str());
216  if (inf.IsZombie())
217  return;
218 
219  fakeData = LoadFrom<Spectrum>(dynamic_cast<TDirectory*>(inf.Get("fakedata")));
220  samples = MCMCSamples::LoadFrom(&inf, "samples");
221  systTruePulls = SystShifts::LoadFrom(dynamic_cast<TDirectory*>(inf.Get("systTruth")));
222  assert (fakeData && samples && systTruePulls);
223 
224 
225  auto bestFitIdx = samples->BestFitSampleIdx();
226  calc = std::make_unique<osc::OscCalcDMP>();
227  test::ResetCalculator(*calc);
228  for (const auto & var : samples->Vars())
229  var->SetValue(calc.get(), samples->SampleValue(var, bestFitIdx));
230  shifts = std::make_unique<SystShifts>();
231  for (const auto & syst : samples->Systs())
232  shifts->SetShift(syst, samples->SampleValue(syst, bestFitIdx));
233 
234  //auto bfDir = dynamic_cast<TDirectory*>(inf.Get("bestfit"));
235  //calc.reset(dynamic_cast<osc::OscCalcPMNSOpt*>(LoadFrom<osc::IOscCalc>(dynamic_cast<TDirectory*>(bfDir->Get("osc"))).release()));
236  //shifts = LoadFrom<SystShifts>(dynamic_cast<TDirectory*>(bfDir->Get("systs")));
237  //assert(shifts);
238 
239  pred = test::GetNumuPrediction(*calc, *shifts, extrapolation);
240  }
241 
242  if (!drawPlots)
243  return;
244 
245  // draw the contour in oscillation parameter space overlaid on the LL surface
246  TCanvas c;
247  BayesianSurface surf = samples->MarginalizeTo(&fitSsqTh23_UniformPriorSsqTh23, 30, .3, .7,
248  &fitDmSq32Scaled_UniformPrior, 30, 2.2, 2.8);
249  surf.Draw();
250  surf.DrawContour(surf.QuantileSurface(Quantile::kGaussian1Sigma), 7, kGreen+2); // dashed
251  surf.DrawBestFit(kGray);
252  TMarker marker(util::sqr(sin(test::MOCKDATA_TH23)), test::MOCKDATA_DM32*1e3, kFullStar);
253  marker.SetMarkerColor(kMagenta);
254  marker.SetMarkerSize(3);
255  marker.Draw();
256  c.SaveAs(test::FullFilename(dirPrefix, "test_stanfit_systs_surface_contour.png").c_str());
257 
258  // draw the marginals and the pulls
259  for (const auto & fitVarPtr : fitVars)
260  {
261  c.Clear();
262  Bayesian1DMarginal marg = samples->MarginalizeTo(fitVarPtr);
263  auto h = marg.ToTH1(Binning::Simple(50, samples->MinValue(fitVarPtr), samples->MaxValue(fitVarPtr)));
264  h.Draw("hist");
265  c.SaveAs(Form(test::FullFilename(dirPrefix, "test_stanfit_systs_marg_%s.png").c_str(), fitVarPtr->ShortName().c_str()));
266  }
267  for (const auto & systPtr : shifts->ActiveSysts())
268  {
269  c.Clear();
270  Bayesian1DMarginal marg = samples->MarginalizeTo(systPtr);
271  auto h = marg.ToTH1(Binning::Simple(50, samples->MinValue(systPtr), samples->MaxValue(systPtr)));
272  h.Draw("hist");
273  c.SaveAs(Form(test::FullFilename(dirPrefix, "test_stanfit_systs_marg_%s.png").c_str(), systPtr->ShortName().c_str()));
274  }
275  // 2D syst space
276  auto systs = shifts->ActiveSysts();
277  for (const auto & systPtr1 : systs)
278  {
279  for (auto itSyst2 = systs.rbegin(); itSyst2 != systs.rend(); itSyst2++)
280  {
281  const ISyst* systPtr2 = *itSyst2;
282  if (systPtr2 == systPtr1)
283  break;
284 
285  c.Clear();
286  BayesianSurface marg = samples->MarginalizeTo(systPtr1, 50, samples->MinValue(systPtr1), samples->MaxValue(systPtr1),
287  systPtr2, 50, samples->MinValue(systPtr2), samples->MaxValue(systPtr2));
288  auto h2 = marg.ToTH2();
289  h2->Draw("colz");
290  std::string outf = Form(test::FullFilename(dirPrefix, "test_stanfit_systs_marg_%s-%s.png").c_str(),
291  systPtr1->ShortName().c_str(),
292  systPtr2->ShortName().c_str());
293  c.SaveAs(outf.c_str());
294  }
295  }
296 
297  // draw a comparison of the fake data and the best fit prediction
298  c.Clear();
299  // calc we passed to the surface now contains best fit params
300  DataMCComparison(*fakeData, pred.get(), calc.get(), *shifts, kBinDensity);
301  c.SaveAs(test::FullFilename(dirPrefix, "test_stanfit_systs_bestfitpred.png").c_str());
302 
303 
304  // show that the pulls were what we expect
305  TH1D h_fittedPulls("fitted_pulls", ";Systematic;Pull (#sigma)", shifts->ActiveSysts().size(), 0, shifts->ActiveSysts().size());
306  TH1D h_truePulls("true_pulls", ";Systematic;Pull (#sigma)", shifts->ActiveSysts().size(), 0, shifts->ActiveSysts().size());
307  std::size_t systIdx = 0;
308  double maxShift = 0;
309  for (const auto & syst : shifts->ActiveSysts())
310  {
311  h_fittedPulls.SetBinContent(++systIdx, shifts->GetShift(syst));
312  maxShift = std::max(maxShift, std::abs(shifts->GetShift(syst)));
313  h_truePulls.SetBinContent(systIdx, systTruePulls->GetShift(syst));
314  maxShift = std::max(maxShift, std::abs(systTruePulls->GetShift(syst)));
315  for (auto & h : {&h_fittedPulls, &h_truePulls})
316  h->GetXaxis()->SetBinLabel(systIdx, syst->ShortName().c_str());
317  }
318  maxShift = std::max(2., 1.2*maxShift);
319  h_truePulls.GetYaxis()->SetRangeUser(-maxShift, maxShift);
320  c.Clear();
321  h_truePulls.Draw("hist x");
322  h_fittedPulls.SetMarkerStyle(kFullCircle);
323  h_fittedPulls.Draw("p same");
324  TLegend leg(0.7, 0.7, 0.9, 0.9);
325  leg.SetFillStyle(0);
326  leg.SetBorderSize(0);
327  leg.AddEntry(&h_truePulls, "True", "l");
328  leg.AddEntry(&h_fittedPulls, "Fitted", "p");
329  leg.Draw();
330  c.SaveAs(test::FullFilename(dirPrefix, "test_stanfit_syst_pulls.png").c_str());
331 
332 }
T max(const caf::Proxy< T > &a, T b)
virtual void SetL(double L)=0
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
TFile * inf
Definition: drawXsec.C:1
std::string FullFilename(const std::string &dir, std::string file)
std::vector< SystGroupDef > systs
Definition: syst_header.h:385
void SaveTo(const osc::IOscCalc &x, TDirectory *dir, const std::string &name)
void test_stanfit_withsysts(bool loadSamplesFromFile=true, bool saveSamplesToFile=false, bool drawPlots=true, bool extrapolation=false, std::string dirPrefix=".", std::string samplesFilename=test::SAVED_SAMPLES_FILE, std::string preds="")
virtual void SetDmsq21(const T &dmsq21)=0
virtual const std::string & ShortName() const final
The name printed out to the screen.
Definition: ISyst.h:27
void ExtractSamples(MCMCSamples &samples, MemoryTupleWriter::WhichSamples ws=MemoryTupleWriter::WhichSamples::kPostWarmup)
Extract the MCMC samples from the fitter (you own them now).
Definition: StanFitter.h:206
double MOCKDATA_TH23
Simple record of shifts applied to systematic parameters.
Definition: SystShifts.h:20
TH2 * QuantileSurface(Quantile quantile) const
T sqrt(T number)
Definition: d0nt_math.hpp:156
Configuration parameters for the Stan MCMC fitter, bundled up together for easy storage and parameter...
Definition: StanConfig.h:6
Divide bin contents by bin widths.
Definition: Utilities.h:45
virtual void SetTh13(const T &th13)=0
const std::string SAVED_SAMPLES_FILE
OStream cerr
Definition: OStream.cxx:7
Verbosity verbosity
How verbose do you want me to be?
Definition: StanConfig.h:89
osc::OscCalcDumb calc
T sqr(T x)
More efficient square function than pow(x,2)
Definition: MathUtil.h:23
Encapsulate code to systematically shift a caf::SRProxy.
Definition: ISyst.h:14
virtual void SetDmsq32(const T &dmsq32)=0
float abs(float number)
Definition: d0nt_math.hpp:39
Representation of a spectrum in any variable, with associated POT.
Definition: Spectrum.h:33
void SetStanConfig(const StanConfig &cfg)
Change the config used for Stan. See the StanConfig struct documentation for ideas.
Definition: StanFitter.h:224
void DrawBestFit(Color_t color, Int_t marker=kFullCircle) const
Draw the best fit point.
Definition: ISurface.cxx:32
expt
Definition: demo5.py:34
static std::unique_ptr< SystShifts > LoadFrom(TDirectory *dir, const std::string &name)
Definition: SystShifts.cxx:243
static std::unique_ptr< MCMCSamples > LoadFrom(TDirectory *dir, const std::string &name)
Load from file.
virtual std::unique_ptr< IFitSummary > Fit(osc::IOscCalcAdjustable *seed, SystShifts &bestSysts=junkShifts, const SeedList &seedPts=SeedList(), const std::vector< SystShifts > &systSeedPts={}, Verbosity verb=kVerbose) const override
Definition: StanFitter.cxx:129
Spectrum FakeData(double pot) const
Fake data is a MC spectrum scaled to the POT expected in the data.
Definition: Spectrum.cxx:402
const double MOCKDATA_TH23
void Draw() const
Draw the surface itself.
Definition: ISurface.cxx:19
std::string getenv(std::string const &name)
void DrawContour(TH2 *fc, Style_t style, Color_t color, double minchi=-1)
Definition: ISurface.cxx:44
const ConstrainedFitVarWithPrior fitDmSq32Scaled_UniformPrior
void ResetCalculator(osc::IOscCalcAdjustable &calc)
TFile * outf
Definition: testXsec.C:51
int num_warmup
Number of initial steps in the Markov chain (used to enter the typical set). These are usually discar...
Definition: StanConfig.h:63
const std::vector< std::string > SYSTS_TO_THROW
const ConstrainedFitVarWithPrior fitDmSq32Scaled_UniformPrior & kFitDmSq32Scaled
double MOCKDATA_DM32
std::unique_ptr< IPrediction > LoadFrom< IPrediction >(TDirectory *dir, const std::string &label)
Definition: IPrediction.cxx:28
OStream cout
Definition: OStream.cxx:6
std::vector< double > POT
TH1F * h2
Definition: plot.C:45
const ConstrainedFitVarWithPrior fitSsqTh23_UniformPriorSsqTh23
TH2 * ToTH2(double minchi=-1) const
Definition: ISurface.cxx:161
::xsd::cxx::tree::string< char, simple_type > string
Definition: Database.h:154
virtual void SetRho(double rho)=0
T sin(T number)
Definition: d0nt_math.hpp:132
int max_depth
Depth of the binary tree used by the HMC when taking its leapfrog steps. Note: the number of leapfrog...
Definition: StanConfig.h:70
TDirectory * dir
Definition: macro.C:5
const ConstrainedFitVarWithPrior fitSsqTh23_UniformPriorSsqTh23 & kFitSinSqTheta23
virtual void SetTh23(const T &th23)=0
stan::math::var PriorUniformInFitVar(const stan::math::var &, const osc::IOscCalcAdjustableStan *)
Prior uniform in the variable that is being fitted.
Definition: Priors.cxx:10
exit(0)
TFile * file
Definition: cellShifts.C:17
assert(nhit_max >=nhit_nbins)
TH1D ToTH1(const Binning &bins) const
std::vector< const ISyst * > ActiveSysts() const
Definition: SystShifts.cxx:207
Version of FitVarWithPrior for use with constrained FitVar_StanSupports.
const double MOCKDATA_DM32
TH1 * DataMCComparison(const Spectrum &data, const Spectrum &mc, EBinType bintype)
Definition: Plots.cxx:35
void ResetCalculator(osc::IOscCalcAdjustable &calc)
Float_t e
Definition: plot.C:35
static Binning Simple(int n, double lo, double hi, const std::vector< std::string > &labels={})
Definition: Binning.cxx:38
Fitter type that bolts the Stan fitting tools onto CAFAna.
Definition: StanFitter.h:126
std::unique_ptr< IPrediction > GetNumuPrediction(osc::IOscCalcAdjustable &calc, const SystShifts &systs, bool doExtrap)
int num_samples
Number of steps in the Markov chain retained for analysis (after warmup).
Definition: StanConfig.h:64
virtual void SetTh12(const T &th12)=0
virtual void SetdCP(const T &dCP)=0
T asin(T number)
Definition: d0nt_math.hpp:60
Compare a single data spectrum to the MC + cosmics expectation.
surf
Definition: demo5.py:35