train_dectree_caf.C
Go to the documentation of this file.
1 /*
2  You need to run this macro like:
3  root CAFAna/load_libs.C LEM/load_libs.C train_dectree_caf.C+
4 */
5 
6 #ifdef __CINT__
8 {
9  std::cout << "You need to run this macro like:\n"
10  << "root CAFAna/load_libs.C LEM/load_libs.C train_dectree_caf.C+"
11  << std::endl;
12  exit();
13 }
14 #else
15 
16 const int kNumTreesToTrain = 1000;
17 
19 #include "LEM/func/DecisionTree.h"
21 
22 #include "TCanvas.h"
23 #include "TFile.h"
24 #include "TH2.h"
25 #include "TLine.h"
26 #include "TTree.h"
27 
28 #include <cmath>
29 
30 template<class T> T sqr(T x){return x*x;}
31 
32 double oscW(double trueE, int ccnc, int pdg, int pdgorig)
33 {
34  if(pdg != pdgorig && ccnc == 1) return 0;
35  if(ccnc == 1) return 1;
36 
37  const double sinsq = sqr(sin(1.267*2.35e-3*810/trueE));
38  if(abs(pdg) == 12 && abs(pdgorig) == 14) return .5*.1*sinsq;
39  if(abs(pdg) == 14 && abs(pdgorig) == 12) return .5*.1*sinsq;
40  if(abs(pdg) == 12 && abs(pdgorig) == 12) return 1-.1*sinsq;
41  // TODO: not quite self-consistent with maximal-mixing elsewhere...
42  if(abs(pdg) == 14 && abs(pdgorig) == 14) return 1-.95*sinsq;
43 
44  std::cout << "Unknown neutrino: " << ccnc << " " << pdg << " " << pdgorig << std::endl;
45  return 0;
46 }
47 
48 std::vector<lem::dec::Evt> gTrainEvts, gTestEvts;
49 
51 {
52 public:
54 
55  virtual void HandleRecord(caf::StandardRecord* sr) override
56  {
57  // No truth
58  if(sr->mc.nnu == 0) return;
59 
60  // Filtered out
61  if(sr->sel.lem.pidexp < -.5) return;
62 
64  evt.vars[0] = sr->sel.lem.pidexp;
65  evt.vars[1] = sr->sel.lem.meanyexp;
66  evt.vars[2] = sr->sel.lem.meanqfracexp;
67  evt.vars[3] = sr->sel.lem.energydiffexp;
68  evt.vars[4] = sr->slc.calE;
69  evt.vars[5] = sr->sel.lem.enrichfracexp;
70 
71  evt.ccnc = 1-int(sr->mc.nu[0].iscc);
72  evt.pdg = sr->mc.nu[0].pdg;
73  evt.isSig = (evt.ccnc == 0 && abs(evt.pdg) == 12 && abs(sr->mc.nu[0].pdgorig) == 14);
74 
75  const double trueE = sr->mc.nu[0].E;
76 
77  evt.weight = oscW(trueE, evt.ccnc, evt.pdg, sr->mc.nu[0].pdgorig);
78  if(evt.weight == 0) return;
79 
80  fEvts.push_back(evt);
81  }
82 
83  virtual void HandlePOT() override
84  {
85  std::cout << "Total " << fRunPOT << " POT" << std::endl;
86 
87  for(unsigned int i = 0; i < fEvts.size(); ++i){
88  fEvts[i].weight *= 18e20/fRunPOT;
89  fEvts[i].weight *= 2; // About to split 50/50 between two sets
90 
91  // Rejects events with infs or nans, they blow up the tree
92  bool ok = true;
93  for(int j = 0; j < lem::dec::kNumPIDVars; ++j)
94  if(std::isnan(fEvts[i].vars[j]) ||
95  std::isinf(fEvts[i].vars[j])) ok = false;
96  if(!ok) continue;
97 
98  if(i%2 == 0)
99  gTestEvts.push_back(fEvts[i]);
100  else
101  gTrainEvts.push_back(fEvts[i]);
102  }
103 
104  fEvts.clear();
105  }
106 
107 protected:
108  std::vector<lem::dec::Evt> fEvts;
109 };
110 
112 {
113  // DecTreeLoader loader("/nova/ana/users/bckhouse/caf_lems/eweight_calE/*_nonswap_*.caf.root");
114  // DecTreeLoader loaderSwap("/nova/ana/users/bckhouse/caf_lems/eweight_calE/*_swap_*.caf.root");
115 
116  DecTreeLoader loader("/home/bckhouse/pbs/lem_retrain/out/fardet*_nonswap_*.root");
117  DecTreeLoader loaderSwap("/home/bckhouse/pbs/lem_retrain/out/fardet*_swap_*.root");
118 
119  const std::set<std::string> vars =
120  {"mc.nnu", "mc.nu.iscc", "mc.nu.pdg", "mc.nu.pdgorig", "mc.nu.E",
121  "slc.calE",
122  "sel.lem.pidexp", "sel.lem.meanyexp", "sel.lem.meanqfracexp",
123  "sel.lem.enrichfracexp", "sel.lem.energydiffexp"};
124 
125  loader.RequireVariables(vars);
126  loaderSwap.RequireVariables(vars);
127 
128  loader.Go();
129  loaderSwap.Go();
130 
131  TH1* hSig = new TH1F("", "", 500, -.5, 1.5);
132  TH1* hBkg = new TH1F("", "", 500, -.5, 1.5);
133  TH1* hBeam = new TH1F("", "", 500, -.5, 1.5);
134  TH1* hCC = new TH1F("", "", 500, -.5, 1.5);
135  const unsigned int N = gTrainEvts.size();
136  for(unsigned int n = 0; n < N; ++n){
137  if(gTrainEvts[n].isSig)
138  hSig->Fill(gTrainEvts[n].vars[0], gTrainEvts[n].weight);
139  else{
140  hBkg->Fill(gTrainEvts[n].vars[0], gTrainEvts[n].weight);
141 
142  if(gTrainEvts[n].ccnc == 0 && abs(gTrainEvts[n].pdg) == 12)
143  hBeam->Fill(gTrainEvts[n].vars[0], gTrainEvts[n].weight);
144 
145  if(gTrainEvts[n].ccnc == 0 && abs(gTrainEvts[n].pdg) == 14)
146  hCC->Fill(gTrainEvts[n].vars[0], gTrainEvts[n].weight);
147  }
148  }
149 
150  hSig->SetLineColor(kRed);
151  hBkg->SetLineColor(kBlue);
152  hBeam->SetLineColor(kMagenta);
153  hCC->SetLineColor(kBlack);
154  hBkg->Draw();
155  hSig->Draw("same");
156  hBeam->Draw("same");
157  hCC->Draw("same");
158 
159  double bestFOM = 0;
160  double bestCut = -1;
161 
162  for(int n = 0; n < hSig->GetNbinsX(); ++n){
163  // const double s1 = hSig->Integral(0, n);
164  // const double b1 = hBkg->Integral(0, n);
165 
166  const double s2 = hSig->Integral(n+1, -1);
167  const double b2 = hBkg->Integral(n+1, -1);
168 
169  if(s2+b2 == 0) continue;
170 
171  // const double fom = sqrt(sqr(s1)/(s1+b1)+sqr(s2)/(s2+b2));
172  const double fom = s2/sqrt(s2+b2);
173  // std::cout << fom << std::endl;
174  if(fom > bestFOM){
175  bestFOM = fom;
176  bestCut = hSig->GetXaxis()->GetBinLowEdge(n+1);
177  }
178  }
179 
180  TLine* lin = new TLine(bestCut, 0, bestCut, 20);
181  lin->SetLineWidth(2);
182  lin->SetLineColor(kGreen+2);
183  lin->Draw();
184 
185  std::cout << "Best PIDExp cut: " << bestFOM << std::endl;
186 
187  gPad->Update();
188 
189 
190  // The actual training
191  std::cout << "Training trees..." << std::endl;
192 
194  fst.ToFile("dectree.caf.root");
195  std::cout << "Wrote to file dectree.caf.root" << std::endl;
196  std::cout << "Evaluating..." << std::endl;
197 
198  // Cross check with test sample
199 
200  TH1* hSig2 = new TH1F("", "", 120, -.1, 1.1);
201  TH1* hBkg2 = new TH1F("", "", 120, -.1, 1.1);
202  TH1* hBeam2 = new TH1F("", "", 120, -.1, 1.1);
203  TH1* hCC2 = new TH1F("", "", 120, -.1, 1.1);
204 
205  TH2* h2D = new TH2F("", ";Calorimetric Energy (GeV); PID", 100, 0, 5, 100, 0, 1);
206 
207  for(unsigned int n = 0; n < gTestEvts.size(); ++n){
208  const double pid = fst.Classify(gTestEvts[n]);
209 
210  if(gTestEvts[n].isSig)
211  hSig2->Fill(pid, gTestEvts[n].weight);
212  else
213  hBkg2->Fill(pid, gTestEvts[n].weight);
214 
215  if(gTestEvts[n].ccnc == 0 && abs(gTestEvts[n].pdg) == 12 && !gTestEvts[n].isSig)
216  hBeam2->Fill(pid, gTestEvts[n].weight);
217 
218  if(gTestEvts[n].ccnc == 0 && abs(gTestEvts[n].pdg) == 14)
219  hCC2->Fill(pid, gTestEvts[n].weight);
220 
221  if(gTestEvts[n].isSig)
222  h2D->Fill(gTestEvts[n].vars[4], pid, gTestEvts[n].weight);
223  }
224 
225  new TCanvas;
226  h2D->Draw("colz");
227 
228  new TCanvas;
229  hSig2->SetLineColor(kRed);
230  hBkg2->SetLineColor(kBlue);
231  hBkg2->Draw();
232  hSig2->Draw("same");
233  hBeam2->SetLineColor(kMagenta);
234  hBeam2->Draw("same");
235  hCC2->Draw("same");
236  gPad->Print("mydectree.eps");
237  gPad->Print("mydectree_canv.root");
238 
239  double f2 = 0;
240  for(int n = 0; n < hSig2->GetNbinsX()+2; ++n){
241  const double s = hSig2->GetBinContent(n);
242  if(s == 0) continue;
243  const double b = hBkg2->GetBinContent(n);
244 
245  f2 += sqr(s)/(s+b);
246  }
247  std::cout << "Binned: " << sqrt(f2) << std::endl;
248 
249  double bestcutfom = 0;
250  for(int n = 0; n < hSig2->GetNbinsX()+2; ++n){
251  const double s = hSig2->Integral(n, -1);
252  if(!s) continue;
253  const double b = hBkg2->Integral(n, -1);
254  const double fom = s/sqrt(s+b);
255  if(fom > bestcutfom) bestcutfom = fom;
256  }
257 
258  std::cout << "Best cut FOM: " << bestcutfom << std::endl;
259 }
260 
261 #endif
Decision Tree PID.
std::vector< lem::dec::Evt > gTrainEvts
int isinf(const stan::math::var &a)
Definition: std_isinf.hpp:16
std::vector< lem::dec::Evt > gTestEvts
enum BeamMode kRed
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
void ToFile(const std::string &fname)
Write out PID structure to a file.
const int kNumPIDVars
Definition: DecisionTree.h:23
float meanyexp
Hadronic y of matches, "exp".
Definition: SRLem.h:32
const Var weight
T sqrt(T number)
Definition: d0nt_math.hpp:156
double fRunPOT
Crude measure, not including spill cuts.
float energydiffexp
Pot. diff between sig and bkg matches, "exp".
Definition: SRLem.h:45
void abs(TH1 *hist)
void train_dectree_caf()
Float_t f2
double oscW(double trueE, int ccnc, int pdg, int pdgorig)
int isnan(const stan::math::var &a)
Definition: std_isnan.hpp:18
A training or trial event for the decision tree.
Definition: DecisionTree.h:28
double Classify(const Evt &evt) const
Calculate the PID value for evt.
const XML_Char * s
Definition: expat.h:262
"Random forest" of decision trees
Definition: DecisionTree.h:42
float calE
Calorimetric energy of the cluster [GeV].
Definition: SRSlice.h:38
int evt
float pidexp
Fraction of matches that are signal, "exp".
Definition: SRLem.h:27
virtual void HandleRecord(caf::StandardRecord *sr) override
caf::StandardRecord * sr
virtual void Go() override
Load all the registered spectra.
const std::map< std::pair< std::string, std::string >, Variable > vars
const double j
Definition: BetheBloch.cxx:29
const int kNumTreesToTrain
loader
Definition: demo0.py:10
short nnu
Number of neutrinos in nu vector (0 or 1)
Definition: SRTruthBranch.h:37
float enrichfracexp
Fraction of matches that are "enriched", "exp".
Definition: SRLem.h:48
OStream cout
Definition: OStream.cxx:6
virtual void HandlePOT() override
static Forest Train(std::vector< Evt > &trainEvts, unsigned int nTrees, bool parallel=false)
Initial training of the PID.
float meanqfracexp
Fraction of charge matched, "exp".
Definition: SRLem.h:38
The StandardRecord is the primary top-level object in the Common Analysis File trees.
T sin(T number)
Definition: d0nt_math.hpp:132
Collaborates with Spectrum and OscillatableSpectrum to fill spectra from CAF files.
T sqr(T x)
SRIDBranch sel
Selector (PID) branch.
double vars[kNumPIDVars]
Definition: DecisionTree.h:30
exit(0)
const hit & b
Definition: hits.cxx:21
DecTreeLoader(const std::string &fname)
std::vector< lem::dec::Evt > fEvts
SRSlice slc
Slice branch: nhit, extents, time, etc.
SRTruthBranch mc
Truth branch for MC: energy, flavor, etc.
SRLem lem
Output from Library Event Matching (LEM)
Definition: SRIDBranch.h:43
double T
Definition: Xdiff_gwt.C:5
Float_t e
Definition: plot.C:35
loaderSwap
Definition: demo4.py:9
enum BeamMode kGreen
enum BeamMode kBlue
std::vector< SRNeutrino > nu
implemented as a vector to maintain mc.nu structure, i.e. not a pointer, but with 0 or 1 entries...
Definition: SRTruthBranch.h:25
enum BeamMode string