trimvar.C
Go to the documentation of this file.
1 //............................................
2 // This script will be usefull for trimming
3 // caf files to acess some variable for
4 // further Training and Testing using TMVA
5 //
6 // Author: Biswaranjan Behera
7 // email: bbehera@fnal.gov
8 //...........................................
10 #include "CAFAna/Core/Progress.h"
16 #include "CAFAna/Core/Utilities.h"
17 #include "CAFAna/Core/Binning.h"
18 #include "CAFAna/Cuts/Cuts.h"
19 #include "CAFAna/Cuts/TruthCuts.h"
20 #include "CAFAna/Core/Spectrum.h"
23 #include "CAFAna/Core/Var.h"
24 #include "CAFAna/Vars/Vars.h"
25 #include "CAFAna/Core/OscCurve.h"
27 #include "CAFAna/Core/EventList.h"
28 #include "CAFAna/Analysis/Plots.h"
29 #include "CAFAna/Cuts/Cuts.h"
30 #include "CAFAna/Cuts/SpillCuts.h"
31 
33 
34 #include "TLorentzVector.h"
35 #include <cmath>
36 #include <iostream>
37 #include <functional>
38 #include <list>
39 #include <memory>
40 #include <set>
41 #include <string>
42 #include <vector>
43 
44 //From Root
45 #include "TStyle.h"
46 #include "TFile.h"
47 #include "TROOT.h"
48 #include "TTree.h"
49 #include "TVector3.h"
50 #include "TH1.h"
51 
53 
54 using namespace ana;
55 
56 void trimvar(const std::string outputFilename, const std::string setname)
57 {
58 
59  // Nominal RHC MC defname
60  const std::string trainingSet = "muonid_training_sample";
61  const std::string testingSet = "muonid_testing_sample";
62 
63  // const std::string samQuery = "defname: " + defname;
64  std::string samQuery = ""; //"defname: " + defname;
65  if (setname == "train"){
66  cout << "Making Training Trim file from dataset: " << trainingSet << endl;
67  samQuery = "dataset_def_name_newest_snapshot " + trainingSet;
68  }
69  else if (setname == "test"){
70  cout << "Making Testing Trim File from dataset: " << testingSet << endl;
71  samQuery = "dataset_def_name_newest_snapshot " + testingSet;
72  }
73  else{
74  cerr << "Setname should be either \"train\" or \"test\". Exiting." << endl;
75  return;
76  }
77 
78  TFile outputFile(outputFilename.c_str(), "RECREATE");
79 
80  TTree *sigTree = new TTree("sigTree", "Signal Events");
81  TTree *bkgTree = new TTree("bkgTree", "Background Events");
82 
83  //...............................................................................
84 
85  // Six variable for MuonID
86  int fismuon = -5.0;
87  int ismuon = -5.0;
88 
89  float fScatLL = -999.0;
90  float fDedxLL = -999.0;
91  float fAvededxlast10cm = -999.0;
92  float fAvededxlast20cm = -999.0;
93  float fAvededxlast30cm = -999.0;
94  float fAvededxlast40cm = -999.0;
95 
96  //-------------------------------------------------------------------
97 
98  // Define the branches on the Tree
99 
100  // Six Muon PID variable
101  sigTree->Branch("DedxLL", &fDedxLL, "DedxLL/F");
102  sigTree->Branch("ScatLL", &fScatLL, "ScatLL/F");
103  sigTree->Branch("Avededxlast10cm", &fAvededxlast10cm, "Avededxlast10cm/F");
104  sigTree->Branch("Avededxlast20cm", &fAvededxlast20cm, "Avededxlast20cm/F");
105  sigTree->Branch("Avededxlast30cm", &fAvededxlast30cm, "Avededxlast30cm/F");
106  sigTree->Branch("Avededxlast40cm", &fAvededxlast40cm, "Avededxlast40cm/F");
107 
108  bkgTree->Branch("DedxLL", &fDedxLL, "DedxLL/F");
109  bkgTree->Branch("ScatLL", &fScatLL, "ScatLL/F");
110  bkgTree->Branch("Avededxlast10cm", &fAvededxlast10cm, "Avededxlast10cm/F");
111  bkgTree->Branch("Avededxlast20cm", &fAvededxlast20cm, "Avededxlast20cm/F");
112  bkgTree->Branch("Avededxlast30cm", &fAvededxlast30cm, "Avededxlast30cm/F");
113  bkgTree->Branch("Avededxlast40cm", &fAvededxlast40cm, "Avededxlast40cm/F");
114 
115  //---------------------------------------------------------------------
116 
117  // Begin looping over the selected files from the SAM Database.
118 
119  int stride = -1;
120  int offset = -1;
121  int limit = -1;
122 
123  if(getenv("CAFANA_STRIDE")){
124  stride = atoi(getenv("CAFANA_STRIDE"));
125  if(stride > 1 && getenv("CAFANA_OFFSET")){
126  offset = atoi(getenv("CAFANA_OFFSET"));
127  }
128  }
129 
130 
131  SAMQuerySource* filesrc = new SAMQuerySource(samQuery, stride, offset, limit);
132 
133  int Nfiles = filesrc->NFiles();
134 
135  Progress* prog = 0;
136 
137  int fileIdx = -1;
138  bool foundFile = true;
139  TFile* file1 = filesrc->GetNextFile();
140  if(!file1) foundFile = false; // out of files
141  assert(!file1->IsZombie());
142 
143  // Swapped from while loop to do-while, so we don't lose the first file
144  do {
145  //Start with some file management.
146  ++fileIdx;
147 
148  if(!prog) prog = new Progress(TString::Format("Filling signal and background trees from %d files", Nfiles).Data());
149 
150  if(Nfiles > 1 && prog) prog->SetProgress((fileIdx+1.)/Nfiles);
151 
152  // Define the recTree object
153  TTree *recTree = (TTree*)file1->Get("recTree");
154 
155  TTree *spillTree = (TTree*)file1->Get("spillTree");
156 
157  // Every time a GetEntry(i) is done, the data in the tree is copied to this object
158  caf::StandardRecord* sr = 0;
159  recTree->SetBranchAddress("rec", &sr);
160 
161  caf::SRSpill* sp = 0;
162  spillTree->SetBranchAddress("spill", &sp);
163 
164  //--------------------------------------------------------
165  // Standard Spill Cuts
166  //--------------------------------------------------------
167 
168  bool ismc = false;
169  if(sp->ismc) ismc = true;
170 
171  //cosmic trigger is 02,
172  //skip beam cuts
173  bool istrigger =false;
174  if(sp->trigger == 2) istrigger = true;
175 
176  // docdb 12474 slide 11
177  bool goodspill = false;
178  if(sp->spilltimesec==0 && sp->deltaspilltimensec==0 && sp->widthx==0)
179  goodspill = bool(sp->isgoodspill); // decafs have spill info removed
180  // and logic applied in decaf step
181  // so isgoodspill is equivalent to
182  // tightbeamqualitycuts
183 
184  if(std::abs(sp->deltaspilltimensec) > 0.5e9) continue;
185 
186  if(sp->spillpot < 2e12) continue;
187 
188  if(sp->hornI < -202 || sp->hornI > -198) continue;
189 
190  if(sp->posx < -2.00 || sp->posx > +2.00) continue;
191 
192  if(sp->posy < -2.00 || sp->posy > +2.00) continue;
193 
194  if(sp->widthx < 0.57 || sp->widthx > 1.58) continue;
195 
196  if(sp->widthy < 0.57 || sp->widthy > 1.58) continue;
197 
198  bool gdspill = false;
199  if(sp->dcmedgematchfrac==0 && sp->fracdcm3hits==0 && sp->nmissingdcmslg==0)
200  gdspill = bool(sp->isgoodspill);
201 
202  // fracdcm3hits > 0.7 for releases before 14-11-25,
203  // ND specific cut in cases where lights were left on
204  // nmissingdcms counts 0 hit dcms. No livegeo.
205  if(sp->det == caf::kNEARDET &&
206  (sp->fracdcm3hits > 0.45 ||
207  sp->nmissingdcms > 0) )
208  continue;
209 
210  // cut out events which have the isEventIncomplete DAQ header flag
211  // set.
212  if ( sp->eventincomplete ) continue;
213 
214  // nmissingdcmslg counts 0 hit dcms among those
215  // that livegeo think are live.
216  if( sp->det == caf::kFARDET &&
217  sp->nmissingdcmslg > 0 )
218  continue;
219 
220 
221  // FD specific cut to detect out of sync dcms.
222  // Only works if cosmics are overlaid. so we
223  // don't apply it on MC by default, although
224  // I assume you could safely apply it on cry.
225  if( sp->det == caf::kFARDET &&
226  !sp->ismc &&
227  sp->dcmedgematchfrac <= 0.2)
228  continue;
229 
230 
231  //--------------------------------------------------------
232 
233  //Looping over entries
234  Int_t numberOfEntries = recTree->GetEntriesFast();
235 
236  int i = 0;
237  //Begin filling the branches with variables and cuts applied.
238  for (Int_t event = 0; event < numberOfEntries; ++event) {
239  recTree->GetEntry(event);
240 
241  if(sr->vtx.nelastic == 0) continue;
242  if(sr->mc.nnu < 1) continue;
243  if(sr->trk.kalman.ntracks < 1) continue;
244 
245  int nkals = sr->trk.kalman.ntracks;
246 
247  //bool isquality = false;
248 
249  // bool isContained = false;
250  bool isquality = false;
251  bool iscontprong = true;
252  // bool isfiducial = false;
253 
254  for( unsigned int i = 0; i < sr->vtx.elastic[0].fuzzyk.nshwlid; ++i ) {
255  TVector3 start = sr->vtx.elastic[0].fuzzyk.png[i].shwlid.start;
256  TVector3 stop = sr->vtx.elastic[0].fuzzyk.png[i].shwlid.stop;
257  if( std::min( start.X(), stop.X() ) < -180.0 ||
258  std::max( start.X(), stop.X() ) > 180.0 ||
259  std::min( start.Y(), stop.Y() ) < -180.0 ||
260  std::max( start.Y(), stop.Y() ) > 180.0 ||
261  std::min( start.Z(), stop.Z() ) < 20.0 ||
262  std::max( start.Z(), stop.Z() ) > 1525.0 ){
263  iscontprong = false;
264  }
265  }
266 
267 
268  if(sr->trk.kalman.ntracks > 0 &&
269  sr->slc.nhit > 20 &&
270  sr->slc.ncontplanes > 4)
271  isquality = true;
272  else
273  isquality = false;
274 
275  if (isquality && iscontprong){
276  i++;
277 
278  if( nkals < 1 ) continue;
279 
280  // loop over track
281  for (int itrk = 0; itrk < nkals; ++itrk){
282 
283  if( sr->trk.kalman.tracks[itrk].rempid < 0 ) continue;
284 
285  fDedxLL = sr->trk.kalman.tracks[itrk].dedxllh2017;
286 
287  fScatLL = sr->trk.kalman.tracks[itrk].scatllh2017;
288 
289  if (sr->trk.kalman.tracks[itrk].avedEdxlast10cm == -5) continue;
290  if (sr->trk.kalman.tracks[itrk].avedEdxlast30cm == -5) continue;
291  if (sr->trk.kalman.tracks[itrk].avedEdxlast20cm == -5) continue;
292  if (sr->trk.kalman.tracks[itrk].avedEdxlast40cm == -5) continue;
293 
294  if (sr->trk.kalman.tracks[itrk].avedEdxlast10cm > 30)
295  fAvededxlast10cm = 30;
296  else
297  fAvededxlast10cm = sr->trk.kalman.tracks[itrk].avedEdxlast10cm;
298 
299  if (sr->trk.kalman.tracks[itrk].avedEdxlast20cm > 30)
300  fAvededxlast20cm = 30;
301  else
302  fAvededxlast20cm = sr->trk.kalman.tracks[itrk].avedEdxlast20cm;
303 
304  if (sr->trk.kalman.tracks[itrk].avedEdxlast30cm > 30)
305  fAvededxlast30cm = 30;
306  else
307  fAvededxlast30cm = sr->trk.kalman.tracks[itrk].avedEdxlast30cm;
308 
309  if (sr->trk.kalman.tracks[itrk].avedEdxlast40cm > 30)
310  fAvededxlast40cm = 30;
311  else
312  fAvededxlast40cm = sr->trk.kalman.tracks[itrk].avedEdxlast40cm;
313 
314  // If true muon, fill the signal tree. Otherwise fill in into the bkgd tree.
315  if(abs(sr->trk.kalman.tracks[itrk].truth.pdg) == 13)
316  sigTree->Fill();
317  else
318  bkgTree->Fill();
319 
320  } // end of trk loop
321 
322  }
323  else continue;
324  // end of pre selected if statement
325 
326 
327  } // end of event loop
328 
329  } while( (file1 =(TFile*)filesrc->GetNextFile()) );
330  // end of while loop of genie files
331 
332  if(prog){
333  prog->Done();
334  delete prog;
335  }
336 
337  outputFile.Write();
338  outputFile.Close();
339 
340  std::cout << " Congratulation! You did it succesfully..." << std::endl;
341 
342 } // end of void
343 
344 
Near Detector underground.
Definition: SREnums.h:10
T max(const caf::Proxy< T > &a, T b)
unsigned long int spilltimesec
Spill time in seconds [s].
Definition: SRSpill.h:33
unsigned int nmissingdcmslg
# of DCMS with 63 or more bad FEBs (LiveGeometry, subset of baddcmslg)
Definition: SRSpill.h:68
Far Detector at Ash River.
Definition: SREnums.h:11
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
void trimvar(const std::string outputFilename, const std::string setname)
Definition: trimvar.C:56
size_t ntracks
Definition: SRKalman.h:23
std::vector< SRFuzzyKProng > png
Vector of 3D prong objects.
Definition: SRFuzzyK.h:19
OStream cerr
Definition: OStream.cxx:7
void abs(TH1 *hist)
Det_t det
Detector, ND = 1, FD = 2, NDOS = 3.
Definition: SRSpill.h:29
unsigned int nmissingdcms
# of missing DCMs
Definition: SRSpill.h:64
float posy
y position on target
Definition: SRSpill.h:52
float dcmedgematchfrac
How many hits at the DCM edge are matched in the adjacent DCM?
Definition: SRSpill.h:74
float abs(float number)
Definition: d0nt_math.hpp:39
float fracdcm3hits
fraction of DCM3 hits in horizontal modules
Definition: SRSpill.h:65
float hornI
Horn current.
Definition: SRSpill.h:42
SRKalman kalman
Tracks produced by KalmanTrack.
Definition: SRTrackBranch.h:24
unsigned int nshwlid
number of shwlid showers - either 0 or number of 3d prongs
Definition: SRFuzzyK.h:24
int NFiles() const override
May return -1 indicating the number of files is not known.
std::string getenv(std::string const &name)
bool isgoodspill
Was the pot for a spill good? (only applicable to data, default true)
Definition: SRSpill.h:32
float widthy
Spill width in y dimension.
Definition: SRSpill.h:54
caf::StandardRecord * sr
File source based on a SAM query or dataset (definition)
float widthx
Spill width in x dimension.
Definition: SRSpill.h:53
short nnu
Number of neutrinos in nu vector (0 or 1)
Definition: SRTruthBranch.h:37
unsigned int nhit
number of hits
Definition: SRSlice.h:22
OStream cout
Definition: OStream.cxx:6
The StandardRecord is the primary top-level object in the Common Analysis File trees.
signed long long int deltaspilltimensec
Delta time [ns].
Definition: SRSpill.h:37
void SetProgress(double frac)
Update the progress fraction between zero and one.
Definition: Progress.cxx:41
bool eventincomplete
Data Quality DAQ Header information.
Definition: SRSpill.h:79
SRElastic elastic
Single vertex found by Elastic Arms.
virtual TFile * GetNextFile() override
Returns the next file in sequence, ready for reading.
assert(nhit_max >=nhit_nbins)
A simple ascii-art progress bar.
Definition: Progress.h:9
SRSlice slc
Slice branch: nhit, extents, time, etc.
SRTruthBranch mc
Truth branch for MC: energy, flavor, etc.
SRFuzzyK fuzzyk
Primary 3D prong object.
Definition: SRElastic.h:44
void Format(TGraph *gr, int lcol, int lsty, int lwid, int mcol, int msty, double msiz)
Definition: Style.cxx:154
float spillpot
Definition: SRSpill.h:38
unsigned char trigger
The trigger type from RawTrigger::fTriggerMask_TriggerType.
Definition: SRSpill.h:98
T min(const caf::Proxy< T > &a, T b)
unsigned int ncontplanes
number of continuous planes
Definition: SRSlice.h:25
SRTrackBranch trk
Track branch: nhit, len, etc.
void Done()
Call this when action is completed.
Definition: Progress.cxx:90
bool ismc
data or MC? True if MC
Definition: SRSpill.h:30
std::vector< SRKalmanTrack > tracks
3D Tracks produced by KalmanTrack
Definition: SRKalman.h:16
float posx
x position on target
Definition: SRSpill.h:51
SRVertexBranch vtx
Vertex branch: location, time, etc.
enum BeamMode string