SpectrumLoader.cxx
Go to the documentation of this file.
2 
3 #include "CAFAna/Core/Progress.h"
6 #include "CAFAna/Core/Spectrum.h"
8 
10 
11 #include <cassert>
12 #include <iostream>
13 #include <cmath>
14 
15 #include "TFile.h"
16 #include "TH2.h"
17 #include "TTree.h"
18 
19 namespace ana
20 {
21  //----------------------------------------------------------------------
23  : SpectrumLoaderBase(wildcard, src)
24  {
25  }
26 
27  //----------------------------------------------------------------------
28  SpectrumLoader::SpectrumLoader(const std::vector<std::string>& fnames,
29  DataSource src)
30  : SpectrumLoaderBase(fnames, src)
31  {
32  }
33 
34  //----------------------------------------------------------------------
36  : SpectrumLoaderBase(src)
37  {
38  }
39 
40  //----------------------------------------------------------------------
42  DataSource src,
43  int fileLimit)
44  {
46  ret.fSource = src;
47  ret.fWildcard = "project "+proj;
48  ret.fFileSource = std::unique_ptr<IFileSource>(new SAMProjectSource(proj, fileLimit));
49  return ret;
50  }
51 
52  //----------------------------------------------------------------------
54  {
55  }
56 
57  struct CompareByID
58  {
59  bool operator()(const Cut& a, const Cut& b)
60  {
61  return a.ID() < b.ID();
62  }
63  };
64 
65  //----------------------------------------------------------------------
67  {
68  if(fGone){
69  std::cerr << "Error: can only call Go() once on a SpectrumLoader" << std::endl;
70  abort();
71  }
72  fGone = true;
73 
74  // Find all the unique cuts
75  std::set<Cut, CompareByID> cuts;
76  for(auto& shiftdef: fHistDefs)
77  for(auto& cutdef: shiftdef.second)
78  cuts.insert(cutdef.first);
79  for(const Cut& cut: cuts) fAllCuts.push_back(cut);
80 
81  fLivetimeByCut.resize(fAllCuts.size());
82  fPOTByCut.resize(fAllCuts.size());
83 
84 
85  const int Nfiles = NFiles();
86 
87  Progress* prog = 0;
88 
90 
91  int fileIdx = -1;
92  while(TFile* f = GetNextFile()){
93  ++fileIdx;
94 
95  if(Nfiles >= 0 && !prog){
96  std::string sum = TString::Format("Filling %lu spectra", fHistDefs.TotalSize()).Data();
97  if(Nfiles > 1){
98  if(!fSpillHistDefs.empty()){
99  sum += TString::Format(", %lu spillTree spectra", fSpillHistDefs.size()).Data();
100  }
101  if(!fNuHistDefs.empty()){
102  sum += TString::Format(", %lu nuTree spectra", fNuHistDefs.size()).Data();
103  }
104  }
105 
106  sum += TString::Format(" from %d files matching '%s'", Nfiles, fWildcard.c_str()).Data();
107  prog = new Progress(sum);
108  }
109 
110  HandleFile(f, Nfiles == 1 ? prog : 0);
111 
112  if(Nfiles > 1 && prog) prog->SetProgress((fileIdx+1.)/Nfiles);
113  } // end for fileIdx
114 
115  StoreExposures();
116 
117  if(prog){
118  prog->Done();
119  delete prog;
120  }
121 
122  ReportExposures();
123 
124  fHistDefs.RemoveLoader(this);
125  fHistDefs.Clear();
126  }
127 
128  //----------------------------------------------------------------------
130  {
131  assert(!f->IsZombie());
132 
133  // Test for flat (trees are inside a directory) or nested (tree is at top
134  // level) cases.
135  TDirectory* dir = 0;
136  TTree* tr = 0;
137 
138  TObject* obj = f->Get("recTree");
139  assert(obj); // Must exist in one form or the other
140 
141  // It might seem like you could use GetObject() to do the type-checking
142  // directly, but that method seems to have a memory leak in the case that
143  // the types don't match.
144  if(obj->ClassName() == std::string("TTree")){
145  // nested case
146  tr = (TTree*)obj;
147  }
148  else{
149  // FlatCAF case
150  dir = (TDirectory*)obj;
151  tr = (TTree*)dir->Get("rec");
152  assert(tr);
153  }
154 
155  long n;
156  caf::SRProxy sr(dir, tr, "rec", n, 0);
157 
158  // FloatingExceptionOnNaN fpnan;
159 
160  const long Nentries = tr->GetEntries();
161  for(n = 0; n < Nentries; ++n){
162  if(!dir) tr->LoadTree(n); // nested mode
163 
164  HandleRecord(&sr);
165 
166  if(prog && n%100 == 0) prog->SetProgress(double(n)/Nentries);
167  } // end for n
168  }
169 
170  //----------------------------------------------------------------------
171  /// Helper for \ref SpectrumLoader::HandleRecord
172  template<class T, class U> class CutVarCache
173  {
174  public:
175  CutVarCache() : fVals(U::MaxID()+1), fValsSet(U::MaxID()+1, false) {}
176 
177  inline T Get(const U& var, const caf::SRProxy* sr)
178  {
179  const unsigned int id = var.ID();
180 
181  if(fValsSet[id]){
182  return fVals[id];
183  }
184  else{
185  const T val = var(sr);
186  fVals[id] = val;
187  fValsSet[id] = true;
188  return val;
189  }
190  }
191 
192  protected:
193  // Seems to be faster to do this than [unordered_]map
194  std::vector<T> fVals;
195  std::vector<bool> fValsSet;
196  };
197 
198  //----------------------------------------------------------------------
200  {
201  // First up, check this spill passes the global quality cuts
202  if(fSpillCut && !(*fSpillCut)(&sr->spill)) return;
203 
204  // Some shifts only adjust the weight, so they're effectively nominal, but
205  // aren't grouped with the other nominal histograms. Keep track of the
206  // results for nominals in these caches to speed those systs up.
207  CutVarCache<bool, Cut> nomCutCache;
208  CutVarCache<double, Var> nomWeiCache;
209  CutVarCache<double, Var> nomVarCache;
210 
211  // These aren't set in SRNeutrino, but we need them for flux systs, so copy
212  // them over from SRSpill.
213  //
214  // TODO TODO - remove this hack (and the equivalent in
215  // SpectrumLoaderBase.cxx) once they're available in the CAFs
216  if(sr->mc.nnu > 0){
217  sr->mc.nu[0].isFHC = bool(sr->spill.isFHC);
218  sr->mc.nu[0].is0HC = bool(sr->spill.is0HC);
219  sr->mc.nu[0].isRHC = bool(sr->spill.isRHC);
220  sr->mc.nu[0].det = caf::Det_t(sr->hdr.det);
221  }
222 
223  for(auto& shiftdef: fHistDefs){
224  caf::SRProxySystController::BeginTransaction();
225 
226  const SystShifts& shift = shiftdef.first;
227 
228  // Need to provide a clean slate for each new set of systematic shifts to
229  // work from. Copying the whole StandardRecord is pretty expensive, so
230  // modify it in place and revert it afterwards.
231 
232  bool shifted = false;
233 
234  double systWeight = 1;
235  // Can special-case nominal to not pay cost of Shift()
236  if(!shift.IsNominal()){
237  shift.Shift(sr, systWeight);
238  // If there were only weighting systs applied then the cached nominal
239  // values are still valid.
240  shifted = caf::SRProxySystController::AnyShifted();
241  }
242 
243  for(auto& cutdef: shiftdef.second){
244  const Cut& cut = cutdef.first;
245 
246  const bool pass = shifted ? cut(sr) : nomCutCache.Get(cut, sr);
247  // Cut failed, skip all the histograms that depended on it
248  if(!pass) continue;
249 
250  for(auto& weidef: cutdef.second){
251  const Var& weivar = weidef.first;
252 
253  double wei = shifted ? weivar(sr) : nomWeiCache.Get(weivar, sr);
254 
255  if(wei < 0){
256  std::cerr << "Negative weight " << wei
257  << " returned from Var";
258  std::cerr << std::endl;
259  abort();
260  }
261 
262  wei *= systWeight;
263  if(wei == 0) continue;
264 
265  for(auto& vardef: weidef.second){
266  if(vardef.first.IsMulti()){
267  for(double val: vardef.first.GetMultiVar()(sr)){
268  for(Spectrum* s: vardef.second.spects)
269  s->Fill(val, wei);
270  }
271  continue;
272  }
273 
274  const Var& var = vardef.first.GetVar();
275 
276  const double val = shifted ? var(sr) : nomVarCache.Get(var, sr);
277 
278  if(std::isnan(val) || std::isinf(val)){
279  std::cerr << "Warning: Bad value: " << val
280  << " returned from a Var. The input variable(s) could "
281  << "be NaN in the CAF, or perhaps your "
282  << "Var code computed 0/0?";
283  std::cout << ". Not filling into this histogram for this slice." << std::endl;
284  continue;
285  }
286 
287  for(Spectrum* s: vardef.second.spects) s->Fill(val, wei);
288 
289  for(ReweightableSpectrum* rw: vardef.second.rwSpects){
290  const double yval = rw->ReweightVar()(sr);
291 
292  if(std::isnan(yval) || std::isinf(yval)){
293  std::cerr << "Warning: Bad value: " << yval
294  << " for reweighting Var";
295  std::cout << ". Not filling into histogram." << std::endl;
296  continue;
297  }
298 
299  if(rw->fHistD) rw->fHistD->Fill(val, yval, wei);
300  if(rw->fHistF) rw->fHistF->Fill(val, yval, wei);
301  } // end for rw
302  } // end for vardef
303  } // end for weidef
304  } // end for cutdef
305 
306  // Return StandardRecord to its unshifted form ready for the next
307  // histogram.
308  caf::SRProxySystController::Rollback();
309  } // end for shiftdef
310  }
311 
312  //----------------------------------------------------------------------
314  {
315  // The POT member variables we use here were filled as part of
316  // SpectrumLoaderBase::GetNextFile() as we looped through the input files.
317 
318  // Let's just assume no-one is using the Cut::POT() function yet, so this
319  // printout remains relevant...
320 
321  if(fSpillCut && fRunPOT > 0){
322  std::cout << "Total " << fRunPOT << " POT, of which "
323  << fPOT << " (" << int(100*fPOT/fRunPOT+.5)
324  << "%) passes quality cuts." << std::endl;
325  }
326  else{
327  std::cout << "Total " << fRunPOT << " POT." << std::endl;
328  fPOT = fRunPOT;
329  }
330 
331  std::vector<Spectrum*> spectra;
332  fHistDefs.GetSpectra(spectra);
333  std::vector<ReweightableSpectrum*> rwSpectra;
334  fHistDefs.GetReweightableSpectra(rwSpectra);
335  // Normally using a floating-point number as a key is a bad idea. But this
336  // seems to work, and if it goes wrong all it breaks is this printout.
337  std::map<double, int> livetimes;
338  for(Spectrum* s: spectra) ++livetimes[s->Livetime()];
339  for(ReweightableSpectrum* rs: rwSpectra) ++livetimes[rs->fLivetime];
340 
341  if(livetimes.size() == 1){
342  const double t = livetimes.begin()->first;
343  if(t > 0)
344  std::cout << "Total livetime " << t << " seconds." << std::endl;
345  else
346  std::cout << "Unknown livetime." << std::endl;
347  }
348  else{
349  std::cout << "Livetimes:" << std::endl;
350  for(auto it: livetimes){
351  std::cout << " " << it.second << " spectra: ";
352  if(it.first > 0)
353  std::cout << it.first << " seconds" << std::endl;
354  else
355  std::cout << "unknown" << std::endl;
356  }
357  }
358  }
359 
360  //----------------------------------------------------------------------
362  {
363  const unsigned int N = fAllCuts.size();
364  for(unsigned int i = 0; i < N; ++i){
365  const Cut& cut = fAllCuts[i];
366 
367  // FD MC is 1 neutrino-per-event, so the livetime count will be all
368  // screwed up there. Just go with 0, ie "unknown".
369  double livetime = 0;
370  if(spill->det != caf::kFARDET || !spill->ismc || fSource == kCosmic)
371  livetime = cut.Livetime(spill);
372 
373  // Not implemented -> no good way to count
374  if(livetime < 0){
375  if(spill->det == caf::kFARDET && !spill->ismc){
376  // People might well expect FD data to have a livetime, flag it
377  // so that we can make a warning about it at the end.
378  fLivetimeByCut[i] = -1;
379  }
380  // Otherwise the user probably doesn't care. Just don't sum it into the
381  // livetime
382  }
383  else{
384  // If a previous spill had no livetime and set the flag, and this one
385  // then does have good livetime, we're in big trouble.
386  assert(fLivetimeByCut[i] >= 0);
387 
388  // Livetime was fine, accumulate it
390  }
391 
392 
393  double pot = cut.POT(spill);
394  // Not implemented -> fall back to usual counting
395  if(pot < 0) pot = spill->spillpot;
396 
397  fPOTByCut[i] += pot;
398  }
399  }
400 
401  //----------------------------------------------------------------------
403  {
404  std::map<int, double> livetime;
405  std::map<int, double> pot;
406 
407  for(unsigned int i = 0; i < fAllCuts.size(); ++i){
408  const int id = fAllCuts[i].ID();
409  if(fLivetimeByCut[i] < 0){
410  fLivetimeByCut[i] = 0;
411  std::cout << "WARNING: no way to compute livetime for FD data spectrum. If you want a livetime you need to be applying one of the cuts from TimingCuts.h or similar. You probably should be anyway to remove bad data near the spill ends." << std::endl;
412  }
413  livetime.emplace(id, fLivetimeByCut[i]);
414  pot.emplace(id, fPOTByCut[i]);
415  }
416 
417 
418  for(auto& shiftdef: fHistDefs){
419  for(auto& cutdef: shiftdef.second){
420  const Cut& cut = cutdef.first;
421  const int id = cut.ID();
422 
423  for(auto& weidef: cutdef.second){
424  for(auto& vardef: weidef.second){
425  for(Spectrum* s: vardef.second.spects){
426  s->fPOT += pot[id];
427  s->fLivetime += livetime[id];
428  }
429 
430  for(ReweightableSpectrum* rw: vardef.second.rwSpects){
431  rw->fPOT += pot[id];
432  rw->fLivetime += livetime[id];
433  }
434  }
435  }
436  }
437  }
438  }
439 
440 } // namespace
int ID() const
Cuts with the same definition will have the same ID.
Definition: Cut.h:55
::xsd::cxx::tree::id< char, ncname > id
Definition: Database.h:165
caf::Proxy< caf::SRSpill > spill
Definition: SRProxy.h:1986
Det_t
Which NOvA detector?
Definition: SREnums.h:7
const Var & ReweightVar() const
The variable that will be used to fill the y-axis.
void StoreExposures()
Save results of AccumulateExposures into the individual spectra.
std::vector< double > fLivetimeByCut
Indexing matches fAllCuts.
int isinf(const stan::math::var &a)
Definition: std_isinf.hpp:16
caf::Proxy< bool > is0HC
Definition: SRProxy.h:1282
Far Detector at Ash River.
Definition: SREnums.h:11
Cuts and Vars for the 2020 FD DiF Study.
Definition: CutFlow_header.h:5
set< int >::iterator it
double POT(const caf::SRSpillProxy *spill) const
Could be useful for cuts on specific batches?
Definition: Cut.h:45
TFile * GetNextFile()
Forwards to fFileSource but also accumulates POT and livetime.
bool IsNominal() const
Definition: SystShifts.h:43
bool fGone
Has Go() been called? Can&#39;t add more histograms after that.
Simple record of shifts applied to systematic parameters.
Definition: SystShifts.h:20
caf::Proxy< caf::SRHeader > hdr
Definition: SRProxy.h:1980
SpectrumLoader(const std::string &wildcard, DataSource src=kBeam)
Proxy for caf::StandardRecord.
Definition: SRProxy.h:1969
caf::Proxy< std::vector< caf::SRNeutrino > > nu
Definition: SRProxy.h:569
vector< vector< double > > clear
Spectrum with the value of a second variable, allowing for reweighting
double fRunPOT
Crude measure, not including spill cuts.
Helper for SpectrumLoader::HandleRecord.
void AccumulateExposures(const caf::SRSpillProxy *spill) override
OStream cerr
Definition: OStream.cxx:7
caf::Proxy< short int > nnu
Definition: SRProxy.h:568
double fPOT
Definition: Spectrum.h:319
void Fill(double x, double w=1)
Definition: Spectrum.cxx:785
virtual void ReportExposures()
Prints POT/livetime info for all spectra.
std::vector< bool > fValsSet
T Get(const U &var, const caf::SRProxy *sr)
Representation of a spectrum in any variable, with associated POT.
Definition: Spectrum.h:37
int isnan(const stan::math::var &a)
Definition: std_isnan.hpp:18
std::unique_ptr< SpillCut > fSpillCut
Cut applied to the spill branch for every event.
std::vector< double > fPOTByCut
Indexing matches fAllCuts.
caf::Proxy< bool > isFHC
Definition: SRProxy.h:1283
const XML_Char * s
Definition: expat.h:262
double Livetime(const caf::SRSpillProxy *spill) const
Provide a Livetime function if your cut is a timing cut etc.
Definition: Cut.h:39
const double a
std::vector< std::string > wildcard(const std::string &wildcardString)
Definition: convert.C:9
DataSource
Is this data-file representing beam spills or cosmic spills?
#define pot
bool operator()(const Cut &a, const Cut &b)
double fLivetime
Definition: Spectrum.h:320
virtual void Go() override
Load all the registered spectra.
static SpectrumLoader FromSAMProject(const std::string &proj, DataSource src=kBeam, int fileLimit=-1)
Named constructor for SAM projects.
std::list< SpillHistDef > fSpillHistDefs
fileLimit
Definition: ProjMan.py:109
OStream cout
Definition: OStream.cxx:6
IDMap< SystShifts, IDMap< Cut, IDMap< Var, IDMap< VarOrMultiVar, SpectList > > > > fHistDefs
All the spectra that need to be filled.
Base class for the various types of spectrum loader.
caf::Proxy< caf::SRTruthBranch > mc
Definition: SRProxy.h:1981
::xsd::cxx::tree::string< char, simple_type > string
Definition: Database.h:154
const Cut cut
Definition: exporter_fd.C:30
std::unique_ptr< IFileSource > fFileSource
void SetProgress(double frac)
Update the progress fraction between zero and one.
Definition: Progress.cxx:41
Collaborates with Spectrum and OscillatableSpectrum to fill spectra from CAF files.
TDirectory * dir
Definition: macro.C:5
double livetime
Definition: saveFDMCHists.C:21
virtual void HandleRecord(caf::SRProxy *sr)
Float_t proj
Definition: plot.C:35
const hit & b
Definition: hits.cxx:21
assert(nhit_max >=nhit_nbins)
A simple ascii-art progress bar.
Definition: Progress.h:9
Fetch files from a pre-existing SAM project.
virtual void HandleFile(TFile *f, Progress *prog=0)
void Format(TGraph *gr, int lcol, int lsty, int lwid, int mcol, int msty, double msiz)
Definition: Style.cxx:154
caf::Proxy< bool > isRHC
Definition: SRProxy.h:1284
double T
Definition: Xdiff_gwt.C:5
std::vector< T > fVals
int NFiles() const
Forwards to fFileSource.
double fPOT
Accumulated by calls to GetNextFile.
std::vector< Cut > fAllCuts
All unique cuts contained in fHistDefs.
Double_t sum
Definition: plot.C:31
std::list< NuHistDef > fNuHistDefs
void Done()
Call this when action is completed.
Definition: Progress.cxx:91
static constexpr Double_t sr
Definition: Munits.h:164
caf::Proxy< caf::Det_t > det
Definition: SRProxy.h:226
void Shift(caf::SRProxy *sr, double &weight) const
Definition: SystShifts.cxx:154