SpectrumLoaderBase.cxx
Go to the documentation of this file.
2 
3 #include "CAFAna/Core/HistAxis.h"
4 #include "CAFAna/Core/Progress.h"
8 #include "CAFAna/Core/Spectrum.h"
11 
12 #include "CAFAna/Core/CutApplier.h"
15 
17 
18 #include "ifdh.h"
19 #include "WebAPI.h"
20 
21 #include "TFile.h"
22 #include "TH1.h"
23 #include "TTree.h"
24 
25 #include <algorithm>
26 #include <cassert>
27 #include <iostream>
28 #include "boost/algorithm/string.hpp"
29 
30 namespace ana
31 {
32  //----------------------------------------------------------------------
34  : fGone(false),
35  fPOT(0), fRunPOT(0), fSinglePOT(0), fBeamNue(0), fSingleNue(0), fWeightedPOT(0)
36  {
37  }
38 
39  //----------------------------------------------------------------------
42  {
44  fFileSource = std::unique_ptr<IFileSource>(WildcardOrSAMQuery(wildcard));
45  }
46 
47  //----------------------------------------------------------------------
48  SpectrumLoaderBase::SpectrumLoaderBase(const std::vector<std::string>& fnames)
50  {
51  fWildcard = "file list";
52  fFileSource = std::unique_ptr<IFileSource>(new FileListSource(fnames));
53 
54  assert(!fnames.empty());
55  std::cout << "Loading from " << fnames.size() << " files" << std::endl;
56  }
57 
58  //----------------------------------------------------------------------
60  {
61  }
62 
63  //----------------------------------------------------------------------
65  {
66  assert(!fGone);
67  fSpillCut = std::make_unique<SpillCut>(cut);
68  }
69 
70  //----------------------------------------------------------------------
73  {
74  int stride = -1;
75  int offset = -1;
76  int limit = -1;
77  if(getenv("CAFANA_STRIDE")){
78  stride = atoi(getenv("CAFANA_STRIDE"));
79  if(stride > 1 && getenv("CAFANA_OFFSET")){
80  offset = atoi(getenv("CAFANA_OFFSET"));
81  }
82  }
83  if(getenv("CAFANA_LIMIT")){
84  limit = atoi(getenv("CAFANA_LIMIT"));
85  }
86 
87  // stat() blows up on strings with spaces
88  if(str.find(' ') == std::string::npos){
89  WildcardSource* ret = new WildcardSource(str, stride, offset, limit);
90  if(ret->NFiles() > 0) return ret;
91  delete ret;
92  }
93 
94  // Maybe this the name of a SAM project?
95  {
96  IFDHSilent silent; // the usual case is for this to fail
98 
99  // findProject always gives back an address just by gluing bits together.
100  // (a tad annoying, because it _does_ go and look for the project,
101  // and even would print its 'didn't-find-this-project' error out to stderr
102  // if not for the IFDHSilent, but without scraping its stderr
103  // there's no way to know whether the project is there or not--
104  // you still get the URL.)
105  // however, the WebAPI call looking for the /status will return a 404 if
106  // the project doesn't exist. (suggested by Robert I. in INC000000925362.)
107  try
108  {
109  ifdh_util_ns::WebAPI webapi(i.findProject(str, getenv("SAM_STATION")) + "/status");
110  return new SAMProjectSource(str);
111  }
112  catch (ifdh_util_ns::WebAPIException &e)
113  {
114  ;
115  }
116  }
117 
118  // Maybe this is a SAM dataset or query?
119  return new SAMQuerySource(str, stride, offset, limit);
120  }
121 
122  //----------------------------------------------------------------------
124  const Cut& cut,
125  const Var& wei)
126  {
127  const int sid = shift.ID();
128  const int cid = cut.ID();
129  const int wid = wei.ID();
130 
131  const std::pair<int, int> sc(sid, cid);
132  const std::tuple<int, int, int> scw(sid, cid, wid);
133 
134  if(fSinksS.find(sid) == fSinksS.end()){
135  SystApplier* sa = new SystApplier(shift);
136  fSinksS[sid] = sa;
137  fSink.AddSink(sa);
138  }
139  if(fSinksSC.find(sc) == fSinksSC.end()){
140  CutApplier* ca = new CutApplier(cut);
141  fSinksS[sid]->AddSink(ca);
142  fSinksSC[sc] = ca;
143  }
144  if(fSinksSCW.find(scw) == fSinksSCW.end()){
145  WeightApplier* wa = new WeightApplier(wei);
146  fSinksSC[sc]->AddSink(wa);
147  fSinksSCW[scw] = wa;
148  }
149 
150  return fSinksSCW[scw];
151  }
152 
153  //----------------------------------------------------------------------
155  const Var& var,
156  const Cut& cut,
157  const SystShifts& shift,
158  const Var& wei)
159  {
160  if(fGone){
161  std::cerr << "Error: can't add Spectra after the call to Go()" << std::endl;
162  abort();
163  }
164 
165  SpectrumSink* sink = new SpectrumSink(var, &spect);
166  fSpectrumSinks.insert(sink);
167  GetSinkSCW(shift, cut, wei)->AddSink(sink);
168  fExposureSource.AddSink(cut, sink);
169  }
170 
171  //----------------------------------------------------------------------
173  const MultiVar& var,
174  const Cut& cut,
175  const SystShifts& shift,
176  const Var& wei)
177  {
178  if(fGone){
179  std::cerr << "Error: can't add Spectra after the call to Go()" << std::endl;
180  abort();
181  }
182 
183  MultiVarSpectrumSink* sink = new MultiVarSpectrumSink(var, &spect);
184  fSpectrumSinks.insert(sink);
185  GetSinkSCW(shift, cut, wei)->AddSink(sink);
186  fExposureSource.AddSink(cut, sink);
187  }
188 
189  //----------------------------------------------------------------------
191  const NuTruthVar& var,
192  const NuTruthCut& cut,
193  const SystShifts& shift,
194  const NuTruthVar& wei)
195  {
196  // TODO - implement with the same source/sink deal as regular spectra
197  fNuHistDefs.emplace_back(spect, var, cut, shift, wei);
198  }
199 
200  //----------------------------------------------------------------------
202  const Var& xvar,
203  const Var& yvar,
204  const Cut& cut,
205  const SystShifts& shift,
206  const Var& wei)
207  {
208  if(fGone){
209  std::cerr << "Error: can't add Spectra after the call to Go()" << std::endl;
210  abort();
211  }
212 
213  ReweightableSpectrumSink* sink = new ReweightableSpectrumSink(xvar, yvar, &spect);
214  fSpectrumSinks.insert(sink);
215  GetSinkSCW(shift, cut, wei)->AddSink(sink);
216  fExposureSource.AddSink(cut, sink);
217  fSink.AddSink(sink);
218  }
219 
220  //----------------------------------------------------------------------
222  const SpillVar& var,
223  const SpillCut& cut,
224  const SpillVar& wei)
225  {
226  // TODO - implement with the same source/sink deal as regular spectra
227  fSpillHistDefs.emplace_back(h, var, cut, wei);
228  }
229 
230  //----------------------------------------------------------------------
232  {
233  return fFileSource->NFiles();
234  }
235 
236  //----------------------------------------------------------------------
238  {
239  TFile* f = fFileSource->GetNextFile();
240  if(!f) return 0; // out of files
241 
242  if(f->IsZombie()){
243  std::cout << "Bad file (zombie): " << f->GetName() << std::endl;
244  abort();
245  }
246 
247  // Crude way to count POT, at a file level without referring to spill cuts
248  TH1* hPOT = (TH1*)f->Get("TotalPOT");
249 
250  if(!hPOT){
251  std::cout << "Bad file (no POT tree): " << f->GetName() << std::endl;
252  abort();
253  }
254 
255  fRunPOT += hPOT->Integral();
256 
257  TKey *key = f->FindKey("TotalSinglePOT");
258 
259  // Initialise and only fill these histos if they exist
260  // We know they exist for nue overlay files
261  TH1* hSinglePOT = 0;
262  TH1* hTotalTrueSingleNue = 0;
263  TH1* hTotalTrueNonswapNue = 0;
264  if(key != 0){
265  hSinglePOT = (TH1*)f->Get("TotalSinglePOT");
266  assert(hSinglePOT);
267  fSinglePOT = hSinglePOT->Integral();
268 
269  hTotalTrueNonswapNue = (TH1*)f->Get("TotalTrueNonswapNue");
270  assert(hTotalTrueNonswapNue);
271  fBeamNue = hTotalTrueNonswapNue->Integral();
272 
273  hTotalTrueSingleNue = (TH1*)f->Get("TotalTrueSingleNue");
274  assert(hTotalTrueSingleNue);
275  fSingleNue = hTotalTrueSingleNue->Integral();
276  }
277  else{
278  delete hSinglePOT;
279  delete hTotalTrueSingleNue;
280  delete hTotalTrueNonswapNue;
281  }
282 
283  // Test for flat (trees are inside a directory) or nested (tree is at top
284  // level) cases.
285  TDirectory* spillDir = 0;
286  TTree* spillTree = 0;
287 
288  TObject* obj = f->Get("spillTree");
289  assert(obj); // Must exist in one form or the other
290 
291  // It might seem like you could use GetObject() to do the type-checking
292  // directly, but that method seems to have a memory leak in the case that
293  // the types don't match.
294  if(obj->ClassName() == std::string("TTree")){
295  // nested case
296  spillTree = (TTree*)obj;
297  }
298  else{
299  // FlatCAF case
300  spillDir = (TDirectory*)obj;
301  spillTree = (TTree*)spillDir->Get("spill");
302  assert(spillTree);
303  }
304 
305  // These aren't set in SRNeutrino, but we need them for flux systs, so
306  // learn them from SRSpill and copy them over.
307  //
308  // TODO TODO - remove this hack (and the equivalent in SpectrumLoader.cxx
309  // once they're available in the CAFs
310  bool isFHC = false, is0HC = false, isRHC = false;
312 
313  long n = 0;
314  caf::SRSpillProxy spill(spillDir, spillTree, "spill", n, 0);
315 
316  // FloatingExceptionOnNaN fpnan;
317 
318  // With multiple files exposure counting alternates with histogram filling,
319  // but with one file they deserve to be reported as two separate steps.
320  Progress* prog = 0;
321  if(fFileSource->NFiles() == 1)
322  prog = new Progress(TString::Format("Counting exposure from 1 file matching '%s'", fWildcard.c_str()).Data());
323 
324  const int Nentries = spillTree->GetEntries();
325 
326  double totFilePOT = 0;
327 
328  for(n = 0; n < Nentries; ++n){
329  if(!spillDir) spillTree->LoadTree(n); // nested mode
330 
331  if(!fSpillCut || (*fSpillCut)(&spill)){
332  AccumulateExposures(&spill);
333 
334  totFilePOT += spill.spillpot;
335 
336  // Only learn from good spills (bad spill could have bad beam?), and
337  // only once (efficiency)
338  if(!isFHC && !is0HC && !isRHC){
339  isFHC = spill.isFHC;
340  is0HC = spill.is0HC;
341  isRHC = spill.isRHC;
342  }
343  if (det == caf::kUNKNOWN)
344  det = spill.det;
345 
346  for(const SpillHistDef& hd: fSpillHistDefs){
347  // No cut, or cut passes
348  if(hd.cut(&spill)){
349  const double wei = hd.wei(&spill);
350  if(wei){
351  const double val = hd.var(&spill);
352  hd.h->Fill(val, wei);
353  }
354  } // end if(cut passes)
355  } // end for hd
356  }
357 
358  if(prog && n%100 == 0) prog->SetProgress(double(n)/Nentries);
359  } // end for n
360 
361  // The POT we want to use is either the singles POT in case of overlay files
362  // i.e the singlepot histo exists, or the standard after spill cuts
363  // Note: must be sure to run spill cuts for the overlays otherwise this gets
364  // reset to the fRunPOT
365  if(key != 0) fPOT += fSinglePOT;
366  else fPOT += totFilePOT;
367 
368  delete prog;
369  prog = 0;
370 
371  if(!fNuHistDefs.empty()){
372  // Test for flat (trees are inside a directory) or nested (tree is at top
373  // level) cases.
374 
375  TDirectory* nuDir = 0;
376  TTree* nuTree = 0;
377 
378  TObject* obj = f->Get("nuTree");
379  assert(obj); // Must exist in one form or the other
380 
381  // It might seem like you could use GetObject() to do the type-checking
382  // directly, but that method seems to have a memory leak in the case that
383  // the types don't match.
384  if(obj->ClassName() == std::string("TTree")){
385  // nested case
386  nuTree = (TTree*)obj;
387  }
388  else{
389  // FlatCAF case
390  nuDir = (TDirectory*)obj;
391  nuTree = (TTree*)spillDir->Get("nu");
392  assert(nuTree);
393  }
394 
395  const int NNuEntries = nuTree->GetEntries();
396 
397  long n = 0;
398  caf::SRNeutrinoProxy nu(nuDir, nuTree, nuDir ? "nu" : "mc.nu", n, 0);
399 
400  if(fFileSource->NFiles() == 1)
401  prog = new Progress(TString::Format("Filling from nuTree from 1 file matching '%s'", fWildcard.c_str()).Data());
402 
403 
404  for(n = 0; n < NNuEntries; ++n){
405  if(!nuDir) nuTree->LoadTree(n); // nested mode
406 
407  nu.isFHC = isFHC;
408  nu.is0HC = is0HC;
409  nu.isRHC = isRHC;
410  nu.det = det;
411 
412  for(const NuHistDef& hd: fNuHistDefs){
414 
415  double systWeight = 1;
416  // Can special-case nominal to not pay cost of Shift()
417  if(!hd.shift.IsNominal()){
418  hd.shift.Shift(&nu, systWeight);
419  }
420 
421  if(hd.cut(&nu)){
422  const double wei = hd.wei(&nu)*systWeight;
423  if(wei){
424  const double val = hd.var(&nu);
425  hd.spect.Fill(val, wei);
426  }
427  }
428 
429  // Return SRNeutrino to its unshifted form ready for the next
430  // histogram.
432  } // end for hd
433 
434  if(prog && n%10000 == 0) prog->SetProgress(double(n)/NNuEntries);
435  } // end for n
436 
437  for(const NuHistDef& hd: fNuHistDefs) hd.spect.fPOT += totFilePOT;
438 
439  delete prog;
440  } // end if NuHistDefs
441 
442  return f;
443  }
444 
445  //---------------------------------------------------------------------
447  {
449  std::cout << " beam nue: " << fBeamNue << " single nue: " << fSingleNue << " fPOT: " << fPOT << " fSinglePOT: " << fSinglePOT << std::endl;
450  return fWeightedPOT;
451  }
452 
453  //----------------------------------------------------------------------
455  {
456  }
457 
458  //----------------------------------------------------------------------
460  {
461  }
462 
463 } // namespace
Unknown detector.
Definition: SREnums.h:9
Det_t
Which NOvA detector?
Definition: SREnums.h:7
Histograms that are filled from the nuTree.
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
int ID() const
Vars with the same definition will have the same ID.
Definition: Var.h:36
TFile * GetNextFile()
Forwards to fFileSource but also accumulates POT and livetime.
const SpillVar fPOT([](const caf::SRSpillProxy *spill){return spill->spillpot;})
bool fGone
Has Go() been called? Can&#39;t add more histograms after that.
virtual void AddSpectrum(Spectrum &spect, const Var &var, const Cut &cut, const SystShifts &shift, const Var &wei=kUnweighted)
For use by the Spectrum constructor.
Simple record of shifts applied to systematic parameters.
Definition: SystShifts.h:20
Spectrum with the value of a second variable, allowing for reweighting
double fRunPOT
Crude measure, not including spill cuts.
OStream cerr
Definition: OStream.cxx:7
void SetSpillCut(const SpillCut &cut)
ifdh calls between construction and destruction produce no output
Definition: Utilities.h:47
virtual void Go() override
Load all the registered spectra.
void AddSink(const Cut &cut, IExposureSink *s)
Representation of a spectrum in any variable, with associated POT.
Definition: Spectrum.h:40
std::unique_ptr< SpillCut > fSpillCut
Cut applied to the spill branch for every event.
std::set< IRecordSink * > fSpectrumSinks
virtual void AccumulateExposures(const caf::SRSpillProxy *spill)=0
int ID() const
SystShifts with the same set of systs should have the same ID.
Definition: SystShifts.h:32
void AddSink(IRecordSink *s)
virtual void AddSpillHistogram(TH1 *h, const SpillVar &var, const SpillCut &cut, const SpillVar &wei=kSpillUnweighted)
Uses include counting the total POT or spills in a run.
int NFiles() const override
May return -1 indicating the number of files is not known.
std::vector< std::string > wildcard(const std::string &wildcardString)
Definition: convert.C:9
std::string getenv(std::string const &name)
std::unordered_map< int, SystApplier * > fSinksS
WeightApplier * GetSinkSCW(const SystShifts &shift, const Cut &cut, const Var &wei)
int ID() const
Cuts with the same definition will have the same ID.
Definition: Cut.h:55
File source based on a SAM query or dataset (definition)
std::list< SpillHistDef > fSpillHistDefs
ifdh
Definition: ProjMan.py:8
std::unordered_map< std::pair< int, int >, CutApplier * > fSinksSC
static bool isFHC
OStream cout
Definition: OStream.cxx:6
virtual void AddReweightableSpectrum(ReweightableSpectrum &spect, const Var &xvar, const Var &yvar, const Cut &cut, const SystShifts &shift, const Var &wei)
For use by the constructors of ReweightableSpectrum subclasses.
Base class for the various types of spectrum loader.
SpectrumLoaderBase()
Component of other constructors.
::xsd::cxx::tree::string< char, simple_type > string
Definition: Database.h:154
const Cut cut
Definition: exporter_fd.C:30
Interface class for accessing ROOT files in sequence.
Definition: IFileSource.h:10
Simple file source based on an explicit list provided by the user.
std::unique_ptr< IFileSource > fFileSource
void SetProgress(double frac)
Update the progress fraction between zero and one.
Definition: Progress.cxx:41
IFileSource * WildcardOrSAMQuery(const std::string &str) const
Figure out if str is a wildcard or SAM query and return a source.
assert(nhit_max >=nhit_nbins)
Helper for SpectrumLoaderBase. Details of SpillVar plots.
A simple ascii-art progress bar.
Definition: Progress.h:9
Fetch files from a pre-existing SAM project.
std::unordered_map< std::tuple< int, int, int >, WeightApplier * > fSinksSCW
void Format(TGraph *gr, int lcol, int lsty, int lwid, int mcol, int msty, double msiz)
Definition: Style.cxx:154
int NFiles() const
Forwards to fFileSource.
File source based on a wildcard (glob)
Definition: WildcardSource.h:8
Template for Cut and SpillCut.
Definition: Cut.h:15
double fPOT
Accumulated by calls to GetNextFile.
Float_t e
Definition: plot.C:35
std::list< NuHistDef > fNuHistDefs
TH1F * hd
Definition: Xdiff_gwt.C:57