SpectrumLoaderBase.cxx
Go to the documentation of this file.
2 
3 #include "CAFAna/Core/HistAxis.h"
4 #include "CAFAna/Core/Progress.h"
8 #include "CAFAna/Core/Spectrum.h"
11 
12 #include "CAFAna/Core/CutApplier.h"
15 
17 
18 #include "ifdh.h"
19 #include "WebAPI.h"
20 
21 #include "TFile.h"
22 #include "TH1.h"
23 #include "TTree.h"
24 
25 #include <algorithm>
26 #include <cassert>
27 #include <iostream>
28 #include "boost/algorithm/string.hpp"
29 
30 namespace ana
31 {
32  //----------------------------------------------------------------------
34  : fGone(false),
35  fPOT(0), fRunPOT(0), fSinglePOT(0), fBeamNue(0), fSingleNue(0), fWeightedPOT(0)
36  {
37  }
38 
39  //----------------------------------------------------------------------
42  {
44  fFileSource = std::unique_ptr<IFileSource>(WildcardOrSAMQuery(wildcard));
45  }
46 
47  //----------------------------------------------------------------------
48  SpectrumLoaderBase::SpectrumLoaderBase(const std::vector<std::string>& fnames)
50  {
51  fWildcard = "file list";
52  fFileSource = std::unique_ptr<IFileSource>(new FileListSource(fnames));
53 
54  assert(!fnames.empty());
55  std::cout << "Loading from " << fnames.size() << " files" << std::endl;
56  }
57 
58  //----------------------------------------------------------------------
60  {
61  }
62 
63  //----------------------------------------------------------------------
65  {
66  assert(!fGone);
67  fSpillCut = std::make_unique<SpillCut>(cut);
68  }
69 
70  //----------------------------------------------------------------------
73  {
74  // stat() blows up on strings with spaces
75  if(str.find(' ') == std::string::npos){
76  WildcardSource* ret = new WildcardSource(str, Stride(), Offset(), Limit());
77  if(ret->NFiles() > 0) return ret;
78  delete ret;
79  }
80 
81  // Maybe this the name of a SAM project?
82  {
83  IFDHSilent silent; // the usual case is for this to fail
85 
86  // findProject always gives back an address just by gluing bits together.
87  // (a tad annoying, because it _does_ go and look for the project,
88  // and even would print its 'didn't-find-this-project' error out to stderr
89  // if not for the IFDHSilent, but without scraping its stderr
90  // there's no way to know whether the project is there or not--
91  // you still get the URL.)
92  // however, the WebAPI call looking for the /status will return a 404 if
93  // the project doesn't exist. (suggested by Robert I. in INC000000925362.)
94  try
95  {
96  ifdh_util_ns::WebAPI webapi(i.findProject(str, getenv("SAM_STATION")) + "/status");
97  return new SAMProjectSource(str);
98  }
99  catch (ifdh_util_ns::WebAPIException &e)
100  {
101  ;
102  }
103  }
104 
105  // Maybe this is a SAM dataset or query?
106  return new SAMQuerySource(str, Stride(), Offset(), Limit());
107  }
108 
109  //----------------------------------------------------------------------
111  const Cut& cut,
112  const Var& wei)
113  {
114  const int sid = shift.ID();
115  const int cid = cut.ID();
116  const int wid = wei.ID();
117 
118  const std::pair<int, int> sc(sid, cid);
119  const std::tuple<int, int, int> scw(sid, cid, wid);
120 
121  if(fSinksS.find(sid) == fSinksS.end()){
122  SystApplier* sa = new SystApplier(shift);
123  fSinksS[sid] = sa;
124  fSink.AddSink(sa);
125  }
126  if(fSinksSC.find(sc) == fSinksSC.end()){
127  CutApplier* ca = new CutApplier(cut);
128  fSinksS[sid]->AddSink(ca);
129  fSinksSC[sc] = ca;
130  }
131  if(fSinksSCW.find(scw) == fSinksSCW.end()){
132  WeightApplier* wa = new WeightApplier(wei);
133  fSinksSC[sc]->AddSink(wa);
134  fSinksSCW[scw] = wa;
135  }
136 
137  return fSinksSCW[scw];
138  }
139 
140  //----------------------------------------------------------------------
142  const Var& var,
143  const Cut& cut,
144  const SystShifts& shift,
145  const Var& wei)
146  {
147  if(fGone){
148  std::cerr << "Error: can't add Spectra after the call to Go()" << std::endl;
149  abort();
150  }
151 
152  SpectrumSink* sink = new SpectrumSink(var, &spect);
153  fSpectrumSinks.insert(sink);
154  GetSinkSCW(shift, cut, wei)->AddSink(sink);
155  fExposureSource.AddSink(cut, sink);
156  }
157 
158  //----------------------------------------------------------------------
160  const MultiVar& var,
161  const Cut& cut,
162  const SystShifts& shift,
163  const Var& wei)
164  {
165  if(fGone){
166  std::cerr << "Error: can't add Spectra after the call to Go()" << std::endl;
167  abort();
168  }
169 
170  MultiVarSpectrumSink* sink = new MultiVarSpectrumSink(var, &spect);
171  fSpectrumSinks.insert(sink);
172  GetSinkSCW(shift, cut, wei)->AddSink(sink);
173  fExposureSource.AddSink(cut, sink);
174  }
175 
176  //----------------------------------------------------------------------
178  const NuTruthVar& var,
179  const NuTruthCut& cut,
180  const SystShifts& shift,
181  const NuTruthVar& wei)
182  {
183  // TODO - implement with the same source/sink deal as regular spectra
184  fNuHistDefs.emplace_back(spect, var, cut, shift, wei);
185  }
186 
187  //----------------------------------------------------------------------
189  const NuTruthMultiVar& multivar,
190  const NuTruthCut& cut,
191  const SystShifts& shift,
192  const NuTruthVar& wei)
193  {
194  // TODO - implement with the same source/sink deal as regular spectra
195  fNuHistDefs.emplace_back(spect, multivar, cut, shift, wei);
196  }
197 
198  //----------------------------------------------------------------------
200  const Var& xvar,
201  const Var& yvar,
202  const Cut& cut,
203  const SystShifts& shift,
204  const Var& wei)
205  {
206  if(fGone){
207  std::cerr << "Error: can't add Spectra after the call to Go()" << std::endl;
208  abort();
209  }
210 
211  ReweightableSpectrumSink* sink = new ReweightableSpectrumSink(xvar, yvar, &spect);
212  fSpectrumSinks.insert(sink);
213  GetSinkSCW(shift, cut, wei)->AddSink(sink);
214  fExposureSource.AddSink(cut, sink);
215  }
216 
217  //----------------------------------------------------------------------
219  const SpillVar& var,
220  const SpillCut& cut,
221  const SpillVar& wei)
222  {
223  // TODO - implement with the same source/sink deal as regular spectra
224  fSpillHistDefs.emplace_back(h, var, cut, wei);
225  }
226 
227  //----------------------------------------------------------------------
229  {
230  return fFileSource->NFiles();
231  }
232 
233  //----------------------------------------------------------------------
235  {
236  TFile* f = fFileSource->GetNextFile();
237  if(!f) return 0; // out of files
238 
239  if(f->IsZombie()){
240  std::cout << "Bad file (zombie): " << f->GetName() << std::endl;
241  abort();
242  }
243 
244  // Crude way to count POT, at a file level without referring to spill cuts
245  TH1* hPOT = (TH1*)f->Get("TotalPOT");
246 
247  if(!hPOT){
248  std::cout << "Bad file (no POT tree): " << f->GetName() << std::endl;
249  abort();
250  }
251 
252  fRunPOT += hPOT->Integral();
253 
254  TKey *key = f->FindKey("TotalSinglePOT");
255 
256  // Initialise and only fill these histos if they exist
257  // We know they exist for nue overlay files
258  TH1* hSinglePOT = 0;
259  TH1* hTotalTrueSingleNue = 0;
260  TH1* hTotalTrueNonswapNue = 0;
261  if(key != 0){
262  hSinglePOT = (TH1*)f->Get("TotalSinglePOT");
263  assert(hSinglePOT);
264  fSinglePOT = hSinglePOT->Integral();
265 
266  hTotalTrueNonswapNue = (TH1*)f->Get("TotalTrueNonswapNue");
267  assert(hTotalTrueNonswapNue);
268  fBeamNue = hTotalTrueNonswapNue->Integral();
269 
270  hTotalTrueSingleNue = (TH1*)f->Get("TotalTrueSingleNue");
271  assert(hTotalTrueSingleNue);
272  fSingleNue = hTotalTrueSingleNue->Integral();
273  }
274  else{
275  delete hSinglePOT;
276  delete hTotalTrueSingleNue;
277  delete hTotalTrueNonswapNue;
278  }
279 
280  TDirectory* spillDir = 0;
281  TTree* spillTree = 0;
282 
283  TObject* obj = f->Get("spillTree");
284  assert(obj); // Must exist in one form or the other
285 
286  // It might seem like you could use GetObject() to do the type-checking
287  // directly, but that method seems to have a memory leak in the case that
288  // the types don't match.
289  if(obj->ClassName() == std::string("TTree")){
290  spillTree = (TTree*)obj;
291  }
292  else{
293  spillDir = (TDirectory*)obj;
294  spillTree = (TTree*)spillDir->Get("spill");
295  assert(spillTree);
296  }
297 
298  const caf::CAFType type = caf::GetCAFType(spillDir, spillTree);
299 
300  long n = 0;
301  caf::SRSpillProxy spill(spillDir, spillTree, "spill", n, 0);
302 
303  // FloatingExceptionOnNaN fpnan;
304 
305  // With multiple files exposure counting alternates with histogram filling,
306  // but with one file they deserve to be reported as two separate steps.
307  Progress* prog = 0;
308  if(fFileSource->NFiles() == 1)
309  prog = new Progress(TString::Format("Counting exposure from 1 file matching '%s'", fWildcard.c_str()).Data());
310 
311  const int Nentries = spillTree->GetEntries();
312 
313  double totFilePOT = 0;
314 
315  for(n = 0; n < Nentries; ++n){
316  if(type != caf::kFlatMultiTree) spillTree->LoadTree(n); // for all single-tree modes
317 
318  if(!fSpillCut || (*fSpillCut)(&spill)){
319  AccumulateExposures(&spill);
320 
321  totFilePOT += spill.spillpot;
322 
323  for(const SpillHistDef& hd: fSpillHistDefs){
324  // No cut, or cut passes
325  if(hd.cut(&spill)){
326  const double wei = hd.wei(&spill);
327  if(wei){
328  const double val = hd.var(&spill);
329  hd.h->Fill(val, wei);
330  }
331  } // end if(cut passes)
332  } // end for hd
333  }
334 
335  if(prog && n%100 == 0) prog->SetProgress(double(n)/Nentries);
336  } // end for n
337 
338  // The POT we want to use is either the singles POT in case of overlay files
339  // i.e the singlepot histo exists, or the standard after spill cuts
340  // Note: must be sure to run spill cuts for the overlays otherwise this gets
341  // reset to the fRunPOT
342  if(key != 0) fPOT += fSinglePOT;
343  else fPOT += totFilePOT;
344 
345  delete prog;
346  prog = 0;
347 
348  if(!fNuHistDefs.empty()){
349  TDirectory* nuDir = 0;
350  TTree* nuTree = 0;
351 
352  TObject* obj = f->Get("nuTree");
353  assert(obj); // Must exist in one form or the other
354 
355  // It might seem like you could use GetObject() to do the type-checking
356  // directly, but that method seems to have a memory leak in the case that
357  // the types don't match.
358  if(obj->ClassName() == std::string("TTree")){
359  nuTree = (TTree*)obj;
360  }
361  else{
362  nuDir = (TDirectory*)obj;
363  nuTree = (TTree*)spillDir->Get("nu");
364  assert(nuTree);
365  }
366 
367  const caf::CAFType type = caf::GetCAFType(nuDir, nuTree);
368 
369  const int NNuEntries = nuTree->GetEntries();
370 
371  long n = 0;
372  caf::SRNeutrinoProxy nu(nuDir, nuTree, (type == caf::kNested) ? "mc.nu" : "nu", n, 0);
373 
374  if(fFileSource->NFiles() == 1)
375  prog = new Progress(TString::Format("Filling from nuTree from 1 file matching '%s'", fWildcard.c_str()).Data());
376 
377 
378  for(n = 0; n < NNuEntries; ++n){
379  if(type != caf::kFlatMultiTree) nuTree->LoadTree(n); // for all single-tree modes
380 
381  for(const NuHistDef& hd: fNuHistDefs){
383 
384  double systWeight = 1;
385  // Can special-case nominal to not pay cost of Shift()
386  if(!hd.shift.IsNominal()){
387  hd.shift.Shift(&nu, systWeight);
388  }
389 
390  if(hd.cut(&nu)){
391  const double wei = hd.wei(&nu)*systWeight;
392  if(wei){
393  if(hd.var){
394  const double val = (*hd.var)(&nu);
395  hd.spect.Fill(val, wei);
396  }
397  else{
398  for(double val: (*hd.multivar)(&nu)) hd.spect.Fill(val, wei);
399  }
400  }
401  }
402 
403  // Return SRNeutrino to its unshifted form ready for the next
404  // histogram.
406  } // end for hd
407 
408  if(prog && n%10000 == 0) prog->SetProgress(double(n)/NNuEntries);
409  } // end for n
410 
411  for(const NuHistDef& hd: fNuHistDefs) hd.spect.fPOT += totFilePOT;
412 
413  delete prog;
414  } // end if NuHistDefs
415 
416  return f;
417  }
418 
419  //---------------------------------------------------------------------
421  {
423  std::cout << " beam nue: " << fBeamNue << " single nue: " << fSingleNue << " fPOT: " << fPOT << " fSinglePOT: " << fSinglePOT << std::endl;
424  return fWeightedPOT;
425  }
426 
427  //----------------------------------------------------------------------
429  {
430  }
431 
432  //----------------------------------------------------------------------
434  {
435  }
436 
437 } // namespace
Histograms that are filled from the nuTree.
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
int ID() const
Vars with the same definition will have the same ID.
Definition: Var.h:36
TFile * GetNextFile()
Forwards to fFileSource but also accumulates POT and livetime.
const SpillVar fPOT([](const caf::SRSpillProxy *spill){return spill->spillpot;})
bool fGone
Has Go() been called? Can&#39;t add more histograms after that.
virtual void AddSpectrum(Spectrum &spect, const Var &var, const Cut &cut, const SystShifts &shift, const Var &wei=kUnweighted)
For use by the Spectrum constructor.
Proxy for caf::SRNeutrino.
Definition: SRProxy.h:510
Simple record of shifts applied to systematic parameters.
Definition: SystShifts.h:20
caf::Proxy< float > spillpot
Definition: SRProxy.h:1407
Spectrum with the value of a second variable, allowing for reweighting
double fRunPOT
Crude measure, not including spill cuts.
OStream cerr
Definition: OStream.cxx:7
void SetSpillCut(const SpillCut &cut)
ifdh calls between construction and destruction produce no output
Definition: Utilities.h:47
virtual void Go() override
Load all the registered spectra.
void AddSink(const Cut &cut, IExposureSink *s)
Representation of a spectrum in any variable, with associated POT.
Definition: Spectrum.h:40
CAFType GetCAFType(const TDirectory *dir, TTree *tr)
std::unique_ptr< SpillCut > fSpillCut
Cut applied to the spill branch for every event.
std::set< IRecordSink * > fSpectrumSinks
virtual void AccumulateExposures(const caf::SRSpillProxy *spill)=0
int Limit()
Value passed to –limit, or -1 if not specified.
Definition: Utilities.cxx:420
int ID() const
SystShifts with the same set of systs should have the same ID.
Definition: SystShifts.h:32
void AddSink(IRecordSink *s)
virtual void AddSpillHistogram(TH1 *h, const SpillVar &var, const SpillCut &cut, const SpillVar &wei=kSpillUnweighted)
Uses include counting the total POT or spills in a run.
int NFiles() const override
May return -1 indicating the number of files is not known.
std::vector< std::string > wildcard(const std::string &wildcardString)
Definition: convert.C:9
std::string getenv(std::string const &name)
std::unordered_map< int, SystApplier * > fSinksS
WeightApplier * GetSinkSCW(const SystShifts &shift, const Cut &cut, const Var &wei)
int ID() const
Cuts with the same definition will have the same ID.
Definition: Cut.h:55
size_t Stride(bool allow_default)
Value passed to –stride, or 1 if not specified.
Definition: Utilities.cxx:372
File source based on a SAM query or dataset (definition)
std::list< SpillHistDef > fSpillHistDefs
ifdh
Definition: ProjMan.py:8
std::unordered_map< std::pair< int, int >, CutApplier * > fSinksSC
Proxy for caf::SRSpill.
Definition: SRProxy.h:1346
OStream cout
Definition: OStream.cxx:6
virtual void AddReweightableSpectrum(ReweightableSpectrum &spect, const Var &xvar, const Var &yvar, const Cut &cut, const SystShifts &shift, const Var &wei)
For use by the constructors of ReweightableSpectrum subclasses.
Base class for the various types of spectrum loader.
SpectrumLoaderBase()
Component of other constructors.
const Cut cut
Definition: exporter_fd.C:30
size_t Offset(bool allow_default)
Value passed to –offset, or 0 if not specified.
Definition: Utilities.cxx:396
Interface class for accessing ROOT files in sequence.
Definition: IFileSource.h:10
Simple file source based on an explicit list provided by the user.
std::unique_ptr< IFileSource > fFileSource
void SetProgress(double frac)
Update the progress fraction between zero and one.
Definition: Progress.cxx:41
IFileSource * WildcardOrSAMQuery(const std::string &str) const
Figure out if str is a wildcard or SAM query and return a source.
assert(nhit_max >=nhit_nbins)
Helper for SpectrumLoaderBase. Details of SpillVar plots.
A simple ascii-art progress bar.
Definition: Progress.h:9
Fetch files from a pre-existing SAM project.
std::unordered_map< std::tuple< int, int, int >, WeightApplier * > fSinksSCW
void Format(TGraph *gr, int lcol, int lsty, int lwid, int mcol, int msty, double msiz)
Definition: Style.cxx:154
int NFiles() const
Forwards to fFileSource.
File source based on a wildcard (glob)
Definition: WildcardSource.h:8
Template for Cut and SpillCut.
Definition: Cut.h:15
double fPOT
Accumulated by calls to GetNextFile.
Float_t e
Definition: plot.C:35
std::list< NuHistDef > fNuHistDefs
TH1F * hd
Definition: Xdiff_gwt.C:57
enum BeamMode string