ChiSqrCalculator.cxx
Go to the documentation of this file.
1 //
2 // Created by Brian Rebel on 2/24/20.
3 //
4 
5 #include "NovaDAQConventions/DAQConventions.h"
6 
10 
11 #include "TH2.h"
12 #include "TH1.h"
13 #include "TMatrixT.h"
14 #include "TFile.h"
15 
16 namespace cmf{
17 
19 
20  //----------------------------------------------------------------------------
22  {
23  if(gChiSqrCalculator == nullptr) gChiSqrCalculator = new ChiSqrCalculator();
24 
25  return gChiSqrCalculator;
26  }
27 
28  //----------------------------------------------------------------------------
30  : fDiffVec(Eigen::VectorXd(cmf::CovarianceBinUtility::Instance()->TotalBins()))
31  , fInputEigenMatrixSyst(Eigen::MatrixXd::Zero(cmf::CovarianceBinUtility::Instance()->TotalBins(),
32  cmf::CovarianceBinUtility::Instance()->TotalBins()))
33  {
34  fDataSpectrum.resize(cmf::CovarianceBinUtility::Instance()->TotalBins(), 0.);
35  fMCSpectrum .resize(cmf::CovarianceBinUtility::Instance()->TotalBins(), 0.);
36 
37  LOG_VERBATIM("ChiSqrCalculator")
38  << "The ChiSqrCalculator will use "
40  << " as the number of logical bins in a spectrum."
41  << " There are "
42  << fInputEigenMatrixSyst.rows()
43  << " rows and "
44  << fInputEigenMatrixSyst.cols()
45  << " colums in the matrix";
46  }
47 
48  //----------------------------------------------------------------------------
50  {
51  }
52 
53  //----------------------------------------------------------------------------
54  double ChiSqrCalculator::CovarianceMatrixEntry(size_t matrixPos,
55  cmf::CovMat_t const& matType)
56  {
57  return cmf::kGarbageDouble;
58  }
59 
60  //----------------------------------------------------------------------------
61  int ChiSqrCalculator::FindReducedMatrixBin(std::set<long> const& keysToUse,
62  int const& fullMatrixBin)
63  {
64  long fullMatrixKey = cmf::CovarianceBinUtility::Instance()->BinToKey(fullMatrixBin, true);
65  long condensedMatrixKey = fullMatrixKey;
66 
67  LOG_DEBUG("ChiSqrCalculator")
68  << "key is "
69  << fullMatrixKey
70  << " "
71  << cmf::KeyToString(fullMatrixKey);
72 
73  if(keysToUse.find(fullMatrixKey) == keysToUse.end()){
74 
75  // check that this beam type and detector is desired
77  cmf::KeyToBeamType(fullMatrixKey)))
78  return std::numeric_limits<int>::lowest();
79 
80  // maybe we want concatenated selections, so check those out
81  // use the fullMatrixKey to keep the detector and beam information, but
82  // remove the selection type information and then add back in the concat
83  // selection type
86  condensedMatrixKey = (fullMatrixKey -
89  }
92  condensedMatrixKey = (fullMatrixKey -
95  }
96  else
97  return std::numeric_limits<int>::lowest();
98  }
99 
100  // subtract off the offset from the all selections for this key and then add
101  // back the offset for the current configuration for the key
102  return (fullMatrixBin -
103  cmf::CovarianceBinUtility::Instance()->KeyToOffset(fullMatrixKey, true) +
104  cmf::CovarianceBinUtility::Instance()->KeyToOffset(condensedMatrixKey));
105  }
106 
107  //----------------------------------------------------------------------------
109  {
110  LOG_VERBATIM("ChiSqrCalculator")
111  << pset.to_indented_string();
112 
113  fSystScale = pset.get<double>("SystScale", 1.);
114 
115  // if we are doing a stat only fit, we do nothing - the input covariance
116  // matrix was initialized with zeros in the constructor
117  if(pset.get<bool>("StatOnlyFit", false)) return;
118 
119  // get the information for where the covariance matrix is stored
120  // assume that the matrices for individual systematics have already
121  // been summed by this point
122  auto covarianceHistName = pset.get<std::string>("CovarianceMatrixHistName", "totalCovariance");
123 
124  // xrootd path to file in /pnfs/nova/scratch/
125  auto covarianceHistFileName = pset.get<std::string>("CovarianceMatrixHistFile");
126 
127  // get the covariance matrix from the input file - it is assumed to already be summed
128  // for the individual systematic uncertainties.
129 
130  // Load the covariance matrix from the input histogram file.
131  // Assume that the matrix has been properly normalized for the total
132  // number of shifts tested before we get it
133  TFile *covHistFile = TFile::Open(covarianceHistFileName.c_str());
134  TH2D *inputCovarianceSyst = dynamic_cast<TH2D*>(covHistFile->Get(covarianceHistName.c_str()));
135 
136  int numBins = inputCovarianceSyst->GetNbinsX();
137 
138  // The CovarianceBinUtility::TotalBins() returns the number for the full set of selections if
139  // the passed argument is true, the default is false but in this case we want to be sure the
140  // input covariance bin utility has everything we need
141  bool useFullMatrix = pset.get<bool>("UseFullMatrix");
142  if(numBins != (int)cmf::CovarianceBinUtility::Instance()->TotalBins(useFullMatrix))
143  throw cet::exception("ChiSqrCalculator")
144  << "input matrix has different number of logical bins than CovarianceBinUtility "
145  << numBins
146  << " vs "
148  << ". If you know that you want this, then you can set UseFullMatrix: false";
149 
150  LOG_DEBUG("ChiSqrCalculator")
151  << "numBins is "
152  << numBins
153  << " / "
155 
156  // in the following for loops we want to check that the bin to key values correspond
157  // to a selection we want. The returned value of SelectionsToUse is a set of
158  // DetBeamSels, a struct that holds a detector id, a beam type and a list of
159  // SelectionTypes
160  int iBin;
161  int jBin;
162 
163  std::set<long> keys;
164 
165  // get the keys for the selections we want
166  for(auto const& dbsItr : cmf::SelectionUtility::Instance()->SelectionsToUse()){
167  for(auto const& key: dbsItr.Keys()) keys.insert(key);
168  }
169 
170  for(int i = 0; i < numBins; ++i){
171 
172  iBin = this->FindReducedMatrixBin(keys, i);
173 
174  if(iBin == std::numeric_limits<int>::lowest()) continue;
175  LOG_DEBUG("ChiSqrCalculator")
176  << "i-th bin is "
177  << iBin
178  << " nominal i: "
179  << i;
180 
181  for(int j = 0; j < numBins; ++j){
182  jBin = this->FindReducedMatrixBin(keys, j);
183  if(jBin == std::numeric_limits<int>::lowest()) continue;
184 
185  LOG_DEBUG("ChiSqrCalculator")
186  << "j-th bin is "
187  << jBin
188  << " nominal j: "
189  << j;
190 
191  fInputEigenMatrixSyst(iBin, jBin) = inputCovarianceSyst->GetBinContent(i + 1, j + 1);
192  } // end loop over i bins
193  } // end loop over j bins
194 
195  covHistFile->Close();
196  }
197 
198  //----------------------------------------------------------------------------
200  bool isData)
201  {
202  // std::vector<float> tmpVec(spectrum.size(), 0.);
203  // for(size_t b = 0; b < spectrum.size(); ++b){
204  // tmpVec[b] = spectrum[b];
205 
206  // LOG_DEBUG("ChiSqrCalculator")
207  // << "spectrum bin "
208  // << b
209  // << " "
210  // << tmpVec[b]
211  // << " / "
212  // << spectrum[b];
213  // }
214 
215  LOG_DEBUG("ChiSqrCalculator")
216  << "SetSpectrum, there are: "
217  << spectrum.size()
218  << " bins, isData: "
219  << isData;
220 
221  if(isData) fDataSpectrum = spectrum;
222  else fMCSpectrum = spectrum;
223  }
224 
225  //......................................................................
226  double ChiSqrCalculator::ChiSqr(cmf::ChiSqrCalc_t const& chiSqrType,
228  {
229  std::vector<std::string> parNames;
230  std::vector<double> parVals;
231  for(auto const& itr : pars){
232  parNames.emplace_back(cmf::cOscParams_Strings[itr.first]);
233  parVals .emplace_back(itr.second);
234  }
235  return this->ChiSqr(chiSqrType, parNames, parVals.data());
236  }
237 
238  //......................................................................
239  double ChiSqrCalculator::ChiSqr(cmf::ChiSqrCalc_t const& chiSqrType,
240  std::vector<std::string> const& pars,
241  const double* vals)
242  {
243  double chiSqrNP = this->ChiSqrNuisance(pars, vals);
244 
245  double chiSqr = 0;
246  if (chiSqrType == cmf::kCovMat ) chiSqr = this->ChiSqrCovMat ();
247  else if(chiSqrType == cmf::kPoisson) chiSqr = this->ChiSqrPoisson();
248  else if(chiSqrType == cmf::kCNP ) chiSqr = this->ChiSqrCNP ();
249  else if(chiSqrType == cmf::kGauss ) chiSqr = this->ChiSqrGauss ();
250 
251  LOG_DEBUG("CMFChiSqrCalculator")
252  << "chi^2 = "
253  << chiSqr
254  << " nuisance chi^2 = "
255  << chiSqrNP
256  << " total = "
257  << chiSqr + chiSqrNP;
258 
259  return chiSqr + chiSqrNP;
260  }
261 
262  //......................................................................
264  {
265  // make a matrix for the calculation
266  Eigen::MatrixXd covMatEigen = Eigen::MatrixXd::Zero(fDataSpectrum.size(),
267  fDataSpectrum.size());
268 
269  // update the covariance matrix based on the latest event counts for the current point
271 
272  for(size_t i = 0; i < fDataSpectrum.size(); ++i){
273 
275 
276  LOG_DEBUG("ChiSqrCalculator")
277  << i
278  << " data "
279  << fDataSpectrum[i]
280  << " mc "
281  << fMCSpectrum[i]
282  << " diffs: "
283  << fDiffVec(i)
284  << " "
285  << covMatEigen(i, i);
286  }
287 
288  // LOG_DEBUG("CMFChiSqrCalculatorEigen")
289  // << "difference vector \n"
290  // << fDiffVec
291  // << " \n\n transpose "
292  // << fDiffVec.transpose()
293  // << " \n\n intermediate matrix \n\n"
294  // << covMatEigen.inverse()
295  // << " \n\n eigen chiSqr matrix \n\n"
296  // << (fDiffVec.transpose() * fCovarianceMatrixEigen.inverse() * fDiffVec).sum();
297 
298  return (fDiffVec.transpose() * covMatEigen.inverse() * fDiffVec).sum();
299  }
300 
301  //......................................................................
302  // use the Pearson construction, if the expected number of events is 0,
303  // then skip that bin
305  {
306  double chiSqr = 0.;
307 
308  for(size_t i = 0; i < fDataSpectrum.size(); ++i){
309  if(fMCSpectrum[i] > 0.)
310  chiSqr += this->BinGaussChiSqr(fDataSpectrum[i], fMCSpectrum[i]);
311 
312  LOG_DEBUG("ChiSqrCalculator")
313  << "Gaussian chi^2 "
314  << i
315  << " data "
316  << fDataSpectrum[i]
317  << " MC "
318  << fMCSpectrum[i]
319  << " chi^2 "
320  << chiSqr;
321  }
322 
323  return chiSqr;
324  }
325 
326  //......................................................................
327  // Calculate the contribution to the chi^2 due to nuisance parameters
328  double ChiSqrCalculator::ChiSqrNuisance(std::vector<std::string> const& pars,
329  const double* vals)
330  {
331  // we want to get any chi^2 contribution from the nuisance parameters now
332  double chiSqrNP = 0.;
333  double curNPVal = 0.;
334 
335 // LOG_VERBATIM("ChiSqrCalculator")
336 // << "there are "
337 // << cmf::NuisanceParameters::Instance()->NuisanceParameterInfo().size()
338 // << " nuisance parameters";
339 // for(auto const& itr : npMap)
340 // LOG_VERBATIM("ChiSqrCalculator")
341 // << "input npMap "
342 // << itr.first.first
343 // << " "
344 // << cmf::cDetType_Strings[itr.first.second]
345 // << " "
346 // << itr.second;
347 
348  for(size_t p = 0; p < pars.size(); ++p){
349  if(!cmf::ParameterUtility::Instance()->ParameterInfo(pars[p]).IsNuisance()) continue;
350 
351  curNPVal = vals[p];
352 
353  auto const& np = cmf::ParameterUtility::Instance()->ParameterInfo(pars[p]);
354 
355  // if we are looking at a systematic parameter, so not an oscillation
356  // parameter, multiply the current value by the systematic scale factor
357  // for stats only fits this will zero curNPVal of systematic parameters
358  // TODO: this may not be the right thing to do to scale the uncertainties except for the stats only fit
359  if(!np.IsOscPar()) curNPVal *= fSystScale;
360 
361  chiSqrNP += std::pow((curNPVal - np.CentralValue()) / np.Sigma(), 2.);
362 
363  LOG_DEBUG("ChiSqrCalculator")
364  << pars[p]
365  << " "
366  << np.CentralValue()
367  << " "
368  << curNPVal
369  << " "
370  << np.Sigma();
371 
372  }
373 
374  return chiSqrNP;
375  }
376 
377  //......................................................................
378  double ChiSqrCalculator::BinCNPChiSqr(double const& data,
379  double const& MC)
380  {
381  // strictly speaking, it is possible for both the amount of data
382  // and MC in a bin to be 0. In that case, the first part of this
383  // if statement would add 0 to the chi^2, so it is a no-op and
384  // I am leaving out the check on the amount of MC in the bin to
385  // reduce an extra conditional check
386  double chi2 = 0.;
387  if(data == 0.)
388  chi2 = 2. * MC;
389  else if(MC * data > 0.){
390  chi2 = std::pow(MC - data, 2.) / (3. / (1. / data + 2. / MC));
391  }
392 
393  return chi2;
394  }
395 
396  //......................................................................
398  double const& MC)
399  {
400  // below I multiplied out (d - MC)^2 / MC, ie the Pearson
401  // construction value to speed up the calculation
402  double chiSqr = data * data / MC;
403  chiSqr += MC - 2. * data;
404 
405  return chiSqr;
406  }
407 
408  //......................................................................
410  double const& MC)
411  {
412  double chi2 = MC - data;
413  if(data * MC > 0.) chi2 += data * std::log(data / MC);
414 
415  return 2. * chi2;
416  }
417 
418  //......................................................................
419  // use the log-likelihood function for poisson distributed data to get a
420  // goodness of fit
422  {
423  double chi2 = 0.;
424 
425  // check that the data and MC are both non-zero before trying to add the 3rd term
426  // with the natural log
427  for(size_t i = 0; i < fDataSpectrum.size(); ++i){
428  chi2 += this->BinPoissonChiSqr(fDataSpectrum[i],
429  fMCSpectrum[i]);
430 
431  LOG_DEBUG("ChiSqrCalculator")
432  << "Poisson bin "
433  << i
434  << " data: "
435  << fDataSpectrum[i]
436  << " MC: "
437  << fMCSpectrum[i]
438  << " total chi^2: "
439  << chi2;
440  }
441 
442  return chi2;
443  }
444 
445  //......................................................................
446  // use the CNP method proposed in https://arxiv.org/pdf/1903.07185.pdf
447  // to calculate the chi^2
449  {
450  double chi2;
451  double sumChi2 = 0.;
452 
453  for(size_t i = 0; i < fDataSpectrum.size(); ++i){
454 
455  chi2 = this->BinCNPChiSqr(fDataSpectrum[i], fMCSpectrum[i]);
456 
457  sumChi2 += chi2;
458 
459  LOG_DEBUG("ChiSqrCalculator")
460  << "CNP bin "
461  << i
462  << " data: "
463  << fDataSpectrum[i]
464  << " MC: "
465  << fMCSpectrum[i]
466  << " chi^2: "
467  << chi2
468  << " total chi^2: "
469  << sumChi2
470  << " poisson chi^2: "
471  << this->BinPoissonChiSqr(fDataSpectrum[i], fMCSpectrum[i]);
472 
473  }
474 
475  return sumChi2;
476  }
477 
478  //......................................................................
479  // This method should be called after FillMCSpectrum
481  bool isSystOnly)
482  {
483  // as in the CNPChiSqr function, use https://arxiv.org/pdf/1903.07185.pdf
484  // to determine the values for the statistical uncertainty matrix
485  // if we are not using some bins the count value will be 0, so set it to
486  // something just very small in that case
487 
488  // the data and MC spectra are the same size, just loop over the size of
489  // one of them
490  double stats;
491  for(size_t b = 0; b < fMCSpectrum.size(); ++b){
492  for(size_t j = 0; j < fMCSpectrum.size(); ++j){
493 
494  stats = 0.;
495  if(b == j && !isSystOnly){
496  // Default the value of stats to 1 on the diagonal to account for the possibility that
497  // we have neither data, nor MC in a bin. Since we do not remove those bins from the
498  // covariance matrix, we need to set the diagonal term to 1.
499  // That is true when we have removed a selection from the analysis, ie not including NC
500  // events in the 3 flavor analysis. There should never be a case where we have
501  // data events in a bin but no MC
502  if(fDataSpectrum[b] == 0. && fMCSpectrum[b] > 0.)
503  stats = fMCSpectrum[b] * 0.5;
504  else if(fMCSpectrum[b] * fDataSpectrum[b] > 0.)
505  stats = 3. / (1. / fDataSpectrum[b] + 2. / fMCSpectrum[b]);
506  else
507  stats = 1.;
508 
509  LOG_DEBUG("ChiSqrCalculator")
510  << b
511  << " "
512  << fMCSpectrum[b]
513  << " "
514  << fDataSpectrum[b]
515  << " "
516  << stats
517  << " "
519  << " "
520  << fSystScale;
521 
522  }
523 
525 
526  } // end loop over j bins
527 
528  } // end loop over b bins
529 
530  LOG_DEBUG("ChiSqrCalculator")
531  << matrix;
532 
533  } // end FillCovarianceMatrix
534 
535  //......................................................................
536  // Call these methods after all fitting is done, and only for the best fit
537  // otherwise you will get ROOT errors about histograms having the same name.
539  cmf::Spectrum const& dataSpectrum,
540  cmf::Spectrum const& mcSpectrum,
541  TH1D* binByBin,
542  TH1D* cumulative)
543  {
544  this->SetSpectrum(dataSpectrum, true );
545  this->SetSpectrum(mcSpectrum, false);
546 
547  // for the cmf::kCovMat chi^2 calc type, use the CNP bin by bin value
548  // because those give the same total chi^2
549 
550  // Fill histograms showing the chi^2 value for each bin and the cumulative chi^2
551 
552  double chiSqr;
553  double chiSqrSum = 0.;
554  for(size_t b = 0; b < fDataSpectrum.size(); ++b){
555 
556  if (chiSqrCalc == cmf::kCNP ||
557  chiSqrCalc == cmf::kCovMat) chiSqr = this->BinCNPChiSqr (fDataSpectrum[b], fMCSpectrum[b]);
558  else if(chiSqrCalc == cmf::kPoisson) chiSqr = this->BinPoissonChiSqr(fDataSpectrum[b], fMCSpectrum[b]);
559  else if(chiSqrCalc == cmf::kGauss) chiSqr = this->BinGaussChiSqr (fDataSpectrum[b], fMCSpectrum[b]);
560 
561  chiSqrSum += chiSqr;
562 
563  binByBin ->Fill(b, chiSqr);
564  cumulative->Fill(b, chiSqrSum);
565  }
566  }
567 
568 } // end namespace
Eigen::VectorXd fDiffVec
column-wise vector of the difference between data and MC
#define LOG_DEBUG(stream)
Definition: Messenger.h:149
double BinCNPChiSqr(double const &data, double const &MC)
void InitializeCovarianceMatrix(fhicl::ParameterSet const &pset)
enum cmf::covmat_type CovMat_t
static long SelectionTypeKey(cmf::SelectionType_t const &st)
Definition: StaticFuncs.h:19
double ChiSqrNuisance(std::vector< std::string > const &pars, const double *vals)
const char * p
Definition: xmltok.h:285
static const double kGarbageDouble
Definition: Constants.h:18
constexpr T pow(T x)
Definition: pow.h:75
cmf::Parameter const & ParameterInfo(std::string const &parName) const
double BinGaussChiSqr(double const &data, double const &MC)
static SelectionUtility * Instance()
std::vector< double > Spectrum
Definition: Constants.h:743
void FillChiSqrHistograms(cmf::ChiSqrCalc_t const &chiSqrCalc, cmf::Spectrum const &dataSpectrum, cmf::Spectrum const &mcSpectrum, TH1D *binByBin, TH1D *cumulative)
Definition: StanUtils.h:6
int stats(TString inputFilePath, Int_t firstRun, Int_t lastRun, Float_t thresh, TString myDet)
Definition: stats.C:13
::xsd::cxx::tree::exception< char > exception
Definition: Database.h:225
long BinToKey(int const &bin, bool allSels=false)
static cmf::BeamType_t KeyToBeamType(long const &key)
Definition: StaticFuncs.h:135
cmf::Spectrum fDataSpectrum
data spectrum in bins
double chi2()
static bool IsNuESelected(cmf::SelectionType_t const &sel)
Definition: StaticFuncs.h:380
cmf::Spectrum fMCSpectrum
MC spectrum in bins.
const XML_Char const XML_Char * data
Definition: expat.h:268
static bool IsNuMuSelected(cmf::SelectionType_t const &sel)
Definition: StaticFuncs.h:366
static ParameterUtility * Instance()
std::map< cmf::OscParm_t, float > OscillationParameterMap
Definition: Constants.h:745
static cmf::DetType_t KeyToDetectorType(long const &key)
Definition: StaticFuncs.h:69
bool const & IsNuisance() const
Definition: Parameter.h:143
T get(std::string const &key) const
Definition: ParameterSet.h:231
int FindReducedMatrixBin(std::set< long > const &keysToUse, int const &fullMatrixBin)
bool UsesSelection(cmf::SelectionType_t const &s) const
const double j
Definition: BetheBloch.cxx:29
double fSystScale
scaling factor to apply to systematic matrix entries for tests
static cmf::ChiSqrCalculator * Instance()
double ChiSqr(cmf::ChiSqrCalc_t const &chiSqrCalc, std::vector< std::string > const &pars, const double *vals)
size_t TotalBins(bool allSels=false)
Module to combine a set of results into a single file currently only does one data product type at a ...
Definition: Event.cxx:24
std::string to_indented_string() const
std::string pars("Th23Dmsq32")
void Zero()
bool UsesDetAndBeam(cmf::MetaData const &md) const
const hit & b
Definition: hits.cxx:21
static cmf::SelectionType_t KeyToSelectionType(long const &key)
Definition: StaticFuncs.h:169
double BinPoissonChiSqr(double const &data, double const &MC)
static ChiSqrCalculator * gChiSqrCalculator
#define LOG_VERBATIM(category)
Eigen::MatrixXd fInputEigenMatrixSyst
input matrix for systematics
Double_t sum
Definition: plot.C:31
void FillCovarianceMatrix(Eigen::MatrixXd &matrix, bool isSystOnly=false)
double CovarianceMatrixEntry(size_t matrixPos, cmf::CovMat_t const &matType)
static std::string KeyToString(long const &key)
Definition: StaticFuncs.h:227
enum cmf::chisqr_type ChiSqrCalc_t
void SetSpectrum(cmf::Spectrum const &spectrum, bool isData)
static CovarianceBinUtility * Instance()
const std::vector< std::string > cOscParams_Strings({"L","Rho","Dmsq21","Dmsq32","Th12","Th13","Th23","dCP","Th14","Th24","Th34","Dmsq41","d24","Eps_ee","Eps_emu","Eps_etau","Eps_mumu","Eps_mutau","Eps_tautau","Delta_emu","Delta_etau","Delta_mutau"})
enum BeamMode string