genie_syst_pca.C
Go to the documentation of this file.
1 #include "CAFAna/Core/Spectrum.h"
4 
7 
8 #include "TCanvas.h"
9 #include "TH1.h"
10 #include "TH1D.h"
11 #include "TH2.h"
12 #include "TFile.h"
13 #include "TLegend.h"
14 #include "TMatrixD.h"
15 #include "TMatrixDSymEigen.h"
16 #include "TMatrixDSym.h"
17 #include "TColor.h"
18 #include "TStyle.h"
19 #include "TLatex.h"
20 #include "TGaxis.h"
21 
22 #include <fstream>
23 #include <iostream>
24 #include <cmath>
25 #include <algorithm>
26 
27 using namespace ana;
28 
29 // store flavor spectra and their universes
30 std::vector<TH1D*> nom_hist;
31 std::vector<TH1D*> shifts_plus;
32 std::vector<TH1D*> shifts_minus;
33 std::vector<std::vector<TH1D*>> univ_hist;
34 
35 // store shifts and the multiverse projections along each pc
36 std::vector<TH1D*> pc_shifts_plus;
37 std::vector<TH1D*> pc_shifts_minus;
38 std::vector<std::vector<TH1D*>> pc_projections;
39 
40 void FillHists(std::string fileName, std::vector<TString> samples, const int nuniverses)
41 {
42  for(unsigned int i = 0;i < samples.size();++i){
43  // This doesn't actually matter since we'll be normalizing
44  double pot = samples[i].Contains("FHC") ? kAna2019FHCPOT : kAna2019RHCPOT;
45 
46  // Load the spectra
47  std::vector<TH1D*> hUni;
48  for(int j = 0;j < nuniverses;++j)
49  hUni.push_back(LoadFromFile<Spectrum>(fileName,("genieverse_"+(std::string)samples[i]+"/Univ"+std::to_string(j)).c_str()).release()->ToTH1(pot));
50 
51  nom_hist.push_back(hUni[0]);
52 
53  shifts_plus.push_back(ErrorBand(hUni[0],hUni,1));
54  shifts_minus.push_back(ErrorBand(hUni[0],hUni,-1));
55 
56  std::vector<TH1D*> temp_hist;
57  for(unsigned int j = 1;j < hUni.size();++j)
58  temp_hist.push_back(hUni[j]);
59  univ_hist.push_back(temp_hist);
60  }
61 
62 }
63 
64 void goFNBasis(){
65  unsigned int length = nom_hist.size();
66 
67  // Divide F/N for each component
68  for(unsigned int i = length/2;i < length;++i){
69  // Numu doesn't extrapolate NCs
70  if(i == length/2+4 || i == length/2+5) continue;
71  // Nue signal in the ND is numus
72  if(i == length/2+6 || i == length/2+7 || i == length/2+8 || i == length/2+9){
73  nom_hist[i]->Divide(nom_hist[i-length/2-6]);
74  shifts_plus[i]->Divide(shifts_plus[i-length/2-6]);
75  shifts_minus[i]->Divide(shifts_minus[i-length/2-6]);
76  continue;
77  }
78  // Others are standard ratios
79  nom_hist[i]->Divide(nom_hist[i-length/2]);
80  shifts_plus[i]->Divide(shifts_plus[i-length/2]);
81  shifts_minus[i]->Divide(shifts_minus[i-length/2]);
82  }
83 
84  // Same in each universe
85  for(unsigned int j = 0;j < univ_hist[0].size();++j){
86  for(unsigned int i = length/2;i < length;++i){
87  if(i == length/2+4 || i == length/2+5) continue;
88  if(i == length/2+6 || i == length/2+7 || i == length/2+8 || i == length/2+9){
89  univ_hist[i][j]->Divide(univ_hist[i-length/2-6][j]);
90  continue;
91  }
92  univ_hist[i][j]->Divide(univ_hist[i-length/2][j]);
93  }
94  }
95 
96 }
97 
98 void SuppressND(double sup){
99  unsigned int length = nom_hist.size();
100 
101  for(unsigned int i = 0;i < length/2;++i){
102  nom_hist[i]->Scale(1/sup);
103  shifts_plus[i]->Scale(1/sup);
104  shifts_minus[i]->Scale(1/sup);
105 
106  for(unsigned int j = 0;j < univ_hist[0].size();++j)
107  univ_hist[i][j]->Scale(1/sup);
108  }
109 
110 }
111 
113  for(unsigned int i = 0;i < nom_hist.size();++i){
114  double nom_area = nom_hist[i]->Integral();
115  if(nom_area == 0) continue;
116 
117  nom_hist[i]->Scale(1/nom_area);
118  shifts_plus[i]->Scale(1/nom_area);
119  shifts_minus[i]->Scale(1/nom_area);
120 
121  for(unsigned int j = 0;j < univ_hist[i].size();++j)
122  univ_hist[i][j]->Scale(1/nom_area);
123  }
124 }
125 
126 void FillPCAContainers(TMatrixD eigenvectors, TVectorD eigenvalues,
127  TH1D* joint_hist_nom, std::vector<TH1D*> joint_hist_systs)
128 {
129  for(int component = 0; component < eigenvalues.GetNrows(); component++){
130 
131  TVectorD x = eigenvectors[component];
132 
133  x *= sqrt(eigenvalues[component]);
134 
135  TH1D* retplus = new TH1D(x);
136  std::string nameplus = "pcplus"+std::to_string(component);
137  retplus->SetName(nameplus.c_str());
138  retplus->Add(joint_hist_nom);
139  pc_shifts_plus.push_back(retplus);
140 
141  x *= -1.;
142 
143  TH1D* retminus = new TH1D(x);
144  std::string nameminus = "pcminus"+std::to_string(component);
145  retminus->SetName(nameminus.c_str());
146  retminus->Add(joint_hist_nom);
147  pc_shifts_minus.push_back(retminus);
148 
149  std::vector<TH1D*> h_proj_univs;
150  // These take /forever/ to calculate with a lot of universes
151  for(unsigned int i = 0; i < univ_hist[0].size(); i+=10){
152 
153  double projection = 0;
154 
155  for(int binIdx = 1; binIdx <= joint_hist_systs[i]->GetNbinsX(); binIdx++)
156  projection += -1.*x[binIdx-1]*(joint_hist_systs[i]->GetBinContent(binIdx)-joint_hist_nom->GetBinContent(binIdx));
157 
158  projection *= 1./(x.Norm2Sqr());
159 
160  TVectorD y = x;
161  y *= projection;
162 
163  TH1D* ret = new TH1D(y);
164  std::string name = "proj"+std::to_string(component)+"univ"+std::to_string(i);
165  ret->SetName(name.c_str());
166  ret->Add(joint_hist_nom);
167  h_proj_univs.push_back(ret);
168 
169  }
170  pc_projections.push_back(h_proj_univs);
171 
172  }
173 
174 }
175 
176 void SavePCAShifts(int ncomponents,
177  TH1D* joint_hist_nom, std::vector<TH1D*> joint_hist_systs)
178 {
179  TFile* saveHists = new TFile("pc_shifts.root", "RECREATE");
180  TDirectory *nomdir = saveHists->mkdir("nom");
181  nomdir->cd();
182  TH1D* nom_save_hist = (TH1D*)joint_hist_nom->Clone("genie_nom");
183  nom_save_hist->Write();
184  saveHists->cd();
185  int UnivIdx=0;
186  TDirectory *univdir = saveHists->mkdir("univs");
187  univdir->cd();
188  for(auto joint_hist_univ: joint_hist_systs) {
189  std::string name_univ = "genie_univ"+std::to_string(UnivIdx);
190  TH1D* joint_hist_syst_ratio = (TH1D*)joint_hist_univ->Clone(name_univ.c_str());
191  joint_hist_syst_ratio->Write();
192  UnivIdx++;
193  }
194  saveHists->cd();
195  TDirectory *pcdir = saveHists->mkdir("shifts");
196  pcdir->cd();
197  int pcIdx = 0;
198  for(auto pc_shift:pc_shifts_plus) {
199  TH1D* genieplus = (TH1D*)pc_shift->Clone("");
200  std::string nameplus_save = "genie"+std::to_string(pcIdx);
201  genieplus->SetName(nameplus_save.c_str());
202  genieplus->Write();
203  if(pcIdx >= ncomponents) break;
204  pcIdx++;
205  }
206  saveHists->cd();
207  saveHists->Close();
208 }
209 
210 void genie_syst_pca(const int nuniverses = 1000)
211 {
212 
213  std::vector<TString> samples = {
214  "FHC_NumuND_numus_QE",
215  "RHC_NumuND_numus_QE",
216  "FHC_NumuND_numus_nonQE",
217  "RHC_NumuND_numus_nonQE",
218  "FHC_NumuND_ncs_All",
219  "RHC_NumuND_ncs_All",
220 
221  "FHC_NueND_nues_QE",
222  "RHC_NueND_nues_QE",
223  "FHC_NueND_nues_nonQE",
224  "RHC_NueND_nues_nonQE",
225  "FHC_NueND_numus_All",
226  "RHC_NueND_numus_All",
227  "FHC_NueND_ncs_All",
228  "RHC_NueND_ncs_All",
229 
230  "FHC_NumuFD_numus_QE",
231  "RHC_NumuFD_numus_QE",
232  "FHC_NumuFD_numus_nonQE",
233  "RHC_NumuFD_numus_nonQE",
234  "FHC_NumuFD_ncs_All",
235  "RHC_NumuFD_ncs_All",
236 
237  "FHC_NueFD_nues_QE",
238  "RHC_NueFD_nues_QE",
239  "FHC_NueFD_nues_nonQE",
240  "RHC_NueFD_nues_nonQE",
241  "FHC_NueFD_numus_All",
242  "RHC_NueFD_numus_All",
243  "FHC_NueFD_ncs_All",
244  "RHC_NueFD_ncs_All",
245  };
246 
247  // input root file
248  std::string fileName = "make_genieverse.root";
249 
250  // load the spectra
251  FillHists(fileName, samples, nuniverses);
252  // Switch to F/N for the relevant spectra
253  goFNBasis();
254  // Normalize importance
255  NormalizeHists();
256  // Suppress ND to improve convergence
258 
259  // flatten spectra
260  TH1D* joint_hist_nom = CombineHistograms(nom_hist);
261  TH1D* joint_hist_plus = CombineHistograms(shifts_plus);
262  TH1D* joint_hist_minus = CombineHistograms(shifts_minus);
263  std::vector <TH1D*> joint_hist_systs;
264  for(unsigned int i = 0;i < univ_hist[0].size();++i){
265  std::vector<TH1D*> temp_hist;
266  for(unsigned int j = 0;j < univ_hist.size();++j) temp_hist.push_back(univ_hist[j][i]);
267  TH1D* joint_hist_univ = CombineHistograms(temp_hist);
268  joint_hist_systs.push_back(joint_hist_univ);
269  }
270 
271  // get number of bins
272  int nbins = nom_hist[0]->GetNbinsX();
273  int joint_nbins = joint_hist_nom->GetNbinsX();
274 
275  // calculate bin-bin covariance
276  const std::vector<TH1D*> genie(joint_hist_systs.begin(), joint_hist_systs.end());
277  std::unique_ptr<TMatrixDSym> covMx = GetCovMx(genie);
278 
279  // diagonalize
280  TMatrixDSymEigen cov(*covMx.get());
281  TMatrixD V = cov.GetEigenVectors();
282  TMatrixD eigenvectors = V;
283  eigenvectors.Transpose(V);
284  TVectorD eigenvalues = cov.GetEigenValues();
285 
286  // Verify that the diagonalisation works as expected
287  CrossCheckDiag(*covMx.get(), V, eigenvectors, eigenvalues);
288 
289  // Get PCA Shifts and Projections
290  FillPCAContainers(eigenvectors, eigenvalues, joint_hist_nom, joint_hist_systs);
291 
292  // Save Shifts
293  int ncomponents = 100;
294  SavePCAShifts(ncomponents, joint_hist_nom, joint_hist_systs);
295 
296  // plotting!
297  // plot the distribution of universes
298  joint_hist_nom->SetMaximum(1.4*joint_hist_nom->GetMaximum());
299  joint_hist_nom->GetXaxis()->CenterTitle();
300  joint_hist_plus->SetLineColor(kAzure);
301  joint_hist_minus->SetLineColor(kRed);
302  std::vector<TH1D*> joints;
303  for(unsigned int i = 0;i < joint_hist_systs.size();++i){
304  joint_hist_systs[i]->SetLineColor(kGray);
305  joints.push_back(joint_hist_systs[i]);
306  }
307  joints.push_back(joint_hist_minus);
308  joints.push_back(joint_hist_plus);
309 
310  TCanvas *c1 = QuickUnivRatioPlot("c1",joint_hist_nom,joints);
311 
312  TLegend *l = new TLegend(0.64,0.69,0.92,0.84);
313  l->SetFillStyle(0);
314  l->AddEntry(joint_hist_nom,"Nominal MC","l");
315  l->AddEntry(joint_hist_plus,"+1#sigma Universe","l");
316  l->AddEntry(joint_hist_minus,"-1#sigma Universe","l");
317  l->AddEntry(joint_hist_systs[0],"Genie Universe","l");
318  l->Draw();
319 
321 
322  c1->cd(1);
324  c1->cd(2);
326 
327  c1->Print("plots/genie_all.pdf");
328 
329  // plot the covariance matrix
330  TH2D *histCov = new TH2D("histCov", "Bin-Bin Covariance", joint_nbins, 0, joint_nbins, joint_nbins, 0, joint_nbins);
331  for(int xbinIdx = 1; xbinIdx <= joint_nbins; xbinIdx++){
332  for(int ybinIdx = 1; ybinIdx <= joint_nbins; ybinIdx++){
333  histCov->Fill(xbinIdx, ybinIdx, (*covMx)[xbinIdx-1][ybinIdx-1]);
334  }
335  }
336 
337  gStyle->SetPalette(kCool);
338 
339  TCanvas *c2 = new TCanvas("c2", "c2");
340  histCov->GetXaxis()->CenterTitle();
341  histCov->GetYaxis()->CenterTitle();
342  histCov->Draw("COLZ");
343  c2->SetLogz();
344  c2->SetRightMargin(0.125);
345  c2->Update();
346 
347  //drawLabels2D(axis_labels);
348 
349  c2->Print("plots/genie_covariance.pdf");
350 
351  for(int i = 0;i < eigenvalues.GetNrows();++i)
352  if(eigenvalues[i] < 10E-15){
353  std::cout<<i-2<<" "<<eigenvalues[i-2]<<std::endl;
354  std::cout<<i-1<<" "<<eigenvalues[i-1]<<std::endl;
355  std::cout<<i <<" "<<eigenvalues[i] <<std::endl;
356  std::cout<<i+1<<" "<<eigenvalues[i+1]<<std::endl;
357  std::cout<<i+2<<" "<<eigenvalues[i+2]<<std::endl;
358  break;
359  }
360 
361  // plot the eigenvalue profile
362  TCanvas *c3 = new TCanvas("c3", "c3");
363  TH1D* histEigen = new TH1D(eigenvalues);
364  histEigen->SetName("pi");
365  histEigen->GetXaxis()->SetTitle("Eigennumber");
366  histEigen->GetYaxis()->SetTitle("Eigenvalue");
367  histEigen->SetMarkerStyle(kFullCircle);
368  histEigen->GetXaxis()->SetRangeUser(0,238);
369  histEigen->GetXaxis()->CenterTitle();
370  histEigen->GetYaxis()->CenterTitle();
371  histEigen->Draw("p");
372  c3->SetLogy();
373 
374  c3->Print("plots/genie_eigen.pdf");
375 
376  // plot the total explained variancee
377  TCanvas *c4 = new TCanvas("c4","c4");
378  TVectorD eigenInt(eigenvalues.GetNrows());
379  double sum=0;
380  for(int i = 0;i < eigenvalues.GetNrows();++i){
381  sum+=eigenvalues[i];
382  eigenInt[i] = sum/eigenvalues.Sum();
383  }
384  TH1D* histEigenInt = new TH1D(eigenInt);
385  histEigenInt->GetXaxis()->SetTitle("N_PC");
386  histEigenInt->GetYaxis()->SetTitle("Total Explained Variance");
387  histEigenInt->SetMarkerStyle(kFullCircle);
388  histEigenInt->GetXaxis()->SetRangeUser(0,238);
389  histEigenInt->GetXaxis()->CenterTitle();
390  histEigenInt->GetYaxis()->CenterTitle();
391  histEigenInt->Draw("p");
392 
393  c4->Update();
394  drawCoverLines(histEigenInt);
395 
396  c4->Print("plots/genie_coverage.pdf");
397 
398  // plot the PC and the universe projections
399  //for(int i = 0;i < eigenvalues.GetNrows();++i){
400  // only like 20 tho... No one will ever look at more than that right?
401  for(int i = 0;i < 20;++i){
402  for(unsigned int j = 0;j < pc_projections[i].size();++j) pc_projections[i][j]->SetLineColor(kGray);
403  pc_shifts_plus[i]->SetLineColor(kAzure);
404  pc_shifts_minus[i]->SetLineColor(kRed);
405  std::vector<TH1D*> hComb(pc_projections[i].begin(),pc_projections[i].end());
406  hComb.push_back(pc_shifts_minus[i]);
407  hComb.push_back(pc_shifts_plus[i]);
408  TCanvas *cproj = QuickUnivRatioPlot("cproj_"+std::to_string(i),joint_hist_nom,hComb);
409 
411 
412  TLegend *lproj = new TLegend(0.64,0.69,0.92,0.84);
413  lproj->SetFillStyle(0);
414  lproj->AddEntry(joint_hist_nom,"Nominal MC","l");
415  lproj->AddEntry(pc_shifts_plus[i],"+1#sigma Shift","l");
416  lproj->AddEntry(pc_shifts_minus[i],"-1#sigma Shift","l");
417  lproj->AddEntry(pc_projections[i][0],"Universes Projections","l");
418  lproj->Draw();
419 
420  cproj->cd(1);
422  cproj->cd(2);
424 
425  cproj->cd();
426  TLatex *tex = new TLatex(0.9,0.93,("PC"+std::to_string(i)+" Explained Variance: "+std::to_string(eigenvalues[i]/eigenvalues.Sum())).c_str());
427  tex->SetTextSize(1./30);
428  tex->SetTextAlign(32);
429  tex->Draw();
430 
431  if(i<10) cproj->Print(("plots/genie_proj_PC00"+std::to_string(i)+".pdf").c_str());
432  else if(i<100) cproj->Print(("plots/genie_proj_PC0"+std::to_string(i)+".pdf").c_str());
433  else std::cout<<"Uh, what?"<<std::endl;
434 
435  }
436 
437 }
const XML_Char * name
Definition: expat.h:151
TCanvas * QuickUnivRatioPlot(std::string name, TH1D *hNom, std::vector< TH1D * > others)
std::vector< int > axis_widths
std::vector< TH1D * > shifts_minus
fileName
Definition: plotROC.py:78
enum BeamMode kRed
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
void genie_syst_pca(const int nuniverses=1000)
THE MAIN GENIE PROJECT NAMESPACE
Definition: GeneratorBase.h:8
std::vector< TH1D * > shifts_plus
void drawBinLines(std::vector< int > width)
T sqrt(T number)
Definition: d0nt_math.hpp:156
TH1D * CombineHistograms(std::vector< TH1D * > hists)
void SavePCAShifts(int ncomponents, TH1D *joint_hist_nom, std::vector< TH1D * > joint_hist_systs)
void goFNBasis()
double suppress
c2
Definition: demo5.py:33
void drawLabels(std::vector< std::string > labels)
const int nbins
Definition: cellShifts.C:15
std::vector< TH1D * > pc_shifts_minus
length
Definition: demo0.py:21
void FillHists(std::string fileName, std::vector< TString > samples, const int nuniverses)
Float_t E
Definition: plot.C:20
hmean SetLineColor(4)
#define pot
std::vector< TH1D * > pc_shifts_plus
void FillPCAContainers(TMatrixD eigenvectors, TVectorD eigenvalues, TH1D *joint_hist_nom, std::vector< TH1D * > joint_hist_systs)
const double j
Definition: BetheBloch.cxx:29
OStream cout
Definition: OStream.cxx:6
const double kAna2019RHCPOT
Definition: Exposures.h:224
std::unique_ptr< TMatrixDSym > GetCovMx(const std::vector< TH1D * > &hists)
std::vector< TH1D * > nom_hist
TGraph * genie
Definition: Xsec_final.C:116
std::vector< std::vector< TH1D * > > pc_projections
TLatex * tex
Definition: f2_nu.C:499
const double kAna2019FHCPOT
Definition: Exposures.h:223
c1
Definition: demo5.py:24
std::vector< std::vector< TH1D * > > univ_hist
void NormalizeHists()
void SuppressND(double sup)
std::string to_string(ModuleType mt)
Definition: ModuleType.h:32
void CrossCheckDiag(TMatrixDSym covMx, TMatrixD V, TMatrixD eigenvectors, TVectorD eigenvalues, double tolerance=1e-14)
Double_t sum
Definition: plot.C:31
const int nuniverses
std::vector< std::string > axis_labels
TH1D * ErrorBand(TH1D *hNom, std::vector< TH1D * > hUnivs, int nsigmas)
simulatedPeak Scale(1/simulationNormalisationFactor)
void drawCoverLines(TH1D *hist)
enum BeamMode string