pi0_spectra.py
Go to the documentation of this file.
1 ###############################################################
2 # Reconstruct the pi0 mass peak using ND MC in h5 files
3 #
4 # @
5 #
6 ###############################################################
7 
8 import os
9 # Includes
10 import sys
11 sys.path.append('../..')
12 from PandAna.core import *
13 # analysis packages
14 import numpy as np
15 import pandas as pd
16 from scipy.optimize import curve_fit
17 from astropy.stats import poisson_conf_interval
18 # For plotting
19 import matplotlib.pyplot as plt
20 
21 ###########################################
22 # Look for vertices with two prongs
23 # Note: We don't need to explicitly check if there is a vertex like in CAFAna.
24 # Note 2: One liners can be done with lambda functions directly in the cut.
25 ###########################################
26 
27 # npng etc now in brackets instead of an attribute so the loader knows what to load
28 kTwoProng = Cut(lambda tables:
29  (tables['rec.vtx.elastic.fuzzyk']['npng'] == 2).groupby(level=KL).agg(np.any))
30 
31 ###########################################
32 # Look for events where all prongs are photon like
33 # multiliners need to be done with def x(tables): blah return blah
34 # then x = Cut(x)
35 ###########################################
36 def kGammaCut(tables):
37  df = tables['rec.vtx.elastic.fuzzyk.png.cvnpart']['photonid']
38  return (df > 0.75).groupby(level=KL).agg(np.all)
39 kGammaCut = Cut(kGammaCut)
40 
41 # Loose containment
42 def kContain(tables):
43  df = tables['rec.vtx.elastic']
44  return (df['vtx.x'] < 180) & \
45  (df['vtx.x'] > -180) & \
46  (df['vtx.y'] < 180) & \
47  (df['vtx.y'] > -180) & \
48  (df['vtx.z'] < 1000) & \
49  (df['vtx.z'] > 50)
50 kContain = Cut(kContain)
51 
52 kPlaneGap = Cut(lambda tables:
53  (tables['rec.vtx.elastic.fuzzyk.png']['maxplanegap'] > 1).\
54  groupby(level=KL).agg(np.all))
55 
56 kPlaneContig = Cut(lambda tables:
57  (tables['rec.vtx.elastic.fuzzyk.png']['maxplanecont'] > 4).\
58  groupby(level=KL).agg(np.all))
59 
60 # Does the event actually have a pi0?
61 kTruePi0 = Cut(lambda tables:
62  tables['rec.sand.nue']['npi0'] > 0)
63 
64 ###########################################
65 # Computes the invariant mass of two prong events
66 ###########################################
67 def kMass(tables):
68  # Note: We could leave this check out, but you would get a warning about taking
69  # the sqrt of negative numbers at the end (it won't crash like cafana).
70  # dataframes can support NaNs just fine.
71  check = tables['rec.vtx.elastic.fuzzyk']['npng'] == 2
72 
73  df = tables['rec.vtx.elastic.fuzzyk.png'][check]
74  x = df['dir.x']
75  y = df['dir.y']
76  z = df['dir.z']
77 
78  # Compute the length of the dir vector and then normalize
79  l = np.sqrt(x*x+y*y+z*z)
80  x = x/l
81  y = y/l
82  z = z/l
83 
84  # compute the dot product
85  dot = x.groupby(level=KL).prod()+y.groupby(level=KL).prod()+z.groupby(level=KL).prod()
86 
87  # multiply the energy of all prongs in each event together
88  EProd = df['calE'].groupby(level=KL).prod()
89 
90  # return a dataframe with a single column of the invariant mass
91  deadscale = 0.8747
92  return 1000*deadscale*np.sqrt(2*EProd*(1-dot))
93  # note NaNs can be removed by (df == df)
94 kMass = Var(kMass)
95 
96 if __name__ == '__main__':
97  import time
98 
99  start = time.time()
100  # Latest hdf5s as of 22-04-2019
101  dMC = '/pnfs/nova/persistent/users/karlwarb/HDF5-Training-19-02-26/ND-GIBUU-FHC'
102  filesMC = [os.path.join(dMC,f) for f in os.listdir(dMC) if 'h5caf.h5' in f]
103  tablesMC = loader(filesMC, limit=100)
104 
105  dData = '/pnfs/nova/persistent/users/karlwarb/HDF5-Training-19-02-26/ND-Data-FHC'
106  filesData = [os.path.join(dData,f) for f in os.listdir(dData) if 'h5caf.h5' in f]
107  tablesData = loader(filesData, limit=100)
108 
109  # Define cuts
110  # The cut class knows how to interpret & and ~
111  cutTot = kTwoProng & kGammaCut & kContain & kPlaneContig & kPlaneGap
112  cutBkg = cutTot & ~kTruePi0
113 
114  # Make Spectra
115  data = spectrum(tablesData, cutTot, kMass)
116  bkg = spectrum(tablesMC, cutBkg, kMass)
117  tot = spectrum(tablesMC, cutTot, kMass)
118 
119  print time.time() - start
120  tablesData.Go()
121  tablesMC.Go()
122  print time.time() - start
123  POT = data.POT()
124 
125  print('Found ' + str(data.POT()) + ' POT. Scaling to ' + str(POT) + ' POT.')
126 
127  print('Selected ' + str(data.entries()) + ' events in data.')
128  print('Selected ' + str(tot.entries()) + ' events in MC.')
129  print('Selected ' + str(bkg.entries()) + ' background.')
130 
131  # Do an analysis!
132  # With Spectra
133  inttot = tot.integral(POT=POT)
134  intbkg = bkg.integral(POT=POT)
135  pur = (inttot - intbkg) / inttot
136  print('This selection has a pi0 purity of ' + str(pur))
137 
138  # With histograms
139  nbins = 8
140  range = (0,400)
141  d, bins = data.histogram(nbins,range, POT=POT)
142  m, _ = tot.histogram(nbins,range, POT=POT)
143  b, _ = bkg.histogram(nbins,range, POT=POT)
144 
145  def gaussian(x, x0, a, stdev, o):
146  return a * np.exp( - ((x - x0) / stdev) ** 2 / 2) + o
147 
148  centers = (bins[:-1] + bins[1:])/2
149 
150  # A bug in scipy.optimize.curvefit requires these to be float64s instead of float32s.
151  dataparam, datacov = curve_fit(gaussian, centers.astype(np.float64), d, p0=[135., np.max(d), 15., 0])
152  mcparam, mccov = curve_fit(gaussian, centers.astype(np.float64), m, p0=[135., np.max(m), 15., 0])
153 
154  dataerr = np.sqrt(np.diag(datacov))
155  mcerr = np.sqrt(np.diag(mccov))
156 
157  datamu = 'Data $\mu$: ' + '%.1f'%dataparam[0] + '$\pm$' + '%.1f'%dataerr[0]
158  datasi = 'Data $\sigma$: ' + '%.1f'%dataparam[2] + '$\pm$' + '%.1f'%dataerr[2]
159  mcmu = 'MC $\mu$: ' + '%.1f'%mcparam[0] + '$\pm$' + '%.1f'%mcerr[0]
160  mcsi = 'MC $\sigma$: ' + '%.1f'%mcparam[2] + '$\pm$' + '%.1f'%mcerr[2]
161 
162  # <codecell>
163  # Plots time
164  plt.figure(1,figsize=(6,4))
165 
166  # A histogram with 1 entry in each bin, Use our data as the weights.
167  plt.hist(bins[:-1], bins, weights=m, histtype='step', color='xkcd:red', label='$\pi^0$ Signal')
168  plt.hist(bins[:-1], bins, weights=b, color='xkcd:dark blue', label='Background')
169 
170  # Compute some errors
171  derr = poisson_conf_interval(d,'frequentist-confidence')
172  plt.errorbar(centers, d, yerr=[d-derr[0],derr[1]-d], fmt='ko', label='ND Data')
173 
174  plt.xlabel('M$_{\gamma\gamma}$')
175  plt.ylabel('Events')
176 
177  # I want the legend for the pi0 signal to be a line instead of an empty box
178  handles, labels = plt.gca().get_legend_handles_labels()
179  handles[0] = plt.Line2D([], [], c=handles[0].get_edgecolor())
180 
181  # I want data listed first in the legend even tho we plotted it last
182  handles[0],handles[1],handles[2] = handles[2],handles[0],handles[1]
183  labels[0],labels[1],labels[2] = labels[2],labels[0],labels[1]
184 
185  plt.legend(loc='upper right', handles=handles, labels=labels)
186 
187  # Print the text for the fit parameters
188  plt.text(0.7, 0.65, datamu, color='k', fontsize=12, horizontalalignment='left', verticalalignment='center', \
189  transform=plt.gca().transAxes)
190  plt.text(0.7, 0.59, datasi, color='k', fontsize=12, horizontalalignment='left', verticalalignment='center', \
191  transform=plt.gca().transAxes)
192  plt.text(0.7, 0.53, mcmu, color='xkcd:red', fontsize=12, horizontalalignment='left', verticalalignment='center', \
193  transform=plt.gca().transAxes)
194  plt.text(0.7, 0.48, mcsi, color='xkcd:red', fontsize=12, horizontalalignment='left', verticalalignment='center', \
195  transform=plt.gca().transAxes)
196 
197  plt.show()
def gaussian(x, x0, a, stdev, o)
Definition: pi0_spectra.py:145
bool print
T prod(const std::vector< T > &v)
Definition: prod.hpp:17