Functions | Variables
PandAna.reco_validation.prod5_pid_validation_data_mc Namespace Reference

Functions

def get_files_nd (dataset_name)
 
def which_prod5pid (pid, dataset_name)
 
def prod5_pid_data_mc (limit, stride, offset, spectra_file, output)
 
def ratio (data, mc, nbins, pot, binrange=(0, 1))
 
def plot_data_mc (data, mc, nbins, xlabel, ylabel, title, name, logy=True)
 

Variables

 invalid
 
 divide
 
list datasets
 
list data_mc_pairs
 
 KLP = KL+['rec.vtx.elastic.fuzzyk.png_idx']
 
 parser = argparse.ArgumentParser('Plot prod5 and prod4 PID distributions')
 
 default
 
 None
 
 type
 
 help
 
 args = parser.parse_args()
 

Function Documentation

def PandAna.reco_validation.prod5_pid_validation_data_mc.get_files_nd (   dataset_name)

Definition at line 37 of file prod5_pid_validation_data_mc.py.

References novadaq::HexUtils.format(), open(), and print.

Referenced by PandAna.reco_validation.prod5_pid_validation_data_mc.prod5_pid_data_mc().

def get_files_nd(dataset_name):
    """Return the list of input files for a dataset.

    Underscores in ``dataset_name`` are replaced by dashes to form the name
    of the dataset directory under ``/lfstev/nnet/R19-02-23-miniprod5/``.

    * If the dashed name contains ``'Data'``, the list is read from the
      ``evaled.txt`` file in that directory, one entry per line (trailing
      newlines stripped).
    * Otherwise every entry of the directory whose name contains
      ``'h5caf.h5'`` is returned, joined onto the directory path.

    Raises:
        OSError: if the directory or ``evaled.txt`` cannot be read.
    """
    dataset_name = dataset_name.replace('_', '-')
    d = '/lfstev/nnet/R19-02-23-miniprod5/' + dataset_name
    if 'Data' in dataset_name:
        print('Retrieving {} files from {}/{}'.format(dataset_name, d, 'evaled.txt'))
        # Use a context manager so the list file is closed as soon as it
        # has been read instead of whenever the handle is garbage collected.
        with open(d + '/evaled.txt') as file_list:
            return [line.rstrip('\n') for line in file_list]
    return [os.path.join(d, f) for f in os.listdir(d) if 'h5caf.h5' in f]
45 
46 # only want to look at the networks that correspond to the beam mode of the files
47 # not the best way to do this but it works
bool print
std::string format(const int32_t &value, const int &ndigits=8)
Definition: HexUtils.cpp:14
procfile open("FD_BRL_v0.txt")
def PandAna.reco_validation.prod5_pid_validation_data_mc.plot_data_mc (   data,
  mc,
  nbins,
  xlabel,
  ylabel,
  title,
  name,
  logy = True 
)

Definition at line 167 of file prod5_pid_validation_data_mc.py.

References novadaq::HexUtils.format(), analysePickle.hist, make_mec_shifts_plots.legend, plot(), print, PandAna.Demos.demo1.range, and PandAna.reco_validation.prod5_pid_validation_data_mc.ratio().

Referenced by PandAna.reco_validation.prod5_pid_validation_data_mc.prod5_pid_data_mc().

def plot_data_mc(data, mc, nbins, xlabel, ylabel, title, name, logy=True):
    """Draw a data/MC comparison with a ratio panel and save it as a PNG.

    The upper panel shows the ``data`` (black) and ``mc`` (red) spectra as
    step histograms with ``nbins`` bins over [0, 1]; both are histogrammed
    with ``POT=data.POT()``. The lower panel shows the Data/MC ratio and its
    error bars as returned by ``ratio``, with a dashed reference line at 1
    and a fixed y range of [0.5, 1.5].

    Args:
        data: spectrum providing ``POT()`` and ``histogram(...)``.
        mc: spectrum providing ``histogram(...)``.
        nbins: number of histogram bins.
        xlabel: x-axis label (placed on the ratio panel).
        ylabel: y-axis label of the upper panel.
        title: figure title.
        name: output path without extension; ``'.png'`` is appended.
        logy: if true (default), use a logarithmic y axis on the upper panel.
    """
    pot = data.POT()
    fig, ax = plt.subplots(2, sharex=True,
                           gridspec_kw={'hspace': 0, 'height_ratios': [3, 1]})

    for spec, color, label in zip([data, mc], ['k', 'r'], ['data', 'mc']):
        n, bins = spec.histogram(nbins, range=(0, 1), POT=pot)
        ax[0].hist(bins[:-1], bins, weights=n, histtype='step', color=color, label=label)

    # Dashed reference line at Data/MC == 1.
    one_x = np.linspace(0, 1)
    ax[1].plot(one_x, np.ones_like(one_x), color='k', linestyle='dashed')
    nratio, binsratio, err, bin_centers = ratio(data, mc, nbins, pot)
    ax[1].hist(binsratio[:-1], binsratio, weights=nratio, histtype='step', color='k')
    ax[1].errorbar(bin_centers, nratio, yerr=err, ecolor='k', fmt='k.')

    ax[1].set_xlabel(xlabel)
    ax[1].set_ylabel('Data / MC')
    ax[1].set_xlim([0, 1])
    ax[1].set_ylim([0.5, 1.5])
    ax[0].set_xlim([0, 1])
    ax[0].set_ylabel(ylabel)
    # Honour the logy flag; previously the log scale was applied
    # unconditionally and the parameter had no effect.
    if logy:
        ax[0].set_yscale('log')

    ax[0].legend(loc='best')

    plt.tight_layout()
    plt.subplots_adjust(top=0.93)
    fig.suptitle(title)
    fig.savefig(name + '.png')
    print('Created {}.png'.format(name))
    # Close exactly the figure created here rather than "the current figure".
    plt.close(fig)
198 
bool print
std::string format(const int32_t &value, const int &ndigits=8)
Definition: HexUtils.cpp:14
def plot_data_mc(data, mc, nbins, xlabel, ylabel, title, name, logy=True)
void plot(std::string label, std::map< std::string, std::map< std::string, Spectrum * >> &plots, bool log)
def PandAna.reco_validation.prod5_pid_validation_data_mc.prod5_pid_data_mc (   limit,
  stride,
  offset,
  spectra_file,
  output 
)

Definition at line 57 of file prod5_pid_validation_data_mc.py.

References PandAna.reco_validation.prod5_pid_validation.ApplySlcCutsPngDF(), allInOneTrainingPlots.df, novadaq::HexUtils.format(), PandAna.reco_validation.prod5_pid_validation_data_mc.get_files_nd(), parse_dependency_file_t.list, PandAna.core.core.load_spectra(), PandAna.reco_validation.prod5_pid_validation_data_mc.plot_data_mc(), print, PandAna.core.core.save_spectra(), and PandAna.reco_validation.prod5_pid_validation_data_mc.which_prod5pid().

def prod5_pid_data_mc(limit, stride, offset, spectra_file, output):
    """Produce or load the PID spectra and make the data/MC plots.

    Two modes, selected by ``spectra_file``:

    * ``spectra_file is None``: for every dataset in ``datasets`` build a
      slice-level and a prong-level loader, fill one prong spectrum per
      (cut level, dataset, PID variable), apply the slice-level selection to
      the prong spectra afterwards, and save everything with
      ``save_spectra`` to
      ``<output>/prod5_pid_data_mc_spectra[_s<stride>][_l<limit>][_o<offset>].hdf5``.
      No plots are made in this mode.
    * otherwise: load the spectra named ``'<cut>_<var>_<dataset>'`` from
      ``spectra_file`` and, if ``spectra_file`` is truthy, draw one
      ``plot_data_mc`` figure per (cut level, data/MC pair, PID variable).

    Args:
        limit, stride, offset: forwarded to ``loader``; also appended to the
            output file name when truthy.
        spectra_file: path of a previously saved spectra file, or ``None``.
        output: directory prefix for the saved spectra file.

    NOTE(review): plots are written under ``data_mc_plots_technote/``
    relative to the working directory, independent of ``output``.
    """
    if spectra_file is None:
        loaders = {}
        slc_tables = {}    # loader for slice level cuts only
        prong_tables = {}  # loader for prong level cuts only
        for dataset_name in datasets:
            slc_tables[dataset_name] = loader(get_files_nd(dataset_name),
                                              stride=stride,
                                              limit=limit,
                                              offset=offset)
            prong_tables[dataset_name] = loader(get_files_nd(dataset_name),
                                                stride=stride,
                                                limit=limit,
                                                offset=offset, index=KLP)
            # read same data only once for both loaders
            loaders[dataset_name] = associate([slc_tables[dataset_name],
                                               prong_tables[dataset_name]])

        specs = {}
        slc_specs = {}
        save_specs = []
        save_labels = []
        for cut_name, cut in cut_levels.items():
            slc_specs[cut_name] = {}
            specs[cut_name] = {}
            for dataset_name in datasets:
                # use dummy var for slice spectra with slice cuts
                slc_specs[cut_name][dataset_name] = \
                    spectrum(slc_tables[dataset_name], cut, kCaloE)
                specs[cut_name][dataset_name] = {}
                for pid in pids:
                    for var_name in [pid+'_prod4', which_prod5pid(pid, dataset_name)]:
                        # apply only particle truth cuts to prong level var;
                        # the slice cuts are applied after the loaders have run
                        specs[cut_name][dataset_name][var_name] = \
                            spectrum(prong_tables[dataset_name], kProngCuts, pid_scores[var_name])

        for load in loaders.values():
            load.Go()  # go, go, go

        filename = output + '/prod5_pid_data_mc_spectra'
        if stride:
            filename += '_s{}'.format(stride)
        if limit:
            filename += '_l{}'.format(limit)
        if offset:
            filename += '_o{}'.format(offset)
        filename += '.hdf5'
        for cut_name in cut_levels:
            for dataset_name in datasets:
                slc_spec = slc_specs[cut_name][dataset_name]
                for pid in pids:
                    for var_name in [pid+'_prod4', which_prod5pid(pid, dataset_name)]:
                        prong_spec = specs[cut_name][dataset_name][var_name]

                        # get prong dataframe
                        df_prong = prong_spec.df()
                        df_weight = prong_spec._weight
                        # apply slice cuts to prong dataframe
                        df_prong = ApplySlcCutsPngDF(df_prong, slc_spec.df())
                        df_weight = ApplySlcCutsPngDF(df_weight, slc_spec.df())
                        # reset prong spectrum with new dataframe
                        prong_spec._df = df_prong
                        prong_spec._weight = df_weight
                        # save
                        save_specs.append(prong_spec)
                        save_labels.append('{}_{}_{}'.format(cut_name, var_name, dataset_name))

        save_spectra(filename,
                     save_specs,
                     save_labels)

    else:
        print('Loading spectra from {}'.format(spectra_file))

        specs = {}
        for cut_name in cut_levels:
            specs[cut_name] = {}
            for dataset_name in datasets:
                specs[cut_name][dataset_name] = {}
                for pid in pids:
                    for var_name in [pid+'_prod4', which_prod5pid(pid, dataset_name)]:
                        spec_name = '{}_{}_{}'.format(cut_name, var_name, dataset_name)
                        specs[cut_name][dataset_name][var_name] = load_spectra(spectra_file,
                                                                               spec_name)

    # Plots are only drawn from previously saved spectra.
    if spectra_file:
        for cut_name in cut_levels:
            for data_mc in data_mc_pairs:
                for pid in pids:
                    for var_name in [pid+'_prod4', which_prod5pid(pid, data_mc['data'])]:
                        plot_data_mc(specs[cut_name][data_mc['data']][var_name],
                                     specs[cut_name][data_mc['mc']][var_name],
                                     30, pid, 'Prongs',
                                     '{} {} {}'.format(cut_name, data_mc['horn'], var_name),
                                     'data_mc_plots_technote/data_mc_ND_{}_{}_{}'.format(data_mc['horn'],
                                                                                         cut_name,
                                                                                         var_name))
155 
156 
def load_spectra(fname, groups)
Definition: core.py:164
def prod5_pid_data_mc(limit, stride, offset, spectra_file, output)
bool print
std::string format(const int32_t &value, const int &ndigits=8)
Definition: HexUtils.cpp:14
def plot_data_mc(data, mc, nbins, xlabel, ylabel, title, name, logy=True)
def save_spectra(fname, spectra, groups)
Definition: core.py:116
def PandAna.reco_validation.prod5_pid_validation_data_mc.ratio (   data,
  mc,
  nbins,
  pot,
  binrange = (0, 1) 
)

Definition at line 157 of file prod5_pid_validation_data_mc.py.

Referenced by PandAna.reco_validation.prod5_pid_validation_data_mc.plot_data_mc().

def ratio(data, mc, nbins, pot, binrange=(0, 1)):
    """Return the bin-by-bin Data/MC ratio and its statistical error.

    Both spectra are histogrammed with ``nbins`` bins over ``binrange`` and
    ``POT=pot``.

    Returns:
        A tuple ``(ratio, bin_edges, err, bin_centers)`` where ``ratio`` is
        ``data / mc`` per bin, ``bin_edges`` are the edges of the data
        histogram, ``err`` is ``sqrt(1/mc + 1/data) * data / mc`` and
        ``bin_centers`` are the midpoints of consecutive edges.

    Bins with zero content yield ``nan``/``inf`` entries, exactly as plain
    NumPy division does.
    """
    h1, bins1 = data.histogram(bins=nbins, range=binrange, POT=pot)
    h2, _ = mc.histogram(bins=nbins, range=binrange, POT=pot)

    bin_centers = (bins1[:-1] + bins1[1:])/2
    # Empty bins divide by zero. Silence the resulting floating-point
    # warnings here, locally, rather than relying on a process-wide
    # np.seterr having been applied before this function is called.
    with np.errstate(divide='ignore', invalid='ignore'):
        # calculate statistical error on the ratio
        err = np.sqrt(1/h2 + 1/h1) * h1 / h2
        data_over_mc = h1 / h2
    return data_over_mc, bins1, err, bin_centers
165 
166 
def PandAna.reco_validation.prod5_pid_validation_data_mc.which_prod5pid (   pid,
  dataset_name 
)

Definition at line 48 of file prod5_pid_validation_data_mc.py.

Referenced by PandAna.reco_validation.prod5_pid_validation_data_mc.prod5_pid_data_mc().

def which_prod5pid(pid, dataset_name):
    """Return the prod5 PID variable name matching a dataset's horn mode.

    ``'_prod5rhc'`` is appended to ``pid`` when ``dataset_name`` contains
    ``'RHC'``; otherwise ``'_prod5fhc'`` is appended when it contains
    ``'FHC'``. If neither tag is present, ``pid`` is returned unchanged.
    """
    if 'RHC' in dataset_name:
        return pid + '_prod5rhc'
    if 'FHC' in dataset_name:
        return pid + '_prod5fhc'
    return pid
55 
56 # main function

Variable Documentation

PandAna.reco_validation.prod5_pid_validation_data_mc.args = parser.parse_args()

Definition at line 213 of file prod5_pid_validation_data_mc.py.

list PandAna.reco_validation.prod5_pid_validation_data_mc.data_mc_pairs
Initial value:
1 = [{'data': 'ND_Data_FHC',
2  'mc' : 'ND_Mont_FHC',
3  'horn': 'FHC'},
4  {'data': 'ND_Data_RHC',
5  'mc' : 'ND_Mont_RHC',
6  'horn': 'RHC'}]

Definition at line 27 of file prod5_pid_validation_data_mc.py.

list PandAna.reco_validation.prod5_pid_validation_data_mc.datasets
Initial value:
1 = ['ND_Data_FHC',
2  'ND_Data_RHC',
3  'ND_Mont_FHC',
4  'ND_Mont_RHC']

Definition at line 23 of file prod5_pid_validation_data_mc.py.

PandAna.reco_validation.prod5_pid_validation_data_mc.default

Definition at line 203 of file prod5_pid_validation_data_mc.py.

PandAna.reco_validation.prod5_pid_validation_data_mc.divide

Definition at line 8 of file prod5_pid_validation_data_mc.py.

PandAna.reco_validation.prod5_pid_validation_data_mc.help

Definition at line 204 of file prod5_pid_validation_data_mc.py.

PandAna.reco_validation.prod5_pid_validation_data_mc.invalid

Definition at line 8 of file prod5_pid_validation_data_mc.py.

PandAna.reco_validation.prod5_pid_validation_data_mc.KLP = KL+['rec.vtx.elastic.fuzzyk.png_idx']

Definition at line 33 of file prod5_pid_validation_data_mc.py.

PandAna.reco_validation.prod5_pid_validation_data_mc.None

Definition at line 203 of file prod5_pid_validation_data_mc.py.

PandAna.reco_validation.prod5_pid_validation_data_mc.parser = argparse.ArgumentParser('Plot prod5 and prod4 PID distributions')

Definition at line 202 of file prod5_pid_validation_data_mc.py.

PandAna.reco_validation.prod5_pid_validation_data_mc.type

Definition at line 203 of file prod5_pid_validation_data_mc.py.