txt_utils.py
Go to the documentation of this file.
1 from __future__ import division
2 import numpy as np
3 
# Upper bound on the number of examples (events or prongs) read from one file.
MAX_EXAMPLES = 2000
# Multiplier applied to pixel-map features by the preprocessing functions.
PIXEL_SCALE = 100.0
# Multiplier applied to vertex coordinates by the preprocessing functions.
VERTEX_SCALE = 0.01
# Threshold on (target energy / summed deposited energy) used by
# energy_ratio_cut.
CUT_THRESHOLD = 0.05
8 
def parse_text(line):
    """
    Build a data dictionary from a single line of text.

    The line is split on single spaces and only the first 35 fields are
    considered. Fields 0-5 are stored as strings; the other selected
    fields are converted to int or float.

    Args
        line (str): one line of raw, space-separated data

    Returns
        (dict) with keys/values of extracted variables. 'event_tag' is the
        integer obtained by concatenating the run, subrun, event, subevent
        and slice strings; 'prong_tag' appends sheId to that string, and is
        None when sheId is '-1'.

    Raises
        IndexError: if the line has too few fields
        ValueError: if a field cannot be converted to a number
    """
    fields = line.split(' ')[:35]

    # Identifier columns are kept as raw strings.
    id_keys = ('run', 'subrun', 'event', 'subevent', 'slice', 'sheId')
    info = {}
    for position, key in enumerate(id_keys):
        info[key] = fields[position]

    # event and prong identifiers
    event_prefix = ''.join(info[key] for key in id_keys[:5])
    info['event_tag'] = int(event_prefix)
    if info['sheId'] == '-1':
        info['prong_tag'] = None
    else:
        info['prong_tag'] = int(event_prefix + info['sheId'])

    # (key, column, converter) for every numeric variable, in output order.
    typed_columns = (
        # current reconstruction
        ('sheE', 6, float),
        ('sheNuE', 7, float),
        # location
        ('view', 17, int),
        ('planeLocal', 18, int),
        ('cellLocal', 19, int),
        ('plane', 20, int),
        ('cell', 21, int),
        ('energy', 22, float),
        # true prong
        ('trueProng', 23, int),
        ('truePx', 24, float),
        ('truePy', 25, float),
        ('truePz', 26, float),
        ('trueE', 27, float),
        # true nu
        ('trueNuPdg', 31, int),
        ('trueCC', 32, int),
        ('trueNuMode', 33, int),
        ('trueNuE', 34, float),
    )
    for key, position, convert in typed_columns:
        info[key] = convert(fields[position])
    return info
55 
def check_electron_prong(deposit):
    """
    Tell whether a deposit belongs to an electron prong.

    Args
        deposit: dict with a 'trueProng' entry

    Returns
        True when 'trueProng' is 11 or -11 (presumably the particle code
        of an electron/positron), False otherwise
    """
    return deposit['trueProng'] in (11, -11)
61 
def check_nue_event(deposit):
    """
    Tell whether a deposit belongs to a nue event.

    Args
        deposit: dict with a 'trueNuPdg' entry

    Returns
        True when 'trueNuPdg' is 12 or -12, False otherwise
    """
    return deposit['trueNuPdg'] in (12, -12)
67 
def check_muon_prong(deposit):
    """
    Tell whether a deposit belongs to a muon prong.

    Args
        deposit: dict with a 'trueProng' entry

    Returns
        True when 'trueProng' is 13 or -13 (presumably the particle code
        of a muon/antimuon), False otherwise
    """
    return deposit['trueProng'] in (13, -13)
73 
def check_numu_event(deposit):
    """
    Tell whether a deposit belongs to a numu event.

    Args
        deposit: dict with a 'trueNuPdg' entry

    Returns
        True when 'trueNuPdg' is 14 or -14, False otherwise
    """
    return deposit['trueNuPdg'] in (14, -14)
79 
def add_event_energy(targets, i, deposit):
    """
    Store the neutrino energy as the regression target of example i.

    Args
        targets: container indexed by (row, column); column 0 of row i is
            overwritten
        i: row index of the example
        deposit: dict with a 'trueNuE' entry
    """
    nu_energy = deposit['trueNuE']
    targets[i, 0] = nu_energy
85 
def add_prong_energy(targets, i, deposit):
    """
    Store lepton energy and momentum as the regression targets of example i.

    Columns 0-3 of row i receive 'trueE', 'truePx', 'truePy' and 'truePz'
    respectively.

    Args
        targets: container indexed by (row, column) with at least 4 columns
        i: row index of the example
        deposit: dict with 'trueE', 'truePx', 'truePy' and 'truePz' entries
    """
    ordered_keys = ('trueE', 'truePx', 'truePy', 'truePz')
    for column, key in enumerate(ordered_keys):
        targets[i, column] = deposit[key]
94 
def add_vertex(vertices, i, deposit):
    """
    Record the deposit's cell as the vertex X or Y of example i.

    Args
        vertices: container indexed by (row, column); column 0 is written
            when the deposit's view is 0, column 1 for any other view
        i: row index of the example
        deposit: dict with 'view' and 'cell' entries
    """
    column = 0 if deposit['view'] == 0 else 1
    vertices[i, column] = deposit['cell']
103 
def divide_plane(features, i, deposit):
    """
    Write a deposit's energy into the pixel map, halving the plane axis.

    The plane index is (planeLocal + 30) // 2 and the cell index is
    cellLocal + 70, so two consecutive planes share one pixel row.

    Args
        features: container indexed by (example, view, plane, cell)
        i: index of the example
        deposit: dict with 'view', 'planeLocal', 'cellLocal' and 'energy'
    """
    energy = deposit['energy']
    view = deposit['view']
    half_plane = (deposit['planeLocal'] + 30) // 2
    cell = deposit['cellLocal'] + 70
    features[i, view, half_plane, cell] = energy
109 
def constant_scale(features, vertices, targets):
    """
    Preprocess data by constant scaling.

    features is multiplied by PIXEL_SCALE and vertices by VERTEX_SCALE.
    The arrays are modified in place, so callers that ignore the return
    value still see the scaled data; the arrays are also returned so the
    function can be used like the other preprocessing functions.

    Args
        features: numpy array of pixel maps (floating dtype)
        vertices: numpy array of vertices (floating dtype)
        targets: numpy array of regression targets (left untouched)

    Returns
        (features, vertices, targets) after scaling
    """
    # "x = x * c" would only rebind the local name and leave the caller's
    # array unchanged; the augmented assignment updates the array itself.
    features *= PIXEL_SCALE
    vertices *= VERTEX_SCALE
    return features, vertices, targets
116 
def energy_ratio_cut(features, vertices, targets):
    """
    Remove examples where the sum of energy deposits is too low.

    An example is kept when targets[:, 0] divided by the summed pixel-map
    energy is below CUT_THRESHOLD. The surviving features are multiplied by
    PIXEL_SCALE and the surviving vertices by VERTEX_SCALE.

    The selection changes the number of rows, so it cannot be applied in
    place: callers must use the returned arrays.

    Args
        features: 4d numpy array of pixel maps
        vertices: 2d numpy array of vertices
        targets: 2d numpy array of regression targets

    Returns
        (features, vertices, targets) restricted to the selected examples
    """
    sum_energy = np.sum(features, axis=(1, 2, 3))
    # An empty pixel map gives a zero denominator; the resulting inf/nan
    # ratio compares False against the threshold, so the example is dropped.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = targets[:, 0] / sum_energy
    select = ratio < CUT_THRESHOLD
    features = features[select, :, :, :] * PIXEL_SCALE
    vertices = vertices[select, :] * VERTEX_SCALE
    targets = targets[select, :]
    return features, vertices, targets
127 
def event_iter_from_txt(file_path, pixel_map_dim, check_criterion, add_target, formula, preprocess):
    """
    Generic function to iterate events from a txt file.

    Consecutive lines sharing the same event tag form one event. The first
    deposit of each event decides, through check_criterion, whether the
    whole event is kept. At most MAX_EXAMPLES events are read.

    Args
        file_path: string of txt file path
        pixel_map_dim: tuple of pixel map dimensions
        check_criterion: boolean function for event/prong inclusion criterion
        add_target: function(targets, i, deposit) filling the regression
            target of example i; targets has a single column. None selects
            add_event_energy.
        formula: function for adding features
        preprocess: function for preprocessing; it may modify the arrays in
            place or return a (features, vertices, targets) tuple. None
            skips preprocessing.

    Returns
        features: 4d numpy array of pixel maps
        vertices: 2d numpy array of vertices
        targets: 2d numpy array of regression targets
    """
    if add_target is None:
        add_target = add_event_energy
    features = np.zeros((MAX_EXAMPLES, 2, pixel_map_dim[0], pixel_map_dim[1]))
    vertices = np.zeros((MAX_EXAMPLES, 2))
    targets = np.zeros((MAX_EXAMPLES, 1))
    count = 0  # number of accepted events so far
    current_event_tag = None
    match = False
    with open(file_path, 'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            deposit = parse_text(line)
            if current_event_tag != deposit['event_tag']:
                current_event_tag = deposit['event_tag']
                # Re-evaluate for every new event so that deposits of a
                # rejected event are not added to the previous example.
                match = bool(check_criterion(deposit))
                if match:
                    if count == MAX_EXAMPLES:
                        break
                    add_target(targets, count, deposit)
                    count += 1
            if match:
                row = count - 1
                formula(features, row, deposit)
                if deposit['prong_tag'] is not None:
                    add_vertex(vertices, row, deposit)
    features = features[0:count, :, :, :]
    vertices = vertices[0:count, :]
    targets = targets[0:count, :]
    if preprocess is not None:
        processed = preprocess(features, vertices, targets)
        # In-place preprocessors return None; filtering ones return arrays.
        if processed is not None:
            features, vertices, targets = processed
    return features, vertices, targets
169 
def prong_iter_from_txt(file_path, pixel_map_dim, check_criterion, formula, preprocess):
    """
    Generic function to iterate prongs from a txt file.

    Lines without a prong tag are skipped. Consecutive lines sharing the
    same prong tag form one prong. The first deposit of each prong decides,
    through check_criterion, whether the whole prong is kept. At most
    MAX_EXAMPLES prongs are read.

    Args
        file_path: string of txt file path
        pixel_map_dim: tuple of pixel map dimensions
        check_criterion: boolean function for event/prong inclusion criterion
        formula: function for adding features
        preprocess: function for preprocessing; it may modify the arrays in
            place or return a (features, vertices, targets) tuple. None
            skips preprocessing.

    Returns
        features: 4d numpy array of pixel maps
        vertices: 2d numpy array of vertices
        targets: 2d numpy array of regression targets with 4 columns
            (trueE, truePx, truePy, truePz) as written by add_prong_energy
    """
    features = np.zeros((MAX_EXAMPLES, 2, pixel_map_dim[0], pixel_map_dim[1]))
    vertices = np.zeros((MAX_EXAMPLES, 2))
    # add_prong_energy writes columns 0-3, so four columns are required.
    targets = np.zeros((MAX_EXAMPLES, 4))
    count = 0  # number of accepted prongs so far
    current_prong_tag = None
    match = False
    with open(file_path, 'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            deposit = parse_text(line)
            if deposit['prong_tag'] is None:
                continue
            if current_prong_tag != deposit['prong_tag']:
                current_prong_tag = deposit['prong_tag']
                # Re-evaluate for every new prong so that deposits of a
                # rejected prong are not added to the previous example.
                match = bool(check_criterion(deposit))
                if match:
                    if count == MAX_EXAMPLES:
                        break
                    add_prong_energy(targets, count, deposit)
                    count += 1
            if match:
                row = count - 1
                formula(features, row, deposit)
                add_vertex(vertices, row, deposit)
    features = features[0:count, :, :, :]
    vertices = vertices[0:count, :]
    targets = targets[0:count, :]
    if preprocess is not None:
        processed = preprocess(features, vertices, targets)
        # In-place preprocessors return None; filtering ones return arrays.
        if processed is not None:
            features, vertices, targets = processed
    return features, vertices, targets
213 
214 
216  """
217  Pixelmap generator class for Keras.
218  """
219 
def __init__(self, pixel_map_type, pixel_map_dim, check_criterion, formula, preprocess):
    """
    Store the generator configuration on the instance.

    Args
        pixel_map_type: 'prong' or 'event'
        pixel_map_dim: tuple (pixel_map_dim1, pixel_map_dim2)
        check_criterion: boolean function for event/prong inclusion criterion
        formula: function for adding features
        preprocess: function for preprocessing
    """
    # What kind of pixel map to build and how large it is.
    self.pixel_map_type, self.pixel_map_dim = pixel_map_type, pixel_map_dim
    # Callbacks used while reading the txt files.
    self.check_criterion, self.formula, self.preprocess = (
        check_criterion,
        formula,
        preprocess,
    )
234 
def flow(self, txt_files, batch_num, batch_size):
    """
    Return pixelmaps from txt files indefinitely.

    Each file is loaded in turn, batch_num random batches (sampled with
    replacement) are drawn from it, then the next file is loaded. After the
    last file the cycle restarts from the first one.

    Args
        txt_files: list of file names
        batch_num: number of batches from each file
        batch_size: size of each batch

    Yields
        ([view 0 maps, view 1 maps, vertices], targets) where each map
        array has shape (batch_size, 1, pixel_map_dim1, pixel_map_dim2)

    Raises
        ValueError: if txt_files is empty, if pixel_map_type is neither
            'prong' nor 'event', or if a file yields no example
    """
    if len(txt_files) == 0:
        # Otherwise the outer loop would spin forever without yielding.
        raise ValueError('txt_files must contain at least one file')
    while True:
        for txt_file in txt_files:
            if self.pixel_map_type == 'prong':
                X, V, y = prong_iter_from_txt(
                    txt_file, self.pixel_map_dim, self.check_criterion,
                    self.formula, self.preprocess)
            elif self.pixel_map_type == 'event':
                # event_iter_from_txt takes the target filler as its
                # fourth argument.
                X, V, y = event_iter_from_txt(
                    txt_file, self.pixel_map_dim, self.check_criterion,
                    add_event_energy, self.formula, self.preprocess)
            else:
                raise ValueError(
                    "pixel_map_type must be 'prong' or 'event', got %r"
                    % (self.pixel_map_type,))
            n = X.shape[0]
            if n == 0:
                # randint(low=0, high=0) would fail with an opaque message.
                raise ValueError('no examples selected from %r' % (txt_file,))
            pixel_map_dim1 = X.shape[2]
            pixel_map_dim2 = X.shape[3]
            for _ in range(batch_num):
                select = np.random.randint(low=0, high=n, size=batch_size)
                batchX = X[select, :, :, :]
                # One single-channel input per detector view.
                batchX_1 = batchX[:, 0, :, :].reshape(batch_size, 1, pixel_map_dim1, pixel_map_dim2)
                batchX_2 = batchX[:, 1, :, :].reshape(batch_size, 1, pixel_map_dim1, pixel_map_dim2)
                batchV = V[select, :]
                batchY = y[select, :]
                yield [batchX_1, batchX_2, batchV], batchY
def check_nue_event(deposit)
Definition: txt_utils.py:62
def __init__(self, pixel_map_type, pixel_map_dim, check_criterion, formula, preprocess)
Definition: txt_utils.py:220
def flow(self, txt_files, batch_num, batch_size)
Definition: txt_utils.py:235
def divide_plane(features, i, deposit)
Definition: txt_utils.py:104
def prong_iter_from_txt(file_path, pixel_map_dim, check_criterion, formula, preprocess)
Definition: txt_utils.py:170
def check_numu_event(deposit)
Definition: txt_utils.py:74
def add_prong_energy(targets, i, deposit)
Definition: txt_utils.py:86
def constant_scale(features, vertices, targets)
Definition: txt_utils.py:110
def add_event_energy(targets, i, deposit)
Definition: txt_utils.py:80
def parse_text(line)
Definition: txt_utils.py:9
def check_electron_prong(deposit)
Definition: txt_utils.py:56
procfile open("FD_BRL_v0.txt")
def add_vertex(vertices, i, deposit)
Definition: txt_utils.py:95
def event_iter_from_txt(file_path, pixel_map_dim, check_criterion, add_target, formula, preprocess)
Definition: txt_utils.py:128
def check_muon_prong(deposit)
Definition: txt_utils.py:68
def energy_ratio_cut(features, vertices, targets)
Definition: txt_utils.py:117