# add_data.py
import os
import time
import sys

# Extend the module search path before the PandAna import below.
sys.path.append('../')

import h5py
import numpy as np
import pandas as pd

from keras.models import load_model
from PandAna import *
12 
13 
# Selection cut: true for rows whose 'keep' entry in the 'rec.sel.veto'
# table equals 1.
kVeto = Cut(lambda tables: tables['rec.sel.veto']['keep'] == 1)
15 
def kCVNVar(model):
    """Build a ``Var`` that scores every CVN pixel map with ``model``.

    Args:
        model: Object exposing ``predict(batch)``. It is called once per
            row with a batch holding that single pixel map, and the first
            entry of the returned batch is kept.

    Returns:
        ``Var`` wrapping a function of ``tables``. That function reads
        ``tables['rec.training.cvnmaps']['cvnmap']`` and returns a
        DataFrame with one column per class name and the same index as
        the input column.
    """
    classes = ['numuid', 'nueid', 'nutauid', 'ncid', 'cosmicid']

    def kVar(tables):
        pms = tables['rec.training.cvnmaps']['cvnmap']
        # Wrap each map in a length-one batch for predict(), then unwrap
        # the single prediction again.
        scores = pms.apply(lambda x: model.predict(np.array([x]))[0])
        # Spread the per-row score vectors into named columns.
        return pd.DataFrame(scores.values.tolist(),
                            columns=classes,
                            index=scores.index)

    return Var(kVar)
24 
def dfToDict(df):
    """Convert a DataFrame, index included, into a dict of column arrays.

    The index is first turned into ordinary columns via ``reset_index``.
    Every column is then returned as a float32 array with a trailing axis
    of length one appended.

    Note that the float32 cast is applied to the former index columns as
    well; integer values above 2**24 cannot all be represented exactly in
    float32.

    Args:
        df: pandas DataFrame whose columns are convertible to float32.

    Returns:
        dict mapping each column label of ``df.reset_index()`` to its
        float32 array.
    """
    withid = df.reset_index()
    return {
        col: withid[col].values[..., np.newaxis].astype(np.float32)
        for col in withid.columns
    }
31 
if __name__ == '__main__':
    # Usage: add_data.py <input_dir> <stride> <offset> <output_dir>
    #
    # Every file in <input_dir> whose name contains 'h5caf.h5' is a
    # candidate; this invocation handles candidates offset, offset+stride,
    # offset+2*stride, ... of the sorted listing.

    # Miniprod 5 h5s
    d = sys.argv[1]
    stride = int(sys.argv[2])
    offset = int(sys.argv[3])
    # Directory that receives the output ("friend") files. The name
    # `outdir` was used below without ever being defined, so it is now
    # taken from the command line.
    outdir = sys.argv[4] if len(sys.argv) > 4 else None

    print('Adding new cvns to files in '+d)
    print('Stride: '+str(stride)+'; Offset: '+str(offset))
    # os.listdir() returns names in arbitrary order; sort first so that
    # the stride/offset slices of separate invocations are taken from the
    # same ordering.
    files = sorted(f for f in os.listdir(d) if 'h5caf.h5' in f)[offset::stride]
    print('There are '+str(len(files))+' files.')

    if files:
        # Fail before the expensive model loading and inference rather
        # than after the first file has been processed.
        if outdir is None:
            sys.exit('Usage: add_data.py <input_dir> <stride> <offset> <output_dir>')
        try:
            os.makedirs(outdir)
        except OSError:
            # Already existing (possibly created by a concurrent job) is
            # fine; anything else is a real error.
            if not os.path.isdir(outdir):
                raise

    # Make Models
    # Base Model
    modelBase = load_model('models/model_mynet_cos_best.h5')
    # pTp Cut
    modelPTP = load_model('models/model_mynet_ptp_best.h5')

    modellist = [modelBase, modelPTP]
    namelist = ['veto', 'ptpcut']

    t0 = time.time()
    # One file at a time to avoid problems with loading a bunch of pixel maps in memory
    for f in files:
        # Make a loader and the spectra to fill
        tables = loader([os.path.join(d, f)])
        specs = [spectrum(tables, kVeto, kCVNVar(m)) for m in modellist]

        # GO GO GO
        tables.Go()

        # Write the results to a file of the same name in outdir. Mode 'a'
        # creates the file if needed and otherwise adds to it. The context
        # manager closes the file even if writing raises.
        with h5py.File(os.path.join(outdir, f), 'a') as h5:
            for name, s in zip(namelist, specs):
                thedict = dfToDict(s.df())
                for dataset, vals in thedict.items():
                    datastr = 'rec.sel.cvn2020'+name+'/'+dataset

                    # Replace any dataset left over from an earlier run.
                    if datastr in h5:
                        del h5[datastr]
                    h5.create_dataset(datastr, data=vals)

        print('File '+f+' processed at '+str(time.time()-t0))

    print('Finished in '+str(time.time()-t0))
# end of add_data.py