compare_h5_caf.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import argparse
4 import time
5 import re
6 import os
7 import sys
8 import h5py
9 import numpy as np
10 import warnings
11 
# RuntimeWarnings are suppressed for the whole run.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Dataset names that are left out of the leaf comparison for 'rec' and
# 'neutrino' groups (see the comparison loop further down).
KL = ['run', 'subrun', 'cycle', 'evt', 'subevt']
# Any HDF5 group whose name contains one of these substrings is not checked.
skip = ['genVersion', 'daughterlist', 'motherlist', 'fuzzyk.png.bpf','hough','vdt','spill.bpos', 'spill.int', 'training', 'cvnmap']

# Command-line interface.  The description sentences are joined with '\r',
# exactly as in the single-literal form of this text.
parser = argparse.ArgumentParser(
    description='\r'.join([
        'Compare contents of a HDF5 file with its CAF equivalent.',
        'Uses TTree Draw to check leaf content from the CAF files and prints a message if its different from the HDF5 file.',
        'Some branches are just too expensive to evaluate or the leaves are too deep for TTree::Draw to work. Others have no data filled or are defunct.',
        'Branches that contain these are skipped : ' + str(skip),
    ])
)
parser.add_argument(
    '-h5', '--h5file',
    type=str,
    required=True,
    help='Path to hdf5 file',
)
parser.add_argument(
    '-r', '--rootfile',
    type=str,
    required=True,
    help='Path to CAF file',
)
parser.add_argument(
    '-br', '--branches',
    type=str,
    help='Check only branches matching regex condition',
)
parser.add_argument(
    '-a', '--all',
    action='store_true',
    help='Check all branches',
)
opts = parser.parse_args()
27 
# ROOT is imported only after the arguments have been parsed, and is told to
# leave the command line alone before anything else from it is touched.
import ROOT
ROOT.PyConfig.IgnoreCommandLineOptions = True
# The wildcard import is kept on purpose: later code refers to ROOT names
# without the `ROOT.` prefix.
from ROOT import *
ROOT.gROOT.SetBatch(True)

# Resolve both inputs to absolute paths, then open them read-only.
h5file, rootfile = [os.path.abspath(path) for path in (opts.h5file, opts.rootfile)]

fh5 = h5py.File(h5file, 'r')
fcaf = ROOT.TFile(rootfile, 'read')
37 
# Decide which top-level HDF5 keys to check.  A --branches regex takes
# precedence over --all; with neither option the selection stays empty.
if opts.branches:
    keystocheck = []
    for candidate in fh5.keys():
        if re.match(opts.branches, candidate):
            keystocheck.append(candidate)
elif opts.all:
    keystocheck = fh5.keys()
else:
    keystocheck = []

if len(keystocheck) == 0:
    sys.exit('No valid keys found. Please provide either valid option -br or option -a')
45 
def _select_leaves(branchkey, branch):
    """Map one HDF5 group onto its CAF tree, branch name and leaf list.

    Returns a tuple ``(tree name, ROOT branch name, leaf keys)``.  Groups
    whose name starts with none of 'neutrino', 'spill' or 'rec' get an empty
    leaf list, so nothing is compared for them.
    """
    if branchkey.startswith('neutrino'):
        # Rename only the leading 'neutrino'; a later occurrence of the word
        # inside the branch name must be left untouched.
        rootname = 'nu' + branchkey[len('neutrino'):]
        leaves = [key for key in branch.keys()
                  if key not in KL and 'idx' not in key]
        return 'nuTree', rootname, leaves
    if branchkey.startswith('spill'):
        # Spill groups keep the datasets listed in KL; only 'idx' is dropped.
        leaves = [key for key in branch.keys() if 'idx' not in key]
        return 'spillTree', branchkey, leaves
    if branchkey.startswith('rec'):
        leaves = [key for key in branch.keys()
                  if key not in KL and 'idx' not in key]
        return 'recTree', branchkey, leaves
    return 'recTree', branchkey, []


def _read_root_leaf(tree, leaf):
    """Evaluate ``leaf`` with TTree::Draw and return the values as float64."""
    # Large estimate so that Draw keeps every selected value in its buffer.
    tree.SetEstimate(100000000)
    # 'goff' skips the histogram rendering; the values stay available
    # through GetV1().
    tree.Draw(leaf, '', 'goff')
    # Clamp so that a negative row count cannot break the array allocation;
    # an empty result is then reported as an entry-count mismatch.
    nrows = max(int(tree.GetSelectedRows()), 0)
    values = tree.GetV1()
    return np.fromiter((values[i] for i in range(nrows)),
                       dtype=float, count=nrows)


def _leaf_mismatch(h5data, rootdata):
    """Return a message describing how the two arrays differ, or None.

    When the HDF5 data contains NaNs, NaNs are stripped from both arrays and
    only the remaining values are compared.
    """
    if len(h5data) != len(rootdata):
        return "NEntries mismatch!"
    # NaN is the only value that is not equal to itself.
    h5valid = h5data == h5data
    if h5valid.all():
        if not np.array_equal(h5data, rootdata):
            return "Doesn't match"
        return None
    h5data = h5data[h5valid]
    rootdata = rootdata[rootdata == rootdata]
    # The size check has to come after filtering: a different number of NaNs
    # on each side leaves arrays that cannot be compared element-wise.
    if len(h5data) != len(rootdata):
        return "Entries w/ NaNs. Size mismatch!"
    if not np.array_equal(h5data, rootdata):
        return "Non NaN data doesn't match!"
    return None


start = time.time()
for branchkey in keystocheck:
    if any(check in branchkey for check in skip):
        continue

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Testing...  " + branchkey)
    print("============================")

    branch = fh5.get(branchkey)
    treename, rootbranchkey, branchkeys = _select_leaves(branchkey, branch)
    # Fetch the tree from the file explicitly instead of relying on the
    # wildcard ROOT import to resolve a bare tree name.
    roottree = fcaf.Get(treename)
    if not roottree:
        sys.exit('Tree ' + treename + ' not found in ' + rootfile)

    for key in branchkeys:
        h5data = branch.get(key)[()].flatten()

        # A dataset called 'value' corresponds to the ROOT branch itself.
        if key == 'value':
            rootleafkey = rootbranchkey
        else:
            rootleafkey = rootbranchkey + '.' + key

        rootdata = _read_root_leaf(roottree, rootleafkey)

        problem = _leaf_mismatch(h5data, rootdata)
        if problem is not None:
            print("************")
            # Every report names the leaf, including entry-count mismatches.
            print(branchkey + '.' + key)
            print(problem)

print("============================")
print("Total time taken :  " + str(time.time() - start))