# gen_hdf5record.py
# (NOTE: this listing was extracted from Doxygen-generated documentation;
# the navigation text and embedded line numbers are extraction residue.)
1 #!/usr/bin/env python
2 
3 # This script is intended to be compatible with both python2 and python3
4 from __future__ import print_function
5 
6 from pygccxml import *
7 
8 import os
9 import sys
10 
11 # Don't spam pygccxml deprecation warnings
12 import warnings
13 warnings.simplefilter(action = 'once', category = DeprecationWarning)
14 
# Types that we can assume are already defined, so don't form part of our
# dependency tree.
fundamental_types = [
    'int', 'float', 'double', 'bool', 'unsigned int',
    'short', 'short int', 'short unsigned int',
    'long', 'long unsigned int',
    'long long int', 'unsigned char',
    'size_t',
]

# These are written for every table already
reserved = ['run', 'subrun', 'cycle', 'evt', 'subevt']

# Names of enum types found in the parsed headers (filled in after parsing)
enums = []
27 
def is_fundamental(type):
    """True if *type* needs no generated wrapper (built-in or enum)."""
    if type in fundamental_types or type in enums:
        return True
    # Strip the caf:: namespace prefix and retry.
    if type.startswith('caf::'):
        return is_fundamental(type[5:])
    return False
33 
34 
def translate_type(type):
    """Map a C++ type to the type used for its HDF5 column.

    Enums and bools are both stored as 'unsigned short'; the caf::
    namespace prefix is stripped before translation.
    """
    if type in enums or type == 'bool':
        return 'unsigned short'
    if type.startswith('caf::'):
        return translate_type(type[5:])
    return type
39 
40 
def is_vector(type):
    """Return True if *type* is a std::vector instantiation."""
    # str.startswith is the idiomatic (and length-safe) form of the
    # original slice comparison type[:12] == 'std::vector<'.
    return type.startswith('std::vector<')
43 
44 
def vector_contents(type):
    """Extract the element type from a std::vector type string.

    pygccxml spells vectors as 'std::vector<T, std::allocator<T> >',
    so the element type runs from just after the '<' to the first comma.
    """
    assert is_vector(type)
    start = len('std::vector<')  # == 12
    return type[start:type.find(',')]
48 
49 
def is_array(type):
    """Return True for C-style array types such as 'float [3]'."""
    return type.find('[') != -1
52 
53 
def array_type(type):
    """Return the element type of a C-style array type string."""
    assert is_array(type)
    # There always seems to be a space before the dimensions, hence the -1.
    bracket = type.find('[')
    return type[:bracket - 1]
58 
59 
def array_size(type):
    """Return the element count of a C-style array type string."""
    assert is_array(type)
    # The dimension sits between '[' and the trailing ']'.
    bracket = type.find('[')
    return int(type[bracket + 1:-1])
63 
64 
66  assert not is_fundamental(type)
67  assert not is_vector(type)
68  assert not is_array(type)
69 
70  if type[:5] == 'caf::': return type_to_hdf5_type(type[5:])
71 
72  if type == 'StandardRecord': return 'HDF5Record'
73 
74  return 'HDF5'+type[2:]
75 
76 
def vetoed(type):
    """Types we deliberately refuse to persist (no sensible HDF5 column)."""
    return type == 'std::string'
80 
def base_class(klass):
    """Return the single base class of *klass*, or None if it has none."""
    assert len(klass.bases) < 2, 'Support for multiple base classes not implemented'
    for b in klass.bases:
        return b.related_class
    return None
86 
# Recurse to find member variables including all base classes
def variables_inc_bases(klass):
    """All member variables of *klass* and its bases, skipping vetoed types.

    NOTE(review): the 'def' line was missing from the extracted source
    (doxygen dump dropped its line 88); restored from the recursive call
    below, the call sites, and the file's member index.
    """
    vars = []
    for v in list(klass.variables()):
        if vetoed(str(v.decl_type)):
            # Report skipped members on stderr; stdout is the generated file.
            sys.stderr.write('Skipping '+str(v.decl_type)+' '+v.name+'\n')
        else:
            vars += [v]
    base = base_class(klass)
    if base:
        vars += variables_inc_bases(base)
    return vars
98 
99 
# Parse the optional output-directory arguments.
# BUGFIX(review): the original condition included 'len(sys.argv) < 1', which
# can never be true (argv always contains at least the script name); only the
# upper bound needs checking.
if len(sys.argv) > 3:
    print('Usage: gen_hdf5record.py [/path/to/header/outputs/] [/path/to/cxx/outputs/]')
    sys.exit(1)

headerDir = os.getcwd()
if len(sys.argv) >= 2: headerDir = sys.argv[1]
cxxDir = os.getcwd()
if len(sys.argv) >= 3: cxxDir = sys.argv[2]

# Locate the castxml executable
generator_path, generator_name = utils.find_xml_generator()

# Figure out where our source files are (SRT build contexts)
priv = os.environ.get('SRT_PRIVATE_CONTEXT')
pub = os.environ.get('SRT_PUBLIC_CONTEXT')

# For cmake build
if not pub: pub = os.environ['NOVASOFT_DIR']

# Prefer the private context only if it actually contains StandardRecord/
context = priv if (priv and os.path.exists(priv+'/StandardRecord/')) else pub

path = []
if priv: path += [priv]
path += [pub]
path += [os.environ['ROOT_INC']]

config = parser.xml_generator_configuration_t(
    xml_generator_path=generator_path,
    xml_generator=generator_name,
    include_paths=path,
    cflags='-std=c++1z -Wno-unknown-warning-option'#,
#    start_with_declarations='caf::StandardRecord'
    )

decls = parser.parse([context+'/StandardRecord/StandardRecord.h'],
                     config)

global_namespace = declarations.get_global_namespace(decls)
ns = global_namespace.namespace('caf')

# Record all enum names so is_fundamental()/translate_type() recognize them.
enums += [e.name for e in ns.enumerations()]

# Keep track of which classes we've written out so far, for purposes of
# dependency tracking.
emitted = []
145 
146 
# Boilerplate emitted verbatim at the top of both generated files.
disclaimer = '''// This file was auto-generated by gen_hdf5record.py.
// DO NOT EDIT IT DIRECTLY.
// For documentation of the fields see the regular StandardRecord.h'''

# C++ helper code emitted verbatim into HDF5Record.h: Join() for column
# names, the make_col/make_cols column factories, and the Wrapped<> adaptor
# for vectors of fundamental types.
# NOTE(review): in the scalar make_col, 'chunking = chunkSize * chunkSize /
# sizeof(T)' looks inconsistent with its "chunk size in MB" comment (the
# array version uses 1024 * chunkSize) -- confirm against upstream before
# changing the generated C++.
funcs = '''inline std::string Join(const std::string& a, const std::string& b)
{
 if(a.empty()) return b;
 return a+"."+b;
}

#include <iostream>

typedef std::initializer_list<hep_hpc::hdf5::PropertyList> plist_init;

/// Helper for make_cols() - specialized for regular fields
template<class T> hep_hpc::hdf5::Column<T>
make_col(const size_t& chunkSize,
 const size_t& elementsPerChunk,
 const bool& shuffleInts,
 const size_t& compressionLevel,
 const std::string& name,
 std::enable_if_t<!std::is_array<T>::value, void*> dummy = 0)
{
 // for now, support chunking by number of elements in chunk
 // and chunking by chunk size in MB. This should be temporary
 size_t chunking;
 if(elementsPerChunk == 0) {
 chunking = chunkSize * chunkSize / sizeof(T);
 }
 else if(chunkSize == 0) {
 chunking = elementsPerChunk;
 }

 // if integral type, use shuffle compression
 if constexpr(std::is_integral_v<T>)
 if(shuffleInts)
 return hep_hpc::hdf5::make_scalar_column<T>(name,
 chunking,
 {hep_hpc::hdf5::PropertyList{H5P_DATASET_CREATE}
 (&H5Pset_shuffle)
 (&H5Pset_deflate, compressionLevel)});
 else
 return hep_hpc::hdf5::make_scalar_column<T>(name,
 chunking,
 {hep_hpc::hdf5::PropertyList{H5P_DATASET_CREATE}
 (&H5Pset_deflate, compressionLevel)});
 else
 return hep_hpc::hdf5::make_scalar_column<T>(name,
 chunking,
 {hep_hpc::hdf5::PropertyList{H5P_DATASET_CREATE}
 (&H5Pset_deflate, compressionLevel)});
}

/// Helper for make_cols() - specialized for arrays
template<class T> hep_hpc::hdf5::Column<std::remove_extent_t<T>>
make_col(const size_t& chunkSize,
 const size_t& elementsPerChunk,
 const bool& shuffleInts,
 const size_t& compressionLevel,
 const std::string& name,
 std::enable_if_t<std::is_array<T>::value, void*> dummy = 0)
{
 // for now, support chunking by number of elements in chunk
 // and chunking by chunk size in KB. This should be temporary
 size_t chunking;
 if(elementsPerChunk == 0) {
 chunking = std::max(1024 * chunkSize / sizeof(T), (size_t) 1);
 }
 else if(chunkSize == 0) {
 chunking = elementsPerChunk;
 }

 // if integral type, use shuffle compression
 if constexpr(std::is_integral_v<std::remove_extent_t<T>>)
 if(shuffleInts)
 return hep_hpc::hdf5::make_column<std::remove_extent_t<T>>(name,
 std::extent<T>::value,
 chunking,
 {hep_hpc::hdf5::PropertyList{H5P_DATASET_CREATE}
 (&H5Pset_shuffle)
 (&H5Pset_deflate, compressionLevel)});
 else
 return hep_hpc::hdf5::make_column<std::remove_extent_t<T>>(name,
 std::extent<T>::value,
 chunking,
 {hep_hpc::hdf5::PropertyList{H5P_DATASET_CREATE}
 (&H5Pset_deflate, compressionLevel)});
 else
 return hep_hpc::hdf5::make_column<std::remove_extent_t<T>>(name,
 std::extent<T>::value,
 chunking,
 {hep_hpc::hdf5::PropertyList{H5P_DATASET_CREATE}
 (&H5Pset_deflate, compressionLevel)});
}

/// Return a tuple of hdf5 columns, with the given names and types
template<class... Types, class... Names> static auto
make_cols(const size_t& chunkSize,
 const size_t& elementsPerChunk,
 const bool& shuffleInts,
 const size_t& compressionLevel,
 Names... names)
{
 static_assert(sizeof...(Names) == sizeof...(Types),
 "Must specify same number of column types and column names");
 return std::make_tuple(make_col<Types>(chunkSize, elementsPerChunk, shuffleInts, compressionLevel, names)...);
}

/// Wrap a basic datatype up as if it's a class with a single member
template<class W, class... Tidx> class Wrapped
{
public:
 template<class... Cols> Wrapped(hid_t f, const std::string& name, const size_t& chunkSize, const size_t& elementsPerChunk, const bool& shuffleInts, const size_t& compressionLevel, Cols... cols)
 : vals(f, name, make_cols<Tidx..., W>(chunkSize, elementsPerChunk, shuffleInts, compressionLevel, cols..., "value"))
 {
 }

 void Fill(Tidx... idxs, W x) {vals.insert(idxs..., x);}

 hep_hpc::hdf5::Ntuple<Tidx..., W> vals;
};
'''
269 
# From this point on everything we print goes to HDF5Record.h. Remember to
# send messages for the user to stderr.
sys.stdout = open(headerDir+'/HDF5Record.h', 'w')

# Emit the fixed header-file prolog line by line.
for line in [disclaimer,
             '',
             '#pragma once',
             '',
             '#include "StandardRecord/StandardRecord.h"',
             '',
             '#include "hep_hpc/hdf5/File.hpp"',
             '#include "hep_hpc/hdf5/Ntuple.hpp"',
             '#include "hep_hpc/hdf5/PropertyList.hpp"',
             '#include "hep_hpc/hdf5/make_column.hpp"',
             '',
             'namespace hdf5',
             '{',
             '',
             funcs]:
    print(line)
289 
# When True, report why classes are being deferred (set during a retry pass).
debug = False


def deps_emitted(klass):
    """Return True once every wrapper type *klass* depends on is in 'emitted'."""
    pt = type_to_hdf5_type(klass.name)

    # The base class wrapper must already have been written out.
    base = base_class(klass)
    if base:
        base = type_to_hdf5_type(base.name)
    if base and base not in emitted:
        if debug:
            sys.stderr.write('Skipping '+pt+' because of '+base+'\n')
        return False

    # Check every member that requires its own wrapper class.
    for v in variables_inc_bases(klass):
        decl = str(v.decl_type)
        if is_fundamental(decl):
            continue
        elif is_vector(decl):
            type = vector_contents(decl)
            if is_fundamental(type):
                continue
            type = type_to_hdf5_type(type)
        elif is_array(decl):
            # Arrays of non-fundamental types are not supported.
            assert is_fundamental(array_type(decl))
            continue
        elif type_to_hdf5_type(decl) in ['HDF5Vector3D', 'HDF5LorentzVector']:
            continue  # inlined where used, never emitted separately
        else:
            type = type_to_hdf5_type(decl)

        if type not in emitted:
            if debug:
                sys.stderr.write('Skipping '+pt+' because of '+type+'\n')
            return False

    return True
321 
322 
def write_hd5record_h(klass):
    """Print the header-file declaration of the HDF5 wrapper for *klass*.

    Writes to sys.stdout, which the caller has redirected to HDF5Record.h.
    """
    # BUGFIX(review): 'pt' was previously read from a global that the caller's
    # loop happened to set before each call; compute it locally so the
    # function is self-contained (same value, no hidden coupling).
    pt = type_to_hdf5_type(klass.name)

    print('/// HDF5 encoding of \\ref', klass.name)
    print('template<class... Tidx> class', pt)

    print('{')
    print('public:')
    print(' template<class... Cols> '+pt+'(hid_t f, const std::string& name, const size_t& chunkSize, const size_t& elementsPerChunk, const bool& shuffleInts, const size_t& compressionLevel, Cols... cols);')
    print()
    print(' void Fill(Tidx... idxs, const caf::'+klass.name+'& sr);')
    print()

    # Fundamental members become columns of one shared Ntuple ('vals');
    # everything else becomes a nested wrapper member.
    types = []
    names = []
    for v in variables_inc_bases(klass):
        type = str(v.decl_type)

        if is_fundamental(type):
            if v.name not in reserved:
                types += [translate_type(type)]
                names += [v.name]
        elif is_vector(type):
            inner_type = vector_contents(str(v.decl_type))
            if not is_fundamental(inner_type):
                print(' std::unique_ptr<'+type_to_hdf5_type(inner_type)+'<Tidx..., unsigned int>>', v.name+';')
            else:
                print(' std::unique_ptr<Wrapped<'+inner_type+', Tidx..., unsigned int>>', v.name+';')
        elif is_array(type):
            inner_type = array_type(type)
            size = array_size(type)
            assert is_fundamental(inner_type)
            # NOTE(review): the declaration records only the element type here
            # while the .cxx side uses 'T[N]' in make_cols -- confirm this
            # matches upstream intent before touching either.
            types += [inner_type]
            names += [v.name]
        elif type_to_hdf5_type(type) == 'HDF5Vector3D':
            # Inlined as three float columns instead of a nested wrapper.
            types += ['float', 'float', 'float']
            names += [v.name+'.x', v.name+'.y', v.name+'.z']
        elif type_to_hdf5_type(type) == 'HDF5LorentzVector':
            # Inlined as four float columns instead of a nested wrapper.
            types += ['float', 'float', 'float', 'float']
            names += [v.name+'.E', v.name+'.px', v.name+'.py', v.name+'.pz']
        else:
            print(' std::unique_ptr<'+type_to_hdf5_type(str(v.decl_type))+'<Tidx...>>', v.name+';')

    if len(types) > 0:
        print(' // '+', '.join(names))
        print(' hep_hpc::hdf5::Ntuple<Tidx..., '+', '.join(types)+'> vals;')

    print('};')
    print()

    if debug: sys.stderr.write('Wrote '+pt+'\n')
378 
379 
# Repeatedly sweep the caf classes, writing each one only once all of its
# dependencies have been written, until nothing remains (or we stall).
while True:
    anyWritten = False
    anySkipped = False

    for klass in ns.classes():
        pt = type_to_hdf5_type(klass.name)
        if pt in emitted:
            continue  # Wrote this one already
        if pt in ('HDF5Vector3D', 'HDF5LorentzVector'):
            continue  # no need, inlined where used

        if not deps_emitted(klass):
            anySkipped = True  # Unmet dependencies, come back to it
            continue

        write_hd5record_h(klass)

        anyWritten = True
        emitted += [pt, 'VectorProxy<'+pt+'>']

    if not anySkipped:
        break  # We're done
    if anyWritten:
        continue  # Try for some more

    # Stalled: nothing written, something skipped.
    if debug:
        sys.stderr.write('Unable to meet all dependencies\n')
        sys.exit(1)
    # Go round one more time with debug on to provide feedback.
    debug = True
409 
# Close the namespace in the header file.
print('} // end namespace')


# And now we're writing to HDF5Record.cxx
sys.stdout = open(cxxDir+'/HDF5Record.cxx', 'w')

# Emit the fixed cxx-file prolog line by line.
for line in [disclaimer,
             '',
             '#include "HDF5Maker/HDF5Record.h"',
             '',
             '#include <tuple>',
             '',
             'namespace hdf5\n{']:
    print(line)
423 
# No need to specifically order the functions in the cxx file
for klass in ns.classes():
    pt = type_to_hdf5_type(klass.name)
    if pt in ('HDF5Vector3D', 'HDF5LorentzVector'):
        continue  # no need, inlined where used

    # Constructor
    print('template<class... Tidx> template<class... Cols>',pt+'<Tidx...>::'+pt+'(hid_t f, const std::string& name, const size_t& chunkSize, const size_t& elementsPerChunk, const bool& shuffleInts, const size_t& compressionLevel, Cols... cols)')
    # Build the member-initializer list. Fundamental members collect into a
    # single 'vals' Ntuple; vectors and nested classes get their own wrapper.
    inits = []
    types = []
    names = []
    for v in variables_inc_bases(klass):
        dt = str(v.decl_type)
        if is_vector(dt):
            inner_type = vector_contents(dt)
            if is_fundamental(inner_type):
                inits += [v.name + '(std::make_unique<Wrapped<'+(inner_type)+', Tidx..., unsigned int>>(f, Join(name, "'+v.name+'"), chunkSize, elementsPerChunk, shuffleInts, compressionLevel, cols..., Join(name, "'+v.name+'")+"_idx"))']
            else:
                inits += [v.name + '(std::make_unique<'+type_to_hdf5_type(inner_type)+'<Tidx..., unsigned int>>(f, Join(name, "'+v.name+'"), chunkSize, elementsPerChunk, shuffleInts, compressionLevel, cols..., Join(name, "'+v.name+'")+"_idx"))']
        elif is_array(dt):
            inner_type = array_type(dt)
            size = array_size(dt)
            assert is_fundamental(inner_type)
            types += [inner_type+'['+str(size)+']']
            names += [v.name]
        elif is_fundamental(dt):
            if v.name not in reserved:
                types += [translate_type(dt)]
                names += [v.name]
        elif type_to_hdf5_type(dt) == 'HDF5Vector3D':
            types += ['float', 'float', 'float']
            names += [v.name+'.x', v.name+'.y', v.name+'.z']
        elif type_to_hdf5_type(dt) == 'HDF5LorentzVector':
            types += ['float', 'float', 'float', 'float']
            names += [v.name+'.E', v.name+'.px', v.name+'.py', v.name+'.pz']
        else:
            inits += [v.name + '(std::make_unique<'+type_to_hdf5_type(dt)+'<Tidx...>>(f, Join(name, "'+v.name+'"), chunkSize, elementsPerChunk, shuffleInts, compressionLevel, cols...))']

    if len(names) > 0:
        inits += ['vals(f, name, make_cols<Tidx..., '+(', '.join(types))+'>(chunkSize, elementsPerChunk, shuffleInts, compressionLevel, cols..., '+', '.join(['"'+n+'"' for n in names])+'))']

    if len(inits) > 0:
        print(' : '+',\n '.join(inits))
    # NOTE(review): the dump's collapsed indentation makes it ambiguous
    # whether this brace pair was inside the 'if' above; an empty constructor
    # body must be printed regardless, so it is emitted unconditionally here.
    print('{\n}\n')

    # Fill() definition
    print('template<class... Tidx> void '+pt+'<Tidx...>::Fill(Tidx... idxs, const caf::'+klass.name+'& sr)')
    print('{')
    for v in variables_inc_bases(klass):
        dt = str(v.decl_type)
        if is_vector(dt):
            print(' for(unsigned int i = 0; i < sr.'+v.name+'.size(); ++i) '+v.name+'->Fill(idxs..., i, sr.'+v.name+'[i]);')
        elif not is_array(dt) and not is_fundamental(dt) and type_to_hdf5_type(dt) not in ['HDF5Vector3D', 'HDF5LorentzVector']:
            print(' '+v.name+'->Fill(idxs..., sr.'+v.name+');')

    if len(names) > 0:
        print(' vals.insert(idxs..., '+', '.join(['sr.'+n for n in names])+');')
    print('}')
    print()
482 
# Explicit instantiations for the index signatures the writer module uses.
# Each entry: (wrapper class, its index-type template arguments). The number
# of 'const char*' column-name arguments equals the number of index types.
print("// Instantiate the ones we'll use")
for cls, idxs in [('HDF5Record', 'unsigned int, unsigned int, int, unsigned int, unsigned short'),
                  ('HDF5Spill', 'unsigned int, unsigned int, unsigned int'),
                  ('HDF5Neutrino', 'unsigned int, unsigned int, int, unsigned int')]:
    ncols = idxs.count(',') + 1
    print('template class '+cls+'<'+idxs+'>;')
    print('template '+cls+'<'+idxs+'>::'+cls+'(hid_t f, const std::string& name, const size_t& chunkSize, const size_t& elementsPerChunk, const bool& shuffleInts, const size_t& compressionLevel, '+', '.join(['const char*']*ncols)+');')
    print()

print('} // namespace')


sys.stderr.write('Wrote HDF5Record.h and HDF5Record.cxx\n')
# --- Doxygen member-index residue (not part of the script itself) ---
# def base_class(klass)
# def array_type(type)
# def vetoed(type)
# def is_vector(type)
# def is_fundamental(type)
# def type_to_hdf5_type(type)
# def translate_type(type)
# def vector_contents(type)
# bool print
# def array_size(type)
# procfile open("FD_BRL_v0.txt")
# def is_array(type)
# def variables_inc_bases(klass)
# def write_hd5record_h(klass)
# def deps_emitted(klass)