nova_h5_metadata.py
Go to the documentation of this file.
1 # Extract the metadata for HDF5 files.
2 
3 from twisted.internet import defer, threads
4 from twisted.python import log
5 
6 import fts.util
7 import fts.metadata_extractors
8 import h5py
9 import re,sys,json,os,os.path
10 
11 import MetadataUtils
12 
13 def _createMetadata(filename, group, filesizebytes):
14  md = {'file_name': filename, 'group':group, 'file_size': filesizebytes}
15 
16  md['data_tier'] = unicode('h5')
17 
18  return md
19 
class NovaH5(fts.metadata_extractors.MetadataExtractorRunCommand):
    """Metadata extractor for HDF5 files, published under the name "nova-h5".

    The metadata produced is the fixed dictionary built by _createMetadata
    (file name, group 'nova', file size, data tier).  A sidecar JSON file next
    to the data file is looked for and probed, but its contents are not parsed
    here.
    """
    name = "nova-h5"
    # Presumably consumed by the base class to cap simultaneous extractions
    # -- TODO confirm against fts.metadata_extractors.
    _concurrent_limit = 4

    @defer.inlineCallbacks
    def getMetadataFile(self, filestate):
        """Locate the sidecar JSON file that accompanies an HDF5 file.

        Args:
            filestate: object providing getLocalFilePath(); also handed to
                self._checkmdfile.

        Returns:
            A Deferred firing with the JSON path when the local path contains
            '.h5' and self._checkmdfile reports the JSON file as present;
            otherwise the Deferred fires with None.
        """
        localPath = filestate.getLocalFilePath()

        # NOTE(review): this is a substring test and a replace of *every*
        # '.h5' occurrence, so a '.h5' inside a directory name or earlier in
        # the file name is rewritten as well.  Kept as-is because the sidecar
        # naming convention is not visible from here -- confirm before
        # tightening to a suffix check.
        if '.h5' not in localPath:
            return
        jsonName = localPath.replace('.h5', '.json')

        exists = yield self._checkmdfile(jsonName, filestate)
        if not exists:
            return
        defer.returnValue(jsonName)

    @defer.inlineCallbacks
    def extract(self, filestate, *args, **kwargs):
        """Produce the metadata dictionary for the file described by filestate.

        Args:
            filestate: object providing getFileName(), getFileSize() and
                getLocalFilePath().
            *args, **kwargs: accepted for interface compatibility; unused.

        Returns:
            A Deferred firing with the dict built by _createMetadata.  The
            same dict is produced whether or not a sidecar JSON file is
            found; only the log output differs.
        """
        group = 'nova'
        try:
            jsonfilename = yield self.getMetadataFile(filestate)
            if jsonfilename:
                # Opening the file (off the reactor thread) only checks that
                # it is readable; nothing is read from it, so release the
                # handle immediately instead of leaking the descriptor.
                jsonfile = yield threads.deferToThread(open, jsonfilename)
                jsonfile.close()

                md = _createMetadata(filestate.getFileName(), group, filestate.getFileSize())
                log.msg("Using metadata file %s for %s" % (jsonfilename, filestate.getFileName()))
                defer.returnValue(md)

        except Exception as e:
            # Best effort: report through the Twisted log (as the success
            # path does) and fall through to the direct construction below.
            log.msg("%s" % (e,))
            log.msg('Exception reading json file, falling back to direct extraction')

        # Fallback: no usable sidecar JSON file, so build the metadata
        # directly from the file state.
        md = _createMetadata(filestate.getFileName(), group, filestate.getFileSize())
        defer.returnValue(md)
54 
# Module-level instance of the extractor, created at import time.
# Presumably this is the object the FTS framework picks up -- TODO confirm.
novaH5Extractor = NovaH5()
def _createMetadata(filename, group, filesizebytes)
def extract(self, filestate, args, kwargs)
def getMetadataFile(self, filestate)