Classes | Functions | Variables
MetadataUtils Namespace Reference

Classes

class  metaDataMgr
 

Functions

def isNone (value)
 
def appendField (base, extension)
 
def addMetadataToFCL (fclFile, parName, parValue)
 
def unCamelCase (s)
 
def cleanup_metadata (in_md)
 
def createMetadata (inFile)
 

Variables

 detectors
 
 neutrinoGenerators
 Allow this list to potentially expand beyond just the generators we use now. More...
 
 neutrinoGenieTunes
 
 cosmicGenerators
 
 run_pattern
 
 sim_pattern
 
 stream_pattern
 
 parent_pattern
 

Function Documentation

def MetadataUtils.addMetadataToFCL (   fclFile,
  parName,
  parValue 
)

Definition at line 42 of file MetadataUtils.py.

def addMetadataToFCL(fclFile, parName, parValue):
    """Append one metadata parameter assignment to an FCL file object.

    The text written is a newline, then
    ``physics.analyzers.metadata.params.<parName>: <parValue>``, then a
    space and a newline.

    fclFile  -- object providing a ``write`` method (e.g. an open file)
    parName  -- parameter name; handed to ``write`` unchanged
    parValue -- parameter value; handed to ``write`` unchanged
    """
    pieces = (
        "\nphysics.analyzers.metadata.params.",
        parName,
        ": ",
        parValue,
        " \n",
    )
    # One write call per piece, in order.
    for piece in pieces:
        fclFile.write(piece)
48 
def addMetadataToFCL(fclFile, parName, parValue)
def MetadataUtils.appendField (   base,
  extension 
)

Definition at line 32 of file MetadataUtils.py.

References isNone().

def appendField(base, extension):
    """Join two name fields with an underscore, skipping 'none' values.

    A field counts as missing when isNone() says so. The result is:
      * "none" when both fields are missing,
      * the other field when exactly one is missing,
      * ``base`` when both are present and equal,
      * ``base + "_" + extension`` otherwise.
    """
    base_missing = isNone(base)
    extension_missing = isNone(extension)

    if base_missing:
        return "none" if extension_missing else extension
    if extension_missing or base == extension:
        return base
    return base + "_" + extension
40 
41 
def isNone(value)
def appendField(base, extension)
def MetadataUtils.cleanup_metadata (   in_md)
Numerous keys are stored in the files in ways that we need to 'translate'
    so that they can be given to SAM. 

Definition at line 55 of file MetadataUtils.py.

References site_stats_from_log.get, makeTrainCVSamples.int, parse_dependency_file_t.list, runNovaSAM.str, and unCamelCase().

Referenced by createMetadata().

def cleanup_metadata(in_md):
    """Translate raw dumper metadata into the layout that is given to SAM.

    Numerous keys are stored in the files in ways that we need to 'translate'
    so that they can be given to SAM.

    in_md -- dict of raw metadata (key -> value)

    Returns a new dict. Keys without a '.' are passed through unCamelCase();
    keys with a '.' are lower-cased. ``application_*`` keys are gathered into
    a nested ``md['application']`` dict, and ``file_format_era`` /
    ``file_format_version`` are dropped.
    """

    md = {}
    for k, v in in_md.iteritems():
        k = str(k)
        if len(k) == 0:
            continue
        if isinstance(v, basestring) and len(v) == 0:
            v = "none"

        # thanks, sam_metadata_dumper
        # NOTE(review): the listing this block was recovered from had lost its
        # indentation; `v = str(v)` is assumed to belong to the else branch --
        # confirm against the original file.
        if '.' not in k:
            k = unCamelCase(k)
        else:
            k = k.lower()
            v = str(v)

        # fix for poorly formatted metadata from FileReducer pre-r24852
        if k.lower() == "parents" and '","file_name":' in v:
            v = v.replace('","file_name":', '"},{"file_name":')

        # note that for correct JSON serialization,
        # any lists or maps need to be processed into Python lists/maps.
        # if that fails, just fall back to the string we had.
        # NOTE(review): eval() executes whatever text the dumper emitted;
        # only safe while the input files are trusted.
        if isinstance(v, basestring) and any(c in v for c in "[]{}"):
            try:
                v = eval(v)
            except Exception:
                # Deliberate best effort: keep the string. Narrowed from a
                # bare except so KeyboardInterrupt/SystemExit still propagate.
                pass

        # specific bugfix for run list duplicates
        # (fixed upstream in r24859)
        if k == "runs":
            v = list(set([tuple(run) for run in v]))

        # sam_metadata_dumper spits out [ run, subrun, event ].
        # but the field itself should just be the event number
        # (the third element)...
        if k in ('first_event', 'last_event'):
            try:
                v = int(v)
            except TypeError:
                if len(v) == 3:
                    v = v[2]
                else:
                    # not a usable value: drop this key entirely
                    continue

        # this is historical, but I can't guarantee it's not needed any more,
        # even though every file I can see now has this as an int...
        if k == 'simulated.cycle':
            v = eval(v)

        # more type coercion
        if k in ('start_date', 'end_date'):
            v = long(v)

        # ummm... yeah.
        # The default must be a dict: with the previous list default, a
        # 'process_name' key seen before any 'application_*' key raised
        # AttributeError (lists have no .get).
        if k == 'process_name' and md.get('application', {}).get('name') is None:
            k = 'application_name'
        elif k == 'stream_name':
            k = 'data_stream'

        if k.startswith('application_'):
            if 'application' not in md:
                md['application'] = {}
            # strip the 'application_' prefix (12 characters)
            md['application'][k[12:]] = v
        elif k in ('file_format_era', 'file_format_version'):
            pass
        else:
            md[k] = v

    return md
128 
def unCamelCase(s)
def cleanup_metadata(in_md)
def MetadataUtils.createMetadata (   inFile)

Definition at line 129 of file MetadataUtils.py.

References cleanup_metadata(), samweb_client.utility.fileEnstoreChecksum(), and runNovaSAM.str.

Referenced by runNovaSAM.checkAndMoveFiles().

def createMetadata(inFile):
    """Build the metadata dict for *inFile* by running an external dumper.

    The dumper executable is chosen from the file name:
      * ``*caf.root`` -> ``extractCAFMetadata``  (file_format 'unknown')
      * ``*.h5``      -> ``extractHDF5Metadata`` (file_format 'unknown')
      * anything else -> ``sam_metadata_dumper`` (file_format 'artroot')

    inFile -- path to the file; must exist (its size is read up front)

    Returns a dict holding file_name, group, file_size, file_format, the
    cleaned-up dumper metadata and a 'crc' entry from fileEnstoreChecksum().
    Returns None (after printing a message) if the dumper exits non-zero.
    """
    filesize = os.path.getsize(inFile)
    filename = os.path.basename(inFile)

    if filename.endswith("caf.root"):
        dumper, file_format = "extractCAFMetadata", "unknown"
    elif filename.endswith(".h5"):
        dumper, file_format = "extractHDF5Metadata", "unknown"
    else:
        dumper, file_format = "sam_metadata_dumper", "artroot"

    md = {'file_name': filename, 'group': 'nova', 'file_size': filesize, 'file_format': file_format}
    try:
        data = subprocess.check_output([dumper, str(inFile)])
    except subprocess.CalledProcessError:
        # Parenthesised single argument: same output on Python 2 and 3.
        print("%s failed!" % dumper)
        return None

    if file_format != "artroot":
        # Skip anything the tool printed before the JSON object starts.
        dumperDict = json.loads(data[data.find("{"):])
        md.update(cleanup_metadata(dumperDict))
    else:
        # hilariously, sam_metadata_dumper prints out the 'parents' metadata twice:
        # once, with format like '[ "<f1>", "<f2>", ... ]' (incorrect?)
        # and again, with format like '[ {"filename": "<f1>"}, {"filename": "<f2>"}, ...]' (correct?)
        # eliminate the first one.
        stale_parents = None
        for groups in parent_pattern.findall(data):
            # NOTE(review): eval() executes text taken from the dumper output;
            # only safe while the input files are trusted.
            parent_dict = eval("{%s}" % groups[-1])
            parents = parent_dict["parents"]
            # An empty parents list used to raise IndexError here.
            if parents and isinstance(parents[0], basestring):
                stale_parents = "".join(groups)
                break
        if stale_parents is not None:
            data = data.replace(stale_parents, "")

        # Strip the terminal escape sequence before parsing the JSON.
        dumperDict = json.loads(data.replace("\x1b[?1034h", ""))

        # The dumper output is keyed by file; use the first key that
        # contains the input path.
        for dumperKey in dumperDict:
            if str(inFile) in str(dumperKey):
                md.update(cleanup_metadata(dumperDict[dumperKey]))
                break

    md['crc'] = fileEnstoreChecksum(inFile)

    return md
194 
def fileEnstoreChecksum(path)
Definition: utility.py:50
def createMetadata(inFile)
def cleanup_metadata(in_md)
def MetadataUtils.isNone (   value)

Definition at line 27 of file MetadataUtils.py.

Referenced by appendField(), MetadataUtils.metaDataMgr.getOutputFileName(), and MetadataUtils.metaDataMgr.isSam4Users().

def isNone(value):
    """Return True if *value* is None or the string "none", else False.

    None is tested by identity, so the result does not depend on how the
    value's type implements ``==`` against None.
    """
    if value is None:
        return True
    if value == "none":
        return True
    return False
31 
def isNone(value)
def MetadataUtils.unCamelCase (   s)
Convert CamelCase to camel_case 

Definition at line 49 of file MetadataUtils.py.

Referenced by cleanup_metadata().

def unCamelCase(s):
    """Convert CamelCase to camel_case.

    An underscore is inserted before every upper-case ASCII letter that is
    not at the start of a word, then the whole string is lower-cased.
    """
    # \B only matches inside a word, so a leading capital gets no underscore.
    with_underscores = re.sub(r'\B([A-Z])', r'_\1', s)
    return with_underscores.lower()
54 
def unCamelCase(s)

Variable Documentation

MetadataUtils.cosmicGenerators

Definition at line 19 of file MetadataUtils.py.

MetadataUtils.detectors

Definition at line 10 of file MetadataUtils.py.

MetadataUtils.neutrinoGenerators

Allow this list to potentially expand beyond just the generators we use now.

Definition at line 17 of file MetadataUtils.py.

MetadataUtils.neutrinoGenieTunes

Definition at line 18 of file MetadataUtils.py.

MetadataUtils.parent_pattern

Definition at line 25 of file MetadataUtils.py.

MetadataUtils.run_pattern

Definition at line 21 of file MetadataUtils.py.

MetadataUtils.sim_pattern

Definition at line 22 of file MetadataUtils.py.

MetadataUtils.stream_pattern

Definition at line 23 of file MetadataUtils.py.