common_tools.py
Go to the documentation of this file.
1 ###########################
2 # CONSTANTS
# Reference normalisation constant used by the validation tools.
# NOTE(review): presumably a protons-on-target exposure -- confirm with callers.
STANDARD_POT = 6.0e20
4 ###########################
5 # print a failure message, from:
6 # https://cdcvs.fnal.gov/redmine/projects/novaart/repository/entry/trunk/NovaGridUtils/lib/python/NovaGridUtils.py
def fail(msg):
    """Print an error banner followed by msg, then exit with status 1.

    The banner is wrapped in ANSI escape codes (91 = bright red, 0 = reset).
    Single-argument print() calls are used so the function behaves the same
    under Python 2 and Python 3.

    msg -- the text to print below the banner.
    Raises SystemExit(1) always.
    """
    import sys
    print("\n\033[91mtools: --- ERROR")
    print(msg)
    print("\033[0m\n")
    sys.exit(1)
13 ###########################
14 # print a warning message, from:
15 # https://cdcvs.fnal.gov/redmine/projects/novaart/repository/entry/trunk/NovaGridUtils/lib/python/NovaGridUtils.py
def warn(msg):
    """Print a warning banner followed by msg; execution continues.

    The banner is wrapped in ANSI escape codes (93 = bright yellow, 0 = reset).
    Single-argument print() calls keep the output identical under Python 2
    and Python 3.

    msg -- the text to print below the banner.
    """
    print("\n\033[93mtool: --- WARNING")
    print(msg)
    print("\033[0m\n")
20 ###########################
21 # Get jobsub client job id from submission log
def getJobID(log):
    """Extract the jobsub job id from the lines of a submission log.

    log -- a re-iterable sequence of log lines (it is walked a second time
           to dump the log when no id is found).

    Returns the stripped text after the first ":" of the last line that
    contains "JobsubJobId of first job", or False when no line matches.
    """
    return_id = False
    for line in log:
        if "JobsubJobId of first job" in line:
            return_id = line.split(":")[1].strip()
    # Identity test: an empty-string id must not be mistaken for "not found".
    if return_id is False:
        print("tools: getJobID failed. Given log:")
        for line in log:
            print(line.strip())
    return return_id
31 ###########################
32 # Get SAM URL from submission log
def getSAMURL(log):
    """Extract the last path component of the SAM station monitor URL.

    log -- a re-iterable sequence of log lines (it is walked a second time
           to dump the log when nothing is found).

    Returns the stripped text after the final "/" of the last line that
    contains "Station monitor will be ", or False when no line matches.
    """
    return_id = False
    for line in log:
        if "Station monitor will be " in line:
            return_id = line.split("/")[-1].strip()
    # Identity test: an empty result must not be mistaken for "not found".
    if return_id is False:
        print("tools: getSAMURL failed. Given log:")
        for line in log:
            print(line.strip())
    return return_id
42 ###########################
43 # Get SAM station monitor from submission log
def getStationMonitor(log):
    """Extract the SAM station monitor address from a submission log.

    log -- a re-iterable sequence of log lines (it is walked a second time
           to dump the log when nothing is found).

    Returns the stripped text following "Station monitor will be " on the
    last matching line, or False when no line matches.
    """
    marker = "Station monitor will be "
    return_id = False
    for line in log:
        if marker in line:
            return_id = line.split(marker)[-1].strip()
    # Identity test: an empty result must not be mistaken for "not found".
    if return_id is False:
        print("tools: getStationMonitor failed. Given log:")
        for line in log:
            print(line.strip())
    return return_id
53 ###########################
# get the last-modification time of a file (os.path.getmtime), from:
55 # http://stackoverflow.com/questions/237079/how-to-get-file-creation-modification-date-times-in-python
def fileModificationDatetime(filename):
    """Return the last-modification time of filename as a datetime.

    The value comes from os.path.getmtime (modification time, not creation
    time) and is converted with datetime.fromtimestamp, i.e. it is a naive
    datetime in local time.

    Raises OSError if the file does not exist or cannot be accessed.
    """
    import os
    import datetime
    return datetime.datetime.fromtimestamp(os.path.getmtime(filename))
61 ###########################
62 # fetch log files for a jobsub job
def fetchLogs(job_id, run_datetime=False, min_caching_age_days=10, verbose=False):
    """Fetch the jobsub log files of a job and return their paths.

    Logs are unpacked into $NOVAANAVALID_GRID/jobsub_logs/<job_id>.  If that
    directory already exists and the job ran at least min_caching_age_days
    ago, the existing files are reused; otherwise the files in it are removed
    and the logs are fetched again with jobsub_fetchlog.

    job_id               -- jobsub job id; False yields an empty list.
    run_datetime         -- datetime the job ran (defaults to now).
    min_caching_age_days -- age in days from which cached logs are trusted.
    verbose              -- print progress information.

    Returns the list of paths matching <log_dir>/*.
    Raises SystemExit(1) when SETUP_JOBSUB_CLIENT is not in the environment.
    """
    import subprocess
    import datetime
    import getpass
    import glob
    import os
    import sys
    if "SETUP_JOBSUB_CLIENT" not in os.environ:
        print("tools: Jobsub client has not been setup. Cannot fetch logs without the tools to do this")
        print("tools: Aborting\n")
        sys.exit(1)

    # Equality (not identity) is kept on purpose so 0 is treated like False.
    if job_id == False:
        return []
    if not run_datetime:
        run_datetime = datetime.datetime.now()

    log_dir = "%s/jobsub_logs/%s" % (os.environ["NOVAANAVALID_GRID"], job_id)
    if verbose: print("tools: fetching logs to dir: %s" % log_dir)
    fetch_logs = True
    # check for existing instance
    if os.path.exists(log_dir):
        if verbose: print("tools: - log directory exists")
        now = datetime.datetime.now()
        d_t = now - run_datetime
        if verbose: print("tools: run datetime: %s, now: %s, days: %i" % (run_datetime, now, d_t.days))
        if d_t.days < min_caching_age_days:
            if verbose: print("tools: re-fetching logs")
            # Same reach as "rm <log_dir>/*": plain files only, no recursion,
            # but without handing a path to the shell.
            for stale in glob.glob("%s/*" % log_dir):
                if os.path.isfile(stale) or os.path.islink(stale):
                    os.remove(stale)
        else:
            if verbose: print("tools: using existing logs")
            fetch_logs = False

    if fetch_logs:
        # Build the argument list directly so paths with spaces stay intact.
        command = ["jobsub_fetchlog", "-G", "nova",
                   "--job=%s" % job_id, "--unzipdir=%s" % log_dir]
        if "novapro" in getpass.getuser():
            command.append("--role=Production")

        process = subprocess.Popen(command, stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT,
                                   universal_newlines=True)
        # communicate() drains the pipe while waiting; polling without
        # reading can block forever once the pipe buffer fills up.
        output, _ = process.communicate()
        if process.returncode != 0:
            print("tools : --- Exception caught while executing fetch log, return code: %i" % process.returncode)
            print("tools : command run: %s" % " ".join(command))
            for line in output.splitlines():
                print(line.strip())

    return glob.glob("%s/*" % log_dir)
111 ###########################
112 # find the root file output of a component
def findRootOutput(component, verbose=False, testing=False):
    """Return the list of ROOT output files of a component.

    component -- object with output_dir, name and version attributes and an
                 optional truthy sam attribute.
    verbose   -- print the files that were found.
    testing   -- replace $NOVAANAVALID_GRID by $NOVAANAVALID_GRID_TEST in the
                 output directory before searching.

    For SAM components only <output_dir>/hadd_<name>_<version>.root is
    considered (empty list if it does not exist); otherwise every *.root file
    directly inside the output directory is returned.
    """
    import glob
    import os
    output_dir = component.output_dir
    if testing:
        output_dir = output_dir.replace(os.environ["NOVAANAVALID_GRID"],
                                        os.environ["NOVAANAVALID_GRID_TEST"])

    if hasattr(component, "sam") and component.sam:
        # Use the (possibly test-substituted) directory here as well; the
        # original looked in component.output_dir and so ignored `testing`.
        file_name = "%s/hadd_%s_%s.root" % (output_dir, component.name, component.version)
        root_files = [file_name] if os.path.exists(file_name) else []
    else:
        root_files = glob.glob("%s/*.root" % output_dir)
    if verbose:
        print("tools: Found %i files" % len(root_files))
        for root_file in root_files:
            print("tools: - %s" % root_file)
    return root_files
133 ###########################
134 # open all histograms
def openPlots(root_file_name, skip_2d=False, directories=False, verbose=False):
    """Open a ROOT file and load every keyed object into a dictionary.

    root_file_name -- path/URL handed to TFile.Open.
    skip_2d        -- in flat mode, skip keys whose name contains "_vs_".
    directories    -- if True, treat each top-level key as a directory and
                      store its objects under "<directory>_<name>"; otherwise
                      store the top-level objects under their own name.
    verbose        -- print progress information.

    Returns (master, open_root_file): the name -> object dictionary and the
    still-open TFile (the caller owns it; the objects belong to the file).
    Raises AssertionError if the file cannot be opened or an object cannot be
    read.
    """
    from ROOT import TFile
    master = {}
    if verbose: print("tools: Opening: %s" % root_file_name)
    open_root_file = TFile.Open(root_file_name)
    assert open_root_file and open_root_file.IsOpen(), "tools: Failure opening file: %s" % root_file_name
    if directories:
        folder_list = open_root_file.GetListOfKeys()
        folder_list.sort()
        if verbose: print("tools: Found %i keys" % len(folder_list))
        for folder in folder_list:
            folder_name = folder.GetName()
            directory = open_root_file.GetDirectory(folder_name)
            histogram_list = directory.GetListOfKeys()
            histogram_list.sort()
            for histogram in histogram_list:
                name = histogram.GetName()
                key = "%s_%s" % (folder_name, name)
                master[key] = directory.Get(name)
                # The message used the undefined name `root_name`, which
                # turned a failed read into a NameError.
                assert master[key], "tools: Failure opening file: %s, histogram: %s/%s" % (root_file_name, folder_name, name)
    else:
        histogram_list = open_root_file.GetListOfKeys()
        histogram_list.sort()
        print("run : Found %i keys" % len(histogram_list))
        for histogram in histogram_list:
            name = histogram.GetName()
            if skip_2d and ("_vs_" in name): continue
            master[name] = open_root_file.Get(name)
            assert master[name], "tools: Failure opening file: %s, histogram: %s" % (root_file_name, name)
    return master, open_root_file
164 ###########################
165 # check if a histogram name follows the logical naming convention
def isLogicalHistogramName(name, strict=False):
    """Check whether a histogram name follows the logical naming convention.

    Convention: <category>-<observable>[-<reconstruction level>][-<truth level>]
    A leading path (anything up to the last "/") and a trailing ".png" are
    ignored.

    name   -- histogram (or image file) name.
    strict -- additionally require category, observable, reco level and true
              level to be keys of the corresponding naming_schema tables.

    Returns True or False.
    """
    if "/" in name: name = name.split("/")[-1]
    # Only strip a real ".png" suffix; a substring test would chop four
    # characters off any name that merely contains ".png".
    if name.endswith(".png"): name = name[:-4]
    if len(name.split("-")) < 2: return False
    if strict:
        # Imported lazily: the schema is only needed for strict checks.
        import naming_schema
        category = getHistogramCategory(name)
        if category not in naming_schema.categories.keys(): return False

        observable = getHistogramObservable(name)
        if observable not in naming_schema.observables.keys(): return False

        reco_level = getHistogramRecoLevel(name)
        if reco_level not in naming_schema.reco_levels.keys(): return False

        true_level = getHistogramTrueLevel(name)
        if true_level not in naming_schema.true_levels.keys(): return False

    return True
187 ###########################
188 # return the logical category of a histogram
def getHistogramCategory(name):
    """Return the logical category of a histogram name.

    The category is the text before the first "-" after any leading path
    (up to the last "/") and a trailing ".png" have been removed.
    """
    if "/" in name: name = name.split("/")[-1]
    # Strip only a genuine ".png" suffix.
    if name.endswith(".png"): name = name[:-4]
    return name.split("-")[0]
193 ###########################
194 # return the logical observable of a histogram
def getHistogramObservable(name):
    """Return the logical observable of a histogram name.

    The observable is the second "-"-separated token after any leading path
    (up to the last "/") and a trailing ".png" have been removed.

    Raises IndexError if the name contains no "-".
    """
    if "/" in name: name = name.split("/")[-1]
    # Strip only a genuine ".png" suffix.
    if name.endswith(".png"): name = name[:-4]
    return name.split("-")[1]
199 ###########################
200 # return the logical reco-level of a histogram
def getHistogramRecoLevel(name):
    """Return the logical reco-level of a histogram name, or "" if none.

    After removing a leading path and a trailing ".png" the name is split on
    "-":
      * 3 tokens: the third token is the reco level unless it is a key of
        naming_schema.true_levels (then there is no reco level);
      * 4 tokens: the third token is the reco level;
      * any other token count: no reco level.
    An unknown level is reported through fail(), which terminates the process.
    """
    if "/" in name: name = name.split("/")[-1]
    # Strip only a genuine ".png" suffix.
    if name.endswith(".png"): name = name[:-4]
    tokens = name.split("-")
    if len(tokens) not in (3, 4):
        # no reco or truth level encoded in the name
        return ""
    # Imported lazily: the schema is only needed when a level is present.
    import naming_schema
    level = tokens[2]
    if len(tokens) == 3:
        # could be reco or truth, disambiguate
        if level in naming_schema.true_levels.keys(): return ""
        if level not in naming_schema.reco_levels.keys():
            fail("tools: unknown reco level found:%s (from: %s)"%(level,tokens))
        return level
    # both reco and true, so return reco
    if level not in naming_schema.reco_levels.keys():
        fail("tools: unknown reco level found: %s (from: %s)"%(level,tokens))
    return level
222 ###########################
223 # return the logical true-level of a histogram
def getHistogramTrueLevel(name):
    """Return the logical true-level of a histogram name, or "" if none.

    After removing a leading path and a trailing ".png" the name is split on
    "-":
      * 3 tokens: the third token is the true level unless it is a key of
        naming_schema.reco_levels (then there is no true level);
      * 4 tokens: the fourth token is the true level;
      * any other token count: no true level.
    An unknown level is reported through fail(), which terminates the process.
    """
    if "/" in name: name = name.split("/")[-1]
    # Strip only a genuine ".png" suffix.
    if name.endswith(".png"): name = name[:-4]
    tokens = name.split("-")
    if len(tokens) not in (3, 4):
        # no reco or truth level encoded in the name
        return ""
    # Imported lazily: the schema is only needed when a level is present.
    import naming_schema
    if len(tokens) == 3:
        # could be reco or truth, disambiguate
        if tokens[2] in naming_schema.reco_levels.keys(): return ""
        level = tokens[2]
    else:
        # both reco and true, so return true
        level = tokens[3]
    if level not in naming_schema.true_levels.keys():
        fail("tools: unknown true level found: %s (from: %s)"%(level,tokens))
    return level
244 ###########################
245 # make a directory
def mkdir(directory, verbose=False):
    """Create directory (and any missing parents) if it does not exist.

    The directory is created with os.makedirs instead of a shell
    "mkdir -p <string>", so names containing spaces or shell metacharacters
    are handled literally.  As before, a failure is reported but not fatal.

    directory -- path to create.
    verbose   -- print the path before creating it.
    """
    import errno
    import os
    import sys
    if verbose: print("tools: mkdir: %s" % directory)
    if not os.path.exists(directory):
        # The shell used to expand "~" and "$VAR"; keep accepting such paths.
        target = os.path.expandvars(os.path.expanduser(directory))
        try:
            os.makedirs(target)
        except OSError as error:
            # Losing a creation race to another process is not an error.
            if error.errno != errno.EEXIST or not os.path.isdir(target):
                sys.stderr.write("tools: mkdir failed for %s: %s\n" % (directory, error))
251 ###########################
252 # Read a bunch of statistics from an array of plots
def getPlotStats(plots, area_norm=True, verbose=True):
    """Collect per-plot statistics into a PlotStatistics object.

    plots     -- sequence of ROOT objects; the exact type of each decides
                 what is recorded (exact type checks are intentional so that
                 subclasses of TH1F/TH1D are not treated as 1D histograms).
    area_norm -- when the first plot is a TH1F/TH1D, also offer the
                 ("area", "area_norm_") normalisation.
    verbose   -- unused here; kept for interface compatibility.

    For every plot: entries and sum of weights (-9 for a TGraph), and for
    TH1F/TH1D the mean, median, mode (centre of the maximum bin) and the
    percentage difference of the mean with respect to the first recorded
    mean; -9 placeholders are stored for all other types.

    Returns the filled PlotStatistics instance.
    """
    from PlotStatistics import PlotStatistics
    from ROOT import TH1F, TH1D, TGraph
    stats = PlotStatistics()
    # Guard against an empty list instead of failing on plots[0].
    if plots and (type(plots[0]) in [TH1F, TH1D]):
        if area_norm:
            stats.normalisations = [("area","area_norm_"),("none","")]
        else:
            stats.normalisations = [("none","")]
    else:
        stats.normalisations = []
    for i, plot in enumerate(plots):
        if type(plot) in [TGraph]:
            stats.entries.append(-9)
            stats.sum_of_weights.append(-9)
        else:
            stats.entries.append(plot.GetEntries())
            stats.sum_of_weights.append(plot.GetSumOfWeights())
        # NOTE(review): the flattened source does not show whether this test
        # was nested in the else-branch above; it is kept at loop level so
        # that every list receives exactly one value per plot.
        if type(plot) in [TH1F, TH1D]:
            stats.means.append(plot.GetMean())
            stats.medians.append(getMedian(plot))
            stats.modes.append(plot.GetXaxis().GetBinCenter(plot.GetMaximumBin()))
            if i == 0:
                stats.mean_diffs.append(0)
            else:
                try:
                    mean_diff = ((plot.GetMean() - stats.means[0]) / stats.means[0]) * 100.
                except ZeroDivisionError:
                    # reference mean is exactly zero: no relative difference
                    mean_diff = 0.
                stats.mean_diffs.append(mean_diff)
        else:
            stats.means.append(-9)
            stats.medians.append(-9)
            stats.modes.append(-9)
            stats.mean_diffs.append(-9)
    return stats
289 ###########################
290 # function for getting the median of a histogram
def getMedian(histogram):
    """Return the median of a histogram.

    The histogram is split into two quantiles (probabilities 0.5 and 1.0);
    the position of the first one is the median.
    """
    return getQuantiles(histogram, 2)[0]
294 ###########################
295 # function for dividing histograms into quantiles
def getQuantiles(histogram,n_bins):
    """Return the positions of n_bins equally spaced quantiles.

    Probabilities 1/n_bins, 2/n_bins, ..., 1 are passed to
    histogram.GetQuantiles together with a zero-initialised output buffer,
    which is returned after the call as an array of doubles.
    """
    from array import array
    probabilities = array("d")
    positions = array("d")
    for step in range(n_bins):
        probabilities.append((step + 1.) / n_bins)
        positions.append(0)
    histogram.GetQuantiles(n_bins, positions, probabilities)
    return positions
302 ###########################
303 # convert a directory to a URL
def directoryToURL(directory, web_root="/nusoft/app/web/htdoc/", base_url="http://nusoft.fnal.gov/"):
    """Convert a directory below the web root into its URL.

    directory -- filesystem path.
    web_root  -- filesystem prefix served by the web server.
    base_url  -- URL that web_root maps to.

    Every occurrence of web_root in directory is replaced by base_url; a path
    that does not contain web_root is returned unchanged.  The defaults
    reproduce the original hard-coded mapping.
    """
    return directory.replace(web_root, base_url)
306 ###########################
307 # find the maximum and minimum of a list of histograms
def findMinMax(histograms,ignore_zero=False,include_errors=True,verbose=False):
    """Find the minimum and maximum over the visible bins of histograms.

    Only bins inside each x-axis range (GetFirst()..GetLast()) with non-zero
    content are considered.  With include_errors the bin error is subtracted
    for the minimum and added for the maximum.

    histograms     -- iterable of ROOT-like histograms.
    ignore_zero    -- replace a minimum of exactly 0 by 0.01.
    include_errors -- widen the range by the bin errors.
    verbose        -- report every new extremum.

    Returns (minimum, maximum).  The maximum starts at 0; the minimum is 0
    when no non-zero bin exists.
    """
    no_minimum = float("inf")  # replaces the magic 10e15 start value
    this_max = 0
    this_min = no_minimum
    for d in histograms:
        name = d.GetName()
        axis = d.GetXaxis()
        for bin_index in range(axis.GetFirst(), axis.GetLast() + 1):
            entries = d.GetBinContent(bin_index)
            # Empty bins never contribute, with or without ignore_zero.
            if not entries: continue
            error = d.GetBinError(bin_index) if include_errors else 0
            if (entries - error) < this_min:
                this_min = entries - error
                if verbose: print('tools: new minium minimum %.2f, from %s, bin[%i]'%(this_min, name, bin_index))
            if (entries + error) > this_max:
                this_max = entries + error
                if verbose: print('tools: new maximum maximum %.2f, from %s, bin[%i]'%(this_max, name, bin_index))
    if ignore_zero and this_min == 0:
        this_min = 0.01

    if this_min == no_minimum:
        # no non-zero bin found: fall back to zero
        this_min = 0

    return this_min, this_max
334 ###########################
335 # plots can have different truth levels and different reco levels, group them here
def findPlotTypes(plots, verbose=True):
    """Group plot names by type and collect the reco/true levels in use.

    plots   -- list of histogram names; it is sorted in place.
    verbose -- print a summary of what was found.

    Returns (plot_types, reco_levels, true_levels): the unique
    "<category>-<observable>" strings and the unique reco and true levels in
    order of first appearance.  Plots of category "meta" do not contribute
    levels.
    """
    plots.sort()
    plot_types = []
    reco_levels = []
    true_levels = []
    for plot in plots:
        category = getHistogramCategory(plot)
        observable = getHistogramObservable(plot)
        plot_type = "%s-%s"%(category,observable)
        if plot_type not in plot_types: plot_types.append(plot_type)
        reco_level = getHistogramRecoLevel(plot)
        if (reco_level not in reco_levels) and (category not in ["meta"]): reco_levels.append(reco_level)
        true_level = getHistogramTrueLevel(plot)
        if (true_level not in true_levels) and (category not in ["meta"]): true_levels.append(true_level)
    if verbose:
        print("view : found %i plot types, %i reco-levels and %i true-levels"%(len(plot_types),len(reco_levels),len(true_levels)))
    return plot_types,reco_levels,true_levels
353 ###########################
354 # check if these levels are the standard ones
def isStandardTrueLevels(levels):
    """Check whether levels are exactly the standard truth components.

    levels -- iterable of true-level names.

    Returns True when set(levels) equals
    set(naming_schema.standard_truth_components).
    """
    # The module was imported under the misspelt name "nameing_schema" while
    # the code below referred to "naming_schema".
    import naming_schema
    standard = set(naming_schema.standard_truth_components)
    return set(levels) == standard
359 ###########################
360 # get all of the different histogram observables
def getHistogramObservables(plots):
    """Return the unique observables of plots in order of first appearance.

    plots -- iterable of histogram names understood by
             getHistogramObservable.
    """
    observables = []
    for plot in plots:
        observable = getHistogramObservable(plot)
        if observable not in observables:
            observables.append(observable)
    return observables
368 ###########################
369 # get all of the different histogram categories
def getHistogramCategoriesAndObservables(plots):
    """Return the unique "<category>-<observable>" strings of plots.

    plots -- iterable of histogram names understood by getHistogramCategory
             and getHistogramObservable.

    The result keeps the order of first appearance.
    """
    plot_types = []
    for plot in plots:
        category = getHistogramCategory(plot)
        observable = getHistogramObservable(plot)
        plot_type = "%s-%s"%(category,observable)
        if plot_type not in plot_types: plot_types.append(plot_type)
    return plot_types
378 ###########################
379 # Find the maximum skew of a list of histograms
def findMaxSkew(plots,verbose=True):
    """Return the skewness with the largest magnitude among plots.

    plots   -- sequence of objects providing GetSkewness() and GetName().
    verbose -- print the skewness of every plot and the final result.

    The signed value is returned; on equal magnitudes the earlier plot wins.
    An empty sequence yields 0.
    """
    if verbose:
        print("vtool: --- find Max Skew")

    # Start from 0 for an empty list instead of failing on plots[0].
    max_skew = plots[0].GetSkewness() if plots else 0.
    for i, plot in enumerate(plots):
        this_skew = plot.GetSkewness()
        if abs(this_skew) > abs(max_skew):
            max_skew = this_skew
        if verbose:
            print("vtool: [%i] %s: %f"%(i,plot.GetName(),this_skew))

    if verbose: print("vtool: Max: %f"%max_skew)
    return max_skew
394 ###########################
395 # Find the maximum asymmetry of a list of histograms
def findMaxAsymmetry(plots,verbose=False):
    """Return the asymmetry with the largest magnitude among plots.

    plots   -- sequence of histograms accepted by findAsymmetry; GetName()
               is used for the verbose output.
    verbose -- print the asymmetry of every plot and the final result.

    The signed value is returned; on equal magnitudes the earlier plot wins.
    An empty sequence yields 0.
    """
    if verbose:
        print("vtool: --- find Max Asymmetry")

    # Start from 0 for an empty list instead of failing on plots[0].
    max_asym = findAsymmetry(plots[0]) if plots else 0.
    for i, plot in enumerate(plots):
        this_asym = findAsymmetry(plot)
        if abs(this_asym) > abs(max_asym):
            max_asym = this_asym
        if verbose:
            print("vtool: [%i] %s: %f"%(i,plot.GetName(),this_asym))

    if verbose: print("vtool: Max: %f"%max_asym)
    return max_asym
410 ###########################
411 # Find the asymmetry of histogram
def findAsymmetry(plot):
    """Return the asymmetry (high - low) / (high + low) of a histogram.

    "low" is the integral from bin 0 up to and including bin n_bins // 2,
    "high" the integral of the remaining bins up to n_bins.  The bin limits
    handed to Integral are inclusive, so the upper half has to start one bin
    after the lower half ends; starting both at n_bins // 2 counted that bin
    twice.

    NOTE(review): the lower half still starts at bin 0 (the underflow bin in
    ROOT numbering) while the upper half stops at n_bins -- confirm whether
    the underflow is meant to be included.

    Returns 0. when both halves are empty.
    """
    n_bins = plot.GetNbinsX()
    half = n_bins // 2
    low_int = plot.Integral(0, half)
    high_int = plot.Integral(half + 1, n_bins)
    try:
        asym = (high_int - low_int) / (low_int + high_int)
    except ZeroDivisionError:
        asym = 0.
    return asym
421 ###########################
422 # Sort a list of plots
def sortPlotList(plots):
    """Sort plots in place by category and observable name.

    The sort key of a plot is "<category>-<observable name>", where the
    observable is translated with naming_schema.observableName.  The same
    (now sorted) list object is returned.
    """
    import naming_schema

    def _sort_key(plot):
        category = getHistogramCategory(plot)
        observable = naming_schema.observableName(getHistogramObservable(plot))
        return "%s-%s"%(category, observable)

    plots.sort(key=_sort_key)
    return plots
427 ###########################
428 # check if a validation matches a logical pattern
def filterValidation(filter_pattern, name, version):
    """Check whether a validation matches a "<name>:<version>" pattern.

    Either part of the pattern may be "*" to match anything; a pattern
    without ":" is treated as "<name>:*".

    Returns True when both parts match, False otherwise.
    Raises AssertionError if the pattern contains more than one ":".
    """
    parts = filter_pattern.split(":")
    if len(parts) == 1:
        parts.append("*")
    assert len(parts) == 2, "Filter list doesn't match expected pattern (*:*): %s"%filter_pattern

    wanted_name, wanted_version = parts
    name_matches = wanted_name in ("*", name)
    version_matches = wanted_version in ("*", version)
    return name_matches and version_matches
438 
439 
def getJobID(log)
Get jobsub client job id from submission log.
Definition: common_tools.py:22
def getHistogramObservable(name)
return the logical observable of a histogram
def getHistogramRecoLevel(name)
return the logical reco-level of a histogram
def isStandardTrueLevels(levels)
check if these levels are the standard ones
def getHistogramCategory(name)
return the logical category of a histogram
def findMaxAsymmetry(plots, verbose=False)
Find the maximum asymmetry of a list of histograms.
void abs(TH1 *hist)
def getSAMURL(log)
Get SAM URL from submission log.
Definition: common_tools.py:33
def findRootOutput(component, verbose=False, testing=False)
find the root file output of a component
def getQuantiles(histogram, n_bins)
function for dividing histograms into quantiles
::xsd::cxx::tree::type type
Definition: Database.h:110
def filterValidation(filter_pattern, name, version)
check if a validation matches a logical pattern
def findMaxSkew(plots, verbose=True)
Find the maximum skew of a list of histograms.
def sortPlotList(plots)
Sort a list of plots.
def findAsymmetry(plot)
Find the asymmetry of histogram.
def getHistogramCategoriesAndObservables(plots)
get all of the different histogram categories
def findPlotTypes(plots, verbose=True)
plots can have different truth levels and different reco levels, group them here
def directoryToURL(directory)
convert a directory to a URL
def fail(msg)
print a failure message, from: https://cdcvs.fnal.gov/redmine/projects/novaart/repository/entry/trunk...
Definition: common_tools.py:7
def openPlots(root_file_name, skip_2d=False, directories=False, verbose=False)
open all histograms
def warn(msg)
print a warning message, from: https://cdcvs.fnal.gov/redmine/projects/novaart/repository/entry/trunk...
Definition: common_tools.py:16
def getHistogramObservables(plots)
get all of the different histogram observables
def fetchLogs(job_id, run_datetime=False, min_caching_age_days=10, verbose=False)
fetch log files for a jobsub job
Definition: common_tools.py:63
def mkdir(directory, verbose=False)
make a directory
def getHistogramTrueLevel(name)
return the logical true-level of a histogram
TDirectory * dir
Definition: macro.C:5
def getPlotStats(plots, area_norm=True, verbose=True)
Read a bunch of statistics from an array of plots.
exit(0)
def fileModificationDatetime(filename)
get creation time of a file from: http://stackoverflow.com/questions/237079/how-to-get-file-creation-...
Definition: common_tools.py:56
def isLogicalHistogramName(name, strict=False)
check if a histogram name follows the logical naming convention
def findMinMax(histograms, ignore_zero=False, include_errors=True, verbose=False)
find the maximum and minimum of a list of histograms
def getMedian(histogram)
function for getting the median of a histogram
def getStationMonitor(log)
Get SAM station monitor from submission log.
Definition: common_tools.py:44
def observableName(name)
get a histogram observable name