MakeCanvases.py
Go to the documentation of this file.
1 """
2  MakeCanvases.py:
3  Build canvases used in validation web pages.
4 
5  Original author: J. Wolcott <jwolcott@fnal.gov>
6  Date: September 2016
7 """
8 
9 import argparse
10 import copy
11 import itertools
12 import os.path
13 import sys
14 
15 import ROOT
16 
17 from controllers.ControllerBase import ControllerBase
18 from models.Organizational import PlotCollectionKey
19 from models.Organizational import ComparisonRegistry
20 from models.Organizational import Configuration
21 from models.Organizational import PlotID
22 from models.Organizational import PlotLibrary
23 from models.Organizational import PlotRegistry
24 from models.Organizational import PlotSet
25 from models.PlotInfo import CanvasSummary
26 from models.PlotInfo import ComparisonSummary
27 from models.PlotInfo import Exposure
28 from tools.ImageMaker import ImageMaker
29 from tools import PathTools
30 
# File suffixes that canvases are written out as (see the loop over
# OUTPUT_TYPES in CanvasMaker.WriteAndRegisterCanvas).
# Entries that are commented out are disabled.
OUTPUT_TYPES = [
    # "svg",
    # "json",
    "root",
]

# "json" is only kept when this ROOT module exposes TBufferJSON;
# otherwise it is dropped from the list.
if "json" in OUTPUT_TYPES and not hasattr(ROOT, "TBufferJSON"):
    OUTPUT_TYPES.remove("json")
42 
# Colour palette passed to the image maker as "colours"
# (see CanvasMaker.WriteAndRegisterCanvas).
GOOD_COLORS = [
    # first group
    ROOT.kRed,
    ROOT.kBlue,
    ROOT.kGreen + 3,
    ROOT.kBlack,

    # second group
    ROOT.kOrange + 2,   # brown
    ROOT.kMagenta + 1,
    ROOT.kSpring - 1,
    ROOT.kGray + 2,

    # third group
    ROOT.kRed - 6,
    ROOT.kCyan + 1,
    ROOT.kYellow + 2,
    ROOT.kTeal - 7,

    # rejected candidates, kept here together with the reason for rejection:
    # ROOT.kViolet-4,   # looks too much like kMagenta+1
    # ROOT.kRed+3,      # looks sorta black
    # ROOT.kViolet+7,   # looks too much like kBlue
    # ROOT.kAzure+7,    # looks too much like kViolet-4
    # ROOT.kGreen,      # too light
]
65 
class NoDirectory(object):
    """ Syntactic sugar object to make the setting of a TObject's directory to None easier.

    If NO_DIR is a NoDirectory instance, write

        NO_DIR >> obj

    to get back the object with its directory set to None.
    Objects without a SetDirectory() method are returned unchanged.
    """

    def __rshift__(self, obj):
        """ Call obj.SetDirectory(None) when that method exists; always return obj. """
        if hasattr(obj, "SetDirectory"):
            obj.SetDirectory(None)
        return obj


NO_DIR = NoDirectory()
79 
class Warner(object):
    """ Syntactic sugar object to make writing a single instance of a warning to stderr
        (no matter how many times it's called) easier.
    """

    def __init__(self):
        # every warning message that has already been emitted
        self._warned = set()

    def __call__(self, warning):
        """ Write `warning` to stderr the first time it is seen; ignore repeats. """
        if warning in self._warned:
            return

        self._warned.add(warning)
        # sys.stderr.write() is used rather than the Python-2-only
        # 'print >>' statement; the emitted text is the same
        # (bold red "Warning:" label, the message, then a blank line).
        sys.stderr.write("\n\n\033[31m\033[1mWarning:\033[0m %s \n\n" % (warning,))


SINGLE_WARN = Warner()
95 
class HistogramExtractor(object):
    """ Walks a ROOT directory tree and yields the histograms whose names pass the filters. """

    def __init__(self, accept_filters=None, reject_filters=None):
        """
        accept_filters: name prefixes; when non-empty, ONLY names starting with one of them are kept.
        reject_filters: name prefixes; consulted only when accept_filters is empty,
                        in which case names starting with one of them are dropped.
        """
        # None (rather than a shared [] default) so that instances never alias one list
        self.accept_filters = [] if accept_filters is None else accept_filters
        self.reject_filters = [] if reject_filters is None else reject_filters

    def KeepHist(self, name):
        """ Return True if an object called `name` passes the prefix filters. """
        if self.accept_filters:
            return name.startswith(tuple(self.accept_filters))

        return not name.startswith(tuple(self.reject_filters))

    def ExtractHistsFromDirectory(self, tdirectory, base_path=""):
        """ Generator over (path, histogram) pairs found beneath `tdirectory`.

        Every yielded histogram has been detached from its directory (NO_DIR >> h)
        and carries an `exposure` attribute (None for plain histograms).
        `base_path` is the "/"-joined path of the enclosing directories.
        """
        for tkey in tdirectory.GetListOfKeys():
            obj = tkey.ReadObj()
            name = obj.GetName()

            if isinstance(obj, ROOT.TH1) and self.KeepHist(name):
                # the part of the name before the last "/" (if any) is treated as a path
                path, sep, short_name = name.rpartition("/")
                if sep:
                    obj.SetName(short_name)
                obj.exposure = None
                yield base_path + ("/" if base_path and path else "") + path, (NO_DIR >> obj)

            # these were CAFAna Spectra, once
            elif isinstance(obj, ROOT.TDirectory) and \
                    obj.Get("type") and \
                    obj.Get("type").GetName() == "Spectrum" and \
                    self.KeepHist(obj.GetName()):
                h = obj.Get("hist")

                # exposure is read from bin 1 of the "pot" and "livetime" objects;
                # a zero (or a missing object) means "no such exposure"
                exposure_args = {}
                for exp_type, obj_name in (("POT", "pot"), ("livetime", "livetime")):
                    exp_hist = obj.Get(obj_name)
                    value = exp_hist.GetBinContent(1) if exp_hist else 0
                    exposure_args[exp_type] = None if value == 0 else value
                h.exposure = Exposure(**exposure_args)
                h.SetName(obj.GetName())

                # labels for new-style CAFs are stored separately
                for label_idx, axis_name in enumerate("XYZ"):
                    title = obj.Get("label%d" % label_idx)
                    if title:
                        getattr(h, "Get%saxis" % axis_name)().SetTitle(str(title))

                yield base_path, (NO_DIR >> h)

            elif isinstance(obj, ROOT.TDirectory):
                # ordinary subdirectory: recurse, extending the path
                sub_base = name if not base_path else "/".join((base_path, name))
                for sub_path, hist in self.ExtractHistsFromDirectory(obj, base_path=sub_base):
                    yield sub_path, (NO_DIR >> hist)
150 
151 class CanvasMaker(ControllerBase):
class _ReferenceHistComp(object):
    """ Helper type to do comparisons to a reference histogram.
        Used in the display variations within CanvasMaker. """

    def __init__(self, comparison_function, ignore_ref=False):
        """
        comparison_function: callable applied to a histogram to get the value compared.
        ignore_ref: when True, the reference is not evaluated at all and the subject's
                    own value is returned (basically a hack so that area normalization
                    works, where you don't want the ref). better solutions welcome
        """
        self.comp_fn = comparison_function
        self.ignore_ref = ignore_ref

    def __call__(self, reference, subject):
        """ Return comp_fn(reference) / comp_fn(subject), or just comp_fn(subject)
            if ignore_ref is set.  None is returned when the ratio can't be formed
            (e.g. either value is None, or the subject's value is zero). """
        if self.ignore_ref:
            return self.comp_fn(subject)

        ref_val = self.comp_fn(reference)
        sub_val = self.comp_fn(subject)
        try:
            return ref_val / sub_val
        except (TypeError, ArithmeticError):
            # only the failures a division can produce are absorbed here;
            # anything else (e.g. KeyboardInterrupt) propagates
            return None
172 
# Variations in the arguments to the ImageMaker.
# Layout: {option group: {choice name: {ImageMaker argument: value}}}.
# A _ReferenceHistComp value is a placeholder: WriteAndRegisterCanvas
# replaces it with one computed value per histogram being drawn.
DISPLAY_VARIATIONS = {
    "Log y": {
        "enabled": {"log_y": True},
        "disabled": {"log_y": False},
    },

    "Normalization": {
        # 'raw' goes through _ReferenceHistComp too, so that a
        # normalization factor is generated for each histogram
        # (which is what the draw routine expects)
        "raw": {
            "norm_factors": _ReferenceHistComp(lambda h: None),
        },
        # each histogram is scaled by its own 100/sum-of-weights (reference ignored)
        "area": {
            "norm_factors": _ReferenceHistComp(
                lambda h: 100./h.GetSumOfWeights() if h.GetSumOfWeights() > 0 else 1,
                ignore_ref=True,
            ),
            "axis_labels": ";Percent of entries",
        },
        # ratio of the reference's exposure (POT, else livetime) to each histogram's
        "exposure": {
            "norm_factors": _ReferenceHistComp(
                lambda h: (h.exposure.POT or h.exposure.livetime) if (hasattr(h, "exposure") and h.exposure) else None
            ),
        },
    },
}

# option groups that are left out of the file name and draw arguments
# when the output suffix is "json" or "root"
DISPLAY_VARIATIONS_NOJSROOT = ["Log y"]
196 
def __init__(self, config):
    """ Store the configuration (via the base class) and set up the histogram extractor.

    config: configuration object; its plot_accept_filters and plot_reject_filters
            attributes are handed to the HistogramExtractor.
    """
    super(CanvasMaker, self).__init__(config)

    # running count of image files written; WriteAndRegisterCanvas does
    # 'self.n_images_written += ...', so it has to exist before the first call
    self.n_images_written = 0

    # filled on the first call to LoadHistograms()
    self.histogram_table = None
    self.histogram_extractor = HistogramExtractor(
        accept_filters=self.config.plot_accept_filters,
        reject_filters=self.config.plot_reject_filters,
    )
205 
def LoadHistograms(self):
    """ Sift through all the files in the configuration and discover histograms.

    Fills self.histogram_table (a PlotLibrary) with one entry per extracted histogram,
    keyed by PlotCollectionKey(PlotID(<histogram name>), <dataset name>, <path>).
    Does the work only once: later calls return immediately.
    3D histograms are reported on stderr and skipped.

    Raises AssertionError if an input file name does not end in ".root"
    or if the file cannot be opened.
    """

    # only do once
    if self.histogram_table is not None:
        return

    self.histogram_table = PlotLibrary()

    # 'basestring' only exists on Python 2; use 'str' where it is absent
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # todo: would like to ensure that only input files that are newer than any pre-existing registry are remade.
    #       however, want everything to be in the comparisons.  hmm.

    for dataset_name, file_list in self.config.histogram_topdirs.items():
        # maybe the "file list" is just one file
        if isinstance(file_list, string_types):
            file_list = [file_list]

        for file_name in file_list:
            assert file_name.lower().endswith(".root"), "Can't open non-ROOT file input '%s'" % file_name

            f = ROOT.TFile(file_name)
            # a TFile that failed to open is still a non-null object flagged as a
            # "zombie", so the truth test alone does not catch the failure
            assert f and not f.IsZombie(), "Can't open ROOT file '%s'" % file_name

            for path, histogram in self.histogram_extractor.ExtractHistsFromDirectory(f):
                if isinstance(histogram, ROOT.TH3):
                    sys.stderr.write("Don't know how to handle 3D histograms. Skipping histogram: %s\n" % histogram.GetName())
                    continue
                self.histogram_table[PlotCollectionKey(PlotID(histogram.GetName()), dataset_name, path)] = histogram
234 
def MakeCanvases(self):
    """ Master method for canvas making.

    In order:
      * loads the histograms (LoadHistograms) and creates the ImageMaker and the registries;
      * writes one canvas per histogram, collecting category overlays and category sums on the way;
      * writes the category sums (also adding them to self.histogram_table) and the overlays;
      * writes dataset-comparison canvases (with ratio) for plots present in 2+ datasets;
      * serializes both registries into self.config.validation_dir.
    """

    self.LoadHistograms()

    self.im = ImageMaker()

    self.plot_registry = PlotRegistry()
    self.comparison_registry = ComparisonRegistry()

    # supplemental collections that will be used for aggregations
    overlays = PlotLibrary()
    sums = PlotLibrary()

    by_name_then_dataset = PlotLibrary()

    # make canvases for each histogram (or set of histograms, if there are categories)
    for key, histogram in self.histogram_table.items():
        if histogram.GetTitle() == "":
            histogram.SetTitle(key.plot_id.name)

        # key with name and path but no category
        no_category_key = PlotCollectionKey(
            plot_id={
                "name": key.plot_id.name,
                "categories": [],
            },
            data_set=None,
            path=key.path
        )
        by_name_then_dataset.setdefault(no_category_key, {}).setdefault(key.data_set, PlotSet()).add(key)

        # couple extra things to do for category breakdowns...
        # (note that breakouts & sums are only made for the last category
        #  to keep the combinatorics under control)
        if len(key.plot_id.categories) > 0:
            # slice off the last 'category group' to make a collection of all the plots in the same category.
            # use the magic string in PlotID to identify it
            categories = key.plot_id.categories[:-1]
            categories.append(PlotID.OVERLAY_STRING)
            plotid_args = {
                "name": key.plot_id.name,
                "categories": categories,
                "groups": key.plot_id.groups
            }
            overlay_key = PlotCollectionKey(PlotID(plotid_args), key.data_set, key.path)
            overlays.setdefault(overlay_key, {})[key.plot_id.categories[-1]] = histogram

            # make a 'sum of all categories version'
            # (plotid_args["categories"] is this same list object, so it sees the change)
            categories[-1] = PlotID.SUM_STRING
            sum_key = PlotCollectionKey(PlotID(plotid_args), key.data_set, key.path)
            if sum_key not in sums:
                sum_histogram = histogram.Clone(sum_key.plot_id.id_str)
                sum_histogram.exposure = histogram.exposure
                sum_histogram.SetTitle(histogram.GetTitle() or "Untitled plot")
                sums[sum_key] = sum_histogram
                overlays[overlay_key][PlotID.SUM_STRING] = sum_histogram
            else:
                sums[sum_key].Add(histogram)

        self.WriteAndRegisterCanvas({histogram.GetTitle(): histogram}, self.PrepareOutputPath(key.data_set, key.path, key.plot_id.id_str), key)

    # also write out the sums separately
    for key, histogram in sums.items():
        self.WriteAndRegisterCanvas(
            {histogram.GetTitle(): histogram},
            self.PrepareOutputPath(key.data_set, key.path, key.plot_id.id_str),
            key
        )
        self.histogram_table[key] = histogram

    # make overlay plots for plots with multiple categories
    for key, histogram_collection in overlays.items():
        self.WriteAndRegisterCanvas(
            histogram_collection,
            self.PrepareOutputPath(key.data_set, key.path, key.plot_id.id_str),
            key,
            plot_order=self.config.plot_order,
        )

    # dataset comparisons.
    # a bit tricky:
    # we always assume that:
    #   * the 'deepest' category set fully partitions the available choices;
    #   * that categories 'higher' than the deepest one are interchangeable
    #     and should all be shown as options.
    # so we have to be pretty careful to do the comparisons correctly
    # if different datasets have different numbers of categories
    # (frequently happens when comparing data
    #  to MC interaction type breakdowns in the same distribution)...
    comparison_dirname = PathTools.ComparisonSubdirName(self.config.histogram_topdirs)
    for no_category_key, plots_by_dataset in by_name_then_dataset.items():
        if len(plots_by_dataset) < 2:
            continue

        # per dataset: a list (one entry per category level) of the set of category names seen
        category_lists = {}
        max_num_gps = 0
        for dataset_name, plot_keys in plots_by_dataset.items():
            category_lists[dataset_name] = []
            cl = category_lists[dataset_name]
            for plot_key in plot_keys:
                for cat_idx, cat_name in enumerate(plot_key.plot_id.categories):
                    if len(cl) < cat_idx + 1:
                        cl.append(set())
                        max_num_gps = max(max_num_gps, cat_idx + 1)
                    cl[cat_idx].add(cat_name)

        identical_keys = True
        for gp_idx in range(max_num_gps):
            # if the datasets don't even have the same *number* of categories, clearly their keys are different
            if len(set([len(category_lists[ds]) for ds in category_lists])) != 1:
                identical_keys = False
                break

            # the union over datasets is as large as one dataset's own set
            # only if no other dataset contributes extra names at this level
            first_dataset_lists = next(iter(category_lists.values()))
            if len(set.union(*[category_lists[ds][gp_idx] for ds in category_lists])) != len(first_dataset_lists[gp_idx]):
                identical_keys = False
                break

        # now combine all the keys from the datasets together
        category_lists = [
            set.union(*[cat_list[cat_idx] for ds, cat_list in category_lists.items() if cat_idx < len(cat_list)])
            for cat_idx in range(max_num_gps)
        ]

        # remember, we assume that the last category group
        # fully partitions the space, so don't choose from
        # among the deepest level.
        # the exception here is if all datasets' plots have
        # exactly the same category partitioning; in that case
        # we should compare them category-by-category.
        if len(category_lists) > 0 and not identical_keys:
            del category_lists[-1]

        # go through the category options in sequence.
        # whenever we find a sum, that's the last level
        # for this dataset.
        for category_combination in itertools.product(*category_lists):
            to_plot = {}
            for dataset_name in plots_by_dataset:
                categories = []
                i = -2  # want to try [], [SUM_STRING], [category 1][SUM_STRING], ...
                matched = False
                no_sum = False
                while not matched:
                    # NOTE(review): 'plot_key' here is whatever the category-collection loop
                    # above left behind (its last key), not a key chosen for this dataset.
                    # Kept as-is; confirm that this is what is intended.
                    reduced_groups = {k: v for k, v in plot_key.plot_id.groups.items() if v < len(categories)}
                    reduced_key = PlotCollectionKey(
                        plot_id={
                            "name": plot_key.plot_id.name,
                            "categories": categories,
                            "groups": reduced_groups
                        },
                        data_set=dataset_name,
                        path=no_category_key.path
                    )

                    if reduced_key in self.histogram_table:
                        # we found a sum, but we haven't got one of each
                        # category group, so we know the longest key is longer
                        # than this one.  we therefore don't want the sum.
                        # instead, we want to go around once more
                        # without the SUM_STRING to get the
                        # single category that matches the category_combination.
                        if len(categories) <= len(category_combination) and len(categories) > 0 and categories[-1] == PlotID.SUM_STRING:
                            del categories[-1]  # strip off SUM_STRING
                            no_sum = True
                        else:
                            matched = True
                            to_plot[dataset_name] = reduced_key
                            break

                    i += 1
                    if i >= len(category_combination):
                        break

                    if no_sum:
                        categories.append(category_combination[i])
                    elif i == -1:
                        categories.append(PlotID.SUM_STRING)
                    else:
                        # keep SUM_STRING last: the new category goes in front of it
                        categories.insert(-1, category_combination[i])

                if not matched:
                    print("Could not find plot for category sequence %s in plot / dataset = %s / %s"
                          % (category_combination, no_category_key.plot_id, dataset_name))
                    continue

            # if any of the plots are 2D, we (usually) can't do a useful comparison.
            # don't try unless forced to.
            if any(isinstance(self.histogram_table[key], ROOT.TH2) for key in to_plot.values()) and not self.config.force_2d_comp:
                SINGLE_WARN("comparisons will not be made for 2D plots. Pass 'force_2d_comp' as true in your config to change.")
                continue

            # find one key that contains all the others.
            # that'll be the one we use to identify this comparison.
            strictest_key = None
            for key in to_plot.values():
                stricter = all(key.plot_id in other_key.plot_id for other_key in to_plot.values() if other_key != key)
                if stricter:
                    strictest_key = key
                    break
            assert strictest_key, "Couldn't find one key that contains all the others in this collection. Keys in collection: %s" % to_plot.values()

            # remove the last category (the one with the SUM_STRING in it)
            # (unless it doesn't have SUM_STRING in it because it's from the top level)
            # fixme: should have a way to edit the PlotID in-place, but would require
            #        making 'PlotID.categories' and 'PlotID.groups' properties
            #        too.  no time right now.
            ##### todo: want to ALSO do the sum_string one if the matching keys
            ####        across the different datasets are all from the same level
            categories = strictest_key.plot_id.categories
            if len(strictest_key.plot_id.categories) > 0 and categories[-1] == PlotID.SUM_STRING:
                categories = strictest_key.plot_id.categories[:-1]
            comparison_key = PlotCollectionKey(
                plot_id={
                    "name": strictest_key.plot_id.name,
                    "categories": categories,
                    "groups": {k: v for k, v in strictest_key.plot_id.groups.items() if v < len(strictest_key.plot_id.categories) - 1}
                },
                data_set=None,
                path=no_category_key.path
            )

            # finally, draw & register
            plots = {ds: self.histogram_table[key] for ds, key in to_plot.items()}
            ratio_denom = None
            if self.config.ratio_denom is not None and self.config.ratio_denom in plots:
                ratio_denom = self.config.ratio_denom
            self.WriteAndRegisterCanvas(
                histograms=plots,
                outfile_stub=self.PrepareOutputPath(PathTools.DATASET_COMPARISON_DIRNAME, comparison_dirname, no_category_key.path, comparison_key.plot_id.id_str),
                key=comparison_key,
                include_ratio=True,
                plot_order=self.config.plot_order,
                ratio_denom=ratio_denom
            )

            # a ComparisonSummary is only recorded for pairwise comparisons
            if len(to_plot) == 2:
                c = ComparisonSummary({key: self.histogram_table[key] for key in to_plot.values()})
                self.comparison_registry.comparisons[comparison_key] = c

    self.plot_registry.Serialize(self.config.validation_dir)
    self.comparison_registry.Serialize(self.config.validation_dir)
481 
def WriteAndRegisterCanvas(self, histograms, outfile_stub, key, include_ratio=False, plot_order=None, ratio_denom=None):
    """ Draw `histograms` together in every display variation and register the results.

    histograms:    dict of {label: histogram} to draw on one canvas.
    outfile_stub:  output path without option string or suffix; its directory is created if needed.
    key:           PlotCollectionKey identifying this canvas in the plot registry.
    include_ratio: passed to the image maker as "with_ratio"; also selects registration
                   as a comparison plot (True) or an ordinary plot (False).
    plot_order:    labels to be drawn first, in this order; None is treated as "no preference".
    ratio_denom:   label of the histogram to use as reference; the first one drawn if absent.
    """
    if plot_order is None:
        plot_order = []

    outdir = os.path.dirname(outfile_stub)
    # (a stub without a directory part gives "", which makedirs can't handle)
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)

    # use the specified order (if relevant).
    # if any histograms not in the specified order, alphabetize them
    # (reverse alphabetized, actually, so that they wind up
    #  in descending alphabetical order in the legend).
    # also be sure to put all_sum at the top (drawn last)
    histogram_keys = [p for p in plot_order if p in histograms]
    histogram_keys += [p for p in sorted(histograms, reverse=True) if p not in plot_order]
    if PlotID.SUM_STRING in histogram_keys:
        histogram_keys.remove(PlotID.SUM_STRING)
        histogram_keys.append(PlotID.SUM_STRING)
    histogram_objs = [histograms[p] for p in histogram_keys]
    ref_idx = histogram_keys.index(ratio_denom) if ratio_denom in histogram_keys else 0
    sample_hist = histogram_objs[ref_idx]

    # enumerate all the possible combinations.
    # the two lists are index-aligned; the choices are deep-copied
    # because they get edited below (don't want to run into aliasing issues)
    option_groups = list(CanvasMaker.DISPLAY_VARIATIONS)
    option_choices = [copy.deepcopy(CanvasMaker.DISPLAY_VARIATIONS[group]) for group in option_groups]

    # check the choices for _ReferenceHistComp objects,
    # which signify we have some more work to do.
    # for now, the 'reference' histogram is either the first one
    # or the one specified by ratio_denom
    for choice_list in option_choices:
        for choice_opts in choice_list.values():
            for arg, val in list(choice_opts.items()):
                if isinstance(val, CanvasMaker._ReferenceHistComp):
                    choice_opts[arg] = [val(sample_hist, h) for h in histogram_objs]

    normalization_idx = option_groups.index("Normalization")
    has_exposure = any(hasattr(h, "exposure") and h.exposure for h in histogram_objs)

    # iterating over a dict gives its keys, so each combination is a tuple of choice names
    for option_choice_set in itertools.product(*option_choices):

        # special case: "exposure" normalization is dumb if we don't have exposure info
        if option_choice_set[normalization_idx] == "exposure" and not has_exposure:
            continue

        files_written = []
        option_string = ""
        for suffix in OUTPUT_TYPES:
            option_string = ""
            option_args = {}
            for option_idx, option_choice_name in enumerate(option_choice_set):
                if suffix in ("json", "root") and option_groups[option_idx] in CanvasMaker.DISPLAY_VARIATIONS_NOJSROOT:
                    continue

                option_string += "{group=%s,cat=%s}" % (option_groups[option_idx], option_choice_name)
                option_args.update(option_choices[option_idx][option_choice_name])

            args = {
                "input_histograms": histogram_objs,
                "ref_hist_idx": ref_idx,
                "labels": histogram_keys,
                "colours": GOOD_COLORS,
                "axis_labels": "%s;%s" % (sample_hist.GetXaxis().GetTitle(), sample_hist.GetYaxis().GetTitle()),
                "save_as": outfile_stub + option_string,
                "suffixes": ["." + suffix],
                "with_ratio": include_ratio,
            }

            # sigh.  special-case because sometimes the options only want to update ONE of the axis labels
            # (e.g. for normalization): an empty part of the option's "x;y" string falls back to the default
            if "axis_labels" in option_args:
                xaxis_pair, yaxis_pair = zip(*(labels.split(";") for labels in (option_args["axis_labels"], args["axis_labels"])))
                option_args["axis_labels"] = ";".join(update or default for update, default in (xaxis_pair, yaxis_pair))

            args.update(option_args)

            # count per draw() call, so a file is counted once
            # however many output types are enabled
            new_files = self.im.draw(**args)
            files_written += new_files
            self.n_images_written += len(new_files)
            sys.stdout.write("\rWrote %d canvases" % self.n_images_written)
            sys.stdout.flush()

        # nothing was written (e.g. no output types enabled): nothing to register
        if not files_written:
            continue

        plot_id = PlotID(key.plot_id.id_str + option_string)
        name = key.plot_id.id_str if len(histograms) > 1 else histogram_keys[0]
        info = CanvasSummary(name=name, plots=histogram_objs, labels=histogram_keys)
        registry_key = PlotCollectionKey(plot_id=plot_id, data_set=None if include_ratio else key.data_set, path=files_written[0])
        if include_ratio:
            self.plot_registry.AddComparisonPlot(registry_key, info)
        else:
            self.plot_registry.AddPlot(registry_key, info)
568 
# bootstrap, load config, go!
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run validation.')
    # the file is read with Configuration.FromYAML below, so the help text says YAML
    parser.add_argument('yaml_config', metavar='yaml_config',
                        help='YAML configuration as specified in README.txt')

    args = parser.parse_args()

    config = Configuration.FromYAML(args.yaml_config)

    maker = CanvasMaker(config)

    # single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function (Python 3)
    print("Building canvases...")
    print(" (base output directory: %s)" % config.validation_dir)
    maker.MakeCanvases()
    print("... done. Bye.")
void split(double tt, double *fr)
void Exposure()
Definition: getTimePeak.C:54
def __init__(self, accept_filters=[], reject_filters=[])
Definition: MakeCanvases.py:97
def WriteAndRegisterCanvas(self, histograms, outfile_stub, key, include_ratio=False, plot_order=[], ratio_denom=None)
gargamelle SetTitle("Gargamelle #nu_{e} CC data")
Definition: novas.h:112
std::string GetName(int i)
def __init__(self, comparison_function, ignore_ref=False)
static void Add(TH3D *h, const int bx, const int by, const int bz, const double w)
T max(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
Definition: statistics.h:66
def ExtractHistsFromDirectory(self, tdirectory, base_path="")