Organizational.py
Go to the documentation of this file.
1 """
2  models/Organizational.py:
3  Models related to plot organization used in the validation.
4 
5  Original author: J. Wolcott <jwolcott@fnal.gov>
6  Date: September 2016
7 """
8 
9 import collections
10 import cPickle
11 import os.path
12 import pprint
13 import re
14 import yaml
15 
17  """ Central configuration for Validation run. """
18 
19  MANDATORY = [
20  "histogram_topdirs",
21  "output_dir",
22  "validation_name",
23  ]
24 
25  DEFAULTS = {
26  "plot_accept_filters": [], # accept only plots with names starting with any of these strings
27  "plot_reject_filters": [], # reject plots with names starting with any of these strings
28  "static_files_url": None,
29  "make_dirs": True,
30  "ratio_denom": None, # in multi-dataset comparisons, which dataset is the denominator?
31  "plot_order": [], # organize plots in this order (applies both to lowest level categories in single-sample plots and datasets in multi-dataset comparisons; any unrecognized name is ignored)
32  "force_2d_comp": False, # by default 2D plots won't be compared since the comparisons are usually useless. Set to True to re-enable
33  }
34 
35  @staticmethod
36  def FromYAML(yaml_fname):
37  return Configuration(**yaml.load(open(yaml_fname)))
38 
39  def __init__(self, **kwargs):
40  # dispatch special args for treatment. otherwise just set
41  for kw, val in kwargs.iteritems():
42  kw = kw.lower()
43  if kw == "folders":
44  self.folders = Configuration._ParseFolders(val)
45  elif kw in Configuration.MANDATORY or kw in Configuration.DEFAULTS:
46  setattr(self, kw, val)
47  else:
48  raise KeyError("Unknown configuration parameter '%s' with value: %s" % (kw, val))
49 
50  # set defaults...
51  for kw, val in Configuration.DEFAULTS.iteritems():
52  if not hasattr(self, kw):
53  setattr(self, kw, val)
54 
55  self._Validate()
56 
57  self.validation_dir = os.path.join(self.output_dir, self.validation_name)
58 
59  @staticmethod
60  def _ParseFolders(folder_args):
61  return [Folder.FromDict(folder_config) for folder_config in folder_args]
62 
63 
64  def _Validate(self):
65  """ Ensure mandatory attributes were specified, consistency between arguments, etc. """
66 
67  missing_attrs = [k for k in Configuration.MANDATORY if not hasattr(self, k)]
68  if len(missing_attrs) > 0:
69  raise ValueError("Configuration attributes %s are all mandatory (missing: %s)" % (Configuration.MANDATORY, missing_attrs))
70 
71  if len(self.plot_accept_filters) > 0 and len(self.plot_reject_filters) > 0:
72  raise ValueError("Can't specify both 'plot_accept_filters' and 'plot_reject_filters' in Configuration")
73 
class PlotID(object):
    """ Wrapper around plot identifier.  Simplifies access to 'group' interface.

    ID string as input should have the following format:

        name_string<group1_string><group2_string>[possibly more <group_string>s]

    where the <group_string>s can either be formatted as

        {category_name}

    or

        {group=group name,cat=category name}

    Valid example:
        sample_plot{nocuts}{group=MC,cat=DIS}

    Note that if you specify a category using the category-only syntax ('{category_name}'),
    you must ensure it always occurs in the same sequence in any plots that are intended
    to be grouped together (i.e., have the same name_string).

    Attributes (all re-derived every time `id_str` is assigned):
        name        -- the text before the first '{'
        categories  -- list of category strings, in the order they appear
        groups      -- dict mapping group name -> index into `categories`
    """

    CATEGORY_PATTERN = re.compile(r"(\{group=(?P<group>.+?),cat=(?P<group_cat>.+?)\}|\{(?P<cat>.+?)\})")
    OVERLAY_STRING = "all_overlay"
    SUM_STRING = "all_sum"

    # 'basestring' only exists under Python 2 (where it covers str and unicode);
    # fall back to 'str' so the class also works under Python 3.
    try:
        _STRING_TYPES = (basestring,)
    except NameError:
        _STRING_TYPES = (str,)

    def __init__(self, init):
        """ Construct from one of:
              * an ID string in the format described in the class docstring
              * any object with an `id_str` attribute (e.g. another PlotID), which is copied
              * a container supporting `in` and item lookup with keys "name" and
                "categories" (a list) and optionally "groups" ({group name: category index})

            Raises TypeError for any other kind of argument, ValueError if the
            resulting ID string can't be parsed, and AssertionError if a dictionary-style
            argument doesn't survive the round trip through the ID string.
        """
        # passed a path string
        if isinstance(init, PlotID._STRING_TYPES):
            self.id_str = init
        # passed another PlotID instance -- just copy
        elif hasattr(init, "id_str"):
            self.id_str = init.id_str
        # passed a dictionary with the name, categories, and possibly groups specified
        elif hasattr(init, "__contains__") and all(prop in init for prop in ("name", "categories")):
            has_groups = "groups" in init

            # invert {group name: category index} into {category index: group name}
            groups_by_idx = {}
            if has_groups:
                groups_by_idx = {idx: group_name for group_name, idx in init["groups"].items()}

            id_str = init["name"]
            for idx, cat in enumerate(init["categories"]):
                if idx in groups_by_idx:
                    id_str += "{group=%s,cat=%s}" % (groups_by_idx[idx], cat)
                else:
                    id_str += "{%s}" % cat
            self.id_str = id_str

            # categories, groups will now be re-deduced from the string
            # check that we got the same thing we began with...
            # (only look at init["groups"] when it exists, so that a failed check
            #  is reported as the AssertionError below rather than as a KeyError)
            given_groups = init["groups"] if has_groups else None
            groups_ok = (given_groups == self.groups) if has_groups else (len(self.groups) == 0)
            assert self.categories == init["categories"] and groups_ok, \
                "Didn't correctly reconstruct groups and categories for PlotID.  Given: %s, %s; reconstructed: %s, %s" \
                % (given_groups, init["categories"], self.groups, self.categories)
        else:
            # previously this fell through silently, leaving an object with no id_str at all
            raise TypeError("Can't construct a %s from an object of type %s" % (self.__class__.__name__, type(init)))

    @property
    def id_str(self):
        """ The full ID string.  Assigning to it re-parses name, categories and groups. """
        return self._id_str

    @id_str.setter
    def id_str(self, val):
        self._id_str = val
        self._Parse()

    def __contains__(self, other):
        """ Membership test (`other in self`).  True only when all of these hold:
             * `other` has 'name', 'categories' and 'groups' attributes
             * both share the same 'name'
             * this ID's category list is no longer than the other's
             * this ID's categories equal the leading categories of the other
             * a group name present in both refers to the same category index, and
               a category index that is grouped in both carries the same group name.

            NOTE(review): this docstring used to say that *this* ID needs "at least as
            long a category list", which is the opposite of what the length check below
            does.  The logic is unchanged; confirm which direction is intended.
        """

        if not all(hasattr(other, prop) for prop in ("name", "categories", "groups")):
            return False

        if self.name != other.name:
            return False

        if len(self.categories) > len(other.categories):
            return False

        min_length = min(len(self.categories), len(other.categories))
        if self.categories[:min_length] != other.categories[:min_length]:
            return False

        # make sure the category indices point to the same group names,
        # where they overlap...
        for key in set(self.groups) & set(other.groups):
            if self.groups[key] != other.groups[key]:
                return False

        # also make sure that the groups always point to the same
        # category indices as well
        my_reverse_map = {v: k for k, v in self.groups.items()}
        other_reverse_map = {v: k for k, v in other.groups.items()}
        for key in set(my_reverse_map) & set(other_reverse_map):
            if my_reverse_map[key] != other_reverse_map[key]:
                return False

        return True

    def __eq__(self, other):
        """ Equal to anything that has an identical `id_str` attribute. """
        return hasattr(other, "id_str") and self.id_str == other.id_str

    # Python 2 does not derive '!=' from __eq__(), so spell it out
    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return "'%s'" % self.id_str

    def _Parse(self):
        """ Try to understand the ID string: fill name, categories and groups.

            Raises ValueError if any text after the name is not part of a
            '{...}' category specification.
        """

        # sequential list of categories buried in the ID
        self.categories = []

        # the indices of named groups in the category list above
        # (so, e.g., the category within group "MC_breakdown" might
        #  correspond to index number 2 in the list:
        #  groups["MC_breakdown"] = 2)
        self.groups = {}

        splitted = self.id_str.split("{", 1)
        self.name = splitted[0]
        if len(splitted) == 1:
            return

        remnants = "{" + splitted[1]  # the initial "{" got eaten by the split... gotta put it back
        # finditer() keeps scanning the string it was given, so rebinding 'remnants'
        # inside the loop only affects the leftover bookkeeping
        for match in PlotID.CATEGORY_PATTERN.finditer(remnants):
            if all([match.group(k) for k in ("group", "group_cat")]):
                self.categories.append(match.group("group_cat"))
                self.groups[match.group("group")] = len(self.categories) - 1
            elif match.group("cat"):
                self.categories.append(match.group("cat"))
            else:
                # can't be a match any other way...
                raise ValueError("Something bizarre happened in parsing PlotID.  Matched string pieces: %s" % match.groupdict())

            # strip off the matched part
            remnants = remnants.replace(match.group(0), "", 1)

        if len(remnants) > 0:
            raise ValueError("Couldn't parse plot name.  Full string: '%s'; unprocessed leftover text: '%s'" % (self.id_str, remnants))
212 
214  def __init__(self, plot_id, data_set, path=""):
215  # funky initialization syntax required
216  # since __setattr__ overridden for immutability purposes below
217  plot_id = plot_id if isinstance(plot_id, PlotID) else PlotID(plot_id)
218  object.__setattr__(self, "plot_id", plot_id)
219  object.__setattr__(self, "path", path)
220  object.__setattr__(self, "data_set", data_set)
221 
222  def __hash__(self):
223  return hash((self.plot_id.id_str, self.data_set, self.path))
224 
225  def __contains__(self, other):
226  """ One Key 'contains' another if one of their PlotIDs contains the other
227  and their other attributes are the same. """
228 
229  if not all(hasattr(other, attr) for attr in ("plot_id", "data_set", "path")):
230  return False
231 
232  return self.plot_id in other.plot_id and \
233  all(getattr(self, prop) == getattr(other, prop) for prop in ("data_set", "path"))
234 
235  def __eq__(self, other):
236  if not all(hasattr(other, attr) for attr in ("plot_id", "data_set", "path")):
237  return False
238  return (self.plot_id, self.data_set, self.path) == (other.plot_id, other.data_set, other.path)
239 
240  # to cope with Python 2 bug that makes default __ne__() ~ 'not is' instead of '! __eq__()'
241  def __ne__(self, other):
242  return not (self == other)
243 
244  def __repr__(self):
245  return "%s|%s|%s" % (self.plot_id, self.data_set, self.path)
246 
247  # keys for dictionaries aren't allowed to change.
248  # this won't 100% guarantee it (some weird edge cases),
249  # but is enough to prevent accidents
250  def __delattr__(self, attr):
251  raise TypeError("%s is intended to be immutable" % self.__class__)
252  def __setattr__(self, attr, val):
253  raise TypeError("%s is intended to be immutable" % self.__class__)
254 
class PlotSet(collections.MutableSet):
    """ Set of unique plot keys; only PlotCollectionKey objects may be added or looked up. """

    def __init__(self):
        # the actual storage is an ordinary built-in set
        self._cache = set()

    def _RequireKey(self, obj):
        """ Raise TypeError unless `obj` is a PlotCollectionKey. """
        if isinstance(obj, PlotCollectionKey):
            return
        raise TypeError("%s items must be of type %s, not %s" % (self.__class__.__name__, PlotCollectionKey, type(obj)))

    # The methods below are the abstract interface required by
    # collections.MutableSet; the mixin supplies the remaining set operations
    # (see https://docs.python.org/2/library/collections.html#collections.MutableSet)

    def __contains__(self, obj):
        self._RequireKey(obj)
        return obj in self._cache

    def __iter__(self):
        return iter(self._cache)

    def __len__(self):
        return len(self._cache)

    def __repr__(self):
        return repr(self._cache)

    def add(self, obj):
        self._RequireKey(obj)
        self._cache.add(obj)

    def discard(self, obj):
        # no type check here: discarding something that isn't present is a no-op
        self._cache.discard(obj)
286 
287  ### addons
288 
290  """ Iterate over the collections of plots with unique names """
291 
292  work = list(self._cache) # make a copy
293 
294  while len(work) > 0:
295  collection = PlotSet()
296  plot_name = work[0].plot_id.name
297  collection.add([work.pop(0)])
298 
299  # iterate backwards because we're going to be removing stuff
300  # and we don't want the indices of unchecked stuff jumping under us
301  for plot_idx in range(len(work)-1, 0, -1):
302  if work[plot_idx].plot_id.name == plot_name:
303  collection.add(work.pop(plot_idx))
304 
305  yield collection
306 
class PlotLibrary(collections.MutableMapping):
    """ Lookup collection for plots.

    Behaves like a dictionary, except that reading or writing an item
    requires the key to be a PlotCollectionKey.
    """

    def __init__(self):
        # the actual storage is an ordinary built-in dict
        self._cache = {}

    def __repr__(self):
        return repr(self._cache)

    # The methods below are the abstract interface required by
    # collections.MutableMapping; the mixin supplies the rest of the dict API
    # (see https://docs.python.org/2/library/collections.html#collections.MutableMapping)

    def __contains__(self, key):
        return key in self._cache

    def __delitem__(self, key):
        del self._cache[key]

    def __getitem__(self, key):
        if not isinstance(key, PlotCollectionKey):
            raise TypeError("%s keys must be of type %s, not %s" % (self.__class__.__name__, PlotCollectionKey, type(key)))

        try:
            found = self._cache[key]
        except KeyError:
            # re-raise with our own wording so the internal dict isn't exposed
            raise KeyError("%s has no such plot: %s" % (self.__class__.__name__, key))
        return found

    def __iter__(self):
        return iter(self._cache)

    def __len__(self):
        return len(self._cache)

    def __setitem__(self, key, value):
        if isinstance(key, PlotCollectionKey):
            self._cache[key] = value
            return
        raise TypeError("%s keys must be of type %s" % (self.__class__.__name__, PlotCollectionKey))
346 
348  SERIALIZE_FILENAME = "comparison_registry.pkl"
349 
350  def __init__(self):
352 
353  def Serialize(self, base_path):
354  cPickle.dump(self, open(os.path.join(base_path, ComparisonRegistry.SERIALIZE_FILENAME), "w"))
355 
356  @staticmethod
357  def Deserialize(file_name):
358  obj = cPickle.load(open(file_name))
359  if not isinstance(obj, ComparisonRegistry):
360  raise TypeError("Object loaded from alleged ComparisonRegistry file is actually a %s" % type(obj))
361  return obj
362 
363 
364 
366  """ Collect summaries of plots drawn and where they were written in the same place """
367 
368  SERIALIZE_FILENAME = "plot_registry.pkl"
369 
370  def __init__(self):
373 
374  def __repr__(self):
375  return "PlotRegistry:\n by dataset: %s\n cross-dataset comparisons: %s" % (pprint.pformat(self.plots_by_dataset), pprint.pformat(self.dataset_comparisons))
376 
377  def AddPlot(self, plot_key, plot_summary):
378  assert isinstance(plot_key, PlotCollectionKey)
379 
380  self.plots_by_dataset.setdefault(plot_key.data_set, PlotLibrary())[plot_key] = plot_summary
381 
382  def AddComparisonPlot(self, plot_key, plot_summary):
383  self.dataset_comparisons[plot_key] = plot_summary
384 
385  def AddDatasetPlots(self, dataset, plot_summaries):
386  collection = self.plots_by_dataset.setdefault(dataset, PlotLibrary())
387  collection.update(plot_summaries)
388 
389  def Serialize(self, base_path):
390  cPickle.dump(self, open(os.path.join(base_path, PlotRegistry.SERIALIZE_FILENAME), "w"))
391 
392  @staticmethod
393  def Deserialize(file_name):
394  obj = cPickle.load(open(file_name))
395  if not isinstance(obj, PlotRegistry):
396  raise TypeError("Object loaded from alleged PlotRegistry file is actually a %s" % type(obj))
397  return obj
398 
class Folder(object):
    """ Named node in a tree of plot collections: holds plot IDs and nested Folders. """

    # the only keys a folder configuration dictionary may contain
    ALLOWED_ATTRS = [
        "name",
        "plot_ids",
        "subfolders",
    ]

    @staticmethod
    def FromDict(config_dict):
        """ Build a Folder from its dictionary description.

        The JSON/YAML layout (and hence the Python dictionary) looks like:

            {
                "name": "folder1",
                "plot_ids": ["key1", "key2"],
                "subfolders": [
                    {"name": "subfolder1", "plot_ids": ["key1.1", "key1.2"]},
                    {"name": "subfolder2", "plot_ids": ["key2.1", "key2.2"]}
                ]
            }

        "name" is required; keys outside ALLOWED_ATTRS fail an assertion.
        """
        assert "name" in config_dict, "Folder configuration must specify 'name'"

        for attr in config_dict:
            assert attr in Folder.ALLOWED_ATTRS, "Unrecognized attribute in folder configuration: '%s'" % attr

        # hand over only the recognized settings that were actually given
        args = {}
        for allowed in Folder.ALLOWED_ATTRS:
            if allowed in config_dict:
                args[allowed] = config_dict[allowed]

        return Folder(**args)

    def __init__(self, name, plot_ids=[], subfolders=[]):
        """ Store the name, the plot IDs (as PlotID objects, in self.plot_entries)
        and the subfolders (as Folder objects, in self.subfolders).

        The list defaults are only read, never modified.
        """
        self.name = name

        assert hasattr(plot_ids, "__iter__"), "Invalid configuration supplied for Folder plot entries: '%s'" % plot_ids
        entries = []
        for item in plot_ids:
            entries.append(item if isinstance(item, PlotID) else PlotID(item))
        self.plot_entries = entries

        assert hasattr(subfolders, "__iter__"), "Invalid configuration supplied for Folder subfolders: '%s'" % subfolders
        self.subfolders = []
        for entry in subfolders:
            if not isinstance(entry, Folder):
                if not hasattr(entry, "__contains__"):
                    # neither a Folder nor a container: skipped without complaint
                    continue
                entry = Folder.FromDict(entry)
            self.subfolders.append(entry)
449 
450 
def AddDatasetPlots(self, dataset, plot_summaries)
def __init__(self, name, plot_ids=[], subfolders=[])
Definition: novas.h:112
procfile open("FD_BRL_v0.txt")
static float min(const float a, const float b, const float c)
Definition: absgeo.cxx:45
def AddComparisonPlot(self, plot_key, plot_summary)
char name[SIZE_OF_OBJ_NAME]
Definition: novas.h:116
def AddPlot(self, plot_key, plot_summary)
def __init__(self, plot_id, data_set, path="")