generic.tools.HistogramTools Namespace Reference

## Functions

def getMedian (histogram)
function for getting the median of a histogram More...

def getQuantiles (histogram, n_bins)
function for dividing histograms into quantiles More...

def histRange (histograms, include_errors=True, allow_suppressed_zero=False, suppress_outliers=True, use_headroom=True)
find the maximum and minimum of a list of histograms More...

def findMinMax (histograms, ignore_zero=False, include_errors=True, ignore_outliers=False, verbose=False)
find the maximum and minimum of a list of histograms More...

def findMaxSkew (plots, verbose=True)
Find the maximum skew of a list of histograms. More...

def findMaxAsymmetry (plots, verbose=False)
Find the maximum asymmetry of a list of histograms. More...

def findAsymmetry (plot)
Find the asymmetry of a histogram. More...

def getPlotStats (plots, area_norm=True, verbose=True)
Read a bunch of statistics from an array of plots. More...

def normaliseHistogram (histogram, area=1., verbose=False, preserve_errors=True)
normalise a histogram More...

def makeRatioHistograms (numerators, denominators, preserve_errors=True, sumw2=True, verbose=False)
divide lists of histograms More...

def getHistogramStatErrors (central, verbose=False)
get the statistical errors on a histogram More...

def setHistogramFractionalErrors (original, error, error_in_percent=False, verbose=False)
Set fractional errors on a histogram. More...

## Detailed Description

```  HistogramTools.py:
Models used in the validation.

Original author: J. Wolcott <jwolcott@fnal.gov>
extracted from tools by M. Tamsett
Date: September 2016
```

## Function Documentation

 def generic.tools.HistogramTools.findAsymmetry ( plot )

Find the asymmetry of a histogram.

Definition at line 131 of file HistogramTools.py.

References makeTrainCVSamples.int.

Referenced by generic.tools.HistogramTools.findMaxAsymmetry().

def findAsymmetry(plot):
    """Return the left/right asymmetry of a 1D histogram.

    The in-range bins (1..GetNbinsX()) are split into a lower and an upper
    half of equal size, and the result is

        (upper - lower) / (lower + upper)

    With an odd number of bins the central bin belongs to neither half.

    ROOT numbers bin 0 as the underflow bin and Integral(first, last)
    includes both end bins.  The earlier ranges (0..n/2 and n/2..n) therefore
    counted bin n/2 in both halves and added the underflow to the lower half
    only, so a flat histogram reported a non-zero asymmetry.

    Args:
        plot: object providing GetNbinsX() and Integral(first_bin, last_bin).

    Returns:
        The asymmetry, or 0. when there are fewer than two bins or the two
        integrals sum to zero.
    """
    n_bins = plot.GetNbinsX()
    half = n_bins // 2
    if half == 0:
        # No bins on either side to compare; also avoids calling Integral
        # with last < first.
        return 0.

    low_int = plot.Integral(1, half)
    high_int = plot.Integral(n_bins - half + 1, n_bins)
    try:
        return (high_int - low_int) / (low_int + high_int)
    except ZeroDivisionError:
        return 0.
140
def findAsymmetry(plot)
Find the asymmetry of a histogram.
 def generic.tools.HistogramTools.findMaxAsymmetry ( plots, verbose = `False` )

Find the maximum asymmetry of a list of histograms.

Definition at line 114 of file HistogramTools.py.

References abs(), and generic.tools.HistogramTools.findAsymmetry().

def findMaxAsymmetry(plots, verbose=False):
    """Return the asymmetry with the largest absolute value among plots.

    findAsymmetry() is evaluated for every plot; the signed value whose
    magnitude is greatest is returned (the earliest one wins a tie).

    Args:
        plots: non-empty sequence of histograms; plots[0] seeds the search.
        verbose: when true, print a header, one line per plot and the result.
    """
    if verbose:
        print("vtool: --- find Max Asymmetry")

    largest = findAsymmetry(plots[0])
    for index, plot in enumerate(plots):
        current = findAsymmetry(plot)
        largest = current if abs(current) > abs(largest) else largest
        # NOTE(review): the source listing lost its indentation; the per-plot
        # line is taken to be printed for every plot, not only on a new
        # maximum -- confirm.
        if verbose:
            print("vtool: [%i] %s: %f" % (index, plot.GetName(), current))

    if verbose:
        print("vtool: Max: %f" % largest)
    return largest
128
void abs(TH1 *hist)
def findMaxAsymmetry(plots, verbose=False)
Find the maximum asymmetry of a list of histograms.
def findAsymmetry(plot)
Find the asymmetry of a histogram.
 def generic.tools.HistogramTools.findMaxSkew ( plots, verbose = `True` )

Find the maximum skew of a list of histograms.

Definition at line 97 of file HistogramTools.py.

References abs().

def findMaxSkew(plots, verbose=True):
    """Return the skewness with the largest absolute value among plots.

    Each plot's GetSkewness() is compared by magnitude; the signed value of
    the winner is returned (the earliest one wins a tie).

    Args:
        plots: non-empty sequence of objects providing GetSkewness() and
            GetName(); plots[0] seeds the search.
        verbose: when true, print a header, one line per plot and the result.
    """
    if verbose:
        print("vtool: --- find Max Skew")

    largest = plots[0].GetSkewness()
    for index, plot in enumerate(plots):
        current = plot.GetSkewness()
        largest = current if abs(current) > abs(largest) else largest
        # NOTE(review): the source listing lost its indentation; the per-plot
        # line is taken to be printed for every plot, not only on a new
        # maximum -- confirm.
        if verbose:
            print("vtool: [%i] %s: %f" % (index, plot.GetName(), current))

    if verbose:
        print("vtool: Max: %f" % largest)
    return largest
111
void abs(TH1 *hist)
def findMaxSkew(plots, verbose=True)
Find the maximum skew of a list of histograms.
 def generic.tools.HistogramTools.findMinMax ( histograms, ignore_zero = `False`, include_errors = `True`, ignore_outliers = `False`, verbose = `False` )

find the maximum and minimum of a list of histograms

Definition at line 66 of file HistogramTools.py.

References PandAna.Demos.demo1.range.

# findMinMax: scan a list of histograms and return the pair (this_min, this_max).
#  - For each histogram, bins GetXaxis().GetFirst() .. GetXaxis().GetLast()
#    (inclusive) are visited.
#  - this_min tracks the smallest (content - error), this_max the largest
#    (content + error); error is the bin error when include_errors is true,
#    otherwise 0.
#  - A bin whose content is zero never updates either extreme because of the
#    `(entries) and ...` guards, whatever the value of ignore_zero.
#  - ignore_outliers is accepted but not used in the lines shown here.
#  - verbose prints a line whenever a new minimum or maximum is adopted.
# NOTE(review): this listing has lost its indentation, so the nesting of some
# statements below cannot be read off the text -- see the note at line 86.
66 def findMinMax(histograms, ignore_zero=False, include_errors=True, ignore_outliers=False, verbose=False):
67
68
 # Starting values: the maximum starts at 0, the minimum at the sentinel 10e15.
69  this_max = 0
70  this_min = 10e15
71  for i,d in enumerate(histograms):
72  name = d.GetName()
73  start = d.GetXaxis().GetFirst()
74  for bin in range(start, d.GetXaxis().GetLast()+1):
75  entries = d.GetBinContent(bin)
76  error = 0
77  if include_errors: error = d.GetBinError(bin)
78
79  if (ignore_zero and entries ==0): continue
80  if ((entries) and ((entries-error) < this_min)):
81  this_min = entries-error
82  if verbose: print 'tools: new minium minimum %.2f, from %s, bin[%i]'%(this_min, name, bin)
83  if ((entries) and ((entries+error) > this_max)):
84  this_max = entries+error
85  if verbose: print 'tools: new maximum maximum %.2f, from %s, bin[%i]'%(this_max, name, bin)
 # When zeros are being ignored, a minimum of exactly 0 is replaced by 0.01.
 # NOTE(review): cannot tell from this listing whether this runs per bin,
 # per histogram, or once after both loops -- confirm against the file.
86  if ignore_zero and this_min == 0:
87  this_min = 0.01
88
 # The sentinel survived, i.e. no bin ever lowered this_min: report 0 instead.
89  if this_min == 10e15:
90  # print "no no-zero minimum found. Using zero instead, although these plots are probably rubbish"
91  this_min = 0
92
93  return this_min, this_max
94
def findMinMax(histograms, ignore_zero=False, include_errors=True, ignore_outliers=False, verbose=False)
find the maximum and minimum of a list of histograms
 def generic.tools.HistogramTools.getHistogramStatErrors ( central, verbose = `False` )

get the statistical errors on a histogram

Definition at line 266 of file HistogramTools.py.

References PandAna.Demos.demo1.range.

Referenced by generic.tools.HistogramTools.makeRatioHistograms().

def _fractionalStatErrors(hist, name):
    """Clone hist as name, replacing each bin content by error / content."""
    result = hist.Clone(name)
    # Bins 0 .. GetNbinsX()+1: ROOT's underflow bin, the regular bins and the
    # overflow bin.  Stopping at GetNbinsX() left the overflow bin holding the
    # content and error copied by Clone().
    for i in range(hist.GetNbinsX() + 2):
        content = hist.GetBinContent(i)
        fractional_error = hist.GetBinError(i) / content if content else 0
        result.SetBinContent(i, fractional_error)
        result.SetBinError(i, 0)
    return result


def getHistogramStatErrors(central, verbose=False):
    """Build histograms whose bin contents are the fractional bin errors.

    For every bin (underflow and overflow included) the returned histogram
    holds GetBinError / GetBinContent of the input, or 0 where the content is
    zero, and its own bin error is set to 0.

    Args:
        central: a histogram, or a list of histograms.
        verbose: when true, print the inputs.

    Returns:
        For a list: a list of clones named "<input name>_maximum_deviation".
        Otherwise: a single clone named "maximum_deviation".
    """
    if verbose:
        print("ho : == GetStatErrors")
        print(" ".join(["ho : central values: ", str(central)]))

    if isinstance(central, list):
        return [
            _fractionalStatErrors(hist, hist.GetName() + "_maximum_deviation")
            for hist in central
        ]
    return _fractionalStatErrors(central, "maximum_deviation")
294
def getHistogramStatErrors(central, verbose=False)
get the statistical errors on a histogram
 def generic.tools.HistogramTools.getMedian ( histogram )

function for getting the median of a histogram

Definition at line 15 of file HistogramTools.py.

References generic.tools.HistogramTools.getQuantiles().

Referenced by generic.tools.HistogramTools.getPlotStats().

def getMedian(histogram):
    """Return the median of a histogram.

    getQuantiles() is asked for two quantiles; the first of the two
    locations it returns is handed back as the median.
    """
    return getQuantiles(histogram, 2)[0]
18
def getQuantiles(histogram, n_bins)
function for dividing histograms into quantiles
def getMedian(histogram)
function for getting the median of a histogram
 def generic.tools.HistogramTools.getPlotStats ( plots, area_norm = `True`, verbose = `True` )

Read a bunch of statistics from an array of plots.

Definition at line 143 of file HistogramTools.py.

References append(), and generic.tools.HistogramTools.getMedian().

def getPlotStats(plots, area_norm=True, verbose=True):
    """Collect per-plot statistics into a PlotStatistics object.

    stats.normalisations is chosen from the exact type of plots[0]:
    TH1F/TH1D give [("area", "area_norm_"), ("none", "")] when area_norm is
    true and [("none", "")] otherwise; any other type gives [].

    One value per plot is then appended to each of:
      * entries, sum_of_weights -- GetEntries() / GetSumOfWeights(), or -9
        for a TGraph;
      * means, medians, modes -- GetMean(), getMedian(plot) and the centre of
        the maximum bin for TH1F/TH1D, otherwise -9;
      * mean_diffs -- 0 for the first plot, otherwise the difference between
        this plot's mean and stats.means[0] as a percentage of
        stats.means[0] (0. if that reference is zero); -9 for plots that are
        not TH1F/TH1D.

    Types are compared exactly (no subclass matching), as before.

    Args:
        plots: non-empty sequence of ROOT plots.
        area_norm: selects the normalisation list, see above.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        The filled PlotStatistics instance.
    """
    from PlotStatistics import PlotStatistics
    from ROOT import TH1F, TH1D, TGraph

    stats = PlotStatistics()
    if type(plots[0]) in [TH1F, TH1D]:
        if area_norm:
            stats.normalisations = [("area", "area_norm_"), ("none", "")]
        else:
            stats.normalisations = [("none", "")]
    else:
        stats.normalisations = []

    for i, plot in enumerate(plots):
        if type(plot) in [TGraph]:
            stats.entries.append(-9)
            stats.sum_of_weights.append(-9)
        else:
            stats.entries.append(plot.GetEntries())
            stats.sum_of_weights.append(plot.GetSumOfWeights())

        if type(plot) in [TH1F, TH1D]:
            stats.means.append(plot.GetMean())
            stats.medians.append(getMedian(plot))
            stats.modes.append(plot.GetXaxis().GetBinCenter(plot.GetMaximumBin()))
            if i == 0:
                stats.mean_diffs.append(0)
            else:
                try:
                    mean_diff = ((plot.GetMean() - stats.means[0]) / stats.means[0]) * 100.
                except ZeroDivisionError:
                    # Only a zero reference mean is expected here; anything
                    # else should surface instead of being reported as 0.
                    mean_diff = 0.
                stats.mean_diffs.append(mean_diff)
        else:
            stats.means.append(-9)
            stats.medians.append(-9)
            stats.modes.append(-9)
            stats.mean_diffs.append(-9)
    return stats
179
void append()
Definition: append.C:24
def getPlotStats(plots, area_norm=True, verbose=True)
Read a bunch of statistics from an array of plots.
def getMedian(histogram)
function for getting the median of a histogram
 def generic.tools.HistogramTools.getQuantiles ( histogram, n_bins )

function for dividing histograms into quantiles

Definition at line 21 of file HistogramTools.py.

References PandAna.Demos.demo1.range.

Referenced by generic.tools.HistogramTools.getMedian().

def getQuantiles(histogram, n_bins):
    """Ask a histogram for n_bins equally spaced quantile locations.

    The requested cumulative probabilities are 1/n_bins, 2/n_bins, ..., 1.
    histogram.GetQuantiles(n_bins, locations, probabilities) is called with a
    zero-filled array("d") of length n_bins as the locations buffer, and that
    buffer is returned.
    """
    from array import array

    probabilities = array("d", ((step + 1.) / n_bins for step in range(n_bins)))
    positions = array("d", [0] * n_bins)
    histogram.GetQuantiles(n_bins, positions, probabilities)
    return positions
27
def getQuantiles(histogram, n_bins)
function for dividing histograms into quantiles
 def generic.tools.HistogramTools.histRange ( histograms, include_errors = `True`, allow_suppressed_zero = `False`, suppress_outliers = `True`, use_headroom = `True` )

find the maximum and minimum of a list of histograms

Definition at line 30 of file HistogramTools.py.

References makeTrainCVSamples.int.

30 def histRange(histograms, include_errors=True, allow_suppressed_zero=False, suppress_outliers=True, use_headroom=True):
31  vals = []
32  for h in histograms:
33  bin_nums = [getattr(h, "Get%saxis" % axis)().GetNbins() for axis in ("X", "Y", "Z")]
34  for bin_nums in itertools.product(*(xrange(1, ub+1) for ub in bin_nums)):
35  bin_num = h.GetBin(*bin_nums)
36  if include_errors:
38  vals.append(h.GetBinContent(bin_num) + sign * h.GetBinError(bin_num))
39  else:
40  vals.append(h.GetBinContent(bin_num))
41
42  vals.sort()
43
44
45  # outlier suppression is achieved by taking the central 98% of values
47  max_idx = len(vals) - 1
48  min_idx = 0
49  if suppress_outliers:
50  idx_adjust = int(math.floor((len(vals)-1) * 0.01))
53
54
55  minimum = vals[min_idx] if allow_suppressed_zero or vals[min_idx] < 0 else 0
56  maximum = vals[max_idx] if allow_suppressed_zero or vals[max_idx] > 0 else 0
57
58
60  maximum *= 1.15
61
62  return minimum, maximum
63
def histRange(histograms, include_errors=True, allow_suppressed_zero=False, suppress_outliers=True, use_headroom=True)
find the maximum and minimum of a list of histograms
 def generic.tools.HistogramTools.makeRatioHistograms ( numerators, denominators, preserve_errors = `True`, sumw2 = `True`, verbose = `False` )

divide lists of histograms

Definition at line 229 of file HistogramTools.py.

def makeRatioHistograms(numerators, denominators, preserve_errors=True, sumw2=True, verbose=False):
    """Divide each numerator histogram by its denominator.

    When denominators holds exactly one histogram it is used for every
    numerator; otherwise numerator i is divided by denominators[i].  Inputs
    are not modified: both are cloned before Divide() is called.

    Args:
        numerators: list of histograms.
        denominators: list with one histogram, or one per numerator.
        preserve_errors: when true, the ratio's bin errors are replaced so
            that each bin has the same fractional error as the numerator
            (via getHistogramStatErrors / setHistogramFractionalErrors).
        sumw2: when true, call Sumw2() on the numerator clone if
            GetSumw2N() reports 0.
        verbose: when true, print the inputs and a per-bin summary.

    Returns:
        List of ratio histograms, one per numerator.
    """
    return_histograms = []
    single_denominator = len(denominators) == 1
    if verbose:
        print("\nvtool: --- Histogram Operations: Make Ratio Histograms")
        print(" ".join(["vtool: error preservation: ", str(preserve_errors)]))
        print("vtool: %i numerators" % len(numerators))
        print("vtool: %i denominators" % len(denominators))
        if single_denominator:
            print("vtool: Using the same denominator for all ratios")

    for i, n in enumerate(numerators):
        if verbose:
            print(" ".join(["vtool: numerator[%i]: " % i, str(n), " entries: ", str(n.GetSumOfWeights())]))
        this_denominator = denominators[0] if single_denominator else denominators[i]
        if verbose:
            print(" ".join(["vtool: denominator: ", str(this_denominator), " entries: ", str(this_denominator.GetSumOfWeights())]))

        e = n.Clone(n.GetName() + "_numerator_%i" % i)
        if sumw2 and e.GetSumw2N() == 0:
            e.Sumw2()
        d = this_denominator.Clone(this_denominator.GetName() + "_denominator_%i" % i)

        e.Divide(d)
        if preserve_errors:
            fractional_errors = getHistogramStatErrors(n)
            e = setHistogramFractionalErrors(e, fractional_errors)

        # The per-bin dump is diagnostics only, so skip the loop entirely
        # unless asked for, and keep its index distinct from the outer one.
        if verbose:
            for bin_index in range(e.GetNbinsX() + 2):
                print("vtool: - bin[%2i]: numerator: %10.4f +/- %10.4f, denominator: %10.4f +/- %10.4f, ratio: %5.4f, error: %10.4f" % (
                    bin_index,
                    n.GetBinContent(bin_index), n.GetBinError(bin_index),
                    d.GetBinContent(bin_index), d.GetBinError(bin_index),
                    e.GetBinContent(bin_index), e.GetBinError(bin_index)))
        return_histograms.append(e)

    if verbose:
        print(return_histograms)
    return return_histograms
263
def makeRatioHistograms(numerators, denominators, preserve_errors=True, sumw2=True, verbose=False)
divide lists of histograms
def getHistogramStatErrors(central, verbose=False)
get the statistical errors on a histogram
def setHistogramFractionalErrors(original, error, error_in_percent=False, verbose=False)
Set fractional errors on a histogram.
 def generic.tools.HistogramTools.normaliseHistogram ( histogram, area = `1.`, verbose = `False`, preserve_errors = `True` )

normalise a histogram

Definition at line 182 of file HistogramTools.py.

References PandAna.Demos.demo1.range.

def normaliseHistogram(histogram, area=1., verbose=False, preserve_errors=True):
    """Return a clone of histogram scaled so its sum of weights equals area.

    The clone is named "<name>_normalised_to_<area>".  If the input's
    GetSumOfWeights() is zero the clone is left unscaled.

    Args:
        histogram: ROOT histogram; exact types TH2D/TH2F are treated as 2D,
            everything else as 1D.
        area: target sum of weights.
        verbose: when true, print progress information.
        preserve_errors: when true, every bin error of the clone is reset to
            (input error / input content) * scaled content, so each bin keeps
            the input's fractional error (0 where the input content is 0).

    Returns:
        The normalised clone; the input is not modified.
    """
    from ROOT import TH2D, TH2F

    if verbose:
        print("vtool: --- NormaliseHistogram")
        print(" ".join(["vtool: - histogram: ", str(histogram.GetName())]))
        print(" ".join(["vtool: - area: ", str(area)]))

    return_histogram = histogram.Clone(histogram.GetName() + "_normalised_to_%.0f" % area)
    sum_of_weights = histogram.GetSumOfWeights()
    if sum_of_weights:
        scale_factor = area / sum_of_weights
        return_histogram.Scale(scale_factor)
    else:
        scale_factor = 1
        if verbose:
            print("vtool: - histogram SumOfWeights is zero, setting to 1")

    if verbose:
        print("vtool: - histogram SumOfWeights: %e" % histogram.GetSumOfWeights())
        print("vtool: - scale factor: %e" % scale_factor)
        print("vtool: - final SumOfWeights: %e" % return_histogram.GetSumOfWeights())

    if preserve_errors:
        if verbose:
            print("vtool: Scaling errors")

        # Bins 0 .. N+1 on each axis: ROOT's underflow bin, the regular bins
        # and the overflow bin.  The loops used to stop at N, so overflow
        # bins never had their errors rescaled.
        x_bins = range(histogram.GetNbinsX() + 2)
        if type(histogram) in (TH2D, TH2F):
            y_bins = range(histogram.GetNbinsY() + 2)
            for i in x_bins:
                for j in y_bins:
                    value = histogram.GetBinContent(i, j)
                    fractional_error = histogram.GetBinError(i, j) / value if value else 0
                    new_value = return_histogram.GetBinContent(i, j)
                    return_histogram.SetBinError(i, j, fractional_error * new_value)
        else:
            for i in x_bins:
                value = histogram.GetBinContent(i)
                fractional_error = histogram.GetBinError(i) / value if value else 0
                new_value = return_histogram.GetBinContent(i)
                return_histogram.SetBinError(i, fractional_error * new_value)

    return return_histogram
226
def normaliseHistogram(histogram, area=1., verbose=False, preserve_errors=True)
normalise a histogram
 def generic.tools.HistogramTools.setHistogramFractionalErrors ( original, error, error_in_percent = `False`, verbose = `False` )

Set fractional errors on a histogram.

Definition at line 297 of file HistogramTools.py.

References PandAna.Demos.demo1.range.

Referenced by generic.tools.HistogramTools.makeRatioHistograms().

def setHistogramFractionalErrors(original, error, error_in_percent=False, verbose=False):
    """Return a clone of original whose bin errors come from fractional errors.

    For every bin (underflow and overflow included) the clone's error is set
    to original content * fractional error, where the fractional error is the
    content of the same bin of ``error``.

    Args:
        original: histogram supplying the bin contents; it is cloned as
            "<name>_fractional_error" and not modified.
        error: histogram whose bin contents are the fractional errors.
        error_in_percent: when true, the values in ``error`` are divided by
            100 before use.
        verbose: when true, print the inputs and one line per bin.

    Returns:
        The clone carrying the new bin errors.
    """
    if verbose:
        print("vtool: --- SetFractionalErrors")
        print(" ".join(["vtool: original: ", str(original.GetName())]))
        print(" ".join(["vtool: errors: ", str(error.GetName())]))
        print(" ".join(["vtool: errors in percent: ", str(error_in_percent)]))

    return_histogram = original.Clone(original.GetName() + "_fractional_error")

    # Bins 0 .. GetNbinsX()+1: ROOT's underflow bin, the regular bins and the
    # overflow bin.  The loop used to stop at GetNbinsX(), leaving the
    # overflow bin with the error copied by Clone().
    for i in range(original.GetNbinsX() + 2):
        value = original.GetBinContent(i)
        fractional_error = error.GetBinContent(i)
        if error_in_percent:
            fractional_error = fractional_error / 100.
        absolute_error = value * fractional_error
        if verbose:
            print("vtool: - bin[%i]: value: %.4f, fractional error: %.4f, absolute error: %.4f" % (i, value, fractional_error, absolute_error))
        return_histogram.SetBinError(i, absolute_error)
    return return_histogram
def setHistogramFractionalErrors(original, error, error_in_percent=False, verbose=False)
Set fractional errors on a histogram.