generate_hists.py
Go to the documentation of this file.
1 """
2  generate_hists.py:
3  Make histograms to use for testing the validation system.
4 
5  Original author: J. Wolcott <jwolcott@fnal.gov>
6  Date: September 2016
7 """
8 
9 import os.path
10 import random
11 
12 
13 import ROOT
14 
15 OUTDIR = "/nova/ana/users/jwolcott/mc_valid/new_test"
16 
17 # three "datasets" will have the same distribution,
18 # which is broken into three "categories"/components.
19 # the datasets differ by 'exposure' (number of entries
20 # sampled from each distribution).
21 DATASET_NUMEVTS = {
22  "test_dataset1": 1e5,
23  "test_dataset2": 1e4,
24  "test_dataset3": 1e3,
25 }
26 
27 MOCK_DATA_DATASETS = [
28  "test_dataset3",
29 ]
30 
31 COMPONENT_DISTRIBUTIONS = [
32  "TMath::Gaus(x, 1)",
33  "TMath::Landau(x-2, 1)",
34  "2*TMath::Student(3*x, 5)",
35 ]
36 sum_f = ROOT.TF1("fsum", " + ".join(COMPONENT_DISTRIBUTIONS), 0, 5)
37 integral = sum_f.Integral(0, 5)
38 
39 
40 COMPONENT_FUNCTIONS = [ ROOT.TF1("%f * f%d" % (1./integral, fnum), distr, 0, 5) for fnum, distr in enumerate(COMPONENT_DISTRIBUTIONS) ]
41 thresholds = [f.Integral(0, 5)/integral for f in COMPONENT_FUNCTIONS] # threshold for each distribution for rejection sampling
42 for i in range(1, len(thresholds)):
43  thresholds[i] += thresholds[i-1]
44 
45 for dataset_name, num_evts in DATASET_NUMEVTS.iteritems():
46  outf = ROOT.TFile( os.path.join(OUTDIR, "test_hists_%s.root" % dataset_name), "recreate" )
47  assert not outf.IsZombie()
48 
49  hists = []
50  if dataset_name in MOCK_DATA_DATASETS:
51  hists.append(ROOT.TH1D("dir1/h_var", "Test ariable;Var (unit);Events", 50, 0, 5))
52  else:
53  for hist_num in range(len(COMPONENT_FUNCTIONS)):
54  hists.append(ROOT.TH1D("dir1/h_var{cat%d}" % hist_num, "Test variable;Var (unit);Events", 50, 0, 5))
55 
56  for evt_num in xrange(int(num_evts)):
57  # pick distribution via rejection sampling
58  rnd = random.random()
59  for distr_num, threshold in enumerate(thresholds):
60  if rnd < threshold or distr_num == len(thresholds)-1:
61  break
62 
63  # now sample from that distribution itself
64  val = COMPONENT_FUNCTIONS[distr_num].GetRandom()
65  h = hists[distr_num] if len(hists) == len(COMPONENT_DISTRIBUTIONS) else hists[0]
66  h.Fill(val)
67 
68  for h in hists:
69  h.Write()
70 
71  outf.Close()