Functions
PandAna.Demos.tute_plots Namespace Reference

Functions

def MakeDeltaCVNPlots (sig, title='test', color='blue', slc={}, folder='.')
 
def CalculateEffPur (sig, bkg, bins)
 
def PrevCVNCut (df_sig, wgt_sig, df_bkg, wgt_bkg, index)
 
def MakeCVNDistPlot (sig, bkg, title='test', folder='.')
 

Function Documentation

def PandAna.Demos.tute_plots.CalculateEffPur (   sig,
  bkg,
  bins 
)

Definition at line 60 of file tute_plots.py.

References PandAna.Demos.demo1.range, and sum.

Referenced by PandAna.Demos.tute_plots.MakeCVNDistPlot().

def _tail_sums(counts, nbins):
    """Return an array whose entry i is the sum of counts[i:nbins]."""
    padded = np.zeros(nbins)
    used = min(len(counts), nbins)
    padded[:used] = counts[:used]
    # Cumulative sum of the reversed array, reversed back, gives for each
    # bin the total of that bin and every bin above it.
    return np.cumsum(padded[::-1])[::-1]


def CalculateEffPur(sig, bkg, bins):
    """Scan a lower cut over histogram bins and compute two curves.

    For every bin index i the selection keeps bins i..nbins-1, where
    nbins = len(bins) - 1.

    Args:
        sig: per-bin signal counts (sequence or numpy array).
        bkg: per-bin background counts (sequence or numpy array).
        bins: bin edges; only its length is used.

    Returns:
        (sig_eff, bkg_rej): two numpy arrays of length nbins.
        sig_eff[i] is the selected signal divided by the total of ``sig``.
        bkg_rej[i] is selected background / (selected signal + selected
        background), i.e. the background fraction of the selection (the
        name is kept from the original code).  Entries whose denominator
        is zero are NaN.
    """
    nbins = max(len(bins) - 1, 0)
    sig = np.asarray(sig, dtype=float)
    bkg = np.asarray(bkg, dtype=float)

    sig_tot = sig.sum()
    sig_sel = _tail_sums(sig, nbins)
    bkg_sel = _tail_sums(bkg, nbins)

    # An empty selection (or empty signal sample) yields 0/0; report NaN
    # quietly instead of raising or emitting a RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        sig_eff = sig_sel / sig_tot
        bkg_rej = bkg_sel / (sig_sel + bkg_sel)
    return sig_eff, bkg_rej
73 
74 # calculate efficiency and background contamination for the loosepresel optimized prod5 cvn cut
def CalculateEffPur(sig, bkg, bins)
Definition: tute_plots.py:60
Double_t sum
Definition: plot.C:31
def PandAna.Demos.tute_plots.MakeCVNDistPlot (   sig,
  bkg,
  title = 'test',
  folder = '.' 
)

Definition at line 86 of file tute_plots.py.

References PandAna.Demos.tute_plots.CalculateEffPur(), PandAna.Demos.tute_plots.PrevCVNCut(), and PandAna.Demos.demo1.range.

def MakeCVNDistPlot(sig, bkg, title = 'test', folder='.'):
    """Plot CVN score distributions and efficiency/contamination curves.

    Two PDFs are written into ``folder`` (created if missing):
    ``<title>_dist.pdf`` with the signal (solid) and background (dashed)
    histograms of each CVN variable, and ``<title>_roc.pdf`` with signal
    efficiency versus background contamination, overlaid with dashed lines
    at the values returned by PrevCVNCut.

    Args:
        sig: signal object exposing df(), _weight, POT() and integral(pot)
            (presumably a PandAna spectrum -- confirm against callers).
        bkg: background object with the same interface.
        title: plot title; also used in the output file names.
        folder: output directory.
    """
    var = ['cvnoldpresel', 'cvnloosepreselptp']
    colors = [plt.cm.tab10(i+2) for i in range(len(var))]

    sig_df, bkg_df = sig.df(), bkg.df()
    sig_weight = sig._weight
    bkg_weight = bkg._weight

    xspan = (0, 1)
    nbins = 20
    pot = 1.2e21
    numbers = "Signal : %0.2f, Bkg: %0.2f" % (sig.integral(pot), bkg.integral(pot))

    # check efficiency and background contamination (1 - signal purity)
    prev_eff, prev_bkgfrac = PrevCVNCut(sig_df, sig_weight, bkg_df, bkg_weight, title)

    # Scale factors from each sample's own exposure to the target POT.
    sig_scale = pot/sig.POT()
    bkg_scale = pot/bkg.POT()

    sigy, bkgy = [], []
    sig_effs, bkg_rejs = [], []
    x = None
    for v in var:
        nsig, x = np.histogram(sig_df[v], nbins, xspan, weights=sig_weight)
        nbkg, _ = np.histogram(bkg_df[v], nbins, xspan, weights=bkg_weight)
        nsig = nsig * sig_scale
        nbkg = nbkg * bkg_scale
        sig_eff, bkg_rej = CalculateEffPur(nsig, nbkg, x)
        sig_effs.append(sig_eff)
        bkg_rejs.append(bkg_rej)
        sigy.append(nsig)
        bkgy.append(nbkg)

    # os.makedirs also handles nested output paths, unlike os.mkdir.
    if not os.path.isdir(folder):
        os.makedirs(folder)

    fig, ax = plt.subplots()
    for name, col, ys, yb in zip(var, colors, sigy, bkgy):
        # Re-draw the pre-binned counts: one entry per bin, weighted by its content.
        ax.hist(x[:-1], x, weights=ys, histtype='step', color=col, label=name)
        ax.hist(x[:-1], x, weights=yb, histtype='step', linestyle='dashed', color=col)
    ax.set_title(title)
    ax.set_xlabel('CVN')
    ax.set_ylabel('Events')
    ax.set_yscale('log')
    plt.legend(loc='upper center')
    fig.savefig('%s/%s_dist.pdf'%(folder,title))

    fig, ax = plt.subplots()
    for name, col, eff, rej in zip(var, colors, sig_effs, bkg_rejs):
        ax.plot(rej, eff, color=col, label=name)
    ax.axhline(y=prev_eff, linestyle='dashed', color='black')
    ax.axvline(x=prev_bkgfrac, linestyle='dashed', color='black')
    ax.set_title(numbers)
    ax.set_ylabel('Signal Efficiency')
    ax.set_xlabel('Background Contamination')
    plt.legend(loc='best')
    fig.savefig('%s/%s_roc.pdf'%(folder,title))
136 
def CalculateEffPur(sig, bkg, bins)
Definition: tute_plots.py:60
def MakeCVNDistPlot(sig, bkg, title='test', folder='.')
Definition: tute_plots.py:86
def PrevCVNCut(df_sig, wgt_sig, df_bkg, wgt_bkg, index)
Definition: tute_plots.py:75
def PandAna.Demos.tute_plots.MakeDeltaCVNPlots (   sig,
  title = 'test',
  color = 'blue',
  slc = {},
  folder = '.' 
)

Definition at line 15 of file tute_plots.py.

References cet::sqlite.max(), PandAna.Demos.demo1.range, and sum.

def MakeDeltaCVNPlots(sig, title='test', color='blue', slc={}, folder='.'):
    """Plot the normalized distribution of cvnloosepreselptp - cvnoldpresel.

    Without ``slc`` a single histogram over all events is drawn.  With
    ``slc`` one histogram is drawn per slice [edge[i], edge[i+1]) of the
    given column.  The figure is shown and then saved in ``folder`` as
    ``<title>_delta_with_<key>.pdf`` or ``<title>_delta_all.pdf``.

    Args:
        sig: object exposing df(), _weight and POT() (presumably a PandAna
            spectrum -- confirm against callers).
        title: figure title; also used in the output file name.
        color: unused; kept so existing callers keep working.
        slc: optional one-entry mapping {column name: list of slice edges}.
            Only the first entry is used.  The default dict is never
            mutated here.
        folder: output directory, created if missing.
    """
    var = 'cvnloosepreselptp'
    xspan = (-1, 1)
    nbins = 40
    pot = 1.2e21

    sig_df = sig.df()
    delta = sig_df[var] - sig_df['cvnoldpresel']

    key = ''
    val = [None]
    if slc:
        # Works on both Python 2 and 3 (dict views are not subscriptable on 3).
        key, val = next(iter(slc.items()))

    nslices = max(len(val)-1, 1)
    colors = [plt.cm.tab10(i) for i in range(nslices)]

    fig, ax = plt.subplots()
    for i in range(nslices):
        if val[i] is None:
            df = delta
            wgt = sig._weight
            label = 'No cut'
        else:
            # Half-open slice [val[i], val[i+1]), matching the legend label.
            in_slice = (sig_df[key] >= val[i]) & (sig_df[key] < val[i+1])
            df = delta[in_slice]
            wgt = sig._weight[in_slice]
            label = '%s >= %.1f & %s < %.1f' % (key, val[i], key, val[i+1])

        n, bins = np.histogram(df, nbins, xspan, weights=wgt)
        # NOTE(review): this POT scaling cancels in the unit-area
        # normalization below; kept to mirror the original computation.
        n = n * (pot/sig.POT())
        total = n.sum()
        if total > 0:
            # Skip normalization for an empty slice to avoid 0/0 -> NaN bins.
            n = n/total
        ax.hist(bins[:-1], bins, weights=n, histtype='step', color=colors[i], label=label)

    ax.set_xlabel('cvnloosepreselptp - cvnoldpresel')
    ax.set_ylabel('Events')

    fig.suptitle(title)
    plt.legend(loc='upper left')
    plt.show()
    # os.makedirs also handles nested output paths, unlike os.mkdir.
    if not os.path.isdir(folder):
        os.makedirs(folder)
    if slc:
        fig.savefig('%s/%s_delta_with_%s.pdf'%(folder, title, key))
    else:
        fig.savefig('%s/%s_delta_all.pdf'%(folder, title))
59 # calculate efficiency and background contaminations for a range of cvn cuts
def MakeDeltaCVNPlots(sig, title='test', color='blue', slc={}, folder='.')
Definition: tute_plots.py:15
Double_t sum
Definition: plot.C:31
T max(sqlite3 *const db, std::string const &table_name, std::string const &column_name)
Definition: statistics.h:68
def PandAna.Demos.tute_plots.PrevCVNCut (   df_sig,
  wgt_sig,
  df_bkg,
  wgt_bkg,
  index 
)

Definition at line 75 of file tute_plots.py.

References sum.

Referenced by PandAna.Demos.tute_plots.MakeCVNDistPlot().

def PrevCVNCut(df_sig, wgt_sig, df_bkg, wgt_bkg, index, cut=0.84, var='cvnloosepreselptp'):
    """Signal efficiency and background fraction for a single CVN cut.

    Events pass when ``df[var] >= cut``.

    Args:
        df_sig: signal DataFrame containing column ``var``.
        wgt_sig: signal event weights as a pandas Series; assumed to share
            df_sig's index (TODO confirm for all callers).
        df_bkg: background DataFrame containing column ``var``.
        wgt_bkg: background event weights, indexed like df_bkg.
        index: unused; kept so existing callers keep working.
        cut: threshold on ``var``; the default 0.84 is the prod5 tuned cut.
        var: name of the column the cut is applied to.

    Returns:
        (eff, bkg_frac): weighted passing signal over total signal weight,
        and weighted passing background over all weighted passing events.
        Either is NaN when its denominator is zero.
    """
    # Mask the weights directly rather than concatenating frames, so the
    # result does not depend on the weight Series being named 'weight'.
    sig_pass = wgt_sig[df_sig[var] >= cut].sum()
    bkg_pass = wgt_bkg[df_bkg[var] >= cut].sum()
    eff = sig_pass/wgt_sig.sum()
    bkg_frac = bkg_pass/(sig_pass + bkg_pass)
    return eff, bkg_frac
84 
85 # function to plot the CVN distributions along with the ROC curves
def PrevCVNCut(df_sig, wgt_sig, df_bkg, wgt_bkg, index)
Definition: tute_plots.py:75
Double_t sum
Definition: plot.C:31