SplineUtils.cxx
Go to the documentation of this file.
2 #include <iostream>
3 #include "TF1.h"
4 #include "TF2.h"
5 #include "TH1.h"
6 #include "TH2.h"
7 #include "TSpline.h"
8 #include "TGraphErrors.h"
9 #include "TCanvas.h"
10 #include "TStyle.h"
11 #include "TMinuit.h"
12 
13 namespace ana {
14  //
15  // Lead with all the static, private functions. Public ones are down
16  // below.
17  // A way to get the MINUIT FCN access to some of the data we need
18  //
19  static char gsFitObjective = kMODES;
20  static unsigned int gsMknot;
21  static TH2* gsTH2 = 0;
22  static TGraphErrors* gsTGraph = 0;
23  //
24  //....................................................................
25  //
26  // Determine a good set of m knot locations given an list of (x,y)
27  // data.
28  //
29  // m - How many seeds to generate?
30  // xknot - x locations of seed knots
31  // yknot - y locations of seed knots
32  //
33  static void best_spline_seeds(unsigned int m,
34  double* xknot,
35  double* yknot)
36  {
37  //
38  // Check some assumptions before proceeding
39  //
40  if (m<2) {
41  std::cerr << "best_spline_seeds: Require m>=2 but m="
42  << m << std::endl;
43  abort();
44  }
45  //
46  // Strategy for setting knot locations is: Place the xknots so that
47  // each corresponds to a fixed fraction of the data in the
48  // histogram. So, for example, 5 knots will be placed so that 25% of
49  // the data is between knot 0 and 1, 25% between 1 and 2, and so
50  // on. Then, place the yknots at the locations of the maximum
51  // scanning vertically along the plot.
52  //
53  //
54  TH1D* xproj = gsTH2->ProjectionX();
55  unsigned int nx = xproj->GetNbinsX();
56  //
57  // If the profile has a large number of events in the overflow bin
58  // we are not likely to be happy with the results up there. Print a
59  // warning to the user.
60  //
61  double noverflow = xproj->GetBinContent(xproj->GetNbinsX()+1);
62  if (noverflow>0) {
63  std::cerr << "best_spline: Histogram " << gsTH2->GetName() << " has "
64  << noverflow << " events in the overflow bin.\n"
65  << " Extrapolation using splines is unreliable.\n"
66  << " Probably you want to extend the range of your x-axis."
67  << std::endl;
68  }
69 
70  std::vector<unsigned int> ix(m);
71  double ftgt = 1.0/(double)(m-1);
72 
73  ix[0] = 1;
74  ix[m-1] = nx;
75  unsigned int i, j;
76  if (m>2) {
77  //
78  // For each of the internal knots...
79  //
80  for (i=1; i<=m-2; ++i) {
81  //
82  // ...find the fraction of events between j and the last knot
83  // location...
84  //
85  for (j=ix[i-1]+1; j<=nx; ++j) {
86  //
87  // ...and when that fraction trips over the target value,
88  // place a new knot
89  //
90  if (xproj->Integral(ix[i-1],j)/xproj->Integral()>=ftgt) {
91  ix[i] = j;
92  break;
93  }
94  } // Loop on j bins
95  } // Loop on i knots
96  } // if m>2
97  for (i=0; i<m; ++i) {
98  xknot[i] = xproj->GetBinCenter(ix[i]);
99  TH1D* yproj = gsTH2->ProjectionY("",ix[i],ix[i]);
100  nx = yproj->GetNbinsX();
101  for (j=1; j<=nx; ++j) {
102  if (yproj->Integral(1,j)/yproj->Integral()>0.5) {
103  yknot[i] = yproj->GetBinCenter(j);
104  break;
105  }
106  }
107  }
108  }
109 
110  //......................................................................
111  //
112  // Function to optimize when searching for the mode of a 1D
113  // distribution. See: Bickel, Journal of Statistical Computation and
114  // Simulation; 73 (2003), 899-912 and fits
115  //
116  static double best_spline_mode_fcn(double x, const TH1* h)
117  {
118  double n = h->Integral();
119  double sigma = h->GetRMS();
120  double power = 2/5.;
121  double hsqr = 0.81*sigma*sigma/pow(n,power);
122 
123  double f = 0.0;
124  int i;
125  for (i=1; i<=h->GetNbinsX(); ++i) {
126  double d = x-h->GetBinCenter(i);
127  double ni = h->GetBinContent(i);
128  f += ni*exp(-0.5*d*d/hsqr);
129  }
130  return f;
131  }
132 
133  //......................................................................
134  //
135  // The function we want to minimize in LEAST_SQUARES mode
136  //
137  static void best_spline_fcn0(int& /* npar */,
138  double* /* g */,
139  double& chi2,
140  double* par,
141  int /* istat */)
142  {
143  unsigned int i, j;
144  double xknot[kMAXKNOT];
145  double yknot[kMAXKNOT];
146  //
147  // Unpack the data from this trial. The parameters are m pairs of
148  // (x,y) knot locations and two derivatives. Use those to build a
149  // spline function
150  //
151  for (i=0; i<gsMknot; ++i) {
152  xknot[i] = par[2*i+0];
153  yknot[i] = par[2*i+1];
154  }
155  double b1 = par[2*gsMknot];
156  double e1 = par[2*gsMknot+1];
157  TSpline3 s("best_spline_fcn", xknot, yknot, gsMknot, "b1,e1", b1, e1);
158 
159  //
160  // Make a chi^2 comparison of the spline to the actual data which
161  // has been stored globally for easy access from this function
162  //
163  unsigned int nx = gsTH2->GetNbinsX();
164  unsigned int ny = gsTH2->GetNbinsY();
165  TAxis* xaxis = gsTH2->GetXaxis();
166  TAxis* yaxis = gsTH2->GetXaxis();
167  chi2 = 0.0;
168  for (i=1; i<=nx; ++i) {
169  double x = xaxis->GetBinCenter(i);
170  for (j=1; j<=ny; ++j) {
171  double n = gsTH2->GetBinContent(i,j);
172  if (n>0.0) {
173  double y = yaxis->GetBinCenter(j);
174  double d = (y-s.Eval(x));
175  chi2 += n*d*d;
176  }
177  }
178  }
179  }
180 
181  //......................................................................
182  //
183  // The function to minimize in "mode 1" where we minimize thi chi2 to
184  // the modes of the verical slices
185  //
186  static void best_spline_fcn1(int& /* npar */,
187  double* /* g */,
188  double& chi2,
189  double* par,
190  int /* istat */)
191  {
192  unsigned int i;
193  double xknot[kMAXKNOT];
194  double yknot[kMAXKNOT];
195  //
196  // Unpack the data from this trial. The parameters are m pairs of
197  // (x,y) knot locations and two derivatives. Use those to build a
198  // spline function
199  //
200  for (i=0; i<gsMknot; ++i) {
201  xknot[i] = par[2*i+0];
202  yknot[i] = par[2*i+1];
203  }
204  double b1 = par[2*gsMknot];
205  double e1 = par[2*gsMknot+1];
206  TSpline3 s("best_spline_fcn", xknot, yknot, gsMknot, "b1,e1", b1, e1);
207 
208  unsigned int n = gsTGraph->GetN();
209  chi2 = 0;
210  double x;
211  double y;
212  double d;
213  double sig;
214  for (i=0; i<n; ++i) {
215  gsTGraph->GetPoint(i, x, y);
216  sig = gsTGraph->GetErrorY(i);
217  d = (y-s.Eval(x))/sig;
218  chi2 += d*d;
219  }
220  }
221 
222  //......................................................................
223  //
224  // Given a TH2, make a TGraph of the mode of each vertical slice of
225  // the histogram with error bars. Ownership of TGraph belongs to
226  // caller.
227  //
228  static TGraphErrors* best_spline_make_mode_graph(TH2* h)
229  {
230  std::vector<double> x;
231  std::vector<double> y;
232  std::vector<double> s;
233  double xtmp;
234  double ytmp;
235  double stmp;
236  int i;
237  for (i=1; i<h->GetNbinsX(); ++i) {
238  TH1D* slice = h->ProjectionY("_py",i,i);
239  xtmp = h->GetXaxis()->GetBinCenter(i);
240  ytmp = best_spline_find_mode(slice, &stmp);
241  //
242  // Accept valid points, flagged by having reasonably estimated
243  // error bars
244  //
245  if (stmp>0.0) {
246  x.push_back(xtmp);
247  y.push_back(ytmp);
248  s.push_back(stmp);
249  }
250  }
251  unsigned int j;
252  TGraphErrors* g = new TGraphErrors(x.size());
253  for (j=0; j<x.size(); ++j) {
254  g->SetPoint(j, x[j], y[j]);
255  g->SetPointError(j, 0, s[j]);
256  }
257  return g;
258  }
259 
260  ////////////////////////////////////////////////////////////////////
261  //
262  // The functions that form the public interface
263  //
264  //..................................................................
265  double best_spline_find_mode(TH1* h, double* sig)
266  {
267  unsigned int i;
268  unsigned int nstep = 10;
269  double x1 = h->GetXaxis()->GetXmin();
270  double x2 = h->GetXaxis()->GetXmax();
271  double dx = (x2-x1)*1.1/(float)nstep;
272 
273  double f;
274  double x;
275  double fmax = 0.0;
276  double mode = 0.0;
277  while (dx>1e-6) {
278  for (i=0; i<=nstep; ++i) {
279  x = x1 + i*dx;
280  f = best_spline_mode_fcn(x, h);
281  if (f>fmax) {
282  fmax = f;
283  mode = x;
284  }
285  }
286  dx *= 0.5;
287  x1 = mode-0.5*nstep*dx;
288  x2 = mode+0.5*nstep*dx;
289  }
290  if (sig) {
291  double n = h->Integral();
292  if (n>2) {
293  double sigma = h->GetRMS();
294  double power = 2/5.;
295  double hsqr = sigma*sigma/pow(n,power);
296  *sig = sqrt(hsqr);
297  if (*sig==0.0) *sig = -99.;
298  }
299  else {
300  *sig = -99;
301  }
302  }
303  return mode;
304  }
305 
306  //....................................................................
307 
309  {
310  if (obj==kMODES) gsFitObjective = kMODES;
311  else gsFitObjective = kLEAST_SQUARES;
312  }
313 
314  //....................................................................
315 
316  TGraphErrors* best_spline_get_mode_graph() {
317  if (gsTGraph==0) {
318  std::cerr <<
319  "SplineUtils::best_spline_get_mode_graph: Mode graph is empty. " <<
320  "Returning null." << std::endl;
321  }
322  return gsTGraph;
323  }
324 
325  //....................................................................
326 
327  static void best_spline_set_histo(TH2* h)
328  {
329  gsTH2 = h;
330  }
331 
332  //......................................................................
333 
334  TSpline3 best_spline(TH2* histo,
335  unsigned int m,
336  double* xknot,
337  double* yknot,
338  double* b1rtn,
339  double* e1rtn,
340  double* chi2,
341  double* sigxknot,
342  double* sigyknot,
343  double* sigb1rtn,
344  double* sige1rtn,
345  const char* title)
346  {
347  unsigned int i;
348  if (m<2) {
349  std::cerr << "best_spline: Needs at least two points, got "
350  << m << std::endl;
351  }
352  //
353  // Get ourselves setup. Configurate the static global data needed by
354  // the FCN and get ourselves some reasonable seed knot locations to
355  // work with.
356  //
357  gsMknot = m;
358  best_spline_set_histo(histo);
359  best_spline_seeds(m, xknot, yknot);
360 
361  TMinuit minuit(2*m+2);
362  minuit.SetPrintLevel(-1); // ssh...
363  if (gsFitObjective==kMODES) {
364  if (gsTGraph) delete gsTGraph;
365  gsTGraph = best_spline_make_mode_graph(histo);
366  minuit.SetFCN(best_spline_fcn1);
367  }
368  else {
369  minuit.SetFCN(best_spline_fcn0);
370  }
371  //
372  // Build the parameters into MINUIT, the m knot locations.
373  //
374  unsigned int ip; // Parameter index
375  char nm[256]; // Parameter name
376  double vstart; // Seed point
377  double vmin; // Forced lower limit on parameter
378  double vmax; // Forced upper limit on parameter
379  double vstep; // Initial guess at parameter step size
380  int ierr; // MINUIT error code
381  double eps = 1e-6;
382  for (i=0; i<m; ++i) {
383  sprintf(nm, "x%u",i);
384  ip = 2*i;
385  vstart = xknot[i];
386  vstep = 0.01;
387  //
388  // Rules for the allowed variations in x are unfortunately a
389  // little complicated. Generally they are allowed to vary from
390  // where they are to the midpoint of the next seed. Change that
391  // near the end points which are fixed in place.
392  //
393  if (i==0) {
394  vmin = vmax = 0.0; // Left edge point is fixed
395  }
396  else if (i==m-1) {
397  vmin = vmax = 0.0; // Right edge point is fixed
398  }
399  else if (i==1) {
400  vmin = xknot[0]+eps; // Left edge is the fixed left point
401  vmax = 0.5*(xknot[1]+xknot[2]); // Right edge is midway to next point
402  }
403  else if (i==m-2) {
404  vmin = 0.5*(xknot[i]+xknot[i-1]); // Left edge is midway to next point
405  vmax = xknot[m-1]-eps; // Right edge is fixed right point
406  }
407  else {
408  vmin = 0.5*(xknot[i]+xknot[i-1]); // Left edge is midway to next point
409  vmax = 0.5*(xknot[i]+xknot[i+1]); // Right edge is midway to next point
410  }
411  minuit.mnparm(ip, nm, vstart, vstep, vmin, vmax, ierr);
412  if (vmin==vmax) minuit.FixParameter(ip);
413 
414  sprintf(nm, "y%u",i);
415  ip = 2*i+1;
416  vstart = yknot[i];
417  vmin = 0.0;
418  vmax = 0.0;
419  vstep = 0.01;
420  minuit.mnparm(ip, nm, vstart, vstep, vmin, vmax, ierr);
421  }
422  //
423  // Last two parameters are the derivatives at the end points
424  //
425  ip = 2*m;
426  vstart = 0;
427  vstep = 0.01;
428  vmin = 0;
429  vmax = 0;
430  minuit.mnparm(ip, "b1", vstart, vstep, vmin, vmax, ierr);
431  ip = 2*m+1;
432  vstart = 0;
433  vstep = 0.1;
434  vmin = 0;
435  vmax = 0;
436  minuit.mnparm(ip, "e1", vstart, vstep, vmin, vmax, ierr);
437 
438  //
439  // OK - minimize. Use simplex method to start, then refine with
440  // migrad and minos for error estimation.
441  //
442  minuit.mnsimp();
443  double arg[2] = {1000.0, 1.0};
444  minuit.mnexcm("MIGRAD", arg, 2, ierr);
445  minuit.mnmnos();
446  //
447  // Unpack some results of the fit
448  //
449  double fmin, edm, errdef;
450  int nvpar, nparx, icstat;
451  minuit.mnstat(fmin, edm, errdef, nvpar, nparx, icstat);
452  if (chi2) *chi2 = fmin;
453 
454  //
455  // Stuff results back into the output.
456  //
457  double p, sigp;
458  for (i=0; i<m; ++i) {
459  minuit.GetParameter(2*i, p, sigp);
460  xknot[i] = p;
461  if(sigxknot) sigxknot[i] = sigp;
462 
463  minuit.GetParameter(2*i+1, p, sigp);
464  yknot[i] = p;
465  if(sigyknot) sigyknot[i] = sigp;
466  }
467  minuit.GetParameter(2*m+0, p, sigp);
468  *b1rtn = p;
469  if (sigb1rtn!=0) *sigb1rtn = sigp;
470 
471  minuit.GetParameter(2*m+1, p, sigp);
472  *e1rtn = p;
473  if (sige1rtn!=0) *sige1rtn = sigp;
474 
475  TSpline3 spline(title, xknot, yknot, m, "b1,e1", *b1rtn, *e1rtn);
476  return spline;
477  }
478 
479  /// make_simple_res_plot
480  /// \brief: do as the function name says...
481  ///
482  TH1F* make_simple_res_plot(TH2* h, const TSpline3& spl)
483  {
484  TH1F* hRes = new TH1F("res","simple resolution;res", 200, -1.5, 1.5);
485 
486  // loop over the TH1 to calculate the simple resolution
487  for (int i = 1; i <= h->GetNbinsX(); ++i) {
488 
489  double R = spl.Eval(h->GetXaxis()->GetBinCenter(i));
490 
491  for (int j = 1; j <= h->GetNbinsY(); ++j) {
492 
493  double T = h->GetYaxis()->GetBinCenter(j);
494  if (T == 0.0) continue; // should never happen but just in case...
495  double res = (R-T)/T;
496  double weight = h->GetBinContent(h->GetBin(i,j));
497 
498  hRes->Fill(res, weight);
499 
500  } // end loop over j
501  } // end loop over i
502 
503  return hRes;
504  }
505 
506  //////////////////////////////////////////////////////////////////////
507  // Here and below are the units tests
508  //....................................................................
509  static TH2* make_test_th2()
510  {
511  static TF2 func("func",
512  "exp(-(((0.2+x-x**2/10)-y)/sqrt(x))**2)/(x+1)**2",
513  0,5,
514  0,5);
515  static TH2F h("h","h",100,0,5,100,0,5);
516  h.FillRandom("func", 100000);
517  //
518  // Put one event in the overflow to see if it trips a warning
519  //
520  h.Fill(10,10);
521  return &h;
522  }
523 
524  //......................................................................
525 
527  {
529 
530  double xknot[5];
531  double yknot[5];
532  best_spline_seeds(5, xknot, yknot);
533 
534  gStyle->SetPalette();
535  gsTH2->Draw("colz");
536  TGraph* g = new TGraph(5, xknot, yknot);
537  g->SetMarkerStyle(20);
538  g->SetMarkerColor(kBlack);
539  g->Draw("same,p");
540  }
541 
542  //......................................................................
543 
544  void test_best_spline(int m)
545  {
547  TH2* histo = make_test_th2();
548  const unsigned int nk = 5;
549  double xk[nk];
550  double yk[nk];
551  double e1;
552  double b1;
553  double chi2;
554  double sigxk[nk];
555  double sigyk[nk];
556  double sige1;
557  double sigb1;
558  const char* title = "test_spline";
559 
560  best_spline(histo, nk, xk, yk, &b1, &e1, &chi2,
561  sigxk, sigyk, &sigb1, &sige1,
562  title);
563 
564  TSpline* s = new TSpline3("test_spline", xk, yk, nk, "b1, e1", b1, e1);
565  unsigned int n = histo->GetNbinsX();
566  double x[1000];
567  double y[1000];
568  unsigned int i;
569  for (i=0; i<n; ++i) {
570  x[i] = histo->GetXaxis()->GetBinCenter(i+1);
571  y[i] = s->Eval(x[i]);
572  }
573  s->SaveAs("best_spline_saveas.cc","");
574 
575  gStyle->SetPalette();
576  TCanvas* c = new TCanvas("c","c",800,800);
577  c->SetLogz();
578  TGraph* gs = new TGraph(n, x, y);
579  gs->SetLineStyle(1);
580  gs->SetLineWidth(2);
581  gs->SetLineColor(kRed);
582  gs->SetMarkerStyle(20);
583  TGraph* gk = new TGraph(nk, xk, yk);
584  gk->SetMarkerStyle(20);
585  gk->SetMarkerSize(1.2);
586  gk->SetMarkerColor(kRed);
587  histo->Draw("colz");
588  gs->Draw("same");
589  gk->Draw("same,p");
590  if (gsTGraph) {
591  gsTGraph->SetMarkerStyle(24);
592  gsTGraph->SetMarkerColor(kBlack);
593  gsTGraph->Draw("pe,same");
594  }
595  TF1* f = new TF1("f","(0.2+x-x**2/10)",0,10);
596  f->Draw("same");
597  }
598 
599  //......................................................................
600 
602  {
603  static TF2 f("f","exp(-((y-x-0.01*x**2)/x)**2)/x",0,10,0,10);
604  static TH2F h("h","h",100,0,10,100,0,10);
605  h.FillRandom("f",100000);
606  TGraphErrors* g = best_spline_make_mode_graph(&h);
607  h.Draw("box");
608  g->SetLineColor(kRed);
609  g->SetMarkerStyle(24);
610  g->SetMarkerColor(kRed);
611  g->Draw("pe,same");
612  }
613 
614  //......................................................................
615 
617  {
618  static TF1 f("f","100*exp(-(x-3)**2/9)+20*exp(-(x-7)**2/25)",-10,10);
619  static TH1F h("h","h",200,-10,10);
620  static TH1F xh("x","x",1200,0,6);
621 
622  double x, sigx;
623  for (int i=0; i<1000; ++i) {
624  h.Reset();
625  h.FillRandom("f",10000);
626  x = best_spline_find_mode(&h, &sigx);
627  xh.Fill(x);
628  }
629  xh.Draw();
630  }
631 }
632 ////////////////////////////////////////////////////////////////////////
double best_spline_find_mode(TH1 *h, double *sig)
Given a 1D histogram, find an estimate of the mode following the method of Bickel, Journal of Statistical Computation and Simulation; 73 (2003), 899-912.
static const int kMAXKNOT
Definition: SplineUtils.h:61
fvar< T > fmin(const fvar< T > &x1, const fvar< T > &x2)
Definition: fmin.hpp:14
enum BeamMode kRed
Cuts and Vars for the 2020 FD DiF Study.
Definition: vars.h:6
const Var weight
Float_t x1[n_points_granero]
Definition: compare.C:5
void test_best_spline(int m)
const char * p
Definition: xmltok.h:285
static unsigned int gsMknot
Definition: SplineUtils.cxx:20
T sqrt(T number)
Definition: d0nt_math.hpp:156
static char gsFitObjective
Definition: SplineUtils.cxx:19
constexpr T pow(T x)
Definition: pow.h:75
static void best_spline_set_histo(TH2 *h)
static constexpr Double_t nm
Definition: Munits.h:133
Int_t par
Definition: SimpleIterate.C:24
std::string yaxis
static TH2 * make_test_th2()
static const char kMODES
Definition: SplineUtils.h:69
OStream cerr
Definition: OStream.cxx:7
void test_best_spline_mode_graph()
TString ip
Definition: loadincs.C:5
TH1F * make_simple_res_plot(TH2 *h, const TSpline3 &spl)
: do as the function name says...
double chi2()
std::string xaxis
static void best_spline_fcn0(int &, double *, double &chi2, double *par, int)
void test_best_spline_find_mode()
const XML_Char * s
Definition: expat.h:262
double dx[NP][NC]
static void best_spline_seeds(unsigned int m, double *xknot, double *yknot)
Definition: SplineUtils.cxx:33
TGraphErrors * best_spline_get_mode_graph()
If we&#39;ve chosen to fit to modes, get access to the graph of the modes vs. x.
fvar< T > exp(const fvar< T > &x)
Definition: exp.hpp:10
Float_t d
Definition: plot.C:236
#define R(x)
double func(double x, double y)
const double j
Definition: BetheBloch.cxx:29
TH2D * histo
TSpline3 best_spline(TH2 *histo, unsigned int m, double *xknot, double *yknot, double *b1rtn, double *e1rtn, double *chi2, double *sigxknot, double *sigyknot, double *sigb1rtn, double *sige1rtn, const char *title)
Find the best spline that passes through a set of data.
double sigma(TH1F *hist, double percentile)
void best_spline_set_objective(char obj)
Allow switching between the two objective functions (minimum overall chi^2 or minimum approach to the...
static void best_spline_fcn1(int &, double *, double &chi2, double *par, int)
: Given a 2D histogram, find the best TSpline3 that described the y vs. x trend in the data...
static TGraphErrors * best_spline_make_mode_graph(TH2 *h)
double T
Definition: Xdiff_gwt.C:5
void test_best_spline_seeds()
static TH2 * gsTH2
Definition: SplineUtils.cxx:21
Float_t e
Definition: plot.C:35
static const char kLEAST_SQUARES
Definition: SplineUtils.h:68
fvar< T > fmax(const fvar< T > &x1, const fvar< T > &x2)
Definition: fmax.hpp:22
static TGraphErrors * gsTGraph
Definition: SplineUtils.cxx:22
static double best_spline_mode_fcn(double x, const TH1 *h)