FastJet  3.3.3
BackgroundEstimatorBase.cc
1 //FJSTARTHEADER
2 // $Id: BackgroundEstimatorBase.cc 4420 2019-11-29 09:28:20Z soyez $
3 //
4 // Copyright (c) 2005-2019, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
5 //
6 //----------------------------------------------------------------------
7 // This file is part of FastJet.
8 //
9 // FastJet is free software; you can redistribute it and/or modify
10 // it under the terms of the GNU General Public License as published by
11 // the Free Software Foundation; either version 2 of the License, or
12 // (at your option) any later version.
13 //
14 // The algorithms that underlie FastJet have required considerable
15 // development. They are described in the original FastJet paper,
16 // hep-ph/0512210 and in the manual, arXiv:1111.6097. If you use
17 // FastJet as part of work towards a scientific publication, please
18 // quote the version you use and include a citation to the manual and
19 // optionally also to hep-ph/0512210.
20 //
21 // FastJet is distributed in the hope that it will be useful,
22 // but WITHOUT ANY WARRANTY; without even the implied warranty of
23 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 // GNU General Public License for more details.
25 //
26 // You should have received a copy of the GNU General Public License
27 // along with FastJet. If not, see <http://www.gnu.org/licenses/>.
28 //----------------------------------------------------------------------
29 //FJENDHEADER
30 
31 
32 #include "fastjet/tools/BackgroundEstimatorBase.hh"
33 
34 using namespace std;
35 
36 FASTJET_BEGIN_NAMESPACE // defined in fastjet/internal/base.hh
37 
// Definition of the static warning object that _median_and_stddev(...)
// uses to report a suspiciously large, negative estimated empty area.
38 LimitedWarning BackgroundEstimatorBase::_warnings_empty_area;
39 
40 //----------------------------------------------------------------------
41 // given a quantity in a vector (e.g. pt_over_area) and knowledge
42 // about the number of empty jets, calculate the median and
43 // stand_dev_if_gaussian (roughly from the 16th percentile)
44 //
45 // If do_fj2_calculation is set to true then this performs FastJet
46 // 2.X estimation of the standard deviation, which has a spurious
47 // offset in the limit of a small number of jets.
48 void BackgroundEstimatorBase::_median_and_stddev(const vector<double> & quantity_vector,
49  double n_empty_jets,
50  double & median,
51  double & stand_dev_if_gaussian,
52  bool do_fj2_calculation) const {
53 
54  // this check is redundant (the code below behaves sensibly even
55  // with a zero size), but serves as a reminder of what happens if
56  // the quantity vector is zero-sized
57  if (quantity_vector.size() == 0) {
58  median = 0;
59  stand_dev_if_gaussian = 0;
60  return;
61  }
62 
63  vector<double> sorted_quantity_vector = quantity_vector;
64  sort(sorted_quantity_vector.begin(), sorted_quantity_vector.end());
65 
66  // empty area can sometimes be negative; with small ranges this can
67  // become pathological, so warn the user
68  int n_jets_used = sorted_quantity_vector.size();
69  if (n_empty_jets < -n_jets_used/4.0)
70  _warnings_empty_area.warn("BackgroundEstimatorBase::_median_and_stddev(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
71 
72  // now get the median & error, accounting for empty jets;
73  // define the fractions of distribution at median, median-1sigma
74  double posn[2] = {0.5, (1.0-0.6827)/2.0};
75  double res[2];
76  for (int i = 0; i < 2; i++) {
77  res[i] = _percentile(sorted_quantity_vector, posn[i], n_empty_jets,
78  do_fj2_calculation);
79  }
80 
81  median = res[0];
82  stand_dev_if_gaussian = res[0] - res[1];
83 }
84 
85 
86 //----------------------------------------------------------------------
87 // computes a percentile of a given _sorted_ vector of quantities
88 // - sorted_quantities the (sorted) vector contains the data sample
89 // - percentile the percentile (defined between 0 and 1) to compute
90 // - nempty an additional number of 0's
91 // (considered at the beginning of
92 // the quantity vector)
93 // - do_fj2_calculation carry out the calculation as it
94 // was done in fj2 (suffers from "edge effects")
95 double BackgroundEstimatorBase::_percentile(const vector<double> & sorted_quantities,
96  const double percentile,
97  const double nempty,
98  const bool do_fj2_calculation
99  ) const {
100  assert(percentile >= 0.0 && percentile <= 1.0);
101 
102  int quantities_size = sorted_quantities.size();
103  if (quantities_size == 0) return 0;
104 
105  double total_njets = quantities_size + nempty;
106  double percentile_pos;
107  if (do_fj2_calculation) {
108  percentile_pos = (total_njets-1)*percentile - nempty;
109  } else {
110  percentile_pos = (total_njets)*percentile - nempty - 0.5;
111  }
112 
113  double result;
114  if (percentile_pos >= 0 && quantities_size > 1) {
115  int int_percentile_pos = int(percentile_pos);
116 
117  // avoid potential overflow issues
118  if (int_percentile_pos+1 > quantities_size-1){
119  int_percentile_pos = quantities_size-2;
120  percentile_pos = quantities_size-1;
121  }
122 
123  result =
124  sorted_quantities[int_percentile_pos] * (int_percentile_pos+1-percentile_pos)
125  + sorted_quantities[int_percentile_pos+1] * (percentile_pos - int_percentile_pos);
126 
127 
128  } else if (percentile_pos > -0.5 && quantities_size >= 1
129  && !do_fj2_calculation) {
130  // in the LHS of this "bin", just keep a constant value (we could have
131  // interpolated to zero, but this might misbehave in cases where all jets
132  // are active, because it would go to zero too fast)
133  result = sorted_quantities[0];
134  } else {
135  result = 0.0;
136  }
137  return result;
138 
139 
140 }
141 
142 
143 FASTJET_END_NAMESPACE // defined in fastjet/internal/base.hh