FastJet  3.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
ClusterSequenceAreaBase.cc
1 
2 //FJSTARTHEADER
3 // $Id: ClusterSequenceAreaBase.cc 3433 2014-07-23 08:17:03Z salam $
4 //
5 // Copyright (c) 2005-2014, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
6 //
7 //----------------------------------------------------------------------
8 // This file is part of FastJet.
9 //
10 // FastJet is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License as published by
12 // the Free Software Foundation; either version 2 of the License, or
13 // (at your option) any later version.
14 //
15 // The algorithms that underlie FastJet have required considerable
16 // development. They are described in the original FastJet paper,
17 // hep-ph/0512210 and in the manual, arXiv:1111.6097. If you use
18 // FastJet as part of work towards a scientific publication, please
19 // quote the version you use and include a citation to the manual and
20 // optionally also to hep-ph/0512210.
21 //
22 // FastJet is distributed in the hope that it will be useful,
23 // but WITHOUT ANY WARRANTY; without even the implied warranty of
24 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 // GNU General Public License for more details.
26 //
27 // You should have received a copy of the GNU General Public License
28 // along with FastJet. If not, see <http://www.gnu.org/licenses/>.
29 //----------------------------------------------------------------------
30 //FJENDHEADER
31 
32 
33 
34 
35 #include "fastjet/ClusterSequenceAreaBase.hh"
36 #include <algorithm>
37 
38 FASTJET_BEGIN_NAMESPACE
39 
40 using namespace std;
41 
42 
43 /// allow for warnings
44 LimitedWarning ClusterSequenceAreaBase::_warnings;
45 LimitedWarning ClusterSequenceAreaBase::_warnings_zero_area;
46 LimitedWarning ClusterSequenceAreaBase::_warnings_empty_area;
47 
48 //----------------------------------------------------------------------
49 /// return the total area, within the selector's range, that is free
50 /// of jets.
51 ///
52 /// Calculate this as (range area) - \sum_{i in range} A_i
53 ///
54 /// for ClusterSequences with explicit ghosts, assume that there will
55 /// never be any empty area, i.e. it is always filled in by pure
56 /// ghosts jets. This holds for seq.rec. algorithms
57 double ClusterSequenceAreaBase::empty_area(const Selector & selector) const {
58 
59  if (has_explicit_ghosts()) {return 0.0;}
60  else { return empty_area_from_jets(inclusive_jets(0.0), selector);}
61 
62 }
63 
64 //----------------------------------------------------------------------
65 /// return the total area, within range, that is free of jets.
66 ///
67 /// Calculate this as (range area) - \sum_{i in range} A_i
68 ///
69 double ClusterSequenceAreaBase::empty_area_from_jets(
70  const std::vector<PseudoJet> & all_jets,
71  const Selector & selector) const {
72  _check_selector_good_for_median(selector);
73 
74  double empty = selector.area();
75  for (unsigned i = 0; i < all_jets.size(); i++) {
76  if (selector.pass(all_jets[i])) empty -= area(all_jets[i]);
77  }
78  return empty;
79 }
80 
81 double ClusterSequenceAreaBase::median_pt_per_unit_area(const Selector & selector) const {
82  return median_pt_per_unit_something(selector,false);
83 }
84 
85 double ClusterSequenceAreaBase::median_pt_per_unit_area_4vector(const Selector & selector) const {
86  return median_pt_per_unit_something(selector,true);
87 }
88 
89 
90 //----------------------------------------------------------------------
91 /// the median of (pt/area) for jets contained within range, counting
92 /// the empty area as if it were made up of a collection of empty
93 /// jets each of area (0.55 * pi R^2).
94 double ClusterSequenceAreaBase::median_pt_per_unit_something(
95  const Selector & selector, bool use_area_4vector) const {
96 
97  double median, sigma, mean_area;
98  get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area);
99  return median;
100 
101 }
102 
103 
104 //----------------------------------------------------------------------
105 /// fits a form pt_per_unit_area(y) = a + b*y^2 for jets in range.
106 /// exclude_above allows one to exclude large values of pt/area from fit.
107 /// use_area_4vector = true uses the 4vector areas.
108 void ClusterSequenceAreaBase::parabolic_pt_per_unit_area(
109  double & a, double & b, const Selector & selector,
110  double exclude_above, bool use_area_4vector) const {
111  // sanity check on the selector: we require a finite area and that
112  // it applies jet by jet (see BackgroundEstimator for more advanced
113  // usage)
114  _check_selector_good_for_median(selector);
115 
116  int n=0;
117  int n_excluded = 0;
118  double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0;
119 
120  vector<PseudoJet> incl_jets = inclusive_jets();
121 
122  for (unsigned i = 0; i < incl_jets.size(); i++) {
123  if (selector.pass(incl_jets[i])) {
124  double this_area;
125  if ( use_area_4vector ) {
126  this_area = area_4vector(incl_jets[i]).perp();
127  } else {
128  this_area = area(incl_jets[i]);
129  }
130  double f = incl_jets[i].perp()/this_area;
131  if (exclude_above <= 0.0 || f < exclude_above) {
132  double x = incl_jets[i].rap(); double x2 = x*x;
133  mean_f += f;
134  mean_x2 += x2;
135  mean_x4 += x2*x2;
136  mean_fx2 += f*x2;
137  n++;
138  } else {
139  n_excluded++;
140  }
141  }
142  }
143 
144  if (n <= 1) {
145  // meaningful results require at least two jets inside the
146  // area -- mind you if there are empty jets we should be in
147  // any case doing something special...
148  a = 0.0;
149  b = 0.0;
150  } else {
151  mean_f /= n;
152  mean_x2 /= n;
153  mean_x4 /= n;
154  mean_fx2 /= n;
155 
156  b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4);
157  a = mean_f - b*mean_x2;
158  }
159  //cerr << "n_excluded = "<< n_excluded << endl;
160 }
161 
162 
163 
164 void ClusterSequenceAreaBase::get_median_rho_and_sigma(
165  const Selector & selector, bool use_area_4vector,
166  double & median, double & sigma, double & mean_area) const {
167 
168  vector<PseudoJet> incl_jets = inclusive_jets();
169  get_median_rho_and_sigma(incl_jets, selector, use_area_4vector,
170  median, sigma, mean_area, true);
171 }
172 
173 
174 void ClusterSequenceAreaBase::get_median_rho_and_sigma(
175  const vector<PseudoJet> & all_jets,
176  const Selector & selector, bool use_area_4vector,
177  double & median, double & sigma, double & mean_area,
178  bool all_are_incl) const {
179 
180  _check_jet_alg_good_for_median();
181 
182  // sanity check on the selector: we require a finite area and that
183  // it applies jet by jet (see BackgroundEstimator for more advanced
184  // usage)
185  _check_selector_good_for_median(selector);
186 
187  vector<double> pt_over_areas;
188  double total_area = 0.0;
189  double total_njets = 0;
190 
191  for (unsigned i = 0; i < all_jets.size(); i++) {
192  if (selector.pass(all_jets[i])) {
193  double this_area;
194  if (use_area_4vector) {
195  this_area = area_4vector(all_jets[i]).perp();
196  } else {
197  this_area = area(all_jets[i]);
198  }
199 
200  if (this_area>0) {
201  pt_over_areas.push_back(all_jets[i].perp()/this_area);
202  } else {
203  _warnings_zero_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): discarded jet with zero area. Zero-area jets may be due to (i) too large a ghost area (ii) a jet being outside the ghost range (iii) the computation not being done using an appropriate algorithm (kt;C/A).");
204  }
205 
206  total_area += this_area;
207  total_njets += 1.0;
208  }
209  }
210 
211  // there is nothing inside our region, so answer will always be zero
212  if (pt_over_areas.size() == 0) {
213  median = 0.0;
214  sigma = 0.0;
215  mean_area = 0.0;
216  return;
217  }
218 
219  // get median (pt/area) [this is the "old" median definition. It considers
220  // only the "real" jets in calculating the median, i.e. excluding the
221  // only-ghost ones; it will be supplemented with more info below]
222  sort(pt_over_areas.begin(), pt_over_areas.end());
223 
224  // now get the median & error, accounting for empty jets
225  // define the fractions of distribution at median, median-1sigma
226  double posn[2] = {0.5, (1.0-0.6827)/2.0};
227  double res[2];
228 
229  double n_empty, empty_a;
230  if (has_explicit_ghosts()) {
231  // NB: the following lines of code are potentially incorrect in cases
232  // where there are unclustered particles (empty_area would do a better job,
233  // at least for active areas). This is not an issue with kt or C/A, or other
234  // algorithms that cluster all particles (and the median estimation should in
235  // any case only be done with kt or C/A!)
236  empty_a = 0.0;
237  n_empty = 0;
238  } else if (all_are_incl) {
239  // the default case
240  empty_a = empty_area(selector);
241  n_empty = n_empty_jets(selector);
242  } else {
243  // this one is intended to be used when e.g. one runs C/A, then looks at its
244  // exclusive jets in order to get an effective smaller R value, and passes those
245  // to this routine.
246  empty_a = empty_area_from_jets(all_jets, selector);
247  mean_area = total_area / total_njets; // temporary value
248  n_empty = empty_a / mean_area;
249  }
250  //cout << "*** tot_area = " << total_area << ", empty_a = " << empty_a << endl;
251  //cout << "*** n_empty = " << n_empty << ", ntotal = " << total_njets << endl;
252  total_njets += n_empty;
253  total_area += empty_a;
254 
255  // we need an int (rather than an unsigned int) with the size of the
256  // pt_over_areas array, because we'll often be doing subtraction of
257  // -1, negating it, etc. All of these operations go crazy with unsigned ints.
258  int pt_over_areas_size = pt_over_areas.size();
259  if (n_empty < -pt_over_areas_size/4.0)
260  _warnings_empty_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
261 
262  for (int i = 0; i < 2; i++) {
263  double nj_median_pos =
264  (pt_over_areas_size-1.0 + n_empty)*posn[i] - n_empty;
265  double nj_median_ratio;
266  if (nj_median_pos >= 0 && pt_over_areas_size > 1) {
267  int int_nj_median = int(nj_median_pos);
268 
269  // avoid potential overflow issues
270  if (int_nj_median+1 > pt_over_areas_size-1){
271  int_nj_median = pt_over_areas_size-2;
272  nj_median_pos = pt_over_areas_size-1;
273  }
274 
275  nj_median_ratio =
276  pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos)
277  + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median);
278  } else {
279  nj_median_ratio = 0.0;
280  }
281  res[i] = nj_median_ratio;
282  }
283  median = res[0];
284  double error = res[0] - res[1];
285  mean_area = total_area / total_njets;
286  sigma = error * sqrt(mean_area);
287 }
288 
289 
290 /// return a vector of all subtracted jets, using area_4vector, given rho.
291 /// Only inclusive_jets above ptmin are subtracted and returned.
292 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
293 /// i.e. not necessarily ordered in pt once subtracted
294 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(const double rho,
295  const double ptmin)
296  const {
297  vector<PseudoJet> sub_jets;
298  vector<PseudoJet> jets_local = sorted_by_pt(inclusive_jets(ptmin));
299  for (unsigned i=0; i<jets_local.size(); i++) {
300  PseudoJet sub_jet = subtracted_jet(jets_local[i],rho);
301  sub_jets.push_back(sub_jet);
302  }
303  return sub_jets;
304 }
305 
306 /// return a vector of subtracted jets, using area_4vector.
307 /// Only inclusive_jets above ptmin are subtracted and returned.
308 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
309 /// i.e. not necessarily ordered in pt once subtracted
310 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(
311  const Selector & selector,
312  const double ptmin)
313  const {
314  double rho = median_pt_per_unit_area_4vector(selector);
315  return subtracted_jets(rho,ptmin);
316 }
317 
318 
319 /// return a subtracted jet, using area_4vector, given rho
320 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
321  const double rho) const {
322  PseudoJet area4vect = area_4vector(jet);
323  PseudoJet sub_jet;
324  // sanity check
325  if (rho*area4vect.perp() < jet.perp() ) {
326  sub_jet = jet - rho*area4vect;
327  } else { sub_jet = PseudoJet(0.0,0.0,0.0,0.0); }
328 
329  // make sure the subtracted jet has the same index (cluster, user, csw)
330  // (i.e. "looks like") the original jet
332  sub_jet.set_user_index(jet.user_index());
333  // do not use CS::_set_structure_shared_ptr here, which should
334  // only be called to maintain the tally during construction
336  return sub_jet;
337 }
338 
339 
340 /// return a subtracted jet, using area_4vector; note that this is
341 /// potentially inefficient if repeatedly used for many different
342 /// jets, because rho will be recalculated each time around.
343 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
344  const Selector & selector) const {
345  double rho = median_pt_per_unit_area_4vector(selector);
346  PseudoJet sub_jet = subtracted_jet(jet, rho);
347  return sub_jet;
348 }
349 
350 
351 /// return the subtracted pt, given rho
352 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
353  const double rho,
354  bool use_area_4vector) const {
355  if ( use_area_4vector ) {
356  PseudoJet sub_jet = subtracted_jet(jet,rho);
357  return sub_jet.perp();
358  } else {
359  return jet.perp() - rho*area(jet);
360  }
361 }
362 
363 
364 /// return the subtracted pt; note that this is
365 /// potentially inefficient if repeatedly used for many different
366 /// jets, because rho will be recalculated each time around.
367 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
368  const Selector & selector,
369  bool use_area_4vector) const {
370  if ( use_area_4vector ) {
371  PseudoJet sub_jet = subtracted_jet(jet,selector);
372  return sub_jet.perp();
373  } else {
374  double rho = median_pt_per_unit_area(selector);
375  return subtracted_pt(jet,rho,false);
376  }
377 }
378 
379 // check the selector is suited for the computations i.e. applies jet
380 // by jet and has a finite area
381 void ClusterSequenceAreaBase::_check_selector_good_for_median(const Selector &selector) const{
382  // make sure the selector has a finite area
383  if ((! has_explicit_ghosts()) && (! selector.has_finite_area())){
384  throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors with a finite area");
385  }
386 
387  // make sure the selector applies jet by jet
388  if (! selector.applies_jet_by_jet()){
389  throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors that apply jet by jet");
390  }
391 }
392 
393 
394 /// check the jet algorithm is suitable (and if not issue a warning)
395 void ClusterSequenceAreaBase::_check_jet_alg_good_for_median() const {
396  if (jet_def().jet_algorithm() != kt_algorithm
397  && jet_def().jet_algorithm() != cambridge_algorithm
398  && jet_def().jet_algorithm() != cambridge_for_passive_algorithm) {
399  _warnings.warn("ClusterSequenceAreaBase: jet_def being used may not be suitable for estimating diffuse backgrounds (good options are kt, cam)");
400  }
401 }
402 
403 
404 
405 FASTJET_END_NAMESPACE
vector< PseudoJet > sorted_by_pt(const vector< PseudoJet > &jets)
return a vector of jets sorted into decreasing kt2
Definition: PseudoJet.cc:799
bool applies_jet_by_jet() const
returns true if this can be applied jet by jet
Definition: Selector.hh:215
void set_user_index(const int index)
set the user_index, intended to allow the user to add simple identifying information to a particle/je...
Definition: PseudoJet.hh:334
const SharedPtr< PseudoJetStructureBase > & structure_shared_ptr() const
return a reference to the shared pointer to the PseudoJetStructureBase associated with this PseudoJet...
Definition: PseudoJet.cc:513
int cluster_hist_index() const
return the cluster_hist_index, intended to be used by clustering routines.
Definition: PseudoJet.hh:770
void set_cluster_hist_index(const int index)
set the cluster_hist_index, intended to be used by clustering routines.
Definition: PseudoJet.hh:772
base class corresponding to errors that can be thrown by FastJet
Definition: Error.hh:47
bool has_finite_area() const
returns true if it has a meaningful and finite area (i.e.
Definition: Selector.hh:250
Class that encodes information about cuts and other selection criteria that can be applied to PseudoJ...
Definition: Selector.hh:149
double area() const
returns the rapidity-phi area associated with the Selector (throws InvalidArea if the area does not m...
Definition: Selector.cc:189
int user_index() const
return the user_index,
Definition: PseudoJet.hh:331
void set_structure_shared_ptr(const SharedPtr< PseudoJetStructureBase > &structure)
set the associated structure
Definition: PseudoJet.cc:464
double perp() const
returns the scalar transverse momentum
Definition: PseudoJet.hh:143
bool pass(const PseudoJet &jet) const
return true if the jet passes the selection
Definition: Selector.hh:178
Class to contain pseudojets, including minimal information of use to jet-clustering routines...
Definition: PseudoJet.hh:67