FastJet  3.3.3
ClusterSequenceAreaBase.cc
1 
2 //FJSTARTHEADER
3 // $Id: ClusterSequenceAreaBase.cc 4420 2019-11-29 09:28:20Z soyez $
4 //
5 // Copyright (c) 2005-2019, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
6 //
7 //----------------------------------------------------------------------
8 // This file is part of FastJet.
9 //
10 // FastJet is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU General Public License as published by
12 // the Free Software Foundation; either version 2 of the License, or
13 // (at your option) any later version.
14 //
15 // The algorithms that underlie FastJet have required considerable
16 // development. They are described in the original FastJet paper,
17 // hep-ph/0512210 and in the manual, arXiv:1111.6097. If you use
18 // FastJet as part of work towards a scientific publication, please
19 // quote the version you use and include a citation to the manual and
20 // optionally also to hep-ph/0512210.
21 //
22 // FastJet is distributed in the hope that it will be useful,
23 // but WITHOUT ANY WARRANTY; without even the implied warranty of
24 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 // GNU General Public License for more details.
26 //
27 // You should have received a copy of the GNU General Public License
28 // along with FastJet. If not, see <http://www.gnu.org/licenses/>.
29 //----------------------------------------------------------------------
30 //FJENDHEADER
31 
32 
33 
34 
35 #include "fastjet/ClusterSequenceAreaBase.hh"
36 #include <algorithm>
37 
38 FASTJET_BEGIN_NAMESPACE
39 
40 using namespace std;
41 
42 
43 /// allow for warnings
44 LimitedWarning ClusterSequenceAreaBase::_warnings;
45 LimitedWarning ClusterSequenceAreaBase::_warnings_zero_area;
46 LimitedWarning ClusterSequenceAreaBase::_warnings_empty_area;
47 
48 //----------------------------------------------------------------------
49 /// return the total area, within the selector's range, that is free
50 /// of jets.
51 ///
52 /// Calculate this as (range area) - \sum_{i in range} A_i
53 ///
54 /// for ClusterSequences with explicit ghosts, assume that there will
55 /// never be any empty area, i.e. it is always filled in by pure
56 /// ghosts jets. This holds for seq.rec. algorithms
57 double ClusterSequenceAreaBase::empty_area(const Selector & selector) const {
58 
59  if (has_explicit_ghosts()) {return 0.0;}
60  else { return empty_area_from_jets(inclusive_jets(0.0), selector);}
61 
62 }
63 
64 //----------------------------------------------------------------------
65 /// return the total area, within range, that is free of jets.
66 ///
67 /// Calculate this as (range area) - \sum_{i in range} A_i
68 ///
69 double ClusterSequenceAreaBase::empty_area_from_jets(
70  const std::vector<PseudoJet> & all_jets,
71  const Selector & selector) const {
72  _check_selector_good_for_median(selector);
73 
74  double empty = selector.area();
75  for (unsigned i = 0; i < all_jets.size(); i++) {
76  if (selector.pass(all_jets[i])) empty -= area(all_jets[i]);
77  }
78  return empty;
79 }
80 
81 // this is deprecated but used by other deprecated methods. So we hide
82 // the implementation in a protected method so that (i) it can still
83 // be used internally (without generating a compile-time warning when
84 // building FastJet) and the interface can be marked as deprecated.
85 // This can disappear once all the public interfaces have disappeared.
86 double ClusterSequenceAreaBase::median_pt_per_unit_area(const Selector & selector) const {
87  return _median_pt_per_unit_area(selector);
88 }
89 
90 // the hidden implementation
91 double ClusterSequenceAreaBase::_median_pt_per_unit_area(const Selector & selector) const {
92  return _median_pt_per_unit_something(selector,false);
93 }
94 
95 
96 
97 // this is deprecated but used by other deprecated methods. So we hide
98 // the implementation in a protected method so that (i) it can still
99 // be used internally (without generating a compile-time warning when
100 // building FastJet) and the interface can be marked as deprecated.
101 // This can disappear once all the public interfaces have disappeared.
102 double ClusterSequenceAreaBase::median_pt_per_unit_area_4vector(const Selector & selector) const {
103  return _median_pt_per_unit_area_4vector(selector);
104 }
105 
106 // the deprecated interface
107 double ClusterSequenceAreaBase::_median_pt_per_unit_area_4vector(const Selector & selector) const {
108  return _median_pt_per_unit_something(selector,true);
109 }
110 
111 
112 //----------------------------------------------------------------------
113 // this is deprecated but used by other deprecated methods. So we hide
114 // the implementation in a protected method so that (i) it can still
115 // be used internally (without generating a compile-time warning when
116 // building FastJet) and the interface can be marked as deprecated.
117 // This can disappear once all the public interfaces have disappeared.
118 double ClusterSequenceAreaBase::median_pt_per_unit_something(
119  const Selector & selector, bool use_area_4vector) const {
120  return _median_pt_per_unit_something(selector, use_area_4vector);
121 }
122 
123 // the median of (pt/area) for jets contained within range, counting
124 // the empty area as if it were made up of a collection of empty
125 // jets each of area (0.55 * pi R^2).
126 double ClusterSequenceAreaBase::_median_pt_per_unit_something(
127  const Selector & selector, bool use_area_4vector) const {
128  double median, sigma, mean_area;
129  _get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area);
130  return median;
131 }
132 
133 
134 //----------------------------------------------------------------------
135 /// fits a form pt_per_unit_area(y) = a + b*y^2 for jets in range.
136 /// exclude_above allows one to exclude large values of pt/area from
137 /// fit. use_area_4vector = true uses the 4vector areas.
138 void ClusterSequenceAreaBase::parabolic_pt_per_unit_area(
139  double & a, double & b, const Selector & selector,
140  double exclude_above, bool use_area_4vector) const {
141  return _parabolic_pt_per_unit_area(a, b, selector, exclude_above, use_area_4vector);
142 }
143 
144 void ClusterSequenceAreaBase::_parabolic_pt_per_unit_area(
145  double & a, double & b, const Selector & selector,
146  double exclude_above, bool use_area_4vector) const {
147  // sanity check on the selector: we require a finite area and that
148  // it applies jet by jet (see BackgroundEstimator for more advanced
149  // usage)
150  _check_selector_good_for_median(selector);
151 
152  int n=0;
153  int n_excluded = 0;
154  double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0;
155 
156  vector<PseudoJet> incl_jets = inclusive_jets();
157 
158  for (unsigned i = 0; i < incl_jets.size(); i++) {
159  if (selector.pass(incl_jets[i])) {
160  double this_area;
161  if ( use_area_4vector ) {
162  this_area = area_4vector(incl_jets[i]).perp();
163  } else {
164  this_area = area(incl_jets[i]);
165  }
166  double f = incl_jets[i].perp()/this_area;
167  if (exclude_above <= 0.0 || f < exclude_above) {
168  double x = incl_jets[i].rap(); double x2 = x*x;
169  mean_f += f;
170  mean_x2 += x2;
171  mean_x4 += x2*x2;
172  mean_fx2 += f*x2;
173  n++;
174  } else {
175  n_excluded++;
176  }
177  }
178  }
179 
180  if (n <= 1) {
181  // meaningful results require at least two jets inside the
182  // area -- mind you if there are empty jets we should be in
183  // any case doing something special...
184  a = 0.0;
185  b = 0.0;
186  } else {
187  mean_f /= n;
188  mean_x2 /= n;
189  mean_x4 /= n;
190  mean_fx2 /= n;
191 
192  b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4);
193  a = mean_f - b*mean_x2;
194  }
195  //cerr << "n_excluded = "<< n_excluded << endl;
196 }
197 
198 
199 //----------------------------------------------------------------------
200 void ClusterSequenceAreaBase::get_median_rho_and_sigma(
201  const Selector & selector, bool use_area_4vector,
202  double & median, double & sigma, double & mean_area) const {
203  _get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area);
204 }
205 
206 void ClusterSequenceAreaBase::_get_median_rho_and_sigma(
207  const Selector & selector, bool use_area_4vector,
208  double & median, double & sigma, double & mean_area) const {
209 
210  vector<PseudoJet> incl_jets = inclusive_jets();
211  _get_median_rho_and_sigma(incl_jets, selector, use_area_4vector,
212  median, sigma, mean_area, true);
213 }
214 
215 void ClusterSequenceAreaBase::get_median_rho_and_sigma(
216  const vector<PseudoJet> & all_jets,
217  const Selector & selector, bool use_area_4vector,
218  double & median, double & sigma, double & mean_area,
219  bool all_are_incl) const {
220  _get_median_rho_and_sigma(all_jets, selector, use_area_4vector,
221  median, sigma, mean_area, all_are_incl);
222 }
223 
224 void ClusterSequenceAreaBase::_get_median_rho_and_sigma(
225  const vector<PseudoJet> & all_jets,
226  const Selector & selector, bool use_area_4vector,
227  double & median, double & sigma, double & mean_area,
228  bool all_are_incl) const {
229 
230  _check_jet_alg_good_for_median();
231 
232  // sanity check on the selector: we require a finite area and that
233  // it applies jet by jet (see BackgroundEstimator for more advanced
234  // usage)
235  _check_selector_good_for_median(selector);
236 
237  vector<double> pt_over_areas;
238  double total_area = 0.0;
239  double total_njets = 0;
240 
241  for (unsigned i = 0; i < all_jets.size(); i++) {
242  if (selector.pass(all_jets[i])) {
243  double this_area;
244  if (use_area_4vector) {
245  this_area = area_4vector(all_jets[i]).perp();
246  } else {
247  this_area = area(all_jets[i]);
248  }
249 
250  if (this_area>0) {
251  pt_over_areas.push_back(all_jets[i].perp()/this_area);
252  } else {
253  _warnings_zero_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): discarded jet with zero area. Zero-area jets may be due to (i) too large a ghost area (ii) a jet being outside the ghost range (iii) the computation not being done using an appropriate algorithm (kt;C/A).");
254  }
255 
256  total_area += this_area;
257  total_njets += 1.0;
258  }
259  }
260 
261  // there is nothing inside our region, so answer will always be zero
262  if (pt_over_areas.size() == 0) {
263  median = 0.0;
264  sigma = 0.0;
265  mean_area = 0.0;
266  return;
267  }
268 
269  // get median (pt/area) [this is the "old" median definition. It considers
270  // only the "real" jets in calculating the median, i.e. excluding the
271  // only-ghost ones; it will be supplemented with more info below]
272  sort(pt_over_areas.begin(), pt_over_areas.end());
273 
274  // now get the median & error, accounting for empty jets
275  // define the fractions of distribution at median, median-1sigma
276  double posn[2] = {0.5, (1.0-0.6827)/2.0};
277  double res[2];
278 
279  double n_empty, empty_a;
280  if (has_explicit_ghosts()) {
281  // NB: the following lines of code are potentially incorrect in cases
282  // where there are unclustered particles (empty_area would do a better job,
283  // at least for active areas). This is not an issue with kt or C/A, or other
284  // algorithms that cluster all particles (and the median estimation should in
285  // any case only be done with kt or C/A!)
286  empty_a = 0.0;
287  n_empty = 0;
288  } else if (all_are_incl) {
289  // the default case
290  empty_a = empty_area(selector);
291  n_empty = n_empty_jets(selector);
292  } else {
293  // this one is intended to be used when e.g. one runs C/A, then looks at its
294  // exclusive jets in order to get an effective smaller R value, and passes those
295  // to this routine.
296  empty_a = empty_area_from_jets(all_jets, selector);
297  mean_area = total_area / total_njets; // temporary value
298  n_empty = empty_a / mean_area;
299  }
300  //cout << "*** tot_area = " << total_area << ", empty_a = " << empty_a << endl;
301  //cout << "*** n_empty = " << n_empty << ", ntotal = " << total_njets << endl;
302  total_njets += n_empty;
303  total_area += empty_a;
304 
305  // we need an int (rather than an unsigned int) with the size of the
306  // pt_over_areas array, because we'll often be doing subtraction of
307  // -1, negating it, etc. All of these operations go crazy with unsigned ints.
308  int pt_over_areas_size = pt_over_areas.size();
309  if (n_empty < -pt_over_areas_size/4.0)
310  _warnings_empty_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
311 
312  for (int i = 0; i < 2; i++) {
313  double nj_median_pos =
314  (pt_over_areas_size-1.0 + n_empty)*posn[i] - n_empty;
315  double nj_median_ratio;
316  if (nj_median_pos >= 0 && pt_over_areas_size > 1) {
317  int int_nj_median = int(nj_median_pos);
318 
319  // avoid potential overflow issues
320  if (int_nj_median+1 > pt_over_areas_size-1){
321  int_nj_median = pt_over_areas_size-2;
322  nj_median_pos = pt_over_areas_size-1;
323  }
324 
325  nj_median_ratio =
326  pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos)
327  + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median);
328  } else {
329  nj_median_ratio = 0.0;
330  }
331  res[i] = nj_median_ratio;
332  }
333  median = res[0];
334  double error = res[0] - res[1];
335  mean_area = total_area / total_njets;
336  sigma = error * sqrt(mean_area);
337 }
338 
339 
340 // return a vector of all subtracted jets, using area_4vector, given rho.
341 // Only inclusive_jets above ptmin are subtracted and returned.
342 // the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
343 // i.e. not necessarily ordered in pt once subtracted
344 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(const double rho,
345  const double ptmin)
346  const {
347  return _subtracted_jets(rho,ptmin);
348 }
349 
350 vector<PseudoJet> ClusterSequenceAreaBase::_subtracted_jets(const double rho,
351  const double ptmin)
352  const {
353  vector<PseudoJet> sub_jets;
354  vector<PseudoJet> jets_local = sorted_by_pt(inclusive_jets(ptmin));
355  for (unsigned i=0; i<jets_local.size(); i++) {
356  PseudoJet sub_jet = _subtracted_jet(jets_local[i],rho);
357  sub_jets.push_back(sub_jet);
358  }
359  return sub_jets;
360 }
361 
362 // return a vector of subtracted jets, using area_4vector.
363 // Only inclusive_jets above ptmin are subtracted and returned.
364 // the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
365 // i.e. not necessarily ordered in pt once subtracted
366 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(
367  const Selector & selector,
368  const double ptmin)
369  const {
370  double rho = _median_pt_per_unit_area_4vector(selector);
371  return _subtracted_jets(rho,ptmin);
372 }
373 
374 
375 /// return a subtracted jet, using area_4vector, given rho
376 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
377  const double rho) const {
378  return _subtracted_jet(jet, rho);
379 }
380 
381 PseudoJet ClusterSequenceAreaBase::_subtracted_jet(const PseudoJet & jet,
382  const double rho) const {
383  PseudoJet area4vect = area_4vector(jet);
384  PseudoJet sub_jet;
385  // sanity check
386  if (rho*area4vect.perp() < jet.perp() ) {
387  sub_jet = jet - rho*area4vect;
388  } else { sub_jet = PseudoJet(0.0,0.0,0.0,0.0); }
389 
390  // make sure the subtracted jet has the same index (cluster, user, csw)
391  // (i.e. "looks like") the original jet
393  sub_jet.set_user_index(jet.user_index());
394  // do not use CS::_set_structure_shared_ptr here, which should
395  // only be called to maintain the tally during construction
397  return sub_jet;
398 }
399 
400 
401 /// return a subtracted jet, using area_4vector; note that this is
402 /// potentially inefficient if repeatedly used for many different
403 /// jets, because rho will be recalculated each time around.
404 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
405  const Selector & selector) const {
406  return _subtracted_jet(jet, selector);
407 }
408 
409 PseudoJet ClusterSequenceAreaBase::_subtracted_jet(const PseudoJet & jet,
410  const Selector & selector) const {
411  double rho = _median_pt_per_unit_area_4vector(selector);
412  PseudoJet sub_jet = _subtracted_jet(jet, rho);
413  return sub_jet;
414 }
415 
416 
417 /// return the subtracted pt, given rho
418 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
419  const double rho,
420  bool use_area_4vector) const {
421  return _subtracted_pt(jet, rho, use_area_4vector);
422 }
423 
424 double ClusterSequenceAreaBase::_subtracted_pt(const PseudoJet & jet,
425  const double rho,
426  bool use_area_4vector) const {
427  if ( use_area_4vector ) {
428  PseudoJet sub_jet = _subtracted_jet(jet,rho);
429  return sub_jet.perp();
430  } else {
431  return jet.perp() - rho*area(jet);
432  }
433 }
434 
435 
436 /// return the subtracted pt; note that this is
437 /// potentially inefficient if repeatedly used for many different
438 /// jets, because rho will be recalculated each time around.
439 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
440  const Selector & selector,
441  bool use_area_4vector) const {
442  if ( use_area_4vector ) {
443  PseudoJet sub_jet = _subtracted_jet(jet,selector);
444  return sub_jet.perp();
445  } else {
446  double rho = _median_pt_per_unit_area(selector);
447  return _subtracted_pt(jet,rho,false);
448  }
449 }
450 
451 // check the selector is suited for the computations i.e. applies jet
452 // by jet and has a finite area
453 void ClusterSequenceAreaBase::_check_selector_good_for_median(const Selector &selector) const{
454  // make sure the selector has a finite area
455  if ((! has_explicit_ghosts()) && (! selector.has_finite_area())){
456  throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors with a finite area");
457  }
458 
459  // make sure the selector applies jet by jet
460  if (! selector.applies_jet_by_jet()){
461  throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors that apply jet by jet");
462  }
463 }
464 
465 
466 /// check the jet algorithm is suitable (and if not issue a warning)
467 void ClusterSequenceAreaBase::_check_jet_alg_good_for_median() const {
468  if (jet_def().jet_algorithm() != kt_algorithm
469  && jet_def().jet_algorithm() != cambridge_algorithm
470  && jet_def().jet_algorithm() != cambridge_for_passive_algorithm) {
471  _warnings.warn("ClusterSequenceAreaBase: jet_def being used may not be suitable for estimating diffuse backgrounds (good options are kt, cam)");
472  }
473 }
474 
475 
476 
477 FASTJET_END_NAMESPACE
a version of cambridge with a special distance measure for particles whose pt is < extra_param(); thi...
vector< PseudoJet > sorted_by_pt(const vector< PseudoJet > &jets)
return a vector of jets sorted into decreasing kt2
Definition: PseudoJet.cc:774
void set_user_index(const int index)
set the user_index, intended to allow the user to add simple identifying information to a particle/je...
Definition: PseudoJet.hh:346
bool has_finite_area() const
returns true if it has a meaningful and finite area (i.e.
Definition: Selector.hh:250
the longitudinally invariant kt algorithm
const SharedPtr< PseudoJetStructureBase > & structure_shared_ptr() const
return a reference to the shared pointer to the PseudoJetStructureBase associated with this PseudoJet...
Definition: PseudoJet.cc:517
bool applies_jet_by_jet() const
returns true if this can be applied jet by jet
Definition: Selector.hh:215
double perp() const
returns the scalar transverse momentum
Definition: PseudoJet.hh:145
void set_cluster_hist_index(const int index)
set the cluster_hist_index, intended to be used by clustering routines.
Definition: PseudoJet.hh:786
bool pass(const PseudoJet &jet) const
return true if the jet passes the selection
Definition: Selector.hh:178
base class corresponding to errors that can be thrown by FastJet
Definition: Error.hh:47
double area() const
returns the rapidity-phi area associated with the Selector (throws InvalidArea if the area does not m...
Definition: Selector.cc:189
int cluster_hist_index() const
return the cluster_hist_index, intended to be used by clustering routines.
Definition: PseudoJet.hh:784
Class that encodes information about cuts and other selection criteria that can be applied to PseudoJ...
Definition: Selector.hh:149
void set_structure_shared_ptr(const SharedPtr< PseudoJetStructureBase > &structure)
set the associated structure
Definition: PseudoJet.cc:468
the longitudinally invariant variant of the cambridge algorithm (aka Aachen algoithm).
Class to contain pseudojets, including minimal information of use to jet-clustering routines...
Definition: PseudoJet.hh:67
int user_index() const
return the user_index,
Definition: PseudoJet.hh:343