FastJet 3.4.1
ClusterSequenceAreaBase.cc
1
2//FJSTARTHEADER
3// $Id$
4//
5// Copyright (c) 2005-2023, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
6//
7//----------------------------------------------------------------------
8// This file is part of FastJet.
9//
10// FastJet is free software; you can redistribute it and/or modify
11// it under the terms of the GNU General Public License as published by
12// the Free Software Foundation; either version 2 of the License, or
13// (at your option) any later version.
14//
15// The algorithms that underlie FastJet have required considerable
16// development. They are described in the original FastJet paper,
17// hep-ph/0512210 and in the manual, arXiv:1111.6097. If you use
18// FastJet as part of work towards a scientific publication, please
19// quote the version you use and include a citation to the manual and
20// optionally also to hep-ph/0512210.
21//
22// FastJet is distributed in the hope that it will be useful,
23// but WITHOUT ANY WARRANTY; without even the implied warranty of
24// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25// GNU General Public License for more details.
26//
27// You should have received a copy of the GNU General Public License
28// along with FastJet. If not, see <http://www.gnu.org/licenses/>.
29//----------------------------------------------------------------------
30//FJENDHEADER
31
32
33
34
35#include "fastjet/ClusterSequenceAreaBase.hh"
36#include <algorithm>
37
38FASTJET_BEGIN_NAMESPACE
39
40using namespace std;
41
42
43/// allow for warnings
44LimitedWarning ClusterSequenceAreaBase::_warnings;
45LimitedWarning ClusterSequenceAreaBase::_warnings_zero_area;
46LimitedWarning ClusterSequenceAreaBase::_warnings_empty_area;
47
48//----------------------------------------------------------------------
49/// return the total area, within the selector's range, that is free
50/// of jets.
51///
52/// Calculate this as (range area) - \sum_{i in range} A_i
53///
54/// for ClusterSequences with explicit ghosts, assume that there will
55/// never be any empty area, i.e. it is always filled in by pure
56/// ghosts jets. This holds for seq.rec. algorithms
57double ClusterSequenceAreaBase::empty_area(const Selector & selector) const {
58
59 if (has_explicit_ghosts()) {return 0.0;}
60 else { return empty_area_from_jets(inclusive_jets(0.0), selector);}
61
62}
63
64//----------------------------------------------------------------------
65/// return the total area, within range, that is free of jets.
66///
67/// Calculate this as (range area) - \sum_{i in range} A_i
68///
69double ClusterSequenceAreaBase::empty_area_from_jets(
70 const std::vector<PseudoJet> & all_jets,
71 const Selector & selector) const {
72 _check_selector_good_for_median(selector);
73
74 double empty = selector.area();
75 for (unsigned i = 0; i < all_jets.size(); i++) {
76 if (selector.pass(all_jets[i])) empty -= area(all_jets[i]);
77 }
78 return empty;
79}
80
81// this is deprecated but used by other deprecated methods. So we hide
82// the implementation in a protected method so that (i) it can still
83// be used internally (without generating a compile-time warning when
84// building FastJet) and the interface can be marked as deprecated.
85// This can disappear once all the public interfaces have disappeared.
86double ClusterSequenceAreaBase::median_pt_per_unit_area(const Selector & selector) const {
87 return _median_pt_per_unit_area(selector);
88}
89
90// the hidden implementation
91double ClusterSequenceAreaBase::_median_pt_per_unit_area(const Selector & selector) const {
92 return _median_pt_per_unit_something(selector,false);
93}
94
95
96
97// this is deprecated but used by other deprecated methods. So we hide
98// the implementation in a protected method so that (i) it can still
99// be used internally (without generating a compile-time warning when
100// building FastJet) and the interface can be marked as deprecated.
101// This can disappear once all the public interfaces have disappeared.
102double ClusterSequenceAreaBase::median_pt_per_unit_area_4vector(const Selector & selector) const {
103 return _median_pt_per_unit_area_4vector(selector);
104}
105
106// the deprecated interface
107double ClusterSequenceAreaBase::_median_pt_per_unit_area_4vector(const Selector & selector) const {
108 return _median_pt_per_unit_something(selector,true);
109}
110
111
112//----------------------------------------------------------------------
113// this is deprecated but used by other deprecated methods. So we hide
114// the implementation in a protected method so that (i) it can still
115// be used internally (without generating a compile-time warning when
116// building FastJet) and the interface can be marked as deprecated.
117// This can disappear once all the public interfaces have disappeared.
118double ClusterSequenceAreaBase::median_pt_per_unit_something(
119 const Selector & selector, bool use_area_4vector) const {
120 return _median_pt_per_unit_something(selector, use_area_4vector);
121}
122
123// the median of (pt/area) for jets contained within range, counting
124// the empty area as if it were made up of a collection of empty
125// jets each of area (0.55 * pi R^2).
126double ClusterSequenceAreaBase::_median_pt_per_unit_something(
127 const Selector & selector, bool use_area_4vector) const {
128 double median, sigma, mean_area;
129 _get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area);
130 return median;
131}
132
133
134//----------------------------------------------------------------------
135/// fits a form pt_per_unit_area(y) = a + b*y^2 for jets in range.
136/// exclude_above allows one to exclude large values of pt/area from
137/// fit. use_area_4vector = true uses the 4vector areas.
138void ClusterSequenceAreaBase::parabolic_pt_per_unit_area(
139 double & a, double & b, const Selector & selector,
140 double exclude_above, bool use_area_4vector) const {
141 return _parabolic_pt_per_unit_area(a, b, selector, exclude_above, use_area_4vector);
142}
143
144void ClusterSequenceAreaBase::_parabolic_pt_per_unit_area(
145 double & a, double & b, const Selector & selector,
146 double exclude_above, bool use_area_4vector) const {
147 // sanity check on the selector: we require a finite area and that
148 // it applies jet by jet (see BackgroundEstimator for more advanced
149 // usage)
150 _check_selector_good_for_median(selector);
151
152 int n=0;
153 int n_excluded = 0;
154 double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0;
155
156 vector<PseudoJet> incl_jets = inclusive_jets();
157
158 for (unsigned i = 0; i < incl_jets.size(); i++) {
159 if (selector.pass(incl_jets[i])) {
160 double this_area;
161 if ( use_area_4vector ) {
162 this_area = area_4vector(incl_jets[i]).perp();
163 } else {
164 this_area = area(incl_jets[i]);
165 }
166 double f = incl_jets[i].perp()/this_area;
167 if (exclude_above <= 0.0 || f < exclude_above) {
168 double x = incl_jets[i].rap(); double x2 = x*x;
169 mean_f += f;
170 mean_x2 += x2;
171 mean_x4 += x2*x2;
172 mean_fx2 += f*x2;
173 n++;
174 } else {
175 n_excluded++;
176 }
177 }
178 }
179
180 if (n <= 1) {
181 // meaningful results require at least two jets inside the
182 // area -- mind you if there are empty jets we should be in
183 // any case doing something special...
184 a = 0.0;
185 b = 0.0;
186 } else {
187 mean_f /= n;
188 mean_x2 /= n;
189 mean_x4 /= n;
190 mean_fx2 /= n;
191
192 b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4);
193 a = mean_f - b*mean_x2;
194 }
195 //cerr << "n_excluded = "<< n_excluded << endl;
196}
197
198
199//----------------------------------------------------------------------
200void ClusterSequenceAreaBase::get_median_rho_and_sigma(
201 const Selector & selector, bool use_area_4vector,
202 double & median, double & sigma, double & mean_area) const {
203 _get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area);
204}
205
206void ClusterSequenceAreaBase::_get_median_rho_and_sigma(
207 const Selector & selector, bool use_area_4vector,
208 double & median, double & sigma, double & mean_area) const {
209
210 vector<PseudoJet> incl_jets = inclusive_jets();
211 _get_median_rho_and_sigma(incl_jets, selector, use_area_4vector,
212 median, sigma, mean_area, true);
213}
214
215void ClusterSequenceAreaBase::get_median_rho_and_sigma(
216 const vector<PseudoJet> & all_jets,
217 const Selector & selector, bool use_area_4vector,
218 double & median, double & sigma, double & mean_area,
219 bool all_are_incl) const {
220 _get_median_rho_and_sigma(all_jets, selector, use_area_4vector,
221 median, sigma, mean_area, all_are_incl);
222}
223
224void ClusterSequenceAreaBase::_get_median_rho_and_sigma(
225 const vector<PseudoJet> & all_jets,
226 const Selector & selector, bool use_area_4vector,
227 double & median, double & sigma, double & mean_area,
228 bool all_are_incl) const {
229
230 _check_jet_alg_good_for_median();
231
232 // sanity check on the selector: we require a finite area and that
233 // it applies jet by jet (see BackgroundEstimator for more advanced
234 // usage)
235 _check_selector_good_for_median(selector);
236
237 vector<double> pt_over_areas;
238 double total_area = 0.0;
239 double total_njets = 0;
240
241 for (unsigned i = 0; i < all_jets.size(); i++) {
242 if (selector.pass(all_jets[i])) {
243 double this_area;
244 if (use_area_4vector) {
245 this_area = area_4vector(all_jets[i]).perp();
246 } else {
247 this_area = area(all_jets[i]);
248 }
249
250 if (this_area>0) {
251 pt_over_areas.push_back(all_jets[i].perp()/this_area);
252 } else {
253 _warnings_zero_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): discarded jet with zero area. Zero-area jets may be due to (i) too large a ghost area (ii) a jet being outside the ghost range (iii) the computation not being done using an appropriate algorithm (kt;C/A).");
254 }
255
256 total_area += this_area;
257 total_njets += 1.0;
258 }
259 }
260
261 // there is nothing inside our region, so answer will always be zero
262 if (pt_over_areas.size() == 0) {
263 median = 0.0;
264 sigma = 0.0;
265 mean_area = 0.0;
266 return;
267 }
268
269 // get median (pt/area) [this is the "old" median definition. It considers
270 // only the "real" jets in calculating the median, i.e. excluding the
271 // only-ghost ones; it will be supplemented with more info below]
272 sort(pt_over_areas.begin(), pt_over_areas.end());
273
274 // now get the median & error, accounting for empty jets
275 // define the fractions of distribution at median, median-1sigma
276 double posn[2] = {0.5, (1.0-0.6827)/2.0};
277 double res[2];
278
279 double n_empty, empty_a;
280 if (has_explicit_ghosts()) {
281 // NB: the following lines of code are potentially incorrect in cases
282 // where there are unclustered particles (empty_area would do a better job,
283 // at least for active areas). This is not an issue with kt or C/A, or other
284 // algorithms that cluster all particles (and the median estimation should in
285 // any case only be done with kt or C/A!)
286 empty_a = 0.0;
287 n_empty = 0;
288 } else if (all_are_incl) {
289 // the default case
290 empty_a = empty_area(selector);
291 n_empty = n_empty_jets(selector);
292 } else {
293 // this one is intended to be used when e.g. one runs C/A, then looks at its
294 // exclusive jets in order to get an effective smaller R value, and passes those
295 // to this routine.
296 empty_a = empty_area_from_jets(all_jets, selector);
297 mean_area = total_area / total_njets; // temporary value
298 n_empty = empty_a / mean_area;
299 }
300 //cout << "*** tot_area = " << total_area << ", empty_a = " << empty_a << endl;
301 //cout << "*** n_empty = " << n_empty << ", ntotal = " << total_njets << endl;
302 total_njets += n_empty;
303 total_area += empty_a;
304
305 // we need an int (rather than an unsigned int) with the size of the
306 // pt_over_areas array, because we'll often be doing subtraction of
307 // -1, negating it, etc. All of these operations go crazy with unsigned ints.
308 int pt_over_areas_size = pt_over_areas.size();
309 if (n_empty < -pt_over_areas_size/4.0) {
310 _warnings_empty_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
311 }
312
313 for (int i = 0; i < 2; i++) {
314 double nj_median_pos =
315 (pt_over_areas_size-1.0 + n_empty)*posn[i] - n_empty;
316 double nj_median_ratio;
317 if (nj_median_pos >= 0 && pt_over_areas_size > 1) {
318 int int_nj_median = int(nj_median_pos);
319
320 // avoid potential overflow issues
321 if (int_nj_median+1 > pt_over_areas_size-1){
322 int_nj_median = pt_over_areas_size-2;
323 nj_median_pos = pt_over_areas_size-1;
324 }
325
326 nj_median_ratio =
327 pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos)
328 + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median);
329 } else {
330 nj_median_ratio = 0.0;
331 }
332 res[i] = nj_median_ratio;
333 }
334 median = res[0];
335 double error = res[0] - res[1];
336 mean_area = total_area / total_njets;
337 sigma = error * sqrt( max(0.0, mean_area) );
338}
339
340
341// return a vector of all subtracted jets, using area_4vector, given rho.
342// Only inclusive_jets above ptmin are subtracted and returned.
343// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
344// i.e. not necessarily ordered in pt once subtracted
345vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(const double rho,
346 const double ptmin)
347 const {
348 return _subtracted_jets(rho,ptmin);
349}
350
351vector<PseudoJet> ClusterSequenceAreaBase::_subtracted_jets(const double rho,
352 const double ptmin)
353 const {
354 vector<PseudoJet> sub_jets;
355 vector<PseudoJet> jets_local = sorted_by_pt(inclusive_jets(ptmin));
356 for (unsigned i=0; i<jets_local.size(); i++) {
357 PseudoJet sub_jet = _subtracted_jet(jets_local[i],rho);
358 sub_jets.push_back(sub_jet);
359 }
360 return sub_jets;
361}
362
363// return a vector of subtracted jets, using area_4vector.
364// Only inclusive_jets above ptmin are subtracted and returned.
365// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
366// i.e. not necessarily ordered in pt once subtracted
367vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(
368 const Selector & selector,
369 const double ptmin)
370 const {
371 double rho = _median_pt_per_unit_area_4vector(selector);
372 return _subtracted_jets(rho,ptmin);
373}
374
375
376/// return a subtracted jet, using area_4vector, given rho
377PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
378 const double rho) const {
379 return _subtracted_jet(jet, rho);
380}
381
382PseudoJet ClusterSequenceAreaBase::_subtracted_jet(const PseudoJet & jet,
383 const double rho) const {
384 PseudoJet area4vect = area_4vector(jet);
385 PseudoJet sub_jet;
386 // sanity check
387 if (rho*area4vect.perp() < jet.perp() ) {
388 sub_jet = jet - rho*area4vect;
389 } else { sub_jet = PseudoJet(0.0,0.0,0.0,0.0); }
390
391 // make sure the subtracted jet has the same index (cluster, user, csw)
392 // (i.e. "looks like") the original jet
394 sub_jet.set_user_index(jet.user_index());
395 // do not use CS::_set_structure_shared_ptr here, which should
396 // only be called to maintain the tally during construction
398 return sub_jet;
399}
400
401
402/// return a subtracted jet, using area_4vector; note that this is
403/// potentially inefficient if repeatedly used for many different
404/// jets, because rho will be recalculated each time around.
405PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
406 const Selector & selector) const {
407 return _subtracted_jet(jet, selector);
408}
409
410PseudoJet ClusterSequenceAreaBase::_subtracted_jet(const PseudoJet & jet,
411 const Selector & selector) const {
412 double rho = _median_pt_per_unit_area_4vector(selector);
413 PseudoJet sub_jet = _subtracted_jet(jet, rho);
414 return sub_jet;
415}
416
417
418/// return the subtracted pt, given rho
419double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
420 const double rho,
421 bool use_area_4vector) const {
422 return _subtracted_pt(jet, rho, use_area_4vector);
423}
424
425double ClusterSequenceAreaBase::_subtracted_pt(const PseudoJet & jet,
426 const double rho,
427 bool use_area_4vector) const {
428 if ( use_area_4vector ) {
429 PseudoJet sub_jet = _subtracted_jet(jet,rho);
430 return sub_jet.perp();
431 } else {
432 return jet.perp() - rho*area(jet);
433 }
434}
435
436
437/// return the subtracted pt; note that this is
438/// potentially inefficient if repeatedly used for many different
439/// jets, because rho will be recalculated each time around.
440double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
441 const Selector & selector,
442 bool use_area_4vector) const {
443 if ( use_area_4vector ) {
444 PseudoJet sub_jet = _subtracted_jet(jet,selector);
445 return sub_jet.perp();
446 } else {
447 double rho = _median_pt_per_unit_area(selector);
448 return _subtracted_pt(jet,rho,false);
449 }
450}
451
452// check the selector is suited for the computations i.e. applies jet
453// by jet and has a finite area
454void ClusterSequenceAreaBase::_check_selector_good_for_median(const Selector &selector) const{
455 // make sure the selector has a finite area
456 if ((! has_explicit_ghosts()) && (! selector.has_finite_area())){
457 throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors with a finite area");
458 }
459
460 // make sure the selector applies jet by jet
461 if (! selector.applies_jet_by_jet()){
462 throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors that apply jet by jet");
463 }
464}
465
466
467/// check the jet algorithm is suitable (and if not issue a warning)
468void ClusterSequenceAreaBase::_check_jet_alg_good_for_median() const {
469 if (jet_def().jet_algorithm() != kt_algorithm
470 && jet_def().jet_algorithm() != cambridge_algorithm
471 && jet_def().jet_algorithm() != cambridge_for_passive_algorithm) {
472 _warnings.warn("ClusterSequenceAreaBase: jet_def being used may not be suitable for estimating diffuse backgrounds (good options are kt, cam)");
473 }
474}
475
476
477
478FASTJET_END_NAMESPACE
base class corresponding to errors that can be thrown by FastJet
Definition: Error.hh:52
Class to contain pseudojets, including minimal information of use to jet-clustering routines.
Definition: PseudoJet.hh:68
void set_user_index(const int index)
set the user_index, intended to allow the user to add simple identifying information to a particle/je...
Definition: PseudoJet.hh:392
double perp() const
returns the scalar transverse momentum
Definition: PseudoJet.hh:158
void set_cluster_hist_index(const int index)
set the cluster_hist_index, intended to be used by clustering routines.
Definition: PseudoJet.hh:836
int cluster_hist_index() const
return the cluster_hist_index, intended to be used by clustering routines.
Definition: PseudoJet.hh:834
void set_structure_shared_ptr(const SharedPtr< PseudoJetStructureBase > &structure_in)
set the associated structure
Definition: PseudoJet.cc:561
int user_index() const
return the user_index,
Definition: PseudoJet.hh:389
const SharedPtr< PseudoJetStructureBase > & structure_shared_ptr() const
return a reference to the shared pointer to the PseudoJetStructureBase associated with this PseudoJet
Definition: PseudoJet.cc:614
Class that encodes information about cuts and other selection criteria that can be applied to PseudoJ...
Definition: Selector.hh:149
bool has_finite_area() const
returns true if it has a meaningful and finite area (i.e.
Definition: Selector.hh:250
bool pass(const PseudoJet &jet) const
return true if the jet passes the selection
Definition: Selector.hh:178
bool applies_jet_by_jet() const
returns true if this can be applied jet by jet
Definition: Selector.hh:215
double area() const
returns the rapidity-phi area associated with the Selector (throws InvalidArea if the area does not m...
Definition: Selector.cc:189
vector< PseudoJet > sorted_by_pt(const vector< PseudoJet > &jets)
return a vector of jets sorted into decreasing kt2
Definition: PseudoJet.cc:871