FastJet 3.0.2
|
00001 00002 //STARTHEADER 00003 // $Id: ClusterSequenceAreaBase.cc 2687 2011-11-14 11:17:51Z soyez $ 00004 // 00005 // Copyright (c) 2005-2011, Matteo Cacciari, Gavin P. Salam and Gregory Soyez 00006 // 00007 //---------------------------------------------------------------------- 00008 // This file is part of FastJet. 00009 // 00010 // FastJet is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU General Public License as published by 00012 // the Free Software Foundation; either version 2 of the License, or 00013 // (at your option) any later version. 00014 // 00015 // The algorithms that underlie FastJet have required considerable 00016 // development and are described in hep-ph/0512210. If you use 00017 // FastJet as part of work towards a scientific publication, please 00018 // include a citation to the FastJet paper. 00019 // 00020 // FastJet is distributed in the hope that it will be useful, 00021 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00022 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00023 // GNU General Public License for more details. 00024 // 00025 // You should have received a copy of the GNU General Public License 00026 // along with FastJet. If not, see <http://www.gnu.org/licenses/>. 00027 //---------------------------------------------------------------------- 00028 //ENDHEADER 00029 00030 00031 00032 00033 #include "fastjet/ClusterSequenceAreaBase.hh" 00034 #include <algorithm> 00035 00036 FASTJET_BEGIN_NAMESPACE 00037 00038 using namespace std; 00039 00040 00041 /// allow for warnings 00042 LimitedWarning ClusterSequenceAreaBase::_warnings; 00043 LimitedWarning ClusterSequenceAreaBase::_warnings_zero_area; 00044 LimitedWarning ClusterSequenceAreaBase::_warnings_empty_area; 00045 00046 //---------------------------------------------------------------------- 00047 /// return the total area, within the selector's range, that is free 00048 /// of jets. 00049 /// 00050 /// Calculate this as (range area) - \sum_{i in range} A_i 00051 /// 00052 /// for ClusterSequences with explicit ghosts, assume that there will 00053 /// never be any empty area, i.e. it is always filled in by pure 00054 /// ghosts jets. This holds for seq.rec. algorithms 00055 double ClusterSequenceAreaBase::empty_area(const Selector & selector) const { 00056 00057 if (has_explicit_ghosts()) {return 0.0;} 00058 else { return empty_area_from_jets(inclusive_jets(0.0), selector);} 00059 00060 } 00061 00062 //---------------------------------------------------------------------- 00063 /// return the total area, within range, that is free of jets. 00064 /// 00065 /// Calculate this as (range area) - \sum_{i in range} A_i 00066 /// 00067 double ClusterSequenceAreaBase::empty_area_from_jets( 00068 const std::vector<PseudoJet> & all_jets, 00069 const Selector & selector) const { 00070 _check_selector_good_for_median(selector); 00071 00072 double empty = selector.area(); 00073 for (unsigned i = 0; i < all_jets.size(); i++) { 00074 if (selector.pass(all_jets[i])) empty -= area(all_jets[i]); 00075 } 00076 return empty; 00077 } 00078 00079 double ClusterSequenceAreaBase::median_pt_per_unit_area(const Selector & selector) const { 00080 return median_pt_per_unit_something(selector,false); 00081 } 00082 00083 double ClusterSequenceAreaBase::median_pt_per_unit_area_4vector(const Selector & selector) const { 00084 return median_pt_per_unit_something(selector,true); 00085 } 00086 00087 00088 //---------------------------------------------------------------------- 00089 /// the median of (pt/area) for jets contained within range, counting 00090 /// the empty area as if it were made up of a collection of empty 00091 /// jets each of area (0.55 * pi R^2). 00092 double ClusterSequenceAreaBase::median_pt_per_unit_something( 00093 const Selector & selector, bool use_area_4vector) const { 00094 00095 double median, sigma, mean_area; 00096 get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area); 00097 return median; 00098 00099 } 00100 00101 00102 //---------------------------------------------------------------------- 00103 /// fits a form pt_per_unit_area(y) = a + b*y^2 for jets in range. 00104 /// exclude_above allows one to exclude large values of pt/area from fit. 00105 /// use_area_4vector = true uses the 4vector areas. 00106 void ClusterSequenceAreaBase::parabolic_pt_per_unit_area( 00107 double & a, double & b, const Selector & selector, 00108 double exclude_above, bool use_area_4vector) const { 00109 // sanity check on the selector: we require a finite area and that 00110 // it applies jet by jet (see BackgroundEstimator for more advanced 00111 // usage) 00112 _check_selector_good_for_median(selector); 00113 00114 int n=0; 00115 int n_excluded = 0; 00116 double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0; 00117 00118 vector<PseudoJet> incl_jets = inclusive_jets(); 00119 00120 for (unsigned i = 0; i < incl_jets.size(); i++) { 00121 if (selector.pass(incl_jets[i])) { 00122 double this_area; 00123 if ( use_area_4vector ) { 00124 this_area = area_4vector(incl_jets[i]).perp(); 00125 } else { 00126 this_area = area(incl_jets[i]); 00127 } 00128 double f = incl_jets[i].perp()/this_area; 00129 if (exclude_above <= 0.0 || f < exclude_above) { 00130 double x = incl_jets[i].rap(); double x2 = x*x; 00131 mean_f += f; 00132 mean_x2 += x2; 00133 mean_x4 += x2*x2; 00134 mean_fx2 += f*x2; 00135 n++; 00136 } else { 00137 n_excluded++; 00138 } 00139 } 00140 } 00141 00142 if (n <= 1) { 00143 // meaningful results require at least two jets inside the 00144 // area -- mind you if there are empty jets we should be in 00145 // any case doing something special... 00146 a = 0.0; 00147 b = 0.0; 00148 } else { 00149 mean_f /= n; 00150 mean_x2 /= n; 00151 mean_x4 /= n; 00152 mean_fx2 /= n; 00153 00154 b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4); 00155 a = mean_f - b*mean_x2; 00156 } 00157 //cerr << "n_excluded = "<< n_excluded << endl; 00158 } 00159 00160 00161 00162 void ClusterSequenceAreaBase::get_median_rho_and_sigma( 00163 const Selector & selector, bool use_area_4vector, 00164 double & median, double & sigma, double & mean_area) const { 00165 00166 vector<PseudoJet> incl_jets = inclusive_jets(); 00167 get_median_rho_and_sigma(incl_jets, selector, use_area_4vector, 00168 median, sigma, mean_area, true); 00169 } 00170 00171 00172 void ClusterSequenceAreaBase::get_median_rho_and_sigma( 00173 const vector<PseudoJet> & all_jets, 00174 const Selector & selector, bool use_area_4vector, 00175 double & median, double & sigma, double & mean_area, 00176 bool all_are_incl) const { 00177 00178 _check_jet_alg_good_for_median(); 00179 00180 // sanity check on the selector: we require a finite area and that 00181 // it applies jet by jet (see BackgroundEstimator for more advanced 00182 // usage) 00183 _check_selector_good_for_median(selector); 00184 00185 vector<double> pt_over_areas; 00186 double total_area = 0.0; 00187 double total_njets = 0; 00188 00189 for (unsigned i = 0; i < all_jets.size(); i++) { 00190 if (selector.pass(all_jets[i])) { 00191 double this_area; 00192 if (use_area_4vector) { 00193 this_area = area_4vector(all_jets[i]).perp(); 00194 } else { 00195 this_area = area(all_jets[i]); 00196 } 00197 00198 if (this_area>0) { 00199 pt_over_areas.push_back(all_jets[i].perp()/this_area); 00200 } else { 00201 _warnings_zero_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): discarded jet with zero area. Zero-area jets may be due to (i) too large a ghost area (ii) a jet being outside the ghost range (iii) the computation not being done using an appropriate algorithm (kt;C/A)."); 00202 } 00203 00204 total_area += this_area; 00205 total_njets += 1.0; 00206 } 00207 } 00208 00209 // there is nothing inside our region, so answer will always be zero 00210 if (pt_over_areas.size() == 0) { 00211 median = 0.0; 00212 sigma = 0.0; 00213 mean_area = 0.0; 00214 return; 00215 } 00216 00217 // get median (pt/area) [this is the "old" median definition. It considers 00218 // only the "real" jets in calculating the median, i.e. excluding the 00219 // only-ghost ones; it will be supplemented with more info below] 00220 sort(pt_over_areas.begin(), pt_over_areas.end()); 00221 00222 // now get the median & error, accounting for empty jets 00223 // define the fractions of distribution at median, median-1sigma 00224 double posn[2] = {0.5, (1.0-0.6827)/2.0}; 00225 double res[2]; 00226 00227 double n_empty, empty_a; 00228 if (has_explicit_ghosts()) { 00229 // NB: the following lines of code are potentially incorrect in cases 00230 // where there are unclustered particles (empty_area would do a better job, 00231 // at least for active areas). This is not an issue with kt or C/A, or other 00232 // algorithms that cluster all particles (and the median estimation should in 00233 // any case only be done with kt or C/A!) 00234 empty_a = 0.0; 00235 n_empty = 0; 00236 } else if (all_are_incl) { 00237 // the default case 00238 empty_a = empty_area(selector); 00239 n_empty = n_empty_jets(selector); 00240 } else { 00241 // this one is intended to be used when e.g. one runs C/A, then looks at its 00242 // exclusive jets in order to get an effective smaller R value, and passes those 00243 // to this routine. 00244 empty_a = empty_area_from_jets(all_jets, selector); 00245 mean_area = total_area / total_njets; // temporary value 00246 n_empty = empty_a / mean_area; 00247 } 00248 //cout << "*** tot_area = " << total_area << ", empty_a = " << empty_a << endl; 00249 //cout << "*** n_empty = " << n_empty << ", ntotal = " << total_njets << endl; 00250 total_njets += n_empty; 00251 total_area += empty_a; 00252 00253 // we need an int (rather than an unsigned int) with the size of the 00254 // pt_over_areas array, because we'll often be doing subtraction of 00255 // -1, negating it, etc. All of these operations go crazy with unsigned ints. 00256 int pt_over_areas_size = pt_over_areas.size(); 00257 if (n_empty < -pt_over_areas_size/4.0) 00258 _warnings_empty_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties."); 00259 00260 for (int i = 0; i < 2; i++) { 00261 double nj_median_pos = 00262 (pt_over_areas_size-1.0 + n_empty)*posn[i] - n_empty; 00263 double nj_median_ratio; 00264 if (nj_median_pos >= 0 && pt_over_areas_size > 1) { 00265 int int_nj_median = int(nj_median_pos); 00266 00267 // avoid potential overflow issues 00268 if (int_nj_median+1 > pt_over_areas_size-1){ 00269 int_nj_median = pt_over_areas_size-2; 00270 nj_median_pos = pt_over_areas_size-1; 00271 } 00272 00273 nj_median_ratio = 00274 pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos) 00275 + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median); 00276 } else { 00277 nj_median_ratio = 0.0; 00278 } 00279 res[i] = nj_median_ratio; 00280 } 00281 median = res[0]; 00282 double error = res[0] - res[1]; 00283 mean_area = total_area / total_njets; 00284 sigma = error * sqrt(mean_area); 00285 } 00286 00287 00288 /// return a vector of all subtracted jets, using area_4vector, given rho. 00289 /// Only inclusive_jets above ptmin are subtracted and returned. 00290 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()), 00291 /// i.e. not necessarily ordered in pt once subtracted 00292 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(const double rho, 00293 const double ptmin) 00294 const { 00295 vector<PseudoJet> sub_jets; 00296 vector<PseudoJet> jets_local = sorted_by_pt(inclusive_jets(ptmin)); 00297 for (unsigned i=0; i<jets_local.size(); i++) { 00298 PseudoJet sub_jet = subtracted_jet(jets_local[i],rho); 00299 sub_jets.push_back(sub_jet); 00300 } 00301 return sub_jets; 00302 } 00303 00304 /// return a vector of subtracted jets, using area_4vector. 00305 /// Only inclusive_jets above ptmin are subtracted and returned. 00306 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()), 00307 /// i.e. not necessarily ordered in pt once subtracted 00308 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets( 00309 const Selector & selector, 00310 const double ptmin) 00311 const { 00312 double rho = median_pt_per_unit_area_4vector(selector); 00313 return subtracted_jets(rho,ptmin); 00314 } 00315 00316 00317 /// return a subtracted jet, using area_4vector, given rho 00318 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet, 00319 const double rho) const { 00320 PseudoJet area4vect = area_4vector(jet); 00321 PseudoJet sub_jet; 00322 // sanity check 00323 if (rho*area4vect.perp() < jet.perp() ) { 00324 sub_jet = jet - rho*area4vect; 00325 } else { sub_jet = PseudoJet(0.0,0.0,0.0,0.0); } 00326 00327 // make sure the subtracted jet has the same index (cluster, user, csw) 00328 // (i.e. "looks like") the original jet 00329 sub_jet.set_cluster_hist_index(jet.cluster_hist_index()); 00330 sub_jet.set_user_index(jet.user_index()); 00331 // do not use CS::_set_structure_shared_ptr here, which should 00332 // only be called to maintain the tally during construction 00333 sub_jet.set_structure_shared_ptr(jet.structure_shared_ptr()); 00334 return sub_jet; 00335 } 00336 00337 00338 /// return a subtracted jet, using area_4vector; note that this is 00339 /// potentially inefficient if repeatedly used for many different 00340 /// jets, because rho will be recalculated each time around. 00341 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet, 00342 const Selector & selector) const { 00343 double rho = median_pt_per_unit_area_4vector(selector); 00344 PseudoJet sub_jet = subtracted_jet(jet, rho); 00345 return sub_jet; 00346 } 00347 00348 00349 /// return the subtracted pt, given rho 00350 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet, 00351 const double rho, 00352 bool use_area_4vector) const { 00353 if ( use_area_4vector ) { 00354 PseudoJet sub_jet = subtracted_jet(jet,rho); 00355 return sub_jet.perp(); 00356 } else { 00357 return jet.perp() - rho*area(jet); 00358 } 00359 } 00360 00361 00362 /// return the subtracted pt; note that this is 00363 /// potentially inefficient if repeatedly used for many different 00364 /// jets, because rho will be recalculated each time around. 00365 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet, 00366 const Selector & selector, 00367 bool use_area_4vector) const { 00368 if ( use_area_4vector ) { 00369 PseudoJet sub_jet = subtracted_jet(jet,selector); 00370 return sub_jet.perp(); 00371 } else { 00372 double rho = median_pt_per_unit_area(selector); 00373 return subtracted_pt(jet,rho,false); 00374 } 00375 } 00376 00377 // check the selector is suited for the computations i.e. applies jet 00378 // by jet and has a finite area 00379 void ClusterSequenceAreaBase::_check_selector_good_for_median(const Selector &selector) const{ 00380 // make sure the selector has a finite area 00381 if ((! has_explicit_ghosts()) && (! selector.has_finite_area())){ 00382 throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors with a finite area"); 00383 } 00384 00385 // make sure the selector applies jet by jet 00386 if (! selector.applies_jet_by_jet()){ 00387 throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors that apply jet by jet"); 00388 } 00389 } 00390 00391 00392 /// check the jet algorithm is suitable (and if not issue a warning) 00393 void ClusterSequenceAreaBase::_check_jet_alg_good_for_median() const { 00394 if (jet_def().jet_algorithm() != kt_algorithm 00395 && jet_def().jet_algorithm() != cambridge_algorithm 00396 && jet_def().jet_algorithm() != cambridge_for_passive_algorithm) { 00397 _warnings.warn("ClusterSequenceAreaBase: jet_def being used may not be suitable for estimating diffuse backgrounds (good options are kt, cam)"); 00398 } 00399 } 00400 00401 00402 00403 FASTJET_END_NAMESPACE