FastJet 3.0beta1
|
00001 00002 //STARTHEADER 00003 // $Id: ClusterSequenceAreaBase.cc 2414 2011-07-14 06:16:33Z salam $ 00004 // 00005 // Copyright (c) 2005-2006, Matteo Cacciari and Gavin Salam 00006 // 00007 //---------------------------------------------------------------------- 00008 // This file is part of FastJet. 00009 // 00010 // FastJet is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU General Public License as published by 00012 // the Free Software Foundation; either version 2 of the License, or 00013 // (at your option) any later version. 00014 // 00015 // The algorithms that underlie FastJet have required considerable 00016 // development and are described in hep-ph/0512210. If you use 00017 // FastJet as part of work towards a scientific publication, please 00018 // include a citation to the FastJet paper. 00019 // 00020 // FastJet is distributed in the hope that it will be useful, 00021 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00022 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00023 // GNU General Public License for more details. 00024 // 00025 // You should have received a copy of the GNU General Public License 00026 // along with FastJet; if not, write to the Free Software 00027 // Foundation, Inc.: 00028 // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00029 //---------------------------------------------------------------------- 00030 //ENDHEADER 00031 00032 00033 00034 00035 #include "fastjet/ClusterSequenceAreaBase.hh" 00036 #include <algorithm> 00037 00038 FASTJET_BEGIN_NAMESPACE 00039 00040 using namespace std; 00041 00042 00043 /// allow for warnings 00044 LimitedWarning ClusterSequenceAreaBase::_warnings; 00045 LimitedWarning ClusterSequenceAreaBase::_warnings_zero_area; 00046 LimitedWarning ClusterSequenceAreaBase::_warnings_empty_area; 00047 00048 //---------------------------------------------------------------------- 00049 /// return the total area, within the selector's range, that is free 00050 /// of jets. 00051 /// 00052 /// Calculate this as (range area) - \sum_{i in range} A_i 00053 /// 00054 /// for ClusterSequences with explicit ghosts, assume that there will 00055 /// never be any empty area, i.e. it is always filled in by pure 00056 /// ghosts jets. This holds for seq.rec. algorithms 00057 double ClusterSequenceAreaBase::empty_area(const Selector & selector) const { 00058 00059 if (has_explicit_ghosts()) {return 0.0;} 00060 else { return empty_area_from_jets(inclusive_jets(0.0), selector);} 00061 00062 } 00063 00064 //---------------------------------------------------------------------- 00065 /// return the total area, within range, that is free of jets. 00066 /// 00067 /// Calculate this as (range area) - \sum_{i in range} A_i 00068 /// 00069 double ClusterSequenceAreaBase::empty_area_from_jets( 00070 const std::vector<PseudoJet> & all_jets, 00071 const Selector & selector) const { 00072 _check_selector_good_for_median(selector); 00073 00074 double empty = selector.area(); 00075 for (unsigned i = 0; i < all_jets.size(); i++) { 00076 if (selector.pass(all_jets[i])) empty -= area(all_jets[i]); 00077 } 00078 return empty; 00079 } 00080 00081 double ClusterSequenceAreaBase::median_pt_per_unit_area(const Selector & selector) const { 00082 return median_pt_per_unit_something(selector,false); 00083 } 00084 00085 double ClusterSequenceAreaBase::median_pt_per_unit_area_4vector(const Selector & selector) const { 00086 return median_pt_per_unit_something(selector,true); 00087 } 00088 00089 00090 //---------------------------------------------------------------------- 00091 /// the median of (pt/area) for jets contained within range, counting 00092 /// the empty area as if it were made up of a collection of empty 00093 /// jets each of area (0.55 * pi R^2). 00094 double ClusterSequenceAreaBase::median_pt_per_unit_something( 00095 const Selector & selector, bool use_area_4vector) const { 00096 00097 double median, sigma, mean_area; 00098 get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area); 00099 return median; 00100 00101 } 00102 00103 00104 //---------------------------------------------------------------------- 00105 /// fits a form pt_per_unit_area(y) = a + b*y^2 for jets in range. 00106 /// exclude_above allows one to exclude large values of pt/area from fit. 00107 /// use_area_4vector = true uses the 4vector areas. 00108 void ClusterSequenceAreaBase::parabolic_pt_per_unit_area( 00109 double & a, double & b, const Selector & selector, 00110 double exclude_above, bool use_area_4vector) const { 00111 // sanity check on the selector: we require a finite area and that 00112 // it applies jet by jet (see BackgroundEstimator for more advanced 00113 // usage) 00114 _check_selector_good_for_median(selector); 00115 00116 int n=0; 00117 int n_excluded = 0; 00118 double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0; 00119 00120 vector<PseudoJet> incl_jets = inclusive_jets(); 00121 00122 for (unsigned i = 0; i < incl_jets.size(); i++) { 00123 if (selector.pass(incl_jets[i])) { 00124 double this_area; 00125 if ( use_area_4vector ) { 00126 this_area = area_4vector(incl_jets[i]).perp(); 00127 } else { 00128 this_area = area(incl_jets[i]); 00129 } 00130 double f = incl_jets[i].perp()/this_area; 00131 if (exclude_above <= 0.0 || f < exclude_above) { 00132 double x = incl_jets[i].rap(); double x2 = x*x; 00133 mean_f += f; 00134 mean_x2 += x2; 00135 mean_x4 += x2*x2; 00136 mean_fx2 += f*x2; 00137 n++; 00138 } else { 00139 n_excluded++; 00140 } 00141 } 00142 } 00143 00144 if (n <= 1) { 00145 // meaningful results require at least two jets inside the 00146 // area -- mind you if there are empty jets we should be in 00147 // any case doing something special... 00148 a = 0.0; 00149 b = 0.0; 00150 } else { 00151 mean_f /= n; 00152 mean_x2 /= n; 00153 mean_x4 /= n; 00154 mean_fx2 /= n; 00155 00156 b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4); 00157 a = mean_f - b*mean_x2; 00158 } 00159 //cerr << "n_excluded = "<< n_excluded << endl; 00160 } 00161 00162 00163 00164 void ClusterSequenceAreaBase::get_median_rho_and_sigma( 00165 const Selector & selector, bool use_area_4vector, 00166 double & median, double & sigma, double & mean_area) const { 00167 00168 vector<PseudoJet> incl_jets = inclusive_jets(); 00169 get_median_rho_and_sigma(incl_jets, selector, use_area_4vector, 00170 median, sigma, mean_area, true); 00171 } 00172 00173 00174 void ClusterSequenceAreaBase::get_median_rho_and_sigma( 00175 const vector<PseudoJet> & all_jets, 00176 const Selector & selector, bool use_area_4vector, 00177 double & median, double & sigma, double & mean_area, 00178 bool all_are_incl) const { 00179 00180 _check_jet_alg_good_for_median(); 00181 00182 // sanity check on the selector: we require a finite area and that 00183 // it applies jet by jet (see BackgroundEstimator for more advanced 00184 // usage) 00185 _check_selector_good_for_median(selector); 00186 00187 vector<double> pt_over_areas; 00188 double total_area = 0.0; 00189 double total_njets = 0; 00190 00191 for (unsigned i = 0; i < all_jets.size(); i++) { 00192 if (selector.pass(all_jets[i])) { 00193 double this_area; 00194 if (use_area_4vector) { 00195 this_area = area_4vector(all_jets[i]).perp(); 00196 } else { 00197 this_area = area(all_jets[i]); 00198 } 00199 00200 if (this_area>0) { 00201 pt_over_areas.push_back(all_jets[i].perp()/this_area); 00202 } else { 00203 _warnings_zero_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): discarded jet with zero area. Zero-area jets may be due to (i) too large a ghost area (ii) a jet being outside the ghost range (iii) the computation not being done using an appropriate algorithm (kt;C/A)."); 00204 } 00205 00206 total_area += this_area; 00207 total_njets += 1.0; 00208 } 00209 } 00210 00211 // there is nothing inside our region, so answer will always be zero 00212 if (pt_over_areas.size() == 0) { 00213 median = 0.0; 00214 sigma = 0.0; 00215 mean_area = 0.0; 00216 return; 00217 } 00218 00219 // get median (pt/area) [this is the "old" median definition. It considers 00220 // only the "real" jets in calculating the median, i.e. excluding the 00221 // only-ghost ones; it will be supplemented with more info below] 00222 sort(pt_over_areas.begin(), pt_over_areas.end()); 00223 00224 // now get the median & error, accounting for empty jets 00225 // define the fractions of distribution at median, median-1sigma 00226 double posn[2] = {0.5, (1.0-0.6827)/2.0}; 00227 double res[2]; 00228 00229 double n_empty, empty_a; 00230 if (has_explicit_ghosts()) { 00231 // NB: the following lines of code are potentially incorrect in cases 00232 // where there are unclustered particles (empty_area would do a better job, 00233 // at least for active areas). This is not an issue with kt or C/A, or other 00234 // algorithms that cluster all particles (and the median estimation should in 00235 // any case only be done with kt or C/A!) 00236 empty_a = 0.0; 00237 n_empty = 0; 00238 } else if (all_are_incl) { 00239 // the default case 00240 empty_a = empty_area(selector); 00241 n_empty = n_empty_jets(selector); 00242 } else { 00243 // this one is intended to be used when e.g. one runs C/A, then looks at its 00244 // exclusive jets in order to get an effective smaller R value, and passes those 00245 // to this routine. 00246 empty_a = empty_area_from_jets(all_jets, selector); 00247 mean_area = total_area / total_njets; // temporary value 00248 n_empty = empty_a / mean_area; 00249 } 00250 //cout << "*** tot_area = " << total_area << ", empty_a = " << empty_a << endl; 00251 //cout << "*** n_empty = " << n_empty << ", ntotal = " << total_njets << endl; 00252 total_njets += n_empty; 00253 total_area += empty_a; 00254 00255 // we need an int (rather than an unsigned int) with the size of the 00256 // pt_over_areas array, because we'll often be doing subtraction of 00257 // -1, negating it, etc. All of these operations go crazy with unsigned ints. 00258 int pt_over_areas_size = pt_over_areas.size(); 00259 if (n_empty < -pt_over_areas_size/4.0) 00260 _warnings_empty_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties."); 00261 00262 for (int i = 0; i < 2; i++) { 00263 double nj_median_pos = 00264 (pt_over_areas_size-1.0 + n_empty)*posn[i] - n_empty; 00265 double nj_median_ratio; 00266 if (nj_median_pos >= 0 && pt_over_areas_size > 1) { 00267 int int_nj_median = int(nj_median_pos); 00268 00269 // avoid potential overflow issues 00270 if (int_nj_median+1 > pt_over_areas_size-1){ 00271 int_nj_median = pt_over_areas_size-2; 00272 nj_median_pos = pt_over_areas_size-1; 00273 } 00274 00275 nj_median_ratio = 00276 pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos) 00277 + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median); 00278 } else { 00279 nj_median_ratio = 0.0; 00280 } 00281 res[i] = nj_median_ratio; 00282 } 00283 median = res[0]; 00284 double error = res[0] - res[1]; 00285 mean_area = total_area / total_njets; 00286 sigma = error * sqrt(mean_area); 00287 } 00288 00289 00290 /// return a vector of all subtracted jets, using area_4vector, given rho. 00291 /// Only inclusive_jets above ptmin are subtracted and returned. 00292 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()), 00293 /// i.e. not necessarily ordered in pt once subtracted 00294 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(const double rho, 00295 const double ptmin) 00296 const { 00297 vector<PseudoJet> sub_jets; 00298 vector<PseudoJet> jets = sorted_by_pt(inclusive_jets(ptmin)); 00299 for (unsigned i=0; i<jets.size(); i++) { 00300 PseudoJet sub_jet = subtracted_jet(jets[i],rho); 00301 sub_jets.push_back(sub_jet); 00302 } 00303 return sub_jets; 00304 } 00305 00306 /// return a vector of subtracted jets, using area_4vector. 00307 /// Only inclusive_jets above ptmin are subtracted and returned. 00308 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()), 00309 /// i.e. not necessarily ordered in pt once subtracted 00310 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets( 00311 const Selector & selector, 00312 const double ptmin) 00313 const { 00314 double rho = median_pt_per_unit_area_4vector(selector); 00315 return subtracted_jets(rho,ptmin); 00316 } 00317 00318 00319 /// return a subtracted jet, using area_4vector, given rho 00320 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet, 00321 const double rho) const { 00322 PseudoJet area4vect = area_4vector(jet); 00323 PseudoJet sub_jet; 00324 // sanity check 00325 if (rho*area4vect.perp() < jet.perp() ) { 00326 sub_jet = jet - rho*area4vect; 00327 } else { sub_jet = PseudoJet(0.0,0.0,0.0,0.0); } 00328 00329 // make sure the subtracted jet has the same index (cluster, user, csw) 00330 // (i.e. "looks like") the original jet 00331 sub_jet.set_cluster_hist_index(jet.cluster_hist_index()); 00332 sub_jet.set_user_index(jet.user_index()); 00333 // do not use CS::_set_structure_shared_ptr here, which should 00334 // only be called to maintain the tally during construction 00335 sub_jet.set_structure_shared_ptr(jet.structure_shared_ptr()); 00336 return sub_jet; 00337 } 00338 00339 00340 /// return a subtracted jet, using area_4vector; note that this is 00341 /// potentially inefficient if repeatedly used for many different 00342 /// jets, because rho will be recalculated each time around. 00343 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet, 00344 const Selector & selector) const { 00345 double rho = median_pt_per_unit_area_4vector(selector); 00346 PseudoJet sub_jet = subtracted_jet(jet, rho); 00347 return sub_jet; 00348 } 00349 00350 00351 /// return the subtracted pt, given rho 00352 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet, 00353 const double rho, 00354 bool use_area_4vector) const { 00355 if ( use_area_4vector ) { 00356 PseudoJet sub_jet = subtracted_jet(jet,rho); 00357 return sub_jet.perp(); 00358 } else { 00359 return jet.perp() - rho*area(jet); 00360 } 00361 } 00362 00363 00364 /// return the subtracted pt; note that this is 00365 /// potentially inefficient if repeatedly used for many different 00366 /// jets, because rho will be recalculated each time around. 00367 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet, 00368 const Selector & selector, 00369 bool use_area_4vector) const { 00370 if ( use_area_4vector ) { 00371 PseudoJet sub_jet = subtracted_jet(jet,selector); 00372 return sub_jet.perp(); 00373 } else { 00374 double rho = median_pt_per_unit_area(selector); 00375 return subtracted_pt(jet,rho,false); 00376 } 00377 } 00378 00379 // check the selector is suited for the computations i.e. applies jet 00380 // by jet and has a finite area 00381 void ClusterSequenceAreaBase::_check_selector_good_for_median(const Selector &selector) const{ 00382 // make sure the selector has a finite area 00383 if ((! has_explicit_ghosts()) && (! selector.has_finite_area())){ 00384 throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors with a finite area"); 00385 } 00386 00387 // make sure the selector applies jet by jet 00388 if (! selector.applies_jet_by_jet()){ 00389 throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors that apply jet by jet"); 00390 } 00391 } 00392 00393 00394 /// check the jet algorithm is suitable (and if not issue a warning) 00395 void ClusterSequenceAreaBase::_check_jet_alg_good_for_median() const { 00396 if (jet_def().jet_algorithm() != kt_algorithm 00397 && jet_def().jet_algorithm() != cambridge_algorithm 00398 && jet_def().jet_algorithm() != cambridge_for_passive_algorithm) { 00399 _warnings.warn("ClusterSequenceAreaBase: jet_def being used may not be suitable for estimating diffuse backgrounds (good options are kt, cam)"); 00400 } 00401 } 00402 00403 00404 00405 FASTJET_END_NAMESPACE