FastJet 3.0.6
ClusterSequenceAreaBase.cc
00001 
00002 //STARTHEADER
00003 // $Id: ClusterSequenceAreaBase.cc 2687 2011-11-14 11:17:51Z soyez $
00004 //
00005 // Copyright (c) 2005-2011, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
00006 //
00007 //----------------------------------------------------------------------
00008 // This file is part of FastJet.
00009 //
00010 //  FastJet is free software; you can redistribute it and/or modify
00011 //  it under the terms of the GNU General Public License as published by
00012 //  the Free Software Foundation; either version 2 of the License, or
00013 //  (at your option) any later version.
00014 //
00015 //  The algorithms that underlie FastJet have required considerable
00016 //  development and are described in hep-ph/0512210. If you use
00017 //  FastJet as part of work towards a scientific publication, please
00018 //  include a citation to the FastJet paper.
00019 //
00020 //  FastJet is distributed in the hope that it will be useful,
00021 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 //  GNU General Public License for more details.
00024 //
00025 //  You should have received a copy of the GNU General Public License
00026 //  along with FastJet. If not, see <http://www.gnu.org/licenses/>.
00027 //----------------------------------------------------------------------
00028 //ENDHEADER
00029 
00030 
00031 
00032 
00033 #include "fastjet/ClusterSequenceAreaBase.hh"
00034 #include <algorithm>
00035 
00036 FASTJET_BEGIN_NAMESPACE
00037 
00038 using namespace std;
00039 
00040 
00041 /// allow for warnings
00042 LimitedWarning ClusterSequenceAreaBase::_warnings;
00043 LimitedWarning ClusterSequenceAreaBase::_warnings_zero_area;
00044 LimitedWarning ClusterSequenceAreaBase::_warnings_empty_area;
00045 
00046 //----------------------------------------------------------------------
00047 /// return the total area, within the selector's range, that is free
00048 /// of jets.
00049 /// 
00050 /// Calculate this as (range area) - \sum_{i in range} A_i
00051 ///
00052 /// for ClusterSequences with explicit ghosts, assume that there will
00053 /// never be any empty area, i.e. it is always filled in by pure
00054 /// ghosts jets. This holds for seq.rec. algorithms
00055 double ClusterSequenceAreaBase::empty_area(const Selector & selector) const {
00056 
00057   if (has_explicit_ghosts()) {return 0.0;}
00058   else { return empty_area_from_jets(inclusive_jets(0.0), selector);}
00059 
00060 }
00061 
00062 //----------------------------------------------------------------------
00063 /// return the total area, within range, that is free of jets.
00064 /// 
00065 /// Calculate this as (range area) - \sum_{i in range} A_i
00066 ///
00067 double ClusterSequenceAreaBase::empty_area_from_jets(
00068                       const std::vector<PseudoJet> & all_jets,
00069                       const Selector & selector) const {
00070   _check_selector_good_for_median(selector);
00071 
00072   double empty = selector.area();
00073   for (unsigned i = 0; i < all_jets.size(); i++) {
00074     if (selector.pass(all_jets[i])) empty -= area(all_jets[i]);
00075   }
00076   return empty;
00077 }
00078 
00079 double ClusterSequenceAreaBase::median_pt_per_unit_area(const Selector & selector) const {
00080   return median_pt_per_unit_something(selector,false);
00081 }
00082 
00083 double ClusterSequenceAreaBase::median_pt_per_unit_area_4vector(const Selector & selector) const {
00084   return median_pt_per_unit_something(selector,true);
00085 }
00086 
00087 
00088 //----------------------------------------------------------------------
00089 /// the median of (pt/area) for jets contained within range, counting
00090 /// the empty area as if it were made up of a collection of empty
00091 /// jets each of area (0.55 * pi R^2).
00092 double ClusterSequenceAreaBase::median_pt_per_unit_something(
00093                 const Selector & selector, bool use_area_4vector) const {
00094 
00095   double median, sigma, mean_area;
00096   get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area);
00097   return median;
00098 
00099 }
00100 
00101 
00102 //----------------------------------------------------------------------
00103 /// fits a form pt_per_unit_area(y) = a + b*y^2 for jets in range. 
00104 /// exclude_above allows one to exclude large values of pt/area from fit. 
00105 /// use_area_4vector = true uses the 4vector areas.
00106 void ClusterSequenceAreaBase::parabolic_pt_per_unit_area(
00107        double & a, double & b, const Selector & selector, 
00108        double exclude_above, bool use_area_4vector) const {
00109   // sanity check on the selector: we require a finite area and that
00110   // it applies jet by jet (see BackgroundEstimator for more advanced
00111   // usage)
00112   _check_selector_good_for_median(selector);
00113 
00114   int n=0;
00115   int n_excluded = 0;
00116   double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0; 
00117 
00118   vector<PseudoJet> incl_jets = inclusive_jets();
00119 
00120   for (unsigned i = 0; i < incl_jets.size(); i++) {
00121     if (selector.pass(incl_jets[i])) {
00122       double this_area;
00123       if ( use_area_4vector ) {
00124           this_area = area_4vector(incl_jets[i]).perp();     
00125       } else {
00126           this_area = area(incl_jets[i]);
00127       }
00128       double f = incl_jets[i].perp()/this_area;
00129       if (exclude_above <= 0.0 || f < exclude_above) {
00130         double x = incl_jets[i].rap(); double x2 = x*x;
00131         mean_f   += f;
00132         mean_x2  += x2;
00133         mean_x4  += x2*x2;
00134         mean_fx2 += f*x2;
00135         n++;
00136       } else {
00137         n_excluded++;
00138       }
00139     }
00140   }
00141 
00142   if (n <= 1) {
00143     // meaningful results require at least two jets inside the
00144     // area -- mind you if there are empty jets we should be in 
00145     // any case doing something special...
00146     a = 0.0;
00147     b = 0.0;
00148   } else {
00149     mean_f   /= n;
00150     mean_x2  /= n;
00151     mean_x4  /= n;
00152     mean_fx2 /= n;
00153     
00154     b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4);
00155     a = mean_f - b*mean_x2;
00156   }
00157   //cerr << "n_excluded = "<< n_excluded << endl;
00158 }
00159 
00160 
00161 
00162 void ClusterSequenceAreaBase::get_median_rho_and_sigma(
00163             const Selector & selector, bool use_area_4vector,
00164             double & median, double & sigma, double & mean_area) const {
00165 
00166   vector<PseudoJet> incl_jets = inclusive_jets();
00167   get_median_rho_and_sigma(incl_jets, selector, use_area_4vector,
00168                            median, sigma, mean_area, true);
00169 }
00170 
00171 
00172 void ClusterSequenceAreaBase::get_median_rho_and_sigma(
00173             const vector<PseudoJet> & all_jets,
00174             const Selector & selector, bool use_area_4vector,
00175             double & median, double & sigma, double & mean_area,
00176             bool all_are_incl) const {
00177 
00178   _check_jet_alg_good_for_median();
00179 
00180   // sanity check on the selector: we require a finite area and that
00181   // it applies jet by jet (see BackgroundEstimator for more advanced
00182   // usage)
00183   _check_selector_good_for_median(selector);
00184 
00185   vector<double> pt_over_areas;
00186   double total_area  = 0.0;
00187   double total_njets = 0;
00188 
00189   for (unsigned i = 0; i < all_jets.size(); i++) {
00190     if (selector.pass(all_jets[i])) {
00191       double this_area;
00192       if (use_area_4vector) {
00193           this_area = area_4vector(all_jets[i]).perp();
00194       } else {
00195           this_area = area(all_jets[i]);
00196       }
00197 
00198       if (this_area>0) {
00199         pt_over_areas.push_back(all_jets[i].perp()/this_area);
00200       } else {
00201         _warnings_zero_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): discarded jet with zero area. Zero-area jets may be due to (i) too large a ghost area (ii) a jet being outside the ghost range (iii) the computation not being done using an appropriate algorithm (kt;C/A).");
00202       }
00203 
00204       total_area  += this_area;
00205       total_njets += 1.0;
00206     }
00207   }
00208 
00209   // there is nothing inside our region, so answer will always be zero
00210   if (pt_over_areas.size() == 0) {
00211     median = 0.0;
00212     sigma  = 0.0;
00213     mean_area = 0.0;
00214     return;
00215   }
00216   
00217   // get median (pt/area) [this is the "old" median definition. It considers
00218   // only the "real" jets in calculating the median, i.e. excluding the
00219   // only-ghost ones; it will be supplemented with more info below]
00220   sort(pt_over_areas.begin(), pt_over_areas.end());
00221 
00222   // now get the median & error, accounting for empty jets
00223   // define the fractions of distribution at median, median-1sigma
00224   double posn[2] = {0.5, (1.0-0.6827)/2.0};
00225   double res[2];
00226   
00227   double n_empty, empty_a;
00228   if (has_explicit_ghosts()) {
00229     // NB: the following lines of code are potentially incorrect in cases
00230     //     where there are unclustered particles (empty_area would do a better job,
00231     //     at least for active areas). This is not an issue with kt or C/A, or other
00232     //     algorithms that cluster all particles (and the median estimation should in 
00233     //     any case only be done with kt or C/A!)
00234     empty_a = 0.0;
00235     n_empty = 0;
00236   } else if (all_are_incl) {
00237     // the default case
00238     empty_a = empty_area(selector);
00239     n_empty = n_empty_jets(selector);
00240   } else {
00241     // this one is intended to be used when e.g. one runs C/A, then looks at its
00242     // exclusive jets in order to get an effective smaller R value, and passes those
00243     // to this routine.
00244     empty_a = empty_area_from_jets(all_jets, selector);
00245     mean_area = total_area / total_njets; // temporary value
00246     n_empty   = empty_a / mean_area;
00247   }
00248   //cout << "*** tot_area = " << total_area << ", empty_a = " << empty_a << endl;
00249   //cout << "*** n_empty = " << n_empty << ", ntotal =  " << total_njets << endl;
00250   total_njets += n_empty;
00251   total_area  += empty_a;
00252 
00253   // we need an int (rather than an unsigned int) with the size of the
00254   // pt_over_areas array, because we'll often be doing subtraction of
00255   // -1, negating it, etc. All of these operations go crazy with unsigned ints.
00256   int pt_over_areas_size = pt_over_areas.size();
00257   if (n_empty < -pt_over_areas_size/4.0)
00258     _warnings_empty_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
00259 
00260   for (int i = 0; i < 2; i++) {
00261     double nj_median_pos = 
00262       (pt_over_areas_size-1.0 + n_empty)*posn[i] - n_empty;
00263     double nj_median_ratio;
00264     if (nj_median_pos >= 0 && pt_over_areas_size > 1) {
00265       int int_nj_median = int(nj_median_pos);
00266  
00267      // avoid potential overflow issues
00268       if (int_nj_median+1 > pt_over_areas_size-1){
00269         int_nj_median = pt_over_areas_size-2;
00270         nj_median_pos = pt_over_areas_size-1;
00271       }
00272 
00273       nj_median_ratio = 
00274         pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos)
00275         + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median);
00276     } else {
00277       nj_median_ratio = 0.0;
00278     }
00279     res[i] = nj_median_ratio;
00280   }
00281   median = res[0];
00282   double error  = res[0] - res[1];
00283   mean_area = total_area / total_njets;
00284   sigma  = error * sqrt(mean_area);
00285 }
00286 
00287 
00288 /// return a vector of all subtracted jets, using area_4vector, given rho.
00289 /// Only inclusive_jets above ptmin are subtracted and returned.
00290 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
00291 /// i.e. not necessarily ordered in pt once subtracted
00292 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(const double rho,
00293                                                            const double ptmin) 
00294                                                            const {
00295   vector<PseudoJet> sub_jets;
00296   vector<PseudoJet> jets_local = sorted_by_pt(inclusive_jets(ptmin));
00297   for (unsigned i=0; i<jets_local.size(); i++) {
00298      PseudoJet sub_jet = subtracted_jet(jets_local[i],rho);
00299      sub_jets.push_back(sub_jet);
00300   }
00301   return sub_jets;
00302 }
00303 
00304 /// return a vector of subtracted jets, using area_4vector.
00305 /// Only inclusive_jets above ptmin are subtracted and returned.
00306 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
00307 /// i.e. not necessarily ordered in pt once subtracted
00308 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(
00309                                                  const Selector & selector, 
00310                                                  const double ptmin)
00311                                                  const {
00312   double rho = median_pt_per_unit_area_4vector(selector);
00313   return subtracted_jets(rho,ptmin);
00314 }
00315 
00316 
00317 /// return a subtracted jet, using area_4vector, given rho
00318 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
00319                                                   const double rho) const {
00320   PseudoJet area4vect = area_4vector(jet);
00321   PseudoJet sub_jet;
00322   // sanity check
00323   if (rho*area4vect.perp() < jet.perp() ) { 
00324     sub_jet = jet - rho*area4vect;
00325   } else { sub_jet = PseudoJet(0.0,0.0,0.0,0.0); }
00326   
00327   // make sure the subtracted jet has the same index (cluster, user, csw)
00328   // (i.e. "looks like") the original jet
00329   sub_jet.set_cluster_hist_index(jet.cluster_hist_index());
00330   sub_jet.set_user_index(jet.user_index());
00331   // do not use CS::_set_structure_shared_ptr here, which should
00332   // only be called to maintain the tally during construction
00333   sub_jet.set_structure_shared_ptr(jet.structure_shared_ptr());
00334   return sub_jet;
00335 }
00336 
00337 
00338 /// return a subtracted jet, using area_4vector;  note that this is
00339 /// potentially inefficient if repeatedly used for many different
00340 /// jets, because rho will be recalculated each time around.
00341 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
00342                                        const Selector & selector) const {
00343   double rho = median_pt_per_unit_area_4vector(selector);
00344   PseudoJet sub_jet = subtracted_jet(jet, rho);
00345   return sub_jet;
00346 }
00347 
00348 
00349 /// return the subtracted pt, given rho
00350 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
00351                                               const double rho,
00352                                               bool use_area_4vector) const {
00353   if ( use_area_4vector ) { 
00354      PseudoJet sub_jet = subtracted_jet(jet,rho);
00355      return sub_jet.perp();
00356   } else {
00357      return jet.perp() - rho*area(jet);
00358   }
00359 }  
00360 
00361 
00362 /// return the subtracted pt; note that this is
00363 /// potentially inefficient if repeatedly used for many different
00364 /// jets, because rho will be recalculated each time around.
00365 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
00366                                               const Selector & selector,
00367                                               bool use_area_4vector) const {
00368   if ( use_area_4vector ) { 
00369      PseudoJet sub_jet = subtracted_jet(jet,selector);
00370      return sub_jet.perp();
00371   } else {
00372      double rho = median_pt_per_unit_area(selector);
00373      return subtracted_pt(jet,rho,false);
00374   }
00375 }  
00376 
00377 // check the selector is suited for the computations i.e. applies jet
00378 // by jet and has a finite area
00379 void ClusterSequenceAreaBase::_check_selector_good_for_median(const Selector &selector) const{
00380   // make sure the selector has a finite area
00381   if ((! has_explicit_ghosts()) &&  (! selector.has_finite_area())){
00382     throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors with a finite area");
00383   }
00384 
00385   // make sure the selector applies jet by jet
00386   if (! selector.applies_jet_by_jet()){
00387     throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors that apply jet by jet");
00388   }
00389 }
00390 
00391 
00392 /// check the jet algorithm is suitable (and if not issue a warning)
00393 void ClusterSequenceAreaBase::_check_jet_alg_good_for_median() const {
00394   if (jet_def().jet_algorithm() != kt_algorithm
00395       && jet_def().jet_algorithm() != cambridge_algorithm
00396       && jet_def().jet_algorithm() !=  cambridge_for_passive_algorithm) {
00397     _warnings.warn("ClusterSequenceAreaBase: jet_def being used may not be suitable for estimating diffuse backgrounds (good options are kt, cam)");
00398   }
00399 }
00400 
00401 
00402 
00403 FASTJET_END_NAMESPACE
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends