FastJet 3.0beta1
ClusterSequenceAreaBase.cc
00001 
00002 //STARTHEADER
00003 // $Id: ClusterSequenceAreaBase.cc 2414 2011-07-14 06:16:33Z salam $
00004 //
00005 // Copyright (c) 2005-2006, Matteo Cacciari and Gavin Salam
00006 //
00007 //----------------------------------------------------------------------
00008 // This file is part of FastJet.
00009 //
00010 //  FastJet is free software; you can redistribute it and/or modify
00011 //  it under the terms of the GNU General Public License as published by
00012 //  the Free Software Foundation; either version 2 of the License, or
00013 //  (at your option) any later version.
00014 //
00015 //  The algorithms that underlie FastJet have required considerable
00016 //  development and are described in hep-ph/0512210. If you use
00017 //  FastJet as part of work towards a scientific publication, please
00018 //  include a citation to the FastJet paper.
00019 //
00020 //  FastJet is distributed in the hope that it will be useful,
00021 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 //  GNU General Public License for more details.
00024 //
00025 //  You should have received a copy of the GNU General Public License
00026 //  along with FastJet; if not, write to the Free Software
00027 //  Foundation, Inc.:
00028 //      59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00029 //----------------------------------------------------------------------
00030 //ENDHEADER
00031 
00032 
00033 
00034 
00035 #include "fastjet/ClusterSequenceAreaBase.hh"
00036 #include <algorithm>
00037 
00038 FASTJET_BEGIN_NAMESPACE
00039 
00040 using namespace std;
00041 
00042 
00043 /// allow for warnings
00044 LimitedWarning ClusterSequenceAreaBase::_warnings;
00045 LimitedWarning ClusterSequenceAreaBase::_warnings_zero_area;
00046 LimitedWarning ClusterSequenceAreaBase::_warnings_empty_area;
00047 
00048 //----------------------------------------------------------------------
00049 /// return the total area, within the selector's range, that is free
00050 /// of jets.
00051 /// 
00052 /// Calculate this as (range area) - \sum_{i in range} A_i
00053 ///
00054 /// for ClusterSequences with explicit ghosts, assume that there will
00055 /// never be any empty area, i.e. it is always filled in by pure
00056 /// ghosts jets. This holds for seq.rec. algorithms
00057 double ClusterSequenceAreaBase::empty_area(const Selector & selector) const {
00058 
00059   if (has_explicit_ghosts()) {return 0.0;}
00060   else { return empty_area_from_jets(inclusive_jets(0.0), selector);}
00061 
00062 }
00063 
00064 //----------------------------------------------------------------------
00065 /// return the total area, within range, that is free of jets.
00066 /// 
00067 /// Calculate this as (range area) - \sum_{i in range} A_i
00068 ///
00069 double ClusterSequenceAreaBase::empty_area_from_jets(
00070                       const std::vector<PseudoJet> & all_jets,
00071                       const Selector & selector) const {
00072   _check_selector_good_for_median(selector);
00073 
00074   double empty = selector.area();
00075   for (unsigned i = 0; i < all_jets.size(); i++) {
00076     if (selector.pass(all_jets[i])) empty -= area(all_jets[i]);
00077   }
00078   return empty;
00079 }
00080 
00081 double ClusterSequenceAreaBase::median_pt_per_unit_area(const Selector & selector) const {
00082   return median_pt_per_unit_something(selector,false);
00083 }
00084 
00085 double ClusterSequenceAreaBase::median_pt_per_unit_area_4vector(const Selector & selector) const {
00086   return median_pt_per_unit_something(selector,true);
00087 }
00088 
00089 
00090 //----------------------------------------------------------------------
00091 /// the median of (pt/area) for jets contained within range, counting
00092 /// the empty area as if it were made up of a collection of empty
00093 /// jets each of area (0.55 * pi R^2).
00094 double ClusterSequenceAreaBase::median_pt_per_unit_something(
00095                 const Selector & selector, bool use_area_4vector) const {
00096 
00097   double median, sigma, mean_area;
00098   get_median_rho_and_sigma(selector, use_area_4vector, median, sigma, mean_area);
00099   return median;
00100 
00101 }
00102 
00103 
00104 //----------------------------------------------------------------------
00105 /// fits a form pt_per_unit_area(y) = a + b*y^2 for jets in range. 
00106 /// exclude_above allows one to exclude large values of pt/area from fit. 
00107 /// use_area_4vector = true uses the 4vector areas.
00108 void ClusterSequenceAreaBase::parabolic_pt_per_unit_area(
00109        double & a, double & b, const Selector & selector, 
00110        double exclude_above, bool use_area_4vector) const {
00111   // sanity check on the selector: we require a finite area and that
00112   // it applies jet by jet (see BackgroundEstimator for more advanced
00113   // usage)
00114   _check_selector_good_for_median(selector);
00115 
00116   int n=0;
00117   int n_excluded = 0;
00118   double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0; 
00119 
00120   vector<PseudoJet> incl_jets = inclusive_jets();
00121 
00122   for (unsigned i = 0; i < incl_jets.size(); i++) {
00123     if (selector.pass(incl_jets[i])) {
00124       double this_area;
00125       if ( use_area_4vector ) {
00126           this_area = area_4vector(incl_jets[i]).perp();     
00127       } else {
00128           this_area = area(incl_jets[i]);
00129       }
00130       double f = incl_jets[i].perp()/this_area;
00131       if (exclude_above <= 0.0 || f < exclude_above) {
00132         double x = incl_jets[i].rap(); double x2 = x*x;
00133         mean_f   += f;
00134         mean_x2  += x2;
00135         mean_x4  += x2*x2;
00136         mean_fx2 += f*x2;
00137         n++;
00138       } else {
00139         n_excluded++;
00140       }
00141     }
00142   }
00143 
00144   if (n <= 1) {
00145     // meaningful results require at least two jets inside the
00146     // area -- mind you if there are empty jets we should be in 
00147     // any case doing something special...
00148     a = 0.0;
00149     b = 0.0;
00150   } else {
00151     mean_f   /= n;
00152     mean_x2  /= n;
00153     mean_x4  /= n;
00154     mean_fx2 /= n;
00155     
00156     b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4);
00157     a = mean_f - b*mean_x2;
00158   }
00159   //cerr << "n_excluded = "<< n_excluded << endl;
00160 }
00161 
00162 
00163 
00164 void ClusterSequenceAreaBase::get_median_rho_and_sigma(
00165             const Selector & selector, bool use_area_4vector,
00166             double & median, double & sigma, double & mean_area) const {
00167 
00168   vector<PseudoJet> incl_jets = inclusive_jets();
00169   get_median_rho_and_sigma(incl_jets, selector, use_area_4vector,
00170                            median, sigma, mean_area, true);
00171 }
00172 
00173 
00174 void ClusterSequenceAreaBase::get_median_rho_and_sigma(
00175             const vector<PseudoJet> & all_jets,
00176             const Selector & selector, bool use_area_4vector,
00177             double & median, double & sigma, double & mean_area,
00178             bool all_are_incl) const {
00179 
00180   _check_jet_alg_good_for_median();
00181 
00182   // sanity check on the selector: we require a finite area and that
00183   // it applies jet by jet (see BackgroundEstimator for more advanced
00184   // usage)
00185   _check_selector_good_for_median(selector);
00186 
00187   vector<double> pt_over_areas;
00188   double total_area  = 0.0;
00189   double total_njets = 0;
00190 
00191   for (unsigned i = 0; i < all_jets.size(); i++) {
00192     if (selector.pass(all_jets[i])) {
00193       double this_area;
00194       if (use_area_4vector) {
00195           this_area = area_4vector(all_jets[i]).perp();
00196       } else {
00197           this_area = area(all_jets[i]);
00198       }
00199 
00200       if (this_area>0) {
00201         pt_over_areas.push_back(all_jets[i].perp()/this_area);
00202       } else {
00203         _warnings_zero_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): discarded jet with zero area. Zero-area jets may be due to (i) too large a ghost area (ii) a jet being outside the ghost range (iii) the computation not being done using an appropriate algorithm (kt;C/A).");
00204       }
00205 
00206       total_area  += this_area;
00207       total_njets += 1.0;
00208     }
00209   }
00210 
00211   // there is nothing inside our region, so answer will always be zero
00212   if (pt_over_areas.size() == 0) {
00213     median = 0.0;
00214     sigma  = 0.0;
00215     mean_area = 0.0;
00216     return;
00217   }
00218   
00219   // get median (pt/area) [this is the "old" median definition. It considers
00220   // only the "real" jets in calculating the median, i.e. excluding the
00221   // only-ghost ones; it will be supplemented with more info below]
00222   sort(pt_over_areas.begin(), pt_over_areas.end());
00223 
00224   // now get the median & error, accounting for empty jets
00225   // define the fractions of distribution at median, median-1sigma
00226   double posn[2] = {0.5, (1.0-0.6827)/2.0};
00227   double res[2];
00228   
00229   double n_empty, empty_a;
00230   if (has_explicit_ghosts()) {
00231     // NB: the following lines of code are potentially incorrect in cases
00232     //     where there are unclustered particles (empty_area would do a better job,
00233     //     at least for active areas). This is not an issue with kt or C/A, or other
00234     //     algorithms that cluster all particles (and the median estimation should in 
00235     //     any case only be done with kt or C/A!)
00236     empty_a = 0.0;
00237     n_empty = 0;
00238   } else if (all_are_incl) {
00239     // the default case
00240     empty_a = empty_area(selector);
00241     n_empty = n_empty_jets(selector);
00242   } else {
00243     // this one is intended to be used when e.g. one runs C/A, then looks at its
00244     // exclusive jets in order to get an effective smaller R value, and passes those
00245     // to this routine.
00246     empty_a = empty_area_from_jets(all_jets, selector);
00247     mean_area = total_area / total_njets; // temporary value
00248     n_empty   = empty_a / mean_area;
00249   }
00250   //cout << "*** tot_area = " << total_area << ", empty_a = " << empty_a << endl;
00251   //cout << "*** n_empty = " << n_empty << ", ntotal =  " << total_njets << endl;
00252   total_njets += n_empty;
00253   total_area  += empty_a;
00254 
00255   // we need an int (rather than an unsigned int) with the size of the
00256   // pt_over_areas array, because we'll often be doing subtraction of
00257   // -1, negating it, etc. All of these operations go crazy with unsigned ints.
00258   int pt_over_areas_size = pt_over_areas.size();
00259   if (n_empty < -pt_over_areas_size/4.0)
00260     _warnings_empty_area.warn("ClusterSequenceAreaBase::get_median_rho_and_sigma(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
00261 
00262   for (int i = 0; i < 2; i++) {
00263     double nj_median_pos = 
00264       (pt_over_areas_size-1.0 + n_empty)*posn[i] - n_empty;
00265     double nj_median_ratio;
00266     if (nj_median_pos >= 0 && pt_over_areas_size > 1) {
00267       int int_nj_median = int(nj_median_pos);
00268  
00269      // avoid potential overflow issues
00270       if (int_nj_median+1 > pt_over_areas_size-1){
00271         int_nj_median = pt_over_areas_size-2;
00272         nj_median_pos = pt_over_areas_size-1;
00273       }
00274 
00275       nj_median_ratio = 
00276         pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos)
00277         + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median);
00278     } else {
00279       nj_median_ratio = 0.0;
00280     }
00281     res[i] = nj_median_ratio;
00282   }
00283   median = res[0];
00284   double error  = res[0] - res[1];
00285   mean_area = total_area / total_njets;
00286   sigma  = error * sqrt(mean_area);
00287 }
00288 
00289 
00290 /// return a vector of all subtracted jets, using area_4vector, given rho.
00291 /// Only inclusive_jets above ptmin are subtracted and returned.
00292 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
00293 /// i.e. not necessarily ordered in pt once subtracted
00294 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(const double rho,
00295                                                            const double ptmin) 
00296                                                            const {
00297   vector<PseudoJet> sub_jets;
00298   vector<PseudoJet> jets = sorted_by_pt(inclusive_jets(ptmin));
00299   for (unsigned i=0; i<jets.size(); i++) {
00300      PseudoJet sub_jet = subtracted_jet(jets[i],rho);
00301      sub_jets.push_back(sub_jet);
00302   }
00303   return sub_jets;
00304 }
00305 
00306 /// return a vector of subtracted jets, using area_4vector.
00307 /// Only inclusive_jets above ptmin are subtracted and returned.
00308 /// the ordering is the same as that of sorted_by_pt(cs.inclusive_jets()),
00309 /// i.e. not necessarily ordered in pt once subtracted
00310 vector<PseudoJet> ClusterSequenceAreaBase::subtracted_jets(
00311                                                  const Selector & selector, 
00312                                                  const double ptmin)
00313                                                  const {
00314   double rho = median_pt_per_unit_area_4vector(selector);
00315   return subtracted_jets(rho,ptmin);
00316 }
00317 
00318 
00319 /// return a subtracted jet, using area_4vector, given rho
00320 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
00321                                                   const double rho) const {
00322   PseudoJet area4vect = area_4vector(jet);
00323   PseudoJet sub_jet;
00324   // sanity check
00325   if (rho*area4vect.perp() < jet.perp() ) { 
00326     sub_jet = jet - rho*area4vect;
00327   } else { sub_jet = PseudoJet(0.0,0.0,0.0,0.0); }
00328   
00329   // make sure the subtracted jet has the same index (cluster, user, csw)
00330   // (i.e. "looks like") the original jet
00331   sub_jet.set_cluster_hist_index(jet.cluster_hist_index());
00332   sub_jet.set_user_index(jet.user_index());
00333   // do not use CS::_set_structure_shared_ptr here, which should
00334   // only be called to maintain the tally during construction
00335   sub_jet.set_structure_shared_ptr(jet.structure_shared_ptr());
00336   return sub_jet;
00337 }
00338 
00339 
00340 /// return a subtracted jet, using area_4vector;  note that this is
00341 /// potentially inefficient if repeatedly used for many different
00342 /// jets, because rho will be recalculated each time around.
00343 PseudoJet ClusterSequenceAreaBase::subtracted_jet(const PseudoJet & jet,
00344                                        const Selector & selector) const {
00345   double rho = median_pt_per_unit_area_4vector(selector);
00346   PseudoJet sub_jet = subtracted_jet(jet, rho);
00347   return sub_jet;
00348 }
00349 
00350 
00351 /// return the subtracted pt, given rho
00352 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
00353                                               const double rho,
00354                                               bool use_area_4vector) const {
00355   if ( use_area_4vector ) { 
00356      PseudoJet sub_jet = subtracted_jet(jet,rho);
00357      return sub_jet.perp();
00358   } else {
00359      return jet.perp() - rho*area(jet);
00360   }
00361 }  
00362 
00363 
00364 /// return the subtracted pt; note that this is
00365 /// potentially inefficient if repeatedly used for many different
00366 /// jets, because rho will be recalculated each time around.
00367 double ClusterSequenceAreaBase::subtracted_pt(const PseudoJet & jet,
00368                                               const Selector & selector,
00369                                               bool use_area_4vector) const {
00370   if ( use_area_4vector ) { 
00371      PseudoJet sub_jet = subtracted_jet(jet,selector);
00372      return sub_jet.perp();
00373   } else {
00374      double rho = median_pt_per_unit_area(selector);
00375      return subtracted_pt(jet,rho,false);
00376   }
00377 }  
00378 
00379 // check the selector is suited for the computations i.e. applies jet
00380 // by jet and has a finite area
00381 void ClusterSequenceAreaBase::_check_selector_good_for_median(const Selector &selector) const{
00382   // make sure the selector has a finite area
00383   if ((! has_explicit_ghosts()) &&  (! selector.has_finite_area())){
00384     throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors with a finite area");
00385   }
00386 
00387   // make sure the selector applies jet by jet
00388   if (! selector.applies_jet_by_jet()){
00389     throw Error("ClusterSequenceAreaBase: empty area can only be computed from selectors that apply jet by jet");
00390   }
00391 }
00392 
00393 
00394 /// check the jet algorithm is suitable (and if not issue a warning)
00395 void ClusterSequenceAreaBase::_check_jet_alg_good_for_median() const {
00396   if (jet_def().jet_algorithm() != kt_algorithm
00397       && jet_def().jet_algorithm() != cambridge_algorithm
00398       && jet_def().jet_algorithm() !=  cambridge_for_passive_algorithm) {
00399     _warnings.warn("ClusterSequenceAreaBase: jet_def being used may not be suitable for estimating diffuse backgrounds (good options are kt, cam)");
00400   }
00401 }
00402 
00403 
00404 
00405 FASTJET_END_NAMESPACE
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends