FastJet 3.0.0
BackgroundEstimatorBase.cc
00001 //STARTHEADER
00002 // $Id: BackgroundEstimatorBase.cc 2616 2011-09-30 18:03:40Z salam $
00003 //
00004 // Copyright (c) 2005-2011, Matteo Cacciari, Gavin P. Salam and Gregory Soyez
00005 //
00006 //----------------------------------------------------------------------
00007 // This file is part of FastJet.
00008 //
00009 //  FastJet is free software; you can redistribute it and/or modify
00010 //  it under the terms of the GNU General Public License as published by
00011 //  the Free Software Foundation; either version 2 of the License, or
00012 //  (at your option) any later version.
00013 //
00014 //  The algorithms that underlie FastJet have required considerable
00015 //  development and are described in hep-ph/0512210. If you use
00016 //  FastJet as part of work towards a scientific publication, please
00017 //  include a citation to the FastJet paper.
00018 //
00019 //  FastJet is distributed in the hope that it will be useful,
00020 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00021 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00022 //  GNU General Public License for more details.
00023 //
00024 //  You should have received a copy of the GNU General Public License
00025 //  along with FastJet. If not, see <http://www.gnu.org/licenses/>.
00026 //----------------------------------------------------------------------
00027 //ENDHEADER
00028 
00029 
00030 #include "fastjet/tools/BackgroundEstimatorBase.hh"
00031 
00032 using namespace std;
00033 
00034 FASTJET_BEGIN_NAMESPACE      // defined in fastjet/internal/base.hh
00035 
00036 LimitedWarning BackgroundEstimatorBase::_warnings_empty_area;
00037 
00038 //----------------------------------------------------------------------
00039 // given a quantity in a vector (e.g. pt_over_area) and knowledge
00040 // about the number of empty jets, calculate the median and
00041 // stand_dev_if_gaussian (roughly from the 16th percentile)
00042 //
00043 // If do_fj2_calculation is set to true then this performs FastJet
00044 // 2.X estimation of the standard deviation, which has a spurious
00045 // offset in the limit of a small number of jets.
00046 void BackgroundEstimatorBase::_median_and_stddev(const vector<double> & quantity_vector, 
00047                                                  double n_empty_jets, 
00048                                                  double & median, 
00049                                                  double & stand_dev_if_gaussian,
00050                                                  bool do_fj2_calculation) const {
00051 
00052   // this check is redundant (the code below behaves sensibly even
00053   // with a zero size), but serves as a reminder of what happens if
00054   // the quantity vector is zero-sized
00055   if (quantity_vector.size() == 0) {
00056     median = 0;
00057     stand_dev_if_gaussian = 0;
00058     return;
00059   }
00060 
00061   vector<double> sorted_quantity_vector = quantity_vector;
00062   sort(sorted_quantity_vector.begin(), sorted_quantity_vector.end());
00063 
00064   // empty area can sometimes be negative; with small ranges this can
00065   // become pathological, so warn the user
00066   int n_jets_used = sorted_quantity_vector.size();
00067   if (n_empty_jets < -n_jets_used/4.0)
00068     _warnings_empty_area.warn("BackgroundEstimatorBase::_median_and_stddev(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
00069 
00070   // now get the median & error, accounting for empty jets;
00071   // define the fractions of distribution at median, median-1sigma
00072   double posn[2] = {0.5, (1.0-0.6827)/2.0};
00073   double res[2];
00074   for (int i = 0; i < 2; i++) {
00075     res[i] = _percentile(sorted_quantity_vector, posn[i], n_empty_jets, 
00076                          do_fj2_calculation);
00077   }
00078   
00079   median = res[0];
00080   stand_dev_if_gaussian = res[0] - res[1];
00081 }
00082 
00083 
00084 //----------------------------------------------------------------------
00085 // computes a percentile of a given _sorted_ vector of quantities
00086 //  - sorted_quantities        the (sorted) vector contains the data sample
00087 //  - percentile               the percentile (defined between 0 and 1) to compute
00088 //  - nempty                   an additional number of 0's
00089 //                             (considered at the beginning of 
00090 //                             the quantity vector)
00091 //  - do_fj2_calculation       carry out the calculation as it
00092 //                             was done in fj2 (suffers from "edge effects")
00093 double BackgroundEstimatorBase::_percentile(const vector<double> & sorted_quantities, 
00094                                             const double percentile, 
00095                                             const double nempty,
00096                                             const bool do_fj2_calculation
00097                                             ) const {
00098   assert(percentile >= 0.0 && percentile <= 1.0);
00099 
00100   int quantities_size = sorted_quantities.size();
00101   if (quantities_size == 0) return 0;
00102 
00103   double total_njets = quantities_size + nempty;
00104   double percentile_pos;
00105   if (do_fj2_calculation) {
00106     percentile_pos = (total_njets-1)*percentile - nempty;
00107   } else {
00108     percentile_pos = (total_njets)*percentile - nempty - 0.5;
00109   }
00110 
00111   double result;
00112   if (percentile_pos >= 0 && quantities_size > 1) {
00113     int int_percentile_pos = int(percentile_pos);
00114 
00115     // avoid potential overflow issues
00116     if (int_percentile_pos+1 > quantities_size-1){
00117       int_percentile_pos = quantities_size-2;
00118       percentile_pos = quantities_size-1;
00119     }
00120 
00121     result =
00122       sorted_quantities[int_percentile_pos] * (int_percentile_pos+1-percentile_pos)
00123       + sorted_quantities[int_percentile_pos+1] * (percentile_pos - int_percentile_pos);
00124     
00125 
00126   } else if (percentile_pos > -0.5 && quantities_size >= 1 
00127              && !do_fj2_calculation) {
00128     // in the LHS of this "bin", just keep a constant value (we could have
00129     // interpolated to zero, but this might misbehave in cases where all jets
00130     // are active, because it would go to zero too fast)
00131     result = sorted_quantities[0];
00132   } else {
00133     result = 0.0;
00134   }
00135   return result;
00136 
00137 
00138 }
00139 
00140 
00141 FASTJET_END_NAMESPACE        // defined in fastjet/internal/base.hh
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends