FastJet 3.0beta1
BackgroundEstimatorBase.cc
00001 //STARTHEADER
00002 // $Id: BackgroundEstimatorBase.cc 2439 2011-07-21 08:13:17Z soyez $
00003 //
00004 // Copyright (c) 2005-2011, Matteo Cacciari, Gavin Salam and Gregory Soyez
00005 //
00006 //----------------------------------------------------------------------
00007 // This file is part of FastJet.
00008 //
00009 //  FastJet is free software; you can redistribute it and/or modify
00010 //  it under the terms of the GNU General Public License as published by
00011 //  the Free Software Foundation; either version 2 of the License, or
00012 //  (at your option) any later version.
00013 //
00014 //  The algorithms that underlie FastJet have required considerable
00015 //  development and are described in hep-ph/0512210. If you use
00016 //  FastJet as part of work towards a scientific publication, please
00017 //  include a citation to the FastJet paper.
00018 //
00019 //  FastJet is distributed in the hope that it will be useful,
00020 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00021 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00022 //  GNU General Public License for more details.
00023 //
00024 //  You should have received a copy of the GNU General Public License
00025 //  along with FastJet; if not, write to the Free Software
00026 //  Foundation, Inc.:
00027 //      59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00028 //----------------------------------------------------------------------
00029 //ENDHEADER
00030 
00031 
00032 #include "fastjet/tools/BackgroundEstimatorBase.hh"
00033 
00034 using namespace std;
00035 
00036 FASTJET_BEGIN_NAMESPACE      // defined in fastjet/internal/base.hh
00037 
00038 LimitedWarning BackgroundEstimatorBase::_warnings_empty_area;
00039 
00040 //----------------------------------------------------------------------
00041 // given a quantity in a vector (e.g. pt_over_area) and knowledge
00042 // about the number of empty jets, calculate the median and
00043 // stand_dev_if_gaussian (roughly from the 16th percentile)
00044 //
00045 // If do_fj2_calculation is set to true then this performs FastJet
00046 // 2.X estimation of the standard deviation, which has a spurious
00047 // offset in the limit of a small number of jets.
00048 void BackgroundEstimatorBase::_median_and_stddev(const vector<double> & quantity_vector, 
00049                                                  double n_empty_jets, 
00050                                                  double & median, 
00051                                                  double & stand_dev_if_gaussian,
00052                                                  bool do_fj2_calculation) const {
00053 
00054   // this check is redundant (the code below behaves sensibly even
00055   // with a zero size), but serves as a reminder of what happens if
00056   // the quantity vector is zero-sized
00057   if (quantity_vector.size() == 0) {
00058     median = 0;
00059     stand_dev_if_gaussian = 0;
00060     return;
00061   }
00062 
00063   vector<double> sorted_quantity_vector = quantity_vector;
00064   sort(sorted_quantity_vector.begin(), sorted_quantity_vector.end());
00065 
00066   // empty area can sometimes be negative; with small ranges this can
00067   // become pathological, so warn the user
00068   int n_jets_used = sorted_quantity_vector.size();
00069   if (n_empty_jets < -n_jets_used/4.0)
00070     _warnings_empty_area.warn("BackgroundEstimatorBase::_median_and_stddev(...): the estimated empty area is suspiciously large and negative and may lead to an over-estimation of rho. This may be due to (i) a rare statistical fluctuation or (ii) too small a range used to estimate the background properties.");
00071 
00072   // now get the median & error, accounting for empty jets;
00073   // define the fractions of distribution at median, median-1sigma
00074   double posn[2] = {0.5, (1.0-0.6827)/2.0};
00075   double res[2];
00076   for (int i = 0; i < 2; i++) {
00077     res[i] = _percentile(sorted_quantity_vector, posn[i], n_empty_jets, 
00078                          do_fj2_calculation);
00079   }
00080   
00081   median = res[0];
00082   stand_dev_if_gaussian = res[0] - res[1];
00083 }
00084 
00085 
00086 //----------------------------------------------------------------------
00087 // computes a percentile of a given _sorted_ vector of quantities
00088 //  - sorted_quantities        the (sorted) vector contains the data sample
00089 //  - percentile               the percentile (defined between 0 and 1) to compute
00090 //  - nempty                   an additional number of 0's
00091 //                             (considered at the beginning of 
00092 //                             the quantity vector)
00093 //  - do_fj2_calculation       carry out the calculation as it
00094 //                             was done in fj2 (suffers from "edge effects")
00095 double BackgroundEstimatorBase::_percentile(const vector<double> & sorted_quantities, 
00096                                             const double percentile, 
00097                                             const double nempty,
00098                                             const bool do_fj2_calculation
00099                                             ) const {
00100   assert(percentile >= 0.0 && percentile <= 1.0);
00101 
00102   int quantities_size = sorted_quantities.size();
00103   if (quantities_size == 0) return 0;
00104 
00105   double total_njets = quantities_size + nempty;
00106   double percentile_pos;
00107   if (do_fj2_calculation) {
00108     percentile_pos = (total_njets-1)*percentile - nempty;
00109   } else {
00110     percentile_pos = (total_njets)*percentile - nempty - 0.5;
00111   }
00112 
00113   double result;
00114   if (percentile_pos >= 0 && quantities_size > 1) {
00115     int int_percentile_pos = int(percentile_pos);
00116 
00117     // avoid potential overflow issues
00118     if (int_percentile_pos+1 > quantities_size-1){
00119       int_percentile_pos = quantities_size-2;
00120       percentile_pos = quantities_size-1;
00121     }
00122 
00123     result =
00124       sorted_quantities[int_percentile_pos] * (int_percentile_pos+1-percentile_pos)
00125       + sorted_quantities[int_percentile_pos+1] * (percentile_pos - int_percentile_pos);
00126     
00127 
00128   } else if (percentile_pos > -0.5 && quantities_size >= 1 
00129              && !do_fj2_calculation) {
00130     // in the LHS of this "bin", just keep a constant value (we could have
00131     // interpolated to zero, but this might misbehave in cases where all jets
00132     // are active, because it would go to zero too fast)
00133     result = sorted_quantities[0];
00134   } else {
00135     result = 0.0;
00136   }
00137   return result;
00138 
00139 
00140 }
00141 
00142 
00143 FASTJET_END_NAMESPACE        // defined in fastjet/internal/base.hh
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends