fastjet: src/ClusterSequenceActiveArea.cc Source File

00001 //STARTHEADER
00002 // $Id: ClusterSequenceActiveArea.cc 514 2007-03-10 17:50:06Z salam $
00003 //
00004 // Copyright (c) 2005-2006, Matteo Cacciari and Gavin Salam
00005 //
00006 //----------------------------------------------------------------------
00007 // This file is part of FastJet.
00008 //
00009 //  FastJet is free software; you can redistribute it and/or modify
00010 //  it under the terms of the GNU General Public License as published by
00011 //  the Free Software Foundation; either version 2 of the License, or
00012 //  (at your option) any later version.
00013 //
00014 //  The algorithms that underlie FastJet have required considerable
00015 //  development and are described in hep-ph/0512210. If you use
00016 //  FastJet as part of work towards a scientific publication, please
00017 //  include a citation to the FastJet paper.
00018 //
00019 //  FastJet is distributed in the hope that it will be useful,
00020 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
00021 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00022 //  GNU General Public License for more details.
00023 //
00024 //  You should have received a copy of the GNU General Public License
00025 //  along with FastJet; if not, write to the Free Software
00026 //  Foundation, Inc.:
00027 //      59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00028 //----------------------------------------------------------------------
00029 //ENDHEADER
00030 
00031 #include "fastjet/PseudoJet.hh"
00032 #include "fastjet/ClusterSequence.hh"
00033 #include "fastjet/ClusterSequenceActiveArea.hh"
00034 #include "fastjet/ClusterSequenceActiveAreaExplicitGhosts.hh"
00035 #include<iostream>
00036 #include<vector>
00037 
00038 FASTJET_BEGIN_NAMESPACE      // defined in fastjet/internal/base.hh
00039 
00040 
00041 using namespace std;
00042 
00043 
00044 //int ClusterSequenceActiveArea::_n_seed_warnings = 0;
00045 //const int _max_seed_warnings = 10;
00046 
00047 void ClusterSequenceActiveArea::_initialise_and_run_AA (
00048                 const JetDefinition & jet_def,
00049                 const ActiveAreaSpec & area_spec,
00050                 const bool & writeout_combinations) 
00051 {
00052 
00053   // initialize our local area information
00054   _average_area.resize(2*_jets.size());  _average_area  = 0.0;
00055   _average_area2.resize(2*_jets.size()); _average_area2 = 0.0;
00056   _average_area_4vector.resize(2*_jets.size()); 
00057   _average_area_4vector = PseudoJet(0.0,0.0,0.0,0.0);
00058   _non_jet_area = 0.0; _non_jet_area2 = 0.0; _non_jet_number=0.0;
00059      
00060   // for future reference...
00061   _maxrap_for_area = area_spec.ghost_maxrap();
00062   _safe_rap_for_area = _maxrap_for_area - jet_def.R();
00063 
00064   // Make sure we'll have at least one repetition -- then we can
00065   // deduce the unghosted clustering sequence from one of the ghosted
00066   // sequences. If we do not have any repetitions, then get the
00067   // unghosted sequence from the plain unghosted clustering.
00068   //
00069   // NB: all decanting and filling of initial history will then
00070   // be carried out by base-class routine
00071   if (area_spec.repeat() <= 0) {
00072     _initialise_and_run(jet_def, writeout_combinations);
00073     return;
00074   }
00075 
00076   // transfer all relevant info into internal variables
00077   _decant_options(jet_def, writeout_combinations);
00078 
00079   // set up the history entries for the initial particles (those
00080   // currently in _jets)
00081   _fill_initial_history();
00082 
00083   // record the input jets as they are currently
00084   vector<PseudoJet> input_jets(_jets);
00085 
00086   // code for testing the unique tree
00087   vector<int> unique_tree;
00088 
00089   
00090   
00091 
00092   // run the clustering multiple times so as to get areas of all the jets
00093   for (int irepeat = 0; irepeat < area_spec.repeat(); irepeat++) {
00094 
00095     ClusterSequenceActiveAreaExplicitGhosts clust_seq(input_jets, 
00096                                                       jet_def, area_spec);
00097 
00098     if (irepeat == 0) {
00099       // take the non-ghost part of the history and put into our own
00100       // history.
00101       _transfer_ghost_free_history(clust_seq);
00102       // get the "unique" order that will be used for transferring all areas. 
00103       unique_tree = unique_history_order();
00104     }
00105 
00106     // transfer areas from clust_seq into our object
00107     _transfer_areas(unique_tree, clust_seq);
00108   }
00109   
00110   _average_area  /= area_spec.repeat();
00111   _average_area2 /= area_spec.repeat();
00112   if (area_spec.repeat() > 1) {
00113     _average_area2 = sqrt(abs(_average_area2 - _average_area*_average_area)/
00114                           (area_spec.repeat()-1));
00115   } else {
00116     _average_area2 = 0.0;
00117   }
00118 
00119   _non_jet_area  /= area_spec.repeat();
00120   _non_jet_area2 /= area_spec.repeat();
00121   _non_jet_area2  = sqrt(abs(_non_jet_area2 - _non_jet_area*_non_jet_area)/
00122                          area_spec.repeat());
00123   _non_jet_number /= area_spec.repeat();
00124 
00125   // following bizarre way of writing things is related to 
00126   // poverty of operations on PseudoJet objects (as well as some confusion
00127   // in one or two places)
00128   for (unsigned i = 0; i < _average_area_4vector.size(); i++) {
00129     _average_area_4vector[i] = (1.0/area_spec.repeat()) * _average_area_4vector[i];
00130   }
00131   //cerr << "Non-jet area = " << _non_jet_area << " +- " << _non_jet_area2<<endl;
00132 
00133   
00134 }
00135 
00136 
00137 //----------------------------------------------------------------------
00138 double ClusterSequenceActiveArea::pt_per_unit_area(
00139                        mean_pt_strategies strat, double range) const {
00140   
00141   vector<PseudoJet> incl_jets = inclusive_jets();
00142   vector<double> pt_over_areas;
00143 
00144   for (unsigned i = 0; i < incl_jets.size(); i++) {
00145     if (abs(incl_jets[i].rap()) < _safe_rap_for_area) {
00146       double this_area;
00147       if ( strat == median_4vector ) {
00148           this_area = area_4vector(incl_jets[i]).perp();
00149       } else {
00150           this_area = area(incl_jets[i]);
00151       }
00152       pt_over_areas.push_back(incl_jets[i].perp()/this_area);
00153     }
00154   }
00155   
00156   // there is nothing inside our region, so answer will always be zero
00157   if (pt_over_areas.size() == 0) {return 0.0;}
00158   
00159   // get median (pt/area) [this is the "old" median definition. It considers
00160   // only the "real" jets in calculating the median, i.e. excluding the
00161   // only-ghost ones]
00162   sort(pt_over_areas.begin(), pt_over_areas.end());
00163   double non_ghost_median_ratio = pt_over_areas[pt_over_areas.size()/2];
00164 
00165   // new median definition that takes into account non-jet area (i.e.
00166   // jets composed only of ghosts), and for fractional median position 
00167   // interpolates between the corresponding entries in the pt_over_areas array
00168   double nj_median_pos = (pt_over_areas.size()-1 - _non_jet_number)/2.0;
00169   double nj_median_ratio;
00170   if (nj_median_pos >= 0 && pt_over_areas.size() > 1) {
00171     int int_nj_median = int(nj_median_pos);
00172     nj_median_ratio = 
00173       pt_over_areas[int_nj_median] * (int_nj_median+1-nj_median_pos)
00174       + pt_over_areas[int_nj_median+1] * (nj_median_pos - int_nj_median);
00175   } else {
00176     nj_median_ratio = 0.0;
00177   }
00178 
00179 
00180   // get various forms of mean (pt/area)
00181   double pt_sum = 0.0, pt_sum_with_cut = 0.0;
00182   double area_sum = _non_jet_area, area_sum_with_cut = _non_jet_area;
00183   double ratio_sum = 0.0; 
00184   double ratio_n = _non_jet_number;
00185   for (unsigned i = 0; i < incl_jets.size(); i++) {
00186     if (abs(incl_jets[i].rap()) < _safe_rap_for_area) {
00187       double this_area;
00188       if ( strat == median_4vector ) {
00189           this_area = area_4vector(incl_jets[i]).perp();
00190       } else {
00191           this_area = area(incl_jets[i]);
00192       }
00193       pt_sum   += incl_jets[i].perp();
00194       area_sum += this_area;
00195       double ratio = incl_jets[i].perp()/this_area;
00196       if (ratio < range*nj_median_ratio) {
00197         pt_sum_with_cut   += incl_jets[i].perp();
00198         area_sum_with_cut += this_area;
00199         ratio_sum += ratio; ratio_n++;
00200       }
00201     }
00202   }
00203   
00204   if (strat == play) {
00205     double trunc_sum = 0, trunc_sumsqr = 0;
00206     vector<double> means(pt_over_areas.size()), sd(pt_over_areas.size());
00207     for (unsigned i = 0; i < pt_over_areas.size() ; i++ ) {
00208       double ratio = pt_over_areas[i];
00209       trunc_sum += ratio;
00210       trunc_sumsqr += ratio*ratio;
00211       means[i] = trunc_sum / (i+1);
00212       sd[i]    = sqrt(abs(means[i]*means[i]  - trunc_sumsqr/(i+1)));
00213       cerr << "i, means, sd: " <<i<<", "<< means[i] <<", "<<sd[i]<<", "<<
00214         sd[i]/sqrt(i+1.0)<<endl;
00215     }
00216     cout << "-----------------------------------"<<endl;
00217     for (unsigned i = 0; i <= pt_over_areas.size()/2 ; i++ ) {
00218       cout << "Median "<< i <<" = " << pt_over_areas[i]<<endl;
00219     }
00220     cout << "Number of non-jets: "<<_non_jet_number<<endl;
00221     cout << "Area of non-jets: "<<_non_jet_area<<endl;
00222     cout << "Default median position: " << (pt_over_areas.size()-1)/2.0<<endl;
00223     cout << "NJ median position: " << nj_median_pos <<endl;
00224     cout << "NJ median value: " << nj_median_ratio <<endl;
00225     return 0.0;
00226   }
00227 
00228   switch(strat) {
00229   case median:
00230   case median_4vector:
00231     return nj_median_ratio;
00232   case non_ghost_median:
00233     return non_ghost_median_ratio; 
00234   case pttot_over_areatot:
00235     return pt_sum / area_sum;
00236   case pttot_over_areatot_cut:
00237     return pt_sum_with_cut / area_sum_with_cut;
00238   case mean_ratio_cut:
00239     return ratio_sum/ratio_n;
00240   default:
00241     return nj_median_ratio;
00242   }
00243 
00244 }
00245 
00246 
00247 //----------------------------------------------------------------------
00248 // fit a parabola to pt/area as a function of rapidity, using the
00249 // formulae of CCN28-36 (which actually fits f = a+b*x^2)
00250 void ClusterSequenceActiveArea::parabolic_pt_per_unit_area(
00251        double & a, double & b, double raprange, double exclude_above,
00252        bool use_area_4vector) const {
00253   
00254   double this_raprange;
00255   if (raprange <= 0) {this_raprange = _safe_rap_for_area;}
00256   else {this_raprange = raprange;}
00257 
00258   int n=0;
00259   int n_excluded = 0;
00260   double mean_f=0, mean_x2=0, mean_x4=0, mean_fx2=0; 
00261 
00262   vector<PseudoJet> incl_jets = inclusive_jets();
00263 
00264   for (unsigned i = 0; i < incl_jets.size(); i++) {
00265     if (abs(incl_jets[i].rap()) < this_raprange) {
00266       double this_area;
00267       if ( use_area_4vector ) {
00268           this_area = area_4vector(incl_jets[i]).perp();     
00269       } else {
00270           this_area = area(incl_jets[i]);
00271       }
00272       double f = incl_jets[i].perp()/this_area;
00273       if (exclude_above <= 0.0 || f < exclude_above) {
00274         double x = incl_jets[i].rap(); double x2 = x*x;
00275         mean_f   += f;
00276         mean_x2  += x2;
00277         mean_x4  += x2*x2;
00278         mean_fx2 += f*x2;
00279         n++;
00280       } else {
00281         n_excluded++;
00282       }
00283     }
00284   }
00285 
00286   if (n <= 1) {
00287     // meaningful results require at least two jets inside the
00288     // area -- mind you if there are empty jets we should be in 
00289     // any case doing something special...
00290     a = 0.0;
00291     b = 0.0;
00292   } else {
00293     mean_f   /= n;
00294     mean_x2  /= n;
00295     mean_x4  /= n;
00296     mean_fx2 /= n;
00297     
00298     b = (mean_f*mean_x2 - mean_fx2)/(mean_x2*mean_x2 - mean_x4);
00299     a = mean_f - b*mean_x2;
00300   }
00301   //cerr << "n_excluded = "<< n_excluded << endl;
00302 }
00303 
00304 
00305 //----------------------------------------------------------------------
00308 void ClusterSequenceActiveArea::_transfer_ghost_free_history(
00309              const ClusterSequenceActiveAreaExplicitGhosts & ghosted_seq) {
00310   
00311   const vector<history_element> & gs_history  = ghosted_seq.history();
00312   vector<int> gs2self_hist_map(gs_history.size());
00313 
00314   // work our way through to first non-trivial combination
00315   unsigned igs = 0;
00316   unsigned iself = 0;
00317   while (gs_history[igs].parent1 == InexistentParent) {
00318     // record correspondence 
00319     if (!ghosted_seq.is_pure_ghost(igs)) {
00320       gs2self_hist_map[igs] = iself++; 
00321     } else {
00322       gs2self_hist_map[igs] = Invalid; 
00323     }
00324     igs++;
00325   };
00326 
00327   // make sure the count of non-ghost initial jets is equal to
00328   // what we already have in terms of initial jets
00329   assert(iself == _history.size());
00330 
00331   // now actually transfer things
00332   do  {
00333     // if we are a pure ghost, then go on to next round
00334     if (ghosted_seq.is_pure_ghost(igs)) {
00335       gs2self_hist_map[igs] = Invalid;
00336       continue;
00337     }
00338 
00339     const history_element & gs_hist_el = gs_history[igs];
00340 
00341     bool parent1_is_ghost = ghosted_seq.is_pure_ghost(gs_hist_el.parent1);
00342     bool parent2_is_ghost = ghosted_seq.is_pure_ghost(gs_hist_el.parent2);
00343 
00344     // if exactly one parent is a ghost then maintain info about the
00345     // non-ghost correspondence for this jet, and then go on to next
00346     // recombination in the ghosted sequence
00347     if (parent1_is_ghost && !parent2_is_ghost && gs_hist_el.parent2 >= 0) {
00348       gs2self_hist_map[igs] = gs2self_hist_map[gs_hist_el.parent2];
00349       continue;
00350     }
00351     if (!parent1_is_ghost && parent2_is_ghost) {
00352       gs2self_hist_map[igs] = gs2self_hist_map[gs_hist_el.parent1];
00353       continue;
00354     }
00355 
00356     // no parents are ghosts...
00357     if (gs_hist_el.parent2 >= 0) {
00358       // recombination of two non-ghosts
00359       gs2self_hist_map[igs] = _history.size();
00360       // record the recombination in our own sequence
00361       int newjet_k; // dummy var -- not used
00362       //cerr << igs << " " << gs_hist_el.parent1 << " " << gs_hist_el.parent2 << endl;
00363       //cerr << gs2self_hist_map[gs_hist_el.parent1] << " " << gs2self_hist_map[gs_hist_el.parent2] << endl;
00364       int jet_i = _history[gs2self_hist_map[gs_hist_el.parent1]].jetp_index;
00365       int jet_j = _history[gs2self_hist_map[gs_hist_el.parent2]].jetp_index;
00366       //cerr << "recombining "<< jet_i << " and "<< jet_j << endl;
00367       _do_ij_recombination_step(jet_i, jet_j, gs_hist_el.dij, newjet_k);
00368     } else {
00369       // we have a non-ghost that has become a beam-jet
00370       assert(gs_history[igs].parent2 == BeamJet);
00371       // record position
00372       gs2self_hist_map[igs] = _history.size();
00373       // record the recombination in our own sequence
00374       _do_iB_recombination_step(
00375              _history[gs2self_hist_map[gs_hist_el.parent1]].jetp_index,
00376              gs_hist_el.dij);
00377     }
00378   } while (++igs < gs_history.size());
00379 
00380   // finally transfer info about strategy used (which isn't necessarily
00381   // always the one that got asked for...)
00382   _strategy = ghosted_seq.strategy_used();
00383 }
00384 
00385 //----------------------------------------------------------------------
00386 void ClusterSequenceActiveArea::_transfer_areas(
00387             const vector<int> & unique_hist_order,
00388             const ClusterSequenceActiveAreaExplicitGhosts & ghosted_seq  ) {
00389 
00390   const vector<history_element> & gs_history  = ghosted_seq.history();
00391   const vector<PseudoJet>       & gs_jets     = ghosted_seq.jets();
00392   vector<int>    gs_unique_hist_order = ghosted_seq.unique_history_order();
00393 
00394   const double tolerance = 1e-13; // to decide when two jets are the same
00395 
00396   int j = -1;
00397   int hist_index = -1;
00398   
00399   valarray<double> our_areas(_history.size());
00400   our_areas = 0.0;
00401 
00402   valarray<PseudoJet> our_area_4vectors(_history.size());
00403   our_area_4vectors = PseudoJet(0.0,0.0,0.0,0.0);
00404 
00405   for (unsigned i = 0; i < gs_history.size(); i++) {
00406     // only consider composite particles
00407     unsigned gs_hist_index = gs_unique_hist_order[i];
00408     if (gs_hist_index < ghosted_seq.n_particles()) continue;
00409     const history_element & gs_hist = gs_history[gs_unique_hist_order[i]];
00410     int parent1 = gs_hist.parent1;
00411     int parent2 = gs_hist.parent2;
00412 
00413     if (parent2 == BeamJet) {
00414       // need to look at parent to get the actual jet
00415       const PseudoJet & jet = 
00416           gs_jets[gs_history[parent1].jetp_index];
00417       double area = ghosted_seq.area(jet);
00418       PseudoJet ext_area = ghosted_seq.area_4vector(jet);
00419 
00420       if (ghosted_seq.is_pure_ghost(parent1)) {
00421         if (abs(jet.rap()) < _safe_rap_for_area) {
00422           _non_jet_area  += area;
00423           _non_jet_area2 += area*area;
00424           _non_jet_number += 1;
00425         }
00426       } else {
00427 
00428         // get next "combined-particle" index in our own history
00429         // making sure we don't go beyond it's bounds (if we do
00430         // then we're in big trouble anyway...)
00431         while (++j < static_cast<int>(_history.size())) {
00432           hist_index = unique_hist_order[j];
00433           if (hist_index >= _initial_n) break;}
00434 
00435         // sanity check 
00436         const PseudoJet & refjet = 
00437           _jets[_history[_history[hist_index].parent1].jetp_index];
00438         //if (jet.perp2() != refjet.perp2()) {
00439         //if (abs(jet.perp2()-refjet.perp2()) > 
00440         //            tolerance*max(jet.perp2(),refjet.perp2())) {
00441 
00442         // If pt disagrees check E; if they both disagree there's a
00443         // problem here... NB: a massive particle with zero pt may
00444         // have its pt changed when a ghost is added -- this is why we
00445         // also require the energy to be wrong before complaining
00446         if (abs(jet.perp2()-refjet.perp2()) > 
00447                     tolerance*max(jet.perp2(),refjet.perp2())
00448             && abs(jet.E()-refjet.E()) > tolerance*max(jet.E(),refjet.E())) {
00449           cerr << jet.perp() << " " << refjet.perp() << " "
00450                << jet.perp() - refjet.perp() << endl;
00451           cerr << refjet.px() << " " 
00452                << refjet.py() << " " 
00453                << refjet.pz() << " " 
00454                << refjet.E() << endl;
00455           throw Error("Could not match clustering sequence for an inclusive jet when reconstructing areas"); }
00456 
00457         // set the area at this clustering stage
00458         our_areas[hist_index]  = area; 
00459         our_area_4vectors[hist_index]  = ext_area; 
00460 
00461         // update the parent as well -- that way its area is the area
00462         // immediately before clustering (i.e. resolve an ambiguity in
00463         // the Cambridge case and ensure in the kt case that the original
00464         // particles get a correct area)
00465         our_areas[_history[hist_index].parent1] = area;
00466         our_area_4vectors[_history[hist_index].parent1] = ext_area;
00467         
00468       }
00469     }
00470     else if (!ghosted_seq.is_pure_ghost(parent1) && 
00471              !ghosted_seq.is_pure_ghost(parent2)) {
00472 
00473       // get next "combined-particle" index in our own history
00474       while (++j < static_cast<int>(_history.size())) {
00475         hist_index = unique_hist_order[j];
00476         if (hist_index >= _initial_n) break;}
00477       
00478       const PseudoJet & jet = gs_jets[gs_hist.jetp_index];
00479       const PseudoJet & refjet = _jets[_history[hist_index].jetp_index];
00480 
00481       // run sanity check 
00482       if (abs(jet.perp2()-refjet.perp2()) > 
00483           tolerance*max(jet.perp2(),refjet.perp2())) {
00484           cerr << jet.perp() << " " << refjet.perp() << " "<< jet.perp() - refjet.perp() << endl;
00485           throw Error("Could not match clustering sequence for an exclusive jet when reconstructing areas"); }
00486 
00487       // update area and our local index (maybe redundant since later
00488       // the descendants will reupdate it?)
00489       double area  = ghosted_seq.area(jet);
00490       our_areas[hist_index]  += area; 
00491 
00492       PseudoJet ext_area = ghosted_seq.area_4vector(jet);
00493       //our_area_4vectors[hist_index] = our_area_4vectors[hist_index] + ext_area; 
00494       _jet_def.recombiner()->plus_equal(our_area_4vectors[hist_index], ext_area);
00495 
00496       // now update areas of parents (so that they becomes areas
00497       // immediately before clustering occurred). This is of use
00498       // because it allows us to set the areas of the original hard
00499       // particles in the kt algorithm; for the Cambridge case it
00500       // means a jet's area will be the area just before it clusters
00501       // with another hard jet.
00502       const PseudoJet & jet1 = gs_jets[gs_history[parent1].jetp_index];
00503       int our_parent1 = _history[hist_index].parent1;
00504       our_areas[our_parent1] = ghosted_seq.area(jet1);
00505       our_area_4vectors[our_parent1] = ghosted_seq.area_4vector(jet1);
00506 
00507       const PseudoJet & jet2 = gs_jets[gs_history[parent2].jetp_index];
00508       int our_parent2 = _history[hist_index].parent2;
00509       our_areas[our_parent2] = ghosted_seq.area(jet2);
00510       our_area_4vectors[our_parent2] = ghosted_seq.area_4vector(jet2);
00511     }
00512 
00513   }
00514 
00515   _average_area  += our_areas; 
00516   _average_area2 += our_areas*our_areas; 
00517 
00518   //_average_area_4vector += our_area_4vectors;
00519   // Use the proper recombination scheme when averaging the area_4vectors
00520   // over multiple ghost runs (i.e. the repeat stage);
00521   for (unsigned i = 0; i < _average_area_4vector.size(); i++) {
00522     _jet_def.recombiner()->plus_equal(_average_area_4vector[i],
00523                                        our_area_4vectors[i]);
00524   }
00525 }
00526 
00527 
00528 
00529 FASTJET_END_NAMESPACE
00530