726 lines
30 KiB
C++
726 lines
30 KiB
C++
|
/**********************************************************************************
|
||
|
* Project : TMVA - a Root-integrated toolkit for multivariate data analysis *
|
||
|
* Package : TMVA *
|
||
|
 * Executable: TMVAClassificationApplication *
|
||
|
* *
|
||
|
* This macro provides a simple example on how to use the trained classifiers *
|
||
|
* within an analysis module *
|
||
|
**********************************************************************************/
|
||
|
|
||
|
//Calculating the TMVA response for all events
|
||
|
//David Gerick
|
||
|
//Renata Kopecna
|
||
|
|
||
|
|
||
|
#include <cstdlib>
|
||
|
#include <vector>
|
||
|
#include <iostream>
|
||
|
#include <map>
|
||
|
#include <string>
|
||
|
#include <sstream>
|
||
|
|
||
|
#include "TChain.h"
|
||
|
#include "TFile.h"
|
||
|
#include "TMath.h"
|
||
|
#include "TTree.h"
|
||
|
#include "TString.h"
|
||
|
#include "TSystem.h"
|
||
|
#include "TROOT.h"
|
||
|
#include "TStopwatch.h"
|
||
|
|
||
|
#include "GlobalFunctions.hh"
|
||
|
#include "Paths.hpp"
|
||
|
#include "MVAclass.hpp"
|
||
|
//#include "HeidelbergFitter/LHCbStyle.h"
|
||
|
//#include "./MVA_b2kmm.cc"
|
||
|
|
||
|
#if not defined(__CINT__) || defined(__MAKECINT__)
|
||
|
#include "TMVA/TMVAGui.h"
|
||
|
#include "TMVA/Tools.h"
|
||
|
#include "TMVA/Reader.h"
|
||
|
#include "TMVA/MethodCuts.h"
|
||
|
#endif
|
||
|
|
||
|
using namespace TMVA;
|
||
|
|
||
|
//////////////////////////////////////////////////////
|
||
|
/// TMVAClassificationApplication()
|
||
|
/// for each event the BDTresponse is calculated and saved in a new branch in the tree.
|
||
|
/// based on the training of the BDT, whose information is stored in the xml file,
|
||
|
/// the BDTresponse is calculated for each event. The splitting of the data-set has to be
|
||
|
/// identical to the splitting of the training. Splitting can be done in years, Kshort
|
||
|
/// decay inside (LL) or outside the Velo (DD).
|
||
|
/// The variables used (ll. 195) must be in the same order in which they are introduced to the
|
||
|
/// factory in MVA_b2kmm.cc, otherwise the program will not work
|
||
|
///
|
||
|
/// TMVAClassApp()
|
||
|
/// Perform the TMVAClassificationApplication function with predefined values and strings
|
||
|
/// for the two subdecays!
|
||
|
///
|
||
|
/// TMVA2MC_AfterPreSelection()
|
||
|
/// Same function as above, but this will apply the BDT to the MC sample.
|
||
|
/// Apply to the tuple after the pre-selection!
|
||
|
/// Optional to the truthmatched or un-matched tree
|
||
|
///
|
||
|
/// TMVA2MC_AfterReweighting()
|
||
|
/// Same function as above, but this will apply the BDT to the MC sample.
|
||
|
/// Apply to the tuple after the reweighting, i.e. the BDTinput tuple!
|
||
|
/// Optional to the truthmatched or un-matched tree
|
||
|
///
|
||
|
/// TMVA2AllMC()
|
||
|
/// Get the MC assign with a BDTresponse for all years and magnet polarities at all stages and
|
||
|
/// for truthmatched and non-matched trees!
|
||
|
///
|
||
|
|
||
|
/// Extract the name under which a variable is registered with the TMVA reader.
///
/// If the input contains the separator ":=", everything after its first
/// occurrence is returned; otherwise the input is returned unchanged.
/// The resulting name is also printed through coutDebug.
string InputToReader(string ReaderName){
    const std::size_t separatorPos = ReaderName.find(":=");

    // Without a separator the whole string is the reader name; with one, skip
    // the two separator characters and keep the remainder.
    const string readerLabel = (separatorPos == string::npos)
                                   ? ReaderName
                                   : ReaderName.substr(separatorPos + 2);

    coutDebug("Reader name: " + readerLabel);
    return readerLabel;
}
|
||
|
|
||
|
|
||
|
/// Apply the booked TMVA method(s) to one input sample and store the response per event.
///
/// The input chain is copied into a new output file and two branches are added to the copy:
///   - "<TMVAmethod>response" (Double_t): classifier response of the event,
///   - "RandomSubSet" (Int_t): +1 or -1, drawn from a TRandom3 with fixed seed 13.
/// The response is evaluated for the first enabled method among BDT, BDTD, BDTG and MLP;
/// if none of them is enabled the response branch is filled with 0.
///
/// @param Run               Run number; validated by checkRun and used in weight-file prefix and output path.
/// @param year              Year of the sample; validated by checkYear and used for input/output paths.
/// @param KshortDecayInVelo Forwarded to GetBDToutputFile only.
/// @param MC                If true the tree "DecayTreeTruthMatched" is read, otherwise "DecayTree".
/// @param ReferenceChannel  Forwarded to the check and path helpers.
/// @param PHSP              Forwarded to the check and path helpers.
/// @param UseLowQ2Range     Selects the "_lowQ2"/"_highQ2" weight-file suffix (when SplitInQ2 is set and
///                          Kst2Kspiplus is not) and is forwarded to GetBDToutputFile.
/// @param IncSample         If true, "KplusPi0Resolved" is replaced by "Inclusive" in the input/output paths.
/// @return 1 on success, 0 on any failure (failed check, unknown method, unusable input or output).
Int_t TMVAClassificationApplication_b2kmm(Int_t Run = 1, Int_t year = 2011, bool KshortDecayInVelo = false,
                                          bool MC = false, bool ReferenceChannel = false, bool PHSP = false, bool UseLowQ2Range = false, bool IncSample = false){
#ifdef __CINT__
    gROOT->ProcessLine( ".O0" ); // turn off optimization in CINT
#endif

    //---------------------------------------------------------------
    //NOT NEEDED AS IT LOOKS LIKE! (only if you have different BDTs for different years)
    bool SplitYears = false;
    //---------------------------------------------------------------

    //Check the sample
    if (!checkMC(MC,ReferenceChannel,PHSP,true)) return 0;
    //Check MVAconfig
    if (!checkRun(Run)) return 0;
    //Check if correct year was given (if Inc, just load the files)
    if (!checkYear(year,MC,ReferenceChannel,PHSP)) return 0;
    //---------------------------------------------------------------

    // This loads the library
    TMVA::Tools::Instance();

    // Default MVA methods to be trained + tested
    std::map<string,int> Use;

    // --- Cut optimisation
    Use["Cuts"]            = 1;
    Use["CutsD"]           = 1;
    Use["CutsPCA"]         = 0;
    Use["CutsGA"]          = 0;
    Use["CutsSA"]          = 0;
    //
    // --- 1-dimensional likelihood ("naive Bayes estimator")
    Use["Likelihood"]      = 1;
    Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
    Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
    Use["LikelihoodKDE"]   = 0;
    Use["LikelihoodMIX"]   = 0;
    //
    // --- Multidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 1;
    Use["PDERSD"]          = 0;
    Use["PDERSPCA"]        = 0;
    Use["PDEFoam"]         = 1;
    Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
    Use["KNN"]             = 1; // k-nearest neighbour method
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1; // Linear Discriminant identical to Fisher
    Use["Fisher"]          = 0;
    Use["FisherG"]         = 0;
    Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
    Use["HMatrix"]         = 0;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetics Algorithm
    Use["FDA_SA"]          = 0;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    Use["FDA_MCMT"]        = 0;
    //
    // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
    Use["MLP"]             = 0; // Recommended ANN
    Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
    Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and bayesian regulator
    Use["CFMlpANN"]        = 0; // Deprecated ANN from ALEPH
    Use["TMlpANN"]         = 0; // ROOT's own ANN
    //
    // --- Support Vector Machine
    Use["SVM"]             = 1;
    //
    // --- Boosted Decision Trees
    Use["BDT"]             = 1; // uses Adaptive Boost
    Use["BDTG"]            = 0; // uses Gradient Boost
    Use["BDTB"]            = 0; // uses Bagging
    Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
    //
    // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
    Use["RuleFit"]         = 1;
    // ---------------------------------------------------------------
    Use["Plugin"]          = 0;
    Use["Category"]        = 0;
    Use["SVM_Gauss"]       = 0;
    Use["SVM_Poly"]        = 0;
    Use["SVM_Lin"]         = 0;

    coutInfo("==> Start TMVAClassificationApplication");

    // Select methods: if TMVAmethod is set, only the comma-separated methods it names stay enabled
    if (TMVAmethod != "") {
        for (std::map<string,int>::iterator it = Use.begin(); it != Use.end(); ++it) it->second = 0;

        std::vector<TString> mlist = gTools().SplitString( TMVAmethod, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                coutERROR("Method \"" + regMethod + "\" not known in TMVA under this name. Choose among the following:");
                for (std::map<string,int>::iterator it = Use.begin(); it != Use.end(); ++it) coutERROR(it->first);
                return 0;
            }
            Use[regMethod] = 1;
        }
    }
    //Not completely sure what happens when one puts several methods as input to regMethod

    // --------------------------------------------------------------------------------------------------

    // --- Create the Reader object(s)
    TMVA::Reader *reader   = new TMVA::Reader( "!Color:!Silent" );
    TMVA::Reader *readerDD = new TMVA::Reader( "!Color:!Silent" );
    TMVA::Reader *readerLL = new TMVA::Reader( "!Color:!Silent" );

    // Create a set of variables and declare them to the reader
    // - the variable names MUST correspond in name and type to those given in the weight file(s) used
    MVA_variables * InputVariables = nullptr;
    int Nvar = 0;
    int Nbranch = 0;

    MVA_variables * InputVariablesDD = nullptr;
    MVA_variables * InputVariablesLL = nullptr;
    int NvarDD = 0;
    int NvarLL = 0;
    int NbranchDD = 0;
    int NbranchLL = 0;

    if (Kst2Kspiplus && SplitDDandLL){
        InputVariablesDD = new MVA_variables("DD");
        InputVariablesDD->print();
        NvarDD = InputVariablesDD->NumberOfVariables();
        NbranchDD = InputVariablesDD->NumberOfBranches();

        InputVariablesLL = new MVA_variables("LL");
        InputVariablesLL->print();
        NvarLL = InputVariablesLL->NumberOfVariables();
        NbranchLL = InputVariablesLL->NumberOfBranches();
    }
    else {
        InputVariables = new MVA_variables("");
        InputVariables->print();
        Nvar = InputVariables->NumberOfVariables();
        Nbranch = InputVariables->NumberOfBranches();
    }

    // Reader inputs. std::vector instead of variable-length arrays: standard C++, valid for
    // size zero and zero-initialised. The vectors are never resized, so the element addresses
    // handed to the readers stay valid.
    std::vector<Float_t> f_varDD(NvarDD);
    std::vector<Float_t> f_varLL(NvarLL);
    std::vector<Float_t> f_var(Nvar);

    //Feeding the reader(s)
    if (Kst2Kspiplus && SplitDDandLL){
        //DDtracks
        for (int i = 0; i < NvarDD; i++)
            readerDD->AddVariable( InputToReader(InputVariablesDD->AllVariables[i].ReaderName), &f_varDD[i]);
        //LLtracks
        for (int i = 0; i < NvarLL; i++)
            readerLL->AddVariable( InputToReader(InputVariablesLL->AllVariables[i].ReaderName), &f_varLL[i]);
    }
    else {
        for (int i = 0; i < Nvar; i++)
            reader->AddVariable( InputToReader(InputVariables->AllVariables[i].ReaderName), &f_var[i]);
    }

    // --- Book the MVA methods
    TString dir = "weights/";
    TString prefix, prefixDD, prefixLL;
    if (Kst2Kspiplus){
        if (SplitYears){
            prefix   = Form("%i_B2Kstmumu_%s", year, TheDecay.c_str());
            prefixLL = Form("%i_B2Kstmumu_%s_LL", year, TheDecay.c_str());
            prefixDD = Form("%i_B2Kstmumu_%s_DD", year, TheDecay.c_str());
        }
        else {
            prefix   = Form("B2Kstmumu_%s_Run%i", TheDecay.c_str(), Run);
            prefixLL = Form("B2Kstmumu_%s_LL_Run%i", TheDecay.c_str(), Run);
            prefixDD = Form("B2Kstmumu_%s_DD_Run%i", TheDecay.c_str(), Run);
        }
    }
    else { //pi0 channel(s)
        if (SplitYears){
            prefix = Form("%i_B2Kstmumu_%s%s", year, TheDecay.c_str(),SplitInQ2? (UseLowQ2Range ? "_lowQ2" : "_highQ2") : "");
        }
        else {
            prefix = Form("B2Kstmumu_%s_Run%i%s", TheDecay.c_str(), Run,SplitInQ2? (UseLowQ2Range ? "_lowQ2" : "_highQ2") : "");
        }
    }

    // Book method(s) and XML config file from BDT training and testing
    for (std::map<string,int>::iterator it = Use.begin(); it != Use.end(); ++it) {
        if (!it->second) continue;

        const TString methodName = TString(it->first) + TString(" method");
        const TString weightSuffix = TString("_") + TString(it->first) + TString(".weights.xml");

        if (Kst2Kspiplus && SplitDDandLL){
            readerDD->BookMVA( methodName, dir + prefixDD + weightSuffix );
            readerLL->BookMVA( methodName, dir + prefixLL + weightSuffix );
        }
        else { //combined DD and LL tracks for Kshort, or pi0 channel(s)
            reader->BookMVA( methodName, dir + prefix + weightSuffix );
        }
    }

    // Book output histograms. They are filled in the event loop but not written to the output
    // file. They are detached from the current directory so that this function owns them and
    // repeated calls do not collide on the fixed histogram names.
    UInt_t nbin = 100;
    TH1D *histBdt = nullptr, *histBdtG = nullptr, *histBdtD = nullptr, *histMlp = nullptr;

    if (Use["BDT"])  { histBdt  = new TH1D( "MVA_BDT",  "MVA_BDT",  nbin, -0.8, 0.8 ); histBdt->SetDirectory(nullptr);  }
    if (Use["BDTD"]) { histBdtD = new TH1D( "MVA_BDTD", "MVA_BDTD", nbin, -0.8, 0.8 ); histBdtD->SetDirectory(nullptr); }
    if (Use["BDTG"]) { histBdtG = new TH1D( "MVA_BDTG", "MVA_BDTG", nbin, -1.0, 1.0 ); histBdtG->SetDirectory(nullptr); }
    if (Use["MLP"])  { histMlp  = new TH1D( "MVA_MLP",  "MVA_MLP",  nbin, -1.0, 1.0 ); histMlp->SetDirectory(nullptr);  }

    // Prepare input tree
    //If years are not split, take one Run at the time and set 'year-variable' accordingly to the Run
    Int_t RunYear = 0;
    if (!SplitYears) RunYear = year;

    TChain* theTree = new TChain(MC ? "DecayTreeTruthMatched" : "DecayTree");
    string inputPath = GetBDTinputFile(RunYear,MC,ReferenceChannel,PHSP,false);
    if (SplitYears){
        if (SplitDDandLL && Kst2Kspiplus){ //@David please check this, not sure if this is correct!
            theTree->Add(GetBDTinputFile(year,MC,ReferenceChannel,PHSP,true).c_str());
            theTree->Add(GetBDTinputFile(year,MC,ReferenceChannel,PHSP,false).c_str());
        }
        else theTree->Add(GetBDTinputFile(year,MC,ReferenceChannel,PHSP,false).c_str());
        coutDebug("Reading " + GetBDTinputFile(year,MC,ReferenceChannel,PHSP,false));
    }
    else {
        if (SplitDDandLL && Kst2Kspiplus){
            theTree->Add(GetBDTinputFile(RunYear,MC,ReferenceChannel,PHSP,true).c_str());
            theTree->Add(GetBDTinputFile(RunYear,MC,ReferenceChannel,PHSP,false).c_str());
        }
        else {
            if (IncSample) replace(inputPath,"KplusPi0Resolved","Inclusive");
            theTree->Add(inputPath.c_str());
            coutDebug("Reading " + inputPath);
        }
    }

    coutInfo("--- TMVAClassificationApp : Using input tree: " + string(theTree->GetName()));

    // --- Event loop

    // Prepare the event tree
    // - here the variable names have to correspond to your tree
    coutInfo("--- Select signal sample");

    //Cout what variables are used in the TTree
    if (Kst2Kspiplus && SplitDDandLL){
        coutDebug("List of used branches: ");
        std::cout << std::endl;
        coutDebug("DD TRACKS: ");
        for (const string& branchName : InputVariablesDD->AllBranches)
            coutDebug("DD " + branchName);
        std::cout << std::endl;
        coutDebug(" LL TRACKS: ");
        for (const string& branchName : InputVariablesLL->AllBranches)
            coutDebug("LL " + branchName );
        std::cout << std::endl;
    }
    else {
        coutDebug("List of used branches: ");
        coutDebug("\t\t Note that Branches != reader!");
        coutDebug("\t\t Number of branches: " + to_string(Nbranch));
        for (const string& branchName : InputVariables->AllBranches) coutDebug( branchName );
    }

    Int_t EventIsLL = 0;
    // Branch buffers (zero-initialised, never resized: addresses given to the tree stay valid)
    std::vector<Double_t> d_var(Nbranch);
    std::vector<Double_t> d_varDD(NbranchDD);
    std::vector<Double_t> d_varLL(NbranchLL);

    //because we have one tree for DD and LL tracks and only one of the two Double_t buffers can be addressed to one branch, we copy the doubles afterwards manually.
    //the following pairs are created from checking the branch names in both track vectors and save the link (i.e. item 0 in DD copied to item 3 in LL: d_varLL[3] = d_varDD[0] )
    std::vector< std::pair<int, int> > DoubleUsedVariablesInDDandLL;

    int counter = 0; //Bruteforce, but oh well

    //link double variables to branches
    if (Kst2Kspiplus && SplitDDandLL){

        //DD tracks
        counter = 0;
        for (const string& ddBranch : InputVariablesDD->AllBranches){
            theTree->SetBranchAddress(ddBranch.c_str(), &d_varDD[counter]);
            counter++;
        }

        //LL tracks
        counter = 0;
        for (const string& llBranch : InputVariablesLL->AllBranches){
            //Since some variables are used in DD and LL track BDT training, we have to find out which variableID in DD tracks this is, and later copy the value from d_varDD to d_varLL manually
            bool VariableAlreadyAssignInDDtracks = false;
            int varIDinDDtracks = 0;

            for (const string& ddBranch : InputVariablesDD->AllBranches){
                if (ddBranch == llBranch){
                    VariableAlreadyAssignInDDtracks = true;
                    break;
                }
                varIDinDDtracks++;
            }

            if (VariableAlreadyAssignInDDtracks){ //when a matching variable is found in DD and LL tracks, the link between DD ID and LL ID is stored in the vector
                DoubleUsedVariablesInDDandLL.push_back(std::make_pair(varIDinDDtracks, counter));
            }
            else { //if the LL variable is not used for DD BDT training, the double is linked 'normally' to the tree branch
                theTree->SetBranchAddress(llBranch.c_str(), &d_varLL[counter]);
            }
            counter++;
        }
        theTree->SetBranchAddress("KshortDecayInVeLo", &EventIsLL);
        //finished DD and LL tracks
        coutInfo("Found " + to_string(DoubleUsedVariablesInDDandLL.size()) + " variables that are used by DD and LL track BDT training:");
        for (const std::pair<int, int>& link : DoubleUsedVariablesInDDandLL){
            coutInfo(" LL #" + to_string(link.second) + " <-- DD #" + to_string(link.first) );
        }
    }
    else {
        counter = 0;
        if (Kst2Kspiplus) theTree->SetBranchAddress("KshortDecayInVeLo", &EventIsLL);
        for (const string& branchName : InputVariables->AllBranches){
            theTree->SetBranchAddress(branchName.c_str(), &d_var[counter]);
            counter++;
        }
    }

    string outputPath = GetBDToutputFile(year,Run,MC,ReferenceChannel,PHSP,KshortDecayInVelo,UseLowQ2Range,false);
    if (IncSample) replace(outputPath,"KplusPi0Resolved","Inclusive");
    TFile * output = new TFile (outputPath.c_str(),"RECREATE");

    // Single place that releases everything allocated above; used on every exit path from here on.
    // Closing the file also disposes of the copied tree, which lives in the file's directory.
    auto releaseResources = [&]() {
        output->Close();
        delete output;
        delete theTree;
        delete histBdt;
        delete histBdtD;
        delete histBdtG;
        delete histMlp;
        delete readerDD;
        delete readerLL;
        delete reader;
        delete InputVariables;
        delete InputVariablesDD;
        delete InputVariablesLL;
    };

    if (output->IsZombie()){
        coutERROR("Output file " + outputPath + " could not be created!");
        releaseResources();
        return 0;
    }
    coutInfo("--- New file created! ");

    theTree->SetBranchStatus("*",1);
    coutInfo("Copy the data Tree... ");
    TTree* newTree = theTree->CopyTree("");
    // 'new TChain' never yields a null pointer, so unusable input can only be detected here:
    // without a usable input tree there is no copy to add branches to.
    if (newTree == nullptr){
        coutERROR("File(s) not found! Exit!");
        releaseResources();
        return 0;
    }
    coutInfo("Finished!");

    coutInfo("Add branch to cloned tree!");
    //Add new TBranch into TTree for BDT response value for each event.
    //Initialised so that a defined value is stored when none of BDT/BDTD/BDTG/MLP is enabled.
    Double_t BDTresponse = 0.0;

    TBranch* Bra_BDT = newTree->Branch(TMVAmethod+"response", &BDTresponse, TMVAmethod+"response/D");
    if (Bra_BDT == nullptr){
        coutERROR("New branch was not created successfully!");
        releaseResources();
        return 0;
    }
    //Add new TBranch into TTree for random separation of the dataset into two subsets:
    Int_t RandomSubSet = 0;
    TBranch* Bra_RandomSubSet = newTree->Branch("RandomSubSet", &RandomSubSet, "RandomSubSet/I");
    TRandom3 RandomSubSetter(13); // fixed seed, automatic storage (no leak)

    //get the formulas for each variable (i.e. log(x) or log(1-x) according to the settings in the txt file)
    std::vector<TFormula> formula;
    std::vector<TFormula> formulaDD;
    std::vector<TFormula> formulaLL;

    if (Kst2Kspiplus && SplitDDandLL){
        for (int i = 0; i < NvarDD; i++)
            formulaDD.push_back(TFormula(Form("formDD_%s", InputVariablesDD->AllBranches[i].c_str()),InputVariablesDD->AllVariables[i].Formula.c_str()));
        for (int i = 0; i < NvarLL; i++)
            formulaLL.push_back(TFormula(Form("formLL_%s", InputVariablesLL->AllBranches[i].c_str()),InputVariablesLL->AllVariables[i].Formula.c_str()));
    }
    else {
        for (int i = 0; i < Nvar; i++)
            formula.push_back(TFormula(Form("form_%s", InputVariables->AllBranches[i].c_str()),InputVariables->AllVariables[i].Formula.c_str()));
    }

    // Decide once which method is evaluated (same priority as before: BDT, BDTD, BDTG, MLP)
    // instead of doing string-keyed map lookups for every event.
    bool hasResponseMethod = true;
    TString responseMethod;
    TH1D* responseHist = nullptr;
    if      (Use["BDT"])  { responseMethod = "BDT method";  responseHist = histBdt;  }
    else if (Use["BDTD"]) { responseMethod = "BDTD method"; responseHist = histBdtD; }
    else if (Use["BDTG"]) { responseMethod = "BDTG method"; responseHist = histBdtG; }
    else if (Use["MLP"])  { responseMethod = "MLP method";  responseHist = histMlp;  }
    else                  { hasResponseMethod = false; }

    const Long64_t nEntries = theTree->GetEntries();
    coutInfo("--- Processing: " + to_string(nEntries) + " events");
    assert(nEntries == newTree->GetEntries());
    TStopwatch sw;
    sw.Start();

    for (Long64_t ievt=0; ievt<nEntries; ievt++) {

        if (ievt%1000 == 0) coutInfo("--- ... Processing event: " + to_string(ievt));

        newTree->GetEntry(ievt);

        // NoBr gives the number of consecutive branch values consumed by the formula of a variable;
        // k is the running index into the branch buffer.
        if (Kst2Kspiplus && SplitDDandLL){
            if (!EventIsLL){ //DD tracks
                for (int i = 0, k = 0; i < NvarDD; i++){
                    switch (InputVariablesDD->AllVariables[i].NoBr) {
                        case 1 : f_varDD[i] = static_cast<Float_t>(formulaDD.at(i).Eval(d_varDD[k]));
                                 k++;
                                 break;
                        case 2 : f_varDD[i] = static_cast<Float_t>(formulaDD.at(i).Eval(d_varDD[k],d_varDD[k+1]));
                                 k+=2;
                                 break;
                        case 3 : f_varDD[i] = static_cast<Float_t>(formulaDD.at(i).Eval(d_varDD[k],d_varDD[k+1],d_varDD[k+2]));
                                 k+=3;
                                 break;
                    }
                    if (TMath::IsNaN(f_varDD[i])){
                        coutDebug("Formula " + string(formulaDD.at(i).GetTitle()) + "\tVariable " + to_string(i) + " \tvalue:" + to_string(f_varDD[i]) + "\tBranch value " +to_string(d_varDD[k-1]));
                        f_varDD[i] = 0.0; //quick temporary hack for ln(1-DIRA) problem
                    }
                }
            }
            else { //LL tracks
                //copy the variables, which are used by both DD and LL tracks from the DD tracks buffer to LL tracks buffer!
                for (const std::pair<int, int>& link : DoubleUsedVariablesInDDandLL){
                    d_varLL[link.second] = d_varDD[link.first];
                }

                for (int i = 0, k = 0; i < NvarLL; i++){
                    switch (InputVariablesLL->AllVariables[i].NoBr) {
                        case 1 : f_varLL[i] = static_cast<Float_t>(formulaLL.at(i).Eval(d_varLL[k]));
                                 k++;
                                 break;
                        case 2 : f_varLL[i] = static_cast<Float_t>(formulaLL.at(i).Eval(d_varLL[k],d_varLL[k+1]));
                                 k+=2;
                                 break;
                        case 3 : f_varLL[i] = static_cast<Float_t>(formulaLL.at(i).Eval(d_varLL[k],d_varLL[k+1],d_varLL[k+2]));
                                 k+=3;
                                 break;
                    }
                    if (TMath::IsNaN(f_varLL[i])){
                        std::cout << "[DEBUG]\t\tFormula "<<formulaLL.at(i).GetTitle() << "\tVariable " << i << " \tvalue:" << f_varLL[i] << "\tBranch value "<<d_varLL[k-1] << std::endl;
                        f_varLL[i] = 0.0; //quick temporary hack for ln(1-DIRA) problem
                    }
                }
            }
        }
        else { // no separation between DD and LL. ALL pi_zero tuples
            for (int i = 0, k = 0; i < Nvar; i++){
                switch (InputVariables->AllVariables[i].NoBr) {
                    case 1 : f_var[i] = static_cast<Float_t>(formula.at(i).Eval(d_var[k]));
                             k++;
                             break;
                    case 2 : f_var[i] = static_cast<Float_t>(formula.at(i).Eval(d_var[k],d_var[k+1]));
                             k+=2;
                             break;
                    case 3 : f_var[i] = static_cast<Float_t>(formula.at(i).Eval(d_var[k],d_var[k+1],d_var[k+2]));
                             k+=3;
                             break;
                }
                if (TMath::IsNaN(f_var[i])){
                    std::cout << "[DEBUG]\t\tFormula "<<formula.at(i).GetTitle() << "\tVariable " << i << " \tvalue:" << f_var[i] << "\tBranch value "<<d_var[k-1] << std::endl;
                    f_var[i] = -24.0; //quick temporary hack for ln(1-DIRA) problem
                }
            }
        }

        //evaluate the response for this event with the according reader:
        if (hasResponseMethod){
            TMVA::Reader* activeReader = reader;
            if (Kst2Kspiplus && SplitDDandLL) activeReader = EventIsLL ? readerLL : readerDD;
            BDTresponse = static_cast<Double_t>(activeReader->EvaluateMVA(responseMethod));
            responseHist->Fill(BDTresponse);
        }

        //separate into two random subsets:
        if (RandomSubSetter.Rndm() > 0.5) RandomSubSet = 1;
        else RandomSubSet = -1;

        //save values to the new branches only (the copied branches are already filled):
        Bra_BDT->Fill();
        Bra_RandomSubSet->Fill();
    }

    // Get elapsed time
    sw.Stop();
    coutInfo("--- End of event loop: "); sw.Print();

    output->cd();
    //save updated tree
    newTree->Write("",TObject::kWriteDelete);
    coutInfo("--- Tree saved! ");

    // closes the output file and frees readers, chain, histograms and variable sets
    releaseResources();

    coutInfo("==> TMVAClassificationApplication is done!");

    return 1;
}
|
||
|
|
||
|
/// Apply the classifier to the data samples of every year returned by yearsData(Run).
///
/// All years are processed first with the KshortDecayInVelo argument set to false; if
/// KshortChannel is set, all years are processed a second time with it set to true.
/// @return 1 if every application succeeded, 0 as soon as one of them fails.
Int_t TMVAClassAppData(Int_t Run){
    const bool lowQ2 = false;
    const std::vector<string> yearList = yearsData(Run);

    for (int pass = 0; pass < 2; ++pass){
        // The second pass exists for the K-short channel only.
        if (pass == 1 && !KshortChannel) break;
        const bool decayInVelo = (pass == 1);

        for (std::size_t idx = 0; idx < yearList.size(); ++idx){
            const Int_t status = TMVAClassificationApplication_b2kmm(Run, stoi(yearList[idx]), decayInVelo, false, false, false, lowQ2);
            if (!status) return 0;
        }
    }
    return 1;
}
|
||
|
|
||
|
/// Apply the classifier to the signal MC samples of every year returned by yearsMC(false, false, Run).
///
/// All years are processed first with the KshortDecayInVelo argument set to false (each year is
/// printed before it is processed); if KshortChannel is set, all years are processed a second
/// time with it set to true.
/// @return 1 if every application succeeded, 0 as soon as one of them fails.
Int_t TMVAClassAppMC(Int_t Run){
    const bool lowQ2 = false;
    const std::vector<string> yearList = yearsMC(false, false, Run);

    for (int pass = 0; pass < 2; ++pass){
        // The second pass exists for the K-short channel only.
        if (pass == 1 && !KshortChannel) break;
        const bool decayInVelo = (pass == 1);

        for (std::size_t idx = 0; idx < yearList.size(); ++idx){
            // Only the first pass echoes the year being processed.
            if (pass == 0) std::cout << yearList[idx] << std::endl;

            const Int_t status = TMVAClassificationApplication_b2kmm(Run, stoi(yearList[idx]), decayInVelo, true, false, false, lowQ2);
            if (!status) return 0;
        }
    }
    return 1;
}
|
||
|
|
||
|
/// Apply the classifier to the reference-channel MC samples of every year returned by
/// yearsMC(true, false, Run).
///
/// All years are processed first with the KshortDecayInVelo argument set to false; if
/// KshortChannel is set, all years are processed a second time with it set to true.
/// @return 1 if every application succeeded, 0 as soon as one of them fails.
Int_t TMVAClassAppRefMC(Int_t Run){
    const bool lowQ2 = false;
    const std::vector<string> yearList = yearsMC(true, false, Run);

    for (int pass = 0; pass < 2; ++pass){
        // The second pass exists for the K-short channel only.
        if (pass == 1 && !KshortChannel) break;
        const bool decayInVelo = (pass == 1);

        for (std::size_t idx = 0; idx < yearList.size(); ++idx){
            const Int_t status = TMVAClassificationApplication_b2kmm(Run, stoi(yearList[idx]), decayInVelo, true, true, false, lowQ2);
            if (!status) return 0;
        }
    }
    return 1;
}
|
||
|
|
||
|
/// Apply the classifier to the phase-space MC samples of every year returned by
/// yearsMC(false, true, Run).
///
/// All years are processed first with the KshortDecayInVelo argument set to false; if
/// KshortChannel is set, all years are processed a second time with it set to true.
/// @return 1 if every application succeeded, 0 as soon as one of them fails.
Int_t TMVAClassAppPHSP(Int_t Run){
    const bool lowQ2 = false;
    const std::vector<string> yearList = yearsMC(false, true, Run);

    for (int pass = 0; pass < 2; ++pass){
        // The second pass exists for the K-short channel only.
        if (pass == 1 && !KshortChannel) break;
        const bool decayInVelo = (pass == 1);

        for (std::size_t idx = 0; idx < yearList.size(); ++idx){
            const Int_t status = TMVAClassificationApplication_b2kmm(Run, stoi(yearList[idx]), decayInVelo, true, false, true, lowQ2);
            if (!status) return 0;
        }
    }
    return 1;
}
|
||
|
|
||
|
/// Apply the classifier to the inclusive samples (IncSample = true, MC = true) for the
/// fixed list of years 2011, 2012 and 2016.
/// @return 1 if every application succeeded, 0 as soon as one of them fails.
Int_t TMVAClassAppInc(Int_t Run){
    const bool lowQ2 = false;
    const std::vector<string> yearList = {"2011","2012","2016"};

    for (std::size_t idx = 0; idx < yearList.size(); ++idx){
        const Int_t status = TMVAClassificationApplication_b2kmm(Run, stoi(yearList[idx]), false, true, false, false, lowQ2, true);
        if (!status) return 0;
    }
    return 1;
}
|
||
|
|
||
|
/// Run the classifier application on data, signal MC, reference-channel MC and phase-space MC,
/// in that order, stopping at the first stage that fails.
/// @return 1 if all four stages succeeded, 0 otherwise.
Int_t TMVAClassAppAll(Int_t Run = 1){
    // Short-circuit evaluation keeps the original order and stops at the first failing stage.
    const bool allStagesOk = TMVAClassAppData(Run)
                          && TMVAClassAppMC(Run)
                          && TMVAClassAppRefMC(Run)
                          && TMVAClassAppPHSP(Run);
    return allStagesOk ? 1 : 0;
}
|
||
|
|
||
|
|