inclusive_detached_dilepton/bdt_classification.h

269 lines
6.7 KiB
C++

#ifndef BDT_CLASSIFICATION
#define BDT_CLASSIFICATION
#include <algorithm>
#include <cmath>
#include <filesystem>
#include <iostream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "RtypesCore.h"
// Storage type tag for a training variable (see class TV).
// Values are spelled out explicitly; they match the implicit numbering.
enum TVT
{
    Double = 0, // value is held in a Double_t buffer
    Float = 1,  // value is held in a Float_t buffer
    Int = 2     // declared but not handled by any code visible in this file
};
/// Describes one training variable: the name it has in the data sample, in the
/// MC sample and in the training trees, plus the value buffers it is read into.
///
/// Each instance carries a Double_t and a Float_t buffer for both the data and
/// the MC side; only the buffer matching the declared type (IsDouble() /
/// IsFloat()) is meaningful.
///
/// Instances can only be created through the static Float()/Double() factories,
/// which allocate with `new`; nothing in this class frees them, so the caller
/// owns the returned pointer.
///
/// The Get*Ref() accessors expose the addresses of the internal buffers. An
/// object must therefore stay alive (and must not be copied in place of the
/// original) for as long as anything holds one of those addresses.
class TV
{
private:
    std::string data_name;
    std::string mc_name;
    std::string train_name;
    TVT type;
    // Zero-initialised so that printing or finiteness checks performed before
    // the first fill read a defined value instead of indeterminate memory.
    Double_t mc_double_value{0.0};
    Float_t mc_float_value{0.0f};
    Double_t data_double_value{0.0};
    Float_t data_float_value{0.0f};

    // Private: use the Float()/Double() factories instead.
    TV(std::string data_name_in, std::string mc_name_in, std::string train_name_in, TVT type_in)
        : data_name{std::move(data_name_in)},
          mc_name{std::move(mc_name_in)},
          train_name{std::move(train_name_in)},
          type{type_in}
    {
    }

public:
    /// Creates a Float variable with distinct data, MC and training names.
    static TV *Float(std::string data_name, std::string mc_name, std::string train_name)
    {
        return new TV(std::move(data_name), std::move(mc_name), std::move(train_name), TVT::Float);
    }

    /// Creates a Float variable; the training name defaults to the data name.
    static TV *Float(std::string data_name, std::string mc_name)
    {
        std::string train_name = data_name;
        return new TV(std::move(data_name), std::move(mc_name), std::move(train_name), TVT::Float);
    }

    /// Creates a Float variable that uses the same name everywhere.
    static TV *Float(std::string data_name)
    {
        return new TV(data_name, data_name, data_name, TVT::Float);
    }

    /// Creates a Double variable with distinct data, MC and training names.
    static TV *Double(std::string data_name, std::string mc_name, std::string train_name)
    {
        return new TV(std::move(data_name), std::move(mc_name), std::move(train_name), TVT::Double);
    }

    /// Creates a Double variable; the training name defaults to the data name.
    static TV *Double(std::string data_name, std::string mc_name)
    {
        std::string train_name = data_name;
        return new TV(std::move(data_name), std::move(mc_name), std::move(train_name), TVT::Double);
    }

    /// Creates a Double variable that uses the same name everywhere.
    static TV *Double(std::string data_name)
    {
        return new TV(data_name, data_name, data_name, TVT::Double);
    }

    // --- Names. The returned pointers are valid while this object lives. ---
    const char *GetDataName() const
    {
        return data_name.c_str();
    }
    const char *GetMCName() const
    {
        return mc_name.c_str();
    }
    const char *GetTrainName() const
    {
        return train_name.c_str();
    }

    // --- Buffer addresses (non-const: the holder writes through them). ---
    Double_t *GetMCDoubleRef()
    {
        return &mc_double_value;
    }
    Float_t *GetMCFloatRef()
    {
        return &mc_float_value;
    }
    Double_t *GetDataDoubleRef()
    {
        return &data_double_value;
    }
    Float_t *GetDataFloatRef()
    {
        return &data_float_value;
    }

    // --- Current data-side values. ---
    Double_t GetDataDouble() const
    {
        return data_double_value;
    }
    Float_t GetDataFloat() const
    {
        return data_float_value;
    }

    /// Prints "<data_name> (<entry>): <value>" to stdout; the value is omitted
    /// when the type is neither Double nor Float.
    void PrintDataValue(int entry) const
    {
        std::cout << data_name << " (" << entry << "): ";
        if (IsDouble())
        {
            std::cout << data_double_value;
        }
        else if (IsFloat())
        {
            std::cout << data_float_value;
        }
        std::cout << std::endl;
    }

    /// Prints "<mc_name> (<entry>): <value>" to stdout; the value is omitted
    /// when the type is neither Double nor Float.
    void PrintMCValue(int entry) const
    {
        std::cout << mc_name << " (" << entry << "): ";
        if (IsDouble())
        {
            std::cout << mc_double_value;
        }
        else if (IsFloat())
        {
            std::cout << mc_float_value;
        }
        std::cout << std::endl;
    }

    /// True when the data-side value of the active type is neither NaN nor
    /// infinite. Returns false for types other than Double/Float.
    bool IsDataFinite() const
    {
        if (IsDouble())
        {
            return std::isfinite(data_double_value);
        }
        else if (IsFloat())
        {
            return std::isfinite(data_float_value);
        }
        return false;
    }

    /// True when the MC-side value of the active type is neither NaN nor
    /// infinite. Returns false for types other than Double/Float.
    bool IsMCFinite() const
    {
        if (IsDouble())
        {
            return std::isfinite(mc_double_value);
        }
        else if (IsFloat())
        {
            return std::isfinite(mc_float_value);
        }
        return false;
    }

    bool IsDouble() const
    {
        return type == TVT::Double;
    }
    bool IsFloat() const
    {
        return type == TVT::Float;
    }
};
/// Wires every variable's value buffers to the input chains and to the
/// training trees.
///
/// For each variable in `vars`:
///  - its data buffer is registered on `data_chain` under the data name,
///  - its MC buffer is registered on `mc_chain` under the MC name,
///  - a branch named after the training name is created on `sig_tree`
///    (backed by the MC buffer) and on `bkg_tree` (backed by the data buffer).
///
/// Variables that are neither Double nor Float are skipped. None of the
/// pointers is checked for null. The return status of SetBranchAddress is
/// not inspected.
///
/// Declared `inline` because this definition lives in a header; without it,
/// including the header from more than one translation unit fails at link time.
inline void ConnectVarsToData(std::vector<TV *> vars, TChain *data_chain, TChain *mc_chain, TTree *sig_tree, TTree *bkg_tree)
{
    for (TV *var : vars)
    {
        const char *train_name = var->GetTrainName();
        if (var->IsDouble())
        {
            // Leaf list "<name>/D" declares a Double_t leaf; built once and
            // shared by the signal and background branches.
            const TString leaf_list = TString::Format("%s/D", train_name);
            data_chain->SetBranchAddress(var->GetDataName(), var->GetDataDoubleRef());
            mc_chain->SetBranchAddress(var->GetMCName(), var->GetMCDoubleRef());
            sig_tree->Branch(train_name, var->GetMCDoubleRef(), leaf_list.Data());
            bkg_tree->Branch(train_name, var->GetDataDoubleRef(), leaf_list.Data());
        }
        else if (var->IsFloat())
        {
            // Leaf list "<name>/F" declares a Float_t leaf.
            const TString leaf_list = TString::Format("%s/F", train_name);
            data_chain->SetBranchAddress(var->GetDataName(), var->GetDataFloatRef());
            mc_chain->SetBranchAddress(var->GetMCName(), var->GetMCFloatRef());
            sig_tree->Branch(train_name, var->GetMCFloatRef(), leaf_list.Data());
            bkg_tree->Branch(train_name, var->GetDataFloatRef(), leaf_list.Data());
        }
    }
}
/// Trains, tests and evaluates a TMVA BDT on the given signal/background trees.
///
/// Output goes to "<unique_id>_tmva_out.root" (recreated on every call). The
/// factory is named "<unique_id>_factory" and the data loader
/// "<unique_id>_dataloader"; SetupReader() in this file builds its weight-file
/// path from the same two names.
///
/// @param vars       variables to register; each is added under its training
///                   name with type 'D' or 'F'. Other types are skipped.
/// @param unique_id  prefix used for the output file, factory and data loader.
/// @param sig_tree   signal training tree (weight 1.0).
/// @param bkg_tree   background training tree (weight 1.0).
///
/// If the output file cannot be opened, an error is printed to stderr and the
/// function returns without training.
///
/// Declared `inline` because this definition lives in a header.
inline void TrainBDT(std::vector<TV *> vars, const char* unique_id, TTree *sig_tree, TTree *bkg_tree)
{
    const TString outfile_name = TString::Format("%s_tmva_out.root", unique_id);
    TFile *output_file = TFile::Open(outfile_name, "RECREATE");
    // Guard against a failed open instead of dereferencing a null/zombie file.
    if (output_file == nullptr || output_file->IsZombie())
    {
        std::cerr << "@TMVA: Could not open output file: " << outfile_name.Data() << std::endl;
        delete output_file;
        return;
    }

    TString factory_options("V:Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Auto");
    TMVA::Factory *factory = new TMVA::Factory(TString::Format("%s_factory", unique_id), output_file, factory_options);
    TMVA::DataLoader *data_loader = new TMVA::DataLoader(TString::Format("%s_dataloader", unique_id));

    for (TV *var : vars)
    {
        std::cout << "@TMVA: Adding Branch: " << var->GetTrainName() << std::endl;
        if (var->IsDouble())
        {
            data_loader->AddVariable(var->GetTrainName(), 'D');
        }
        else if (var->IsFloat())
        {
            data_loader->AddVariable(var->GetTrainName(), 'F');
        }
    }

    const Double_t signal_weight = 1.0;
    const Double_t background_weight = 1.0;
    data_loader->AddSignalTree(sig_tree, signal_weight);
    data_loader->AddBackgroundTree(bkg_tree, background_weight);
    data_loader->PrepareTrainingAndTestTree("", "", "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=EqualNumEvents:!V");

    factory->BookMethod(data_loader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=600:MinNodeSize=2.5%:CreateMVAPdfs:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20");
    factory->TrainAllMethods();
    factory->TestAllMethods();
    factory->EvaluateAllMethods();

    output_file->Close();

    // Release everything this function allocated. The file is closed first and
    // the TMVA objects are deleted afterwards, the same order ROOT's
    // TMVAClassification tutorial uses. sig_tree / bkg_tree were passed in as
    // pointers and are presumably not owned by the loader -- verify against
    // the TMVA version in use.
    delete factory;
    delete data_loader;
    delete output_file;
}
/// Creates a TMVA reader for the BDT trained under `unique_id`.
///
/// Variable i of `vars` is registered under its training name and bound to
/// `&train_vars[i]`, so `train_vars` must point to at least `vars.size()`
/// Float_t slots that outlive the reader. Every variable is bound to a Float_t
/// slot regardless of its declared type; the caller is responsible for copying
/// (and converting) the current values into `train_vars` before evaluating.
///
/// The "BDT" method is booked from
/// "./<unique_id>_dataloader/weights/<unique_id>_factory_BDT.weights.xml".
///
/// @return a reader allocated with `new`; the caller owns it.
///
/// Declared `inline` because this definition lives in a header; without it,
/// including the header from more than one translation unit fails at link time.
inline TMVA::Reader* SetupReader(std::vector<TV *> vars, Float_t* train_vars, const char* unique_id) {
    TMVA::Reader *reader = new TMVA::Reader("!Color:!Silent");

    // Index loop on purpose: position i ties vars[i] to train_vars[i].
    const size_t var_count = vars.size();
    for (size_t i = 0; i < var_count; ++i)
    {
        reader->AddVariable(vars[i]->GetTrainName(), &train_vars[i]);
    }

    const TString weights_path = TString::Format("./%s_dataloader/weights/%s_factory_BDT.weights.xml", unique_id, unique_id);
    reader->BookMVA("BDT", weights_path);
    return reader;
}
/// Draws `histogram` with a dashed red vertical line at `cut_value` and saves
/// the canvas to "output_files/analysis/<folder>/<histogram name>_canvas.pdf".
///
/// The output directory, including any missing parents, is created first.
/// std::filesystem throws std::filesystem::filesystem_error if that fails.
///
/// Side effects: disables the statistics box on `histogram`. The canvas and
/// the line are heap-allocated and not deleted here.
///
/// Declared `inline` because this definition lives in a header.
inline void DrawBDTProbs(TH1D *histogram, const double cut_value, const char *folder)
{
    // create_directories (plural) also creates "output_files/analysis" when it
    // does not exist yet; create_directory would throw in that case.
    std::filesystem::create_directories(TString::Format("output_files/analysis/%s", folder).Data());

    const TString name = TString::Format("%s_canvas", histogram->GetName());
    TCanvas *c = new TCanvas(name, histogram->GetName(), 0, 0, 800, 600);

    histogram->SetStats(0);
    histogram->Draw();

    // Vertical marker from y = 0 up to the histogram's maximum.
    TLine* line = new TLine(cut_value, 0, cut_value, histogram->GetMaximum());
    line->SetLineColor(kRed);
    line->SetLineStyle(kDashed);
    line->Draw();

    c->Draw();
    c->SaveAs(TString::Format("output_files/analysis/%s/%s.pdf", folder, name.Data()).Data());
}
#endif