269 lines
6.7 KiB
C
269 lines
6.7 KiB
C
|
#ifndef BDT_CLASSIFICATION
|
||
|
#define BDT_CLASSIFICATION
|
||
|
|
||
|
#include <algorithm>
#include <cmath>
#include <filesystem>
#include <iostream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "RtypesCore.h"
|
||
|
|
||
|
/// Tag describing which scalar type a training variable's value is stored as.
/// Kept as an unscoped enum so existing unqualified uses keep compiling.
enum TVT
{
    Double, ///< Value is held in a Double_t buffer.
    Float,  ///< Value is held in a Float_t buffer.
    Int     ///< Declared, but no code in this file creates or handles it.
};
|
||
|
|
||
|
class TV
|
||
|
{
|
||
|
private:
|
||
|
std::string data_name;
|
||
|
std::string mc_name;
|
||
|
std::string train_name;
|
||
|
TVT type;
|
||
|
Double_t mc_double_value;
|
||
|
Float_t mc_float_value;
|
||
|
Double_t data_double_value;
|
||
|
Float_t data_float_value;
|
||
|
|
||
|
TV(std::string data_name, std::string mc_name, std::string train_name, TVT type)
|
||
|
: data_name{data_name}, mc_name{mc_name}, train_name{train_name}, type{type}
|
||
|
{
|
||
|
}
|
||
|
|
||
|
public:
|
||
|
static TV *Float(std::string data_name, std::string mc_name, std::string train_name)
|
||
|
{
|
||
|
return new TV(data_name, mc_name, train_name, TVT::Float);
|
||
|
}
|
||
|
|
||
|
static TV *Float(std::string data_name, std::string mc_name)
|
||
|
{
|
||
|
return new TV(data_name, mc_name, data_name, TVT::Float);
|
||
|
}
|
||
|
|
||
|
static TV *Float(std::string data_name)
|
||
|
{
|
||
|
return new TV(data_name, data_name, data_name, TVT::Float);
|
||
|
}
|
||
|
|
||
|
static TV *Double(std::string data_name, std::string mc_name, std::string train_name)
|
||
|
{
|
||
|
return new TV(data_name, mc_name, train_name, TVT::Double);
|
||
|
}
|
||
|
|
||
|
static TV *Double(std::string data_name, std::string mc_name)
|
||
|
{
|
||
|
return new TV(data_name, mc_name, data_name, TVT::Double);
|
||
|
}
|
||
|
|
||
|
static TV *Double(std::string data_name)
|
||
|
{
|
||
|
return new TV(data_name, data_name, data_name, TVT::Double);
|
||
|
}
|
||
|
|
||
|
const char *GetDataName()
|
||
|
{
|
||
|
return data_name.c_str();
|
||
|
}
|
||
|
|
||
|
const char *GetMCName()
|
||
|
{
|
||
|
return mc_name.c_str();
|
||
|
}
|
||
|
|
||
|
const char *GetTrainName()
|
||
|
{
|
||
|
return train_name.c_str();
|
||
|
}
|
||
|
|
||
|
Double_t *GetMCDoubleRef()
|
||
|
{
|
||
|
return &mc_double_value;
|
||
|
}
|
||
|
|
||
|
Float_t *GetMCFloatRef()
|
||
|
{
|
||
|
return &mc_float_value;
|
||
|
}
|
||
|
|
||
|
Double_t *GetDataDoubleRef()
|
||
|
{
|
||
|
return &data_double_value;
|
||
|
}
|
||
|
|
||
|
Float_t *GetDataFloatRef()
|
||
|
{
|
||
|
return &data_float_value;
|
||
|
}
|
||
|
|
||
|
Double_t GetDataDouble()
|
||
|
{
|
||
|
return data_double_value;
|
||
|
}
|
||
|
|
||
|
Float_t GetDataFloat()
|
||
|
{
|
||
|
return data_float_value;
|
||
|
}
|
||
|
|
||
|
void PrintDataValue(int entry)
|
||
|
{
|
||
|
std::cout << data_name << " (" << entry << "): ";
|
||
|
if (IsDouble())
|
||
|
{
|
||
|
std::cout << data_double_value;
|
||
|
}
|
||
|
else if (IsFloat())
|
||
|
{
|
||
|
std::cout << data_float_value;
|
||
|
}
|
||
|
|
||
|
std::cout << std::endl;
|
||
|
}
|
||
|
|
||
|
void PrintMCValue(int entry)
|
||
|
{
|
||
|
std::cout << mc_name << " (" << entry << "): ";
|
||
|
if (IsDouble())
|
||
|
{
|
||
|
std::cout << mc_double_value;
|
||
|
}
|
||
|
else if (IsFloat())
|
||
|
{
|
||
|
std::cout << mc_float_value;
|
||
|
}
|
||
|
|
||
|
std::cout << std::endl;
|
||
|
}
|
||
|
|
||
|
bool IsDataFinite()
|
||
|
{
|
||
|
if (IsDouble())
|
||
|
{
|
||
|
return std::isfinite(data_double_value);
|
||
|
}
|
||
|
else if (IsFloat())
|
||
|
{
|
||
|
return std::isfinite(data_float_value);
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
bool IsMCFinite()
|
||
|
{
|
||
|
if (IsDouble())
|
||
|
{
|
||
|
return std::isfinite(mc_double_value);
|
||
|
}
|
||
|
else if (IsFloat())
|
||
|
{
|
||
|
return std::isfinite(mc_float_value);
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
bool IsDouble()
|
||
|
{
|
||
|
return type == TVT::Double;
|
||
|
}
|
||
|
|
||
|
bool IsFloat()
|
||
|
{
|
||
|
return type == TVT::Float;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
/// Wires every variable's value buffers to the input chains and output trees.
///
/// For each variable:
///  - its data buffer becomes the branch address of its data name in data_chain;
///  - its MC buffer becomes the branch address of its MC name in mc_chain;
///  - sig_tree gets a branch under the training name backed by the MC buffer;
///  - bkg_tree gets a branch under the training name backed by the data buffer.
/// Variables that are neither Double nor Float are left unconnected.
///
/// Declared inline because it is defined in a header; without it, including
/// the header from two translation units yields duplicate definitions.
///
/// @param vars       Variables to connect; entries must be non-null.
/// @param data_chain Chain supplying the data branches.
/// @param mc_chain   Chain supplying the MC branches.
/// @param sig_tree   Tree that receives branches backed by the MC buffers.
/// @param bkg_tree   Tree that receives branches backed by the data buffers.
inline void ConnectVarsToData(std::vector<TV *> vars, TChain *data_chain, TChain *mc_chain, TTree *sig_tree, TTree *bkg_tree)
{
    for (TV *var : vars)
    {
        const char *train_name = var->GetTrainName();

        if (var->IsDouble())
        {
            // Leaf list "<name>/D" declares a double-precision leaf.
            const TString leaf_list = TString::Format("%s/D", train_name);

            data_chain->SetBranchAddress(var->GetDataName(), var->GetDataDoubleRef());
            mc_chain->SetBranchAddress(var->GetMCName(), var->GetMCDoubleRef());
            sig_tree->Branch(train_name, var->GetMCDoubleRef(), leaf_list);
            bkg_tree->Branch(train_name, var->GetDataDoubleRef(), leaf_list);
        }
        else if (var->IsFloat())
        {
            // Leaf list "<name>/F" declares a single-precision leaf.
            const TString leaf_list = TString::Format("%s/F", train_name);

            data_chain->SetBranchAddress(var->GetDataName(), var->GetDataFloatRef());
            mc_chain->SetBranchAddress(var->GetMCName(), var->GetMCFloatRef());
            sig_tree->Branch(train_name, var->GetMCFloatRef(), leaf_list);
            bkg_tree->Branch(train_name, var->GetDataFloatRef(), leaf_list);
        }
    }
}
|
||
|
|
||
|
/// Books, trains, tests and evaluates a single TMVA BDT on the given trees.
///
/// Output goes to "<unique_id>_out.root"; the factory is named
/// "<unique_id>_factory" and the data loader "<unique_id>_dataloader".
/// Signal and background trees are both added with weight 1.0.
/// Variables that are neither Double nor Float are not registered.
///
/// If the output file cannot be opened, an error is printed and nothing is
/// trained. The factory, data loader and output file are released before
/// returning.
///
/// Declared inline because it is defined in a header.
///
/// @param vars      Variables to register under their training names.
/// @param unique_id Prefix for the output file, factory and data loader names.
/// @param sig_tree  Tree used as signal sample.
/// @param bkg_tree  Tree used as background sample.
inline void TrainBDT(std::vector<TV *> vars, const char* unique_id, TTree *sig_tree, TTree *bkg_tree)
{
    const TString outfile_name = TString::Format("%s_out.root", unique_id);
    TFile *output_file = TFile::Open(outfile_name, "RECREATE");
    if (output_file == nullptr || output_file->IsZombie())
    {
        // Without a usable output file the later Close() would dereference null.
        std::cerr << "@TMVA: Could not open output file: " << outfile_name.Data() << std::endl;
        delete output_file;
        return;
    }

    TString factory_options("V:Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Auto");

    TMVA::Factory *factory = new TMVA::Factory(TString::Format("%s_factory", unique_id), output_file, factory_options);
    TMVA::DataLoader *data_loader = new TMVA::DataLoader(TString::Format("%s_dataloader", unique_id));

    for (TV *var : vars)
    {
        std::cout << "@TMVA: Adding Branch: " << var->GetTrainName() << std::endl;
        if (var->IsDouble())
        {
            data_loader->AddVariable(var->GetTrainName(), 'D');
        }
        else if (var->IsFloat())
        {
            data_loader->AddVariable(var->GetTrainName(), 'F');
        }
    }

    const Double_t signal_weight = 1.0;
    const Double_t background_weight = 1.0;

    data_loader->AddSignalTree(sig_tree, signal_weight);
    data_loader->AddBackgroundTree(bkg_tree, background_weight);
    data_loader->PrepareTrainingAndTestTree("", "", "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=EqualNumEvents:!V");

    factory->BookMethod(data_loader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=600:MinNodeSize=2.5%:CreateMVAPdfs:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20");

    factory->TrainAllMethods();
    factory->TestAllMethods();
    factory->EvaluateAllMethods();

    output_file->Close();

    // Release in the same order as the ROOT TMVA classification tutorial:
    // close the file first, then drop the factory and the data loader.
    delete factory;
    delete data_loader;
    delete output_file;
}
|
||
|
|
||
|
/// Creates a TMVA reader for the BDT trained under `unique_id`.
///
/// Each variable is registered under its training name and bound to
/// `train_vars[i]`, where `i` is its position in `vars`. The weights are
/// loaded from
/// "./<unique_id>_dataloader/weights/<unique_id>_factory_BDT.weights.xml".
///
/// Declared inline because it is defined in a header.
///
/// @param vars       Variables to register, in the order of `train_vars`.
/// @param train_vars Array with at least `vars.size()` elements. The reader is
///                   given the address of each element, so the array
///                   presumably has to outlive the reader - confirm against
///                   the TMVA::Reader documentation.
/// @param unique_id  Prefix used when the BDT was trained.
/// @return Newly allocated reader; the caller owns it.
inline TMVA::Reader* SetupReader(std::vector<TV *> vars, Float_t* train_vars, const char* unique_id) {
    TMVA::Reader *reader = new TMVA::Reader("!Color:!Silent");

    for (size_t i = 0; i < vars.size(); ++i)
    {
        reader->AddVariable(vars[i]->GetTrainName(), &train_vars[i]);
    }

    const TString weights_path =
        TString::Format("./%s_dataloader/weights/%s_factory_BDT.weights.xml", unique_id, unique_id);
    reader->BookMVA("BDT", weights_path);

    return reader;
}
|
||
|
|
||
|
/// Draws a histogram with a dashed red vertical line at `cut_value` and saves
/// it as "output_files/analysis/<folder>/<histogram name>_canvas.pdf".
///
/// The output directory is created together with any missing parents. The
/// canvas and the line are allocated with `new` and not deleted here.
///
/// Declared inline because it is defined in a header.
///
/// @param histogram Histogram to draw; a null pointer is reported and ignored.
/// @param cut_value X position of the vertical line.
/// @param folder    Sub-directory of "output_files/analysis" to write into.
inline void DrawBDTProbs(TH1D *histogram, const double cut_value, const char *folder)
{
    if (histogram == nullptr)
    {
        std::cerr << "DrawBDTProbs: histogram is null, nothing drawn" << std::endl;
        return;
    }

    // create_directories also builds "output_files/analysis" when it is
    // missing; create_directory would fail in that case.
    std::filesystem::create_directories(TString::Format("output_files/analysis/%s", folder).Data());

    const TString name = TString::Format("%s_canvas", histogram->GetName());
    TCanvas *c = new TCanvas(name, histogram->GetName(), 0, 0, 800, 600);

    histogram->SetStats(0);
    histogram->Draw();

    // Vertical marker spanning from 0 up to the histogram maximum.
    TLine* line = new TLine(cut_value, 0, cut_value, histogram->GetMaximum());
    line->SetLineColor(kRed);
    line->SetLineStyle(kDashed);
    line->Draw();

    c->Draw();
    c->SaveAs(TString::Format("output_files/analysis/%s/%s.pdf", folder, name.Data()).Data());
}
|
||
|
|
||
|
#endif
|