|
|
//Functions to make pretty plots from ROOT TMVA training output
//Renata Kopecna
#include "GlobalFunctions.hh"
#include "Design.hpp"
#include "Paths.hpp"
#include "MVAclass.hpp"
#include <sys/stat.h> //mkdir
using namespace std;
//Open the TMVA output file belonging to the given configuration.
//The file name is resolved by GetBDTConfigFile() from the arguments.
//Returns a newly allocated TFile which the caller is responsible for closing and deleting.
//The returned pointer is never NULL: when the file cannot be opened an error is reported
//and the returned object is a zombie, so callers should test file->IsZombie().
TFile *GetTMVAfile(int year, int Run, bool SplitYears, bool KShortDecaysInVelo, int nConfiguration, bool UseLowQ2Range, string customTMbranch, bool gammaTM){
    //Resolve the file name once instead of rebuilding it for every message
    const string fileName = GetBDTConfigFile(SplitYears,year,Run,KShortDecaysInVelo,nConfiguration,UseLowQ2Range, customTMbranch, gammaTM);
    TFile * file = new TFile(fileName.c_str(),"OPEN");
    //'new' never yields NULL; a failed open is signalled by the zombie flag / closed state
    if (file->IsZombie() || !file->IsOpen()) coutERROR("Cannot read input file " + fileName + "!");
    else std::cout << "Reading file '" << fileName << "'" << std::endl;
    return file;
}
//Fetch the correlation matrix histogram from a TMVA output file.
//type selects the matrix ("S" or "B"); the object is looked up under the key "CorrelationMatrix"+type.
//If the object is not found an error is reported via coutERROR and the NULL pointer is passed on to the caller.
TH2F *GetCorrelationMatrix(TFile *file, string type){
    const string key = "CorrelationMatrix" + type;
    TH2F *matrix = static_cast<TH2F*>(file->Get(key.c_str()));
    if (!matrix){
        coutERROR("Couldn't get correlation matrix for " + type + "!");
    }
    return matrix;
}

//Correlation matrix of the signal sample (type "S")
TH2F *GetCorrelationMatrixSignal(TFile *file){
    return GetCorrelationMatrix(file,"S");
}

//Correlation matrix of the background sample (type "B")
TH2F *GetCorrelationMatrixBackground(TFile *file){
    return GetCorrelationMatrix(file,"B");
}
//Name used for the "Method_<X>" parent folder of a method:
//any method whose name contains "BDT" maps to "BDT", every other method maps to its own name.
string oberFolder(string method){
    const bool containsBDT = (method.find("BDT") != std::string::npos);
    if (containsBDT) return "BDT";
    return method;
}
//Fetch the response histogram of a given method from a TMVA output file.
//type is one of S, B, Train_S, Train_B.
//The histogram is looked up as Method_<oberFolder(method)>/<method>/MVA_<method>_<type>.
//If it is not found an error is reported via coutERROR and the NULL pointer is passed on.
TH1F *GetTMVAresponse(TFile *file, string method, string type){
    const string key = "Method_" + oberFolder(method) + "/" + method + "/MVA_" + method + "_" + type;
    TH1F *response = static_cast<TH1F*>(file->Get(key.c_str()));
    if (!response){
        coutERROR("Couldn't get TMVA response for " + method + " and type " + type + "!");
    }
    return response;
}

//Response of the signal sample (type "S")
TH1F *GetTMVAresponseSignal(TFile *file, string method){
    return GetTMVAresponse(file, method,"S");
}

//Response of the background sample (type "B")
TH1F *GetTMVAresponseBackground(TFile *file, string method){
    return GetTMVAresponse(file, method,"B");
}

//Response of the signal training sample (type "Train_S")
TH1F *GetTMVAresponseTrainingSignal(TFile *file, string method){
    return GetTMVAresponse(file, method,"Train_S");
}

//Response of the background training sample (type "Train_B")
TH1F *GetTMVAresponseTrainingBackground(TFile *file, string method){
    return GetTMVAresponse(file, method,"Train_B");
}
//Fetch the signal distribution of an input variable, stored as InputVariables_Id/<variable>__Signal_Id.
//If it is not found an error is reported via coutERROR and the NULL pointer is passed on.
TH1F *GetVariableSignal(TFile *file, string variable){
    const string key = "InputVariables_Id/"+variable+"__Signal_Id";
    TH1F *distribution = static_cast<TH1F*>(file->Get(key.c_str()));
    if (!distribution){
        coutERROR("Couldn't get " + variable + "_S distribution!");
    }
    return distribution;
}

//Fetch the background distribution of an input variable, stored as InputVariables_Id/<variable>__Background_Id.
//If it is not found an error is reported via coutERROR and the NULL pointer is passed on.
TH1F *GetVariableBackground(TFile *file, string variable){
    const string key = "InputVariables_Id/"+variable+"__Background_Id";
    TH1F *distribution = static_cast<TH1F*>(file->Get(key.c_str()));
    if (!distribution){
        coutERROR("Couldn't get " + variable + "_B distribution!");
    }
    return distribution;
}
//Fetch the signal efficiency histogram of a method, stored as
//Method_<oberFolder(method)>/<method>/MVA_<method>_effS.
//If it is not found an error is reported via coutERROR and the NULL pointer is passed on.
TH1F *GetEfficiencySignal(TFile *file, string method){
    const string key = "Method_" + oberFolder(method) + "/" +method + "/MVA_" + method + "_effS";
    TH1F *efficiency = static_cast<TH1F*>(file->Get(key.c_str()));
    if (!efficiency){
        coutERROR("Couldn't get signal efficiency for " + method + "!");
    }
    return efficiency;
}

//Fetch the background efficiency histogram of a method, stored as
//Method_<oberFolder(method)>/<method>/MVA_<method>_effB.
//If it is not found an error is reported via coutERROR and the NULL pointer is passed on.
TH1F *GetEfficiencyBackground(TFile *file, string method){
    const string key = "Method_" + oberFolder(method) + "/" +method + "/MVA_" + method + "_effB";
    TH1F *efficiency = static_cast<TH1F*>(file->Get(key.c_str()));
    if (!efficiency){
        coutERROR("Couldn't get background efficiency for " + method + "!");
    }
    return efficiency;
}
//Fetch the ROC curve (the "_rejBvsS" histogram) of a method, stored as
//Method_<oberFolder(method)>/<method>/MVA_<method>_rejBvsS.
//If it is not found an error is reported via coutERROR and the NULL pointer is passed on.
TH1F *GetROC(TFile *file, string method){
    const string key = "Method_" + oberFolder(method) + "/" +method + "/MVA_" + method + "_rejBvsS";
    TH1F *roc = static_cast<TH1F*>(file->Get(key.c_str()));
    if (!roc){
        coutERROR("Couldn't get ROC curve for " + method + "!");
    }
    return roc;
}
//Return the object stored under "TrainTree" in the file (NULL if there is none); no error is reported here.
TTree *GetTrainTree(TFile *file){
    return static_cast<TTree*>(file->Get("TrainTree"));
}

//Return the object stored under "TestTree" in the file (NULL if there is none); no error is reported here.
TTree *GetTestTree(TFile *file){
    return static_cast<TTree*>(file->Get("TestTree"));
}
int GetTrainEvents(TFile *file){ TTree *t_train = (TTree*)file->Get("TrainTree"); int n = t_train->GetEntries(); delete t_train; return n; } int GetTrainSignalEvents(TFile *file){ TTree *t_train = (TTree*)file->Get("TrainTree"); int nSig = t_train->Draw("classID","classID==0"); delete t_train; coutDebug("nSig = " + to_string(nSig)); return nSig; } int GetTrainBackgroundEvents(TFile *file){ TTree *t_train = (TTree*)file->Get("TrainTree"); int nBkg = t_train->Draw("classID","classID==1"); delete t_train; coutDebug("nBkg = " + to_string(nBkg)); return nBkg; }
//Make plots from file
//Draw the correlation matrix of the given type (S or B) and save it as
//<savePath>Correlation<type>.eps and <savePath>Correlation<type>.root.
//Returns 0 if the matrix could not be retrieved, 1 otherwise.
bool SaveCorrelationPlot(TFile *file, string type, string savePath){
    TH2F *matrix = GetCorrelationMatrix(file, type);
    if (!matrix) return false;

    //Make it pretty
    DesignCorrelationPlots(matrix);

    //Plot it
    TCanvas *canvas = c_Correlation(type);
    canvas->cd();
    matrix->Draw("COLZTEXT");

    //Save it, first as .eps, then under the same name as .root
    string outFile = savePath + "Correlation"+ type +".eps";
    canvas->SaveAs(outFile.c_str());
    replace(outFile,".eps",".root");
    canvas->SaveAs(outFile.c_str());

    //Delete it
    matrix->Clear();
    delete matrix;
    canvas->Clear();
    delete canvas;
    return true;
}
//Load all variable names from MVA reader
//Collect the names returned by MVA_variables(DL).GetAllReaderNames().
//Anything from the first ':' onwards is stripped from each name; names without ':' are kept as they are.
vector <string> v_variables(string DL="") {
    //Automatic storage: the helper object is released on return (it used to be leaked on the heap)
    MVA_variables InputVariables(DL);
    vector <string> v_var;
    for (const auto & var : InputVariables.GetAllReaderNames()){
        //substr(0, npos) yields the whole string, so one call covers both the ':' and the no-':' case
        v_var.push_back(var.substr(0, var.find(":")));
    }
    return v_var;
}
bool SaveVariablesSignalVsBackground(TFile *file, string savePath, string DL=""){ //Check if folder for the plots exists and if not, create one
string folder_path = savePath + "variables/"; struct stat st; if (stat(folder_path.c_str(),&st)!=0 && mkdir(folder_path.c_str(), 0755)==-1){ coutERROR("Folder "+folder_path+" couldn't be created!"); return 0; }
//get vector of variables
vector <string> v_var = v_variables(DL); if (v_var.empty()){ coutERROR("Variable vector is empty!"); return 0; }
//loop over variables
for (auto& var : v_var){ //Create a TCanvas
TCanvas *c_variable = c_VariablesSignalVsBackground(var); c_variable->cd();
//Get signal and background histograms
TH1F *h_variableS = GetVariableSignal(file,var); TH1F *h_variableB = GetVariableBackground(file,var);
//normalize them
h_variableS->Scale(1.0/(h_variableS->GetEntries()*h_variableS->GetXaxis()->GetBinWidth(3))); h_variableB->Scale(1.0/(h_variableB->GetEntries()*h_variableB->GetXaxis()->GetBinWidth(3)));
//Make it pretty
designVariablesSignalVsBackground(h_variableS,h_variableB);
//Plot it
h_variableS->Draw("HIST"); h_variableB->Draw("HIST SAME");
//Save it
string path = folder_path+var+".eps"; c_variable->SaveAs(path.c_str()); replace(path,".eps",".root"); c_variable->SaveAs(path.c_str());
//Delete it (the comments would make a cool DaftPunk song)
h_variableS->Clear(); h_variableB->Clear(); delete h_variableS; delete h_variableB; c_variable->Clear(); delete c_variable;
}
return 1;
}
//Plot the signal and background response of one method and save it as
//<savePath><method>_Response.eps and .root.
//Returns 0 for an unsupported method or when a response histogram is missing, 1 otherwise.
bool SaveMVAResponse(TFile *file, string savePath, string method){
    //Check for valid method
    if (method != "BDT" && method !="BDTG" && method != "MLP"){
        coutERROR("Wrong method used in SaveMVAResponse! Choose from [BDT, BDTG, MLP]!");
        return 0;
    }

    //Get signal and background histograms; the getters report a missing histogram,
    //so just stop here instead of dereferencing NULL further down
    TH1F *h_responseS = GetTMVAresponseSignal(file,method);
    TH1F *h_responseB = GetTMVAresponseBackground(file,method);
    if (h_responseS == NULL || h_responseB == NULL){
        delete h_responseS;
        delete h_responseB;
        return 0;
    }

    //Create a TCanvas
    string c_name = method + "_response";
    TCanvas *c_response = c_VariablesSignalVsBackground(c_name.c_str());
    c_response->cd();

    //Set log scale if not BDT
    if (method != "BDT") c_response->SetLogy();

    //Make it pretty
    designResponseSignalVsBackground(h_responseS,h_responseB, method);

    //Plot it
    h_responseS->Draw("BAR");
    h_responseB->Draw("BAR SAME");

    //Add legend
    TLegend *leg = new TLegend(0.13,0.9,0.4,0.79);
    leg->AddEntry(h_responseS, "Signal","f");
    leg->AddEntry(h_responseB, "Background","f");
    leg->Draw("SAME");

    //Save it
    string path = savePath+method+"_Response.eps";
    c_response->SaveAs(path.c_str());
    replace(path,".eps",".root");
    c_response->SaveAs(path.c_str());

    //Delete it
    h_responseS->Clear();
    h_responseB->Clear();
    delete h_responseS;
    delete h_responseB;
    c_response->Clear();
    //The legend was allocated here and is not owned by the canvas, so release it explicitly
    delete leg;
    delete c_response;
    return 1;
}
//Overlay the response of one method on the test sample (histograms) and on the training sample (points)
//for signal and background, and save it as <savePath><method>_Overtraining.eps and .root.
//Returns 0 for an unsupported method or when one of the four response histograms is missing, 1 otherwise.
bool SaveMVAOvertraining(TFile *file, string savePath, string method){
    //Check for valid method
    if (method != "BDT" && method !="BDTG" && method != "MLP"){
        coutERROR("Wrong method used in SaveMVAOvertraining! Choose from [BDT, BDTG, MLP]!");
        return 0;
    }

    //Get signal and background histograms
    TH1F *h_overtrainS = GetTMVAresponseSignal(file,method);
    TH1F *h_overtrainB = GetTMVAresponseBackground(file,method);
    //Get signal and background histograms from training
    TH1F *h_overtrainTrainS = GetTMVAresponseTrainingSignal(file,method);
    TH1F *h_overtrainTrainB = GetTMVAresponseTrainingBackground(file,method);
    //The getters already report a missing histogram; stop instead of dereferencing NULL
    if (h_overtrainS == NULL || h_overtrainB == NULL || h_overtrainTrainS == NULL || h_overtrainTrainB == NULL){
        delete h_overtrainS;
        delete h_overtrainB;
        delete h_overtrainTrainS;
        delete h_overtrainTrainB;
        return 0;
    }

    //Create a TCanvas
    string c_name = method + "_overtrain";
    TCanvas *c_overtrain = c_VariablesSignalVsBackground(c_name.c_str());
    c_overtrain->cd();

    //Set log scale if not BDT
    if (method != "BDT") c_overtrain->SetLogy();

    //Make it pretty
    designResponseSignalVsBackground(h_overtrainS,h_overtrainB, method);
    designOvertraining(h_overtrainTrainS,h_overtrainTrainB);

    //Plot it
    h_overtrainS->Draw("][ HIST");
    h_overtrainB->Draw("][ HIST SAME");
    h_overtrainTrainS->Draw("PSAME");
    h_overtrainTrainB->Draw("PSAME");

    //Add legends
    TLegend *leg = new TLegend(0.2,0.93,0.55,0.82);
    leg->AddEntry(h_overtrainS, "Signal (test)","f");
    leg->AddEntry(h_overtrainB, "Background (test)","f");
    leg->Draw("SAME");
    TLegend *legTrain = new TLegend(0.55,0.93,0.89,0.82);
    legTrain->AddEntry(h_overtrainTrainS, "Signal (training)","lep");
    legTrain->AddEntry(h_overtrainTrainB, "Background (training)","lep");
    legTrain->Draw("SAME");

    //Save it
    string path = savePath+method+"_Overtraining.eps";
    c_overtrain->SaveAs(path.c_str());
    replace(path,".eps",".root");
    c_overtrain->SaveAs(path.c_str());

    //Delete it
    h_overtrainS->Clear();
    h_overtrainB->Clear();
    h_overtrainTrainS->Clear();
    h_overtrainTrainB->Clear();
    delete h_overtrainS;
    delete h_overtrainB;
    delete h_overtrainTrainS;
    delete h_overtrainTrainB;
    c_overtrain->Clear();
    //The legends were allocated here and are not owned by the canvas, so release them explicitly
    delete leg;
    delete legTrain;
    delete c_overtrain;
    return 1;
}
//Plot signal efficiency, background efficiency, purity and significance of one method versus the cut value
//and save it as <savePath><method>_Efficiency.eps and .root.
//With S = nSig*effS and B = nBkg*effB per bin: purity = S/(S+B), significance = S/sqrt(S+B).
//The significance is drawn on a transparent second pad so that it gets its own y-axis on the right.
//Returns 0 when an efficiency histogram is missing, 1 otherwise.
bool SaveEfficiency(TFile *file, string savePath, string method, int nSig, int nBkg){
    //Get signal+background efficiency; the getters report a missing histogram,
    //so stop here instead of dereferencing NULL
    TH1F *h_efficiencyS = GetEfficiencySignal(file,method);
    TH1F *h_efficiencyB = GetEfficiencyBackground(file,method);
    if (h_efficiencyS == NULL || h_efficiencyB == NULL){
        delete h_efficiencyS;
        delete h_efficiencyB;
        return 0;
    }

    //Create a TCanvas
    string c_name = method + "_efficiency";
    TCanvas *c_eff = c_Efficiency(c_name.c_str());
    c_eff->cd();

    //Create two pads (to get significance y-axis on the right)
    TPad *pad1 = new TPad("pad1","",0,0,1,1);
    TPad *pad2 = new TPad("pad2","",0,0,1,1);
    pad2->SetFillStyle(4000); //will be transparent
    pad2->SetFrameFillStyle(0);
    pad1->SetRightMargin(0.16);
    pad2->SetRightMargin(0.16);
    pad1->Draw();
    pad1->cd();

    //Calculate purity
    TH1F *h_purity = (TH1F*)h_efficiencyS->Clone();
    h_purity->Scale(nSig);
    TH1F *h_SplusB = (TH1F*)h_efficiencyS->Clone();
    h_SplusB->Scale(nSig);
    h_SplusB->Add(h_efficiencyB,nBkg);
    h_purity->Divide(h_SplusB);

    //Calculate significance
    TH1F *h_significance = (TH1F*)h_efficiencyS->Clone();
    h_significance->Scale(nSig);
    //Regular bins are numbered 1..N (0 = underflow, N+1 = overflow). The previous loop ran over 0..N-1
    //and therefore left the last bin as S+B instead of sqrt(S+B). Cover every bin.
    const int nBins = h_SplusB->GetNbinsX();
    for (int b = 0; b <= nBins+1; b++){
        h_SplusB->SetBinContent(b,TMath::Sqrt(h_SplusB->GetBinContent(b))); //TODO check width
    }
    h_significance->Divide(h_SplusB);

    //Make it pretty
    designEfficiency(h_efficiencyS,h_efficiencyB,h_purity,h_significance, method);

    //Add legends
    TLegend *leg = new TLegend(0.15,0.93,0.52,0.82);
    leg->AddEntry(h_efficiencyS, "Signal efficiency","l");
    leg->AddEntry(h_efficiencyB, "Background efficiency","l");
    TLegend *legSignificance = new TLegend(0.52,0.93,0.8,0.82);
    legSignificance->AddEntry(h_purity, "Purity","l");
    legSignificance->AddEntry(h_significance, "Significance","l");

    //Calculate highest significance
    double maxSignificance = h_significance->GetMaximum();
    double maxSignificanceMLPcut = h_significance->GetBinCenter(h_significance->GetMaximumBin());
    double xmin = h_efficiencyS->GetBinLowEdge(1);
    TPaveText *text = significanceText(xmin,nSig, nBkg, maxSignificance, maxSignificanceMLPcut);

    //Plot it
    pad1->cd();
    pad1->SetGridy();
    pad1->SetGridx();
    h_efficiencyS->Draw("SAME");
    h_efficiencyB->Draw("SAME");
    h_purity->Draw("SAME");
    leg->Draw("SAME");
    text->Draw("SAME");
    pad1->Update();
    c_eff->cd();
    pad2->Draw();
    pad2->cd();
    h_significance->Draw("Y+");
    h_significance->Draw("SAME");
    legSignificance->Draw("SAME");
    pad2->Update();
    c_eff->cd();

    //Save it
    string path = savePath+method+"_Efficiency.eps";
    c_eff->SaveAs(path.c_str());
    replace(path,".eps",".root");
    c_eff->SaveAs(path.c_str());

    //Delete it
    h_efficiencyS->Clear();
    h_efficiencyB->Clear();
    h_purity->Clear();
    h_significance->Clear();
    delete h_efficiencyS;
    delete h_efficiencyB;
    delete h_purity;
    delete h_significance;
    //Helper histogram that was never drawn and used to be leaked
    delete h_SplusB;
    c_eff->Clear();
    //The legends were allocated here and are not owned by the pads, so release them explicitly.
    //NOTE(review): 'text' comes from significanceText(); its ownership is not visible here, so it is left alone.
    delete leg;
    delete legSignificance;
    delete c_eff;
    return 1;
}
//Derive the legend label from the histogram title by dropping its first four characters
//(the "MVA_" prefix of the title).
string getROCmethod(TH1F *h_ROC){
    string label(h_ROC->GetTitle());
    label.erase(0,4);
    return label;
}
//Draw the ROC curves of all given methods from one file into a single plot and save it as
//<savePath>ROCs.eps and .root.
//Returns 0 as soon as one method has no ROC histogram in the file, 1 otherwise.
bool SaveROCs(TFile *file, string savePath, vector <string> methods){
    //Get all the histograms first (one lookup per method), so that nothing has to be
    //cleaned up when a method is missing
    vector <TH1F*> v_h_ROC;
    for (auto & method:methods){
        TH1F *h_method = GetROC(file,method);
        if (h_method==NULL){ //Check if histogram exists
            coutERROR("Method " +method + " is not in the MVA output file!");
            return 0;
        }
        v_h_ROC.push_back(h_method);
    }

    //Create a TCanvas
    TCanvas *c_ROC = c_ROCplot("ROCs");
    c_ROC->cd();

    //add Legend
    TLegend *leg = new TLegend(0.25,0.25,0.5,0.35);

    //make it pretty and draw it
    for_indexed(auto & h_ROC : v_h_ROC){
        leg->AddEntry(h_ROC,getROCmethod(h_ROC).c_str(),"l");
        designROC(h_ROC,i);
        if (i>0) h_ROC->Draw("C SAME");
        else h_ROC->Draw("C");
    }
    leg->SetBorderSize(0);
    leg->SetFillStyle(0);
    leg->Draw();

    //Save it
    string path = savePath+"ROCs.eps";
    c_ROC->SaveAs(path.c_str());
    replace(path,".eps",".root");
    c_ROC->SaveAs(path.c_str());

    //Delete it (the histograms themselves are left to the file they were read from)
    v_h_ROC.clear();
    c_ROC->Clear();
    //The legend was allocated here and is not owned by the canvas, so release it explicitly
    delete leg;
    delete c_ROC;
    return 1;
}
//Draw the ROC curve of one method from several files into a single plot and save it as
//<savePath>multipleROCs.eps and .root. The legend entries are labelled <method>_<index of the file>.
//Returns 0 as soon as a file pointer is NULL or a file has no ROC histogram for the method, 1 otherwise.
bool SaveMultipleROCS(vector <TFile *> files, string savePath, string method){
    //Get all the histograms first (one lookup per file), so that nothing has to be
    //cleaned up when one of them is missing
    vector <TH1F*> v_h_ROC;
    for (auto & file:files){
        if (file == NULL){
            coutERROR("Method " +method + " cannot be read: MVA output file pointer is NULL!");
            return 0;
        }
        TH1F *h_file = GetROC(file,method);
        if (h_file==NULL){ //Check if histogram exists
            coutERROR("Method " +method + " is not in the MVA output file " + file->GetPath() + "!");
            return 0;
        }
        v_h_ROC.push_back(h_file);
    }

    //Create a TCanvas
    TCanvas *c_ROC = c_ROCplot("ROCs");
    c_ROC->cd();

    //add Legend
    TLegend *leg = new TLegend(0.22,0.22,0.5,0.38);

    //make it pretty and draw it
    for_indexed(auto & h_ROC : v_h_ROC){
        leg->AddEntry(h_ROC,Form("%s_%i",method.c_str(),((int) i)),"l");
        designROC(h_ROC,i);
        if (i>0) h_ROC->Draw("C SAME");
        else h_ROC->Draw("C");
    }
    leg->SetBorderSize(0);
    leg->SetFillStyle(0);
    leg->Draw();

    //Save it
    string path = savePath+"multipleROCs.eps";
    c_ROC->SaveAs(path.c_str());
    replace(path,".eps",".root");
    c_ROC->SaveAs(path.c_str());

    //Delete it (the histograms themselves are left to the files they were read from)
    v_h_ROC.clear();
    c_ROC->Clear();
    //The legend was allocated here and is not owned by the canvas, so release it explicitly
    delete leg;
    delete c_ROC;
    return 1;
}
//Look up the hardcoded number of signal and background events for a configuration.
//I haven't figured out a better way to get the total number of signal+background events.
//nSig and nBkg are always reset to 0 first. Values exist only for SplitYears==false and Run 1 or 2;
//for the Kshort channel they additionally depend on KshortDecaysInVelo (LL vs DD).
//Run 12 and the split-years case are still TO FILL and end up with a warning and zeros.
//Any other Run (with SplitYears==false) returns immediately, leaving both outputs at 0 without a message.
//year, nConfiguration and UseLowQ2Range are currently not used.
void nEvents(int year, int Run, bool SplitYears, bool KshortDecaysInVelo,int nConfiguration,bool UseLowQ2Range, int& nSig, int& nBkg){
    nSig = 0;
    nBkg = 0;

    int sig = 0;
    int bkg = 0;
    if (!SplitYears){
        switch (Run){
            case 1:
                if (KshortChannel){
                    if (KshortDecaysInVelo){ sig = 97; bkg = 451; } //Run1 LL
                    else { sig = 158; bkg = 2035; } //Run1 DD
                }
                else { sig = 67; bkg = 14; }
                break;
            case 2:
                if (KshortChannel){
                    if (KshortDecaysInVelo){ sig = 291; bkg = 1016; } //Run2 LL
                    else { sig = 492; bkg = 4813; } //Run2 DD
                }
                else { sig = 297; bkg = 203; }
                break;
            case 12:
                //TO FILL
                break;
            default:
                //Unknown run: nothing to look up
                return;
        }
    }
    //else: Split Years, TO FILL

    if (sig==0 && bkg==0) coutWarning("No event numbers have been assigned! Please fill in the hardcoded values in Test.cpp");
    else cout << "[INFO]\t\t nSig=" << sig << " nBkg=" << bkg << endl;

    nSig = sig;
    nBkg = bkg;
}
bool SaveAllFromOneFile(int year, int Run, bool SplitYears, bool KshortDecaysInVelo,int nConfiguration, bool UseLowQ2Range, string customTMbranch, bool gammaTM){ vector <string> methods; if (KshortChannel) methods = {"BDT","BDTG"}; else methods = {"BDT","BDTG","MLP"};
TFile *file= GetTMVAfile(year, Run, SplitYears, KshortDecaysInVelo, nConfiguration, UseLowQ2Range, customTMbranch, gammaTM); if (file == NULL) return 0;
//Check if folder for the plots exists and if not, create one
string folder_path = GetTMVAplotsFolder(year, Run, SplitYears, KshortDecaysInVelo,nConfiguration,UseLowQ2Range, customTMbranch, gammaTM); //First, try through Unix command (easier than trying to make a workaround in c++)
string command = "mkdir -p " + folder_path; system(command.c_str()); //Now check if it has been created indeed
struct stat st; if (stat(folder_path.c_str(),&st)!=0){ coutERROR("Folder "+folder_path+" couldn't be created!"); return 0; }
//Save correlations plots
if (!SaveCorrelationPlot(file,"S",folder_path)) return 0; if (!SaveCorrelationPlot(file,"B",folder_path)) return 0;
//Save all variables plots
SaveVariablesSignalVsBackground(file, folder_path, (KshortChannel ? (KshortDecaysInVelo ? "LL" : "DD") : ""));
int nSig = 0; int nBkg = 0;
nEvents(year, Run, SplitYears,KshortDecaysInVelo,nConfiguration,UseLowQ2Range, nSig, nBkg);
for (auto &method : methods){ //Save overtraining
if (!SaveMVAOvertraining(file,folder_path,method)) return 0; //Save efficiency
if (!SaveEfficiency(file,folder_path,method,nSig, nBkg)) return 0; }
//Save ROCs
if (!SaveROCs(file, folder_path,methods)) return 0;
return 1; }
bool SaveMultipleROCS(int Run, int nLow, int nHigh){
vector <TFile*> testFiles;
for (int f = nLow; f < nHigh; f++){ testFiles.push_back(GetTMVAfile(2011,Run,false,false,f,false,"",true)); } string path = GetTMVAplotsFolder(2011,Run,false,false,0,false); replace(path,"Config0/",""); coutDebug(path); return (SaveMultipleROCS(testFiles,path,"MLP"));
}
|