//Functions to make pretty plots from ROOT TMVA training output //Renata Kopecna #include "GlobalFunctions.hh" #include "Design.hpp" #include "Paths.hpp" #include "MVAclass.hpp" #include //mkdir using namespace std; TFile *GetTMVAfile(int year, int Run, bool SplitYears, bool KShortDecaysInVelo, int nConfiguration, bool UseLowQ2Range, string customTMbranch, bool gammaTM){ TFile * file = new TFile(GetBDTConfigFile(SplitYears,year,Run,KShortDecaysInVelo,nConfiguration,UseLowQ2Range, customTMbranch, gammaTM).c_str(),"OPEN"); if (file == NULL) coutERROR("Cannot read input file " + GetBDTConfigFile(SplitYears,year,Run,KShortDecaysInVelo,nConfiguration,UseLowQ2Range, customTMbranch, gammaTM) + "!"); std::cout << "Reading file '" << GetBDTConfigFile(SplitYears,year,Run,KShortDecaysInVelo,nConfiguration,UseLowQ2Range, customTMbranch, gammaTM) << "'" << std::endl; return file; } TH2F *GetCorrelationMatrix(TFile *file, string type){ //type = S,B string path = "CorrelationMatrix" + type; TH2F *h2_corrMatrix = (TH2F*)file->Get(path.c_str()); if (h2_corrMatrix == NULL) coutERROR("Couldn't get correlation matrix for " + type + "!"); return h2_corrMatrix; } TH2F *GetCorrelationMatrixSignal(TFile *file){ return GetCorrelationMatrix(file,"S"); } TH2F *GetCorrelationMatrixBackground(TFile *file){ return GetCorrelationMatrix(file,"B"); } string oberFolder(string method){ return (method.find("BDT") != std::string::npos ? "BDT" : method); } TH1F *GetTMVAresponse(TFile *file, string method, string type){ //type = S,B, Train_S, Train_B string path = "Method_" + oberFolder(method) + "/" + method + "/MVA_" + method + "_" + type; TH1F *h_TMVAresp = (TH1F*)file->Get(path.c_str()); if (h_TMVAresp == NULL) coutERROR("Couldn't get TMVA response for " + method + " and type " + type + "!"); return h_TMVAresp; } TH1F *GetTMVAresponseSignal(TFile *file, string method){ return GetTMVAresponse(file, method,"S"); } TH1F *GetTMVAresponseBackground(TFile *file, string method){ return GetTMVAresponse(file, method,"B"); } TH1F *GetTMVAresponseTrainingSignal(TFile *file, string method){ return GetTMVAresponse(file, method,"Train_S"); } TH1F *GetTMVAresponseTrainingBackground(TFile *file, string method){ return GetTMVAresponse(file, method,"Train_B"); } TH1F *GetVariableSignal(TFile *file, string variable){ string path = "InputVariables_Id/"+variable+"__Signal_Id"; TH1F *h_varS = (TH1F*)file->Get(path.c_str()); if (h_varS == NULL) coutERROR("Couldn't get " + variable + "_S distribution!"); return h_varS; } TH1F *GetVariableBackground(TFile *file, string variable){ string path = "InputVariables_Id/"+variable+"__Background_Id"; TH1F *h_varB = (TH1F*)file->Get(path.c_str()); if (h_varB == NULL) coutERROR("Couldn't get " + variable + "_B distribution!"); return h_varB; } TH1F *GetEfficiencySignal(TFile *file, string method){ string path = "Method_" + oberFolder(method) + "/" +method + "/MVA_" + method + "_effS"; TH1F *h_effS = (TH1F*)file->Get(path.c_str()); if (h_effS == NULL) coutERROR("Couldn't get signal efficiency for " + method + "!"); return h_effS; } TH1F *GetEfficiencyBackground(TFile *file, string method){ string path = "Method_" + oberFolder(method) + "/" +method + "/MVA_" + method + "_effB"; TH1F *h_effB = (TH1F*)file->Get(path.c_str()); if (h_effB == NULL) coutERROR("Couldn't get background efficiency for " + method + "!"); return h_effB; } TH1F *GetROC(TFile *file, string method){ string path = "Method_" + oberFolder(method) + "/" +method + "/MVA_" + method + "_rejBvsS"; TH1F *h_ROC = (TH1F*)file->Get(path.c_str()); if (h_ROC == NULL) coutERROR("Couldn't get ROC curve for " + method + "!"); return h_ROC; } TTree *GetTrainTree(TFile *file){ TTree *t_train = (TTree*)file->Get("TrainTree"); return t_train; } TTree *GetTestTree(TFile *file){ TTree *t_test = (TTree*)file->Get("TestTree"); return t_test; } int GetTrainEvents(TFile *file){ TTree *t_train = (TTree*)file->Get("TrainTree"); int n = t_train->GetEntries(); delete t_train; return n; } int GetTrainSignalEvents(TFile *file){ TTree *t_train = (TTree*)file->Get("TrainTree"); int nSig = t_train->Draw("classID","classID==0"); delete t_train; coutDebug("nSig = " + to_string(nSig)); return nSig; } int GetTrainBackgroundEvents(TFile *file){ TTree *t_train = (TTree*)file->Get("TrainTree"); int nBkg = t_train->Draw("classID","classID==1"); delete t_train; coutDebug("nBkg = " + to_string(nBkg)); return nBkg; } //Make plots from file bool SaveCorrelationPlot(TFile *file, string type, string savePath){ TH2F *h_corr = GetCorrelationMatrix(file, type); //S or B if (h_corr == NULL) return 0; DesignCorrelationPlots(h_corr); TCanvas *c_corr= c_Correlation(type); c_corr->cd(); h_corr->Draw("COLZTEXT"); string path = savePath + "Correlation"+ type +".eps"; c_corr->SaveAs(path.c_str()); replace(path,".eps",".root"); c_corr->SaveAs(path.c_str()); h_corr->Clear(); delete h_corr; c_corr->Clear(); delete c_corr; return 1; } //Load all variable names from MVA reader vector v_variables(string DL="") { MVA_variables * InputVariables = new MVA_variables(DL); vector v_var; for (auto & var : InputVariables->GetAllReaderNames()){ size_t pos = var.find(":"); if (pos != string::npos){ string tmp = var; tmp.erase(tmp.begin()+pos,tmp.end()); v_var.push_back(tmp);//var.erase(var.begin(),var.end())); } else v_var.push_back(var); } return v_var; } bool SaveVariablesSignalVsBackground(TFile *file, string savePath, string DL=""){ //Check if folder for the plots exists and if not, create one string folder_path = savePath + "variables/"; struct stat st; if (stat(folder_path.c_str(),&st)!=0 && mkdir(folder_path.c_str(), 0755)==-1){ coutERROR("Folder "+folder_path+" couldn't be created!"); return 0; } //get vector of variables vector v_var = v_variables(DL); if (v_var.empty()){ coutERROR("Variable vector is empty!"); return 0; } //loop over variables for (auto& var : v_var){ //Create a TCanvas TCanvas *c_variable = c_VariablesSignalVsBackground(var); c_variable->cd(); //Get signal and background histograms TH1F *h_variableS = GetVariableSignal(file,var); TH1F *h_variableB = GetVariableBackground(file,var); //normalize them h_variableS->Scale(1.0/(h_variableS->GetEntries()*h_variableS->GetXaxis()->GetBinWidth(3))); h_variableB->Scale(1.0/(h_variableB->GetEntries()*h_variableB->GetXaxis()->GetBinWidth(3))); //Make it pretty designVariablesSignalVsBackground(h_variableS,h_variableB); //Plot it h_variableS->Draw("HIST"); h_variableB->Draw("HIST SAME"); //Save it string path = folder_path+var+".eps"; c_variable->SaveAs(path.c_str()); replace(path,".eps",".root"); c_variable->SaveAs(path.c_str()); //Delete it (the comments would make a cool DaftPunk song) h_variableS->Clear(); h_variableB->Clear(); delete h_variableS; delete h_variableB; c_variable->Clear(); delete c_variable; } return 1; } bool SaveMVAResponse(TFile *file, string savePath, string method){ //Check if folder for the plots exists and if not, create one //Check for valid method if (method != "BDT" && method !="BDTG" && method != "MLP"){ coutERROR("Wrong method used in SaveMVAResponse! Choose from [BDT, BDTG, MLP]!"); return 0; } //Create a TCanvas string c_name = method + "_response"; TCanvas *c_response = c_VariablesSignalVsBackground(c_name.c_str()); c_response->cd(); //Get signal and background histograms TH1F *h_responseS = GetTMVAresponseSignal(file,method); TH1F *h_responseB = GetTMVAresponseBackground(file,method); //Set log scale if not BDT if (method != "BDT") c_response->SetLogy(); //Make it pretty designResponseSignalVsBackground(h_responseS,h_responseB, method); //Plot it h_responseS->Draw("BAR"); h_responseB->Draw("BAR SAME"); //Add legend TLegend *leg = new TLegend(0.13,0.9,0.4,0.79); leg->AddEntry(h_responseS, "Signal","f"); leg->AddEntry(h_responseB, "Background","f"); leg->Draw("SAME"); //Save it string path = savePath+method+"_Response.eps"; c_response->SaveAs(path.c_str()); replace(path,".eps",".root"); c_response->SaveAs(path.c_str()); //Delete it (the comments would make a cool DaftPunk song) h_responseS->Clear(); h_responseB->Clear(); delete h_responseS; delete h_responseB; c_response->Clear(); delete c_response; return 1; } bool SaveMVAOvertraining(TFile *file, string savePath, string method){ //Check if folder for the plots exists and if not, create one //Check for valid method if (method != "BDT" && method !="BDTG" && method != "MLP"){ coutERROR("Wrong method used in SaveMVAOvertraining! Choose from [BDT, BDTG, MLP]!"); return 0; } //Create a TCanvas string c_name = method + "_overtrain"; TCanvas *c_overtrain = c_VariablesSignalVsBackground(c_name.c_str()); c_overtrain->cd(); //Get signal and background histograms TH1F *h_overtrainS = GetTMVAresponseSignal(file,method); TH1F *h_overtrainB = GetTMVAresponseBackground(file,method); //Get signal and background histograms from training TH1F *h_overtrainTrainS = GetTMVAresponseTrainingSignal(file,method); TH1F *h_overtrainTrainB = GetTMVAresponseTrainingBackground(file,method); //Set log scale if not BDT if (method != "BDT") c_overtrain->SetLogy(); //Make it pretty designResponseSignalVsBackground(h_overtrainS,h_overtrainB, method); designOvertraining(h_overtrainTrainS,h_overtrainTrainB); //Plot it h_overtrainS->Draw("][ HIST"); h_overtrainB->Draw("][ HIST SAME"); h_overtrainTrainS->Draw("PSAME"); h_overtrainTrainB->Draw("PSAME"); //Add legends TLegend *leg = new TLegend(0.2,0.93,0.55,0.82);; leg->AddEntry(h_overtrainS, "Signal (test)","f"); leg->AddEntry(h_overtrainB, "Background (test)","f"); leg->Draw("SAME"); TLegend *legTrain = new TLegend(0.55,0.93,0.89,0.82); legTrain->AddEntry(h_overtrainTrainS, "Signal (training)","lep"); legTrain->AddEntry(h_overtrainTrainB, "Background (training)","lep"); legTrain->Draw("SAME"); //Save it string path = savePath+method+"_Overtraining.eps"; c_overtrain->SaveAs(path.c_str()); replace(path,".eps",".root"); c_overtrain->SaveAs(path.c_str()); //Delete it h_overtrainS->Clear(); h_overtrainB->Clear(); h_overtrainTrainS->Clear(); h_overtrainTrainB->Clear(); delete h_overtrainS; delete h_overtrainB; delete h_overtrainTrainS; delete h_overtrainTrainB; c_overtrain->Clear(); delete c_overtrain; return 1; } bool SaveEfficiency(TFile *file, string savePath, string method, int nSig, int nBkg){ //Create a TCanvas string c_name = method + "_efficiency"; TCanvas *c_eff = c_Efficiency(c_name.c_str()); c_eff->cd(); //Create two pads (to get significance y-axis on the right) TPad *pad1 = new TPad("pad1","",0,0,1,1); TPad *pad2 = new TPad("pad2","",0,0,1,1); pad2->SetFillStyle(4000); //will be transparent pad2->SetFrameFillStyle(0); pad1->SetRightMargin(0.16); pad2->SetRightMargin(0.16); pad1->Draw(); pad1->cd(); //Get signal+background efficiency TH1F *h_efficiencyS = GetEfficiencySignal(file,method); TH1F *h_efficiencyB = GetEfficiencyBackground(file,method); //Calculate purity TH1F *h_purity = (TH1F*)h_efficiencyS->Clone(); h_purity->Scale(nSig); TH1F *h_SplusB = (TH1F*)h_efficiencyS->Clone(); h_SplusB->Scale(nSig); h_SplusB->Add(h_efficiencyB,nBkg); h_purity->Divide(h_SplusB); //Calculate significance TH1F *h_significance = (TH1F*)h_efficiencyS->Clone(); h_significance->Scale(nSig); for (int b = 0; b < h_SplusB->GetNbinsX(); b++){ h_SplusB->SetBinContent(b,TMath::Sqrt(h_SplusB->GetBinContent(b))); //TODO check width } h_significance->Divide(h_SplusB); //Make it pretty designEfficiency(h_efficiencyS,h_efficiencyB,h_purity,h_significance, method); //Add legends TLegend *leg = new TLegend(0.15,0.93,0.52,0.82); leg->AddEntry(h_efficiencyS, "Signal efficiency","l"); leg->AddEntry(h_efficiencyB, "Background efficiency","l"); TLegend *legSignificance = new TLegend(0.52,0.93,0.8,0.82); legSignificance->AddEntry(h_purity, "Purity","l"); legSignificance->AddEntry(h_significance, "Significance","l"); //Calculate highest significance double maxSignificance = h_significance->GetMaximum(); double maxSignificanceMLPcut = h_significance->GetBinCenter(h_significance->GetMaximumBin()); double xmin = h_efficiencyS->GetBinLowEdge(1); TPaveText *text = significanceText(xmin,nSig, nBkg, maxSignificance, maxSignificanceMLPcut); //Plot it pad1->cd(); pad1->SetGridy(); pad1->SetGridx(); h_efficiencyS->Draw("SAME"); h_efficiencyB->Draw("SAME"); h_purity->Draw("SAME"); leg->Draw("SAME"); text->Draw("SAME"); pad1->Update(); c_eff->cd(); pad2->Draw(); pad2->cd(); h_significance->Draw("Y+"); h_significance->Draw("SAME"); legSignificance->Draw("SAME"); pad2->Update(); c_eff->cd(); //Save it string path = savePath+method+"_Efficiency.eps"; c_eff->SaveAs(path.c_str()); replace(path,".eps",".root"); c_eff->SaveAs(path.c_str()); //Delete it h_efficiencyS->Clear(); h_efficiencyB->Clear(); h_purity->Clear(); h_significance->Clear(); delete h_efficiencyS; delete h_efficiencyB; delete h_purity; delete h_significance; c_eff->Clear(); delete c_eff; return 1; } string getROCmethod(TH1F *h_ROC){ string name = h_ROC->GetTitle(); name.erase(0,4); //removes MVA_ from the title return name; } bool SaveROCs(TFile *file, string savePath, vector methods){ //Create a TCanvas TCanvas *c_ROC = c_ROCplot("ROCs"); c_ROC->cd(); //Get all the histograms vector v_h_ROC; for (auto & method:methods){ if (GetROC(file,method)==NULL){ //Check if histogram exists coutERROR("Method " +method + "is not in the MVA output file!"); return 0; } else v_h_ROC.push_back(GetROC(file,method)); } //add Legend TLegend *leg = new TLegend(0.25,0.25,0.5,0.35); //make it pretty and draw it for_indexed(auto & h_ROC : v_h_ROC){ leg->AddEntry(h_ROC,getROCmethod(h_ROC).c_str(),"l"); designROC(h_ROC,i); if (i>0) h_ROC->Draw("C SAME"); else h_ROC->Draw("C"); } leg->SetBorderSize(0); leg->SetFillStyle(0); leg->Draw(); //Save it string path = savePath+"ROCs.eps"; c_ROC->SaveAs(path.c_str()); replace(path,".eps",".root"); c_ROC->SaveAs(path.c_str()); //Delete it v_h_ROC.clear(); c_ROC->Clear(); delete c_ROC; return 1; } bool SaveMultipleROCS(vector files, string savePath, string method){ //Create a TCanvas TCanvas *c_ROC = c_ROCplot("ROCs"); c_ROC->cd(); //Get all the histograms vector v_h_ROC; for (auto & file:files){ if (GetROC(file,method)==NULL){ //Check if histogram exists coutERROR("Method " +method + "is not in the MVA output file " + file->GetPath() + "!"); return 0; } else v_h_ROC.push_back(GetROC(file,method)); } //add Legend TLegend *leg = new TLegend(0.22,0.22,0.5,0.38); //make it pretty and draw it for_indexed(auto & h_ROC : v_h_ROC){ leg->AddEntry(h_ROC,Form("%s_%i",method.c_str(),((int) i)),"l"); designROC(h_ROC,i); if (i>0) h_ROC->Draw("C SAME"); else h_ROC->Draw("C"); } leg->SetBorderSize(0); leg->SetFillStyle(0); leg->Draw(); //Save it string path = savePath+"multipleROCs.eps"; c_ROC->SaveAs(path.c_str()); replace(path,".eps",".root"); c_ROC->SaveAs(path.c_str()); //Delete it v_h_ROC.clear(); c_ROC->Clear(); delete c_ROC; return 1; } void nEvents(int year, int Run, bool SplitYears, bool KshortDecaysInVelo,int nConfiguration,bool UseLowQ2Range, int& nSig, int& nBkg){ //I haven't figured out a better way to get the total number of signal+background events int arr[2]={0,0}; nSig = 0; nBkg = 0; if (KshortChannel){ //TO FILL if (!SplitYears){ if (Run==1){ if (KshortDecaysInVelo){ //Run1 LL arr[0] = 97; arr[1] = 451; } else{ //Run1 DD arr[0] = 158; arr[1] = 2035; } } else if (Run == 2){ if (KshortDecaysInVelo){ //Run2 LL arr[0] = 291; arr[1] = 1016; } else{ //Run2 DD arr[0] = 492; arr[1] = 4813; } } else if (Run ==12){ //TO FILL } else return; } else{ //Split Years //TO FILL } } else{ if (!SplitYears){ if (Run ==1) { arr[0] = 67; arr[1] = 14; } else if (Run == 2){ arr[0] = 297; arr[1] = 203; } else if (Run ==12){ //TO FILL } else return; } else{ //Split Years //TO FILL } } if(arr[0]==0 && arr[1]==0) coutWarning("No event numbers have been assigned! Please fill in the hardcoded values in Test.cpp"); else cout << "[INFO]\t\t nSig=" << arr[0] << " nBkg=" << arr[1] << endl; nSig = arr[0]; nBkg = arr[1]; return; } bool SaveAllFromOneFile(int year, int Run, bool SplitYears, bool KshortDecaysInVelo,int nConfiguration, bool UseLowQ2Range, string customTMbranch, bool gammaTM){ vector methods; if (KshortChannel) methods = {"BDT","BDTG"}; else methods = {"BDT","BDTG","MLP"}; TFile *file= GetTMVAfile(year, Run, SplitYears, KshortDecaysInVelo, nConfiguration, UseLowQ2Range, customTMbranch, gammaTM); if (file == NULL) return 0; //Check if folder for the plots exists and if not, create one string folder_path = GetTMVAplotsFolder(year, Run, SplitYears, KshortDecaysInVelo,nConfiguration,UseLowQ2Range, customTMbranch, gammaTM); //First, try through Unix command (easier than trying to make a workaround in c++) string command = "mkdir -p " + folder_path; system(command.c_str()); //Now check if it has been created indeed struct stat st; if (stat(folder_path.c_str(),&st)!=0){ coutERROR("Folder "+folder_path+" couldn't be created!"); return 0; } //Save correlations plots if (!SaveCorrelationPlot(file,"S",folder_path)) return 0; if (!SaveCorrelationPlot(file,"B",folder_path)) return 0; //Save all variables plots SaveVariablesSignalVsBackground(file, folder_path, (KshortChannel ? (KshortDecaysInVelo ? "LL" : "DD") : "")); int nSig = 0; int nBkg = 0; nEvents(year, Run, SplitYears,KshortDecaysInVelo,nConfiguration,UseLowQ2Range, nSig, nBkg); for (auto &method : methods){ //Save overtraining if (!SaveMVAOvertraining(file,folder_path,method)) return 0; //Save efficiency if (!SaveEfficiency(file,folder_path,method,nSig, nBkg)) return 0; } //Save ROCs if (!SaveROCs(file, folder_path,methods)) return 0; return 1; } bool SaveMultipleROCS(int Run, int nLow, int nHigh){ vector testFiles; for (int f = nLow; f < nHigh; f++){ testFiles.push_back(GetTMVAfile(2011,Run,false,false,f,false,"",true)); } string path = GetTMVAplotsFolder(2011,Run,false,false,0,false); replace(path,"Config0/",""); coutDebug(path); return (SaveMultipleROCS(testFiles,path,"MLP")); }