ROOT Analysis for the Inclusive Detachted Dilepton Trigger Lines
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

561 lines
17 KiB

  1. #include "TH1D.h"
  2. #include "TH2D.h"
  3. #include "THStack.h"
  4. #include "TGraph.h"
  5. #include "TTree.h"
  6. #include "TChain.h"
  7. #include "TFile.h"
  8. #include "TCanvas.h"
  9. #include "TROOT.h"
  10. #include "TStyle.h"
  11. #include "TColor.h"
  12. #include "TLorentzVector.h"
  13. #include "TRandom3.h"
  14. #include "TLorentzVector.h"
  15. #include "TMVA/Factory.h"
  16. #include "TMVA/DataLoader.h"
  17. #include "TMVA/Reader.h"
  18. #include "RooDataHist.h"
  19. #include "RooRealVar.h"
  20. #include "RooPlot.h"
  21. #include "RooGaussian.h"
  22. #include "RooExponential.h"
  23. #include "RooRealConstant.h"
  24. #include "RooAddPdf.h"
  25. #include "RooFitResult.h"
  26. #include "RooProduct.h"
  27. #include "RooCrystalBall.h"
  28. #include "RooBreitWigner.h"
  29. #include <string>
  30. #include <iostream>
  31. #include <cmath>
  32. const bool TRAIN_TMVA = false;
  33. const bool EVALUATE_TMVA = true;
  34. const int N_BINS = 100;
  35. const double B_PLUS_MASS = 5279.;
  36. const double J_PSI_MASS = 3096.;
  37. const Double_t MASS_HIST_MIN = 5100.;
  38. const Double_t MASS_HIST_MAX = 6000.;
  39. RooPlot *CreateRooFit(TH1D *hist);
  40. bool inRange(double value, double center, double low_intvl, double up_intvl) {
  41. return center - low_intvl < value && value < center + up_intvl;
  42. }
  43. bool inRange(double value, double center, double intvl) {
  44. return inRange(value, center, intvl, intvl);
  45. }
  46. class FourVector {
  47. private:
  48. Float_t px;
  49. Float_t py;
  50. Float_t pz;
  51. Float_t energy;
  52. public:
  53. TLorentzVector LV() {
  54. return TLorentzVector(px, py, pz, energy);
  55. }
  56. void Print() {
  57. std::cout << TString::Format("(PX: %f, PY: %f, PZ: %f, E: %f)", px, py, pz, energy) << std::endl;
  58. }
  59. void Connect(TChain* chain, std::string name) {
  60. chain->SetBranchAddress(TString::Format("%s_PX", name.c_str()), &px);
  61. chain->SetBranchAddress(TString::Format("%s_PY", name.c_str()), &py);
  62. chain->SetBranchAddress(TString::Format("%s_PZ", name.c_str()), &pz);
  63. chain->SetBranchAddress(TString::Format("%s_ENERGY", name.c_str()), &energy);
  64. }
  65. };
  66. enum TVT
  67. {
  68. Double,
  69. Float,
  70. Int
  71. };
  72. class TV
  73. {
  74. private:
  75. std::string data_name;
  76. std::string mc_name;
  77. std::string train_name;
  78. TVT type;
  79. Double_t mc_double_value;
  80. Float_t mc_float_value;
  81. Double_t data_double_value;
  82. Float_t data_float_value;
  83. TV(std::string data_name, std::string mc_name, std::string train_name, TVT type)
  84. : data_name{data_name}, mc_name{mc_name}, train_name{train_name}, type{type}
  85. {
  86. }
  87. public:
  88. static TV Float(std::string data_name, std::string mc_name, std::string train_name)
  89. {
  90. return TV(data_name, mc_name, train_name, TVT::Float);
  91. }
  92. static TV Float(std::string data_name, std::string mc_name)
  93. {
  94. return TV(data_name, mc_name, data_name, TVT::Float);
  95. }
  96. static TV Float(std::string data_name)
  97. {
  98. return TV(data_name, data_name, data_name, TVT::Float);
  99. }
  100. static TV Double(std::string data_name, std::string mc_name, std::string train_name)
  101. {
  102. return TV(data_name, mc_name, train_name, TVT::Double);
  103. }
  104. static TV Double(std::string data_name, std::string mc_name)
  105. {
  106. return TV(data_name, mc_name, data_name, TVT::Double);
  107. }
  108. static TV Double(std::string data_name)
  109. {
  110. return TV(data_name, data_name, data_name, TVT::Double);
  111. }
  112. const char *GetDataName()
  113. {
  114. return data_name.c_str();
  115. }
  116. const char *GetMCName()
  117. {
  118. return mc_name.c_str();
  119. }
  120. const char *GetTrainName()
  121. {
  122. return train_name.c_str();
  123. }
  124. Double_t *GetMCDoubleRef()
  125. {
  126. return &mc_double_value;
  127. }
  128. Float_t *GetMCFloatRef()
  129. {
  130. return &mc_float_value;
  131. }
  132. Double_t *GetDataDoubleRef()
  133. {
  134. return &data_double_value;
  135. }
  136. Float_t *GetDataFloatRef()
  137. {
  138. return &data_float_value;
  139. }
  140. Double_t GetDataDouble()
  141. {
  142. return data_double_value;
  143. }
  144. Float_t GetDataFloat()
  145. {
  146. return data_float_value;
  147. }
  148. void PrintDataValue(int entry)
  149. {
  150. std::cout << data_name << " (" << entry << "): ";
  151. if (IsDouble())
  152. {
  153. std::cout << data_double_value;
  154. }
  155. else if (IsFloat())
  156. {
  157. std::cout << data_float_value;
  158. }
  159. std::cout << std::endl;
  160. }
  161. void PrintMCValue(int entry)
  162. {
  163. std::cout << mc_name << " (" << entry << "): ";
  164. if (IsDouble())
  165. {
  166. std::cout << mc_double_value;
  167. }
  168. else if (IsFloat())
  169. {
  170. std::cout << mc_float_value;
  171. }
  172. std::cout << std::endl;
  173. }
  174. bool IsDataFinite()
  175. {
  176. if (IsDouble())
  177. {
  178. return std::isfinite(data_double_value);
  179. }
  180. else if (IsFloat())
  181. {
  182. return std::isfinite(data_float_value);
  183. }
  184. return false;
  185. }
  186. bool IsMCFinite()
  187. {
  188. if (IsDouble())
  189. {
  190. return std::isfinite(mc_double_value);
  191. }
  192. else if (IsFloat())
  193. {
  194. return std::isfinite(mc_float_value);
  195. }
  196. return false;
  197. }
  198. bool IsDouble()
  199. {
  200. return type == TVT::Double;
  201. }
  202. bool IsFloat()
  203. {
  204. return type == TVT::Float;
  205. }
  206. };
  207. int train_bdt()
  208. {
  209. std::cout << TString::Format("Starting Up With TRAIN_TMVA=%d AND EVALUATE_TMVA=%d.", TRAIN_TMVA, EVALUATE_TMVA) << std::endl;
  210. // files to load
  211. std::vector<std::string> data_filenames =
  212. {
  213. "/auto/data/pfeiffer/inclusive_detached_dilepton/data_samples/BuToKpMuMu_Collision23_Beam6800GeV-VeloClosed-MagDown-Excl-UT_RealData_Sprucing23r1_90000000_RD.root"};
  214. TChain *data_chain = new TChain("SpruceRD_BuToKpMuMu/DecayTree");
  215. for (unsigned int i = 0; i < data_filenames.size(); i++)
  216. {
  217. data_chain->Add(data_filenames.at(i).c_str());
  218. }
  219. // files to load
  220. std::vector<std::string> mc_filenames =
  221. {
  222. "/auto/data/pfeiffer/inclusive_detached_dilepton/MC/BuToKpMuMu_rd_btoxll_simulation_12143001_MagDown_v0r0p6316365_FULLSTREAM.root"};
  223. TChain *mc_chain = new TChain("BuToKpMuMu_noPID/DecayTree");
  224. for (unsigned int i = 0; i < mc_filenames.size(); i++)
  225. {
  226. mc_chain->Add(mc_filenames.at(i).c_str());
  227. }
  228. std::vector<TV> vars{
  229. TV::Float("Bplus_PT", "B_PT"),
  230. TV::Float("Bplus_BPVFDCHI2", "B_BPVFDCHI2"),
  231. TV::Float("Bplus_BPVDIRA", "B_BPVDIRA"),
  232. TV::Float("Jpsi_BPVIPCHI2", "Jpsi_BPVIPCHI2"),
  233. // TV::Float("Jpsi_BPVDIRA", "Jpsi_BPVDIRA"),
  234. TV::Float("Jpsi_PT", "Jpsi_PT"),
  235. TV::Float("Kplus_BPVIPCHI2", "K_BPVIPCHI2"),
  236. TV::Float("Kplus_PT", "K_PT"),
  237. // TV::Double("Kplus_PID_K", "K_PID_K"),
  238. TV::Double("Kplus_PROBNN_K", "K_PROBNN_K"),
  239. TV::Float("muplus_BPVIPCHI2", "L1_BPVIPCHI2"),
  240. TV::Float("muminus_BPVIPCHI2", "L2_BPVIPCHI2"),
  241. };
  242. TTree *sig_tree = new TTree("TreeS", "tree containing signal data");
  243. TTree *bkg_tree = new TTree("TreeB", "tree containing background data");
  244. Double_t Bplus_M, B_M, K_PID_K;
  245. data_chain->SetBranchAddress("Bplus_M", &Bplus_M);
  246. mc_chain->SetBranchAddress("B_M", &B_M);
  247. mc_chain->SetBranchAddress("K_PID_K", &K_PID_K);
  248. FourVector muplus_4v, muminus_4v;
  249. muplus_4v.Connect(mc_chain, "L1");
  250. muminus_4v.Connect(mc_chain, "L2");
  251. for (size_t i = 0; i < vars.size(); i++)
  252. {
  253. if (vars[i].IsDouble())
  254. {
  255. data_chain->SetBranchAddress(vars[i].GetDataName(), vars[i].GetDataDoubleRef());
  256. mc_chain->SetBranchAddress(vars[i].GetMCName(), vars[i].GetMCDoubleRef());
  257. sig_tree->Branch(vars[i].GetTrainName(), vars[i].GetMCDoubleRef(), TString::Format("%s/D", vars[i].GetTrainName()));
  258. bkg_tree->Branch(vars[i].GetTrainName(), vars[i].GetDataDoubleRef(), TString::Format("%s/D", vars[i].GetTrainName()));
  259. }
  260. else if (vars[i].IsFloat())
  261. {
  262. data_chain->SetBranchAddress(vars[i].GetDataName(), vars[i].GetDataFloatRef());
  263. mc_chain->SetBranchAddress(vars[i].GetMCName(), vars[i].GetMCFloatRef());
  264. sig_tree->Branch(vars[i].GetTrainName(), vars[i].GetMCFloatRef(), TString::Format("%s/F", vars[i].GetTrainName()));
  265. bkg_tree->Branch(vars[i].GetTrainName(), vars[i].GetDataFloatRef(), TString::Format("%s/F", vars[i].GetTrainName()));
  266. }
  267. }
  268. unsigned int data_entries = data_chain->GetEntries();
  269. unsigned int mc_entries = mc_chain->GetEntries();
  270. if (TRAIN_TMVA)
  271. {
  272. std::cout << "----- Start TMVA Setup -----" << std::endl;
  273. std::cout << "----- Setting Up Signal and Background Tree -----" << std::endl;
  274. unsigned int added_bck_entries = 0;
  275. unsigned int added_sig_entries = 0;
  276. std::cout << "----- Processing Data -----" << std::endl;
  277. for (unsigned int i = 0; i < data_entries; i++)
  278. {
  279. data_chain->GetEntry(i);
  280. bool skip = false;
  281. for (size_t j = 0; j < vars.size(); j++)
  282. {
  283. if (!vars[j].IsDataFinite())
  284. {
  285. vars[j].PrintDataValue(i);
  286. skip = true;
  287. }
  288. }
  289. if (skip)
  290. {
  291. continue;
  292. }
  293. if (Bplus_M > 5500.)
  294. {
  295. bkg_tree->Fill();
  296. added_bck_entries++;
  297. }
  298. }
  299. std::cout << "----- Processing Simulation -----" << std::endl;
  300. for (unsigned int i = 0; i < mc_entries; i++)
  301. {
  302. mc_chain->GetEntry(i);
  303. bool skip = false;
  304. for (size_t j = 0; j < vars.size(); j++)
  305. {
  306. if (!vars[j].IsMCFinite())
  307. {
  308. vars[j].PrintMCValue(i);
  309. skip = true;
  310. }
  311. }
  312. Double_t mumu_inv_mass = (muplus_4v.LV() + muminus_4v.LV()).M();
  313. if (skip || !inRange(B_M, B_PLUS_MASS, 100.) || !inRange(mumu_inv_mass, J_PSI_MASS, 100.) || K_PID_K < 0.)
  314. {
  315. continue;
  316. }
  317. sig_tree->Fill();
  318. added_sig_entries++;
  319. if (added_sig_entries >= added_bck_entries * 2) {
  320. break;
  321. }
  322. }
  323. std::cout << "----- Start TMVA Training -----" << std::endl;
  324. std::cout << TString::Format("With %d Signal and %d Background Events.", added_sig_entries, added_bck_entries) << std::endl;
  325. TString outfile_name("tmva_butokpmumu_out.root");
  326. TFile *output_file = TFile::Open(outfile_name, "RECREATE");
  327. TString factory_options("V:Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Auto");
  328. TMVA::Factory *factory = new TMVA::Factory("tmva_butokpmumu", output_file, factory_options);
  329. TMVA::DataLoader *data_loader = new TMVA::DataLoader("dataloader");
  330. for (int i = 0; i < vars.size(); i++)
  331. {
  332. std::cout << "Adding Branch: " << vars[i].GetTrainName() << std::endl;
  333. if (vars[i].IsDouble())
  334. {
  335. data_loader->AddVariable(vars[i].GetTrainName(), 'D');
  336. }
  337. else if (vars[i].IsFloat())
  338. {
  339. data_loader->AddVariable(vars[i].GetTrainName(), 'F');
  340. }
  341. }
  342. Double_t signal_weight = 1.0, background_weight = 1.0;
  343. data_loader->AddSignalTree(sig_tree, signal_weight);
  344. data_loader->AddBackgroundTree(bkg_tree, background_weight);
  345. data_loader->PrepareTrainingAndTestTree("", "", "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V");
  346. factory->BookMethod(data_loader, TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=600:MinNodeSize=2.5%:CreateMVAPdfs:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20");
  347. factory->TrainAllMethods();
  348. factory->TestAllMethods();
  349. factory->EvaluateAllMethods();
  350. output_file->Close();
  351. std::cout << "----- Finished TMVA Setup & Training -----" << std::endl;
  352. }
  353. else
  354. {
  355. std::cout << "----- Skipped TMVA Setup & Training -----" << std::endl;
  356. }
  357. if (EVALUATE_TMVA)
  358. {
  359. std::cout << "----- Start TMVA Evaluation of Data -----" << std::endl;
  360. TMVA::Reader *reader = new TMVA::Reader("!Color:!Silent");
  361. Float_t *train_vars = new Float_t[vars.size()];
  362. for (size_t i = 0; i < vars.size(); i++)
  363. {
  364. reader->AddVariable(vars[i].GetTrainName(), &train_vars[i]);
  365. }
  366. reader->BookMVA("BDT", "./dataloader/weights/tmva_butokpmumu_BDT.weights.xml");
  367. TH1D *h1_probs = new TH1D("h1_probs", "BDT Probabilities", N_BINS, -1, 1);
  368. TH1D *h1_Bplus_M = new TH1D("h1_Bplus_M", "B^{+} Mass", N_BINS, MASS_HIST_MIN, MASS_HIST_MAX);
  369. for (unsigned int i = 0; i < data_entries; i++)
  370. {
  371. data_chain->GetEntry(i);
  372. bool skip = false;
  373. for (size_t j = 0; j < vars.size(); j++)
  374. {
  375. if (!vars[j].IsDataFinite())
  376. {
  377. vars[j].PrintDataValue(i);
  378. skip = true;
  379. break;
  380. }
  381. if (vars[j].IsDouble())
  382. {
  383. train_vars[j] = vars[j].GetDataDouble();
  384. }
  385. else if (vars[j].IsFloat())
  386. {
  387. train_vars[j] = vars[j].GetDataFloat();
  388. }
  389. }
  390. if (skip)
  391. {
  392. continue;
  393. }
  394. double mva_response = reader->EvaluateMVA("BDT");
  395. h1_probs->Fill(mva_response);
  396. const double mva_cut_value = 0; // 0.09; // -0.02;
  397. if (mva_response > mva_cut_value)
  398. {
  399. h1_Bplus_M->Fill(Bplus_M);
  400. }
  401. }
  402. std::cout << "----- Finished TMVA Evaluation of Data -----" << std::endl;
  403. TCanvas *c1 = new TCanvas("c1", "c1", 0, 0, 1200, 800);
  404. c1->Divide(2, 1);
  405. c1->cd(1);
  406. h1_probs->Draw();
  407. c1->cd(2);
  408. h1_Bplus_M->Draw();
  409. c1->Draw();
  410. TCanvas *c3 = new TCanvas("c3", "Canvas 3", 0, 0, 1000, 600);
  411. auto fitFrame = CreateRooFit(h1_Bplus_M);
  412. fitFrame->Draw();
  413. c3->Draw();
  414. }
  415. else
  416. {
  417. std::cout << "----- Skipped TMVA Evaluation of Data -----" << std::endl;
  418. }
  419. return 0;
  420. }
  421. RooPlot *CreateRooFit(TH1D *hist)
  422. {
  423. RooRealVar roo_var_mass("roo_var_mass", "B+ Mass Variable", MASS_HIST_MIN, MASS_HIST_MAX);
  424. roo_var_mass.setRange("fitting_range", MASS_HIST_MIN, MASS_HIST_MAX);
  425. roo_var_mass.setRange("plot_range", MASS_HIST_MIN, MASS_HIST_MAX);
  426. TString hist_name = "roohist_bplus_M";
  427. RooDataHist roohist_bplus_M(hist_name, "B Plus Mass Histogram", roo_var_mass, RooFit::Import(*hist));
  428. RooRealVar roo_sig_bw_mean("roo_sig_bw_mean", "Mass BW Mean", 5250., 5100., 5400.);
  429. RooRealVar roo_sig_bw_with("roo_sig_bw_with", "Mass BW Width", 20., 0., 50.);
  430. RooBreitWigner roo_sig_bw("roo_sig_bw", "B+ Signal Breit Wigner", roo_var_mass, roo_sig_bw_mean, roo_sig_bw_with);
  431. RooRealVar roo_bkg_exp_c("roo_bkg_exp_c", "Background C", -0.000693147, -0.002, 0.);
  432. RooExponential roo_bkg_exp("roo_bkg_exp", "B+ Mass Background Exp", roo_var_mass, roo_bkg_exp_c);
  433. RooRealVar roo_var_mass_nsig("roo_var_mass_nsig", "B+ Mass N Signal", 0., hist->GetEntries());
  434. RooRealVar roo_var_mass_nbkg("roo_var_mass_nbkg", "B+ Mass N Background", 0., hist->GetEntries());
  435. TString pdf_name = "roo_pdf_sig_plus_bkg";
  436. RooAddPdf roo_pdf_sig_plus_bkg(pdf_name, "Sig + Bkg PDF",
  437. RooArgList(roo_sig_bw, roo_bkg_exp),
  438. RooArgList(roo_var_mass_nsig, roo_var_mass_nbkg));
  439. RooPlot *roo_frame_mass = roo_var_mass.frame();
  440. roohist_bplus_M.plotOn(roo_frame_mass, RooFit::Binning(N_BINS), RooFit::Name(hist_name), RooFit::MarkerColor(15), RooFit::Range("plot_range"));
  441. RooFitResult *fitres = roo_pdf_sig_plus_bkg.fitTo(roohist_bplus_M, RooFit::Save(), RooFit::PrintLevel(1), RooFit::Range("fitting_range"));
  442. roo_pdf_sig_plus_bkg.plotOn(roo_frame_mass, RooFit::VisualizeError(*fitres, 1), RooFit::Range("plot_range"), RooFit::FillColor(kOrange + 1), RooFit::FillStyle(3144));
  443. roo_pdf_sig_plus_bkg.plotOn(roo_frame_mass, RooFit::LineColor(kRed), RooFit::LineStyle(kSolid), RooFit::Range("plot_range"), RooFit::Name(pdf_name));
  444. roo_pdf_sig_plus_bkg.plotOn(roo_frame_mass, RooFit::Components(RooArgSet(roo_bkg_exp)), RooFit::LineColor(kBlue), RooFit::LineStyle(kDashed), RooFit::Range("plot_range"));
  445. // roo_sig_cb.plotOn(roo_frame_mass, RooFit::LineColor(kAlpine), RooFit::LineStyle(kDashed), RooFit::Range("fitting_range"));
  446. // roo_bkg_exp.plotOn(roo_frame_mass, RooFit::LineColor(kOrange), RooFit::LineStyle(kDashed), RooFit::Range("fitting_range"));
  447. roo_pdf_sig_plus_bkg.paramOn(roo_frame_mass, RooFit::Layout(0.50, 0.99, 0.90));
  448. roo_frame_mass->getAttText()->SetTextSize(0.027);
  449. return roo_frame_mass;
  450. }