EWP-BplusToKstMuMu-AngAna/Code/Selection/ComparisonTool/compareUltimateSource.py

344 lines
15 KiB
Python
Raw Normal View History

from ROOT import gROOT, gDirectory, gStyle, TChain, TTree, TAxis, TH1D, TLegend, TCanvas, TPad, TLine
#TODO check what needs to be loaded where
import subprocess
import time
import numpy as np
import re #for splitting with multiple deliminers
from array import array
import numexpr
from Utils import *
from Plots import *
from LHCbStyle import *
from Globals import *
'''
Tool for comparing two variables from two datasets.
Yes, writting it in Python3 would be smarter,
but default is Python2 on all servers so far
'''
Min = 0.0
Max = 0.0
#TODO stripping comparisons
def compareUltimate( variable1 = "Abs(K_star_plus_PT-K_plus_PT)/10", variable2 = "Sqrt(B_plus_PT)",
year1 = 2011, year2 = 2012, cut1 = "", cut2 = "",
magnet1 = "both", MC1 = False, TM1 = False, ReferenceChannel1 = False, PHSP1 = False, Preselected1 = True, BDTed1 = False,
magnet2 = "both", MC2 = True, TM2 = False, ReferenceChannel2 = False, PHSP2 = False, Preselected2 = True, BDTed2 = False,
sWeighted1 = True, bWeighted1 = False, b2Dweighted1 = False, weightBranch1 = firstMCweight(),
sWeighted2 = False, bWeighted2 = False, b2Dweighted2 = True, weightBranch2 = firstMCweight(),
bPrint = False, KshortDecaysInVelo1 = False, KshortDecaysInVelo2 = False, verbose = True):
#TODO: add run option
#if((plotLog1 and plotLogOneMinus1) or (plotLog2 and plotLogOneMinus2)):
# raise Exception("Only Log( var ) or Log( 1 - var ) option possible, but both flags were set!")
#No need to set it in here, will be taken care of by definning variable
#The only requiremnt here that for cuts, no operations are allowed for now (no sqrt(x) < 0.5 or so)
gROOT.SetBatch(1)
#Set LHCb style for plots
LHCbStyle()
#TODO add check for valid particles
#Force TM for weighted MC
if (b2Dweighted1 or bWeighted1): TM1 = True
if (b2Dweighted2 or bWeighted2): TM2 = True
if (MC1): year1 = checkMCyear(year1,ReferenceChannel1, PHSP1)
if (MC2): year2 = checkMCyear(year2,ReferenceChannel2, PHSP2)
if (magnet1 == "down"):
cut1 = ("Polarity=65535") if (cut1=="") else (cut1 + "&&Polarity=65535") #Somehow, it overflows to 2^16-1
if (magnet1 == "up"):
cut1 = ("Polarity=1") if (cut1=="") else (cut1 + "&&Polarity=1")
if (magnet2 == "down"):
cut2 = ("Polarity=65535") if (cut2=="") else (cut2 + "&&Polarity=65535")#Somehow, it overflows to 2^16-1
if (magnet2 == "up"):
cut2 = ("Polarity=1") if (cut2=="") else (cut2 + "&&Polarity=1")
if (TM1):
cut1 = ("TMedBKGCAT=1") if (cut1=="") else (cut1 + "&& TMedBKGCAT=1")
if (TM2):
cut2 = ("TMedBKGCAT=1") if (cut2=="") else (cut2 + "&& TMedBKGCAT=1")
#raise an exception if MC and data is selected
if ( (not MC1 and (TM1 or ReferenceChannel1 or PHSP1)) or (not MC2 and (TM2 or ReferenceChannel1 or PHSP1)) ) :
raise Exception('Data cannot be TruthMatched nor RefChan nor PHSP! Check your options!')
#Set DTF if required
if (UseDTF()):
variable1 = replace_variables_to_DTF(variable1)
variable2 = replace_variables_to_DTF(variable2)
if (verbose):
print "[DEBUG]\t\tUsing variables ", variable1, variable2
#Get Run
Run1 = 1 if (year1 < 2013) else 2
Run2 = 1 if (year2 < 2013) else 2
#Set up a dictionary with dataset information
optionsDictionary1 = getOptionsDictionary(year1, Run1, magnet1, MC1, TM1, ReferenceChannel1, PHSP1, Preselected1,
BDTed1, sWeighted1, bWeighted1, b2Dweighted1, weightBranch1,KshortDecaysInVelo1, False)
optionsDictionary2 = getOptionsDictionary(year2, Run2, magnet2, MC2, TM2, ReferenceChannel2, PHSP2, Preselected2,
BDTed2, sWeighted2, bWeighted2, b2Dweighted2, weightBranch2,KshortDecaysInVelo2, False)
#Check for year+sample
if (not checkYearSample(optionsDictionary1)[0]):
print checkYearSample(optionsDictionary1)[1]
return 0
if (not checkYearSample(optionsDictionary2)[0]):
print checkYearSample(optionsDictionary2)[1]
return 0
#In case of the need of build, uncomment
#import os
#os.system("g++ getPathForPython.cc `root-config --libs --cflags` -o getPathForPython")
#Read the trees
tree1 = TChain(treeName(MC1,TM1,Preselected1))
tree2 = TChain(treeName(MC2,TM2,Preselected2))
addToTChain(tree1, optionsDictionary1, verbose)
addToTChain(tree2, optionsDictionary2, verbose)
#Load the branches into basic histograms
histName1 = histName(variable1,optionsDictionary1)+"1"
histName2 = histName(variable2,optionsDictionary2)+"2"
drawVar1 = addTMathTags(variable1)
drawVar2 = addTMathTags(variable2)
if verbose:
print "[DEBUG]\t\tVariable names are: ", histName1, histName2
print "[DEBUG]\t\tDrawing variables: ", drawVar1 , drawVar2
#Create default histograms directly from the tree
tree1.Draw("%s >> %s" % (drawVar1,histName1) )
hist1Drawn = gDirectory.Get(histName1)
if verbose: print "[DEBUG]\t\tDrawing into ", hist1Drawn.GetName()
tree2.Draw("%s >> %s" % (drawVar2,histName2) )
hist2Drawn = gDirectory.Get(histName2)
if verbose: print "[DEBUG]\t\tDrawing into ", hist2Drawn.GetName()
#Check if histograms are empty
if (hist1Drawn.GetEntries()==0):
raise Exception('Created histogram1 is empty! Check your cuts and variable!')
if (hist2Drawn.GetEntries()==0):
raise Exception('Created histogram2 is empty! Check your cuts and variable!')
#Check if the variable is integer
isVar1Int = isInt(variable1)
isVar2Int = isInt(variable2)
hist1InfoList = []
hist2InfoList = []
dictVarPlotLimits = dictVarPlotLimitsKshort(KshortDecaysInVelo1 or KshortDecaysInVelo2) if KshortChannel() else dictVarPlotLimitsKplus()
if (isVar1Int):
hist1InfoList = dictVarPlotLimitsInt()[variable1]
else: # Check for non-empty bins at the edges and set xaxis range accordingly
nBins = hist1Drawn.GetXaxis().GetNbins()
firstBin = 0
lastBin = nBins+1
# check if variable needs manually adjusted range
if (variable1 in dictVarPlotLimits.keys()):
if ("Min" in dictVarPlotLimits[variable1].keys()): # If min is defined, adjust
min1 = dictVarPlotLimits[variable1]["Min"]
else:
while (hist1Drawn.GetBinContent(firstBin) == 0): # If not, loop over to get first bin
firstBin = firstBin+1
min1 = hist1Drawn.GetXaxis().GetBinLowEdge(firstBin)
if ("Max" in dictVarPlotLimits[variable1].keys()): # If max is defined, adjust
max1 = dictVarPlotLimits[variable1]["Max"]
else:
while (hist1Drawn.GetBinContent(lastBin) == 0): # If not, loop over to get the last bin
lastBin = lastBin-1
max1 = hist1Drawn.GetXaxis().GetBinUpEdge(lastBin)
else: # if variable doesn't need manual adjustement, loop over to get edge bins
while (hist1Drawn.GetBinContent(firstBin) == 0):
firstBin = firstBin+1
min1 = hist1Drawn.GetXaxis().GetBinLowEdge(firstBin)
while (hist1Drawn.GetBinContent(lastBin) == 0):
lastBin = lastBin-1
max1 = hist1Drawn.GetXaxis().GetBinUpEdge(lastBin)
hist1InfoList = [nBins/2, min1, max1]
if (isVar2Int):
hist2InfoList = dictVarPlotLimitsInt()[variable2]
else: # Check for non-empty bins at the edges and set xaxis range accordingly
nBins = hist2Drawn.GetXaxis().GetNbins()
firstBin = 0
lastBin = nBins+1
# check if variable needs manually adjusted range
if (variable2 in dictVarPlotLimits.keys()):
if ("Min" in dictVarPlotLimits[variable2].keys()): # If min is defined, adjust
min2 = dictVarPlotLimits[variable2]["Min"]
else:
while (hist2Drawn.GetBinContent(firstBin) == 0): # If not, loop over to get first bin
firstBin = firstBin+2
min2 = hist2Drawn.GetXaxis().GetBinLowEdge(firstBin)
if ("Max" in dictVarPlotLimits[variable2].keys()): # If max is defined, adjust
max2 = dictVarPlotLimits[variable2]["Max"]
else:
while (hist2Drawn.GetBinContent(lastBin) == 0): # If not, loop over to get the last bin
lastBin = lastBin-2
max2 = hist2Drawn.GetXaxis().GetBinUpEdge(lastBin)
else: # if variable doesn't need manual adjustement, loop over to get edge bins
while (hist2Drawn.GetBinContent(firstBin) == 0):
firstBin = firstBin+2
min2 = hist2Drawn.GetXaxis().GetBinLowEdge(firstBin)
while (hist2Drawn.GetBinContent(lastBin) == 0):
lastBin = lastBin-1
max2 = hist2Drawn.GetXaxis().GetBinUpEdge(lastBin)
hist2InfoList = [nBins/2, min2, max2]
if verbose:
print "[DEBUG]\t\tVariable %s with %i bins from %f to %f." %(variable1,hist1InfoList[0],hist1InfoList[1],hist1InfoList[2])
print "[DEBUG]\t\tVariable %s with %i bins from %f to %f." % (variable2,hist2InfoList[0],hist2InfoList[1],hist2InfoList[2])
#Set xranges and nBins to be equal for both variables
#nBins are necessary for proper histogram division
histInfoList = [
max(hist1InfoList[0], hist2InfoList[0]),
min(hist1InfoList[1], hist2InfoList[1]),
max(hist1InfoList[2], hist2InfoList[2])
]
#TODO have a function for Xaxis name and for tags
#parse variables and cuts in a way that all needed branches are loaded
variables1List = []
variables1Dict = {}
variables2List = []
variables2Dict = {}
#Create new histograms
hist1 = TH1D("hist1","hist1",histInfoList[0],histInfoList[1],histInfoList[2])
hist2 = TH1D("hist2","hist2",histInfoList[0],histInfoList[1],histInfoList[2])
hist1.Sumw2()
hist2.Sumw2()
#Load variables from only branches actually used
tree1.SetBranchStatus("*",0)
tree2.SetBranchStatus("*",0)
listOfVars1 = getListOfUsedVariables(variable1,cut1)
listOfVars2 = getListOfUsedVariables(variable2,cut2)
#add weight branches
if (sWeighted1): listOfVars1 = np.append(listOfVars1,"N_Bplus_sw")
if (sWeighted2): listOfVars2 = np.append(listOfVars2,"N_Bplus_sw")
if (bWeighted1 and TM1): listOfVars1 = np.append(listOfVars1,"weight_nLongTracks") #TODO: check the branch name
elif (b2Dweighted1 and TM1): listOfVars1 = np.append(listOfVars1,"weight2D_nLongTracks")
if (bWeighted2 and TM2): listOfVars2 = np.append(listOfVars2,"weight_nLongTracks")
elif (b2Dweighted2 and TM2): listOfVars2 = np.append(listOfVars2,"weight2D_nLongTracks")
if verbose:
print "[DEBUG]\t\tActivating branches in tree1: ", listOfVars1
print "[DEBUG]\t\tActivating branches in tree2: ", listOfVars2
#Assign variables with either a double or integer #WeAllHateRoot
for var in listOfVars1:
tree1.SetBranchStatus(var,1)
if(isInt(var)): variables1Dict[var] = array('i',[0])
else: variables1Dict[var] = array('d',[0])
tree1.SetBranchAddress(var,variables1Dict[var])
for var in listOfVars2:
tree2.SetBranchStatus(var,1)
if(isInt(var)): variables2Dict[var] = array('i',[0])
else: variables2Dict[var] = array('d',[0])
tree2.SetBranchAddress(var,variables2Dict[var])
if verbose:
print "[DEBUG]\t\tDefining variables for branches in tree1: ", variables1Dict
print "[DEBUG]\t\tDefining variables for branches in tree2: ", variables2Dict
if verbose:
print "[DEBUG]\t\tUsing cuts1: ", cut1
print "[DEBUG]\t\tUsing cuts2: ", cut2
for evt in xrange(tree1.GetEntries()):
if ( (evt+1) % 10000 < 0.01): print "[INFO]\tReding event ", evt+1, " from tree1"
tree1.GetEntry(evt)
if not evaluateCut(cut1,variables1Dict): continue
hist1.Fill(evaluateVariable(variable1,variables1Dict),evaluateWeight(variables1Dict,optionsDictionary1))
for evt in xrange(tree2.GetEntries()):
if ( (evt+1) % 10000 < 0.01): print "[INFO]\tReding event ", evt+1, " from tree2"
tree2.GetEntry(evt)
if not evaluateCut(cut2,variables2Dict): continue
hist2.Fill(evaluateVariable(variable2,variables2Dict),evaluateWeight(variables2Dict,optionsDictionary2))
#Clone histograms to make a ratio
#TODO check if it's really necessary
hist1Clone = hist1.Clone('hist1Clone')
hist2Clone = hist2.Clone('hist2Clone')
#Normalize histograms
if (hist1Clone.Integral() == 0):
raise Exception('Filled histogram1 is empty! Check your options!')
if (hist2Clone.Integral() == 0):
raise Exception('Filled histogram2 is empty! Check your options!')
hist1Clone.Scale(1.0/hist1Clone.Integral())
hist2Clone.Scale(1.0/hist2Clone.Integral())
histRatio = hist2Clone.Clone('histRatio')
histRatio.Divide(hist1Clone)
histRatio.Draw()
#Plotting
lowerPlotHeight = 0.6;
canvas = TCanvas('canvas','ratio and superposition')
pad1 = TPad("pad1", "ratio",0.0,lowerPlotHeight,1.0,1.0,0);
pad2 = TPad("pad2", "superposition",0.0,0.0,1.0,lowerPlotHeight,0);
pad1.Draw();
pad2.Draw();
xAxisLabel = defineXAxisLabels(variable1,variable2)
makePlot(hist1Clone, hist2Clone, histRatio, pad1, pad2, xAxisLabel, histInfoList[1], histInfoList[2], lowerPlotHeight)
#TODO put this into plotting function, where it god-knows-why doesn't work
pad1.cd()
unityline = TLine(histInfoList[1], 1.0, histInfoList[2], 1.0)
unityline.SetLineStyle(2)
unityline.SetLineColor(4)
unityline.Draw()
#Legend definition and drawing
LegendXposition = 0.4;
LegendYposition = 0.62;
pad2.cd()
leg = TLegend(LegendXposition,LegendYposition,LegendXposition + 0.25, LegendYposition + 0.28);
tag1, tag2 = defineLegendTags(optionsDictionary1,optionsDictionary2)
pad2.cd()
if (len(variable1) > 30):
leg.AddEntry(hist1Clone,"#splitline{%s}{#splitline{%s}{%s}}" % (variable1, tag1, cut1), "lpe");
else:
leg.AddEntry(hist1Clone,"#splitline{%s%s}{%s}" % (variable1, tag1, cut1), "lpe");
if (len(variable2) > 30):
leg.AddEntry(hist2Clone,"#splitline{%s}{#splitline{%s}{%s}}" % (variable2, tag2, cut2), "lpe");
else:
leg.AddEntry(hist2Clone,"#splitline{%s%s}{%s}" % (variable2, tag2, cut2), "lpe");
leg.SetTextSize(0.04/lowerPlotHeight);
leg.Draw()
canvas.Print(getPlotFileName(variable1, optionsDictionary1, cut1, variable2, optionsDictionary2, cut2))
return
if __name__ == '__main__':
compareUltimate()