# Renata Kopecna

import argparse
import os
import re  # for splitting with multiple delimiters
import subprocess
import sys
import time
from array import array

import numexpr
import numpy as np
from ROOT import gROOT, gDirectory, gStyle, TChain, TTree, TAxis, TH1D, TFile

# Kept last so that names exported by Globals shadow the imports above
# exactly as they did before the imports were regrouped.
from Globals import *

'''
Tools used in compareUlitmate.py
'''

# Executable that prints the path of the ntuple to open (second line of its output).
# TODO maybe replace by a current path
_PATH_SCRIPT = "/home/lhcb/kopecna/B2KstarMuMu_clean/Code/Selection/ComparisonTool/getPathForPython "

# Define a list of variables that are considered integers
intVarsList = ["nTracks",
               "nLongTracks",
               "nVeloTracks",
               "nTTracks",
               "nDownstreamTracks",
               "nUpstreamTracks",
               "nCandidate",
               "totCandidates",
               "nSPDHits",
               "nPV",
               "B_plus_ID",
               "B_plus_TRUEID",
               "K_star_plus_ID",
               "K_plus_ID",
               "pi_zero_resolved_ID",
               "mu_plus_ID",
               "mu_minus_ID",
               "Polarity",
               "nDiMuonMassBin",
               "TMedBKGCAT",
               ]


def expressionList():
    """Return the names of the math functions recognised in variable expressions."""
    return ["Sqrt",
            "Abs",
            "Sin",
            "Cos",
            "Exp",
            "Log",
            ]


def isInt(variable=""):
    """Return True when `variable` is listed in `intVarsList`."""
    return variable in intVarsList


def treeName(MC=False, TM=False, Preselected=False):
    """Return the name of the tree to read for the given sample flags.

    Truth-matched samples and preselected MC use "DecayTreeTruthMatched";
    non-preselected samples use a tuple name that depends on KshortChannel()
    (from Globals); everything else uses "DecayTree".
    """
    if (TM or (MC and Preselected)):  # TODO
        return "DecayTreeTruthMatched"
    if (not Preselected):
        return "b2KstKs0pimumu_Tuple" if KshortChannel() else "b2KstKpi0mumuResolvedTuple/DecayTree"
    return "DecayTree"


def getOptionsDictionary(year, Run, magnet, MC, TM, ReferenceChannel, PHSP, Preselected, BDTed,
                         sWeighted, bWeighted, b2Dweighted, weightBranch, KshortDecaysInVelo,
                         UseLowQ2Range):
    """Pack the sample options into the dictionary used by the other tools.

    MC, TM, ReferenceChannel, PHSP, Preselected and KshortDecaysInVelo are
    stored as ints (they are formatted into a command line by getTreePath);
    the remaining values are stored unchanged.
    """
    return {
        'year': year,
        'Run': Run,
        'magnet': magnet,
        'MC': int(MC),
        'TM': int(TM),
        'ReferenceChannel': int(ReferenceChannel),
        'PHSP': int(PHSP),
        'Preselected': int(Preselected),
        'BDTed': BDTed,
        'sWeighted': sWeighted,
        'bWeighted': bWeighted,
        'b2Dweighted': b2Dweighted,
        'weightBranch': weightBranch,
        'KshortDecaysInVelo': int(KshortDecaysInVelo),
        'UseLowQ2Range': UseLowQ2Range,
    }


def _checkYearSampleFromOptions(optionsDict):
    """Options-dictionary variant of the year/sample check.

    This definition used to be named `checkYearSample` as well and was
    silently overwritten by the five-argument definition below, so it was
    never callable. It is kept under a private name so the logic is not lost.
    NOTE(review): it lacks the PHSP (< 2019) and signal MC (<= 2017) limits
    of the effective function - confirm whether it is still wanted.

    Returns a (bool, message) tuple.
    """
    # If wrong year/MC, skip
    if optionsDict['year'] not in yearList():
        return False, "Wrong Year!"
    if (not optionsDict['MC']):
        return True, ""
    if (optionsDict['PHSP']):
        return True, ""
    if (not optionsDict['ReferenceChannel']):
        return True, ""
    if (KshortChannel()):
        if (optionsDict['year'] < 2013 or optionsDict['year'] == 2016):
            return True, ""
        return False, "RefChannel only available for Run I!"
    if (optionsDict['year'] < 2017):
        return True, ""
    return False, "RefChannel only available for < 2017!"


def checkYearSample(year, MC, ReferenceChannel, PHSP, KshortChannel):
    """Check whether a sample exists for the requested year and type.

    Note that the `KshortChannel` parameter is a plain flag here and shadows
    the KshortChannel() function from Globals inside this function.

    Returns a (bool, message) tuple; the message is the reason for a
    rejection (it is empty for the PHSP >= 2019 rejection) and "" otherwise.
    """
    # If wrong year/MC, skip
    if year not in yearList():
        return False, "Wrong Year!"
    if (not MC):
        return True, ""

    if (KshortChannel):
        if (PHSP):
            return True, ""
        if (ReferenceChannel):
            # Reference channel accepted for years before 2013 and for 2016
            if (year < 2013 or year == 2016):
                return True, ""
            return False, "RefChannel only available for Run I!"
        return True, ""

    if (PHSP):
        if (year < 2019):
            return True, ""
        return False, ""
    if (ReferenceChannel):
        if (year < 2017):
            return True, ""
        return False, "RefChannel only available for < 2017!"
    if (year > 2017):
        return False, "No MC for 2018!"
    return True, ""


def _runPathScript(command, options, verbose):
    """Run `command + options` in a shell and return the second line of its output."""
    if verbose:
        print ("[DEBUG]\t\tRunning a c++ script to get the name of the file to open: ", command + options)
    # Read second line from the c++ script (first one is the Hello...)
    output = subprocess.check_output(command + options, shell=True, universal_newlines=True)
    return output.split('\n')[1]


def getTreePath(optionsDict, verbose):
    """Ask the getPathForPython executable for the file(s) matching `optionsDict`.

    getPathForPython usage:
        getPathForPython command year Run magnet Preselected MC ReferenceChannel PHSP KshortDecayInVelo Selection UseLowQ2Range
    It has to have everything to keep it easy.

    Returns (filePath, filePath2). filePath2 is only filled for plain input
    files with magnet == "both" (filePath is then the "down" file and
    filePath2 the "up" file); otherwise it is "".
    """
    def lookup(selection, magnet, preselected):
        # Same argument order as the usage line above; Selection is always 0.
        options = ("%s %i %i %s %r %r %r %r %r 0 %r"
                   % (selection, optionsDict['year'], optionsDict['Run'], magnet, preselected,
                      optionsDict['MC'], optionsDict['ReferenceChannel'], optionsDict['PHSP'],
                      optionsDict['KshortDecaysInVelo'], optionsDict['UseLowQ2Range']))
        return _runPathScript(_PATH_SCRIPT, options, verbose)

    filePath2 = ""
    if (optionsDict['BDTed']):
        filePath = lookup("BDToutput", optionsDict['magnet'], 0)
    elif (optionsDict['sWeighted'] or optionsDict['bWeighted'] or optionsDict['b2Dweighted']):
        filePath = lookup("BDTinput", optionsDict['magnet'], 0)
    elif (optionsDict['magnet'] == "both"):
        filePath = lookup("input", "down", optionsDict['Preselected'])
        filePath2 = lookup("input", "up", optionsDict['Preselected'])
    else:
        filePath = lookup("input", optionsDict['magnet'], optionsDict['Preselected'])
    return filePath, filePath2


def getTreeList(year, Run, MC, TM, Reference, PHSP, verbose):  # WIP
    """Return the list of BDToutput file paths for every (year, run) pair.

    The pairs come from yearRunArray() (from Globals), iterated column by
    column; each one is looked up with magnet "both" and Preselected 1.
    `TM` is accepted but not used in the lookup.
    """
    yearArr = yearRunArray(year, Run, MC, Reference, PHSP)

    filePath = []
    for yr in yearArr.T:
        if (verbose):
            print ("Opening year", yr)
        options = ("BDToutput %s %s both 1 %r %r %r 0 0 0"
                   % (str(yr[0]), str(yr[1]), int(MC), int(Reference), int(PHSP)))
        filePath.append(_runPathScript(_PATH_SCRIPT, options, verbose))
    if (verbose):
        print ("Files read: ", filePath)
    return filePath


def addToTChain(tree, optionsDict, verbose):
    """Add the file(s) returned by getTreePath to `tree` (anything with an Add method)."""
    filePath, filePath2 = getTreePath(optionsDict=optionsDict, verbose=verbose)

    print ("[INFO]\tOpening", filePath)
    tree.Add(filePath)

    # The second file only exists for plain (not BDTed / weighted) input with both polarities
    processed = (optionsDict['BDTed'] or optionsDict['sWeighted']
                 or optionsDict['bWeighted'] or optionsDict['b2Dweighted'])
    if (not processed) and optionsDict['magnet'] == "both":
        print ("[INFO]\tOpening", filePath2)
        tree.Add(filePath2)


def addTMathTags(varName):
    """Prefix every function name from expressionList() with "TMath::".

    Names that already carry the prefix are left alone, so the function can
    be applied twice without producing "TMath::TMath::".
    NOTE(review): this is a plain substring match, so a branch name that
    contains e.g. "Cos" is prefixed as well.
    """
    for expr in expressionList():
        varName = re.sub(r'(?<!TMath::)' + re.escape(expr), "TMath::" + expr, varName)
    return varName


def list_of_DTF_vars():
    """Return the substrings that mark a variable as having a "_DTF" counterpart."""
    return ["_PT",
            "_PX",
            "_PY",
            "_PZ",
            "_PE",
            # NOTE(review): the original read `"_P" "_ETA"` (no comma), which Python
            # joins into this single string - most likely "_P" and "_ETA" were meant.
            # Left unchanged on purpose: a bare "_P" entry would also hit names such
            # as "*_ProbNN*", which the clean-up in replace_variables_to_DTF
            # (uppercase-only) would not undo.
            "_P_ETA",
            "_MM",
            "B_plus_M",
            "K_star_plus_M",
            "pi_zero_resolved_plus_M",
            ]


def replace_variables_to_DTF(variable=""):
    """Append "_DTF" after every occurrence of the list_of_DTF_vars() entries.

    A second pass removes tags that ended up in the middle of a longer name,
    i.e. every "DTF" that is directly followed by an uppercase letter,
    together with the character in front of it.
    """
    for var in list_of_DTF_vars():
        # Insert right-to-left so that the earlier match positions stay valid
        for match in reversed(list(re.finditer(var, variable))):
            end = match.start() + len(var)
            variable = variable[:end] + "_DTF" + variable[end:]

    # Remove false DTF variables (a tag followed by more of the name)
    for remove in re.findall('DTF[A-Z]', variable):
        for match in reversed(list(re.finditer(remove, variable))):
            # Also drop the character in front of "DTF"; never wrap around at position 0
            start = max(match.start() - 1, 0)
            variable = variable[:start] + variable[match.start() + 3:]
    return variable


def getListOfUsedVariables(variable, cut):
    """Return the unique names found in `variable` and `cut`.

    Numbers, operators, "TMath::" and the expressionList() function names
    are stripped; what is left is split on operators and blanks.

    Returns a sorted numpy array without duplicates or empty strings.
    """
    tmp = str(variable) + "|" + str(cut)  # | is there as a separator

    # Protect gamma1/gamma2 from the number removal
    tmp = tmp.replace("gamma1", "gammaa")
    tmp = tmp.replace("gamma2", "gammab")
    tmp = re.sub(r'[0-9]+', '', tmp)  # remove numbers
    tmp = tmp.replace("gammaa", "gamma1")
    tmp = tmp.replace("gammab", "gamma2")

    # Put back the digits that belong to the names
    tmp = tmp.replace("CHI", "CHI2")
    tmp = tmp.replace("DOCA", "DOCA1")
    tmp = tmp.replace("Q", "Q2")  # TODO

    tmp = tmp.replace(".", "")   # remove decimals left from numbers
    tmp = tmp.replace("-", "|")  # remove minus from negative numbers, keep an operator
    tmp = tmp.replace("TMath::", "")  # remove functions
    for expr in expressionList():
        tmp = tmp.replace(expr, "")          # remove expressions
        tmp = tmp.replace(expr.lower(), "")  # remove expressions without capital

    tmp = tmp.replace("<=", "<")
    tmp = tmp.replace(">=", ">")
    tmp = tmp.replace("==", "=")
    tmp = tmp.replace("&&", "&")
    tmp = tmp.replace("||", "|")
    tmp = tmp.replace("!", "")

    # Split on operators, brackets and blanks; drop the empty pieces.
    # A list is required here: on Python 3 filter() returns an iterator,
    # which np.unique would treat as a single object.
    branchList = [name for name in re.split(r'[+ */()<>=&|]', tmp) if name]
    return np.unique(branchList)  # removes duplicates


def _substituteValues(expression, variablesDict):
    """Replace every key of `variablesDict` in `expression` by the first element of its value.

    Longer names are replaced first so that a name which is a prefix or
    substring of another one cannot corrupt it.
    """
    for varName in sorted(variablesDict, key=lambda key: len(str(key)), reverse=True):
        expression = expression.replace(str(varName), str(variablesDict[varName][0]))
    return expression


def evaluateCut(cut, variablesDict):
    """Evaluate a ROOT-style cut string for one set of variable values.

    `variablesDict` maps names to indexable values; element [0] is used.
    "||", "&&", a single "=" and "!" are translated to Python operators;
    "==", "!=", "<=" and ">=" are kept as they are. An empty cut is True.
    """
    # TODO: should be easy to let it be able take operations into account
    if (cut == ""):
        return True
    tmp = _substituteValues(str(cut), variablesDict)
    tmp = tmp.replace("||", " or ")
    tmp = tmp.replace("&&", " and ")
    tmp = re.sub(r'(?<![=!<>])=(?!=)', '==', tmp)  # only a lone "=" becomes "=="
    tmp = re.sub(r'!(?!=)', ' not ', tmp)           # logical not, but keep "!="
    # NOTE(review): eval() executes arbitrary code - only use with trusted cut strings.
    return eval(tmp)


def makeVariablePythonFriendly(variable):
    """Strip the "TMath::" tags and lowercase the whole expression (e.g. Log -> log)."""
    return variable.replace("TMath::", "").lower()


def evaluateVariable(variable, variablesDict):
    """Evaluate a variable expression with numexpr for one set of variable values.

    `variablesDict` maps names to indexable values; element [0] is used.
    """
    tmp = _substituteValues(variable, variablesDict)
    tmp = makeVariablePythonFriendly(tmp)
    return numexpr.evaluate(tmp).item()


def evaluateWeight(variablesDict, optionsDict):  # TODO
    """Return the event weight selected by the weighting flags, or 1.0 if none is set."""
    if (optionsDict["sWeighted"]):
        return variablesDict["N_Bplus_sw"][0]
    if (optionsDict["bWeighted"]):
        return variablesDict["weight_nLongTracks"][0]
    if (optionsDict["b2Dweighted"]):
        return variablesDict["weight2D_nLongTracks"][0]
    return 1.0


def datasetTag(optionsDict):
    """Return the dataset part of a name: sample type, "_TM" and "_Strip" suffixes."""
    if (optionsDict['MC']):
        if (optionsDict['ReferenceChannel']):
            tag = "RefChannel"
        elif (optionsDict['PHSP']):
            tag = "PHSP"
        else:
            tag = "MC"
        if (optionsDict['TM']):
            tag = tag + "_TM"
    else:
        tag = "data"
    if not (optionsDict['Preselected']):
        tag = tag + "_Strip"
    return tag


def weightTag(optionsDict):
    """Return the weighting suffix; when several flags are set the last one checked wins."""
    tag = ""
    if (optionsDict['sWeighted']):
        tag = "_sWeighted"
    if (optionsDict['bWeighted']):
        tag = "_1DWeight"
    if (optionsDict['b2Dweighted']):
        tag = "_2DWeight"
    return tag


def KshortDecaysInVeloTag(KshortDecaysInVelo=False):
    """Return "_LL"/"_DD" when KshortChannel() (from Globals) is true, "" otherwise."""
    if not KshortChannel():
        return ""
    return "_LL" if KshortDecaysInVelo else "_DD"


def variableTag(variable=""):
    """Turn a variable expression into a name: drop "TMath::", brackets and "|", spell out / and *."""
    name = variable
    for old, new in (("TMath::", ""),
                     ("(", ""),
                     (")", ""),
                     ("|", ""),
                     ("/", "_over_"),
                     ("*", "_x_")):
        name = name.replace(old, new)
    return name


def histName(variable, optionsDict):
    """Return the histogram name "Hist_<year>_<dataset tag><LL/DD tag><weight tag>".

    NOTE(review): `variable` does not enter the name. The original computed a
    tag from it and then overwrote it, so histograms of different variables
    share one name - confirm whether the variable tag should be included.
    """
    name = "Hist_" + str(optionsDict['year']) + "_" + datasetTag(optionsDict)
    # Pass the flag itself: the whole (non-empty) dictionary is always truthy
    name = name + KshortDecaysInVeloTag(optionsDict['KshortDecaysInVelo']) + weightTag(optionsDict)
    return name


def stopWatch(value):
    '''From seconds to Days;Hours:Minutes;Seconds'''
    total = int(value)
    Days, rest = divmod(total, 24 * 60 * 60)
    Hours, rest = divmod(rest, 60 * 60)
    Minutes, Seconds = divmod(rest, 60)
    print (Days, ";", Hours, ":", Minutes, ";", Seconds)


class ShowArgumentsParser(argparse.ArgumentParser):
    """ArgumentParser that prints the usage and the description on a parsing error."""

    def error(self, message):
        """Write the error, the usage and the description to stderr and exit with status 2."""
        sys.stderr.write('error: %s\n\n' % message)
        self.print_usage(sys.stderr)  # the parser itself, not a global called `parser`
        sys.stderr.write('\n' + (self.description or ''))
        sys.exit(2)


def getTreeWithPairingBranch(treeFile, tree, run, test):
    """Add a "pairingNumber" branch built from eventNumber and runNumber.

    The number is pi(a,b) = 1/2(a+b)(a+b+1)+b with a the event number and b
    the run number. For run == 2 the run number is reduced by 100000; event
    numbers above 6100000000 are reduced by that amount and paired with run 1.

    The file is written, closed and reopened in UPDATE mode.

    Returns (reopened file, tree read back from it).
    """
    print ("Creating the pairing function...")

    # Load needed branches
    eventNumber = array('L', [0])
    runNumber = array('L', [0])

    # Activate and read branches
    tree.SetBranchStatus('*', 0)
    tree.SetBranchStatus('eventNumber', 1)
    tree.SetBranchStatus('runNumber', 1)
    tree.SetBranchAddress('eventNumber', eventNumber)
    tree.SetBranchAddress('runNumber', runNumber)

    # Add new branch
    pairingNumber = array('L', [0])
    b_pairingNumber = tree.Branch("pairingNumber", pairingNumber, 'pairingNumber/l')

    # Loop over events and save
    pairingNumber[0] = 10
    for evt in range(tree.GetEntries()):
        tree.GetEntry(evt)
        # pi(a,b)= 1/2(a+b)(a+b+1)+b
        if (run == 2):
            rn = runNumber[0] - 100000
        else:
            rn = runNumber[0]
        if (eventNumber[0] > 6100000000):  # ONLY FOR DATA 2017!!!
            rn = 1  # Because the high numbers come from only one run
            en = eventNumber[0] - 6100000000
        else:
            en = eventNumber[0]
        # Integer division: the product of two consecutive integers is even, so this
        # is exact, whereas "/" is float division on Python 3 and loses precision
        # for large event numbers.
        pairingNumber[0] = (en + rn) * (en + rn + 1) // 2 + rn
        if (test):
            print (en, rn, pairingNumber[0])
        b_pairingNumber.Fill()

    # Get tree path
    treePath = treeFile.GetPath()
    treePath = treePath.replace(".root:/", ".root")
    if (test):
        print ("treePath:", treePath)

    # Get tree name (local name chosen so that the treeName() function is not shadowed)
    nameOfTree = tree.GetName()
    if (test):
        print ("treeName:", nameOfTree)

    # Just for safety reasons, close the file and open it again
    print ("Writing into file", treePath)
    treeFile.Write("", TFile.kOverwrite)
    treeFile.Close()
    print ("Open file", treePath)
    treeFileNew = TFile.Open(treePath, "UPDATE")
    treeNew = treeFileNew.Get(nameOfTree)

    return treeFileNew, treeNew


def checkMCyear(year, ReferenceChannel, PHSP):
    """Return the year whose MC is used: 2016 replaces 2015 for non-Kshort signal MC.

    KshortChannel (from Globals) has to be called here; the bare function
    object is always truthy, which made the 2015 -> 2016 replacement unreachable.
    """
    if ((not KshortChannel()) and (not ReferenceChannel) and (not PHSP) and year == 2015):
        return 2016
    return year