Update 'Utils3.py'

Renata Kopecná 2022-02-01 17:26:51 +01:00
parent 1a61301eab
commit 93e005aaba

@ -5,152 +5,86 @@ Helper functions originally created for [[the comparison tool|Comparison-tool]].
## Classes ## Classes
### class ShowArgumentsParser(argparse.ArgumentParser): ### class ShowArgumentsParser(argparse.ArgumentParser):
def error(self, message): Class used for pretty parser error printouts
sys.stderr.write('error: %s\n\n' %message)
parser.print_usage(sys.stderr)
sys.stderr.write('\n'+self.description)
sys.exit(2)
## Global variables ## Global variables
intVarsList = ["nTracks"
,"nLongTracks"
,"nVeloTracks"
,"nTTracks"
,"nDownstreamTracks"
,"nUpstreamTracks"
,"nCandidate"
,"totCandidates"
,"nSPDHits"
,"nPV"
,"B_plus_ID"
,"B_plus_TRUEID"
,"K_star_plus_ID"
,"K_plus_ID"
,"pi_zero_resolved_ID"
,"mu_plus_ID"
,"mu_minus_ID"
,"Polarity"
,"nDiMuonMassBin"
,"TMedBKGCAT"
]
expressionList(): ### intVarsList
return ["Sqrt" A list of tree branches that are saved as integers
,"Abs"
,"Sin"
,"Cos"
,"Exp"
,"Log"
]
## Functions ## Functions
### def isInt(variable=""): ### def expressionList()
A function that returns true for variables that are integers * **Return**
* A list of used expressions, used for parsing any input formula
### def treeName(MC=False, TM=False, Preselected=False)
Returns the correct tree name for given sample.
### def isInt()
### def getOptionsDictionary(year, Run, magnet, MC, TM, ReferenceChannel, PHSP, Preselected, BDTed, sWeighted, bWeighted, b2Dweighted, weightBranch, KshortDecaysInVelo, UseLowQ2Range) * **Parameters**
Returns a dictionary with all possible saved options * variable=""
dictionaryTmp = { * **Return**
'year': year, * True if the variable is stored as an integer, False otherwise
'Run' : Run,
'magnet' : magnet,
'MC' : int(MC),
'TM' : int(TM),
'ReferenceChannel' : int(ReferenceChannel),
'PHSP' : int(PHSP),
'Preselected' : int(Preselected),
'BDTed' : BDTed,
'sWeighted' : sWeighted,
'bWeighted' : bWeighted,
'b2Dweighted' : b2Dweighted,
'weightBranch' : weightBranch,
'KshortDecaysInVelo' : int(KshortDecaysInVelo),
'UseLowQ2Range' : UseLowQ2Range
}
return dictionaryTmp
### def checkYearSample(optionsDict) ### def treeName()
Checks whether the selected data sample defined by optionsDict makes sense. If yes, returns True, otherwise False
* **Parameters**
* MC=False, TM=False, Preselected=False
* **Return**
* The correct tree name for given sample.
### def getOptionsDictionary()
* **Parameters**
* year
* Run
* magnet
* MC
* TM
* ReferenceChannel
* PHSP
* Preselected
* BDTed
* sWeighted
* bWeighted
* b2Dweighted
* weightBranch
* KshortDecaysInVelo
* UseLowQ2Range
* **Return**
* A dictionary with all possible saved options
### def checkYearSample()
* **Parameters**
* optionsDict
* **Return**
* True if the selected data sample defined by optionsDict makes sense, False otherwise
### def checkYearSample(year,MC,ReferenceChannel,PHSP,KshortChannel) ### def checkYearSample()
Checks whether the selected data sample defined by the options makes sense. If yes, returns True, otherwise False
* **Parameters**
* year,MC,ReferenceChannel,PHSP,KshortChannel
* **Return**
* True if the selected data sample defined by the options makes sense, False otherwise
### def getTreePath(optionsDict, verbose) ### def getTreePath()
Calls [[getPathForPython|getPathForPython]]. * **Parameters**
getPathForPython usage: * optionsDict, verbose
getPathForPython command year Run magnet Preselected MC ReferenceChannel PHSP KshortDecayInVelo Selection UseLowQ2Range * **Return**
It has to have everything to keep it easy * Returns the path to the tree based on the optionsDict. Calls [[getPathForPython|getPathForPython]].
filePath2 = ""
command = "/home/lhcb/kopecna/B2KstarMuMu/code/ewp-Bplus2Kstmumu-AngAna/CodeForTests/CompareUltimate/getPathForPython " #TODO maybe replace by a current path
if(optionsDict['BDTed']): ### def addToTChain()
options = "BDToutput %(year)i %(Run)i %(magnet)s 0 %(MC)r %(ReferenceChannel)r %(PHSP)r %(KshortDecaysInVelo)r 0 %(UseLowQ2Range)r" %optionsDict
if verbose: print ("[DEBUG]\t\tRunning a c++ script to get the name of the file to open: ", command + options) * **Parameters**
#Read second line from the c++ script (first one is the Hello...) * tree, optionsDict, verbose
filePath = subprocess.check_output(command + options, shell=True,universal_newlines=True).split('\n')[1] * **Return**
elif (optionsDict['sWeighted'] or optionsDict['bWeighted'] or optionsDict['b2Dweighted']):
options = "BDTinput %(year)i %(Run)i %(magnet)s 0 %(MC)r %(ReferenceChannel)r %(PHSP)r %(KshortDecaysInVelo)r 0 %(UseLowQ2Range)r" %optionsDict
if verbose: print ("[DEBUG]\t\tRunning a c++ script to get the name of the file to open: ", command + options)
#Read second line from the c++ script (first one is the Hello...)
filePath = subprocess.check_output(command + options, shell=True,universal_newlines=True).split('\n')[1]
else:
if (optionsDict['magnet']=="both"):
#down
options = "input %(year)i %(Run)i down %(Preselected)r %(MC)r %(ReferenceChannel)r %(PHSP)r %(KshortDecaysInVelo)r 0 %(UseLowQ2Range)r" %optionsDict
if verbose: print ("[DEBUG]\t\tRunning a c++ script to get the name of the file to open: ", command + options)
filePath = subprocess.check_output(command + options, shell=True,universal_newlines=True).split('\n')[1]
#up
options = "input %(year)i %(Run)i up %(Preselected)r %(MC)r %(ReferenceChannel)r %(PHSP)r %(KshortDecaysInVelo)r 0 %(UseLowQ2Range)r" %optionsDict
if verbose: print ("[DEBUG]\t\tRunning a c++ script to get the name of the file to open: ", command + options)
filePath2 = subprocess.check_output(command + options, shell=True,universal_newlines=True).split('\n')[1]
else:
options = "input %(year)i %(Run)i %(magnet)s %(Preselected)r %(MC)r %(ReferenceChannel)r %(PHSP)r %(KshortDecaysInVelo)r 0 %(UseLowQ2Range)r" %optionsDict
if verbose: print ("[DEBUG]\t\tRunning a c++ script to get the name of the file to open: ", command + options)
filePath = subprocess.check_output(command + options, shell=True,universal_newlines=True).split('\n')[1]
return filePath, filePath2
def getTreeList(year, Run, MC,TM,Reference,PHSP, verbose): #WIP
    """Collect the BDT-output file path for every (year, run) pair of a sample.

    The paths are obtained by running the external ``getPathForPython``
    executable once per pair. Its usage is:

        getPathForPython command year Run magnet Preselected MC
                         ReferenceChannel PHSP KshortDecayInVelo Selection
                         UseLowQ2Range

    Every argument is always passed to keep the call simple. Here the command
    is fixed to ``BDToutput``, the magnet to ``both``, Preselected to ``1``
    and the last three arguments to ``0``.

    Parameters:
        year, Run: forwarded to ``yearRunArray`` to build the list of
            (year, run) pairs.
        MC, Reference, PHSP: sample flags; forwarded to ``yearRunArray`` and
            passed to the executable as 0/1 integers.
        TM: accepted for interface compatibility; it does not influence the
            executable call.
        verbose: if true, print the processed pairs, the executed command
            lines and the resulting list.

    Returns:
        A list with one file path (string) per (year, run) pair.

    Raises:
        subprocess.CalledProcessError: if the executable exits with a
            non-zero status.
        IndexError: if the executable prints fewer than two lines.
    """
    # TODO maybe replace by a current path
    executable = "/home/lhcb/kopecna/B2KstarMuMu/code/ewp-Bplus2Kstmumu-AngAna/CodeForTests/CompareUltimate/getPathForPython"
    yearArr = yearRunArray(year,Run, MC,Reference,PHSP)

    # The sample flags do not change between iterations, so format them once.
    sampleFlags = [str(int(MC)), str(int(Reference)), str(int(PHSP))]

    filePath = []
    # Each column of yearArr holds one pair: entry 0 is the year, entry 1 the run.
    for yr in yearArr.T:
        if (verbose): print ("Opening year", yr)
        arguments = ["BDToutput", str(yr[0]), str(yr[1]), "both", "1"] + sampleFlags + ["0", "0", "0"]
        if verbose: print ("[DEBUG]\t\tRunning a c++ script to get the name of the file to open: ", " ".join([executable] + arguments))
        # Pass an argument list instead of a shell string so that no shell
        # parses the command line.
        output = subprocess.check_output([executable] + arguments, universal_newlines=True)
        # Read second line from the c++ script (first one is the Hello...)
        filePath.append(output.split('\n')[1])
    if (verbose): print ("Files read: ", filePath)
    return filePath
### def addToTChain(tree, optionsDict, verbose):
filePath, filePath2 = getTreePath(optionsDict=optionsDict, verbose=verbose) filePath, filePath2 = getTreePath(optionsDict=optionsDict, verbose=verbose)
#Get first file name #Get first file name
@ -168,161 +102,98 @@ def getTreeList(year, Run, MC,TM,Reference,PHSP, verbose): #WIP
tree.Add(filePath) tree.Add(filePath)
### def addTMathTags(varName): ### def addTMathTags()
* **Parameters**
* varName
* **Return**
* varName with every expression from expressionList() prefixed by "TMath::"
for expr in expressionList(): for expr in expressionList():
varName = varName.replace(expr,"TMath::"+expr) varName = varName.replace(expr,"TMath::"+expr)
return varName return varName
### def list_of_DTF_vars (): ### def list_of_DTF_vars ()
return [
"_PT",
"_PX",
"_PY",
"_PZ",
"_PE",
"_P"
"_ETA" ,
"_MM",
"B_plus_M" ,
"K_star_plus_M" ,
"pi_zero_resolved_plus_M"
]
### def replace_variables_to_DTF(variable = ""): * **Return**
* A list of variables that can have a DTF suffix
### def replace_variables_to_DTF()
Pretty much appends \"_DTF\" to everything
* **Parameters**
* variable = ""
* **Return**
* the updated name of the variable (with \"_DTF\" appended)
#Append "_DTF" to everything in the list above by
for var in list_of_DTF_vars():
if (variable.find(var) != -1):
positions = [i.start() for i in re.finditer(var, variable)]
#print (positions)
for i in range(len(positions)):
pos=positions[i]
variable = variable[:pos+len(var)] + "_DTF" + variable[pos+len(var):]
positions = [x+4 for x in positions]
#Remove false DTF variales, such as MinIP (searching for _M)
removeList = re.findall('DTF[A-Z]', variable)
#print (removeList)
for remove in removeList:
positions = [i.start() for i in re.finditer(remove, variable)]
for i in range(len(positions)):
pos=positions[i]
variable = variable[:pos-1] + variable[pos+3:]
positions = [x-4 for x in positions]
return variable
### def getListOfUsedVariables(variable, cut):
tmp = str(variable) + "|" + str(cut) #| is there as a separator
tmp = tmp.replace("gamma1","gammaa")
tmp = tmp.replace("gamma2","gammab") #Protect from removing numbers
tmp = re.sub(r'[0-9]+', '', tmp) #remove numbers
tmp = tmp.replace("gammaa","gamma1")
tmp = tmp.replace("gammab","gamma2") #Protect from removing numbers
tmp = tmp.replace("CHI","CHI2") #check CHI2 numbers
tmp = tmp.replace("DOCA","DOCA1") #check CHI2 numbers
tmp = tmp.replace("Q","Q2") #check Q2 numbers #TODO
tmp = tmp.replace(".","") #remove decimals left from numbers
tmp = tmp.replace("-","|") #remove minus from negative numbers, keep an operator
tmp = tmp.replace("TMath::","") #remove functions
for expr in expressionList():
tmp = tmp.replace(expr,"") #Remove expressions
tmp = tmp.replace(expr.lower(),"") #remove expressions without capital
tmp = tmp.replace("<=","<")
tmp = tmp.replace(">=",">")
tmp = tmp.replace("==","=")
tmp = tmp.replace("&&","&")
tmp = tmp.replace("||","|")
tmp = tmp.replace("!","")
#tmp = re.sub(r"\s+", "", tmp) #Removes whitespaces, s matches all whitespaces
branchList = re.split('[+ - * / ( ) < > = & | ]', tmp)
branchList = filter(None, branchList) #removes empty strings
branchList = np.unique(branchList) #Removes duplicates
return branchList
### def evaluateCut(cut,variablesDict): ### def getListOfUsedVariables()
#TODO: should be easy to let it be able take operations into account Parses the desired cut on the given variable to be readable by python
if (cut == ""): return True * **Parameters**
tmp = str(cut) * variable, cut
for varName,varValue in variablesDict.items(): * **Return**
tmp = tmp.replace(str(varName),str(varValue[0])) * List of branches that need to be loaded
tmp = tmp.replace ("||"," or ") *
tmp = tmp.replace ("&&"," and ") ### def evaluateCut()
tmp = tmp.replace ("="," == ") * **Parameters**
tmp = tmp.replace ("! =="," != ") #in case * cut,variablesDict
#print ("[DEBUG]\t\t", tmp) * **Return**
return eval(tmp) * parses and evaluates the cut using eval()
### def makeVariablePythonFriendly(variable): ### def makeVariablePythonFriendly()
tmp = variable Removes ROOT tags and lowers all the letters
tmp = tmp.replace("TMath::","") #remove ROOT tags * **Parameters**
tmp = tmp.lower() #replaces e.g. Log by log and so on * variable
return tmp * **Return**
* variable parsed to be readable by python
### def evaluateVariable(variable,variablesDict): ### def evaluateVariable()
* **Parameters**
* variable,variablesDict
* **Return**
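* The value of the variable expression, evaluated with numexpr after the names from variablesDict are replaced by their values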
tmp = variable tmp = variable
for varName,varValue in variablesDict.items(): for varName,varValue in variablesDict.items():
tmp = tmp.replace(str(varName),str(varValue[0])) tmp = tmp.replace(str(varName),str(varValue[0]))
tmp = makeVariablePythonFriendly(tmp) tmp = makeVariablePythonFriendly(tmp)
return numexpr.evaluate(tmp).item() return numexpr.evaluate(tmp).item()
### def evaluateWeight(variablesDict,optionsDict): #TODO ### def evaluateWeight() #TODO
if (optionsDict["sWeighted"]): return variablesDict["N_Bplus_sw"][0] * **Parameters**
elif (optionsDict["bWeighted"]): return variablesDict["weight_nLongTracks"][0] * variablesDict,optionsDict
elif (optionsDict["b2Dweighted"]): return variablesDict["weight2D_nLongTracks"][0] * **Return**
else: return 1.0 * weight for the given optionsDict
### def datasetTag(optionsDict): ### def datasetTag()
tag = "" * **Parameters**
if (optionsDict['MC']): * optionsDict
if (optionsDict['ReferenceChannel']): * **Return**
tag = "RefChannel" * Tag (name) for the desired dataset
elif (optionsDict['PHSP']):
tag = "PHSP"
else:
tag = "MC"
if (optionsDict['TM']):
tag = tag + "_TM"
else:
tag = "data"
if not (optionsDict['Preselected']):
tag = tag + "_Strip"
return tag
### def weightTag(optionsDict): ### def weightTag()
tag = "" * **Parameters**
if (optionsDict['sWeighted']): tag = "_sWeighted" * optionsDict
if (optionsDict['bWeighted']): tag = "_1DWeight" * **Return**
if (optionsDict['b2Dweighted']): tag = "_2DWeight" * weight tag for the given optionsDict
return tag *
### def KshortDecaysInVeloTag()
* **Parameters**
* KshortDecaysInVelo=False
* **Return**
* Empty string if K+pi0, otherwise '\_LL' if KshortDecaysInVelo else '\_DD'
### def KshortDecaysInVeloTag(KshortDecaysInVelo=False): ### def variableTag()
return "" if not KshortChannel() else ("_LL" if KshortDecaysInVelo else "_DD" ) * **Parameters**
* variable=""
### def variableTag(variable=""): * **Return**
name = variable.replace("TMath::","") * parsed and cleaned-up variable name for saving the file. It is not perfect but works somewhat
name = name.replace("(","")
name = name.replace(")","")
name = name.replace("|","")
name = name.replace("/","_over_")
name = name.replace("*","_x_")
return name
### def histName(variable, optionsDict): ### def histName()
* **Parameters**
* variable, optionsDict
* **Return**
*
name = variableTag(variable) name = variableTag(variable)
name = name.replace("TMath::","") #remove functions name = name.replace("TMath::","") #remove functions
for expr in (expressionList()): #For whatever reason I have to but the brackets there for expr in (expressionList()): #For whatever reason I have to but the brackets there
@ -333,79 +204,25 @@ def getTreeList(year, Run, MC,TM,Reference,PHSP, verbose): #WIP
return name return name
### def stopWatch(value): ### def stopWatch()
'''From seconds to Days;Hours:Minutes;Seconds'''
valueD = (((value/365)/24)/60) Prints the time passed between `value` and now.
Days = int (valueD) * **Parameters**
* value
valueH = (valueD-Days)*365
Hours = int(valueH)
valueM = (valueH - Hours)*24
Minutes = int(valueM)
valueS = (valueM - Minutes)*60
Seconds = int(valueS)
print (Days,";",Hours,":",Minutes,";",Seconds) ### def getTreeWithPairingBranch()
Adds the branch with the corresponding pairing function to the tree. Pairing function is a unique number calculated from the event and run numbers. This is calculated using the [Cantor pairing function](https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function)
* **Parameters**
* treeFile,tree,run,test
* **Return**
* The new tree file and the new tree
### def checkMCyear()
* **Parameters**
* year, ReferenceChannel, PHSP
* **Return**
### def getTreeWithPairingBranch(treeFile,tree,run,test):
print ("Creating the pairing function...")
#Load needed branches
eventNumber = array('L',[0])
runNumber = array('L',[0])
#Activate and read branches
tree.SetBranchStatus('*',0)
tree.SetBranchStatus('eventNumber',1)
tree.SetBranchStatus('runNumber',1)
tree.SetBranchAddress('eventNumber',eventNumber)
tree.SetBranchAddress('runNumber',runNumber)
#Add new branch
pairingNumber = array('L',[0])
b_pairingNumber = tree.Branch("pairingNumber", pairingNumber, 'pairingNumber/l')
#Loop over events and save
pairingNumber[0] = 10
for evt in range (tree.GetEntries()):
tree.GetEntry(evt) #pi(a,b)= 1/2(a+b)(a+b+1)+b
if (run ==2): rn = runNumber[0] - 100000
else: rn = runNumber[0]
if (eventNumber[0]>6100000000): #ONLY FOR DATA 2017!!!
rn = 1 #Because the high numbers come from only one run
en = eventNumber[0]-6100000000
else:
en = eventNumber[0]
pairingNumber[0] = int((en+rn)*(en+rn+1)/2+rn)
if (test): print (en,rn,pairingNumber[0])
b_pairingNumber.Fill()
#Get tree path
treePath = treeFile.GetPath()
treePath = treePath.replace(".root:/",".root")
if (test): print ("treePath:", treePath)
#Get tree name
treeName = tree.GetName()
if (test): print ("treeName:", treeName)
#Just for safety reasons, close the file and open it again
print ("Writing into file",treePath)
treeFile.Write("",TFile.kOverwrite)
treeFile.Close()
print ("Open file",treePath)
treeFileNew = TFile.Open(treePath,"UPDATE")
treeNew = treeFileNew.Get(treeName)
return treeFileNew,treeNew
### def checkMCyear(year, ReferenceChannel, PHSP):
if ((not KshortChannel) and (not ReferenceChannel) and (not PHSP) and year == 2015): return 2016 if ((not KshortChannel) and (not ReferenceChannel) and (not PHSP) and year == 2015): return 2016
else: return year else: return year