Compare commits

..

15 Commits

Author SHA1 Message Date
c562013e27 Enhanced README 2022-09-15 12:15:29 +02:00
0f8ca4f87d Enhanced README 2022-09-15 12:13:30 +02:00
3905f6bb57 Enhanced README 2022-09-15 12:10:52 +02:00
13f509fa1d Added b2kstll custom fixes 2022-09-15 12:06:35 +02:00
696e5503e2 Updated README 2022-09-02 12:20:02 +02:00
e626160426 Adjusted setup in readme 2022-08-30 11:45:57 +02:00
03066c590e Exchanged requirements type 2022-08-30 11:40:31 +02:00
dc867874cd Skimmed requirements further 2022-08-24 15:54:11 +02:00
2c90c74eac Included Python Version 2022-08-24 15:50:24 +02:00
37f3c39657 Skimmed requirements.txt 2022-08-24 15:07:50 +02:00
a473f9ccef Updated properties env file 2022-08-24 12:25:16 +02:00
84cc89889c Updated README 2022-08-24 12:21:41 +02:00
a850513fd5 Updated README 2022-08-24 12:17:55 +02:00
eb400f021a README File added 2022-08-24 12:05:31 +02:00
0bc6296fb7 Python Scripts used for Angular Analysis for B+toK*+Pi0mumu 2022-08-24 11:23:16 +02:00
28 changed files with 13292 additions and 0 deletions

View File

@ -0,0 +1,79 @@
import os
import dotenv
import argparse
import sys
import pandas as pd
import zfit
from prettytable import PrettyTable
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))
from b2kstll.models.angular import B2Kstll
from b2kstll.plot import plot_distributions
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature
FILE = os.getenv('FILE_GEN')
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--q2bin', dest ='q2bin', default = 0)
args = parser.parse_args()
Q2BIN = int(args.q2bin)
bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
file_manager = FileManager(file = FILE, tree = "Events",
branches = ["q2", "costhetak", "costhetal", "phi"])
data = file_manager.extract_data()
q2, costhetak, costhetal, phi = data[0], data[1], data[2], data[3]
q2, indices = select_feature(feature = q2, limits = bin_ranges[Q2BIN])
costhetak = costhetak[indices]
costhetal = costhetal[indices]
phi = phi[indices]
lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
costhetak, indices = select_feature(feature = costhetak, limits = (lower_costhetak_cut, upper_costhetak_cut))
q2 = q2[indices]
costhetal = costhetal[indices]
phi = phi[indices]
angular_data = pd.DataFrame({'ctl': costhetal, 'ctk': costhetak, 'phi': phi})
angular_data.to_csv(f"angular_fit_generator_bin_{Q2BIN}.csv", index = False)
x = B2Kstll('ctl','ctk','phi')
obs, pdf, _ = x.get_pdf('PWave')
datazfit = zfit.Data.from_pandas(df = angular_data, obs = obs)
nll = zfit.loss.UnbinnedNLL(model = pdf, data = datazfit)
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
print(result)
print(result.params, pdf)
param_errors, _ = result.errors()
print(param_errors)
info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
for name in fit_labels:
value = result.params[name]["value"]
lower = result.params[name]["minuit_minos"]["lower"]
upper = result.params[name]["minuit_minos"]["upper"]
info_table.add_row([name, value, lower, upper])
print(info_table)
plot_distributions(result, suffix = f"accPwavePDF_generator_level_bin_{Q2BIN}")
if __name__ == "__main__":
main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,73 @@
import os
import dotenv
import sys
import argparse
import mplhep
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))
from analysis.efficiency import get_efficiency_model_class
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature
FILE_MC_PHSP = os.getenv('MC_PHSP_FILE')
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--q2bin', dest ='q2bin', default = 0)
args = parser.parse_args()
Q2BIN = int(args.q2bin)
mplhep.style.use("LHCb2")
bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
filemanager = FileManager(file = FILE_MC_PHSP, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
mc_phsp_data = filemanager.extract_data()
q2_mc_phsp, theta_k_mc_phsp, theta_l_mc_phsp, phi_mc_phsp = mc_phsp_data[0], mc_phsp_data[1], mc_phsp_data[2], mc_phsp_data[3]
q2_mc_phsp, indices = select_feature(feature = q2_mc_phsp, limits = bin_ranges[Q2BIN])
phi_mc_phsp = phi_mc_phsp[indices]
theta_l_mc_phsp = theta_l_mc_phsp[indices]
theta_k_mc_phsp = theta_k_mc_phsp[indices]
lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
theta_k_mc_phsp, indices = select_feature(feature = theta_k_mc_phsp, limits = (lower_costhetak_cut, upper_costhetak_cut))
q2_mc_phsp = q2_mc_phsp[indices]
phi_mc_phsp = phi_mc_phsp[indices]
theta_l_mc_phsp = theta_l_mc_phsp[indices]
data_mc_phsp = [phi_mc_phsp, theta_l_mc_phsp, theta_k_mc_phsp, q2_mc_phsp]
df = pd.DataFrame({'ctl': data_mc_phsp[1], 'ctk': data_mc_phsp[2], 'phi': data_mc_phsp[0], 'q2': q2_mc_phsp})
orders = {"ctl": 4, "ctk": 6, "phi": 2}
ranges = {"ctl": [-1.0, 1.0], "ctk": [lower_costhetak_cut, upper_costhetak_cut], "phi": [-np.pi, np.pi]}
EffClass = get_efficiency_model_class('legendre')
eff = EffClass.fit(df, ['ctl', 'ctk', 'phi'], ranges = ranges,
legendre_orders = orders, calculate_cov = False, chunk_size = 2000)
out_file = eff.write_to_disk(f'acc_3d_JpsiKstMC_{Q2BIN}_bin.yaml')
print(out_file)
labels = {'ctl': r'$\cos \theta_L$', 'ctk': r'$\cos \theta_K$', 'phi': '$\phi$', 'q2': '$q^2$ [GeV$^2$]'}
for v in ['ctl', 'ctk', 'phi']:
plt.subplots(figsize = (15, 10))
plt.xlim(*ranges[v])
x, y = eff.project_efficiency(v, n_points = 1000)
plt.plot(x, y, 'b-')
plt.hist(df[v], density = True, bins = 50, color = 'grey', alpha = 0.5)
plt.ylabel("a.u.", horizontalalignment = 'right', y = 1.0)
plt.xlabel(labels[v], horizontalalignment = 'right', x = 1.0)
plt.savefig(f'acc_3d_JpsiKstMC_phsp_mc_{v}_{Q2BIN}_bin.pdf')
plt.close()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,77 @@
import os
import dotenv
import sys
import argparse
import pandas as pd
import mplhep
import zfit
from prettytable import PrettyTable
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))
from b2kstll.models.angular import B2Kstll
from b2kstll.plot import plot_distributions
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature
FILE_MC = os.getenv('MC_FILE')
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--q2bin', dest ='q2bin', default = 0)
args = parser.parse_args()
Q2BIN = int(args.q2bin)
mplhep.style.use("LHCb2")
bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
filemanager = FileManager(file = FILE_MC, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
mc_data = filemanager.extract_data()
q2_mc, theta_k_mc, theta_l_mc, phi_mc = mc_data[0], mc_data[1], mc_data[2], mc_data[3]
q2_mc, indices = select_feature(feature = q2_mc, limits = bin_ranges[Q2BIN])
phi_mc = phi_mc[indices]
theta_l_mc = theta_l_mc[indices]
theta_k_mc = theta_k_mc[indices]
lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
theta_k_mc, indices = select_feature(feature = theta_k_mc, limits = (lower_costhetak_cut, upper_costhetak_cut))
q2_mc = q2_mc[indices]
phi_mc = phi_mc[indices]
theta_l_mc = theta_l_mc[indices]
angular_data = pd.DataFrame({'ctl': theta_l_mc, 'ctk': theta_k_mc, 'phi': phi_mc})
angular_data.to_csv(f"ang_fit_mc_{Q2BIN}_bin.csv", index = False)
x = B2Kstll('ctl','ctk','phi')
x.set_acc(f"./acc_3d_JpsiKstMC_{Q2BIN}_bin.yaml")
obs, pdf, params = x.get_pdf('PWave')
datazfit = zfit.Data.from_pandas(df = angular_data, obs = obs)
nll = zfit.loss.UnbinnedNLL(model = pdf, data = datazfit)
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
param_errors, _ = result.errors()
print(param_errors)
info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
for name in fit_labels:
value = result.params[name]["value"]
lower = result.params[name]["minuit_minos"]["lower"]
upper = result.params[name]["minuit_minos"]["upper"]
info_table.add_row([name, value, lower, upper])
print(info_table)
plot_distributions(result, suffix = f"accPwavePDF_mc_ang_fit_{Q2BIN}_bin")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,83 @@
import re
import numpy as np
from prettytable import PrettyTable
def main():
bin_labels = r"$1.10 < q^2 < 6.00$"
sample_1 = """+----------+------------------------+------------------------+-----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+------------------------+------------------------+-----------------------+\n
| AFB | -0.030693939006905927 | -0.0027448398251912667 | 0.0026690127045430573 |\n
| FL | 0.7641860897390124 | -0.0031519177028962353 | 0.00318403722094425 |\n
| S3 | -0.009793642767841643 | -0.0039500538800350165 | 0.003962284039325228 |\n
| S4 | -0.11948833798312776 | -0.005018301784506872 | 0.005112053156795224 |\n
| S5 | -0.14672854148123884 | -0.005077800231656763 | 0.0051383664423323315 |\n
| S7 | -0.0009713979621990259 | -0.005287904531482017 | 0.005275166744474821 |\n
| S8 | 0.003178551357900045 | -0.005257961171467131 | 0.005258879438751017 |\n
| S9 | 0.005757808359963191 | -0.004016272402049568 | 0.004019873761938199 |\n
+----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
sample_2 = """+---------------------------------------------------------------------------------+\n
| q2_split_$1.10 < q^2 < 6.00$_average |\n
+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | -0.03611162259371692 | -0.008731742137981694 | 0.008731742137981694 |\n
| FL | 0.7816031469957773 | -0.010041813091402643 | 0.010041813091402643 |\n
| S3 | -0.004913319742985065 | -0.010997797016445892 | 0.010997797016445892 |\n
| S4 | -0.11446696701061239 | -0.0177373887414066 | 0.0177373887414066 |\n
| S5 | -0.1162615465324004 | -0.016777187198089353 | 0.016777187198089353 |\n
| S7 | 0.013450446832457594 | -0.017351060768029697 | 0.017351060768029697 |\n
| S8 | 0.012943806403734165 | -0.018166933762978635 | 0.018166933762978635 |\n
| S9 | -0.015991133298640343 | -0.011753160273233804 | 0.011753160273233804 |\n
+----------+-----------------------+-----------------------+----------------------+""" # MC LEVEL
variables = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
select_regex = [".*AFB.*\d+", ".*FL.*\d+", ".*S3.*\d+", ".*S4.*\d+", ".*S5.*\d+", ".*S7.*\d+", ".*S8.*\d+", ".*S9.*\d+"]
value_regex = "[- ]\d+.\d+"
sample_table_1 = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
sample_table_2 = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
samples = [sample_1, sample_2]
for variable, regex in zip(variables, select_regex):
sample_1_splitted = samples[0].splitlines()
for line in sample_1_splitted:
subresult = re.match(regex, line)
if subresult != None:
substring = subresult.group(0)
result = re.findall(value_regex, substring)
value = np.float64(result[0])
lower_err = np.float64(result[1])
upper_err = np.float64(result[2])
sample_table_1['1'][variable] = (value, lower_err, upper_err)
sample_2_splitted = samples[1].splitlines()
for line in sample_2_splitted:
subresult = re.match(regex, line)
if subresult != None:
substring = subresult.group(0)
result = re.findall(value_regex, substring)
value = np.float64(result[0])
lower_err = np.float64(result[1])
upper_err = np.float64(result[2])
sample_table_2['1'][variable] = (value, lower_err, upper_err)
info_table = PrettyTable(["Variable", r"|MC Average - Gen|", "Lower Error", "Upper Error", "Stat. Significance"])
for name in variables:
index = '1'
delta_value = np.abs(sample_table_1[index][name][0] - sample_table_2[index][name][0])
quadratic_lower = -np.sqrt(sample_table_1[index][name][1]**2 + sample_table_2[index][name][1]**2)
quadratic_upper = np.sqrt(sample_table_1[index][name][2]**2 + sample_table_2[index][name][2]**2)
stat_significance = np.abs(delta_value / quadratic_lower)
info_table.add_row([name, delta_value, quadratic_lower, quadratic_upper, stat_significance])
info_table.title = f"{bin_labels} Average over 3 Bins in MC"
print(info_table)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,81 @@
import re
import numpy as np
from prettytable import PrettyTable
def main():
bin_labels = r"$1.10 < q^2 < 6.00$"
sample_1 = """+----------+------------------------+------------------------+-----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+------------------------+------------------------+-----------------------+\n
| AFB | -0.030693939006905927 | -0.0027448398251912667 | 0.0026690127045430573 |\n
| FL | 0.7641860897390124 | -0.0031519177028962353 | 0.00318403722094425 |\n
| S3 | -0.009793642767841643 | -0.0039500538800350165 | 0.003962284039325228 |\n
| S4 | -0.11948833798312776 | -0.005018301784506872 | 0.005112053156795224 |\n
| S5 | -0.14672854148123884 | -0.005077800231656763 | 0.0051383664423323315 |\n
| S7 | -0.0009713979621990259 | -0.005287904531482017 | 0.005275166744474821 |\n
| S8 | 0.003178551357900045 | -0.005257961171467131 | 0.005258879438751017 |\n
| S9 | 0.005757808359963191 | -0.004016272402049568 | 0.004019873761938199 |\n
+----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
sample_2 = """+----------+-----------------------+------------------------+-----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+------------------------+-----------------------+\n
| AFB | -0.012550006150333942 | -0.0026956457681952418 | 0.0026965400452885245 |\n
| FL | 0.775244817820803 | -0.0030583204477219544 | 0.003052276555621123 |\n
| S3 | -0.004731791649125892 | -0.0035876206689923096 | 0.0035671656402230033 |\n
| S4 | -0.12872329698226329 | -0.0054353577466807685 | 0.005452103886085265 |\n
| S5 | -0.13102847916212362 | -0.005094396678852025 | 0.005083576918431511 |\n
| S7 | 0.014829855067600298 | -0.005251337297972407 | 0.005258433080994564 |\n
| S8 | 0.011919411650166327 | -0.005666331650738382 | 0.005691209183046813 |\n
| S9 | -0.013513044362093558 | -0.0036909184587611017 | 0.0036915155625497006 |\n
+----------+-----------------------+------------------------+-----------------------+""" # MC LEVEL
variables = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
select_regex = [".*AFB.*\d+", ".*FL.*\d+", ".*S3.*\d+", ".*S4.*\d+", ".*S5.*\d+", ".*S7.*\d+", ".*S8.*\d+", ".*S9.*\d+"]
value_regex = "[- ]\d+.\d+"
sample_table_1 = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
sample_table_2 = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
samples = [sample_1, sample_2]
for variable, regex in zip(variables, select_regex):
sample_1_splitted = samples[0].splitlines()
for line in sample_1_splitted:
subresult = re.match(regex, line)
if subresult != None:
substring = subresult.group(0)
result = re.findall(value_regex, substring)
value = np.float64(result[0])
lower_err = np.float64(result[1])
upper_err = np.float64(result[2])
sample_table_1['1'][variable] = (value, lower_err, upper_err)
sample_2_splitted = samples[1].splitlines()
for line in sample_2_splitted:
subresult = re.match(regex, line)
if subresult != None:
substring = subresult.group(0)
result = re.findall(value_regex, substring)
value = np.float64(result[0])
lower_err = np.float64(result[1])
upper_err = np.float64(result[2])
sample_table_2['1'][variable] = (value, lower_err, upper_err)
info_table = PrettyTable(["Variable", r"|MC reweighted - Gen|", "Lower Error", "Upper Error", "Stat. Significance"])
for name in variables:
index = '1'
delta_value = np.abs(sample_table_1[index][name][0] - sample_table_2[index][name][0])
quadratic_lower = -np.sqrt(sample_table_1[index][name][1]**2 + sample_table_2[index][name][1]**2)
quadratic_upper = np.sqrt(sample_table_1[index][name][2]**2 + sample_table_2[index][name][2]**2)
stat_significance = np.abs(delta_value / quadratic_lower)
info_table.add_row([name, delta_value, quadratic_lower, quadratic_upper, stat_significance])
info_table.title = f"{bin_labels}"
print(info_table)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,81 @@
import re
import numpy as np
from prettytable import PrettyTable
def main():
bin_labels = r"$1.10 < q^2 < 6.00$"
sample_1 = """+----------+------------------------+------------------------+-----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+------------------------+------------------------+-----------------------+\n
| AFB | -0.030693939006905927 | -0.0027448398251912667 | 0.0026690127045430573 |\n
| FL | 0.7641860897390124 | -0.0031519177028962353 | 0.00318403722094425 |\n
| S3 | -0.009793642767841643 | -0.0039500538800350165 | 0.003962284039325228 |\n
| S4 | -0.11948833798312776 | -0.005018301784506872 | 0.005112053156795224 |\n
| S5 | -0.14672854148123884 | -0.005077800231656763 | 0.0051383664423323315 |\n
| S7 | -0.0009713979621990259 | -0.005287904531482017 | 0.005275166744474821 |\n
| S8 | 0.003178551357900045 | -0.005257961171467131 | 0.005258879438751017 |\n
| S9 | 0.005757808359963191 | -0.004016272402049568 | 0.004019873761938199 |\n
+----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
sample_2 = """+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | -0.006858788562096526 | -0.006288887141153866 | 0.006202211516385528 |\n
| FL | 0.7741933027966281 | -0.007196527356496095 | 0.006980839734409447 |\n
| S3 | -0.005794055258693013 | -0.008257409210173627 | 0.008381420647898805 |\n
| S4 | -0.13252141252161223 | -0.012754778513953539 | 0.012393845350580537 |\n
| S5 | -0.13982805972042112 | -0.011883305368342971 | 0.011682110709131568 |\n
| S7 | 0.01499315459684105 | -0.012227400419826324 | 0.012191295064893278 |\n
| S8 | 0.010750628153590385 | -0.013096041377945507 | 0.013227158788846905 |\n
| S9 | -0.013084904237743692 | -0.00855475117009418 | 0.008616146883219716 |\n
+----------+-----------------------+-----------------------+----------------------+""" # MC LEVEL
variables = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
select_regex = [".*AFB.*\d+", ".*FL.*\d+", ".*S3.*\d+", ".*S4.*\d+", ".*S5.*\d+", ".*S7.*\d+", ".*S8.*\d+", ".*S9.*\d+"]
value_regex = "[- ]\d+.\d+"
sample_table_1 = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
sample_table_2 = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
samples = [sample_1, sample_2]
for variable, regex in zip(variables, select_regex):
sample_1_splitted = samples[0].splitlines()
for line in sample_1_splitted:
subresult = re.match(regex, line)
if subresult != None:
substring = subresult.group(0)
result = re.findall(value_regex, substring)
value = np.float64(result[0])
lower_err = np.float64(result[1])
upper_err = np.float64(result[2])
sample_table_1['1'][variable] = (value, lower_err, upper_err)
sample_2_splitted = samples[1].splitlines()
for line in sample_2_splitted:
subresult = re.match(regex, line)
if subresult != None:
substring = subresult.group(0)
result = re.findall(value_regex, substring)
value = np.float64(result[0])
lower_err = np.float64(result[1])
upper_err = np.float64(result[2])
sample_table_2['1'][variable] = (value, lower_err, upper_err)
info_table = PrettyTable(["Variable", r"|MC - Gen|", "Lower Error", "Upper Error", "Stat. Significance"])
for name in variables:
index = '1'
delta_value = np.abs(sample_table_1[index][name][0] - sample_table_2[index][name][0])
quadratic_lower = -np.sqrt(sample_table_1[index][name][1]**2 + sample_table_2[index][name][1]**2)
quadratic_upper = np.sqrt(sample_table_1[index][name][2]**2 + sample_table_2[index][name][2]**2)
stat_significance = np.abs(delta_value / quadratic_lower)
info_table.add_row([name, delta_value, quadratic_lower, quadratic_upper, stat_significance])
info_table.title = f"{bin_labels}"
print(info_table)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,94 @@
import os
import dotenv
import sys
import argparse
import mplhep
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))
from analysis.efficiency import get_efficiency_model_class
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature, reweight_feature
from hep_analytics.processing.visualisation import reweight_comparing_plot
FILE_GEN = os.getenv('GEN_FILE')
FILE_MC_PHSP = os.getenv('MC_PHSP_FILE')
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--q2bin', dest ='q2bin', default = 0)
args = parser.parse_args()
Q2BIN = int(args.q2bin)
mplhep.style.use("LHCb2")
bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
bin_labels = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]
filemanager = FileManager(file = FILE_MC_PHSP, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
mc_phsp_data = filemanager.extract_data()
q2_mc_phsp, theta_k_mc_phsp, theta_l_mc_phsp, phi_mc_phsp = mc_phsp_data[0], mc_phsp_data[1], mc_phsp_data[2], mc_phsp_data[3]
q2_mc_phsp, indices = select_feature(feature = q2_mc_phsp, limits = bin_ranges[Q2BIN])
phi_mc_phsp = phi_mc_phsp[indices]
theta_l_mc_phsp = theta_l_mc_phsp[indices]
theta_k_mc_phsp = theta_k_mc_phsp[indices]
lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
theta_k_mc_phsp, indices = select_feature(feature = theta_k_mc_phsp, limits = (lower_costhetak_cut, upper_costhetak_cut))
q2_mc_phsp = q2_mc_phsp[indices]
phi_mc_phsp = phi_mc_phsp[indices]
theta_l_mc_phsp = theta_l_mc_phsp[indices]
filemanager = FileManager(file = FILE_GEN, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
gen_data = filemanager.extract_data()
q2_gen, theta_k_gen, theta_l_gen, phi_gen = gen_data[0], gen_data[1], gen_data[2], gen_data[3]
q2_gen, indices = select_feature(feature = q2_gen, limits = bin_ranges[Q2BIN])
phi_gen = phi_gen[indices]
theta_l_gen = theta_l_gen[indices]
theta_k_gen = theta_k_gen[indices]
theta_k_gen, indices = select_feature(feature = theta_k_gen, limits = (lower_costhetak_cut, upper_costhetak_cut))
q2_gen = q2_gen[indices]
phi_gen = phi_gen[indices]
theta_l_gen = theta_l_gen[indices]
q2_mc_phsp_weights = reweight_feature(original_feature = q2_mc_phsp, target_feature = q2_gen, n_bins = 25)
reweight_comparing_plot(original_feature = q2_mc_phsp, target_feature = q2_gen, weights = q2_mc_phsp_weights,
n_bins = 25, suptitle = f"{bin_labels[Q2BIN]}",
titles = ["Q2 MC PHSP", "Q2 MC PHSP (reweighted)", "Q2 Gen"], save = f"reweighted_q2_gen_mc_phsp_{Q2BIN}_bin.png")
df = pd.DataFrame({'ctl': theta_l_mc_phsp, 'ctk': theta_k_mc_phsp, 'phi': phi_mc_phsp, 'q2': q2_mc_phsp, 'weights': q2_mc_phsp_weights})
orders = {"ctl": 4, "ctk": 6, "phi": 2}
ranges = {"ctl": [-1.0, 1.0], "ctk": [lower_costhetak_cut, upper_costhetak_cut], "phi": [-np.pi, np.pi]}
EffClass = get_efficiency_model_class('legendre')
eff = EffClass.fit(df, ['ctl', 'ctk', 'phi'], weight_var = "weights", ranges = ranges,
legendre_orders = orders, calculate_cov = False, chunk_size = 2000)
out_file = eff.write_to_disk(f'acc_3d_JpsiKstMC_reweighted_{Q2BIN}_bin.yaml')
print(out_file)
labels = {'ctl': r'$\cos \theta_L$', 'ctk': r'$\cos \theta_K$', 'phi': '$\phi$', 'q2': '$q^2$ [GeV$^2$]'}
for v in ['ctl', 'ctk', 'phi']:
fig, ax = plt.subplots(figsize = (15, 10))
plt.xlim(*ranges[v])
x, y = eff.project_efficiency(v, n_points = 1000)
plt.plot(x, y, 'b-')
plt.hist(df[v], density = True, bins = 50, color = 'grey', alpha = 0.5)
plt.ylabel("a.u.", horizontalalignment = 'right', y = 1.0)
plt.xlabel(labels[v], horizontalalignment = 'right', x = 1.0)
plt.savefig(f'acc_3d_JpsiKstMC_{v}_{Q2BIN}_bin.pdf')
plt.close()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,97 @@
import os
import dotenv
import sys
import argparse
import pandas as pd
import mplhep
import zfit
from prettytable import PrettyTable
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))
from b2kstll.models.angular import B2Kstll
from b2kstll.plot import plot_distributions
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature, reweight_feature
from hep_analytics.processing.visualisation import reweight_comparing_plot
FILE_MC = os.getenv('MC_FILE')
FILE_GEN = os.getenv('GEN_FILE')
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--q2bin', dest ='q2bin', default = 0)
args = parser.parse_args()
Q2BIN = int(args.q2bin)
mplhep.style.use("LHCb2")
bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
bin_labels = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]
filemanager = FileManager(file = FILE_MC, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
mc_data = filemanager.extract_data()
q2_mc, theta_k_mc, theta_l_mc, phi_mc = mc_data[0], mc_data[1], mc_data[2], mc_data[3]
q2_mc, indices = select_feature(feature = q2_mc, limits = bin_ranges[Q2BIN])
phi_mc = phi_mc[indices]
theta_l_mc = theta_l_mc[indices]
theta_k_mc = theta_k_mc[indices]
lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
theta_k_mc, indices = select_feature(feature = theta_k_mc, limits = (lower_costhetak_cut, upper_costhetak_cut))
q2_mc = q2_mc[indices]
phi_mc = phi_mc[indices]
theta_l_mc = theta_l_mc[indices]
filemanager = FileManager(file = FILE_GEN, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
gen_data = filemanager.extract_data()
q2_gen, theta_k_gen, theta_l_gen, phi_gen = gen_data[0], gen_data[1], gen_data[2], gen_data[3]
q2_gen, indices = select_feature(feature = q2_gen, limits = bin_ranges[Q2BIN])
phi_gen = phi_gen[indices]
theta_l_gen = theta_l_gen[indices]
theta_k_gen = theta_k_gen[indices]
theta_k_gen, indices = select_feature(feature = theta_k_gen, limits = (lower_costhetak_cut, upper_costhetak_cut))
q2_gen = q2_gen[indices]
phi_gen = phi_gen[indices]
theta_l_gen = theta_l_gen[indices]
angular_data = pd.DataFrame({'ctl': theta_l_mc, 'ctk': theta_k_mc, 'phi': phi_mc})
angular_data.to_csv(f"ang_fit_mc_q2_bins_{Q2BIN}_bin.csv", index = False)
x = B2Kstll('ctl','ctk','phi')
x.set_acc(f"./acc_3d_JpsiKstMC_reweighted_4_bin.yaml")
obs, pdf, _ = x.get_pdf('PWave')
q2_mc_weights = reweight_feature(original_feature = q2_mc, target_feature = q2_gen, n_bins = 25)
reweight_comparing_plot(original_feature = q2_mc, target_feature = q2_gen, weights = q2_mc_weights,
n_bins = 25, suptitle = f"{bin_labels[Q2BIN]}", titles = ["Q2 MC", "Q2 MC (reweighted)", "Q2 Gen"], save = "reweighted_q2_gen_mc.png")
datazfit = zfit.Data.from_pandas(df = angular_data, obs = obs, weights = q2_mc_weights)
nll = zfit.loss.UnbinnedNLL(model = pdf, data = datazfit)
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
param_errors, _ = result.errors()
print(param_errors)
info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
for name in fit_labels:
value = result.params[name]["value"]
lower = result.params[name]["minuit_minos"]["lower"]
upper = result.params[name]["minuit_minos"]["upper"]
info_table.add_row([name, value, lower, upper])
print(info_table)
plot_distributions(result, suffix = f"accPwavePDF_{Q2BIN}_bin_reweighted_mc")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,97 @@
import re
import numpy as np
from prettytable import PrettyTable
def main():
table_1 = """+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | -0.1327941714665493 | -0.011751983569979923 | 0.011695518975582573 |\n
| FL | 0.778784190310989 | -0.013786738313616998 | 0.01334631516152801 |\n
| S3 | 0.012016752919524067 | -0.014186281678810328 | 0.01390470699489175 |\n
| S4 | 0.0030912112690471196 | -0.02494551503535478 | 0.02500682819273369 |\n
| S5 | 0.06257482631549156 | -0.022576669869741432 | 0.02267902660423071 |\n
| S7 | 0.026461647341068015 | -0.022870493984060422 | 0.022709046371542273 |\n
| S8 | 0.022528200778115424 | -0.024500119399894143 | 0.02444883875849664 |\n
| S9 | -0.010047596608480971 | -0.015100958783891871 | 0.01497061747614531 |\n
+----------+-----------------------+-----------------------+----------------------+"""
table_2 = """+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | -0.044313915998597596 | -0.011266005942710903 | 0.011376116194169396 |\n
| FL | 0.8080898316779715 | -0.012903371237386076 | 0.012627755576193413 |\n
| S3 | -0.001350693384098943 | -0.01425102196417036 | 0.014014236056631763 |\n
| S4 | -0.16348388538343442 | -0.022982703026786865 | 0.02353050491741336 |\n
| S5 | -0.12998638915138938 | -0.02207623402690808 | 0.02230804022923196 |\n
| S7 | -0.019175072605277906 | -0.022219297227352726 | 0.02233298418372788 |\n
| S8 | 0.03896504393685167 | -0.02306667997644898 | 0.023056355590581525 |\n
| S9 | -0.026410777123662296 | -0.01497373735832672 | 0.015237714795859537 |\n
+----------+-----------------------+-----------------------+----------------------+"""
table_3 = """+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | 0.06877321968399613 | -0.00873785505692959 | 0.008866035437567673 |\n
| FL | 0.7579354189983714 | -0.01048129883366948 | 0.01017491937670441 |\n
| S3 | -0.02540601876438032 | -0.012075664333196083 | 0.012192632205060838 |\n
| S4 | -0.18300822691744986 | -0.015545142299112552 | 0.016136099542591972 |\n
| S5 | -0.28137307676130335 | -0.0160784362375397 | 0.01630021297009425 |\n
| S7 | 0.033064765761582675 | -0.01837491321971226 | 0.018457429889962827 |\n
| S8 | -0.022661825503764606 | -0.018831356033962507 | 0.018816177761568176 |\n
| S9 | -0.01151502616377776 | -0.012778180225448249 | 0.013091600028830737 |\n
+----------+-----------------------+-----------------------+----------------------+"""
bin_labels = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]
tables = [table_1, table_2, table_3]
variables = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
select_regex = [".*AFB.*\d+", ".*FL.*\d+", ".*S3.*\d+", ".*S4.*\d+", ".*S5.*\d+", ".*S7.*\d+", ".*S8.*\d+", ".*S9.*\d+"]
value_regex = "[- ]\d+.\d+"
samples = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None},
'2': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None},
'3': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None},
}
for index in range(len(tables)):
for variable, regex in zip(variables, select_regex):
splitted = tables[index].splitlines()
for line in splitted:
subresult = re.match(regex, line)
if subresult != None:
substring = subresult.group(0)
result = re.findall(value_regex, substring)
value = np.float64(result[0])
lower_err = np.float64(result[1])
upper_err = np.float64(result[2])
samples[str(index + 1)][variable] = (value, lower_err, upper_err)
fit_variables_split = [[] for _ in range(8)]
fit_errors_split = [[] for _ in range(8)]
for value in samples.values():
count = 0
for fitvalue in value.values():
fit_variables_split[count].append(fitvalue[0])
fit_errors_split[count].append(np.sqrt(fitvalue[1]**2 + fitvalue[2]**2))
count += 1
info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
info_table.title = f"q2_split_{bin_labels[4]}_average"
fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
count = 0
for name, fit_split_var, fit_err_split in zip(fit_labels, fit_variables_split, fit_errors_split):
fit_avg = np.mean(fit_split_var)
error_split = 1/3 * np.sqrt(fit_err_split[0]**2 + fit_err_split[1]**2 + fit_err_split[2]**2)
info_table.add_row([name, fit_avg, -error_split, error_split])
count += 1
print(info_table)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,43 @@
# Angular Analysis of B+toK*+Pi0Mu-Mu+
## Setup
For this setup you will need to install miniconda on the machine you are working and do the following procedure:
1. `conda env create -f requirements.yaml`
2. Check with `conda activate angularanalysis` if the environment is working
3. Check out the following page in order to install the analysis-tool package: `https://github.com/jonas-eschle/analysis-tools`
4. Check out the following page in order to install the angular-analysis B->K*ll package: `https://gitlab.cern.ch/LHCb-RD/ewp-bd2ksteeangular-legacy/-/tree/master/b2kstll`
Afterwards you can use your conda environment in order to run the code. But before you do, you need to setup the properties.env. Inside of properties.env you will find different configuration variables which are used throughout the code in order to control the behavior of the code. Below you can find a table which summarizes what these environment variables do.
|Environment Variable|Use-Case|
| -------------------- | -------- |
|LOWER_COSTHETAK_CUT |Controls the lower boundary of cos(theta_K)|
|UPPER_COSTHETAK_CUT |Controls the upper boundary of cos(theta_K)|
|SYS_PATH|We need one local package in order to make the code work, namely hep_analytics which you can also find in this repo, the interpreter needs to find that package, so set SYS_PATH to the relative path to hep_analytics|
|GEN_FILE|Path to the Generator File|
|MC_FILE|Path to the MC File|
|MC_PHSP_FILE|Path to the PHSP MC File|
If you want to see how to set the environment variables, have a look at `properties.env`. Also, I have run the code on the LHCbA1 machine but it should make no difference where you run the code.
You are done (apply also the custom fixes described down below)!
## Custom Fix
The `b2kstll` package has a few caveats when you want to apply a cos(theta_K) cut since the normalisation will be wrong in this case for the pdfs. So in order to compensate for that, I changed a few lines in the b2kstll package code, have a look at the `b2kstll/models` folder, there you will find all the files which I modified. But bear in mind that I only modified the PWave pdf, for other pdfs the issue is still persistent.
## Useful Scripts
### root_file_inspect.py
This script can be used to look inside a root file and see which branches and features it includes. To kick it off, give it a try with:
1. `python root_file_inspect.py --file=ROOT_FILE_PATH --regex=.*`
You do not need to set the regex flag but it can be useful if you search for some particular feature. This script depends on the file `rootio.py`.
### diff_pair_tables_*.py
There are 3 scripts which are prefixed with `diff_pair_tables` and which are very similar but have different use-cases. You can use them as templates whenever you have generated tables by `prettytable` or another library/tool and use them in order to automatically extract the values out of these tables. Then you do not have to type them manually into your code.
## hep_analytics
### package: pathing
The module `aggregate.py` exposes the `PathAggregator` which is very simple and simply builds a path string. For only this reason the class is not very useful but when your directory path setup is more complex and you do not want to manually type out all relevant paths, this class can be easily extended and adapted.
### package: processing
Exposes three modules: `extract`, `transform`, `visualisation`. `extract` exposes the `FileManager` which can be used to extract the relevant features out of the branches of a `root` file. `transform` exposes useful functions like `select_feature` in order to make cuts and `reweight_feature`. Take note that `select_feature` is not the most efficient way of doing cuts, for more efficient usage use the functionality of `uproot3` directly. `visualisation` exposes `reweight_comparing_plot` in order to plot 3 diagrams, two original features and the reweighted feature.
### Here you can find my root files
Under `/work/skuza/thesis/Tuples`

View File

@ -0,0 +1,382 @@
import warnings
from math import pi
import numpy as np
import tensorflow as tf
import zfit
from zfit import z
import yaml
import sympy
from b2kstll.models.Kpi import KpiPWavePDF, KpiSWavePDF, ReKpiPandSWavePDF, ImKpiPandSWavePDF
from b2kstll.models.acceptance import AccPDF
from b2kstll.utils import get_coeffs
import analysis.efficiency
from analysis.efficiency import load_efficiency_model
from particle import Particle
from b2kstll.models.angular_sympy import angular_func_sympy
class AngularPDF(zfit.pdf.BasePDF):
""" Generic angular PDF built from input function and corresponding parameters
Args:
ArbitraryFNC (function: Any arbitrary angular function
params (params): parameters of interest in the given function
obs (`zfit.Space`):
name (str):
dtype (tf.DType):
"""
def __init__(self, obs, params, func, name):
super().__init__(obs=obs, params=params, name=name+"PDF")
self._user_func = func
_N_OBS = 3
VALID_LIMITS = {'costheta_l': (-1, 1),
'costheta_k': (-1, 1),
'phi': (-pi, pi)}
def _pdf(self, x, norm_range):
costheta_l, costheta_k, phi = z.unstack_x(x)
pdf = self._user_func(**self.params, costheta_l=costheta_l, costheta_k=costheta_k, phi=phi)
return pdf
# Calculate Normalisation
def get_normalisation(FL, upper_ctk):
kwargs = dict(
FL = sympy.Symbol('FL'),
AFB = sympy.Symbol('AFB'),
S3 = sympy.Symbol('S3'),
S4 = sympy.Symbol('S4'),
S5 = sympy.Symbol('S5'),
S7 = sympy.Symbol('S7'),
S8 = sympy.Symbol('S8'),
S9 = sympy.Symbol('S9'),
costheta_l = sympy.Symbol('costheta_l'),
costheta_k = sympy.Symbol('costheta_k'),
phi = sympy.Symbol('phi')
)
sympy_val = angular_func_sympy(**kwargs)
integral = sympy.integrate(sympy_val,
(kwargs['costheta_l'], -1, 1),
(kwargs['costheta_k'], -1, upper_ctk),
(kwargs['phi'], -sympy.pi, sympy.pi),)
fun = sympy.lambdify((kwargs["FL"]),integral, 'tensorflow')
return fun(FL)
UPPER_CTK = 0.85
def PWave(FL, S3, S4, S5, AFB, S7, S8, S9, costheta_l, costheta_k, phi, backend=tf):
"""Full d4Gamma/dq2dOmega for Bd -> Kst ll (l=e,mu)
Angular distribution obtained in the total PDF (using LHCb convention JHEP 02 (2016) 104)
i.e. the valid of the angles is given for
- phi: [-pi, pi]
- theta_K: [-pi, pi]
- theta_l: [-pi, pi]
The function is normalized over a finite range and therefore a PDF.
Args:
FL (`zfit.Parameter`): Fraction of longitudinal polarisation of the Kst
S3 (`zfit.Parameter`): A_perp^2 - A_para^2 / A_zero^2 + A_para^2 + A_perp^2 (L, R)
S4 (`zfit.Parameter`): RE(A_zero*^2 * A_para^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
S5 (`zfit.Parameter`): RE(A_zero*^2 * A_perp^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
AFB (`zfit.Parameter`): Forward-backward asymmetry of the di-lepton system (also i.e. 3/4 * S6s)
S7 (`zfit.Parameter`): IM(A_zero*^2 * A_para^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
S8 (`zfit.Parameter`): IM(A_zero*^2 * A_perp^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
S9 (`zfit.Parameter`): IM(A_perp*^2 * A_para^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
obs (`zfit.Space`):
name (str):
dtype (tf.DType):
"""
sintheta_k = backend.sqrt(1.0 - costheta_k * costheta_k)
sintheta_l = backend.sqrt(1.0 - costheta_l * costheta_l)
sintheta_2k = (1.0 - costheta_k * costheta_k)
sintheta_2l = (1.0 - costheta_l * costheta_l)
sin2theta_k = (2.0 * sintheta_k * costheta_k)
cos2theta_l = (2.0 * costheta_l * costheta_l - 1.0)
sin2theta_l = (2.0 * sintheta_l * costheta_l)
pdf = ((3.0 / 4.0) * (1.0 - FL) * sintheta_2k +
FL * costheta_k * costheta_k +
(1.0 / 4.0) * (1.0 - FL) * sintheta_2k * cos2theta_l +
-1.0 * FL * costheta_k * costheta_k * cos2theta_l +
S3 * sintheta_2k * sintheta_2l * backend.cos(2.0 * phi) +
S4 * sin2theta_k * sin2theta_l * backend.cos(phi) +
S5 * sin2theta_k * sintheta_l * backend.cos(phi) +
(4.0 / 3.0) * AFB * sintheta_2k * costheta_l +
S7 * sin2theta_k * sintheta_l * backend.sin(phi) +
S8 * sin2theta_k * sin2theta_l * backend.sin(phi) +
S9 * sintheta_2k * sintheta_2l * backend.sin(2.0 * phi))
#return (9.0 / (32.0 * pi)) * pdf
return pdf / get_normalisation(FL = FL, upper_ctk = UPPER_CTK)
def SWave(costheta_l, costheta_k, phi, backend=tf):
"""Full d4Gamma/dq2dOmega for Swave part only
Angular distribution obtained in the total PDF (using LHCb convention JHEP 02 (2016) 104)
i.e. the valid of the angles is given for
- phi: [-pi, pi]
- theta_K: [-pi, pi]
- theta_l: [-pi, pi]
The function is normalized over a finite range and therefore a PDF.
Args:
obs (`zfit.Space`):
name (str):
dtype (tf.DType):
"""
sintheta_2l = (1.0 - costheta_l * costheta_l)
pdf = (3.0 / (16.0 * pi)) * sintheta_2l
return pdf
def PandSWaveInterference(Ss1, Ss2, Ss3, Ss4, Ss5, costheta_l, costheta_k, phi, backend=tf):
"""Full d4Gamma/dq2dOmega for the Real interference term between P and S waves
Angular distribution obtained in the total PDF (using LHCb convention JHEP 02 (2016) 104)
i.e. the valid of the angles is given for
- phi: [-pi, pi]
- theta_K: [-pi, pi]
- theta_l: [-pi, pi]
The function is normalized over a finite range and therefore a PDF.
Args:
Ss1, Ss2, Ss3, Ss4, Ss5 (`zfit.Parameter`)
obs (`zfit.Space`):
name (str):
dtype (tf.DType):
"""
sintheta_l = backend.sqrt(1.0 - costheta_l * costheta_l)
sintheta_k = backend.sqrt(1.0 - costheta_k * costheta_k)
sintheta_2l = (1.0 - costheta_l * costheta_l)
sin2theta_l = (2.0 * sintheta_l * costheta_l)
pdf = (Ss1 * sintheta_2l * costheta_k +
Ss2 * sin2theta_l * sintheta_k * backend.cos(phi) +
Ss3 * sintheta_l * sintheta_k * backend.cos(phi) +
Ss4 * sintheta_l * sintheta_k * backend.sin(phi) +
Ss5 * sin2theta_l * sintheta_k * backend.sin(phi))
return (3.0 / (16.0 * pi)) * pdf
class AngularPandSWavePDF(zfit.pdf.BaseFunctor):
"""Full d4Gamma/dq2dOmega for both spin 0 and 1 for Bd -> Kst ll (l=e,mu) decays
Angular distribution obtained in the total PDF (using LHCb convention JHEP 02 (2016) 104)
i.e. the valid of the angles is given for
- phi: [-pi, pi]
- theta_K: [0, pi]
- theta_l: [0, pi]
The function is normalized over a finite range and therefore a PDF.
Args:
AngularPWavePDF (`zfit.pdf.ZPDF`: P-wave normalised PDF
AngularSWavePDF (`zfit.pdf.ZPDF`: S-wave interference part normalised PDF
FS (`zfit.Parameter`): S-wave fraction, i.e. |A^L_S|^2 + |A^R_S|^2/ Sum(|A^{S,R}_{S,zero,para,perp}|^2)
obs (`zfit.Space`):
name (str):
dtype (tf.DType):
"""
def __init__(self, PWave, SWave, PandSWaveInterference, FS, obs):
super().__init__(pdfs=[PWave, SWave, PandSWaveInterference], name="AngularPandSWavePDF", params={'FS': FS}, obs=obs)
_N_OBS = 3
VALID_LIMITS = {'costheta_l': (-1, 1),
'costheta_k': (-1, 1),
'phi': (-pi, pi)}
def _pdf(self, x, norm_range):
FS = self.params['FS']
PWave, SWave, PandSWaveInterference = self.pdfs
pdf = (1 - FS) * PWave.pdf(x) + FS * SWave.pdf(x) + PandSWaveInterference.pdf(x)
return pdf
# A bit of handling
class B2Kstll:
"""Factory class for B->K*ll angular fit"""
PDFS = {'PWave': (PWave, ['FL', 'S3', 'S4', 'S5', 'AFB', 'S7', 'S8', 'S9']),
'SWave': (SWave, []),
'PandSWaveInterference' : (PandSWaveInterference, ['Ss1', 'Ss2', 'Ss3', 'Ss4', 'Ss5']),
#'PandSWaveInterference5D': (PandSWaveInterference5D, ['Ss1re', 'Ss2re', 'Ss3re', 'Ss4re', 'Ss5re',
# 'Ss1im', 'Ss2im', 'Ss3im', 'Ss4im', 'Ss5im']),
'PandSWave' : (AngularPandSWavePDF, ['FS'])}
#'PandSWave5D': (AngularPandSWavePDF5D, ['FS'])}
KPI = {'PWave': (KpiPWavePDF, ['Kst_892'], [313]),
'SWave': (KpiSWavePDF, ['Kst_1430'], [10311]),
'RePandSWave': (ReKpiPandSWavePDF, ['Kst_892', 'Kst_1430'], [313, 10311]),
'ImPandSWave': (ImKpiPandSWavePDF, ['Kst_892', 'Kst_1430'], [313, 10311])}
def __init__(self, cosThetaL, cosThetaK, Phi, q2_min=None, q2_max=None):
"""Initialize observable names."""
self._obs_names = {'costheta_l': cosThetaL,
'costheta_k': cosThetaK,
'phi': Phi}
costheta_l = zfit.Space(cosThetaL, limits=(-1.0, 1.0))
costheta_k = zfit.Space(cosThetaK, limits=(-1.0, UPPER_CTK))
phi = zfit.Space(Phi, limits=(-pi, pi))
self._spaces = costheta_l * costheta_k * phi
self.params = {}
self.acc = None
self.acc_integral = None
self.Kpi = None
self.q2_bin_acc = None
self.q2_min = q2_min
self.q2_max = q2_max
def set_q2_bin_acceptance(self, q2_bin_acc):
"""Set the q2 value used to slice the acceptance function for parametrization in 4D"""
self.q2_bin_acc = q2_bin_acc
def set_acc(self, file_name, q2_bin_acc=None):
"""Set an acceptance function to the signal PDF."""
# Load coefficients from MoM
#coeffs_yaml = load_efficiency_model(file_name)
#coeffs = coeffs_yaml.get_coefficients()
self.acc,self.accmodel = get_coeffs(file_name)
self.set_q2_bin_acceptance(q2_bin_acc)
def set_acc_integral(self, file_name, str_order):
"""Set the integral of the PDF x acceptance."""
# Load integral from a yaml file
with open(file_name, 'r') as f:
dict_int = yaml.load(f, Loader=yaml.FullLoader)
integral = dict_int[0][str_order]
self.acc_integral = sympy.sympify(integral)
def set_Kpi(self, name):
""" Set the Kpi invariant mass system."""
#self._has_Kpi = True
self.Kpi = name
def get_pdf(self, name):
"""Get corresponding PDF and paramaters."""
fnc_class, param_names = self.PDFS[name]
def get_params(param_list):
out = {}
for param in param_list:
if param not in self.params:
if param == 'FL':
config = [0.6, 0, 1]
elif param == 'FS':
config = [0.1, 0, 1]
else:
config = [0.0, -2, 2]
self.params.update({param: zfit.Parameter(param, *config)})
out[param] = self.params[param]
return out
obs = self._spaces
def get_Kpi_pdf(name, model):
"""Set the corresponding PDF for the Kpi system and parameters."""
pdf_class, param_names, param_id = self.KPI[model]
def get_res(param_list, param_id):
out = {}
for param, idparam in zip(param_names, param_id):
if not any([i for i in self.params if param in i.lower()]):
m = Particle.from_pdgid(idparam).mass * 1e-03
w = Particle.from_pdgid(idparam).width * 1e-03
self.params.update({param+"_M": zfit.Parameter(param+"_M", m, floating=False)})
self.params.update({param+"_W": zfit.Parameter(param+"_W", w, floating=False)})
out[param+"_M"] = self.params[param+"_M"]
out[param+"_W"] = self.params[param+"_W"]
return out
# Add Kpi dimension
mKpi = zfit.Space(name, limits=(0.79555, 0.99555))
self._spaces *= mKpi
params = get_res(param_names, param_id)
pdf = pdf_class(obs=mKpi, params=params, name=model, q2_bin=(self.q2_min+self.q2_max)/2.)
return pdf
if name == 'PandSWave':
pdf_tot, params_tot = self.PDFS[name]
get_params(params_tot)
listPDF = dict.fromkeys(['PWave', 'SWave', 'PandSWaveInterference'])
for iPDF in listPDF:
ifncWave, iparamWave = self.PDFS[iPDF]
initParams = get_params(iparamWave)
listPDF[iPDF] = AngularPDF(obs=obs, params=initParams, func=ifncWave, name=iPDF)
if self.Kpi:
pdf_Kpi = get_Kpi_pdf(self.Kpi, iPDF)
listPDF[iPDF] *= pdf_Kpi
params = self.params
pdf = pdf_tot(obs=obs, PWave=listPDF["PWave"], SWave=listPDF["SWave"], PandSWaveInterference=listPDF["PandSWaveInterference"], FS=self.params['FS'])
if self.acc is not None:
pdf = AccPDF(pdf, self.acc, self.accmodel, obs=obs, dict_names=self._obs_names, q2_slice=self.q2_bin_acc, name=name)
else:
params = get_params(param_names)
pdf = AngularPDF(obs=obs, params=params, func=fnc_class, name=name)
if self.acc is not None:
pdf = AccPDF(pdf, self.acc, self.accmodel, obs=obs, dict_names=self._obs_names, q2_slice=self.q2_bin_acc, name=name)
if self.acc_integral:
tf_expr = sympy.lambdify(self.acc_integral.free_symbols, self.acc_integral, 'tensorflow')
integral = integral_acceptance(params, tf_expr)
pdf.register_analytic_integral(func=integral, limits=zfit.Space(axes=(0,1,2), limits=obs.limits))
if self.Kpi:
pdf_Kpi = get_Kpi_pdf(self.Kpi, name)
pdf = pdf * pdf_Kpi
params = self.params
obs = self._spaces
return obs, pdf, params

View File

@ -0,0 +1,63 @@
import inspect
import sympy
def angular_func_sympy(FL, AFB, S3, S4, S5, S7, S8, S9, costheta_l, costheta_k, phi):
sintheta_k = sympy.sqrt(1.0 - costheta_k * costheta_k)
sintheta_l = sympy.sqrt(1.0 - costheta_l * costheta_l)
sintheta_2k = (1.0 - costheta_k * costheta_k)
sintheta_2l = (1.0 - costheta_l * costheta_l)
sin2theta_k = (2.0 * sintheta_k * costheta_k)
cos2theta_l = (2.0 * costheta_l * costheta_l - 1.0)
sin2theta_l = (2.0 * sintheta_l * costheta_l)
pdf = ((3.0 / 4.0) * (1.0 - FL) * sintheta_2k +
FL * costheta_k * costheta_k +
(1.0 / 4.0) * (1.0 - FL) * sintheta_2k * cos2theta_l +
-1.0 * FL * costheta_k * costheta_k * cos2theta_l +
S3 * sintheta_2k * sintheta_2l * sympy.cos(2.0 * phi) +
S4 * sin2theta_k * sin2theta_l * sympy.cos(phi) +
S5 * sin2theta_k * sintheta_l * sympy.cos(phi) +
(4.0 / 3.0) * AFB * sintheta_2k * costheta_l +
S7 * sin2theta_k * sintheta_l * sympy.sin(phi) +
S8 * sin2theta_k * sin2theta_l * sympy.sin(phi) +
S9 * sintheta_2k * sintheta_2l * sympy.sin(2.0 * phi))
return pdf
""" kwargs = dict(
FL = sympy.Symbol('FL'),
AFB = sympy.Symbol('AFB'),
S3 = sympy.Symbol('S3'),
S4 = sympy.Symbol('S4'),
S5 = sympy.Symbol('S5'),
S7 = sympy.Symbol('S7'),
S8 = sympy.Symbol('S8'),
S9 = sympy.Symbol('S9'),
costheta_l = sympy.Symbol('costheta_l'),
costheta_k = sympy.Symbol('costheta_k'),
phi = sympy.Symbol('phi')
)
lower_ctl = sympy.Symbol('lower_ctl')
upper_ctl = sympy.Symbol('upper_ctl')
lower_ctk = sympy.Symbol('lower_ctk')
upper_ctk = sympy.Symbol('upper_ctk')
lower_phi = sympy.Symbol('lower_phi')
upper_phi = sympy.Symbol('upper_phi')
sympy_val = angular_func_sympy(**kwargs)
print('starting integration')
integral = sympy.integrate(sympy_val,
(kwargs['costheta_l'], -1, 1),
(kwargs['costheta_k'], -1, 1),
(kwargs['phi'], -sympy.pi, sympy.pi),)
# integral = sympy.integrate(sympy_val,
# kwargs['costheta_l'],
# kwargs['costheta_k'],
# kwargs['phi'])
print(integral)
fun = sympy.lambdify((lower_ctl, lower_ctk, lower_phi),integral, 'tensorflow')
print(inspect.getsource(fun)) """

View File

@ -0,0 +1,7 @@
LOWER_COSTHETAK_CUT=-1.00
UPPER_COSTHETAK_CUT=0.851
SYS_PATH=../../
SYS_PATH_LEVEL_1=../
GEN_FILE=../../KplusPi0Resolved_Run2_MC_EvtGen_FCNC.root
MC_FILE=../../KplusPi0Resolved_Run2_MC_FCNC.root
MC_PHSP_FILE=../../KplusPi0Resolved_Run2_PHSP_FCNC.root

View File

@ -0,0 +1,24 @@
name: angularanalysis
channels:
- defaults
dependencies:
- python=3.9.12
- iminuit
- numpy
- pandas
- matplotlib
- seaborn
- pytest
- pip
- pip:
- uproot==4.2.3
- awkward==1.8.0
- zfit
- prettytable
- PyRoot==0.3.0
- python-dotenv==0.20.0
- sympy
- uncertainties==3.1.6
- xgboost==1.6.1
- mplhep
- hep-ml

View File

@ -0,0 +1,29 @@
import os
import dotenv
import sys
import argparse
dotenv.load_dotenv('properties.env')
sys.path.insert(0, os.getenv('SYS_PATH_LEVEL_1'))
from rootio import RootIO
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--file', dest = 'file', default = '')
parser.add_argument('--regex', dest = 'regex', default = '')
args = parser.parse_args()
FILE = args.file
regex = args.regex
rootio = RootIO(FILE)
if regex:
rootio.get_information(regex = regex)
return
rootio.get_information()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,37 @@
import os
import re
import uproot
class RootIO:
def __init__(self, path_to_file: str):
if not os.path.exists(path = path_to_file):
raise ValueError("Path does not exists!")
self.path_to_file = path_to_file
def get_information(self, regex: str = None) -> None:
with uproot.open(self.path_to_file) as file:
print(f"File classnames:\n{file.classnames()}")
for classname, classtype in file.classnames().items():
print(f"Classname:\n{classname}")
print(f"Classtype:\n{classtype}")
print(f"Content of the class:")
for name, branch in file[classname].items():
print(f"{name}: {branch}")
if regex != None:
print(f"Regex search results:")
attributes = file[classname].keys()
for attribute in attributes:
result = self.__find_pattern_in_text(regex = regex, text = attribute)
if result != "":
print(f"{result}")
def __find_pattern_in_text(self, regex: str, text: str) -> str:
search_result = re.search(pattern = regex, string = text)
result = ""
if search_result != None:
result = search_result.group(0)
return result

View File

@ -0,0 +1,13 @@
class PathAggregator:
def __init__(self, basefolder: str, directory: str, file: str, year: str = None) -> None:
self.basefolder = basefolder
self.directory = directory
self.file = file
self.year = year
def get_path_to_file(self) -> str:
if self.year != None:
return f"./{self.basefolder}/{self.directory}/{self.year}/{self.file}"
return f"./{self.basefolder}/{self.directory}/{self.file}"

View File

@ -0,0 +1,20 @@
import numpy as np
import uproot
import typing
from dataclasses import dataclass
@dataclass
class FileManager:
file: str
tree: str
branches: list[str]
def extract_data(self) -> list[np.ndarray]:
with uproot.open(self.file) as file:
file_tree = file[self.tree]
data = []
for branch in self.branches:
data.append(file_tree[branch].array(library = "np"))
return data

View File

@ -0,0 +1,20 @@
import numpy as np
from hep_ml.reweight import BinsReweighter
def select_feature(feature: np.ndarray, limits: tuple[float, float]) -> tuple[np.ndarray, list]:
selection_indices = []
for index, value in enumerate(feature):
if value > limits[0] and value < limits[1]:
selection_indices.append(index)
return feature[selection_indices], selection_indices
def reweight_feature(original_feature: list, target_feature: list, n_bins: int, n_neighs: int = 2):
original_weights = np.ones(len(original_feature))
bin_reweighter = BinsReweighter(n_bins = n_bins, n_neighs = n_neighs)
bin_reweighter.fit(original = original_feature, target = target_feature, original_weight = original_weights)
return bin_reweighter.predict_weights(original = original_feature, original_weight = original_weights)

View File

@ -0,0 +1,22 @@
import matplotlib.pyplot as plt
import mplhep
mplhep.style.use("LHCb2")
def reweight_comparing_plot(original_feature: list, target_feature: list, weights: list, n_bins: int, suptitle: str, titles: list[str], save: str) -> None:
if len(original_feature) != len(weights):
raise ValueError(f"original_feature and weights need to have the same dimension!")
fig, ax = plt.subplots(nrows = 1, ncols = 3, figsize = (30, 12.5))
ax[0].hist(original_feature, bins = n_bins)
ax[0].set_ylabel("counts/a.u.")
ax[0].set_title(titles[0])
ax[1].hist(original_feature, bins = n_bins, weights = weights)
ax[1].set_title(titles[1])
ax[2].hist(target_feature, bins = n_bins)
ax[2].set_title(titles[2])
fig.suptitle(suptitle)
fig.savefig(f"{save}")