Browse Source

Python scripts used for the angular analysis of B+ → K*+ π0 μ+ μ−

master
rskuza 2 years ago
parent
commit
0bc6296fb7
  1. 79
      Code/Scripts/Python Scripts/Generator Level Fit/angular_fit.py
  2. 2374
      Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_4_bin.yaml
  3. 2374
      Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_5_bin.yaml
  4. 2374
      Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_6_bin.yaml
  5. 2374
      Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_7_bin.yaml
  6. 2374
      Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_reweighted_4_bin.yaml
  7. 73
      Code/Scripts/Python Scripts/MC Fit/angular_acceptance_modeling.py
  8. 77
      Code/Scripts/Python Scripts/MC Fit/angular_fit.py
  9. 83
      Code/Scripts/Python Scripts/MC Fit/diff_pair_tables_average_mc.py
  10. 81
      Code/Scripts/Python Scripts/MC Fit/diff_pair_tables_average_mc_reweighted.py
  11. 81
      Code/Scripts/Python Scripts/MC Fit/diff_pair_tables_mc_gen.py
  12. 94
      Code/Scripts/Python Scripts/MC Fit/reweighted_angular_acceptance_modeling.py
  13. 97
      Code/Scripts/Python Scripts/MC Fit/reweighted_angular_fit.py
  14. 97
      Code/Scripts/Python Scripts/MC Fit/table_average_to_compare.py
  15. 7
      Code/Scripts/Python Scripts/properties.env
  16. 208
      Code/Scripts/Python Scripts/requirements.txt
  17. 29
      Code/Scripts/Python Scripts/root_file_inspect.py
  18. 37
      Code/Scripts/Python Scripts/rootio.py
  19. 0
      Code/Scripts/hep_analytics/__init__.py
  20. 0
      Code/Scripts/hep_analytics/pathing/__init__.py
  21. 13
      Code/Scripts/hep_analytics/pathing/aggregate.py
  22. 0
      Code/Scripts/hep_analytics/processing/__init__.py
  23. 20
      Code/Scripts/hep_analytics/processing/extract.py
  24. 20
      Code/Scripts/hep_analytics/processing/transform.py
  25. 22
      Code/Scripts/hep_analytics/processing/visualisation.py

79
Code/Scripts/Python Scripts/Generator Level Fit/angular_fit.py

@ -0,0 +1,79 @@
import os
import dotenv
import argparse
import sys
import pandas as pd
import zfit
from prettytable import PrettyTable

# The environment must be loaded before the project path is known.
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))

from b2kstll.models.angular import B2Kstll
from b2kstll.plot import plot_distributions
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature

FILE = os.getenv('FILE_GEN')


def main():
    """Fit the P-wave angular PDF to generator-level events in one q^2 bin."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--q2bin', dest='q2bin', default=0)
    Q2BIN = int(parser.parse_args().q2bin)

    # q^2 bin edges (GeV^2); the bin is chosen by index via --q2bin.
    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00),
                  (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")

    loader = FileManager(file=FILE, tree="Events",
                         branches=["q2", "costhetak", "costhetal", "phi"])
    columns = loader.extract_data()
    q2, costhetak, costhetal, phi = columns[0], columns[1], columns[2], columns[3]

    # Restrict to the chosen q^2 window.
    q2, keep = select_feature(feature=q2, limits=bin_ranges[Q2BIN])
    costhetak, costhetal, phi = costhetak[keep], costhetal[keep], phi[keep]

    # Fiducial cut on cos(theta_K), configured via the environment.
    ctk_low = float(os.getenv('LOWER_COSTHETAK_CUT'))
    ctk_high = float(os.getenv('UPPER_COSTHETAK_CUT'))
    costhetak, keep = select_feature(feature=costhetak, limits=(ctk_low, ctk_high))
    q2, costhetal, phi = q2[keep], costhetal[keep], phi[keep]

    angular_data = pd.DataFrame({'ctl': costhetal, 'ctk': costhetak, 'phi': phi})
    angular_data.to_csv(f"angular_fit_generator_bin_{Q2BIN}.csv", index=False)

    # Unbinned maximum-likelihood fit of the P-wave angular model.
    model = B2Kstll('ctl', 'ctk', 'phi')
    obs, pdf, _ = model.get_pdf('PWave')
    datazfit = zfit.Data.from_pandas(df=angular_data, obs=obs)
    nll = zfit.loss.UnbinnedNLL(model=pdf, data=datazfit)
    result = zfit.minimize.Minuit().minimize(nll)
    print(result)
    print(result.params, pdf)

    # MINOS (asymmetric) errors for every floating parameter.
    param_errors, _ = result.errors()
    print(param_errors)

    info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
    for name in ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]:
        entry = result.params[name]
        info_table.add_row([name, entry["value"],
                            entry["minuit_minos"]["lower"],
                            entry["minuit_minos"]["upper"]])
    print(info_table)

    plot_distributions(result, suffix=f"accPwavePDF_generator_level_bin_{Q2BIN}")


if __name__ == "__main__":
    main()

2374
Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_4_bin.yaml
File diff suppressed because it is too large
View File

2374
Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_5_bin.yaml
File diff suppressed because it is too large
View File

2374
Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_6_bin.yaml
File diff suppressed because it is too large
View File

2374
Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_7_bin.yaml
File diff suppressed because it is too large
View File

2374
Code/Scripts/Python Scripts/MC Fit/acc_3d_JpsiKstMC_reweighted_4_bin.yaml
File diff suppressed because it is too large
View File

73
Code/Scripts/Python Scripts/MC Fit/angular_acceptance_modeling.py

@ -0,0 +1,73 @@
import os
import dotenv
import sys
import argparse
import mplhep
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The environment must be loaded before the project path is known.
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))

from analysis.efficiency import get_efficiency_model_class
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature

FILE_MC_PHSP = os.getenv('MC_PHSP_FILE')


def main():
    """Model the 3D angular acceptance from phase-space MC for one q^2 bin.

    Fits Legendre polynomials in (ctl, ctk, phi), writes the model to a
    YAML file and saves a projection plot per angular variable.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--q2bin', dest='q2bin', default=0)
    args = parser.parse_args()
    Q2BIN = int(args.q2bin)
    mplhep.style.use("LHCb2")

    # q^2 bin edges (GeV^2); the bin is chosen by index via --q2bin.
    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00),
                  (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")

    filemanager = FileManager(file=FILE_MC_PHSP, tree="Events",
                              branches=["q2", "costhetak", "costhetal", "phi"])
    mc_phsp_data = filemanager.extract_data()
    q2_mc_phsp, theta_k_mc_phsp, theta_l_mc_phsp, phi_mc_phsp = (
        mc_phsp_data[0], mc_phsp_data[1], mc_phsp_data[2], mc_phsp_data[3])

    # Restrict to the chosen q^2 window.
    q2_mc_phsp, indices = select_feature(feature=q2_mc_phsp, limits=bin_ranges[Q2BIN])
    phi_mc_phsp = phi_mc_phsp[indices]
    theta_l_mc_phsp = theta_l_mc_phsp[indices]
    theta_k_mc_phsp = theta_k_mc_phsp[indices]

    # Fiducial cut on cos(theta_K), configured via the environment.
    lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
    upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
    theta_k_mc_phsp, indices = select_feature(
        feature=theta_k_mc_phsp, limits=(lower_costhetak_cut, upper_costhetak_cut))
    q2_mc_phsp = q2_mc_phsp[indices]
    phi_mc_phsp = phi_mc_phsp[indices]
    theta_l_mc_phsp = theta_l_mc_phsp[indices]

    # Build the fit frame directly (the former intermediate list was redundant).
    df = pd.DataFrame({'ctl': theta_l_mc_phsp, 'ctk': theta_k_mc_phsp,
                       'phi': phi_mc_phsp, 'q2': q2_mc_phsp})

    # Legendre orders per variable and the corresponding fit ranges.
    orders = {"ctl": 4, "ctk": 6, "phi": 2}
    ranges = {"ctl": [-1.0, 1.0],
              "ctk": [lower_costhetak_cut, upper_costhetak_cut],
              "phi": [-np.pi, np.pi]}
    EffClass = get_efficiency_model_class('legendre')
    eff = EffClass.fit(df, ['ctl', 'ctk', 'phi'], ranges=ranges,
                       legendre_orders=orders, calculate_cov=False, chunk_size=2000)
    out_file = eff.write_to_disk(f'acc_3d_JpsiKstMC_{Q2BIN}_bin.yaml')
    print(out_file)

    # Raw strings: '\p' ('\phi') is an invalid escape sequence in a normal
    # literal (SyntaxWarning since Python 3.12, a future error).
    labels = {'ctl': r'$\cos \theta_L$', 'ctk': r'$\cos \theta_K$',
              'phi': r'$\phi$', 'q2': r'$q^2$ [GeV$^2$]'}
    for v in ['ctl', 'ctk', 'phi']:
        plt.subplots(figsize=(15, 10))
        plt.xlim(*ranges[v])
        x, y = eff.project_efficiency(v, n_points=1000)
        plt.plot(x, y, 'b-')
        plt.hist(df[v], density=True, bins=50, color='grey', alpha=0.5)
        plt.ylabel("a.u.", horizontalalignment='right', y=1.0)
        plt.xlabel(labels[v], horizontalalignment='right', x=1.0)
        plt.savefig(f'acc_3d_JpsiKstMC_phsp_mc_{v}_{Q2BIN}_bin.pdf')
        plt.close()


if __name__ == "__main__":
    main()

77
Code/Scripts/Python Scripts/MC Fit/angular_fit.py

@ -0,0 +1,77 @@
import os
import dotenv
import sys
import argparse
import pandas as pd
import mplhep
import zfit
from prettytable import PrettyTable

# The environment must be loaded before the project path is known.
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))

from b2kstll.models.angular import B2Kstll
from b2kstll.plot import plot_distributions
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature

FILE_MC = os.getenv('MC_FILE')


def main():
    """Fit the acceptance-corrected P-wave angular PDF to MC in one q^2 bin."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--q2bin', dest='q2bin', default=0)
    Q2BIN = int(parser.parse_args().q2bin)
    mplhep.style.use("LHCb2")

    # q^2 bin edges (GeV^2); the bin is chosen by index via --q2bin.
    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00),
                  (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")

    loader = FileManager(file=FILE_MC, tree="Events",
                         branches=["q2", "costhetak", "costhetal", "phi"])
    columns = loader.extract_data()
    q2_mc, theta_k_mc, theta_l_mc, phi_mc = columns[0], columns[1], columns[2], columns[3]

    # Restrict to the chosen q^2 window.
    q2_mc, keep = select_feature(feature=q2_mc, limits=bin_ranges[Q2BIN])
    phi_mc, theta_l_mc, theta_k_mc = phi_mc[keep], theta_l_mc[keep], theta_k_mc[keep]

    # Fiducial cut on cos(theta_K), configured via the environment.
    ctk_low = float(os.getenv('LOWER_COSTHETAK_CUT'))
    ctk_high = float(os.getenv('UPPER_COSTHETAK_CUT'))
    theta_k_mc, keep = select_feature(feature=theta_k_mc, limits=(ctk_low, ctk_high))
    q2_mc, phi_mc, theta_l_mc = q2_mc[keep], phi_mc[keep], theta_l_mc[keep]

    angular_data = pd.DataFrame({'ctl': theta_l_mc, 'ctk': theta_k_mc, 'phi': phi_mc})
    angular_data.to_csv(f"ang_fit_mc_{Q2BIN}_bin.csv", index=False)

    # P-wave model with the per-bin acceptance parameterisation attached.
    model = B2Kstll('ctl', 'ctk', 'phi')
    model.set_acc(f"./acc_3d_JpsiKstMC_{Q2BIN}_bin.yaml")
    obs, pdf, _ = model.get_pdf('PWave')

    datazfit = zfit.Data.from_pandas(df=angular_data, obs=obs)
    nll = zfit.loss.UnbinnedNLL(model=pdf, data=datazfit)
    result = zfit.minimize.Minuit().minimize(nll)

    # MINOS (asymmetric) errors for every floating parameter.
    param_errors, _ = result.errors()
    print(param_errors)

    info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
    for name in ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]:
        entry = result.params[name]
        info_table.add_row([name, entry["value"],
                            entry["minuit_minos"]["lower"],
                            entry["minuit_minos"]["upper"]])
    print(info_table)

    plot_distributions(result, suffix=f"accPwavePDF_mc_ang_fit_{Q2BIN}_bin")


if __name__ == "__main__":
    main()

83
Code/Scripts/Python Scripts/MC Fit/diff_pair_tables_average_mc.py

@ -0,0 +1,83 @@
import re
import numpy as np
from prettytable import PrettyTable
def parse_result_table(table_text, variables):
    """Extract (value, lower_error, upper_error) per variable from a PrettyTable dump.

    Returns a dict mapping each variable name to a tuple of np.float64 taken
    from the last matching row; variables absent from the table are missing
    from the result. Raw strings fix the former non-raw '\\d' escapes, and the
    decimal point is now escaped (the original '.' matched any character).
    """
    value_regex = r"[- ]\d+\.\d+"
    parsed = {}
    for variable in variables:
        row_regex = rf".*{variable}.*\d+"
        for line in table_text.splitlines():
            match = re.match(row_regex, line)
            if match is None:
                continue
            numbers = re.findall(value_regex, match.group(0))
            parsed[variable] = (np.float64(numbers[0]),
                                np.float64(numbers[1]),
                                np.float64(numbers[2]))
    return parsed


def main():
    """Compare generator-level fit results with the MC average over bins.

    Prints |MC Average - Gen| per angular observable together with the
    quadratic combination of the asymmetric errors and the significance.
    """
    bin_labels = r"$1.10 < q^2 < 6.00$"
    sample_1 = """+----------+------------------------+------------------------+-----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+------------------------+------------------------+-----------------------+\n
| AFB | -0.030693939006905927 | -0.0027448398251912667 | 0.0026690127045430573 |\n
| FL | 0.7641860897390124 | -0.0031519177028962353 | 0.00318403722094425 |\n
| S3 | -0.009793642767841643 | -0.0039500538800350165 | 0.003962284039325228 |\n
| S4 | -0.11948833798312776 | -0.005018301784506872 | 0.005112053156795224 |\n
| S5 | -0.14672854148123884 | -0.005077800231656763 | 0.0051383664423323315 |\n
| S7 | -0.0009713979621990259 | -0.005287904531482017 | 0.005275166744474821 |\n
| S8 | 0.003178551357900045 | -0.005257961171467131 | 0.005258879438751017 |\n
| S9 | 0.005757808359963191 | -0.004016272402049568 | 0.004019873761938199 |\n
+----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
    sample_2 = """+---------------------------------------------------------------------------------+\n
| q2_split_$1.10 < q^2 < 6.00$_average |\n
+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | -0.03611162259371692 | -0.008731742137981694 | 0.008731742137981694 |\n
| FL | 0.7816031469957773 | -0.010041813091402643 | 0.010041813091402643 |\n
| S3 | -0.004913319742985065 | -0.010997797016445892 | 0.010997797016445892 |\n
| S4 | -0.11446696701061239 | -0.0177373887414066 | 0.0177373887414066 |\n
| S5 | -0.1162615465324004 | -0.016777187198089353 | 0.016777187198089353 |\n
| S7 | 0.013450446832457594 | -0.017351060768029697 | 0.017351060768029697 |\n
| S8 | 0.012943806403734165 | -0.018166933762978635 | 0.018166933762978635 |\n
| S9 | -0.015991133298640343 | -0.011753160273233804 | 0.011753160273233804 |\n
+----------+-----------------------+-----------------------+----------------------+""" # MC LEVEL
    variables = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
    gen_results = parse_result_table(sample_1, variables)
    mc_results = parse_result_table(sample_2, variables)

    info_table = PrettyTable(["Variable", r"|MC Average - Gen|", "Lower Error",
                              "Upper Error", "Stat. Significance"])
    for name in variables:
        delta_value = np.abs(gen_results[name][0] - mc_results[name][0])
        # Combine the asymmetric errors in quadrature; the lower error keeps its sign.
        quadratic_lower = -np.sqrt(gen_results[name][1]**2 + mc_results[name][1]**2)
        quadratic_upper = np.sqrt(gen_results[name][2]**2 + mc_results[name][2]**2)
        stat_significance = np.abs(delta_value / quadratic_lower)
        info_table.add_row([name, delta_value, quadratic_lower,
                            quadratic_upper, stat_significance])
    info_table.title = f"{bin_labels} Average over 3 Bins in MC"
    print(info_table)


if __name__ == "__main__":
    main()

81
Code/Scripts/Python Scripts/MC Fit/diff_pair_tables_average_mc_reweighted.py

@ -0,0 +1,81 @@
import re
import numpy as np
from prettytable import PrettyTable
def parse_result_table(table_text, variables):
    """Extract (value, lower_error, upper_error) per variable from a PrettyTable dump.

    Returns a dict mapping each variable name to a tuple of np.float64 taken
    from the last matching row; variables absent from the table are missing
    from the result. Raw strings fix the former non-raw '\\d' escapes, and the
    decimal point is now escaped (the original '.' matched any character).
    """
    value_regex = r"[- ]\d+\.\d+"
    parsed = {}
    for variable in variables:
        row_regex = rf".*{variable}.*\d+"
        for line in table_text.splitlines():
            match = re.match(row_regex, line)
            if match is None:
                continue
            numbers = re.findall(value_regex, match.group(0))
            parsed[variable] = (np.float64(numbers[0]),
                                np.float64(numbers[1]),
                                np.float64(numbers[2]))
    return parsed


def main():
    """Compare generator-level fit results with the q^2-reweighted MC fit.

    Prints |MC reweighted - Gen| per angular observable together with the
    quadratic combination of the asymmetric errors and the significance.
    """
    bin_labels = r"$1.10 < q^2 < 6.00$"
    sample_1 = """+----------+------------------------+------------------------+-----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+------------------------+------------------------+-----------------------+\n
| AFB | -0.030693939006905927 | -0.0027448398251912667 | 0.0026690127045430573 |\n
| FL | 0.7641860897390124 | -0.0031519177028962353 | 0.00318403722094425 |\n
| S3 | -0.009793642767841643 | -0.0039500538800350165 | 0.003962284039325228 |\n
| S4 | -0.11948833798312776 | -0.005018301784506872 | 0.005112053156795224 |\n
| S5 | -0.14672854148123884 | -0.005077800231656763 | 0.0051383664423323315 |\n
| S7 | -0.0009713979621990259 | -0.005287904531482017 | 0.005275166744474821 |\n
| S8 | 0.003178551357900045 | -0.005257961171467131 | 0.005258879438751017 |\n
| S9 | 0.005757808359963191 | -0.004016272402049568 | 0.004019873761938199 |\n
+----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
    sample_2 = """+----------+-----------------------+------------------------+-----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+------------------------+-----------------------+\n
| AFB | -0.012550006150333942 | -0.0026956457681952418 | 0.0026965400452885245 |\n
| FL | 0.775244817820803 | -0.0030583204477219544 | 0.003052276555621123 |\n
| S3 | -0.004731791649125892 | -0.0035876206689923096 | 0.0035671656402230033 |\n
| S4 | -0.12872329698226329 | -0.0054353577466807685 | 0.005452103886085265 |\n
| S5 | -0.13102847916212362 | -0.005094396678852025 | 0.005083576918431511 |\n
| S7 | 0.014829855067600298 | -0.005251337297972407 | 0.005258433080994564 |\n
| S8 | 0.011919411650166327 | -0.005666331650738382 | 0.005691209183046813 |\n
| S9 | -0.013513044362093558 | -0.0036909184587611017 | 0.0036915155625497006 |\n
+----------+-----------------------+------------------------+-----------------------+""" # MC LEVEL
    variables = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
    gen_results = parse_result_table(sample_1, variables)
    mc_results = parse_result_table(sample_2, variables)

    info_table = PrettyTable(["Variable", r"|MC reweighted - Gen|", "Lower Error",
                              "Upper Error", "Stat. Significance"])
    for name in variables:
        delta_value = np.abs(gen_results[name][0] - mc_results[name][0])
        # Combine the asymmetric errors in quadrature; the lower error keeps its sign.
        quadratic_lower = -np.sqrt(gen_results[name][1]**2 + mc_results[name][1]**2)
        quadratic_upper = np.sqrt(gen_results[name][2]**2 + mc_results[name][2]**2)
        stat_significance = np.abs(delta_value / quadratic_lower)
        info_table.add_row([name, delta_value, quadratic_lower,
                            quadratic_upper, stat_significance])
    info_table.title = f"{bin_labels}"
    print(info_table)


if __name__ == "__main__":
    main()

81
Code/Scripts/Python Scripts/MC Fit/diff_pair_tables_mc_gen.py

@ -0,0 +1,81 @@
import re
import numpy as np
from prettytable import PrettyTable
def parse_result_table(table_text, variables):
    """Extract (value, lower_error, upper_error) per variable from a PrettyTable dump.

    Returns a dict mapping each variable name to a tuple of np.float64 taken
    from the last matching row; variables absent from the table are missing
    from the result. Raw strings fix the former non-raw '\\d' escapes, and the
    decimal point is now escaped (the original '.' matched any character).
    """
    value_regex = r"[- ]\d+\.\d+"
    parsed = {}
    for variable in variables:
        row_regex = rf".*{variable}.*\d+"
        for line in table_text.splitlines():
            match = re.match(row_regex, line)
            if match is None:
                continue
            numbers = re.findall(value_regex, match.group(0))
            parsed[variable] = (np.float64(numbers[0]),
                                np.float64(numbers[1]),
                                np.float64(numbers[2]))
    return parsed


def main():
    """Compare generator-level fit results with the plain MC fit.

    Prints |MC - Gen| per angular observable together with the quadratic
    combination of the asymmetric errors and the significance.
    """
    bin_labels = r"$1.10 < q^2 < 6.00$"
    sample_1 = """+----------+------------------------+------------------------+-----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+------------------------+------------------------+-----------------------+\n
| AFB | -0.030693939006905927 | -0.0027448398251912667 | 0.0026690127045430573 |\n
| FL | 0.7641860897390124 | -0.0031519177028962353 | 0.00318403722094425 |\n
| S3 | -0.009793642767841643 | -0.0039500538800350165 | 0.003962284039325228 |\n
| S4 | -0.11948833798312776 | -0.005018301784506872 | 0.005112053156795224 |\n
| S5 | -0.14672854148123884 | -0.005077800231656763 | 0.0051383664423323315 |\n
| S7 | -0.0009713979621990259 | -0.005287904531482017 | 0.005275166744474821 |\n
| S8 | 0.003178551357900045 | -0.005257961171467131 | 0.005258879438751017 |\n
| S9 | 0.005757808359963191 | -0.004016272402049568 | 0.004019873761938199 |\n
+----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
    sample_2 = """+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | -0.006858788562096526 | -0.006288887141153866 | 0.006202211516385528 |\n
| FL | 0.7741933027966281 | -0.007196527356496095 | 0.006980839734409447 |\n
| S3 | -0.005794055258693013 | -0.008257409210173627 | 0.008381420647898805 |\n
| S4 | -0.13252141252161223 | -0.012754778513953539 | 0.012393845350580537 |\n
| S5 | -0.13982805972042112 | -0.011883305368342971 | 0.011682110709131568 |\n
| S7 | 0.01499315459684105 | -0.012227400419826324 | 0.012191295064893278 |\n
| S8 | 0.010750628153590385 | -0.013096041377945507 | 0.013227158788846905 |\n
| S9 | -0.013084904237743692 | -0.00855475117009418 | 0.008616146883219716 |\n
+----------+-----------------------+-----------------------+----------------------+""" # MC LEVEL
    variables = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
    gen_results = parse_result_table(sample_1, variables)
    mc_results = parse_result_table(sample_2, variables)

    info_table = PrettyTable(["Variable", r"|MC - Gen|", "Lower Error",
                              "Upper Error", "Stat. Significance"])
    for name in variables:
        delta_value = np.abs(gen_results[name][0] - mc_results[name][0])
        # Combine the asymmetric errors in quadrature; the lower error keeps its sign.
        quadratic_lower = -np.sqrt(gen_results[name][1]**2 + mc_results[name][1]**2)
        quadratic_upper = np.sqrt(gen_results[name][2]**2 + mc_results[name][2]**2)
        stat_significance = np.abs(delta_value / quadratic_lower)
        info_table.add_row([name, delta_value, quadratic_lower,
                            quadratic_upper, stat_significance])
    info_table.title = f"{bin_labels}"
    print(info_table)


if __name__ == "__main__":
    main()

94
Code/Scripts/Python Scripts/MC Fit/reweighted_angular_acceptance_modeling.py

@ -0,0 +1,94 @@
import os
import dotenv
import sys
import argparse
import mplhep
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The environment must be loaded before the project path is known.
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))

from analysis.efficiency import get_efficiency_model_class
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature, reweight_feature
from hep_analytics.processing.visualisation import reweight_comparing_plot

FILE_GEN = os.getenv('GEN_FILE')
FILE_MC_PHSP = os.getenv('MC_PHSP_FILE')


def main():
    """Model the 3D angular acceptance from phase-space MC reweighted in q^2.

    The phase-space MC q^2 spectrum is reweighted to the generator-level one,
    then a Legendre acceptance model is fitted with those weights, written to
    YAML, and projection/control plots are saved.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--q2bin', dest='q2bin', default=0)
    args = parser.parse_args()
    Q2BIN = int(args.q2bin)
    mplhep.style.use("LHCb2")

    # q^2 bin edges (GeV^2) and their display labels; chosen by --q2bin index.
    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00),
                  (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
    bin_labels = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
                  r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
                  r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]

    # --- Phase-space MC: q^2 window plus cos(theta_K) fiducial cut. ---
    filemanager = FileManager(file=FILE_MC_PHSP, tree="Events",
                              branches=["q2", "costhetak", "costhetal", "phi"])
    mc_phsp_data = filemanager.extract_data()
    q2_mc_phsp, theta_k_mc_phsp, theta_l_mc_phsp, phi_mc_phsp = (
        mc_phsp_data[0], mc_phsp_data[1], mc_phsp_data[2], mc_phsp_data[3])
    q2_mc_phsp, indices = select_feature(feature=q2_mc_phsp, limits=bin_ranges[Q2BIN])
    phi_mc_phsp = phi_mc_phsp[indices]
    theta_l_mc_phsp = theta_l_mc_phsp[indices]
    theta_k_mc_phsp = theta_k_mc_phsp[indices]
    lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
    upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
    theta_k_mc_phsp, indices = select_feature(
        feature=theta_k_mc_phsp, limits=(lower_costhetak_cut, upper_costhetak_cut))
    q2_mc_phsp = q2_mc_phsp[indices]
    phi_mc_phsp = phi_mc_phsp[indices]
    theta_l_mc_phsp = theta_l_mc_phsp[indices]

    # --- Generator level: same selection, used as the reweighting target. ---
    filemanager = FileManager(file=FILE_GEN, tree="Events",
                              branches=["q2", "costhetak", "costhetal", "phi"])
    gen_data = filemanager.extract_data()
    q2_gen, theta_k_gen, theta_l_gen, phi_gen = (
        gen_data[0], gen_data[1], gen_data[2], gen_data[3])
    q2_gen, indices = select_feature(feature=q2_gen, limits=bin_ranges[Q2BIN])
    phi_gen = phi_gen[indices]
    theta_l_gen = theta_l_gen[indices]
    theta_k_gen = theta_k_gen[indices]
    theta_k_gen, indices = select_feature(
        feature=theta_k_gen, limits=(lower_costhetak_cut, upper_costhetak_cut))
    q2_gen = q2_gen[indices]
    phi_gen = phi_gen[indices]
    theta_l_gen = theta_l_gen[indices]

    # Per-event weights that map the PHSP q^2 spectrum onto the generator one.
    q2_mc_phsp_weights = reweight_feature(original_feature=q2_mc_phsp,
                                          target_feature=q2_gen, n_bins=25)
    reweight_comparing_plot(original_feature=q2_mc_phsp, target_feature=q2_gen,
                            weights=q2_mc_phsp_weights,
                            n_bins=25, suptitle=f"{bin_labels[Q2BIN]}",
                            titles=["Q2 MC PHSP", "Q2 MC PHSP (reweighted)", "Q2 Gen"],
                            save=f"reweighted_q2_gen_mc_phsp_{Q2BIN}_bin.png")

    df = pd.DataFrame({'ctl': theta_l_mc_phsp, 'ctk': theta_k_mc_phsp,
                       'phi': phi_mc_phsp, 'q2': q2_mc_phsp,
                       'weights': q2_mc_phsp_weights})

    # Legendre orders per variable and the corresponding fit ranges.
    orders = {"ctl": 4, "ctk": 6, "phi": 2}
    ranges = {"ctl": [-1.0, 1.0],
              "ctk": [lower_costhetak_cut, upper_costhetak_cut],
              "phi": [-np.pi, np.pi]}
    EffClass = get_efficiency_model_class('legendre')
    eff = EffClass.fit(df, ['ctl', 'ctk', 'phi'], weight_var="weights", ranges=ranges,
                       legendre_orders=orders, calculate_cov=False, chunk_size=2000)
    out_file = eff.write_to_disk(f'acc_3d_JpsiKstMC_reweighted_{Q2BIN}_bin.yaml')
    print(out_file)

    # Raw strings: '\p' ('\phi') is an invalid escape sequence in a normal
    # literal (SyntaxWarning since Python 3.12, a future error).
    labels = {'ctl': r'$\cos \theta_L$', 'ctk': r'$\cos \theta_K$',
              'phi': r'$\phi$', 'q2': r'$q^2$ [GeV$^2$]'}
    for v in ['ctl', 'ctk', 'phi']:
        plt.subplots(figsize=(15, 10))  # former unused fig/ax capture dropped
        plt.xlim(*ranges[v])
        x, y = eff.project_efficiency(v, n_points=1000)
        plt.plot(x, y, 'b-')
        plt.hist(df[v], density=True, bins=50, color='grey', alpha=0.5)
        plt.ylabel("a.u.", horizontalalignment='right', y=1.0)
        plt.xlabel(labels[v], horizontalalignment='right', x=1.0)
        plt.savefig(f'acc_3d_JpsiKstMC_{v}_{Q2BIN}_bin.pdf')
        plt.close()


if __name__ == "__main__":
    main()

97
Code/Scripts/Python Scripts/MC Fit/reweighted_angular_fit.py

@ -0,0 +1,97 @@
import os
import dotenv
import sys
import argparse
import pandas as pd
import mplhep
import zfit
from prettytable import PrettyTable

# The environment must be loaded before the project path is known.
dotenv.load_dotenv('../properties.env')
sys.path.insert(0, os.getenv('SYS_PATH'))

from b2kstll.models.angular import B2Kstll
from b2kstll.plot import plot_distributions
from hep_analytics.processing.extract import FileManager
from hep_analytics.processing.transform import select_feature, reweight_feature
from hep_analytics.processing.visualisation import reweight_comparing_plot

FILE_MC = os.getenv('MC_FILE')
FILE_GEN = os.getenv('GEN_FILE')


def main():
    """Fit the acceptance-corrected P-wave PDF to q^2-reweighted MC in one bin."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--q2bin', dest='q2bin', default=0)
    args = parser.parse_args()
    Q2BIN = int(args.q2bin)
    mplhep.style.use("LHCb2")

    # q^2 bin edges (GeV^2) and their display labels; chosen by --q2bin index.
    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00),
                  (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
    bin_labels = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
                  r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
                  r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]

    # --- MC sample: q^2 window plus cos(theta_K) fiducial cut. ---
    filemanager = FileManager(file=FILE_MC, tree="Events",
                              branches=["q2", "costhetak", "costhetal", "phi"])
    mc_data = filemanager.extract_data()
    q2_mc, theta_k_mc, theta_l_mc, phi_mc = mc_data[0], mc_data[1], mc_data[2], mc_data[3]
    q2_mc, indices = select_feature(feature=q2_mc, limits=bin_ranges[Q2BIN])
    phi_mc = phi_mc[indices]
    theta_l_mc = theta_l_mc[indices]
    theta_k_mc = theta_k_mc[indices]
    lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
    upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
    theta_k_mc, indices = select_feature(
        feature=theta_k_mc, limits=(lower_costhetak_cut, upper_costhetak_cut))
    q2_mc = q2_mc[indices]
    phi_mc = phi_mc[indices]
    theta_l_mc = theta_l_mc[indices]

    # --- Generator level: same selection, used as the reweighting target. ---
    filemanager = FileManager(file=FILE_GEN, tree="Events",
                              branches=["q2", "costhetak", "costhetal", "phi"])
    gen_data = filemanager.extract_data()
    q2_gen, theta_k_gen, theta_l_gen, phi_gen = (
        gen_data[0], gen_data[1], gen_data[2], gen_data[3])
    q2_gen, indices = select_feature(feature=q2_gen, limits=bin_ranges[Q2BIN])
    phi_gen = phi_gen[indices]
    theta_l_gen = theta_l_gen[indices]
    theta_k_gen = theta_k_gen[indices]
    theta_k_gen, indices = select_feature(
        feature=theta_k_gen, limits=(lower_costhetak_cut, upper_costhetak_cut))
    q2_gen = q2_gen[indices]
    phi_gen = phi_gen[indices]
    theta_l_gen = theta_l_gen[indices]

    angular_data = pd.DataFrame({'ctl': theta_l_mc, 'ctk': theta_k_mc, 'phi': phi_mc})
    angular_data.to_csv(f"ang_fit_mc_q2_bins_{Q2BIN}_bin.csv", index=False)

    x = B2Kstll('ctl', 'ctk', 'phi')
    # BUGFIX: the acceptance path was an f-string without a placeholder, so the
    # 4-bin acceptance was loaded for every --q2bin. Use the per-bin file
    # written by reweighted_angular_acceptance_modeling.py instead.
    x.set_acc(f"./acc_3d_JpsiKstMC_reweighted_{Q2BIN}_bin.yaml")
    obs, pdf, _ = x.get_pdf('PWave')

    # Per-event weights that map the MC q^2 spectrum onto the generator one.
    q2_mc_weights = reweight_feature(original_feature=q2_mc, target_feature=q2_gen, n_bins=25)
    # Bin index added to the control-plot name so bins no longer overwrite each other.
    reweight_comparing_plot(original_feature=q2_mc, target_feature=q2_gen,
                            weights=q2_mc_weights,
                            n_bins=25, suptitle=f"{bin_labels[Q2BIN]}",
                            titles=["Q2 MC", "Q2 MC (reweighted)", "Q2 Gen"],
                            save=f"reweighted_q2_gen_mc_{Q2BIN}_bin.png")

    # Weighted unbinned maximum-likelihood fit.
    datazfit = zfit.Data.from_pandas(df=angular_data, obs=obs, weights=q2_mc_weights)
    nll = zfit.loss.UnbinnedNLL(model=pdf, data=datazfit)
    minimizer = zfit.minimize.Minuit()
    result = minimizer.minimize(nll)

    # MINOS (asymmetric) errors for every floating parameter.
    param_errors, _ = result.errors()
    print(param_errors)

    info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
    fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
    for name in fit_labels:
        value = result.params[name]["value"]
        lower = result.params[name]["minuit_minos"]["lower"]
        upper = result.params[name]["minuit_minos"]["upper"]
        info_table.add_row([name, value, lower, upper])
    print(info_table)

    plot_distributions(result, suffix=f"accPwavePDF_{Q2BIN}_bin_reweighted_mc")


if __name__ == "__main__":
    main()

97
Code/Scripts/Python Scripts/MC Fit/table_average_to_compare.py

@ -0,0 +1,97 @@
import re
import numpy as np
from prettytable import PrettyTable
def main():
    """Average the fit results of three q2 sub-bin fits and print the combined table.

    The three input tables are PrettyTable dumps of previous angular fits.
    For each observable the three fitted values are averaged, and the
    (symmetrized) errors are combined in quadrature and divided by the number
    of samples, then printed as a new PrettyTable.
    """
    table_1 = """+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | -0.1327941714665493 | -0.011751983569979923 | 0.011695518975582573 |\n
| FL | 0.778784190310989 | -0.013786738313616998 | 0.01334631516152801 |\n
| S3 | 0.012016752919524067 | -0.014186281678810328 | 0.01390470699489175 |\n
| S4 | 0.0030912112690471196 | -0.02494551503535478 | 0.02500682819273369 |\n
| S5 | 0.06257482631549156 | -0.022576669869741432 | 0.02267902660423071 |\n
| S7 | 0.026461647341068015 | -0.022870493984060422 | 0.022709046371542273 |\n
| S8 | 0.022528200778115424 | -0.024500119399894143 | 0.02444883875849664 |\n
| S9 | -0.010047596608480971 | -0.015100958783891871 | 0.01497061747614531 |\n
+----------+-----------------------+-----------------------+----------------------+"""
    table_2 = """+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | -0.044313915998597596 | -0.011266005942710903 | 0.011376116194169396 |\n
| FL | 0.8080898316779715 | -0.012903371237386076 | 0.012627755576193413 |\n
| S3 | -0.001350693384098943 | -0.01425102196417036 | 0.014014236056631763 |\n
| S4 | -0.16348388538343442 | -0.022982703026786865 | 0.02353050491741336 |\n
| S5 | -0.12998638915138938 | -0.02207623402690808 | 0.02230804022923196 |\n
| S7 | -0.019175072605277906 | -0.022219297227352726 | 0.02233298418372788 |\n
| S8 | 0.03896504393685167 | -0.02306667997644898 | 0.023056355590581525 |\n
| S9 | -0.026410777123662296 | -0.01497373735832672 | 0.015237714795859537 |\n
+----------+-----------------------+-----------------------+----------------------+"""
    table_3 = """+----------+-----------------------+-----------------------+----------------------+\n
| Variable | Value | Lower Error | Upper Error |\n
+----------+-----------------------+-----------------------+----------------------+\n
| AFB | 0.06877321968399613 | -0.00873785505692959 | 0.008866035437567673 |\n
| FL | 0.7579354189983714 | -0.01048129883366948 | 0.01017491937670441 |\n
| S3 | -0.02540601876438032 | -0.012075664333196083 | 0.012192632205060838 |\n
| S4 | -0.18300822691744986 | -0.015545142299112552 | 0.016136099542591972 |\n
| S5 | -0.28137307676130335 | -0.0160784362375397 | 0.01630021297009425 |\n
| S7 | 0.033064765761582675 | -0.01837491321971226 | 0.018457429889962827 |\n
| S8 | -0.022661825503764606 | -0.018831356033962507 | 0.018816177761568176 |\n
| S9 | -0.01151502616377776 | -0.012778180225448249 | 0.013091600028830737 |\n
+----------+-----------------------+-----------------------+----------------------+"""
    bin_labels = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
                  r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
                  r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]
    tables = [table_1, table_2, table_3]
    variables = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
    # Raw strings: "\d" in a plain string is an invalid escape sequence
    # (DeprecationWarning, SyntaxError in future Python); the patterns are unchanged.
    select_regex = [r".*AFB.*\d+", r".*FL.*\d+", r".*S3.*\d+", r".*S4.*\d+",
                    r".*S5.*\d+", r".*S7.*\d+", r".*S8.*\d+", r".*S9.*\d+"]
    # Escaped dot so only a literal decimal point is matched (was an any-char wildcard).
    value_regex = r"[- ]\d+\.\d+"
    # One result slot per table, keyed "1".."3", each mapping variable -> (value, lower, upper).
    samples = {str(sample_id): dict.fromkeys(variables) for sample_id in range(1, len(tables) + 1)}
    for sample_id, table in enumerate(tables, start=1):
        table_lines = table.splitlines()
        for variable, regex in zip(variables, select_regex):
            for line in table_lines:
                subresult = re.match(regex, line)
                if subresult is not None:
                    numbers = re.findall(value_regex, subresult.group(0))
                    value = np.float64(numbers[0])
                    lower_err = np.float64(numbers[1])
                    upper_err = np.float64(numbers[2])
                    samples[str(sample_id)][variable] = (value, lower_err, upper_err)
    # Transpose: collect, per observable, the three fitted values and their
    # symmetrized (quadrature-combined lower/upper) errors.
    fit_variables_split = [[] for _ in variables]
    fit_errors_split = [[] for _ in variables]
    for sample in samples.values():
        for position, fitvalue in enumerate(sample.values()):
            fit_variables_split[position].append(fitvalue[0])
            fit_errors_split[position].append(np.sqrt(fitvalue[1]**2 + fitvalue[2]**2))
    info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
    info_table.title = f"q2_split_{bin_labels[4]}_average"
    for name, fit_split_var, fit_err_split in zip(variables, fit_variables_split, fit_errors_split):
        fit_avg = np.mean(fit_split_var)
        # Error of the average of uncorrelated measurements:
        # quadrature sum divided by the number of samples (no longer hard-coded to 3).
        error_split = np.sqrt(np.sum(np.square(fit_err_split))) / len(fit_err_split)
        info_table.add_row([name, fit_avg, -error_split, error_split])
    print(info_table)


if __name__ == "__main__":
    main()

7
Code/Scripts/Python Scripts/properties.env

@ -0,0 +1,7 @@
LOWER_COSTHETAK_CUT=-1.00
UPPER_COSTHETAK_CUT=0.851
SYS_PATH=../../
SYS_PATH_LEVEL_1=../
GEN_FILE=../../KplusPi0Resolved_Run2_MC_EvtGen_FCNC.root
MC_FILE=../../KplusPi0Resolved_Run2_MC_FCNC.root
MC_PHSP_FILE=../../KplusPi0Resolved_Run2_PHSP_FCNC.root

208
Code/Scripts/Python Scripts/requirements.txt

@ -0,0 +1,208 @@
absl-py==0.15.0
aenum==3.1.11
algopy==0.5.7
analysis @ git+https://github.com/mayou36/analysis-tools@5db2f64e48cac26954d182b62eea7665d26847d2
appdirs==1.4.4
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1640817743617/work
argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1649500321618/work
asdf==2.11.1
asdf-standard==1.0.2
asdf-transform-schemas==0.2.2
asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1618968359944/work
astunparse==1.6.3
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1640799537051/work
awkward==1.8.0
awkward0==0.15.5
-e git+https://gitlab.cern.ch/LHCb-RD/ewp-bd2ksteeangular-legacy.git@c5e306ac36ce7de31ff8c0e2f1833c1d3b2e1dcb#egg=b2kstll&subdirectory=b2kstll
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1649463573192/work
bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1649361991009/work
cachetools==4.2.4
certifi==2021.10.8
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1636046055389/work
charset-normalizer==2.0.12
ckmutil==0.3.2
clang==5.0
cloudpickle==2.0.0
colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1602866480661/work
colored==1.4.3
colorlog==6.6.0
ConfigArgParse==1.5.3
connection-pool==0.0.3
contextlib2==21.6.0
cycler==0.11.0
datrie==0.8.2
debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1649586347330/work
decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work
dm-tree==0.1.7
docutils==0.18.1
dotmap==1.3.30
dpath==2.0.6
entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work
executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1646044401614/work
fasteners==0.17.3
fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1641751198313/work/dist
flatbuffers==1.12
flavio==2.3.3
flit_core @ file:///home/conda/feedstock_root/build_artifacts/flit-core_1645629044586/work/source/flit_core
fonttools==4.33.3
formulate==0.1.1
future==0.18.2
gast==0.4.0
gitdb==4.0.9
GitPython==3.1.27
google-auth==1.35.0
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
grpcio==1.46.1
h5py==3.1.0
hep-ml==0.7.1
hepstats==0.5.0
hepunits==2.2.0
idna==3.3
iminuit @ file:///home/conda/feedstock_root/build_artifacts/iminuit_1649770305045/work
importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1648728288044/work
importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1650169872758/work
iniconfig==1.1.1
ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1649684276995/work/dist/ipykernel-6.13.0-py3-none-any.whl
ipyopt==0.11.0
ipyparallel @ file:///home/conda/feedstock_root/build_artifacts/ipyparallel_1652183878568/work
ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1651238283435/work
ipython-genutils==0.2.0
jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1649067096717/work
Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1651774399431/work
jmespath==1.0.0
joblib==1.1.0
jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1651798819471/work
jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1652061014773/work
jupyter-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1652365269273/work
jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1649936611996/work
keras==2.6.0
Keras-Preprocessing==1.1.2
kiwisolver==1.4.2
llvmlite==0.38.0
Markdown==3.3.7
MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1648737556467/work
matplotlib==3.5.2
matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1631080358261/work
metakernel @ file:///home/conda/feedstock_root/build_artifacts/metakernel_1648594625035/work
mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1635844679781/work
mplhep==0.3.23
mplhep-data==0.0.3
mpmath==1.2.1
nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1652155694429/work
nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert-meta_1649676641343/work
nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1651607001005/work
nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1648959695634/work
nlopt==2.7.0
notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1650363291341/work
numba @ file:///home/conda/feedstock_root/build_artifacts/numba_1652226553013/work
numdifftools==0.9.40
numexpr==2.8.1
numpy==1.19.5
oauthlib==3.2.0
opt-einsum==3.3.0
ordered-set==4.1.0
packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1637239678211/work
pandas==1.4.2
pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work
parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
particle==0.20.1
patsy==0.5.2
pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1602535608087/work
pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
Pillow==9.1.0
plac==1.3.5
pluggy==1.0.0
portalocker @ file:///home/conda/feedstock_root/build_artifacts/portalocker_1645465641398/work
prettytable==3.3.0
progressbar2==4.0.0
prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1649447152425/work
prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1649130487073/work
protobuf==3.20.1
psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1648872973590/work
ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
PuLP==2.6.0
pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
py==1.11.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work
Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1650904496387/work
pylha==0.4
pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1652235407899/work
PyRoot==0.3.0
pyrsistent @ file:///home/conda/feedstock_root/build_artifacts/pyrsistent_1649013355209/work
pytest==7.1.2
python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
python-dotenv==0.20.0
python-utils==3.2.2
pytz==2022.1
PyYAML==6.0
pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1649055645150/work
ratelimiter==1.2.0.post0
requests==2.27.1
requests-oauthlib==1.3.1
retry==0.9.2
root-numpy @ file:///home/conda/feedstock_root/build_artifacts/root_numpy_1642275926538/work
root-pandas==0.7.0
rsa==4.8
rundec==0.5.2
scikit-learn==1.1.0
scipy==1.8.0
seaborn==0.11.2
semantic-version==2.9.0
Send2Trash @ file:///home/conda/feedstock_root/build_artifacts/send2trash_1628511208346/work
six==1.15.0
smart-open==6.0.0
smmap==5.0.0
snakemake==7.6.2
soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1638550740809/work
stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1644872665635/work
statsmodels==0.13.2
stopit==1.1.2
sympy==1.10.1
tables==3.7.0
tabulate==0.8.9
tensorboard==2.6.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
tensorflow==2.6.3
tensorflow-addons==0.16.1
tensorflow-estimator==2.6.0
tensorflow-gpu==2.6.3
tensorflow-graphics-gpu==1.0.0
tensorflow-probability==0.13.0
termcolor==1.1.0
terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1649103779998/work
texttable==1.6.4
tf-quant-finance==0.0.1.dev30
Theano==1.0.5
threadpoolctl==3.1.0
tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1637612658783/work
tomli==2.0.1
toposort==1.7
tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1648827245914/work
tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1649051611147/work
traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1652201717608/work
typeguard==2.13.3
typing-extensions==3.10.0.2
uhi==0.3.1
uncertainties==3.1.6
uproot==4.2.3
uproot3-methods==0.10.1
urllib3==1.26.9
voluptuous==0.13.1
wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1600965781394/work
webencodings==0.5.1
Werkzeug==2.1.2
wilson==2.2.1
wrapt==1.12.1
xgboost==1.6.1
xrootd==5.4.2
yamlloader==1.1.0
yte==1.4.0
zfit==0.8.3
zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1649012893348/work

29
Code/Scripts/Python Scripts/root_file_inspect.py

@ -0,0 +1,29 @@
import os
import dotenv
import sys
import argparse
dotenv.load_dotenv('properties.env')
sys.path.insert(0, os.getenv('SYS_PATH_LEVEL_1'))
from rootio import RootIO
def main():
    """Print the structure of a ROOT file, optionally filtering branch names with a regex."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--file', dest = 'file', default = '')
    parser.add_argument('--regex', dest = 'regex', default = '')
    arguments = parser.parse_args()
    inspector = RootIO(arguments.file)
    if arguments.regex:
        inspector.get_information(regex = arguments.regex)
    else:
        inspector.get_information()


if __name__ == "__main__":
    main()

37
Code/Scripts/Python Scripts/rootio.py

@ -0,0 +1,37 @@
import os
import re
import uproot
class RootIO:
    """Utility for inspecting the structure of a ROOT file via uproot."""

    def __init__(self, path_to_file: str):
        """Remember the location of the ROOT file.

        Args:
            path_to_file: Path to an existing ROOT file on disk.

        Raises:
            ValueError: If the path does not exist.
        """
        if not os.path.exists(path = path_to_file):
            raise ValueError("Path does not exist!")
        self.path_to_file = path_to_file

    def get_information(self, regex: str = None) -> None:
        """Print the classnames and branch content of the file.

        Args:
            regex: Optional pattern; when given, the branch names matching it
                are additionally printed per class.
        """
        with uproot.open(self.path_to_file) as file:
            print(f"File classnames:\n{file.classnames()}")
            for classname, classtype in file.classnames().items():
                print(f"Classname:\n{classname}")
                print(f"Classtype:\n{classtype}")
                print("Content of the class:")
                for name, branch in file[classname].items():
                    print(f"{name}: {branch}")
                if regex is not None:
                    print("Regex search results:")
                    attributes = file[classname].keys()
                    for attribute in attributes:
                        result = self.__find_pattern_in_text(regex = regex, text = attribute)
                        if result != "":
                            print(f"{result}")

    def __find_pattern_in_text(self, regex: str, text: str) -> str:
        """Return the first match of *regex* in *text*, or '' when there is none."""
        search_result = re.search(pattern = regex, string = text)
        if search_result is not None:
            return search_result.group(0)
        return ""

0
Code/Scripts/hep_analytics/__init__.py

0
Code/Scripts/hep_analytics/pathing/__init__.py

13
Code/Scripts/hep_analytics/pathing/aggregate.py

@ -0,0 +1,13 @@
class PathAggregator:
    """Builds relative paths of the form ./basefolder/directory[/year]/file."""

    def __init__(self, basefolder: str, directory: str, file: str, year: str = None) -> None:
        """Store the path components.

        Args:
            basefolder: Top-level folder, relative to the current directory.
            directory: Sub-directory inside the base folder.
            file: File name.
            year: Optional year segment inserted between directory and file.
        """
        self.basefolder = basefolder
        self.directory = directory
        self.file = file
        self.year = year

    def get_path_to_file(self) -> str:
        """Return the relative path, including the year segment when one was given."""
        if self.year is not None:
            return f"./{self.basefolder}/{self.directory}/{self.year}/{self.file}"
        return f"./{self.basefolder}/{self.directory}/{self.file}"

0
Code/Scripts/hep_analytics/processing/__init__.py

20
Code/Scripts/hep_analytics/processing/extract.py

@ -0,0 +1,20 @@
import numpy as np
import uproot
import typing
from dataclasses import dataclass
@dataclass
class FileManager:
    """Reads a set of branches from one tree of a ROOT file."""
    # Path to the ROOT file on disk.
    file: str
    # Name of the tree inside the file.
    tree: str
    # Branch names to extract from the tree.
    branches: list[str]

    def extract_data(self) -> list[np.ndarray]:
        """Return one numpy array per configured branch, in branch order."""
        with uproot.open(self.file) as opened_file:
            tree = opened_file[self.tree]
            return [tree[branch_name].array(library = "np") for branch_name in self.branches]

20
Code/Scripts/hep_analytics/processing/transform.py

@ -0,0 +1,20 @@
import numpy as np
from hep_ml.reweight import BinsReweighter
def select_feature(feature: np.ndarray, limits: tuple[float, float]) -> tuple[np.ndarray, list]:
    """Select the entries of *feature* strictly inside the open interval (limits[0], limits[1]).

    Args:
        feature: 1-D array of values.
        limits: (lower, upper) bounds; both bounds are excluded.

    Returns:
        A tuple of (selected values, positional indices of the survivors); the
        index list can be reused to filter sibling arrays consistently.
        An empty selection yields an empty array and an empty list.
    """
    selection_indices = [index for index, value in enumerate(feature)
                         if limits[0] < value < limits[1]]
    return feature[selection_indices], selection_indices
def reweight_feature(original_feature: list, target_feature: list, n_bins: int, n_neighs: int = 2):
    """Compute per-event weights that map *original_feature* onto *target_feature*.

    A binned reweighter is fitted with uniform starting weights and then used
    to predict one weight per entry of the original sample.
    """
    uniform_weights = np.ones(len(original_feature))
    reweighter = BinsReweighter(n_bins = n_bins, n_neighs = n_neighs)
    reweighter.fit(original = original_feature, target = target_feature, original_weight = uniform_weights)
    return reweighter.predict_weights(original = original_feature, original_weight = uniform_weights)

22
Code/Scripts/hep_analytics/processing/visualisation.py

@ -0,0 +1,22 @@
import matplotlib.pyplot as plt
import mplhep
mplhep.style.use("LHCb2")
def reweight_comparing_plot(original_feature: list, target_feature: list, weights: list, n_bins: int, suptitle: str, titles: list[str], save: str) -> None:
    """Draw side-by-side histograms of the original, reweighted and target distributions.

    Args:
        original_feature: Values of the source distribution.
        target_feature: Values of the distribution being matched.
        weights: One weight per entry of *original_feature*.
        n_bins: Number of histogram bins in each panel.
        suptitle: Figure-level title.
        titles: Three panel titles (original, reweighted, target).
        save: Output file name passed to ``Figure.savefig``.

    Raises:
        ValueError: If *weights* and *original_feature* differ in length.
    """
    if len(original_feature) != len(weights):
        raise ValueError("original_feature and weights need to have the same dimension!")
    fig, ax = plt.subplots(nrows = 1, ncols = 3, figsize = (30, 12.5))
    ax[0].hist(original_feature, bins = n_bins)
    ax[0].set_ylabel("counts/a.u.")
    ax[0].set_title(titles[0])
    ax[1].hist(original_feature, bins = n_bins, weights = weights)
    ax[1].set_title(titles[1])
    ax[2].hist(target_feature, bins = n_bins)
    ax[2].set_title(titles[2])
    fig.suptitle(suptitle)
    fig.savefig(save)
    # Release the figure: without this, repeated calls accumulate open figures
    # and leak memory (matplotlib keeps every figure alive until closed).
    plt.close(fig)
Loading…
Cancel
Save