Enhanced README

2022-09-15 12:15:29 +02:00 · 2022-09-15 12:13:30 +02:00 · 2022-09-15 12:10:52 +02:00 · 2022-09-15 12:06:35 +02:00 · 2022-09-02 12:20:02 +02:00 · 2022-08-30 11:45:57 +02:00
28 changed files with 13292 additions and 0 deletions
--- a/Code/Scripts/Python
+++ b/Code/Scripts/Python
@ -0,0 +1,79 @@
+import os
+import dotenv
+import argparse
+import sys
+import pandas as pd
+import zfit
+
+from prettytable import PrettyTable
+
+dotenv.load_dotenv('../properties.env')
+
+sys.path.insert(0, os.getenv('SYS_PATH'))
+
+from b2kstll.models.angular import B2Kstll
+from b2kstll.plot import plot_distributions
+
+from hep_analytics.processing.extract import FileManager
+from hep_analytics.processing.transform import select_feature
+
+
+FILE = os.getenv('FILE_GEN')
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--q2bin', dest ='q2bin', default = 0)
+    args   = parser.parse_args()
+    Q2BIN  = int(args.q2bin)
+
+    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
+    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
+
+    file_manager = FileManager(file = FILE, tree = "Events", 
+                               branches = ["q2", "costhetak", "costhetal", "phi"])
+    data         = file_manager.extract_data()
+    q2, costhetak, costhetal, phi = data[0], data[1], data[2], data[3]
+
+    q2, indices  = select_feature(feature = q2, limits = bin_ranges[Q2BIN])
+    costhetak    = costhetak[indices]
+    costhetal    = costhetal[indices]
+    phi          = phi[indices]
+
+    lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
+    upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
+    costhetak, indices  = select_feature(feature = costhetak, limits = (lower_costhetak_cut, upper_costhetak_cut))
+    q2                  = q2[indices]
+    costhetal           = costhetal[indices]
+    phi                 = phi[indices]
+        
+    angular_data = pd.DataFrame({'ctl': costhetal, 'ctk': costhetak, 'phi': phi})
+    angular_data.to_csv(f"angular_fit_generator_bin_{Q2BIN}.csv", index = False)
+    x            = B2Kstll('ctl','ctk','phi')
+    obs, pdf, _  = x.get_pdf('PWave')
+    datazfit     = zfit.Data.from_pandas(df = angular_data, obs = obs)
+    nll          = zfit.loss.UnbinnedNLL(model = pdf, data = datazfit)
+    minimizer    = zfit.minimize.Minuit()
+
+    result = minimizer.minimize(nll)
+    print(result)
+    print(result.params, pdf)
+
+    param_errors, _ = result.errors()
+    print(param_errors)
+
+    info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
+    fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
+    for name in fit_labels:
+        value = result.params[name]["value"]
+        lower = result.params[name]["minuit_minos"]["lower"]
+        upper = result.params[name]["minuit_minos"]["upper"]
+        info_table.add_row([name, value, lower, upper])
+
+    print(info_table)
+
+    plot_distributions(result, suffix = f"accPwavePDF_generator_level_bin_{Q2BIN}")
+
+
+if __name__ == "__main__":
+    main()
--- a/Fit/acc_3d_JpsiKstMC_4_bin.yaml
+++ b/Fit/acc_3d_JpsiKstMC_4_bin.yaml
--- a/Fit/acc_3d_JpsiKstMC_5_bin.yaml
+++ b/Fit/acc_3d_JpsiKstMC_5_bin.yaml
--- a/Fit/acc_3d_JpsiKstMC_6_bin.yaml
+++ b/Fit/acc_3d_JpsiKstMC_6_bin.yaml
--- a/Fit/acc_3d_JpsiKstMC_7_bin.yaml
+++ b/Fit/acc_3d_JpsiKstMC_7_bin.yaml
--- a/Fit/acc_3d_JpsiKstMC_reweighted_4_bin.yaml
+++ b/Fit/acc_3d_JpsiKstMC_reweighted_4_bin.yaml
--- a/Fit/angular_acceptance_modeling.py
+++ b/Fit/angular_acceptance_modeling.py
@ -0,0 +1,73 @@
+import os
+import dotenv
+import sys
+import argparse
+import mplhep
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+dotenv.load_dotenv('../properties.env')
+
+sys.path.insert(0, os.getenv('SYS_PATH'))
+
+from analysis.efficiency import get_efficiency_model_class
+
+from hep_analytics.processing.extract import FileManager
+from hep_analytics.processing.transform import select_feature
+
+FILE_MC_PHSP  = os.getenv('MC_PHSP_FILE')
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--q2bin', dest ='q2bin', default = 0)
+    args   = parser.parse_args()
+    Q2BIN  = int(args.q2bin)
+
+    mplhep.style.use("LHCb2")
+
+    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
+    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
+
+    filemanager    = FileManager(file = FILE_MC_PHSP, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
+    mc_phsp_data   = filemanager.extract_data()
+    q2_mc_phsp, theta_k_mc_phsp, theta_l_mc_phsp, phi_mc_phsp = mc_phsp_data[0], mc_phsp_data[1], mc_phsp_data[2], mc_phsp_data[3]
+    q2_mc_phsp, indices  = select_feature(feature = q2_mc_phsp, limits = bin_ranges[Q2BIN])
+    phi_mc_phsp     = phi_mc_phsp[indices]
+    theta_l_mc_phsp = theta_l_mc_phsp[indices]
+    theta_k_mc_phsp = theta_k_mc_phsp[indices]
+    lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
+    upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
+    theta_k_mc_phsp, indices  = select_feature(feature = theta_k_mc_phsp, limits = (lower_costhetak_cut, upper_costhetak_cut))
+    q2_mc_phsp      = q2_mc_phsp[indices]
+    phi_mc_phsp     = phi_mc_phsp[indices]
+    theta_l_mc_phsp = theta_l_mc_phsp[indices]
+    data_mc_phsp    = [phi_mc_phsp, theta_l_mc_phsp, theta_k_mc_phsp, q2_mc_phsp]
+    
+    df     = pd.DataFrame({'ctl': data_mc_phsp[1], 'ctk': data_mc_phsp[2], 'phi': data_mc_phsp[0], 'q2': q2_mc_phsp})
+    orders = {"ctl": 4, "ctk": 6, "phi": 2}
+    ranges = {"ctl": [-1.0, 1.0], "ctk": [lower_costhetak_cut, upper_costhetak_cut], "phi": [-np.pi, np.pi]}
+
+    EffClass = get_efficiency_model_class('legendre')
+    eff      = EffClass.fit(df, ['ctl', 'ctk', 'phi'], ranges = ranges,
+                            legendre_orders = orders, calculate_cov = False, chunk_size = 2000)
+
+    out_file = eff.write_to_disk(f'acc_3d_JpsiKstMC_{Q2BIN}_bin.yaml')
+    print(out_file)
+
+    labels = {'ctl': r'$\cos \theta_L$', 'ctk': r'$\cos \theta_K$', 'phi': '$\phi$', 'q2': '$q^2$ [GeV$^2$]'}
+    for v in ['ctl', 'ctk', 'phi']:
+        plt.subplots(figsize = (15, 10))
+        plt.xlim(*ranges[v])
+        x, y = eff.project_efficiency(v, n_points = 1000)
+        plt.plot(x, y, 'b-')
+        plt.hist(df[v], density = True, bins = 50, color = 'grey', alpha = 0.5)
+        plt.ylabel("a.u.", horizontalalignment = 'right', y = 1.0)
+        plt.xlabel(labels[v], horizontalalignment = 'right', x = 1.0)
+        plt.savefig(f'acc_3d_JpsiKstMC_phsp_mc_{v}_{Q2BIN}_bin.pdf')
+        plt.close()
+
+    
+if __name__ == "__main__":
+    main()
--- a/Code/Scripts/Python
+++ b/Code/Scripts/Python
@ -0,0 +1,77 @@
+import os
+import dotenv
+import sys
+import argparse
+import pandas as pd
+import mplhep
+import zfit
+
+from prettytable import PrettyTable
+
+dotenv.load_dotenv('../properties.env')
+
+sys.path.insert(0, os.getenv('SYS_PATH'))
+
+from b2kstll.models.angular import B2Kstll
+from b2kstll.plot import plot_distributions
+
+from hep_analytics.processing.extract import FileManager
+from hep_analytics.processing.transform import select_feature
+
+FILE_MC  = os.getenv('MC_FILE')
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--q2bin', dest ='q2bin', default = 0)
+    args   = parser.parse_args()
+    Q2BIN  = int(args.q2bin)
+
+    mplhep.style.use("LHCb2")
+
+    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
+    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
+
+    filemanager    = FileManager(file = FILE_MC, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
+    mc_data        = filemanager.extract_data()
+    q2_mc, theta_k_mc, theta_l_mc, phi_mc = mc_data[0], mc_data[1], mc_data[2], mc_data[3]
+    q2_mc, indices = select_feature(feature = q2_mc, limits = bin_ranges[Q2BIN])
+    phi_mc         = phi_mc[indices]
+    theta_l_mc     = theta_l_mc[indices]
+    theta_k_mc     = theta_k_mc[indices]
+    lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
+    upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
+    theta_k_mc, indices  = select_feature(feature = theta_k_mc, limits = (lower_costhetak_cut, upper_costhetak_cut))
+    q2_mc          = q2_mc[indices]
+    phi_mc         = phi_mc[indices]
+    theta_l_mc     = theta_l_mc[indices]
+    
+    angular_data = pd.DataFrame({'ctl': theta_l_mc, 'ctk': theta_k_mc, 'phi': phi_mc})
+    angular_data.to_csv(f"ang_fit_mc_{Q2BIN}_bin.csv", index = False)
+    x = B2Kstll('ctl','ctk','phi')
+    x.set_acc(f"./acc_3d_JpsiKstMC_{Q2BIN}_bin.yaml")
+    obs, pdf, params = x.get_pdf('PWave')
+
+    datazfit  = zfit.Data.from_pandas(df = angular_data, obs = obs)
+    nll       = zfit.loss.UnbinnedNLL(model = pdf, data = datazfit)
+    minimizer = zfit.minimize.Minuit()
+    result    = minimizer.minimize(nll)
+
+    param_errors, _ = result.errors()
+    print(param_errors)
+
+    info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
+    fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
+    for name in fit_labels:
+        value = result.params[name]["value"]
+        lower = result.params[name]["minuit_minos"]["lower"]
+        upper = result.params[name]["minuit_minos"]["upper"]
+        info_table.add_row([name, value, lower, upper])
+
+    print(info_table)
+
+    plot_distributions(result, suffix = f"accPwavePDF_mc_ang_fit_{Q2BIN}_bin")
+
+    
+if __name__ == "__main__":
+    main()
--- a/Fit/diff_pair_tables_average_mc.py
+++ b/Fit/diff_pair_tables_average_mc.py
@ -0,0 +1,83 @@
+import re
+import numpy as np
+
+from prettytable import PrettyTable
+
+
+def main():
+    bin_labels = r"$1.10 < q^2 < 6.00$"
+
+    sample_1             = """+----------+------------------------+------------------------+-----------------------+\n
+                              | Variable |         Value          |      Lower Error       |      Upper Error      |\n
+                              +----------+------------------------+------------------------+-----------------------+\n
+                              |   AFB    | -0.030693939006905927  | -0.0027448398251912667 | 0.0026690127045430573 |\n
+                              |    FL    |   0.7641860897390124   | -0.0031519177028962353 |  0.00318403722094425  |\n
+                              |    S3    | -0.009793642767841643  | -0.0039500538800350165 |  0.003962284039325228 |\n
+                              |    S4    |  -0.11948833798312776  | -0.005018301784506872  |  0.005112053156795224 |\n
+                              |    S5    |  -0.14672854148123884  | -0.005077800231656763  | 0.0051383664423323315 |\n
+                              |    S7    | -0.0009713979621990259 | -0.005287904531482017  |  0.005275166744474821 |\n
+                              |    S8    |  0.003178551357900045  | -0.005257961171467131  |  0.005258879438751017 |\n
+                              |    S9    |  0.005757808359963191  | -0.004016272402049568  |  0.004019873761938199 |\n
+                              +----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
+
+    sample_2             = """+---------------------------------------------------------------------------------+\n
+                              |                       q2_split_$1.10 < q^2 < 6.00$_average                      |\n
+                              +----------+-----------------------+-----------------------+----------------------+\n
+                              | Variable |         Value         |      Lower Error      |     Upper Error      |\n
+                              +----------+-----------------------+-----------------------+----------------------+\n
+                              |   AFB    |  -0.03611162259371692 | -0.008731742137981694 | 0.008731742137981694 |\n
+                              |    FL    |   0.7816031469957773  | -0.010041813091402643 | 0.010041813091402643 |\n
+                              |    S3    | -0.004913319742985065 | -0.010997797016445892 | 0.010997797016445892 |\n
+                              |    S4    |  -0.11446696701061239 |  -0.0177373887414066  |  0.0177373887414066  |\n
+                              |    S5    |  -0.1162615465324004  | -0.016777187198089353 | 0.016777187198089353 |\n
+                              |    S7    |  0.013450446832457594 | -0.017351060768029697 | 0.017351060768029697 |\n
+                              |    S8    |  0.012943806403734165 | -0.018166933762978635 | 0.018166933762978635 |\n
+                              |    S9    | -0.015991133298640343 | -0.011753160273233804 | 0.011753160273233804 |\n
+                              +----------+-----------------------+-----------------------+----------------------+""" # MC LEVEL
+
+    variables    = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
+    select_regex = [".*AFB.*\d+", ".*FL.*\d+", ".*S3.*\d+", ".*S4.*\d+", ".*S5.*\d+", ".*S7.*\d+", ".*S8.*\d+", ".*S9.*\d+"]
+    value_regex  = "[- ]\d+.\d+"
+    sample_table_1   = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
+    sample_table_2   = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
+    samples = [sample_1, sample_2]
+
+    for variable, regex in zip(variables, select_regex):
+        sample_1_splitted = samples[0].splitlines()
+        for line in sample_1_splitted:
+            subresult = re.match(regex, line)
+            if subresult != None:
+                substring = subresult.group(0)
+                result    = re.findall(value_regex, substring)
+                value     = np.float64(result[0])
+                lower_err = np.float64(result[1])
+                upper_err = np.float64(result[2])
+
+                sample_table_1['1'][variable] = (value, lower_err, upper_err)
+
+        sample_2_splitted = samples[1].splitlines()
+        for line in sample_2_splitted:
+            subresult = re.match(regex, line)
+            if subresult != None:
+                substring = subresult.group(0)
+                result    = re.findall(value_regex, substring)
+                value     = np.float64(result[0])
+                lower_err = np.float64(result[1])
+                upper_err = np.float64(result[2])
+
+                sample_table_2['1'][variable] = (value, lower_err, upper_err)
+
+    info_table = PrettyTable(["Variable", r"|MC Average - Gen|", "Lower Error", "Upper Error", "Stat. Significance"])
+    for name in variables:
+        index = '1'
+        delta_value       = np.abs(sample_table_1[index][name][0] - sample_table_2[index][name][0])
+        quadratic_lower   = -np.sqrt(sample_table_1[index][name][1]**2 + sample_table_2[index][name][1]**2)
+        quadratic_upper   = np.sqrt(sample_table_1[index][name][2]**2 + sample_table_2[index][name][2]**2)
+        stat_significance = np.abs(delta_value / quadratic_lower)
+        info_table.add_row([name, delta_value, quadratic_lower, quadratic_upper, stat_significance])
+    info_table.title = f"{bin_labels} Average over 3 Bins in MC"
+    print(info_table)
+
+
+if __name__ == "__main__":
+    main()
--- a/Fit/diff_pair_tables_average_mc_reweighted.py
+++ b/Fit/diff_pair_tables_average_mc_reweighted.py
@ -0,0 +1,81 @@
+import re
+import numpy as np
+
+from prettytable import PrettyTable
+
+
+def main():
+    bin_labels = r"$1.10 < q^2 < 6.00$"
+
+    sample_1             = """+----------+------------------------+------------------------+-----------------------+\n
+                              | Variable |         Value          |      Lower Error       |      Upper Error      |\n
+                              +----------+------------------------+------------------------+-----------------------+\n
+                              |   AFB    | -0.030693939006905927  | -0.0027448398251912667 | 0.0026690127045430573 |\n
+                              |    FL    |   0.7641860897390124   | -0.0031519177028962353 |  0.00318403722094425  |\n
+                              |    S3    | -0.009793642767841643  | -0.0039500538800350165 |  0.003962284039325228 |\n
+                              |    S4    |  -0.11948833798312776  | -0.005018301784506872  |  0.005112053156795224 |\n
+                              |    S5    |  -0.14672854148123884  | -0.005077800231656763  | 0.0051383664423323315 |\n
+                              |    S7    | -0.0009713979621990259 | -0.005287904531482017  |  0.005275166744474821 |\n
+                              |    S8    |  0.003178551357900045  | -0.005257961171467131  |  0.005258879438751017 |\n
+                              |    S9    |  0.005757808359963191  | -0.004016272402049568  |  0.004019873761938199 |\n
+                              +----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
+
+    sample_2             = """+----------+-----------------------+------------------------+-----------------------+\n
+                              | Variable |         Value         |      Lower Error       |      Upper Error      |\n
+                              +----------+-----------------------+------------------------+-----------------------+\n
+                              |   AFB    | -0.012550006150333942 | -0.0026956457681952418 | 0.0026965400452885245 |\n
+                              |    FL    |   0.775244817820803   | -0.0030583204477219544 |  0.003052276555621123 |\n
+                              |    S3    | -0.004731791649125892 | -0.0035876206689923096 | 0.0035671656402230033 |\n
+                              |    S4    |  -0.12872329698226329 | -0.0054353577466807685 |  0.005452103886085265 |\n
+                              |    S5    |  -0.13102847916212362 | -0.005094396678852025  |  0.005083576918431511 |\n
+                              |    S7    |  0.014829855067600298 | -0.005251337297972407  |  0.005258433080994564 |\n
+                              |    S8    |  0.011919411650166327 | -0.005666331650738382  |  0.005691209183046813 |\n
+                              |    S9    | -0.013513044362093558 | -0.0036909184587611017 | 0.0036915155625497006 |\n
+                              +----------+-----------------------+------------------------+-----------------------+""" # MC LEVEL
+
+    variables    = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
+    select_regex = [".*AFB.*\d+", ".*FL.*\d+", ".*S3.*\d+", ".*S4.*\d+", ".*S5.*\d+", ".*S7.*\d+", ".*S8.*\d+", ".*S9.*\d+"]
+    value_regex  = "[- ]\d+.\d+"
+    sample_table_1   = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
+    sample_table_2   = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
+    samples = [sample_1, sample_2]
+
+    for variable, regex in zip(variables, select_regex):
+        sample_1_splitted = samples[0].splitlines()
+        for line in sample_1_splitted:
+            subresult = re.match(regex, line)
+            if subresult != None:
+                substring = subresult.group(0)
+                result    = re.findall(value_regex, substring)
+                value     = np.float64(result[0])
+                lower_err = np.float64(result[1])
+                upper_err = np.float64(result[2])
+
+                sample_table_1['1'][variable] = (value, lower_err, upper_err)
+
+        sample_2_splitted = samples[1].splitlines()
+        for line in sample_2_splitted:
+            subresult = re.match(regex, line)
+            if subresult != None:
+                substring = subresult.group(0)
+                result    = re.findall(value_regex, substring)
+                value     = np.float64(result[0])
+                lower_err = np.float64(result[1])
+                upper_err = np.float64(result[2])
+
+                sample_table_2['1'][variable] = (value, lower_err, upper_err)
+
+    info_table = PrettyTable(["Variable", r"|MC reweighted - Gen|", "Lower Error", "Upper Error", "Stat. Significance"])
+    for name in variables:
+        index = '1'
+        delta_value       = np.abs(sample_table_1[index][name][0] - sample_table_2[index][name][0])
+        quadratic_lower   = -np.sqrt(sample_table_1[index][name][1]**2 + sample_table_2[index][name][1]**2)
+        quadratic_upper   = np.sqrt(sample_table_1[index][name][2]**2 + sample_table_2[index][name][2]**2)
+        stat_significance = np.abs(delta_value / quadratic_lower)
+        info_table.add_row([name, delta_value, quadratic_lower, quadratic_upper, stat_significance])
+    info_table.title = f"{bin_labels}"
+    print(info_table)
+
+
+if __name__ == "__main__":
+    main()
--- a/Fit/diff_pair_tables_mc_gen.py
+++ b/Fit/diff_pair_tables_mc_gen.py
@ -0,0 +1,81 @@
+import re
+import numpy as np
+
+from prettytable import PrettyTable
+
+
+def main():
+    bin_labels = r"$1.10 < q^2 < 6.00$"
+
+    sample_1             = """+----------+------------------------+------------------------+-----------------------+\n
+                              | Variable |         Value          |      Lower Error       |      Upper Error      |\n
+                              +----------+------------------------+------------------------+-----------------------+\n
+                              |   AFB    | -0.030693939006905927  | -0.0027448398251912667 | 0.0026690127045430573 |\n
+                              |    FL    |   0.7641860897390124   | -0.0031519177028962353 |  0.00318403722094425  |\n
+                              |    S3    | -0.009793642767841643  | -0.0039500538800350165 |  0.003962284039325228 |\n
+                              |    S4    |  -0.11948833798312776  | -0.005018301784506872  |  0.005112053156795224 |\n
+                              |    S5    |  -0.14672854148123884  | -0.005077800231656763  | 0.0051383664423323315 |\n
+                              |    S7    | -0.0009713979621990259 | -0.005287904531482017  |  0.005275166744474821 |\n
+                              |    S8    |  0.003178551357900045  | -0.005257961171467131  |  0.005258879438751017 |\n
+                              |    S9    |  0.005757808359963191  | -0.004016272402049568  |  0.004019873761938199 |\n
+                              +----------+------------------------+------------------------+-----------------------+""" # GENERATOR LEVEL
+
+    sample_2             = """+----------+-----------------------+-----------------------+----------------------+\n
+                              | Variable |         Value         |      Lower Error      |     Upper Error      |\n
+                              +----------+-----------------------+-----------------------+----------------------+\n
+                              |   AFB    | -0.006858788562096526 | -0.006288887141153866 | 0.006202211516385528 |\n
+                              |    FL    |   0.7741933027966281  | -0.007196527356496095 | 0.006980839734409447 |\n
+                              |    S3    | -0.005794055258693013 | -0.008257409210173627 | 0.008381420647898805 |\n
+                              |    S4    |  -0.13252141252161223 | -0.012754778513953539 | 0.012393845350580537 |\n
+                              |    S5    |  -0.13982805972042112 | -0.011883305368342971 | 0.011682110709131568 |\n
+                              |    S7    |  0.01499315459684105  | -0.012227400419826324 | 0.012191295064893278 |\n
+                              |    S8    |  0.010750628153590385 | -0.013096041377945507 | 0.013227158788846905 |\n
+                              |    S9    | -0.013084904237743692 |  -0.00855475117009418 | 0.008616146883219716 |\n
+                              +----------+-----------------------+-----------------------+----------------------+""" # MC LEVEL
+
+    variables    = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
+    select_regex = [".*AFB.*\d+", ".*FL.*\d+", ".*S3.*\d+", ".*S4.*\d+", ".*S5.*\d+", ".*S7.*\d+", ".*S8.*\d+", ".*S9.*\d+"]
+    value_regex  = "[- ]\d+.\d+"
+    sample_table_1   = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
+    sample_table_2   = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None}}
+    samples = [sample_1, sample_2]
+
+    for variable, regex in zip(variables, select_regex):
+        sample_1_splitted = samples[0].splitlines()
+        for line in sample_1_splitted:
+            subresult = re.match(regex, line)
+            if subresult != None:
+                substring = subresult.group(0)
+                result    = re.findall(value_regex, substring)
+                value     = np.float64(result[0])
+                lower_err = np.float64(result[1])
+                upper_err = np.float64(result[2])
+
+                sample_table_1['1'][variable] = (value, lower_err, upper_err)
+
+        sample_2_splitted = samples[1].splitlines()
+        for line in sample_2_splitted:
+            subresult = re.match(regex, line)
+            if subresult != None:
+                substring = subresult.group(0)
+                result    = re.findall(value_regex, substring)
+                value     = np.float64(result[0])
+                lower_err = np.float64(result[1])
+                upper_err = np.float64(result[2])
+
+                sample_table_2['1'][variable] = (value, lower_err, upper_err)
+
+    info_table = PrettyTable(["Variable", r"|MC - Gen|", "Lower Error", "Upper Error", "Stat. Significance"])
+    for name in variables:
+        index = '1'
+        delta_value       = np.abs(sample_table_1[index][name][0] - sample_table_2[index][name][0])
+        quadratic_lower   = -np.sqrt(sample_table_1[index][name][1]**2 + sample_table_2[index][name][1]**2)
+        quadratic_upper   = np.sqrt(sample_table_1[index][name][2]**2 + sample_table_2[index][name][2]**2)
+        stat_significance = np.abs(delta_value / quadratic_lower)
+        info_table.add_row([name, delta_value, quadratic_lower, quadratic_upper, stat_significance])
+    info_table.title = f"{bin_labels}"
+    print(info_table)
+
+
+if __name__ == "__main__":
+    main()
--- a/Fit/reweighted_angular_acceptance_modeling.py
+++ b/Fit/reweighted_angular_acceptance_modeling.py
@ -0,0 +1,94 @@
+import os
+import dotenv
+import sys
+import argparse
+import mplhep
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+dotenv.load_dotenv('../properties.env')
+
+sys.path.insert(0, os.getenv('SYS_PATH'))
+
+from analysis.efficiency import get_efficiency_model_class
+
+from hep_analytics.processing.extract import FileManager
+from hep_analytics.processing.transform import select_feature, reweight_feature
+from hep_analytics.processing.visualisation import reweight_comparing_plot
+
+FILE_GEN     = os.getenv('GEN_FILE')
+FILE_MC_PHSP = os.getenv('MC_PHSP_FILE')
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--q2bin', dest ='q2bin', default = 0)
+    args   = parser.parse_args()
+    Q2BIN  = int(args.q2bin)
+
+    mplhep.style.use("LHCb2")
+
+    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
+    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
+
+    bin_labels     = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
+                      r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
+                      r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]
+
+    filemanager     = FileManager(file = FILE_MC_PHSP, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
+    mc_phsp_data    = filemanager.extract_data()
+    q2_mc_phsp, theta_k_mc_phsp, theta_l_mc_phsp, phi_mc_phsp = mc_phsp_data[0], mc_phsp_data[1], mc_phsp_data[2], mc_phsp_data[3]
+    q2_mc_phsp, indices = select_feature(feature = q2_mc_phsp, limits = bin_ranges[Q2BIN])
+    phi_mc_phsp     = phi_mc_phsp[indices]
+    theta_l_mc_phsp = theta_l_mc_phsp[indices]
+    theta_k_mc_phsp = theta_k_mc_phsp[indices]
+    lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
+    upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
+    theta_k_mc_phsp, indices  = select_feature(feature = theta_k_mc_phsp, limits = (lower_costhetak_cut, upper_costhetak_cut))
+    q2_mc_phsp      = q2_mc_phsp[indices]
+    phi_mc_phsp     = phi_mc_phsp[indices]
+    theta_l_mc_phsp = theta_l_mc_phsp[indices]
+
+    filemanager = FileManager(file = FILE_GEN, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
+    gen_data    = filemanager.extract_data()
+    q2_gen, theta_k_gen, theta_l_gen, phi_gen = gen_data[0], gen_data[1], gen_data[2], gen_data[3]
+    q2_gen, indices = select_feature(feature = q2_gen, limits = bin_ranges[Q2BIN])
+    phi_gen     = phi_gen[indices]
+    theta_l_gen = theta_l_gen[indices]
+    theta_k_gen = theta_k_gen[indices]
+    theta_k_gen, indices  = select_feature(feature = theta_k_gen, limits = (lower_costhetak_cut, upper_costhetak_cut))
+    q2_gen      = q2_gen[indices]
+    phi_gen     = phi_gen[indices]
+    theta_l_gen = theta_l_gen[indices]
+
+    q2_mc_phsp_weights = reweight_feature(original_feature = q2_mc_phsp, target_feature = q2_gen, n_bins = 25)
+    reweight_comparing_plot(original_feature = q2_mc_phsp, target_feature = q2_gen, weights = q2_mc_phsp_weights,
+        n_bins = 25, suptitle = f"{bin_labels[Q2BIN]}", 
+        titles = ["Q2 MC PHSP", "Q2 MC PHSP (reweighted)", "Q2 Gen"], save = f"reweighted_q2_gen_mc_phsp_{Q2BIN}_bin.png")
+    
+    df       = pd.DataFrame({'ctl': theta_l_mc_phsp, 'ctk': theta_k_mc_phsp, 'phi': phi_mc_phsp, 'q2': q2_mc_phsp, 'weights': q2_mc_phsp_weights})
+    orders   = {"ctl": 4, "ctk": 6, "phi": 2}
+    ranges   = {"ctl": [-1.0, 1.0], "ctk": [lower_costhetak_cut, upper_costhetak_cut], "phi": [-np.pi, np.pi]}
+    EffClass = get_efficiency_model_class('legendre')
+    eff      = EffClass.fit(df, ['ctl', 'ctk', 'phi'], weight_var = "weights", ranges = ranges,
+                            legendre_orders = orders, calculate_cov = False, chunk_size = 2000)
+
+    out_file = eff.write_to_disk(f'acc_3d_JpsiKstMC_reweighted_{Q2BIN}_bin.yaml')
+    print(out_file)
+
+    labels = {'ctl': r'$\cos \theta_L$', 'ctk': r'$\cos \theta_K$', 'phi': '$\phi$', 'q2': '$q^2$ [GeV$^2$]'}
+    for v in ['ctl', 'ctk', 'phi']:
+        fig, ax = plt.subplots(figsize = (15, 10))
+        plt.xlim(*ranges[v])
+        x, y = eff.project_efficiency(v, n_points = 1000)
+        plt.plot(x, y, 'b-')
+        plt.hist(df[v], density = True, bins = 50, color = 'grey', alpha = 0.5)
+        plt.ylabel("a.u.", horizontalalignment = 'right', y = 1.0)
+        plt.xlabel(labels[v], horizontalalignment = 'right', x = 1.0)
+        plt.savefig(f'acc_3d_JpsiKstMC_{v}_{Q2BIN}_bin.pdf')
+        plt.close()
+
+    
+if __name__ == "__main__":
+    main()
--- a/Fit/reweighted_angular_fit.py
+++ b/Fit/reweighted_angular_fit.py
@ -0,0 +1,97 @@
+import os
+import dotenv
+import sys
+import argparse
+import pandas as pd
+import mplhep
+import zfit
+from prettytable import PrettyTable
+
+dotenv.load_dotenv('../properties.env')
+
+sys.path.insert(0, os.getenv('SYS_PATH'))
+
+from b2kstll.models.angular import B2Kstll
+from b2kstll.plot import plot_distributions
+
+from hep_analytics.processing.extract import FileManager
+from hep_analytics.processing.transform import select_feature, reweight_feature
+from hep_analytics.processing.visualisation import reweight_comparing_plot
+
+FILE_MC  = os.getenv('MC_FILE')
+FILE_GEN = os.getenv('GEN_FILE')
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--q2bin', dest ='q2bin', default = 0)
+    args   = parser.parse_args()
+    Q2BIN  = int(args.q2bin)
+
+    mplhep.style.use("LHCb2")
+
+    bin_ranges = [(0.25, 4.00), (4.00, 8.00), (11.00, 12.50), (15.00, 18.00), (1.10, 6.00), (1.1, 2.5), (2.5, 4.0), (4.0, 6.0), (6.0, 8.0)]
+    print(f"Selected Q2 Bin Range is {bin_ranges[Q2BIN]}")
+
+    bin_labels = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
+                  r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
+                  r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]
+
+    filemanager    = FileManager(file = FILE_MC, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
+    mc_data        = filemanager.extract_data()
+    q2_mc, theta_k_mc, theta_l_mc, phi_mc = mc_data[0], mc_data[1], mc_data[2], mc_data[3]
+    q2_mc, indices = select_feature(feature = q2_mc, limits = bin_ranges[Q2BIN])
+    phi_mc         = phi_mc[indices]
+    theta_l_mc     = theta_l_mc[indices]
+    theta_k_mc     = theta_k_mc[indices]
+    lower_costhetak_cut = float(os.getenv('LOWER_COSTHETAK_CUT'))
+    upper_costhetak_cut = float(os.getenv('UPPER_COSTHETAK_CUT'))
+    theta_k_mc, indices  = select_feature(feature = theta_k_mc, limits = (lower_costhetak_cut, upper_costhetak_cut))
+    q2_mc          = q2_mc[indices]
+    phi_mc         = phi_mc[indices]
+    theta_l_mc     = theta_l_mc[indices]
+
+    filemanager = FileManager(file = FILE_GEN, tree = "Events", branches = ["q2", "costhetak", "costhetal", "phi"])
+    gen_data    = filemanager.extract_data()
+    q2_gen, theta_k_gen, theta_l_gen, phi_gen = gen_data[0], gen_data[1], gen_data[2], gen_data[3]
+    q2_gen, indices = select_feature(feature = q2_gen, limits = bin_ranges[Q2BIN])
+    phi_gen     = phi_gen[indices]
+    theta_l_gen = theta_l_gen[indices]
+    theta_k_gen = theta_k_gen[indices]
+    theta_k_gen, indices  = select_feature(feature = theta_k_gen, limits = (lower_costhetak_cut, upper_costhetak_cut))
+    q2_gen      = q2_gen[indices]
+    phi_gen     = phi_gen[indices]
+    theta_l_gen = theta_l_gen[indices]
+    
+    angular_data     = pd.DataFrame({'ctl': theta_l_mc, 'ctk': theta_k_mc, 'phi': phi_mc})
+    angular_data.to_csv(f"ang_fit_mc_q2_bins_{Q2BIN}_bin.csv", index = False)
+    x                = B2Kstll('ctl','ctk','phi')
+    x.set_acc(f"./acc_3d_JpsiKstMC_reweighted_4_bin.yaml")
+    obs, pdf, _      = x.get_pdf('PWave')
+
+    q2_mc_weights = reweight_feature(original_feature = q2_mc, target_feature = q2_gen, n_bins = 25)
+    reweight_comparing_plot(original_feature = q2_mc, target_feature = q2_gen, weights = q2_mc_weights,
+        n_bins = 25, suptitle = f"{bin_labels[Q2BIN]}", titles = ["Q2 MC", "Q2 MC (reweighted)", "Q2 Gen"], save = "reweighted_q2_gen_mc.png")
+
+    datazfit        = zfit.Data.from_pandas(df = angular_data, obs = obs, weights = q2_mc_weights)
+    nll             = zfit.loss.UnbinnedNLL(model = pdf, data = datazfit)
+    minimizer       = zfit.minimize.Minuit()
+    result          = minimizer.minimize(nll)
+    param_errors, _ = result.errors()
+    print(param_errors)
+
+    info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
+    fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
+    for name in fit_labels:
+        value = result.params[name]["value"]
+        lower = result.params[name]["minuit_minos"]["lower"]
+        upper = result.params[name]["minuit_minos"]["upper"]
+        info_table.add_row([name, value, lower, upper])
+
+    print(info_table)
+
+    plot_distributions(result, suffix = f"accPwavePDF_{Q2BIN}_bin_reweighted_mc")
+
+    
+if __name__ == "__main__":
+    main()
--- a/Fit/table_average_to_compare.py
+++ b/Fit/table_average_to_compare.py
@ -0,0 +1,97 @@
+import re
+import numpy as np
+
+from prettytable import PrettyTable
+
+
+def main():
+    table_1 = """+----------+-----------------------+-----------------------+----------------------+\n
+                 | Variable |         Value         |      Lower Error      |     Upper Error      |\n
+                 +----------+-----------------------+-----------------------+----------------------+\n
+                 |   AFB    |  -0.1327941714665493  | -0.011751983569979923 | 0.011695518975582573 |\n
+                 |    FL    |   0.778784190310989   | -0.013786738313616998 | 0.01334631516152801  |\n
+                 |    S3    |  0.012016752919524067 | -0.014186281678810328 | 0.01390470699489175  |\n
+                 |    S4    | 0.0030912112690471196 |  -0.02494551503535478 | 0.02500682819273369  |\n
+                 |    S5    |  0.06257482631549156  | -0.022576669869741432 | 0.02267902660423071  |\n
+                 |    S7    |  0.026461647341068015 | -0.022870493984060422 | 0.022709046371542273 |\n
+                 |    S8    |  0.022528200778115424 | -0.024500119399894143 | 0.02444883875849664  |\n
+                 |    S9    | -0.010047596608480971 | -0.015100958783891871 | 0.01497061747614531  |\n
+                 +----------+-----------------------+-----------------------+----------------------+"""
+
+    table_2 = """+----------+-----------------------+-----------------------+----------------------+\n
+                 | Variable |         Value         |      Lower Error      |     Upper Error      |\n
+                 +----------+-----------------------+-----------------------+----------------------+\n
+                 |   AFB    | -0.044313915998597596 | -0.011266005942710903 | 0.011376116194169396 |\n
+                 |    FL    |   0.8080898316779715  | -0.012903371237386076 | 0.012627755576193413 |\n
+                 |    S3    | -0.001350693384098943 |  -0.01425102196417036 | 0.014014236056631763 |\n
+                 |    S4    |  -0.16348388538343442 | -0.022982703026786865 | 0.02353050491741336  |\n
+                 |    S5    |  -0.12998638915138938 |  -0.02207623402690808 | 0.02230804022923196  |\n
+                 |    S7    | -0.019175072605277906 | -0.022219297227352726 | 0.02233298418372788  |\n
+                 |    S8    |  0.03896504393685167  |  -0.02306667997644898 | 0.023056355590581525 |\n
+                 |    S9    | -0.026410777123662296 |  -0.01497373735832672 | 0.015237714795859537 |\n
+                 +----------+-----------------------+-----------------------+----------------------+"""
+
+    table_3 = """+----------+-----------------------+-----------------------+----------------------+\n
+                 | Variable |         Value         |      Lower Error      |     Upper Error      |\n
+                 +----------+-----------------------+-----------------------+----------------------+\n
+                 |   AFB    |  0.06877321968399613  |  -0.00873785505692959 | 0.008866035437567673 |\n
+                 |    FL    |   0.7579354189983714  |  -0.01048129883366948 | 0.01017491937670441  |\n
+                 |    S3    |  -0.02540601876438032 | -0.012075664333196083 | 0.012192632205060838 |\n
+                 |    S4    |  -0.18300822691744986 | -0.015545142299112552 | 0.016136099542591972 |\n
+                 |    S5    |  -0.28137307676130335 |  -0.0160784362375397  | 0.01630021297009425  |\n
+                 |    S7    |  0.033064765761582675 |  -0.01837491321971226 | 0.018457429889962827 |\n
+                 |    S8    | -0.022661825503764606 | -0.018831356033962507 | 0.018816177761568176 |\n
+                 |    S9    |  -0.01151502616377776 | -0.012778180225448249 | 0.013091600028830737 |\n
+                 +----------+-----------------------+-----------------------+----------------------+"""
+
+    bin_labels     = [r"$0.25 < q^2 < 4.00$", r"$4.00 < q^2 < 8.00$", r"$11.00 < q^2 < 12.50$",
+                      r"$15.00 < q^2 < 18.00$", r"$1.10 < q^2 < 6.00$",
+                      r"$1.1 < q^2 < 2.5$", r"$2.5 < q^2 < 4.0$", r"$4.0 < q^2 < 6.0$", r"$6.0 < q^2 < 8.0$"]
+
+    tables       = [table_1, table_2, table_3]
+    variables    = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
+    select_regex = [".*AFB.*\d+", ".*FL.*\d+", ".*S3.*\d+", ".*S4.*\d+", ".*S5.*\d+", ".*S7.*\d+", ".*S8.*\d+", ".*S9.*\d+"]
+    value_regex  = "[- ]\d+.\d+"
+    samples      = {'1': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None},
+                    '2': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None},
+                    '3': {'AFB': None, 'FL': None, 'S3': None, 'S4': None, 'S5': None, 'S7': None, 'S8': None, 'S9': None},
+                   }
+
+    for index in range(len(tables)):
+        for variable, regex in zip(variables, select_regex):
+            splitted = tables[index].splitlines()
+            for line in splitted:
+                subresult = re.match(regex, line)
+                if subresult != None:
+                    substring = subresult.group(0)
+                    result    = re.findall(value_regex, substring)
+                    value     = np.float64(result[0])
+                    lower_err = np.float64(result[1])
+                    upper_err = np.float64(result[2])
+
+                    samples[str(index + 1)][variable] = (value, lower_err, upper_err)
+
+    fit_variables_split = [[] for _ in range(8)]
+    fit_errors_split    = [[] for _ in range(8)]
+    for value in samples.values():
+        count = 0
+        for fitvalue in value.values():
+            fit_variables_split[count].append(fitvalue[0])
+            fit_errors_split[count].append(np.sqrt(fitvalue[1]**2 + fitvalue[2]**2))
+            count += 1
+
+    info_table = PrettyTable(["Variable", "Value", "Lower Error", "Upper Error"])
+    info_table.title = f"q2_split_{bin_labels[4]}_average"
+    fit_labels = ["AFB", "FL", "S3", "S4", "S5", "S7", "S8", "S9"]
+    count = 0
+    for name, fit_split_var, fit_err_split in zip(fit_labels, fit_variables_split, fit_errors_split):
+        fit_avg     = np.mean(fit_split_var)
+        error_split = 1/3 * np.sqrt(fit_err_split[0]**2 + fit_err_split[1]**2 + fit_err_split[2]**2)
+        info_table.add_row([name, fit_avg, -error_split, error_split])
+        count += 1
+
+    print(info_table)
+
+    
+if __name__ == "__main__":
+    main()
--- a/Code/Scripts/Python
+++ b/Code/Scripts/Python
@ -0,0 +1,43 @@
+# Angular Analysis of B+toK*+Pi0Mu-Mu+
+## Setup
+For this setup you will need to install miniconda on the machine you are working and do the following procedure:
+1. `conda env create -f requirements.yaml`
+2. Check with `conda activate angularanalysis` if the environment is working 
+3. Check out the following page in order to install the analysis-tool package: `https://github.com/jonas-eschle/analysis-tools`
+4. Check out the following page in order to install the angular-analysis B->K*ll package: `https://gitlab.cern.ch/LHCb-RD/ewp-bd2ksteeangular-legacy/-/tree/master/b2kstll`
+
+Afterwards you can use your conda environment in order to run the code. But before you do, you need to setup the properties.env. Inside of properties.env you will find different configuration variables which are used throughout the code in order to control the behavior of the code. Below you can find a table which summarizes what these environment variables do.
+|Environment Variable|Use-Case|
+| -------------------- | -------- |
+|LOWER_COSTHETAK_CUT |Controls the lower boundary of cos(theta_K)|
+|UPPER_COSTHETAK_CUT |Controls the upper boundary of cos(theta_K)|
+|SYS_PATH|We need one local package in order to make the code work, namely hep_analytics which you can also find in this repo, the interpreter needs to find that package, so set SYS_PATH to the relative path to hep_analytics|
+|GEN_FILE|Path to the Generator File|
+|MC_FILE|Path to the MC File|
+|MC_PHSP_FILE|Path to the PHSP MC File|
+
+If you want to see how to set the environment variables, have a look at `properties.env`. Also, I have run the code on the LHCbA1 machine but it should make no difference where you run the code.
+You are done (apply also the custom fixes described down below)!
+
+## Custom Fix
+The `b2kstll` package has a few caveats when you want to apply a cos(theta_K) cut since the normalisation will be wrong in this case for the pdfs. So in order to compensate for that, I changed a few lines in the b2kstll package code, have a look at the `b2kstll/models` folder, there you will find all the files which I modified. But bear in mind that I only modified the PWave pdf, for other pdfs the issue is still persistent.
+
+## Useful Scripts
+### root_file_inspect.py
+This script can be used to look inside a root file and see which branches and features it includes. To kick it off, give it a try with:
+1. `python root_file_inspect.py --file=ROOT_FILE_PATH --regex=.*`
+
+You do not need to set the regex flag but it can be useful if you search for some particular feature. This script depends on the file `rootio.py`.
+
+### diff_pair_tables_*.py
+There are 3 scripts which are prefixed with `diff_pair_tables` and which are very similar but have different use-cases. You can use them as templates whenever you have generated tables by `prettytable` or another library/tool and use them in order to automatically extract the values out of these tables. Then you do not have to type them manually into your code.
+
+## hep_analytics
+### package: pathing
+The module `aggregate.py` exposes the `PathAggregator` which is very simple and simply builds a path string. For only this reason the class is not very useful but when your directory path setup is more complex and you do not want to manually type out all relevant paths, this class can be easily extended and adapted.
+
+### package: processing
+Exposes three modules: `extract`, `transform`, `visualisation`. `extract` exposes the `FileManager` which can be used to extract the relevant features out of the branches of a `root` file. `transform` exposes useful functions like `select_feature` in order to make cuts and `reweight_feature`. Take note that `select_feature` is not the most efficient way of doing cuts, for more efficient usage use the functionality of `uproot3` directly. `visualisation` exposes `reweight_comparing_plot` in order to plot 3 diagrams, two original features and the reweighted feature.
+
+### Here you can find my root files
+Under `/work/skuza/thesis/Tuples`
--- a/Scripts/b2kstll/models/angular.py
+++ b/Scripts/b2kstll/models/angular.py
@ -0,0 +1,382 @@
+import warnings
+from math import pi
+
+import numpy as np
+import tensorflow as tf
+import zfit
+from zfit import z
+
+import yaml
+import sympy
+
+from b2kstll.models.Kpi import KpiPWavePDF, KpiSWavePDF, ReKpiPandSWavePDF, ImKpiPandSWavePDF 
+from b2kstll.models.acceptance import AccPDF
+from b2kstll.utils import get_coeffs
+
+import analysis.efficiency
+from analysis.efficiency import load_efficiency_model
+ 
+from particle import Particle
+
+from b2kstll.models.angular_sympy import angular_func_sympy
+
+
+class AngularPDF(zfit.pdf.BasePDF):
+    """ Generic angular PDF built from input function and corresponding parameters
+
+        Args:
+            ArbitraryFNC (function: Any arbitrary angular function
+            params (params): parameters of interest in the given function 
+            obs (`zfit.Space`):
+            name (str):
+            dtype (tf.DType):
+    """
+    def __init__(self, obs, params, func, name):
+        super().__init__(obs=obs, params=params, name=name+"PDF")
+        self._user_func = func
+
+    _N_OBS = 3
+    VALID_LIMITS = {'costheta_l': (-1, 1),
+                    'costheta_k': (-1, 1),
+                    'phi': (-pi, pi)}
+
+    def _pdf(self, x, norm_range):
+        costheta_l, costheta_k, phi = z.unstack_x(x)
+
+        pdf = self._user_func(**self.params, costheta_l=costheta_l, costheta_k=costheta_k, phi=phi)
+        return pdf
+
+# Calculate Normalisation
+def get_normalisation(FL, upper_ctk):
+    kwargs = dict(
+        FL = sympy.Symbol('FL'),
+        AFB = sympy.Symbol('AFB'),
+        S3 = sympy.Symbol('S3'),
+        S4 = sympy.Symbol('S4'),
+        S5 = sympy.Symbol('S5'),
+        S7 = sympy.Symbol('S7'),
+        S8 = sympy.Symbol('S8'),
+        S9 = sympy.Symbol('S9'),
+        costheta_l = sympy.Symbol('costheta_l'),
+        costheta_k = sympy.Symbol('costheta_k'),
+        phi = sympy.Symbol('phi')
+    )
+
+    sympy_val = angular_func_sympy(**kwargs)
+    integral = sympy.integrate(sympy_val,
+                            (kwargs['costheta_l'], -1, 1),
+                            (kwargs['costheta_k'], -1, upper_ctk),
+                            (kwargs['phi'], -sympy.pi, sympy.pi),)
+
+    fun = sympy.lambdify((kwargs["FL"]),integral, 'tensorflow')
+
+    return fun(FL)
+
+UPPER_CTK = 0.85
+
+def PWave(FL, S3, S4, S5, AFB, S7, S8, S9, costheta_l, costheta_k, phi, backend=tf):
+
+    """Full d4Gamma/dq2dOmega for Bd -> Kst ll (l=e,mu)
+
+    Angular distribution obtained in the total PDF (using LHCb convention JHEP 02 (2016) 104)
+        i.e. the valid of the angles is given for
+            - phi: [-pi, pi]
+            - theta_K: [-pi, pi]
+            - theta_l: [-pi, pi]
+
+        The function is normalized over a finite range and therefore a PDF.
+
+        Args:
+            FL (`zfit.Parameter`): Fraction of longitudinal polarisation of the Kst
+            S3 (`zfit.Parameter`): A_perp^2 - A_para^2 / A_zero^2 + A_para^2 + A_perp^2 (L, R)
+            S4 (`zfit.Parameter`): RE(A_zero*^2 * A_para^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
+            S5 (`zfit.Parameter`): RE(A_zero*^2 * A_perp^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
+            AFB (`zfit.Parameter`): Forward-backward asymmetry of the di-lepton system (also i.e. 3/4 * S6s)
+            S7 (`zfit.Parameter`): IM(A_zero*^2 * A_para^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
+            S8 (`zfit.Parameter`): IM(A_zero*^2 * A_perp^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
+            S9 (`zfit.Parameter`): IM(A_perp*^2 * A_para^2) / A_zero^2 + A_para^2 + A_perp^2 (L, R)
+            obs (`zfit.Space`):
+            name (str):
+            dtype (tf.DType):
+    """
+
+    sintheta_k = backend.sqrt(1.0 - costheta_k * costheta_k)
+    sintheta_l = backend.sqrt(1.0 - costheta_l * costheta_l)
+
+    sintheta_2k = (1.0 - costheta_k * costheta_k)
+    sintheta_2l = (1.0 - costheta_l * costheta_l)
+
+    sin2theta_k = (2.0 * sintheta_k * costheta_k)
+    cos2theta_l = (2.0 * costheta_l * costheta_l - 1.0)
+    sin2theta_l = (2.0 * sintheta_l * costheta_l)
+
+    pdf = ((3.0 / 4.0) * (1.0 - FL) * sintheta_2k +
+           FL * costheta_k * costheta_k +
+           (1.0 / 4.0) * (1.0 - FL) * sintheta_2k * cos2theta_l +
+           -1.0 * FL * costheta_k * costheta_k * cos2theta_l +
+           S3 * sintheta_2k * sintheta_2l * backend.cos(2.0 * phi) +
+           S4 * sin2theta_k * sin2theta_l * backend.cos(phi) +
+           S5 * sin2theta_k * sintheta_l * backend.cos(phi) +
+           (4.0 / 3.0) * AFB * sintheta_2k * costheta_l +
+           S7 * sin2theta_k * sintheta_l * backend.sin(phi) +
+           S8 * sin2theta_k * sin2theta_l * backend.sin(phi) +
+           S9 * sintheta_2k * sintheta_2l * backend.sin(2.0 * phi))
+
+    #return (9.0 / (32.0 * pi)) * pdf
+    return pdf / get_normalisation(FL = FL, upper_ctk = UPPER_CTK)
+
+
+def SWave(costheta_l, costheta_k, phi, backend=tf):
+
+    """Full d4Gamma/dq2dOmega for Swave part only
+
+    Angular distribution obtained in the total PDF (using LHCb convention JHEP 02 (2016) 104)
+        i.e. the valid of the angles is given for
+            - phi: [-pi, pi]
+            - theta_K: [-pi, pi]
+            - theta_l: [-pi, pi]
+
+        The function is normalized over a finite range and therefore a PDF.
+
+        Args:
+            obs (`zfit.Space`):
+            name (str):
+            dtype (tf.DType):
+    """
+
+    sintheta_2l = (1.0 - costheta_l * costheta_l)
+    pdf = (3.0 / (16.0 * pi)) * sintheta_2l
+
+    return pdf
+
+
+def PandSWaveInterference(Ss1, Ss2, Ss3, Ss4, Ss5, costheta_l, costheta_k, phi,  backend=tf):
+
+    """Full d4Gamma/dq2dOmega for the Real interference term between P and S waves
+
+    Angular distribution obtained in the total PDF (using LHCb convention JHEP 02 (2016) 104)
+        i.e. the valid of the angles is given for
+            - phi: [-pi, pi]
+            - theta_K: [-pi, pi]
+            - theta_l: [-pi, pi]
+
+        The function is normalized over a finite range and therefore a PDF.
+
+        Args:
+            Ss1, Ss2, Ss3, Ss4, Ss5 (`zfit.Parameter`)
+            obs (`zfit.Space`):
+            name (str):
+            dtype (tf.DType):
+    """
+
+    sintheta_l = backend.sqrt(1.0 - costheta_l * costheta_l)
+    sintheta_k = backend.sqrt(1.0 - costheta_k * costheta_k)
+
+    sintheta_2l = (1.0 - costheta_l * costheta_l)
+    sin2theta_l = (2.0 * sintheta_l * costheta_l)
+
+    pdf = (Ss1 * sintheta_2l * costheta_k +
+           Ss2 * sin2theta_l * sintheta_k * backend.cos(phi) +
+           Ss3 * sintheta_l  * sintheta_k * backend.cos(phi) +
+           Ss4 * sintheta_l  * sintheta_k * backend.sin(phi) + 
+           Ss5 * sin2theta_l * sintheta_k * backend.sin(phi)) 
+
+    return (3.0 / (16.0 * pi)) * pdf
+
+
+class AngularPandSWavePDF(zfit.pdf.BaseFunctor):
+    """Full d4Gamma/dq2dOmega for both spin 0 and 1 for Bd -> Kst ll (l=e,mu) decays
+
+    Angular distribution obtained in the total PDF (using LHCb convention JHEP 02 (2016) 104)
+        i.e. the valid of the angles is given for
+            - phi: [-pi, pi]
+            - theta_K: [0, pi]
+            - theta_l: [0, pi]
+
+        The function is normalized over a finite range and therefore a PDF.
+
+        Args:
+            AngularPWavePDF (`zfit.pdf.ZPDF`: P-wave normalised PDF 
+            AngularSWavePDF (`zfit.pdf.ZPDF`: S-wave interference part normalised PDF 
+            FS (`zfit.Parameter`): S-wave fraction, i.e. |A^L_S|^2 + |A^R_S|^2/ Sum(|A^{S,R}_{S,zero,para,perp}|^2) 
+            obs (`zfit.Space`):
+            name (str):
+            dtype (tf.DType):
+    """
+
+    def __init__(self, PWave, SWave, PandSWaveInterference, FS, obs):
+        super().__init__(pdfs=[PWave, SWave, PandSWaveInterference], name="AngularPandSWavePDF", params={'FS': FS}, obs=obs)
+
+    _N_OBS = 3 
+    VALID_LIMITS = {'costheta_l': (-1, 1),
+                    'costheta_k': (-1, 1),
+                    'phi': (-pi, pi)}
+
+    def _pdf(self, x, norm_range):
+        FS  = self.params['FS']
+
+        PWave, SWave, PandSWaveInterference = self.pdfs
+        
+        pdf = (1 - FS) * PWave.pdf(x) + FS * SWave.pdf(x) + PandSWaveInterference.pdf(x)
+
+        return pdf
+
+
+
+# A bit of handling
+class B2Kstll:
+    """Factory class for B->K*ll angular fit"""
+
+    PDFS =     {'PWave': (PWave, ['FL', 'S3', 'S4', 'S5', 'AFB', 'S7', 'S8', 'S9']),
+                'SWave': (SWave, []),
+                'PandSWaveInterference'  : (PandSWaveInterference,   ['Ss1', 'Ss2', 'Ss3', 'Ss4', 'Ss5']),
+                #'PandSWaveInterference5D': (PandSWaveInterference5D, ['Ss1re', 'Ss2re', 'Ss3re', 'Ss4re', 'Ss5re', 
+                #                                                      'Ss1im', 'Ss2im', 'Ss3im', 'Ss4im', 'Ss5im']),
+                'PandSWave'  : (AngularPandSWavePDF,   ['FS'])}
+                #'PandSWave5D': (AngularPandSWavePDF5D, ['FS'])}
+
+    KPI  =      {'PWave': (KpiPWavePDF, ['Kst_892'], [313]),
+                 'SWave': (KpiSWavePDF, ['Kst_1430'], [10311]), 
+           'RePandSWave': (ReKpiPandSWavePDF, ['Kst_892', 'Kst_1430'], [313, 10311]),
+           'ImPandSWave': (ImKpiPandSWavePDF, ['Kst_892', 'Kst_1430'], [313, 10311])}
+
+
+    def __init__(self, cosThetaL, cosThetaK, Phi, q2_min=None, q2_max=None):
+        """Initialize observable names."""
+        self._obs_names = {'costheta_l': cosThetaL,
+                           'costheta_k': cosThetaK,
+                           'phi': Phi}
+
+        costheta_l = zfit.Space(cosThetaL, limits=(-1.0, 1.0))
+        costheta_k = zfit.Space(cosThetaK, limits=(-1.0, UPPER_CTK))
+        phi = zfit.Space(Phi, limits=(-pi, pi))
+        self._spaces = costheta_l * costheta_k * phi
+
+        self.params = {}
+
+        self.acc = None
+        self.acc_integral = None
+        self.Kpi = None
+        self.q2_bin_acc = None
+        self.q2_min = q2_min
+        self.q2_max = q2_max
+
+    def set_q2_bin_acceptance(self, q2_bin_acc):
+        """Set the q2 value used to slice the acceptance function for parametrization in 4D"""
+        self.q2_bin_acc = q2_bin_acc
+
+    def set_acc(self, file_name, q2_bin_acc=None):
+        """Set an acceptance function to the signal PDF."""
+
+        # Load coefficients from MoM
+        #coeffs_yaml = load_efficiency_model(file_name)
+        #coeffs = coeffs_yaml.get_coefficients() 
+
+        self.acc,self.accmodel = get_coeffs(file_name)
+        self.set_q2_bin_acceptance(q2_bin_acc)
+
+
+    def set_acc_integral(self, file_name, str_order):
+        """Set the integral of the PDF x acceptance."""
+
+        # Load integral from a yaml file
+        with open(file_name, 'r') as f:
+            dict_int = yaml.load(f, Loader=yaml.FullLoader)
+        
+        integral = dict_int[0][str_order]
+        self.acc_integral = sympy.sympify(integral) 
+
+
+    def set_Kpi(self, name):
+        """ Set the Kpi invariant mass system."""
+
+        #self._has_Kpi = True
+        self.Kpi = name
+
+
+    def get_pdf(self, name):
+        """Get corresponding PDF and paramaters."""
+        fnc_class, param_names = self.PDFS[name]
+
+        def get_params(param_list):
+            out = {}
+            for param in param_list:
+                if param not in self.params:
+                    if param == 'FL':
+                        config = [0.6, 0, 1]
+                    elif param == 'FS':
+                        config = [0.1, 0, 1]
+                    else: 
+                        config = [0.0, -2, 2]
+                    self.params.update({param: zfit.Parameter(param, *config)})
+                out[param] = self.params[param]
+            return out
+
+        obs = self._spaces 
+
+        def get_Kpi_pdf(name, model):
+            """Set the corresponding PDF for the Kpi system and parameters."""
+            pdf_class, param_names, param_id = self.KPI[model]
+
+            def get_res(param_list, param_id):
+                out = {}
+                for param, idparam in zip(param_names, param_id):
+                    if not any([i for i in self.params if param in i.lower()]):
+                        m = Particle.from_pdgid(idparam).mass * 1e-03
+                        w = Particle.from_pdgid(idparam).width * 1e-03
+                        self.params.update({param+"_M": zfit.Parameter(param+"_M", m, floating=False)})
+                        self.params.update({param+"_W": zfit.Parameter(param+"_W", w, floating=False)})
+                    out[param+"_M"] = self.params[param+"_M"]
+                    out[param+"_W"] = self.params[param+"_W"]
+                return out
+
+            # Add Kpi dimension
+            mKpi = zfit.Space(name, limits=(0.79555, 0.99555))
+            self._spaces *= mKpi
+
+            params = get_res(param_names, param_id)
+            pdf = pdf_class(obs=mKpi, params=params, name=model, q2_bin=(self.q2_min+self.q2_max)/2.)
+            return pdf 
+
+
+        if name == 'PandSWave':
+            pdf_tot, params_tot = self.PDFS[name] 
+            get_params(params_tot)
+
+            listPDF = dict.fromkeys(['PWave', 'SWave', 'PandSWaveInterference'])
+            for iPDF in listPDF:
+                ifncWave, iparamWave = self.PDFS[iPDF]
+                initParams = get_params(iparamWave)
+                listPDF[iPDF] = AngularPDF(obs=obs, params=initParams, func=ifncWave, name=iPDF)
+
+                if self.Kpi:
+                    pdf_Kpi = get_Kpi_pdf(self.Kpi, iPDF)
+                    listPDF[iPDF] *= pdf_Kpi
+
+            params = self.params
+            pdf = pdf_tot(obs=obs, PWave=listPDF["PWave"], SWave=listPDF["SWave"], PandSWaveInterference=listPDF["PandSWaveInterference"], FS=self.params['FS'])
+            if self.acc is not None: 
+                pdf = AccPDF(pdf, self.acc, self.accmodel, obs=obs, dict_names=self._obs_names, q2_slice=self.q2_bin_acc, name=name) 
+
+        else:
+            params = get_params(param_names)
+            pdf = AngularPDF(obs=obs, params=params, func=fnc_class, name=name)   
+
+            if self.acc is not None: 
+                pdf = AccPDF(pdf, self.acc, self.accmodel, obs=obs, dict_names=self._obs_names, q2_slice=self.q2_bin_acc, name=name) 
+
+                if self.acc_integral:
+
+                    tf_expr = sympy.lambdify(self.acc_integral.free_symbols, self.acc_integral, 'tensorflow')
+                    integral = integral_acceptance(params, tf_expr)
+                    pdf.register_analytic_integral(func=integral, limits=zfit.Space(axes=(0,1,2), limits=obs.limits))
+
+            if self.Kpi: 
+                pdf_Kpi = get_Kpi_pdf(self.Kpi, name)
+                pdf = pdf * pdf_Kpi
+
+                params = self.params
+                obs = self._spaces
+
+        return obs, pdf, params
+
--- a/Scripts/b2kstll/models/angular_sympy.py
+++ b/Scripts/b2kstll/models/angular_sympy.py
@ -0,0 +1,63 @@
+import inspect
+
+import sympy
+
+def angular_func_sympy(FL, AFB, S3, S4, S5, S7, S8, S9, costheta_l, costheta_k, phi):
+    sintheta_k = sympy.sqrt(1.0 - costheta_k * costheta_k)
+    sintheta_l = sympy.sqrt(1.0 - costheta_l * costheta_l)
+
+    sintheta_2k = (1.0 - costheta_k * costheta_k)
+    sintheta_2l = (1.0 - costheta_l * costheta_l)
+
+    sin2theta_k = (2.0 * sintheta_k * costheta_k)
+    cos2theta_l = (2.0 * costheta_l * costheta_l - 1.0)
+    sin2theta_l = (2.0 * sintheta_l * costheta_l)
+
+    pdf = ((3.0 / 4.0) * (1.0 - FL) * sintheta_2k +
+           FL * costheta_k * costheta_k +
+           (1.0 / 4.0) * (1.0 - FL) * sintheta_2k * cos2theta_l +
+           -1.0 * FL * costheta_k * costheta_k * cos2theta_l +
+           S3 * sintheta_2k * sintheta_2l * sympy.cos(2.0 * phi) +
+           S4 * sin2theta_k * sin2theta_l * sympy.cos(phi) +
+           S5 * sin2theta_k * sintheta_l * sympy.cos(phi) +
+           (4.0 / 3.0) * AFB * sintheta_2k * costheta_l +
+           S7 * sin2theta_k * sintheta_l * sympy.sin(phi) +
+           S8 * sin2theta_k * sin2theta_l * sympy.sin(phi) +
+           S9 * sintheta_2k * sintheta_2l * sympy.sin(2.0 * phi))
+    return pdf
+
+""" kwargs = dict(
+FL = sympy.Symbol('FL'),
+AFB = sympy.Symbol('AFB'),
+S3 = sympy.Symbol('S3'),
+S4 = sympy.Symbol('S4'),
+S5 = sympy.Symbol('S5'),
+S7 = sympy.Symbol('S7'),
+S8 = sympy.Symbol('S8'),
+S9 = sympy.Symbol('S9'),
+costheta_l = sympy.Symbol('costheta_l'),
+costheta_k = sympy.Symbol('costheta_k'),
+phi = sympy.Symbol('phi')
+)
+
+
+lower_ctl = sympy.Symbol('lower_ctl')
+upper_ctl = sympy.Symbol('upper_ctl')
+lower_ctk = sympy.Symbol('lower_ctk')
+upper_ctk = sympy.Symbol('upper_ctk')
+lower_phi = sympy.Symbol('lower_phi')
+upper_phi = sympy.Symbol('upper_phi')
+sympy_val = angular_func_sympy(**kwargs)
+print('starting integration')
+integral = sympy.integrate(sympy_val,
+                           (kwargs['costheta_l'], -1, 1),
+                           (kwargs['costheta_k'], -1, 1),
+                           (kwargs['phi'], -sympy.pi, sympy.pi),)
+
+# integral = sympy.integrate(sympy_val,
+#                            kwargs['costheta_l'],
+#                            kwargs['costheta_k'],
+#                            kwargs['phi'])
+print(integral)
+fun = sympy.lambdify((lower_ctl, lower_ctk, lower_phi),integral, 'tensorflow')
+print(inspect.getsource(fun)) """
--- a/Scripts/properties.env
+++ b/Scripts/properties.env
@ -0,0 +1,7 @@
+LOWER_COSTHETAK_CUT=-1.00
+UPPER_COSTHETAK_CUT=0.851
+SYS_PATH=../../
+SYS_PATH_LEVEL_1=../
+GEN_FILE=../../KplusPi0Resolved_Run2_MC_EvtGen_FCNC.root
+MC_FILE=../../KplusPi0Resolved_Run2_MC_FCNC.root
+MC_PHSP_FILE=../../KplusPi0Resolved_Run2_PHSP_FCNC.root
--- a/Scripts/requirements.yaml
+++ b/Scripts/requirements.yaml
@ -0,0 +1,24 @@
+name: angularanalysis
+channels:
+  - defaults
+dependencies:
+  - python=3.9.12
+  - iminuit
+  - numpy
+  - pandas
+  - matplotlib
+  - seaborn
+  - pytest
+  - pip
+  - pip:
+    - uproot==4.2.3
+    - awkward==1.8.0
+    - zfit
+    - prettytable
+    - PyRoot==0.3.0
+    - python-dotenv==0.20.0
+    - sympy
+    - uncertainties==3.1.6
+    - xgboost==1.6.1
+    - mplhep
+    - hep-ml
--- a/Scripts/root_file_inspect.py
+++ b/Scripts/root_file_inspect.py
@ -0,0 +1,29 @@
+import os
+import dotenv
+import sys
+import argparse
+
+dotenv.load_dotenv('properties.env')
+
+sys.path.insert(0, os.getenv('SYS_PATH_LEVEL_1'))
+
+from rootio import RootIO
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--file', dest = 'file', default = '')
+    parser.add_argument('--regex', dest = 'regex', default = '')
+    args   = parser.parse_args()
+    FILE   = args.file
+    regex  = args.regex
+
+    rootio = RootIO(FILE)
+    if regex:
+        rootio.get_information(regex = regex)
+        return
+    rootio.get_information()
+        
+
+if __name__ == "__main__":
+    main()
--- a/Code/Scripts/Python
+++ b/Code/Scripts/Python
@ -0,0 +1,37 @@
+import os
+import re
+import uproot
+
+
+class RootIO:
+    def __init__(self, path_to_file: str):
+        if not os.path.exists(path = path_to_file):
+            raise ValueError("Path does not exists!")
+        self.path_to_file = path_to_file
+
+
+    def get_information(self, regex: str = None) -> None:
+        with uproot.open(self.path_to_file) as file:
+            print(f"File classnames:\n{file.classnames()}")
+            for classname, classtype in file.classnames().items():
+                print(f"Classname:\n{classname}")
+                print(f"Classtype:\n{classtype}")
+                print(f"Content of the class:")
+                for name, branch in file[classname].items():
+                    print(f"{name}: {branch}")
+                if regex != None:
+                    print(f"Regex search results:")
+                    attributes = file[classname].keys()
+                    for attribute in attributes:
+                        result = self.__find_pattern_in_text(regex = regex, text = attribute)
+                        if result != "":
+                            print(f"{result}")
+
+
+    def __find_pattern_in_text(self, regex: str, text: str) -> str:
+        search_result = re.search(pattern = regex, string = text)
+        result        = ""
+        if search_result != None:
+            result = search_result.group(0)
+
+        return result
--- a/Code/Scripts/hep_analytics/init.py
+++ b/Code/Scripts/hep_analytics/init.py
--- a/Code/Scripts/hep_analytics/pathing/init.py
+++ b/Code/Scripts/hep_analytics/pathing/init.py
--- a/Code/Scripts/hep_analytics/pathing/aggregate.py
+++ b/Code/Scripts/hep_analytics/pathing/aggregate.py
@ -0,0 +1,13 @@
+class PathAggregator:
+    def __init__(self, basefolder: str, directory: str, file: str, year: str = None) -> None:
+        self.basefolder = basefolder
+        self.directory  = directory
+        self.file       = file
+        self.year       = year
+
+
+    def get_path_to_file(self) -> str:
+        if self.year != None:
+            return f"./{self.basefolder}/{self.directory}/{self.year}/{self.file}"
+
+        return f"./{self.basefolder}/{self.directory}/{self.file}"
--- a/Code/Scripts/hep_analytics/processing/init.py
+++ b/Code/Scripts/hep_analytics/processing/init.py
--- a/Code/Scripts/hep_analytics/processing/extract.py
+++ b/Code/Scripts/hep_analytics/processing/extract.py
@ -0,0 +1,20 @@
+import numpy as np
+import uproot
+import typing
+from dataclasses import dataclass
+
+
+@dataclass
+class FileManager:
+    file: str
+    tree: str
+    branches: list[str]
+
+    def extract_data(self) -> list[np.ndarray]:
+        with uproot.open(self.file) as file:
+            file_tree = file[self.tree]
+            data = []
+            for branch in self.branches:
+                data.append(file_tree[branch].array(library = "np"))
+
+        return data
--- a/Code/Scripts/hep_analytics/processing/transform.py
+++ b/Code/Scripts/hep_analytics/processing/transform.py
@ -0,0 +1,20 @@
+import numpy as np 
+
+from hep_ml.reweight import BinsReweighter
+
+
+def select_feature(feature: np.ndarray, limits: tuple[float, float]) -> tuple[np.ndarray, list]:
+    selection_indices = []
+    for index, value in enumerate(feature):
+        if value > limits[0] and value < limits[1]:
+            selection_indices.append(index)
+
+    return feature[selection_indices], selection_indices
+
+
+def reweight_feature(original_feature: list, target_feature: list, n_bins: int, n_neighs: int = 2):
+    original_weights = np.ones(len(original_feature))
+    bin_reweighter   = BinsReweighter(n_bins = n_bins, n_neighs = n_neighs)
+    bin_reweighter.fit(original = original_feature, target = target_feature, original_weight = original_weights)
+
+    return bin_reweighter.predict_weights(original = original_feature, original_weight = original_weights)
--- a/Code/Scripts/hep_analytics/processing/visualisation.py
+++ b/Code/Scripts/hep_analytics/processing/visualisation.py
@ -0,0 +1,22 @@
+import matplotlib.pyplot as plt
+import mplhep
+
+
+mplhep.style.use("LHCb2")
+
+def reweight_comparing_plot(original_feature: list, target_feature: list, weights: list, n_bins: int, suptitle: str, titles: list[str], save: str) -> None:
+    if len(original_feature) != len(weights):
+        raise ValueError(f"original_feature and weights need to have the same dimension!")
+
+    fig, ax = plt.subplots(nrows = 1, ncols = 3, figsize = (30, 12.5))
+
+    ax[0].hist(original_feature, bins = n_bins)
+    ax[0].set_ylabel("counts/a.u.")
+    ax[0].set_title(titles[0])
+    ax[1].hist(original_feature, bins = n_bins, weights = weights)
+    ax[1].set_title(titles[1])
+    ax[2].hist(target_feature, bins = n_bins)
+    ax[2].set_title(titles[2])
+
+    fig.suptitle(suptitle)
+    fig.savefig(f"{save}")
Author	SHA1	Message	Date
rskuza	c562013e27	Enhanced README	2022-09-15 12:15:29 +02:00
rskuza	0f8ca4f87d	Enhanced README	2022-09-15 12:13:30 +02:00
rskuza	3905f6bb57	Enhanced README	2022-09-15 12:10:52 +02:00
rskuza	13f509fa1d	Added b2kstll custom fixes	2022-09-15 12:06:35 +02:00
rskuza	696e5503e2	Updated README	2022-09-02 12:20:02 +02:00
rskuza	e626160426	Adjusted setup in readme	2022-08-30 11:45:57 +02:00
rskuza	03066c590e	Exchanged requirements type	2022-08-30 11:40:31 +02:00
rskuza	dc867874cd	Skimmed requirements further	2022-08-24 15:54:11 +02:00
rskuza	2c90c74eac	Included Python Version	2022-08-24 15:50:24 +02:00
rskuza	37f3c39657	Skimmed requirements.txt	2022-08-24 15:07:50 +02:00
rskuza	a473f9ccef	Updated properties env file	2022-08-24 12:25:16 +02:00
rskuza	84cc89889c	Updated README	2022-08-24 12:21:41 +02:00
rskuza	a850513fd5	Updated README	2022-08-24 12:17:55 +02:00
rskuza	eb400f021a	README File added	2022-08-24 12:05:31 +02:00
rskuza	0bc6296fb7	Python Scripts used for Angular Analysis for B+toK*+Pi0mumu	2022-08-24 11:23:16 +02:00