tracking-parametrisation-tuner/parameterisations/parameterise_track_model_el...

# flake8: noqa
from parameterisations.utils.parse_regression_coef_to_array import (
    parse_regression_coef_to_array,
)
from parameterisations.utils.fit_linear_regression_model import (
    fit_linear_regression_model,
)
import uproot
import argparse
from pathlib import Path


def parameterise_track_model(
    input_file: str = "data/param_data_selected.root",
    tree_name: str = "Selected",
) -> Path:
    """Fit the track-model parameterisations and write them out as C++ code.

    The tree is read into memory, a set of derived columns (slope differences,
    straight-line extrapolations in y and their residuals) is added, and a
    series of polynomial linear-regression fits is run on them via
    ``fit_linear_regression_model``. The fitted coefficients are converted with
    ``parse_regression_coef_to_array`` and written to
    ``parameterisations/result/track_model_params_electron.hpp``.

    Args:
        input_file (str, optional): Path to the ROOT file to read.
            Defaults to "data/param_data_selected.root".
        tree_name (str, optional): Name of the tree inside ``input_file``.
            Defaults to "Selected".

    Returns:
        Path: Path to cpp code files containing the found parameters.
    """
    # z value used for all "EndT" extrapolations below.
    # NOTE(review): presumably the z position of the end of the T stations -- confirm.
    z_end_t = 9410.0
    stereo_layers = [1, 2, 5, 6, 9, 10]
    # Input features shared by the fringe-field based y / ty / CY fits.
    # A fresh list is built for every call, exactly as the literals did before.
    fringe_features = ("ty", "tx", "dSlope_fringe", "dSlope_fringe_abs")

    # Read inside a context manager so the ROOT file is closed as soon as the
    # branches have been loaded into memory.
    with uproot.open({input_file: tree_name}) as input_tree:
        # this is an event list of dictionaries containing awkward arrays
        array = input_tree.arrays()

    # --- derived columns relative to the reference plane ---
    array["dSlope_fringe"] = array["tx_ref"] - array["tx"]
    array["dSlope_fringe_abs"] = abs(array["dSlope_fringe"])
    array["yStraightRef"] = array["y"] + array["ty"] * (array["z_ref"] - array["z"])
    array["y_ref_straight_diff"] = array["y_ref"] - array["yStraightRef"]
    array["ty_ref_straight_diff"] = array["ty_ref"] - array["ty"]

    # --- derived columns relative to layer 11 / z_end_t ---
    array["dSlope_xEndT"] = array["tx_l11"] - array["tx"]
    array["dSlope_yEndT"] = array["ty_l11"] - array["ty"]
    array["dSlope_xEndT_abs"] = abs(array["dSlope_xEndT"])
    array["dSlope_yEndT_abs"] = abs(array["dSlope_yEndT"])
    array["yStraightOut"] = array["y"] + array["ty"] * (array["z_out"] - array["z"])
    array["yDiffOut"] = array["y_out"] - array["yStraightOut"]
    array["yStraightEndT"] = array["y"] + array["ty"] * (z_end_t - array["z"])
    array["yDiffEndT"] = (
        array["y_l11"] + array["ty_l11"] * (z_end_t - array["z_l11"])
    ) - array["yStraightEndT"]

    # Residual of y in each stereo layer w.r.t. a straight-line extrapolation.
    for layer in stereo_layers:
        array[f"y_straight_diff_l{layer}"] = (
            array[f"y_l{layer}"]
            - array["y"]
            - array["ty"] * (array[f"z_l{layer}"] - array["z"])
        )

    model_cx, poly_features_cx = fit_linear_regression_model(
        array,
        target_feat="CX_ex",
        features=["tx", "ty", "dSlope_fringe"],
        degree=3,
        keep_only_linear_in="dSlope_fringe",
        fit_intercept=False,
    )
    model_dx, poly_features_dx = fit_linear_regression_model(
        array,
        target_feat="DX_ex",
        features=["tx", "ty", "dSlope_fringe"],
        degree=3,
        keep_only_linear_in="dSlope_fringe",
        fit_intercept=False,
    )
    # this list has been found empirically by C.Hasse
    keep_y_corr = [
        "ty dSlope_fringe_abs",
        "ty tx^2 dSlope_fringe_abs",
        "ty^3 dSlope_fringe_abs",
        "ty^3 tx^2 dSlope_fringe_abs",
        "dSlope_fringe",
        "ty tx dSlope_fringe",
        "ty tx^3 dSlope_fringe",
        "ty^3 tx dSlope_fringe",
    ]
    model_y_corr_ref, poly_features_y_corr_ref = fit_linear_regression_model(
        array,
        target_feat="y_ref_straight_diff",
        features=list(fringe_features),
        keep=keep_y_corr,
        degree=6,
        fit_intercept=False,
    )

    # One fit per stereo layer; each contributes one brace-enclosed row made of
    # its non-zero coefficients, each suffixed with "f".
    rows = []
    for layer in stereo_layers:
        model_y_corr_l, poly_features_y_corr_l = fit_linear_regression_model(
            array,
            target_feat=f"y_straight_diff_l{layer}",
            features=list(fringe_features),
            keep=keep_y_corr,
            degree=6,
            fit_intercept=False,
        )
        nonzero_coefs = [
            str(coef) + "f" for coef in model_y_corr_l.coef_ if coef != 0.0
        ]
        rows.append("{" + ",".join(nonzero_coefs) + "}")

    model_ty_corr_ref, poly_features_ty_corr_ref = fit_linear_regression_model(
        array,
        target_feat="ty_ref_straight_diff",
        features=list(fringe_features),
        # this list was found by using Lasso regularisation to drop useless features
        keep=[
            "ty dSlope_fringe^2",
            "ty tx^2 dSlope_fringe_abs",
            "ty^3 dSlope_fringe_abs",
            "ty^3 tx^2 dSlope_fringe_abs",
            "ty tx dSlope_fringe",
            "ty tx^3 dSlope_fringe",
        ],
        degree=6,
        fit_intercept=False,
    )

    model_cy, poly_features_cy = fit_linear_regression_model(
        array,
        target_feat="CY_ex",
        features=list(fringe_features),
        # this list was found by using Lasso regularisation to drop useless features
        keep=[
            "ty dSlope_fringe^2",
            "ty dSlope_fringe_abs",
            "ty tx^2 dSlope_fringe_abs",
            "ty^3 dSlope_fringe_abs",
            "ty tx dSlope_fringe",
        ],
        degree=4,
        fit_intercept=False,
    )

    model_y_match, poly_features_y_match = fit_linear_regression_model(
        array,
        target_feat="yDiffOut",
        features=[
            "ty",
            "dSlope_xEndT",
            "dSlope_yEndT",
        ],
        keep=[
            "ty dSlope_yEndT^2",
            "ty dSlope_xEndT^2",
        ],
        degree=3,
        fit_intercept=False,
    )
    keep_y_match_precise = [
        "dSlope_yEndT",
        "ty dSlope_xEndT_abs",
        "ty dSlope_yEndT_abs",
        "ty dSlope_yEndT^2",
        "ty dSlope_xEndT^2",
        "ty tx dSlope_xEndT",
        "tx^2 dSlope_yEndT",
        "ty tx^2 dSlope_xEndT_abs",
        "ty^3 tx dSlope_xEndT",
    ]
    # NOTE(review): unlike every other fit here, fit_intercept is not passed,
    # so the helper's default applies -- confirm this is intended.
    model_y_match_precise, poly_features_y_match_precise = fit_linear_regression_model(
        array,
        "yDiffEndT",
        [
            "ty",
            "tx",
            "dSlope_xEndT",
            "dSlope_yEndT",
            "dSlope_xEndT_abs",
            "dSlope_yEndT_abs",
        ],
        keep=keep_y_match_precise,
        degree=5,
    )

    cpp_cx = parse_regression_coef_to_array(model_cx, poly_features_cx, "cxParams")
    cpp_dx = parse_regression_coef_to_array(model_dx, poly_features_dx, "dxParams")
    # The model / feature objects of the last fitted stereo layer are passed
    # together with the pre-formatted per-layer rows.
    cpp_y_corr_layers = parse_regression_coef_to_array(
        model_y_corr_l,
        poly_features_y_corr_l,
        "yCorrParamsLayers",
        rows=rows,
    )
    cpp_y_corr_ref = parse_regression_coef_to_array(
        model_y_corr_ref,
        poly_features_y_corr_ref,
        "yCorrParamsRef",
    )
    cpp_ty_corr_ref = parse_regression_coef_to_array(
        model_ty_corr_ref,
        poly_features_ty_corr_ref,
        "tyCorrParamsRef",
    )
    cpp_cy = parse_regression_coef_to_array(model_cy, poly_features_cy, "cyParams")
    cpp_y_match = parse_regression_coef_to_array(
        model_y_match,
        poly_features_y_match,
        "bendYParamsMatch",
    )
    cpp_y_match_precise = parse_regression_coef_to_array(
        model_y_match_precise,
        poly_features_y_match_precise,
        "bendYParams",
    )

    outpath = Path("parameterisations/result/track_model_params_electron.hpp")
    outpath.parent.mkdir(parents=True, exist_ok=True)
    with open(outpath, "w", encoding="utf-8") as result:
        result.writelines(
            cpp_cx
            + cpp_dx
            + cpp_y_corr_layers
            + cpp_y_corr_ref
            + cpp_ty_corr_ref
            + cpp_cy
            + cpp_y_match
            + cpp_y_match_precise,
        )
    return outpath


if __name__ == "__main__":
    # Only needed when this module is executed as a script.
    import subprocess
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input-file",
        type=str,
        help="Path to the input file",
        required=False,
    )
    parser.add_argument(
        "--tree-name",
        type=str,
        help="Name of the tree to read from the input file",
        required=False,
    )
    args = parser.parse_args()
    # Forward only the options that were actually given, so the defaults of
    # parameterise_track_model apply to everything else.
    args_dict = {arg: val for arg, val in vars(args).items() if val is not None}
    outfile = parameterise_track_model(**args_dict)

    # run clang-format for nicer looking result; this is best-effort, so a
    # missing or failing clang-format is reported but does not fail the script.
    try:
        subprocess.run(
            [
                "clang-format",
                "-i",
                f"{outfile}",
            ],
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as err:
        print(
            f"clang-format was not applied to {outfile}: {err}",
            file=sys.stderr,
        )