You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

717 lines
46 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import uproot\n",
"import awkward as ak\n",
"import numpy as np\n",
"#input_tree_md = uproot.open({\"/work/guenther/reco_tuner/data/param_data_MD_selected_8520.root\": \"Selected\"})\n",
"#input_tree_mu = uproot.open({\"/work/guenther/reco_tuner/data/param_data_MU_selected_8520.root\": \"Selected\"})\n",
"input_tree = uproot.open({\"/work/guenther/reco_tuner/data/param_data_selected.root\": \"Selected\"})\n",
"# this is an event list of dictionaries containing awkward arrays\n",
"#array_md = input_tree_md.arrays()\n",
"#array_mu = input_tree_mu.arrays()\n",
"#array = ak.concatenate([array_md, array_mu])\n",
"array = input_tree.arrays()\n",
"array[\"dSlope_fringe\"] = array[\"tx_ref\"] - array[\"tx\"]\n",
"array[\"dSlope_fringe_abs\"] = abs(array[\"dSlope_fringe\"])\n",
"array[\"z_mag_x_fringe\"] = (array[\"x\"] - array[\"x_ref\"] - array[\"tx\"] * array[\"z\"] + array[\"tx_ref\"] * array[\"z_ref\"] ) / array[\"dSlope_fringe\"]\n",
"array[\"yStraightRef\"] = array[\"y\"] + array[\"ty\"] * ( array[\"z_ref\"] - array[\"z\"])\n",
"array[\"AY_straight_diff\"] = array[\"AY_ex\"] - array[\"yStraightRef\"]\n",
"array[\"y_ref_straight_diff\"] = array[\"y_ref\"] - array[\"yStraightRef\"]\n",
"array[\"y_straight_diff_l1\"] = array[\"y_l1\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l1\"] - array[\"z\"])\n",
"array[\"y_straight_diff_l2\"] = array[\"y_l2\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l2\"] - array[\"z\"])\n",
"array[\"y_straight_diff_l5\"] = array[\"y_l5\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l5\"] - array[\"z\"])\n",
"array[\"y_straight_diff_l6\"] = array[\"y_l6\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l6\"] - array[\"z\"])\n",
"array[\"y_straight_diff_l9\"] = array[\"y_l9\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l9\"] - array[\"z\"])\n",
"array[\"y_straight_diff_l10\"] = array[\"y_l10\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l10\"] - array[\"z\"])\n",
"array[\"BY_straight_diff\"] = array[\"BY_ex\"] - array[\"ty\"]\n",
"array[\"ty_ref_straight_diff\"] = array[\"ty_ref\"] - array[\"ty\"]\n",
"def format_array(name, coef):\n",
" coef = [str(c)+\"f\" for c in coef if c != 0.0]\n",
" code = f\"constexpr std::array {name}\"\n",
" code += \"{\" + \", \".join(list(coef)) +\"};\"\n",
" return code"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
"intercept= 0.0\n",
"coef= {'dSlope_fringe': 2.54927052921483, 'ty dSlope_fringe_abs': 65.18649309504633, 'ty tx dSlope_fringe': 4174.444641065072, 'ty^3 dSlope_fringe_abs': -10543.061353132687, 'ty tx^2 dSlope_fringe_abs': 812.6329282763543, 'ty^3 tx dSlope_fringe': 52906.81696296328, 'ty tx^3 dSlope_fringe': 37770.21841979088, 'ty^3 tx^2 dSlope_fringe_abs': 273401.3548544383}\n",
"r2 score= 0.9424448007543189\n",
"RMSE = 8.058017702810291\n",
"straight RMSE = 33.93970007402008\n",
"constexpr std::array y_ref_straight_diff{2.54927052921483f, 65.18649309504633f, 4174.444641065072f, -10543.061353132687f, 812.6329282763543f, 52906.81696296328f, 37770.21841979088f, 273401.3548544383f};\n"
]
}
],
"source": [
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.metrics import mean_squared_error\n",
"\n",
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"y_ref_straight_diff\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
" \"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" \"ty tx^3 dSlope_fringe\",\n",
" \"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print(\"straight RMSE =\", mean_squared_error(array[\"y_ref\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_ref\"] - array[\"z\"] ), squared=False))\n",
"print(format_array(\"y_ref_straight_diff\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
"intercept= 0.0\n",
"coef= {'dSlope_fringe': 1.926395569816899, 'ty dSlope_fringe_abs': 155.7194258002827, 'ty tx dSlope_fringe': 3711.2012601369147, 'ty^3 dSlope_fringe_abs': -6986.454915191362, 'ty tx^2 dSlope_fringe_abs': -102.59383584048052, 'ty^3 tx dSlope_fringe': 42360.930984198254, 'ty tx^3 dSlope_fringe': 29857.060893721067, 'ty^3 tx^2 dSlope_fringe_abs': 209095.7911514973}\n",
"r2 score= 0.9391433516368133\n",
"RMSE = 6.578135946782935\n",
"straight RMSE = 26.874058521627408\n",
"constexpr std::array y_straight_diff_l1{1.926395569816899f, 155.7194258002827f, 3711.2012601369147f, -6986.454915191362f, -102.59383584048052f, 42360.930984198254f, 29857.060893721067f, 209095.7911514973f};\n"
]
}
],
"source": [
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"y_straight_diff_l1\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
" \"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" \"ty tx^3 dSlope_fringe\",\n",
" \"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l1\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l1\"] - array[\"z\"] ), squared=False))\n",
"print(format_array(\"y_straight_diff_l1\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
"intercept= 0.0\n",
"coef= {'dSlope_fringe': 1.992023095115273, 'ty dSlope_fringe_abs': 147.53158307285184, 'ty tx dSlope_fringe': 3758.3638141105685, 'ty^3 dSlope_fringe_abs': -7387.838564546752, 'ty tx^2 dSlope_fringe_abs': -20.05844697530397, 'ty^3 tx dSlope_fringe': 43544.55794543203, 'ty tx^3 dSlope_fringe': 30729.099594836473, 'ty^3 tx^2 dSlope_fringe_abs': 216301.09830837924}\n",
"r2 score= 0.939662275160689\n",
"RMSE = 6.726003215973192\n",
"straight RMSE = 27.60567927172973\n",
"constexpr std::array y_straight_diff_l2{1.992023095115273f, 147.53158307285184f, 3758.3638141105685f, -7387.838564546752f, -20.05844697530397f, 43544.55794543203f, 30729.099594836473f, 216301.09830837924f};\n"
]
}
],
"source": [
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"y_straight_diff_l2\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
" \"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" \"ty tx^3 dSlope_fringe\",\n",
" \"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l2\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l2\"] - array[\"z\"] ), squared=False))\n",
"print(format_array(\"y_straight_diff_l2\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
"intercept= 0.0\n",
"coef= {'dSlope_fringe': 2.6109097507814067, 'ty dSlope_fringe_abs': 55.23845097538265, 'ty tx dSlope_fringe': 4222.757834030979, 'ty^3 dSlope_fringe_abs': -10869.286047558253, 'ty tx^2 dSlope_fringe_abs': 913.7370293450639, 'ty^3 tx dSlope_fringe': 53875.65687892285, 'ty tx^3 dSlope_fringe': 38512.63398443023, 'ty^3 tx^2 dSlope_fringe_abs': 279346.89317804825}\n",
"r2 score= 0.9426062193562865\n",
"RMSE = 8.213743029204338\n",
"straight RMSE = 34.65071255550859\n",
"constexpr std::array y_straight_diff_l5{2.6109097507814067f, 55.23845097538265f, 4222.757834030979f, -10869.286047558253f, 913.7370293450639f, 53875.65687892285f, 38512.63398443023f, 279346.89317804825f};\n"
]
}
],
"source": [
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"y_straight_diff_l5\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
" \"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" \"ty tx^3 dSlope_fringe\",\n",
" \"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l5\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l5\"] - array[\"z\"] ), squared=False))\n",
"print(format_array(\"y_straight_diff_l5\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
"intercept= 0.0\n",
"coef= {'dSlope_fringe': 2.6869036549491536, 'ty dSlope_fringe_abs': 42.86164139009395, 'ty tx dSlope_fringe': 4282.473673540287, 'ty^3 dSlope_fringe_abs': -11261.305925638624, 'ty tx^2 dSlope_fringe_abs': 1038.8682634234196, 'ty^3 tx dSlope_fringe': 55053.993311126746, 'ty tx^3 dSlope_fringe': 39412.35231667733, 'ty^3 tx^2 dSlope_fringe_abs': 286552.2319656697}\n",
"r2 score= 0.9427768256184441\n",
"RMSE = 8.406185401645494\n",
"straight RMSE = 35.523013512241214\n",
"constexpr std::array y_straight_diff_l6{2.6869036549491536f, 42.86164139009395f, 4282.473673540287f, -11261.305925638624f, 1038.8682634234196f, 55053.993311126746f, 39412.35231667733f, 286552.2319656697f};\n"
]
}
],
"source": [
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"y_straight_diff_l6\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
" \"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" \"ty tx^3 dSlope_fringe\",\n",
" \"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l6\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l6\"] - array[\"z\"] ), squared=False))\n",
"print(format_array(\"y_straight_diff_l6\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
"intercept= 0.0\n",
"coef= {'dSlope_fringe': 3.3840782993219154, 'ty dSlope_fringe_abs': -73.66219618861146, 'ty tx dSlope_fringe': 4855.8672756860515, 'ty^3 dSlope_fringe_abs': -14629.483020343234, 'ty tx^2 dSlope_fringe_abs': 2208.9899749295746, 'ty^3 tx dSlope_fringe': 65168.34060362849, 'ty tx^3 dSlope_fringe': 47225.377082479885, 'ty^3 tx^2 dSlope_fringe_abs': 349003.88327916135}\n",
"r2 score= 0.943344616193092\n",
"RMSE = 10.238398557856689\n",
"straight RMSE = 43.54352590748684\n",
"constexpr std::array y_straight_diff_l9{3.3840782993219154f, -73.66219618861146f, 4855.8672756860515f, -14629.483020343234f, 2208.9899749295746f, 65168.34060362849f, 47225.377082479885f, 349003.88327916135f};\n"
]
}
],
"source": [
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"y_straight_diff_l9\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
" \"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" \"ty tx^3 dSlope_fringe\",\n",
" \"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l9\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l9\"] - array[\"z\"] ), squared=False))\n",
"print(format_array(\"y_straight_diff_l9\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
"intercept= 0.0\n",
"coef= {'dSlope_fringe': 3.466479515860849, 'ty dSlope_fringe_abs': -87.54166562301657, 'ty tx dSlope_fringe': 4926.350778945553, 'ty^3 dSlope_fringe_abs': -15008.616026945787, 'ty tx^2 dSlope_fringe_abs': 2347.3262833969093, 'ty^3 tx dSlope_fringe': 66293.67978997533, 'ty tx^3 dSlope_fringe': 48106.815974854835, 'ty^3 tx^2 dSlope_fringe_abs': 356053.9012962991}\n",
"r2 score= 0.9433372187768841\n",
"RMSE = 10.46108881485266\n",
"straight RMSE = 44.49300835626165\n",
"constexpr std::array y_straight_diff_l10{3.466479515860849f, -87.54166562301657f, 4926.350778945553f, -15008.616026945787f, 2347.3262833969093f, 66293.67978997533f, 48106.815974854835f, 356053.9012962991f};\n"
]
}
],
"source": [
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"y_straight_diff_l10\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
" \"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" \"ty tx^3 dSlope_fringe\",\n",
" \"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l10\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l10\"] - array[\"z\"] ), squared=False))\n",
"print(format_array(\"y_straight_diff_l10\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"intercept= 0.0\n",
"coef= {'ty tx dSlope_fringe': 0.9394190987843558, 'ty dSlope_fringe^2': -0.46480920487603206, 'ty^3 dSlope_fringe_abs': -4.137993544858158, 'ty tx^2 dSlope_fringe_abs': 2.980803780828937, 'ty tx^3 dSlope_fringe': 12.402177409386482, 'ty^3 tx^2 dSlope_fringe_abs': 38.238656269022954}\n",
"r2 score= 0.9649625489427109\n",
"RMSE = 0.0025237463237113896\n",
"straight RMSE = 0.013705994187091751\n",
"constexpr std::array ty_ref_straight_diff{0.9394190987843558f, -0.46480920487603206f, -4.137993544858158f, 2.980803780828937f, 12.402177409386482f, 38.238656269022954f};\n"
]
}
],
"source": [
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"ty_ref_straight_diff\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe^2\",\n",
" #\"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
" #\"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" \"ty tx^3 dSlope_fringe\",\n",
" #\"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"#remove = [i for i, f in enumerate(poly_features) if (\"dSlope_fringe\" not in f) or (\"dSlope_fringe^\" in f) or (\"dSlope_fringe_abs^\" in f) or (\"dSlope_fringe dSlope_fringe_abs\" in f)]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"#print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"#lin_reg = Lasso(fit_intercept=False, alpha=0.0000000001)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print(\"straight RMSE =\", mean_squared_error(array[\"ty_ref\"], array[\"ty\"], squared=False))\n",
"print(format_array(\"ty_ref_straight_diff\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot: >"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiIAAAGsCAYAAADg5swfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAnZUlEQVR4nO3df3RU5YH/8c/MEIZfSWpISYwEEqw1hVDFxC1UUNmtgFvbcs4eLRZZYYUDp2jBnm4r5+sPUCv01O262iqI3WixLZ5Wpbtfd12w/QpaQpdAsEKQBSVI+ZGISBJEQsg83z/YjAyZzI9k7n3uzLxf58w5zJ1n7n3m8cp8eH6NzxhjBAAAYIHfdgUAAED2IogAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAa9ImiGzatElf+9rXVFJSIp/Pp3Xr1jl6vbNnz+ree+9VeXm5Bg4cqFGjRunBBx9UKBRy9LoAAGSTfrYrkKiPP/5YV1xxhebMmaO/+7u/c/x6P/rRj7Ry5Uo999xzGjNmjOrq6jRnzhzl5+dr0aJFjl8fAIBskDZB5MYbb9SNN97Y4+tnzpzRvffeq1/+8pc6ceKEKisr9aMf/UjXX399r65XW1urb3zjG/rqV78qSSorK9Ovf/1r1dXV9ep8AACgu7QZmolnzpw5+uMf/6i1a9fqz3/+s26++WZNmzZNe/fu7dX5Jk6cqN///vf6n//5H0nSW2+9pTfffFN/+7d/m8pqAwCQ1dKmRySWd999V7/+9a/1l7/8RSUlJZKk733ve3r11VdVU1OjRx55JOlz/uAHP1BLS4sqKioUCATU2dmpH/7wh7r11ltTXX0AALJWRgSR7du3yxijz3/+8xHH29vbNXToUElSY2OjysvLY55n4cKF+ulPfypJeuGFF/T888/rV7/6lcaMGaMdO3Zo8eLFKikp0e233+7MBwEAIMtkRBAJhUIKBALatm2bAoFAxGtDhgyRJF1yySXavXt3zPNcdNFF4T//4z/+o+655x7NmDFDkjR27FgdOHBAy5cvJ4gAAJAiGRFExo0bp87OTjU3N2vSpElRy+Tk5KiioiLhc546dUp+f+QUmkAgwPJdAABSKG2CyMmTJ7Vv377w8/3792vHjh0qKCjQ5z//ec2cOVN///d/r3/6p3/SuHHjdOzYMf3hD3/Q2LFjezXB9Gtf+5p++MMfasSIERozZozq6+v1k5/8RP/wD/+Qyo8FAEBW8xljjO1KJOL111/X5MmTux2//fbb9eyzz6qjo0MPP/ywfvGLX+jQoUMaOnSoJkyYoGXLlmns2LFJX6+trU333XefXn75ZTU3N6ukpES33nqr7r//fvXv3z8VHwkAgKyXNkEEAABknozZRwQAAKQfgggAALDG05NVQ6GQDh8+rNzcXPl8PtvVAQAACTDGqK2tTSUlJd1WoF7I00Hk8OHDKi0ttV0NAADQCwcPHtTw4cNjlvF0EMnNzZV07oPk5eVZrg0AAEhEa2urSktLw9/jsXg6iHQNx+Tl5RFEAABIM4lMq2CyKgAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGs8/VszTpm/pk6Nx05JksoKB2nVrGrLNQIAIDtlZRBpPHZKe5rabFcDAICsx9AMAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAa1wLIsuXL5fP59PixYvduiQAAPA4V4LI1q1b9fTTT+uLX/yiG5cDAABpwvEgcvLkSc2cOVOrV6/WRRdd5PTlAABAGnE8iCxcuFBf/epX9ZWvfCVu2fb2drW2tkY8AABA5urn5MnXrl2r7du3a+vWrQmVX758uZYtW+ZklQAAgIc41iNy8OBBLVq0SM8//7wGDBiQ0HuWLFmilpaW8OPgwYNOVQ8AAHiAYz0i27ZtU3Nzs6qqqsLHOjs7tWnTJv30pz9Ve3u7AoFAxHuCwaCCwaBTVQIAAB7jWBD5m7/5G7399tsRx+bMmaOKigr94Ac/6BZCAABA9nEsiOTm5qqysjLi2ODBgzV06NBux20KGWO7CgAAZK2s21l1a+NxHTrxSfj53uaTmr+mTnWNxy3WCgCA7OQzxrtdAq2trcrPz1dLS4vy8vL6fL41Ww7o/nU7deEHDvh9CoWMHppeqdvGj+zzdQAAyGbJfH9nTY/I1sbjUUOIJHWGjIyk+9btpGcEAAAXZU0QeeaN9+T3+2KW8ft9eubN/S7VCAAAZEUQOd3RqQ0NTeoMxR6F6gwZrd91VKc7Ol2qGQAA2S0rgkjb6bOKk0HCQuZceQAA4LysCCK5A/opzqhMmN93rjwAAHBeVgSRATkB3TC6SIE4aSTg92nKmGINyGGzNQAA3JAVQUSS5k4apVCc8ZlQyGjuxHKXagQAALImiFxdVqCHplcqWp9IwO+TT9JD0ytVXVbgdtUAAMhaWTUZ4rbxI1VRnKvZNVt1sv3TCak3jC7S3InlhBAAAFyWVUFEkqrLCnTJZwZqT1ObJOmyYUO08raqOO8CAABOyJqhmZ74fQkupwEAACmX9UEEAADYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFjTz3YFbCgrHBT1zwAAwF1ZGURWzaq2XQUAACCGZgAAgEUEEQAAYA1BBAAAWEMQAQAA1hBEAACANY4GkeXLl+vqq69Wbm6uhg0bpunTp2vPnj1OXhIAAKQRR4PIxo0btXDhQm3ZskUbNmzQ2bNnNWXKFH388cdOXhYAAKQJnzHGuHWxDz74QMOGDdPGjRt17bXXxi3f2tqq/Px8tbS0KC8vz4UaAgCAvkrm+9vVDc1aWlokSQUFBVFfb29vV3t7e/h5a2urK/UCAAB2uDZZ1Rij7373u5o4caIqKyujllm+fLny8/PDj9LSUreqBwAALHBtaGbhwoV65ZVX9Oabb2r48OFRy0TrESktLWVoBgCANOK5oZm77rpL//Zv/6ZNmzb1GEIkKRgMKhgMulElAADgAY4GEWOM7rrrLr388st6/fXXVV5e7uTlAABAmnE0iCxcuFC/+tWv9Lvf/U65ubk6evSoJCk/P18DBw508tIAACANODpHxOfzRT1eU1Oj2bNnx30/y3cBAEg/npkj4uIWJQAAIA3xWzMAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsMbVX99Fepi/pk6Nx05JksoKB2nVrGrLNQIAZCqCCLppPHZKe5rabFcDAJAFCCJIK/TWAEBmIYggrdBbAwCZhcmqAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABr+tmuAAD01fw1dWo8dkqSVFY4SKtmVVuuEYBEEUQApL3GY6e0p6nNdjUA9AJDMwAAwBqCCAAAsIYgAgAArCGIIG2FjLFdBQBAHxFEkDa2Nh7XoROfhJ/vbT6p+WvqVNd43GKtAAB9waoZpIU1Ww7o/nU7dWEfyGu7m7V+V5Meml6p28aPtFI3pC+W/QL2EUTgeVsbj0cNIZLUGTp39L51O1VRnKvqsgJ3K4e0xrJfwD6CCGLywjyMZ954T36/Lxw6ovH7fXrmzf0EEXjins0W9CghFQgiiNDTPIx5k0ZZ+ZI/3dGpDQ1NipFBJJ3rGVm/66hOd3RqQE6gz9f1yl+wXqmHl3ntns0m9CghFQgiCPPiPIy202fjhpAuIXOufCqCiFf+gvVKPbzKi/csgOQQRCDJu/Mwcgf0k9+nhMKI33euPLJDIvfsvet26ulN72pgTr+s6lGiJw3phL+1Icm78zAG5AR0w+givba7OWbdAn6fbhhdlJLeEKSHRO5ZSXr/+CcxX89E9KQhnRBEYG0eRqLmThql9buaYpYJhYzmTix3qUbJ4V+nqZfoPQvA+9jQDL2ah+Gmq8sK9ND0SvmivBbw++ST9ND0Ss9OTOz61+meprZwIEHfJHPPAvA2ekSQFvMwbhs/UhXFuZpds1Un2z8NQjeMLtLcieWeDSFwRjL3LABvo0cE4XkYAX+0PodPBfw+TRlTbG0eRnVZgS75zMDw88uGDdHK26oIIVko0XsWgPcRRCDp3DyMUJx/XnptHobfx5dQNkvkngXgfQQRSEr/eRjIHPPX1GnqP2/S1H/epPlr6nosl8g9W5QXTPi67MgK2MEcEYQxDyO9ZcrqnFhLT6N9xlj37P95eaeaWtujnosdWQFvIIggQtc8jK4vgq55GPC+RPeOSOfAEu0z9uaeZUdWwDsIIoiJeRiZJ9M3u4p3z3p1F2EgWxFEAEToqccknXtSpE/ngHh1F2EgWxFEgDTh1mTKnnpMku1JiRVc3Ag10eaAzH1uq37/TrPiNaWtXYSBbEQQATwq3SdTxgouTg8P9TQH5A8JhJAuvf0153TvOQLcRhABPIjJlL13quNsj3NAktl2pLe7CGf6HBwg1dhHBPCYeJMpjc5NpqxrPO521dLCRx93yN/HHVdt7yIMZBOCCOAxXZMpY+maTJnpYs2L6em1k+1nY05ETei6HttFGMhkDM0AcaRykmi8cyX68/aZOpky1rwYI/X4WlnhIEnS2VBI737wca+vH/D7FAqZiF2EmfMBOIsgAlwglZNEkz1XMj9v39vJlF4Va17Mf+1qivqe8+fMrJo1Uqc7OjX6/lcTakOfpMHBfnF3EWbOB+AshmaA86zZckC3rKyN+HKSzn3h3byyVs9vOeDoubp+3j4RvZ1M6UWJbDIWzYVzZpL5JemplcX8mjPgAQQR4H+lcpJob8+VzBdprMmUiQ4nxSvX0+vJDlfFK5/IvJhYzp8z09tfks7UXYTd2n+GHw1EbxFEgP+VykmifTlXb75IexoCujDoxCvX0+trahsTOn+y9ZGkTmO0oaGpTxNMz58zk66/JJ3orw7Hk0zb94Vb10Hmc6Vf98knn9SPf/xjHTlyRGPGjNFjjz2mSZMmuXFpICGpnCTa13N1fZHeF6VHJdpkykT3HIlX7qYrLtb/fetIt9fXNzRFnaPR9b4vXJwXDhFlhYM08bLPxrzOHZPKI77A9jWfjN1QCTp/zkw6/pJ0KuaiuLX/DPvcIJUcDyIvvPCCFi9erCeffFLXXHONVq1apRtvvFENDQ0aMWKE05cHEpLKSaKpOFeiX6SJ/oCbMUb3/25XzHL//taRqHXsqce9630NR1rDx051nNX6XU0xr/PMG84sO75wzky2/ZK0Wz/mx48GItUcH5r5yU9+ojvuuENz587VF77wBT322GMqLS3VU0895fSlgYSlcpJoqs7V9UXaJdpkykSHgJ74f/v6vMlXIlKxmVhvJLIBWabOAeni1v4z7HODVHM0iJw5c0bbtm3TlClTIo5PmTJFmzdv7la+vb1dra2tEQ/ADamaJJrqc53vwi/SriGgeHMrOkNGza3tfd7kKxGp2EysN7J9A7Jk7oWu4UAvXwfZxdEgcuzYMXV2dqqoqCjieFFRkY4ePdqt/PLly5Wfnx9+lJaWOlk9IEJvV1s4fa6eJDMElM5iBTovTz6V3FtJ0pvhQC9fB9nFlVUzvgv+JWeM6XZMkpYsWaKWlpbw4+DBg25UD2mkrHCQLi/K1eVFueHdNFMllast3Fi5kcwQUDq7YXSRfrtggn67YIKGBPt1e+03CyZ4ZmKkrZUkbu0/k6373MBZjt4lhYWFCgQC3Xo/mpubu/WSSFIwGFQwGHSySkhzTm+vncrVFk6v3OgaAnptd3PMrvKA36ehQ/rrw5NnHB82GRLsp086OlN2nQsnmHp58qnNlSTJ3As3jC7q9W68bl0H2cXRHpH+/furqqpKGzZsiDi+YcMGffnLX3by0kCvJTJJ1Ma5okl0COiuyZ+LWy4VLhqc06frBPy+iF6PWBNMUzH5NFU9bF74xWQ3hgPdvA6yh+NDM9/97nf1zDPP6F//9V+1e/du3X333Xr//fe1YMECpy8NpEQqV1ukeuVGokNAsyaUxS339StKor7eU5W73leU92kv5oB+gR6vk0iXfihkVDA4J37BFFk1q1r/dfe1+q+7r02ot62nOR9eWEni1kZu6bphHLzL8SDyzW9+U4899pgefPBBXXnlldq0aZP+4z/+QyNHemNMF0h3t40fqd8kMH8iXrnHbx0X9fWpY4r18DfGRH3fg98Yo4/bP10Zsbf5pN7Y+4EeilJ+yphizZtUHvcLbGBO6kaM+9rjkcicDy+tJEn0XkiX6yA7uDKT6Nvf/ra+/e1vu3EpICslunlXvHKxXl+z5f2I49d8rjDmnIhhecHw3JjzzzN1THHMeTNrahP/YcF4+jKnKNE5H177xWS3NnLLtg3j4Bx+awbIQIkOAcUr19Prp892xp0T0dTaHvU8Ts+bSYVk5nx4fSWJWxu5ZfqGcXAOQQRA0lK5g6oXv8CSmfPh1AZ2QLYgiABImq0dVN3QmzkfrCQBeo/dZgAPO3+CZao3cPOqWJ/ZjfbozZyPZH8xGcCnCCLoJhu//LzK6Q3cvCjWZ3ajPbrmfCQSRs6f85HKDezc2hoe8AKCCLrJxi8/JCfVO6h6SV92D+3tSpKelgnPmzSKXhRkPOaIAEjI+XtyjC7Jc2WnVltSNecjkYm4a7Yc0C0rayN6UaRzy4RvXlmr57ekbjkz4EX0iACI0NPQ3IU9Zc9vORBzTsSwvGDEEt504tacj3jLhKVzy4QrinPpGUHGIogAWSbeHKBEh+bizYlY/cZ7ajx2qsfreJ3TP1oofbpMONYQUNcyYYIIMhVBBMgyqZwDFGtORKwvznSZEO3k7qFdy4TjjXCdv0yYPUgSN39NXUQQZu6bdxFEAKRMopuTpeuXQio3X/Pa1vCZpvHYqXCAhLcRRACHufWv/3TpZcA5vV0mDGQa7mzAYW796z9dexmyVV+WCQOZhOW7AGAJW8MDBBEgqvP3zGCYA07pWiYcbeZJwO+TT2JreGQ8hmaAKBjmgFvcWCYMeBlBBECfMEm275xcJgx4HUEEQJ/Qe5R6qVwmDHgdQQQAMgy9VEgnBBEASCEvhAB6qZBOCCIAkEKEACA5BBEAiMELPRxAJiOIAEAM9HAAziKIAAB6hd4ipAJBBADQK/QWIRXY4h0AAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1rB8FwA8gD05kK0IIgDgAezJgWzF0AwAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIACCjhYyxXQXEQBABAGSUrY3HdejEJ+Hne5tPav6aOtU1HrdYK/SEIAIAyBhrthzQLStrdbL9bMTx13Y36+aVtXp+ywFLNUNPCCIAgIywtfG47l+3U9EGYjpDRkbSfet20jPiMQQRAEBGeOaN9+T3+2KW8ft9eubN/S7VCIkgiAAA0t7pjk5taGhSZyj2xNTOkNH6XUd1uqPTpZohHoIIACDttZ0+qzgZJCxkzpWHNxBEAABpL3dAP8UZlQnz+86VhzfwXwIAkPYG5AR0w+givba7OebwTMDv0w2jizQgJ+Bi7bxp/po6NR47JUkqKxykVbOqrdSDIAIAyAhzJ43S+l1NMcuEQkZzJ5a7VCNvazx2Snua2mxXg6EZAEBmuLqsQA9Nr1S0EZqA3yefpIemV6q6rMDtqiEGekQAABnjtvEjVVGcq9k1WyM2NbthdJHmTiwnhHgQQQQAkFGqywp0yWcGhocdLhs2RCtvq7JcK/SEoRkAQEbz+xJcTgMrCCIAAMAagggAALCGIAIAAKwhiAAAAGscCyKNjY264447VF5eroEDB+rSSy/VAw88oDNnzjh1SQAAkGYcW777zjvvKBQKadWqVfrc5z6nnTt3at68efr444/16KOPOnVZAACQRhwLItOmTdO0adPCz0eNGqU9e/boqaeeIogAAABJLm9o1tLSooKCnne1a29vV3t7e/h5a2urG9UCAACWuDZZ9d1339UTTzyhBQsW9Fhm+fLlys/PDz9KS0vdqh4AALAg6SCydOlS+Xy+mI+6urqI9xw+fFjTpk3TzTffrLlz5/Z47iVLlqilpSX8OHjwYPKfCAAApI2kh2buvPNOzZgxI2aZsrKy8J8PHz6syZMna8KECXr66adjvi8YDCoYDCZbJQAAkKaSDiKFhYUqLCxMqOyhQ4c0efJkVVVVqaamRn4/25YAAIBPOTZZ9fDhw7r++us1YsQIPfroo/rggw/CrxUXFzt1WQAAkEYcCyLr16/Xvn37tG/fPg0fPjziNWOMU5cFAABpxLGxktmzZ8sYE/UBAAAg8VszAADAIoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAmn62KwAAQKqVFQ6K+md4D0EEAJBxVs2qtl0FJIihGQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAslzIGGvXJogAAJBltjYe16ETn4Sf720+qflr6lTXeNz1uhBEAADIImu2HNAtK2t1sv1sxPHXdjfr5pW1en7LAVfrQxABACBLbG08rvvX7VS0gZjOkJGRdN+6na72jBBEAADIEs+88Z78fl/MMn6/T8+8ud+lGhFEAADICqc7OrWhoUmdodgTUztDRut3HdXpjk5X6kUQAQAgC7SdPqs4GSQsZM6VdwNBBACALJA7oJ/ijMqE+X3nyruBIAIAQBYYkBPQDaOLFIiTRgJ+n6aMKdaAnIAr9SKIAACQJeZOGqVQnPGZUMho7sRyl2pEEAEAIGtcXVagh6ZXKlqfSMDvk0/SQ9MrVV1W4Fqd3BkAAgAAnnDb+JGqKM7V7JqtEZua3TC6SHMnlrsaQiSCCAAAWae6rECXfGag9jS1SZIuGzZEK2+rslIXhmYAAMhyfl+Cy2mcuLa1KwMAgKxHEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANa4Ekfb2dl155ZXy+XzasWOHG5cEAABpwJUg8v3vf18lJSVuXAoAAKQRx4PIf/7nf2r9+vV69NFHnb4UAABIM/2cPHlTU5PmzZundevWadCgQXHLt7e3q729Pfy8tbXVyeoBAADLHOsRMcZo9uzZWrBggaqrqxN6z/Lly5Wfnx9+lJaWOlU9AADgAUkHkaVLl8rn88V81NXV6YknnlBra6uWLFmS8LmXLFmilpaW8OPgwYPJVg8AAKSRpIdm7rzzTs2YMSNmmbKyMj388MPasmWLgsFgxGvV1dWaOXOmnnvuuW7vCwaD3coDAIDMlXQQKSwsVGFhYdxyjz/+uB5++OHw88OHD2vq1Kl64YUX9KUvfSnZywIAgAzk2GTVESNGRDwfMmSIJOnSSy/V8OHDnbosAABII+ysCgAArHF0+e75ysrKZIxx63IAACAN0CMCAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArHFtZ1UAAOAdZYWDov7ZbQQRAACy0KpZ1barIImhGQAAYBFBBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYI2nf33XGCNJam1ttVwTAACQqK7v7a7v8Vg8HUTa2tokSaWlpZZrAgAAktXW1qb8/PyYZXwmkbhiSSgU0uHDh5Wbmyufz9enc7W2tqq0tFQHDx5UXl5eimqYuWiv5NFmyaPNkkN7JY82S14q2swYo7a2NpWUlMjvjz0LxNM9In6/X8OHD0/pOfPy8rgZk0B7JY82Sx5tlhzaK3m0WfL62mbxekK6MFkVAABYQxABAADWZE0QCQaDeuCBBxQMBm1XJS3QXsmjzZJHmyWH9koebZY8t9vM05NVAQBAZsuaHhEAAOA9BBEAAGANQQQAAFhDEAEAANZkTBD56KOPNGvWLOXn5ys/P1+zZs3SiRMnYr7npZde0tSpU1VYWCifz6cdO3Z0K3P99dfL5/NFPGbMmOHMh3CRU+3V3t6uu+66S4WFhRo8eLC+/vWv6y9/+YszH8JlvWkzY4yWLl2qkpISDRw4UNdff7127doVUSaT7rEnn3xS5eXlGjBggKqqqvTGG2/ELL9x40ZVVVVpwIABGjVqlFauXNmtzIsvvqjRo0crGAxq9OjRevnll52qvhWpbrNnn3222/3k8/l0+vRpJz+Ga5JpryNHjuhb3/qWLr/8cvn9fi1evDhqOe6xTyXSZim/x0yGmDZtmqmsrDSbN282mzdvNpWVleamm26K+Z5f/OIXZtmyZWb16tVGkqmvr+9W5rrrrjPz5s0zR44cCT9OnDjh0Kdwj1PttWDBAnPJJZeYDRs2mO3bt5vJkyebK664wpw9e9ahT+Ke3rTZihUrTG5urnnxxRfN22+/bb75zW+aiy++2LS2tobLZMo9tnbtWpOTk2NWr15tGhoazKJFi8zgwYPNgQMHopZ/7733zKBBg8yiRYtMQ0ODWb16tcnJyTG//e1vw2U2b95sAoGAeeSRR8zu3bvNI488Yvr162e2bNni1sdylBNtVlNTY/Ly8iLupyNHjrj1kRyVbHvt37/ffOc73zHPPfecufLKK82iRYu6leEei5RIm6X6HsuIINLQ0GAkRdw4tbW1RpJ555134r5///79MYNItP8Q6cyp9jpx4oTJyckxa9euDR87dOiQ8fv95tVXX01Z/W3oTZuFQiFTXFxsVqxYET52+vRpk5+fb1auXBk+lin32F/91V+ZBQsWRByrqKgw99xzT9Ty3//+901FRUXEsfnz55vx48eHn99yyy1m2rRpEWWmTp1qZsyYkaJa2+VEm9XU1Jj8/PyU19ULkm2v8/X0/xn3WM96arNU32MZMTRTW1ur/Px8felLXwofGz9+vPLz87V58+Y+n/+Xv/ylCgsLNWbMGH3ve98L/ypwunKqvbZt26aOjg5NmTIlfKykpESVlZUp+e9gU2/abP/+/Tp69GhEewSDQV133XXd3pPu99iZM2e0bdu2iM8qSVOmTOmxfWpra7uVnzp1qurq6tTR0RGzTLrfT5JzbSZJJ0+e1MiRIzV8+HDddNNNqq+vT/0HcFlv2isR3GO9k8p7zNM/epeoo0ePatiwYd2ODxs2TEePHu3TuWfOnKny8nIVFxdr586dWrJkid566y1t2LChT+e1yan2Onr0qPr376+LLroo4nhRUVGf/zvY1ps26zpeVFQUcbyoqEgHDhwIP8+Ee+zYsWPq7OyM+lljtU+08mfPntWxY8d08cUX91gm3e8nybk2q6io0LPPPquxY8eqtbVV//Iv/6JrrrlGb731li677DLHPo/TetNeieAeS16q7zFPB5GlS5dq2bJlMcts3bpVkuTz+bq9ZoyJejwZ8+bNC/+5srJSl112maqrq7V9+3ZdddVVfTp3qnmhvaJx6ryp4EabXfj6he9Jp3ssnnifNZHyFx5P9pzpJtVtNn78eI0fPz78+jXXXKOrrrpKTzzxhB5//PFUVdsaJ+4H7rHkpPoe83QQufPOO+OuHigrK9Of//xnNTU1dXvtgw8+6JYE++qqq65STk6O9u7d67kvCdvtVVxcrDNnzuijjz6K6BVpbm7Wl7/85V6f10lOtllxcbGkc//iuvjii8PHm5ubY7azl++xnhQWFioQCHT7V1asz1pcXBy1fL9+/TR06NCYZVL9/7UNTrXZhfx+v66++mrt3bs3NRW3pDftlQjusb7r6z3m6TkihYWFqqioiPkYMGCAJkyYoJaWFv33f/93+L1/+tOf1NLSkvIvwF27dqmjoyPii8UrbLdXVVWVcnJyIoYUjhw5op07d3o2iDjZZl3DLee3x5kzZ7Rx48aY7eHle6wn/fv3V1VVVbfhpA0bNvT4WSdMmNCt/Pr161VdXa2cnJyYZbx6PyXDqTa7kDFGO3bsSKv7KZretFciuMf6rs/3WMqmvVo2bdo088UvftHU1taa2tpaM3bs2G5LKy+//HLz0ksvhZ9/+OGHpr6+3rzyyitGklm7dq2pr68PL0Pat2+fWbZsmdm6davZv3+/eeWVV0xFRYUZN25c2i9HdaK9jDm3fHf48OHmtddeM9u3bzd//dd/nVHLd5NtsxUrVpj8/Hzz0ksvmbffftvceuutEct3M+ke61om+POf/9w0NDSYxYsXm8GDB5vGxkZjjDH33HOPmTVrVrh811LUu+++2zQ0NJif//zn3Zai/vGPfzSBQMCsWLHC7N6926xYsSIjl1amss2WLl1qXn31VfPuu++a+vp6M2fOHNOvXz/zpz/9yfXPl2rJtpcxxtTX15v6+npTVVVlvvWtb5n6+nqza9eu8OvcY8m3WarvsYwJIh9++KGZOXOmyc3NNbm5uWbmzJnmo48+iigjydTU1ISf19TUGEndHg888IAxxpj333/fXHvttaagoMD079/fXHrppeY73/mO+fDDD937YA5xor2MMeaTTz4xd955pykoKDADBw40N910k3n//ffd+VAO602bhUIh88ADD5ji4mITDAbNtddea95+++3w65l2j/3sZz8zI0eONP379zdXXXWV2bhxY/i122+/3Vx33XUR5V9//XUzbtw4079/f1NWVmaeeuqpbuf8zW9+Yy6//HKTk5NjKioqzIsvvuj0x3BVqtts8eLFZsSIEaZ///7ms5/9rJkyZYrZvHmzGx/FFcm2V7S/s0aOHBlRhnvsuojy8dos1feY738vCgAA4DpPzxEBAACZjSACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAmv8P7BQ8YeQ7PJYAAAAASUVORK5CYII=",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"import numpy as np\n",
"bins = 25#np.linspace( -1.5, 1.5, 50 )\n",
"sns.regplot(x=ak.to_numpy(array[\"tx\"]), y=ak.to_numpy(array[\"CY_ex\"]), x_bins=bins, fit_reg=None, x_estimator=np.mean)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ty dSlope_fringe_abs' 'ty tx dSlope_fringe' 'ty dSlope_fringe^2'\n",
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs']\n",
"intercept= 0.0\n",
"coef= {'ty dSlope_fringe_abs': -1.210948273291364e-05, 'ty tx dSlope_fringe': 8.351598715575842e-05, 'ty dSlope_fringe^2': -3.9073446027618556e-05, 'ty^3 dSlope_fringe_abs': 0.0002466815971481776, 'ty tx^2 dSlope_fringe_abs': 0.0001861876635962951}\n",
"r2 score= 0.9704091284593364\n",
"RMSE = 1.4317438911860726e-07\n",
"constexpr std::array CY_ex{-1.210948273291364e-05f, 8.351598715575842e-05f, -3.9073446027618556e-05f, 0.0002466815971481776f, 0.0001861876635962951f};\n"
]
}
],
"source": [
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"dSlope_fringe\",\n",
" \"dSlope_fringe_abs\"\n",
"]\n",
"target_feat = \"CY_ex\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=4, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"#print(poly_features)\n",
"keep = [\n",
" \"ty dSlope_fringe^2\",\n",
" \"ty dSlope_fringe_abs\", \n",
" \"ty tx^2 dSlope_fringe_abs\", \n",
" \"ty^3 dSlope_fringe_abs\", \n",
" #\"ty^3 tx^2 dSlope_fringe_abs\",\n",
" #\"dSlope_fringe\",\n",
" \"ty tx dSlope_fringe\",\n",
" #\"ty tx^3 dSlope_fringe\",\n",
" #\"ty^3 tx dSlope_fringe\",\n",
"]\n",
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
"#remove = [i for i, f in enumerate(poly_features) if (\"dSlope_fringe\" not in f)]# or (\"dSlope_fringe^\" in f) or (\"dSlope_fringe_abs^\" in f) or (\"dSlope_fringe dSlope_fringe_abs\" in f)]\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"print(poly_features)\n",
"\n",
"lin_reg = LinearRegression(fit_intercept=False)\n",
"#lin_reg = Lasso(fit_intercept=False, alpha=0.00000000001)\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"#print(\"straight RMSE =\", mean_squared_error(array[\"ty_ref\"], array[\"ty\"], squared=False))\n",
"print(format_array(\"CY_ex\", lin_reg.coef_))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.6 (conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "a2eff8b4da8b8eebf5ee2e5f811f31a557e0a202b4d2f04f849b065340a6eda6"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}