718 lines
46 KiB
Plaintext
718 lines
46 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import uproot\n",
|
||
|
"import awkward as ak\n",
|
||
|
"import numpy as np\n",
|
||
|
"#input_tree_md = uproot.open({\"/work/guenther/reco_tuner/data/param_data_MD_selected_8520.root\": \"Selected\"})\n",
|
||
|
"#input_tree_mu = uproot.open({\"/work/guenther/reco_tuner/data/param_data_MU_selected_8520.root\": \"Selected\"})\n",
|
||
|
"input_tree = uproot.open({\"/work/guenther/reco_tuner/data/param_data_selected.root\": \"Selected\"})\n",
|
||
|
"# this is an event list of dictionaries containing awkward arrays\n",
|
||
|
"#array_md = input_tree_md.arrays()\n",
|
||
|
"#array_mu = input_tree_mu.arrays()\n",
|
||
|
"#array = ak.concatenate([array_md, array_mu])\n",
|
||
|
"array = input_tree.arrays()\n",
|
||
|
"array[\"dSlope_fringe\"] = array[\"tx_ref\"] - array[\"tx\"]\n",
|
||
|
"array[\"dSlope_fringe_abs\"] = abs(array[\"dSlope_fringe\"])\n",
|
||
|
"array[\"z_mag_x_fringe\"] = (array[\"x\"] - array[\"x_ref\"] - array[\"tx\"] * array[\"z\"] + array[\"tx_ref\"] * array[\"z_ref\"] ) / array[\"dSlope_fringe\"]\n",
|
||
|
"array[\"yStraightRef\"] = array[\"y\"] + array[\"ty\"] * ( array[\"z_ref\"] - array[\"z\"])\n",
|
||
|
"array[\"AY_straight_diff\"] = array[\"AY_ex\"] - array[\"yStraightRef\"]\n",
|
||
|
"array[\"y_ref_straight_diff\"] = array[\"y_ref\"] - array[\"yStraightRef\"]\n",
|
||
|
"array[\"y_straight_diff_l1\"] = array[\"y_l1\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l1\"] - array[\"z\"])\n",
|
||
|
"array[\"y_straight_diff_l2\"] = array[\"y_l2\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l2\"] - array[\"z\"])\n",
|
||
|
"array[\"y_straight_diff_l5\"] = array[\"y_l5\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l5\"] - array[\"z\"])\n",
|
||
|
"array[\"y_straight_diff_l6\"] = array[\"y_l6\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l6\"] - array[\"z\"])\n",
|
||
|
"array[\"y_straight_diff_l9\"] = array[\"y_l9\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l9\"] - array[\"z\"])\n",
|
||
|
"array[\"y_straight_diff_l10\"] = array[\"y_l10\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l10\"] - array[\"z\"])\n",
|
||
|
"array[\"BY_straight_diff\"] = array[\"BY_ex\"] - array[\"ty\"]\n",
|
||
|
"array[\"ty_ref_straight_diff\"] = array[\"ty_ref\"] - array[\"ty\"]\n",
|
||
|
"def format_array(name, coef):\n",
|
||
|
" coef = [str(c)+\"f\" for c in coef if c != 0.0]\n",
|
||
|
" code = f\"constexpr std::array {name}\"\n",
|
||
|
" code += \"{\" + \", \".join(list(coef)) +\"};\"\n",
|
||
|
" return code"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
|
||
|
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
|
||
|
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
|
||
|
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'dSlope_fringe': 2.54927052921483, 'ty dSlope_fringe_abs': 65.18649309504633, 'ty tx dSlope_fringe': 4174.444641065072, 'ty^3 dSlope_fringe_abs': -10543.061353132687, 'ty tx^2 dSlope_fringe_abs': 812.6329282763543, 'ty^3 tx dSlope_fringe': 52906.81696296328, 'ty tx^3 dSlope_fringe': 37770.21841979088, 'ty^3 tx^2 dSlope_fringe_abs': 273401.3548544383}\n",
|
||
|
"r2 score= 0.9424448007543189\n",
|
||
|
"RMSE = 8.058017702810291\n",
|
||
|
"straight RMSE = 33.93970007402008\n",
|
||
|
"constexpr std::array y_ref_straight_diff{2.54927052921483f, 65.18649309504633f, 4174.444641065072f, -10543.061353132687f, 812.6329282763543f, 52906.81696296328f, 37770.21841979088f, 273401.3548544383f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from sklearn.preprocessing import PolynomialFeatures\n",
|
||
|
"from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"from sklearn.pipeline import Pipeline\n",
|
||
|
"from sklearn.metrics import mean_squared_error\n",
|
||
|
"\n",
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"y_ref_straight_diff\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" \"ty tx^3 dSlope_fringe\",\n",
|
||
|
" \"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"print(\"straight RMSE =\", mean_squared_error(array[\"y_ref\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_ref\"] - array[\"z\"] ), squared=False))\n",
|
||
|
"print(format_array(\"y_ref_straight_diff\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
|
||
|
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
|
||
|
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
|
||
|
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'dSlope_fringe': 1.926395569816899, 'ty dSlope_fringe_abs': 155.7194258002827, 'ty tx dSlope_fringe': 3711.2012601369147, 'ty^3 dSlope_fringe_abs': -6986.454915191362, 'ty tx^2 dSlope_fringe_abs': -102.59383584048052, 'ty^3 tx dSlope_fringe': 42360.930984198254, 'ty tx^3 dSlope_fringe': 29857.060893721067, 'ty^3 tx^2 dSlope_fringe_abs': 209095.7911514973}\n",
|
||
|
"r2 score= 0.9391433516368133\n",
|
||
|
"RMSE = 6.578135946782935\n",
|
||
|
"straight RMSE = 26.874058521627408\n",
|
||
|
"constexpr std::array y_straight_diff_l1{1.926395569816899f, 155.7194258002827f, 3711.2012601369147f, -6986.454915191362f, -102.59383584048052f, 42360.930984198254f, 29857.060893721067f, 209095.7911514973f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"y_straight_diff_l1\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" \"ty tx^3 dSlope_fringe\",\n",
|
||
|
" \"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l1\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l1\"] - array[\"z\"] ), squared=False))\n",
|
||
|
"print(format_array(\"y_straight_diff_l1\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
|
||
|
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
|
||
|
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
|
||
|
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'dSlope_fringe': 1.992023095115273, 'ty dSlope_fringe_abs': 147.53158307285184, 'ty tx dSlope_fringe': 3758.3638141105685, 'ty^3 dSlope_fringe_abs': -7387.838564546752, 'ty tx^2 dSlope_fringe_abs': -20.05844697530397, 'ty^3 tx dSlope_fringe': 43544.55794543203, 'ty tx^3 dSlope_fringe': 30729.099594836473, 'ty^3 tx^2 dSlope_fringe_abs': 216301.09830837924}\n",
|
||
|
"r2 score= 0.939662275160689\n",
|
||
|
"RMSE = 6.726003215973192\n",
|
||
|
"straight RMSE = 27.60567927172973\n",
|
||
|
"constexpr std::array y_straight_diff_l2{1.992023095115273f, 147.53158307285184f, 3758.3638141105685f, -7387.838564546752f, -20.05844697530397f, 43544.55794543203f, 30729.099594836473f, 216301.09830837924f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"y_straight_diff_l2\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" \"ty tx^3 dSlope_fringe\",\n",
|
||
|
" \"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l2\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l2\"] - array[\"z\"] ), squared=False))\n",
|
||
|
"print(format_array(\"y_straight_diff_l2\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
|
||
|
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
|
||
|
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
|
||
|
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'dSlope_fringe': 2.6109097507814067, 'ty dSlope_fringe_abs': 55.23845097538265, 'ty tx dSlope_fringe': 4222.757834030979, 'ty^3 dSlope_fringe_abs': -10869.286047558253, 'ty tx^2 dSlope_fringe_abs': 913.7370293450639, 'ty^3 tx dSlope_fringe': 53875.65687892285, 'ty tx^3 dSlope_fringe': 38512.63398443023, 'ty^3 tx^2 dSlope_fringe_abs': 279346.89317804825}\n",
|
||
|
"r2 score= 0.9426062193562865\n",
|
||
|
"RMSE = 8.213743029204338\n",
|
||
|
"straight RMSE = 34.65071255550859\n",
|
||
|
"constexpr std::array y_straight_diff_l5{2.6109097507814067f, 55.23845097538265f, 4222.757834030979f, -10869.286047558253f, 913.7370293450639f, 53875.65687892285f, 38512.63398443023f, 279346.89317804825f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"y_straight_diff_l5\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" \"ty tx^3 dSlope_fringe\",\n",
|
||
|
" \"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l5\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l5\"] - array[\"z\"] ), squared=False))\n",
|
||
|
"print(format_array(\"y_straight_diff_l5\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
|
||
|
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
|
||
|
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
|
||
|
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'dSlope_fringe': 2.6869036549491536, 'ty dSlope_fringe_abs': 42.86164139009395, 'ty tx dSlope_fringe': 4282.473673540287, 'ty^3 dSlope_fringe_abs': -11261.305925638624, 'ty tx^2 dSlope_fringe_abs': 1038.8682634234196, 'ty^3 tx dSlope_fringe': 55053.993311126746, 'ty tx^3 dSlope_fringe': 39412.35231667733, 'ty^3 tx^2 dSlope_fringe_abs': 286552.2319656697}\n",
|
||
|
"r2 score= 0.9427768256184441\n",
|
||
|
"RMSE = 8.406185401645494\n",
|
||
|
"straight RMSE = 35.523013512241214\n",
|
||
|
"constexpr std::array y_straight_diff_l6{2.6869036549491536f, 42.86164139009395f, 4282.473673540287f, -11261.305925638624f, 1038.8682634234196f, 55053.993311126746f, 39412.35231667733f, 286552.2319656697f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"y_straight_diff_l6\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" \"ty tx^3 dSlope_fringe\",\n",
|
||
|
" \"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l6\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l6\"] - array[\"z\"] ), squared=False))\n",
|
||
|
"print(format_array(\"y_straight_diff_l6\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
|
||
|
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
|
||
|
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
|
||
|
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'dSlope_fringe': 3.3840782993219154, 'ty dSlope_fringe_abs': -73.66219618861146, 'ty tx dSlope_fringe': 4855.8672756860515, 'ty^3 dSlope_fringe_abs': -14629.483020343234, 'ty tx^2 dSlope_fringe_abs': 2208.9899749295746, 'ty^3 tx dSlope_fringe': 65168.34060362849, 'ty tx^3 dSlope_fringe': 47225.377082479885, 'ty^3 tx^2 dSlope_fringe_abs': 349003.88327916135}\n",
|
||
|
"r2 score= 0.943344616193092\n",
|
||
|
"RMSE = 10.238398557856689\n",
|
||
|
"straight RMSE = 43.54352590748684\n",
|
||
|
"constexpr std::array y_straight_diff_l9{3.3840782993219154f, -73.66219618861146f, 4855.8672756860515f, -14629.483020343234f, 2208.9899749295746f, 65168.34060362849f, 47225.377082479885f, 349003.88327916135f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"y_straight_diff_l9\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" \"ty tx^3 dSlope_fringe\",\n",
|
||
|
" \"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l9\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l9\"] - array[\"z\"] ), squared=False))\n",
|
||
|
"print(format_array(\"y_straight_diff_l9\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
|
||
|
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
|
||
|
" 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
|
||
|
" 'ty^3 tx^2 dSlope_fringe_abs']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'dSlope_fringe': 3.466479515860849, 'ty dSlope_fringe_abs': -87.54166562301657, 'ty tx dSlope_fringe': 4926.350778945553, 'ty^3 dSlope_fringe_abs': -15008.616026945787, 'ty tx^2 dSlope_fringe_abs': 2347.3262833969093, 'ty^3 tx dSlope_fringe': 66293.67978997533, 'ty tx^3 dSlope_fringe': 48106.815974854835, 'ty^3 tx^2 dSlope_fringe_abs': 356053.9012962991}\n",
|
||
|
"r2 score= 0.9433372187768841\n",
|
||
|
"RMSE = 10.46108881485266\n",
|
||
|
"straight RMSE = 44.49300835626165\n",
|
||
|
"constexpr std::array y_straight_diff_l10{3.466479515860849f, -87.54166562301657f, 4926.350778945553f, -15008.616026945787f, 2347.3262833969093f, 66293.67978997533f, 48106.815974854835f, 356053.9012962991f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"y_straight_diff_l10\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" \"ty tx^3 dSlope_fringe\",\n",
|
||
|
" \"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"print(\"straight RMSE =\", mean_squared_error(array[\"y_l10\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l10\"] - array[\"z\"] ), squared=False))\n",
|
||
|
"print(format_array(\"y_straight_diff_l10\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 41,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'ty tx dSlope_fringe': 0.9394190987843558, 'ty dSlope_fringe^2': -0.46480920487603206, 'ty^3 dSlope_fringe_abs': -4.137993544858158, 'ty tx^2 dSlope_fringe_abs': 2.980803780828937, 'ty tx^3 dSlope_fringe': 12.402177409386482, 'ty^3 tx^2 dSlope_fringe_abs': 38.238656269022954}\n",
|
||
|
"r2 score= 0.9649625489427109\n",
|
||
|
"RMSE = 0.0025237463237113896\n",
|
||
|
"straight RMSE = 0.013705994187091751\n",
|
||
|
"constexpr std::array ty_ref_straight_diff{0.9394190987843558f, -0.46480920487603206f, -4.137993544858158f, 2.980803780828937f, 12.402177409386482f, 38.238656269022954f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"ty_ref_straight_diff\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe^2\",\n",
|
||
|
" #\"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" #\"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" \"ty tx^3 dSlope_fringe\",\n",
|
||
|
" #\"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"#remove = [i for i, f in enumerate(poly_features) if (\"dSlope_fringe\" not in f) or (\"dSlope_fringe^\" in f) or (\"dSlope_fringe_abs^\" in f) or (\"dSlope_fringe dSlope_fringe_abs\" in f)]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"#lin_reg = Lasso(fit_intercept=False, alpha=0.0000000001)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"print(\"straight RMSE =\", mean_squared_error(array[\"ty_ref\"], array[\"ty\"], squared=False))\n",
|
||
|
"print(format_array(\"ty_ref_straight_diff\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 12,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"<AxesSubplot: >"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 12,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiIAAAGsCAYAAADg5swfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAnZUlEQVR4nO3df3RU5YH/8c/MEIZfSWpISYwEEqw1hVDFxC1UUNmtgFvbcs4eLRZZYYUDp2jBnm4r5+sPUCv01O262iqI3WixLZ5Wpbtfd12w/QpaQpdAsEKQBSVI+ZGISBJEQsg83z/YjAyZzI9k7n3uzLxf58w5zJ1n7n3m8cp8eH6NzxhjBAAAYIHfdgUAAED2IogAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAa9ImiGzatElf+9rXVFJSIp/Pp3Xr1jl6vbNnz+ree+9VeXm5Bg4cqFGjRunBBx9UKBRy9LoAAGSTfrYrkKiPP/5YV1xxhebMmaO/+7u/c/x6P/rRj7Ry5Uo999xzGjNmjOrq6jRnzhzl5+dr0aJFjl8fAIBskDZB5MYbb9SNN97Y4+tnzpzRvffeq1/+8pc6ceKEKisr9aMf/UjXX399r65XW1urb3zjG/rqV78qSSorK9Ovf/1r1dXV9ep8AACgu7QZmolnzpw5+uMf/6i1a9fqz3/+s26++WZNmzZNe/fu7dX5Jk6cqN///vf6n//5H0nSW2+9pTfffFN/+7d/m8pqAwCQ1dKmRySWd999V7/+9a/1l7/8RSUlJZKk733ve3r11VdVU1OjRx55JOlz/uAHP1BLS4sqKioUCATU2dmpH/7wh7r11ltTXX0AALJWRgSR7du3yxijz3/+8xHH29vbNXToUElSY2OjysvLY55n4cKF+ulPfypJeuGFF/T888/rV7/6lcaMGaMdO3Zo8eLFKikp0e233+7MBwEAIMtkRBAJhUIKBALatm2bAoFAxGtDhgyRJF1yySXavXt3zPNcdNFF4T//4z/+o+655x7NmDFDkjR27FgdOHBAy5cvJ4gAAJAiGRFExo0bp87OTjU3N2vSpElRy+Tk5KiioiLhc546dUp+f+QUmkAgwPJdAABSKG2CyMmTJ7Vv377w8/3792vHjh0qKCjQ5z//ec2cOVN///d/r3/6p3/SuHHjdOzYMf3hD3/Q2LFjezXB9Gtf+5p++MMfasSIERozZozq6+v1k5/8RP/wD/+Qyo8FAEBW8xljjO1KJOL111/X5MmTux2//fbb9eyzz6qjo0MPP/ywfvGLX+jQoUMaOnSoJkyYoGXLlmns2LFJX6+trU333XefXn75ZTU3N6ukpES33nqr7r//fvXv3z8VHwkAgKyXNkEEAABknozZRwQAAKQfgggAALDG05NVQ6GQDh8+rNzcXPl8PtvVAQAACTDGqK2tTSUlJd1WoF7I00Hk8OHDKi0ttV0NAADQCwcPHtTw4cNjlvF0EMnNzZV07oPk5eVZrg0AAEhEa2urSktLw9/jsXg6iHQNx+Tl5RFEAABIM4lMq2CyKgAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGs8/VszTpm/pk6Nx05JksoKB2nVrGrLNQIAIDtlZRBpPHZKe5rabFcDAICsx9AMAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAa1wLIsuXL5fP59PixYvduiQAAPA4V4LI1q1b9fTTT+uLX/yiG5cDAABpwvEgcvLkSc2cOVOrV6/WRRdd5PTlAABAGnE8iCxcuFBf/epX9ZWvfCVu2fb2drW2tkY8AABA5urn5MnXrl2r7du3a+vWrQmVX758uZYtW+ZklQAAgIc41iNy8OBBLVq0SM8//7wGDBiQ0HuWLFmilpaW8OPgwYNOVQ8AAHiAYz0i27ZtU3Nzs6qqqsLHOjs7tWnTJv30pz9Ve3u7AoFAxHuCwaCCwaBTVQIAAB7jWBD5m7/5G7399tsRx+bMmaOKigr94Ac/6BZCAABA9nEsiOTm5qqysjLi2ODBgzV06NBux20KGWO7CgAAZK2s21l1a+NxHTrxSfj53uaTmr+mTnWNxy3WCgCA7OQzxrtdAq2trcrPz1dLS4vy8vL6fL41Ww7o/nU7deEHDvh9CoWMHppeqdvGj+zzdQAAyGbJfH9nTY/I1sbjUUOIJHWGjIyk+9btpGcEAAAXZU0QeeaN9+T3+2KW8ft9eubN/S7VCAAAZEUQOd3RqQ0NTeoMxR6F6gwZrd91VKc7Ol2qGQAA2S0rgkjb6bOKk0HCQuZceQAA4LysCCK5A/opzqhMmN93rjwAAHBeVgSRATkB3TC6SIE4aSTg92nKmGINyGGzNQAA3JAVQUSS5k4apVCc8ZlQyGjuxHKXagQAALImiFxdVqCHplcqWp9IwO+TT9JD0ytVXVbgdtUAAMhaWTUZ4rbxI1VRnKvZNVt1sv3TCak3jC7S3InlhBAAAFyWVUFEkqrLCnTJZwZqT1ObJOmyYUO08raqOO8CAABOyJqhmZ74fQkupwEAACmX9UEEAADYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFjTz3YFbCgrHBT1zwAAwF1ZGURWzaq2XQUAACCGZgAAgEUEEQAAYA1BBAAAWEMQAQAA1hBEAACANY4GkeXLl+vqq69Wbm6uhg0bpunTp2vPnj1OXhIAAKQRR4PIxo0btXDhQm3ZskUbNmzQ2bNnNWXKFH388cdOXhYAAKQJnzHGuHWxDz74QMOGDdPGjRt17bXXxi3f2tqq/Px8tbS0KC8vz4UaAgCAvkrm+9vVDc1aWlokSQUFBVFfb29vV3t7e/h5a2urK/UCAAB2uDZZ1Rij7373u5o4caIqKyujllm+fLny8/PDj9LSUreqBwAALHBtaGbhwoV65ZVX9Oabb2r48OFRy0TrESktLWVoBgCANOK5oZm77rpL//Zv/6ZNmzb1GEIkKRgMKhgMulElAADgAY4GEWOM7rrrLr388st6/fXXVV5e7uTlAABAmnE0iCxcuFC/+tWv9Lvf/U65ubk6evSoJCk/P18DBw508tIAACANODpHxOfzRT1eU1Oj2bNnx30/y3cBAEg/npkj4uIWJQAAIA3xWzMAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsMbVX99Fepi/pk6Nx05JksoKB2nVrGrLNQIAZCqCCLppPHZKe5rabFcDAJAFCCJIK/TWAEBmIYggrdBbAwCZhcmqAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABr+tmuAAD01fw1dWo8dkqSVFY4SKtmVVuuEYBEEUQApL3GY6e0p6nNdjUA9AJDMwAAwBqCCAAAsIYgAgAArCGIIG2FjLFdBQBAHxFEkDa2Nh7XoROfhJ/vbT6p+WvqVNd43GK
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import seaborn as sns\n",
|
||
|
"import numpy as np\n",
|
||
|
"bins = 25#np.linspace( -1.5, 1.5, 50 )\n",
|
||
|
"sns.regplot(x=ak.to_numpy(array[\"tx\"]), y=ak.to_numpy(array[\"CY_ex\"]), x_bins=bins, fit_reg=None, x_estimator=np.mean)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 26,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['ty dSlope_fringe_abs' 'ty tx dSlope_fringe' 'ty dSlope_fringe^2'\n",
|
||
|
" 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {'ty dSlope_fringe_abs': -1.210948273291364e-05, 'ty tx dSlope_fringe': 8.351598715575842e-05, 'ty dSlope_fringe^2': -3.9073446027618556e-05, 'ty^3 dSlope_fringe_abs': 0.0002466815971481776, 'ty tx^2 dSlope_fringe_abs': 0.0001861876635962951}\n",
|
||
|
"r2 score= 0.9704091284593364\n",
|
||
|
"RMSE = 1.4317438911860726e-07\n",
|
||
|
"constexpr std::array CY_ex{-1.210948273291364e-05f, 8.351598715575842e-05f, -3.9073446027618556e-05f, 0.0002466815971481776f, 0.0001861876635962951f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"features = [\n",
|
||
|
" \"ty\", \n",
|
||
|
" \"tx\",\n",
|
||
|
" \"dSlope_fringe\",\n",
|
||
|
" \"dSlope_fringe_abs\"\n",
|
||
|
"]\n",
|
||
|
"target_feat = \"CY_ex\"\n",
|
||
|
"\n",
|
||
|
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
|
||
|
"target = ak.to_numpy(array[target_feat])\n",
|
||
|
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
|
||
|
"\n",
|
||
|
"poly = PolynomialFeatures(degree=4, include_bias=False)\n",
|
||
|
"X_train_model = poly.fit_transform( X_train )\n",
|
||
|
"X_test_model = poly.fit_transform( X_test )\n",
|
||
|
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
|
"#print(poly_features)\n",
|
||
|
"keep = [\n",
|
||
|
" \"ty dSlope_fringe^2\",\n",
|
||
|
" \"ty dSlope_fringe_abs\", \n",
|
||
|
" \"ty tx^2 dSlope_fringe_abs\", \n",
|
||
|
" \"ty^3 dSlope_fringe_abs\", \n",
|
||
|
" #\"ty^3 tx^2 dSlope_fringe_abs\",\n",
|
||
|
" #\"dSlope_fringe\",\n",
|
||
|
" \"ty tx dSlope_fringe\",\n",
|
||
|
" #\"ty tx^3 dSlope_fringe\",\n",
|
||
|
" #\"ty^3 tx dSlope_fringe\",\n",
|
||
|
"]\n",
|
||
|
"remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
|
||
|
"#remove = [i for i, f in enumerate(poly_features) if (\"dSlope_fringe\" not in f)]# or (\"dSlope_fringe^\" in f) or (\"dSlope_fringe_abs^\" in f) or (\"dSlope_fringe dSlope_fringe_abs\" in f)]\n",
|
||
|
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
|
||
|
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
|
||
|
"poly_features = np.delete(poly_features, remove )\n",
|
||
|
"print(poly_features)\n",
|
||
|
"\n",
|
||
|
"lin_reg = LinearRegression(fit_intercept=False)\n",
|
||
|
"#lin_reg = Lasso(fit_intercept=False, alpha=0.00000000001)\n",
|
||
|
"lin_reg.fit( X_train_model, y_train)\n",
|
||
|
"y_pred_test = lin_reg.predict( X_test_model )\n",
|
||
|
"print(\"intercept=\", lin_reg.intercept_)\n",
|
||
|
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
|
||
|
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
|
||
|
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
|
||
|
"#print(\"straight RMSE =\", mean_squared_error(array[\"ty_ref\"], array[\"ty\"], squared=False))\n",
|
||
|
"print(format_array(\"CY_ex\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3.10.6 (conda)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.10.6"
|
||
|
},
|
||
|
"orig_nbformat": 4,
|
||
|
"vscode": {
|
||
|
"interpreter": {
|
||
|
"hash": "a2eff8b4da8b8eebf5ee2e5f811f31a557e0a202b4d2f04f849b065340a6eda6"
|
||
|
}
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|