174 lines
6.4 KiB
Plaintext
174 lines
6.4 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import uproot\n",
|
||
|
"import awkward as ak\n",
|
||
|
"import numpy as np\n",
|
||
"# NOTE(review): absolute, machine-specific input location; adjust when running elsewhere.\n",
"INPUT_FILE = \"/work/guenther/reco_tuner/data/param_data_selected.root\"\n",
"INPUT_TREE = \"Selected\"\n",
"# z to which both (y, ty, z) sets are extrapolated along straight lines\n",
"# (presumably the end of the T stations, going by the \"EndT\" names; confirm).\n",
"Z_REF = 9410.\n",
"\n",
"input_tree = uproot.open({INPUT_FILE: INPUT_TREE})\n",
"array = input_tree.arrays()\n",
"\n",
"# Slope differences between the \"*_l11\" columns and the unsuffixed ones, plus their magnitudes.\n",
"array[\"dSlope_xEndT\"] = array[\"tx_l11\"] - array[\"tx\"]\n",
"array[\"dSlope_yEndT\"] = array[\"ty_l11\"] - array[\"ty\"]\n",
"array[\"dSlope_xEndT_abs\"] = abs(array[\"dSlope_xEndT\"])\n",
"array[\"dSlope_yEndT_abs\"] = abs(array[\"dSlope_yEndT\"])\n",
"\n",
"# Straight-line extrapolation of (y, ty, z) to Z_REF.\n",
"array[\"yStraightEndT\"] = array[\"y\"] + array[\"ty\"] * (Z_REF - array[\"z\"])\n",
"# Same extrapolation for the \"*_l11\" columns, minus the extrapolation above.\n",
"array[\"yDiffEndT\"] = (array[\"y_l11\"] + array[\"ty_l11\"] * (Z_REF - array[\"z_l11\"])) - array[\"yStraightEndT\"]\n",
|
||
|
"\n",
|
||
"def format_array(name, coef):\n",
"    \"\"\"Build a C++ ``constexpr std::array`` definition from regression coefficients.\n",
"\n",
"    Every coefficient that is not equal to 0.0 is written as ``str(value)``\n",
"    followed by an ``f`` suffix; zero coefficients are left out, so the\n",
"    generated initializer can be shorter than ``coef``.\n",
"\n",
"    Args:\n",
"        name: identifier of the generated array.\n",
"        coef: iterable of numeric coefficients.\n",
"\n",
"    Returns:\n",
"        The definition as a single-line string.\n",
"    \"\"\"\n",
"    literals = []\n",
"    for value in coef:\n",
"        if value != 0.0:\n",
"            literals.append(str(value) + \"f\")\n",
"    initializer = \", \".join(literals)\n",
"    return f\"constexpr std::array {name}\" + \"{\" + initializer + \"};\""
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 89,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['dSlope_yEndT' 'ty dSlope_yEndT_abs' 'ty tx dSlope_xEndT'\n",
|
||
|
" 'ty dSlope_xEndT^2' 'ty dSlope_yEndT^2' 'tx^2 dSlope_yEndT'\n",
|
||
|
" 'ty tx^2 dSlope_xEndT_abs' 'ty^3 tx dSlope_xEndT']\n",
|
||
|
"intercept= 0.0\n",
|
||
|
"coef= {}\n",
|
||
|
"r2 score= 0.9971571295750978\n",
|
||
|
"RMSE = 2.422206064647647\n",
|
||
|
"straight RMSE = 45.67726454181064\n",
|
||
|
"constexpr std::array y_xEndT_diff{4039.5218935644916f, 1463.501458069602f, 2210.102099471291f, 1537.0718454152473f, -411.54564619803864f, 2594.7244053238287f, -1030.7643414023526f, 14904.842115636024f};\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from sklearn.preprocessing import PolynomialFeatures\n",
|
||
|
"from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"from sklearn.pipeline import Pipeline\n",
|
||
|
"from sklearn.metrics import mean_squared_error\n",
|
||
|
"\n",
|
||
"# Input columns that PolynomialFeatures expands into candidate regression terms.\n",
"features = [\n",
"    \"ty\",\n",
"    \"tx\",\n",
"    \"dSlope_xEndT\",\n",
"    \"dSlope_yEndT\",\n",
"    \"dSlope_xEndT_abs\",\n",
"    \"dSlope_yEndT_abs\",\n",
"]\n",
"# Column of `array` used as the regression target.\n",
"target_feat = \"yDiffEndT\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"# Fixed random_state keeps the 80/20 split reproducible between runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"# All monomials of the inputs up to total degree 6, without the constant column.\n",
"poly = PolynomialFeatures(degree=6, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"# Only transform the held-out split; the expansion is fitted on the training data alone.\n",
"X_test_model = poly.transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
|
||
"# Polynomial terms retained when `reduce` is True. Entries are compared by exact\n",
"# name with the PolynomialFeatures output names; only membership is used.\n",
"keep = [\n",
"    'dSlope_yEndT',\n",
"    'ty dSlope_xEndT_abs',\n",
"    'ty dSlope_yEndT_abs',\n",
"    'ty dSlope_yEndT^2',\n",
"    'ty dSlope_xEndT^2',\n",
"    'ty tx dSlope_xEndT',\n",
"    'tx^2 dSlope_yEndT',\n",
"    'ty tx^2 dSlope_xEndT_abs',\n",
"    'ty^3 tx dSlope_xEndT',\n",
"]\n",
"# Candidate terms currently disabled in `keep` (left here for reference):\n",
"#   'dSlope_xEndT', 'dSlope_yEndT_abs', 'ty dSlope_xEndT', 'ty dSlope_yEndT',\n",
"#   'tx dSlope_xEndT', 'tx dSlope_xEndT_abs', 'tx dSlope_yEndT',\n",
"#   'ty^2 dSlope_xEndT', 'ty^2 dSlope_yEndT', 'ty^2 dSlope_xEndT_abs',\n",
"#   'ty^2 tx dSlope_xEndT', 'ty tx^2 dSlope_yEndT', 'ty tx^3 dSlope_xEndT',\n",
"#   'ty^3 dSlope_yEndT_abs'\n",
"\n",
"# Terms that, like those in `keep`, are filtered out of the \"coef=\" printout.\n",
"do_not_keep = [\n",
"    'dSlope_xEndT',\n",
"    'dSlope_yEndT_abs',\n",
"    'ty dSlope_xEndT',\n",
"    'tx dSlope_xEndT',\n",
"    'tx dSlope_xEndT_abs',\n",
"    'tx dSlope_yEndT',\n",
"    'ty^2 dSlope_xEndT',\n",
"    'ty^3 dSlope_yEndT_abs',\n",
"    'ty tx dSlope_yEndT',\n",
"    'ty tx^3 dSlope_xEndT',\n",
"    'ty tx^2 dSlope_yEndT',\n",
"]\n",
|
||
"# True: fit only the hand-picked `keep` terms with ordinary least squares.\n",
"# False: fit a pattern-filtered set of terms with a weakly regularised Lasso.\n",
"reduce = True\n",
"if reduce:\n",
"    # An empty `keep` list removes nothing.\n",
"    remove = [i for i, f in enumerate(poly_features) if f not in keep] if keep else []\n",
"    lin_reg = LinearRegression(fit_intercept=False)\n",
"else:\n",
"    # Drop terms that contain no dSlope_ factor or match one of these substrings\n",
"    # (a powered dSlope column, or a product of two dSlope columns).\n",
"    excluded_patterns = (\"EndT^\", \"abs^\", \"EndT dSlope\", \"abs dSlope\")\n",
"    remove = [\n",
"        i for i, f in enumerate(poly_features)\n",
"        if (\"dSlope_\" not in f) or any(pattern in f for pattern in excluded_patterns)\n",
"    ]\n",
"    lin_reg = Lasso(fit_intercept=False, alpha=0.000001)\n",
"\n",
"# Remove the same columns from both design matrices and from the name list.\n",
"X_train_model = np.delete( X_train_model, remove, axis=1)\n",
"X_test_model = np.delete( X_test_model, remove, axis=1)\n",
"poly_features = np.delete(poly_features, remove )\n",
"if reduce:\n",
"    print(poly_features)\n",
|
||
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"# Show only coefficients with magnitude above 1.0 whose term is in neither `keep` nor `do_not_keep`.\n",
"print(\"coef=\", {k: v for k, v in zip(poly_features, lin_reg.coef_) if abs(v) > 1.0 and k not in keep and k not in do_not_keep})\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"# RMSE is taken as np.sqrt(MSE): the `squared` keyword of mean_squared_error was\n",
"# deprecated in scikit-learn 1.4 and removed in 1.6.\n",
"print(\"RMSE =\", np.sqrt(mean_squared_error(y_test, y_pred_test)))\n",
"# Baseline: y_l11 compared with the straight-line extrapolation of (y, ty, z) to z_l11.\n",
"print(\"straight RMSE =\", np.sqrt(mean_squared_error(array[\"y_l11\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l11\"] - array[\"z\"] ))))\n",
"print(format_array(\"y_xEndT_diff\", lin_reg.coef_))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3.10.6 (conda)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.10.6"
|
||
|
},
|
||
|
"orig_nbformat": 4,
|
||
|
"vscode": {
|
||
|
"interpreter": {
|
||
|
"hash": "a2eff8b4da8b8eebf5ee2e5f811f31a557e0a202b4d2f04f849b065340a6eda6"
|
||
|
}
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|