{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Polynomial parametrisation of `yDiffEndT`\n",
    "\n",
    "Fits a linear model on polynomial combinations of the track slopes (`tx`, `ty`) and the slope differences, and prints the fitted coefficients as a C++ `constexpr std::array`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import awkward as ak\n",
    "import numpy as np\n",
    "import uproot\n",
    "from sklearn.linear_model import Lasso, LinearRegression, Ridge\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import PolynomialFeatures\n",
    "\n",
    "# Input ROOT file and the name of the tree read from it.\n",
    "INPUT_FILE = \"/work/guenther/reco_tuner/data/param_data_selected.root\"\n",
    "INPUT_TREE = \"Selected\"\n",
    "# z position at which the two y extrapolations are compared\n",
    "# (presumably the end of the T stations, going by the \"EndT\" naming - TODO confirm).\n",
    "Z_REF = 9410.0\n",
    "\n",
    "# Read every branch into memory, then release the file handle.\n",
    "with uproot.open({INPUT_FILE: INPUT_TREE}) as input_tree:\n",
    "    array = input_tree.arrays()\n",
    "\n",
    "# Slope differences between the `*_l11` slopes and the plain slopes, plus their magnitudes.\n",
    "array[\"dSlope_xEndT\"] = array[\"tx_l11\"] - array[\"tx\"]\n",
    "array[\"dSlope_yEndT\"] = array[\"ty_l11\"] - array[\"ty\"]\n",
    "array[\"dSlope_xEndT_abs\"] = abs(array[\"dSlope_xEndT\"])\n",
    "array[\"dSlope_yEndT_abs\"] = abs(array[\"dSlope_yEndT\"])\n",
    "# Straight-line extrapolation of (y, ty) from z to Z_REF.\n",
    "array[\"yStraightEndT\"] = array[\"y\"] + array[\"ty\"] * (Z_REF - array[\"z\"])\n",
    "# Regression target: the `*_l11` state extrapolated to Z_REF minus the straight-line value.\n",
    "array[\"yDiffEndT\"] = (array[\"y_l11\"] + array[\"ty_l11\"] * (Z_REF - array[\"z_l11\"])) - array[\"yStraightEndT\"]\n",
    "\n",
    "\n",
    "def format_array(name, coef):\n",
    "    \"\"\"Render fit coefficients as a C++ ``constexpr std::array`` definition.\n",
    "\n",
    "    Args:\n",
    "        name: Identifier of the generated C++ array.\n",
    "        coef: Iterable of numeric coefficients.\n",
    "\n",
    "    Returns:\n",
    "        One line of C++ source; every value is written with an ``f`` suffix.\n",
    "\n",
    "    Note:\n",
    "        Coefficients equal to 0.0 are left out, so the result only lines up\n",
    "        one-to-one with the feature list when no coefficient is exactly zero.\n",
    "    \"\"\"\n",
    "    literals = [str(c) + \"f\" for c in coef if c != 0.0]\n",
    "    return f\"constexpr std::array {name}\" + \"{\" + \", \".join(literals) + \"};\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Term selection and fit\n",
    "\n",
    "With `reduce = True` only the terms listed in `keep` enter an ordinary least-squares fit. With `reduce = False` a Lasso fit runs over a wider, pattern-filtered set of terms, and the printed `coef=` dictionary shows the terms that are in neither `keep` nor `do_not_keep`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['dSlope_yEndT' 'ty dSlope_yEndT_abs' 'ty tx dSlope_xEndT'\n",
      " 'ty dSlope_xEndT^2' 'ty dSlope_yEndT^2' 'tx^2 dSlope_yEndT'\n",
      " 'ty tx^2 dSlope_xEndT_abs' 'ty^3 tx dSlope_xEndT']\n",
      "intercept= 0.0\n",
      "coef= {}\n",
      "r2 score= 0.9971571295750978\n",
      "RMSE = 2.422206064647647\n",
      "straight RMSE = 45.67726454181064\n",
      "constexpr std::array y_xEndT_diff{4039.5218935644916f, 1463.501458069602f, 2210.102099471291f, 1537.0718454152473f, -411.54564619803864f, 2594.7244053238287f, -1030.7643414023526f, 14904.842115636024f};\n"
     ]
    }
   ],
   "source": [
    "features = [\n",
    "    \"ty\",\n",
    "    \"tx\",\n",
    "    \"dSlope_xEndT\",\n",
    "    \"dSlope_yEndT\",\n",
    "    \"dSlope_xEndT_abs\",\n",
    "    \"dSlope_yEndT_abs\",\n",
    "]\n",
    "target_feat = \"yDiffEndT\"\n",
    "\n",
    "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
    "target = ak.to_numpy(array[target_feat])\n",
    "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
    "\n",
    "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
    "X_train_model = poly.fit_transform(X_train)\n",
    "# The test split is only transformed; the expansion is defined by the fit on the training split.\n",
    "X_test_model = poly.transform(X_test)\n",
    "poly_features = poly.get_feature_names_out(input_features=features)\n",
    "\n",
    "# Terms used by the reduced model; commented-out entries are candidates that were tried and dropped.\n",
    "keep = [\n",
    "    #'dSlope_xEndT',\n",
    "    'dSlope_yEndT',  # keep\n",
    "    #'dSlope_yEndT_abs',\n",
    "    #'ty dSlope_xEndT',\n",
    "    #'ty dSlope_yEndT',\n",
    "    'ty dSlope_xEndT_abs',  # keep\n",
    "    'ty dSlope_yEndT_abs',  # keep\n",
    "    'ty dSlope_yEndT^2',  # keep\n",
    "    'ty dSlope_xEndT^2',  # keep\n",
    "    #'tx dSlope_xEndT',\n",
    "    #'tx dSlope_xEndT_abs',\n",
    "    #'tx dSlope_yEndT',\n",
    "    'ty tx dSlope_xEndT',  # keep\n",
    "    'tx^2 dSlope_yEndT',  # keep\n",
    "    #'ty^2 dSlope_xEndT',\n",
    "    #'ty^2 dSlope_yEndT',\n",
    "    #'ty^2 dSlope_xEndT_abs',\n",
    "    #'ty^2 tx dSlope_xEndT',\n",
    "    #'ty tx^2 dSlope_yEndT',\n",
    "    'ty tx^2 dSlope_xEndT_abs',  # keep\n",
    "    'ty^3 tx dSlope_xEndT',  # keep\n",
    "    #'ty tx^3 dSlope_xEndT',\n",
    "    #'ty^3 dSlope_yEndT_abs',\n",
    "]\n",
    "# Terms already rejected; only used to hide them from the `coef=` printout below.\n",
    "do_not_keep = [\n",
    "    'dSlope_xEndT',\n",
    "    'dSlope_yEndT_abs',\n",
    "    'ty dSlope_xEndT',\n",
    "    'tx dSlope_xEndT',\n",
    "    'tx dSlope_xEndT_abs',\n",
    "    'tx dSlope_yEndT',\n",
    "    'ty^2 dSlope_xEndT',\n",
    "    'ty^3 dSlope_yEndT_abs',\n",
    "    'ty tx dSlope_yEndT',\n",
    "    'ty tx^3 dSlope_xEndT',\n",
    "    'ty tx^2 dSlope_yEndT',\n",
    "]\n",
    "\n",
    "reduce = True\n",
    "if reduce:\n",
    "    # Drop every term that is not hand-picked; an empty `keep` list drops nothing.\n",
    "    remove = [i for i, f in enumerate(poly_features) if (keep and f not in keep)]\n",
    "    lin_reg = LinearRegression(fit_intercept=False)\n",
    "else:\n",
    "    # Drop terms without a dSlope factor, powers of a dSlope term (\"EndT^\", \"abs^\")\n",
    "    # and products of two dSlope terms (\"EndT dSlope\", \"abs dSlope\").\n",
    "    remove = [i for i, f in enumerate(poly_features) if (\"dSlope_\" not in f) or (\"EndT^\" in f) or (\"abs^\" in f) or (\"EndT dSlope\" in f) or (\"abs dSlope\" in f)]\n",
    "    lin_reg = Lasso(fit_intercept=False, alpha=0.000001)\n",
    "X_train_model = np.delete(X_train_model, remove, axis=1)\n",
    "X_test_model = np.delete(X_test_model, remove, axis=1)\n",
    "poly_features = np.delete(poly_features, remove)\n",
    "if reduce:\n",
    "    print(poly_features)\n",
    "\n",
    "lin_reg.fit(X_train_model, y_train)\n",
    "y_pred_test = lin_reg.predict(X_test_model)\n",
    "print(\"intercept=\", lin_reg.intercept_)\n",
    "# Only sizeable coefficients of terms not yet sorted into `keep` or `do_not_keep` are shown.\n",
    "print(\"coef=\", {k: v for k, v in zip(poly_features, lin_reg.coef_) if abs(v) > 1.0 and k not in keep and k not in do_not_keep})\n",
    "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
    "# RMSE is taken as sqrt(MSE): the `squared=False` shortcut of mean_squared_error\n",
    "# is deprecated and removed in newer scikit-learn releases.\n",
    "print(\"RMSE =\", np.sqrt(mean_squared_error(y_test, y_pred_test)))\n",
    "# Baseline: straight-line extrapolation of (y, ty) to z_l11 compared with y_l11.\n",
    "print(\"straight RMSE =\", np.sqrt(mean_squared_error(array[\"y_l11\"], array[\"y\"] + array[\"ty\"] * (array[\"z_l11\"] - array[\"z\"]))))\n",
    "print(format_array(\"y_xEndT_diff\", lin_reg.coef_))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.6 (conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "a2eff8b4da8b8eebf5ee2e5f811f31a557e0a202b4d2f04f849b065340a6eda6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}