You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

212 lines
21 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import uproot\n",
"import awkward as ak\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"input_tree = uproot.open({\"/work/guenther/reco_tuner/data/param_data_selected_all_p.root\": \"Selected\"})\n",
"array = input_tree.arrays()\n",
"array[\"dSlope_fringe\"] = array[\"tx_ref\"] - array[\"tx\"]\n",
"array[\"poqmag_gev\"] = 1. / ( array[\"signed_rel_current\"] * array[\"qop\"] * 1000. )\n",
"array[\"B_integral\"] = array[\"poqmag_gev\"] * array[\"dSlope_fringe\"]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([1.00000e+00, 0.00000e+00, 0.00000e+00, 1.00000e+00, 0.00000e+00,\n",
" 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,\n",
" 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 1.00000e+00,\n",
" 1.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,\n",
" 0.00000e+00, 0.00000e+00, 1.00000e+00, 0.00000e+00, 0.00000e+00,\n",
" 2.00000e+00, 2.00000e+00, 4.00000e+00, 0.00000e+00, 3.00000e+00,\n",
" 0.00000e+00, 4.00000e+00, 3.00000e+00, 1.00000e+00, 4.00000e+00,\n",
" 4.00000e+00, 1.00000e+00, 1.00000e+01, 8.00000e+00, 8.00000e+00,\n",
" 2.30000e+01, 3.20000e+01, 6.30000e+01, 8.40000e+01, 1.22000e+02,\n",
" 2.03000e+02, 3.10000e+02, 3.98000e+02, 5.90000e+02, 8.65000e+02,\n",
" 1.08700e+03, 1.55300e+03, 2.01000e+03, 2.67400e+03, 3.49500e+03,\n",
" 4.46300e+03, 5.85300e+03, 7.16800e+03, 8.92900e+03, 1.12130e+04,\n",
" 1.37320e+04, 1.69420e+04, 2.09870e+04, 2.59060e+04, 3.24750e+04,\n",
" 3.94560e+04, 4.81480e+04, 6.21400e+04, 8.28540e+04, 1.20767e+05,\n",
" 2.15388e+05, 5.13195e+05, 5.20000e+02, 3.29000e+02, 1.83000e+02,\n",
" 1.32000e+02, 1.02000e+02, 8.30000e+01, 6.40000e+01, 5.00000e+01,\n",
" 2.70000e+01, 2.80000e+01, 2.20000e+01, 1.80000e+01, 2.00000e+01,\n",
" 1.50000e+01, 1.00000e+01, 4.00000e+00, 7.00000e+00, 6.00000e+00,\n",
" 5.00000e+00, 2.00000e+00, 2.00000e+00, 3.00000e+00, 0.00000e+00,\n",
" 2.00000e+00, 3.00000e+00, 0.00000e+00, 0.00000e+00, 2.00000e+00]),\n",
" array([-2.45885773, -2.44145641, -2.4240551 , -2.40665378, -2.38925247,\n",
" -2.37185115, -2.35444984, -2.33704853, -2.31964721, -2.3022459 ,\n",
" -2.28484458, -2.26744327, -2.25004195, -2.23264064, -2.21523933,\n",
" -2.19783801, -2.1804367 , -2.16303538, -2.14563407, -2.12823275,\n",
" -2.11083144, -2.09343013, -2.07602881, -2.0586275 , -2.04122618,\n",
" -2.02382487, -2.00642355, -1.98902224, -1.97162093, -1.95421961,\n",
" -1.9368183 , -1.91941698, -1.90201567, -1.88461436, -1.86721304,\n",
" -1.84981173, -1.83241041, -1.8150091 , -1.79760778, -1.78020647,\n",
" -1.76280516, -1.74540384, -1.72800253, -1.71060121, -1.6931999 ,\n",
" -1.67579858, -1.65839727, -1.64099596, -1.62359464, -1.60619333,\n",
" -1.58879201, -1.5713907 , -1.55398938, -1.53658807, -1.51918676,\n",
" -1.50178544, -1.48438413, -1.46698281, -1.4495815 , -1.43218018,\n",
" -1.41477887, -1.39737756, -1.37997624, -1.36257493, -1.34517361,\n",
" -1.3277723 , -1.31037098, -1.29296967, -1.27556836, -1.25816704,\n",
" -1.24076573, -1.22336441, -1.2059631 , -1.18856178, -1.17116047,\n",
" -1.15375916, -1.13635784, -1.11895653, -1.10155521, -1.0841539 ,\n",
" -1.06675258, -1.04935127, -1.03194996, -1.01454864, -0.99714733,\n",
" -0.97974601, -0.9623447 , -0.94494338, -0.92754207, -0.91014076,\n",
" -0.89273944, -0.87533813, -0.85793681, -0.8405355 , -0.82313418,\n",
" -0.80573287, -0.78833156, -0.77093024, -0.75352893, -0.73612761,\n",
" -0.7187263 ]),\n",
" <BarContainer object of 100 artists>)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.hist(array[\"B_integral\"], bins=100, log=True)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ty^2' 'tx^2' 'tx tx_ref' 'tx_ref^2' 'ty^4' 'ty^2 tx^2' 'ty^2 tx tx_ref'\n",
" 'ty^2 tx_ref^2' 'tx^4' 'tx^3 tx_ref' 'tx_ref^4']\n",
"intercept= -1.2094486121528516\n",
"coef= {'ty^2': -2.7897043324822492, 'tx^2': -0.35976930628193077, 'tx tx_ref': -0.47138558705675454, 'tx_ref^2': -0.5600847231491961, 'ty^4': 14.009315350693472, 'ty^2 tx^2': -16.162818973243674, 'ty^2 tx tx_ref': -8.807994419844437, 'ty^2 tx_ref^2': -0.8753190393972976, 'tx^4': 2.98254201374128, 'tx^3 tx_ref': 0.9625408279466898, 'tx_ref^4': 0.10200564097830103}\n",
"r2 score= 0.9916826041227943\n",
"RMSE = 0.006014471039836984\n",
"['ty^2', 'tx^2', 'tx tx_ref', 'tx_ref^2', 'ty^4', 'ty^2 tx^2', 'ty^2 tx tx_ref', 'ty^2 tx_ref^2', 'tx^4', 'tx^3 tx_ref', 'tx_ref^4']\n"
]
}
],
"source": [
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.metrics import mean_squared_error\n",
"import numpy as np\n",
"\n",
"features = [\n",
" \"ty\", \n",
" \"tx\",\n",
" \"tx_ref\",\n",
"]\n",
"target_feat = \"B_integral\"\n",
"\n",
"data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
"target = ak.to_numpy(array[target_feat])\n",
"X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
"\n",
"poly = PolynomialFeatures(degree=5, include_bias=False)\n",
"X_train_model = poly.fit_transform( X_train )\n",
"X_test_model = poly.fit_transform( X_test )\n",
"poly_features = poly.get_feature_names_out(input_features=features)\n",
"reduce = True\n",
"if reduce:\n",
" keep = [\n",
" #'ty',\n",
" #'tx',\n",
" #'tx_ref',\n",
" #'ty tx',\n",
" #'ty tx_ref',\n",
" 'ty^2',#keep\n",
" 'tx^2',#keep\n",
" 'tx tx_ref',#keep\n",
" 'tx_ref^2',#keep\n",
" 'ty^2 tx tx_ref',#keep\n",
" 'ty^2 tx^2',#keep\n",
" 'ty^2 tx_ref^2', #keep\n",
" 'tx^4',#keep\n",
" 'ty^4',#keep\n",
" 'tx_ref^4',#keep\n",
" #'tx_ref^5',\n",
" 'tx^3 tx_ref', #keep\n",
" #'tx tx_ref^3',\n",
" #'tx^2 tx_ref^2',\n",
" #'ty tx_ref^4',\n",
" #'tx tx_ref^4',\n",
" #'tx_ref^5',\n",
" ]\n",
" remove = [i for i, f in enumerate(poly_features) if (keep and f not in keep )]\n",
" X_train_model = np.delete( X_train_model, remove, axis=1)\n",
" X_test_model = np.delete( X_test_model, remove, axis=1)\n",
" poly_features = np.delete(poly_features, remove )\n",
" print(poly_features)\n",
"if not reduce:\n",
" lin_reg = Lasso( alpha=0.0000001)#Lasso(fit_intercept=False, alpha=0.001)\n",
"else:\n",
" lin_reg = LinearRegression()\n",
"lin_reg.fit( X_train_model, y_train)\n",
"y_pred_test = lin_reg.predict( X_test_model )\n",
"print(\"intercept=\", lin_reg.intercept_)\n",
"print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
"print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
"print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
"print([key for key, val in dict(zip(poly_features, lin_reg.coef_)).items() if val != 0.0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.6 (conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "a2eff8b4da8b8eebf5ee2e5f811f31a557e0a202b4d2f04f849b065340a6eda6"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}