You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

308 lines
82 KiB

10 months ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "name": "stdout",
  10. "output_type": "stream",
  11. "text": [
  12. "[1, -1, 1, -1, 1, -1, -1, 1, -1, -1, 1, -1, ... 1, -1, 1, -1, 1, -1, 1, -1, 1, 1, 1]\n",
  13. "[1, -1, 1, -1, 1, -1, -1, 1, -1, -1, 1, -1, ... 1, -1, 1, -1, 1, -1, 1, -1, 1, 1, 1]\n"
  14. ]
  15. },
  16. {
  17. "data": {
  18. "text/plain": [
  19. "(array([ 8228., 9035., 9624., 10447., 11068., 11652., 12633., 13575.,\n",
  20. " 14666., 15612., 16815., 18180., 19541., 21065., 22981., 24949.,\n",
  21. " 27515., 29858., 32704., 35823., 38704., 41355., 41764., 32588.,\n",
  22. " 9430., 9730., 33115., 42543., 41654., 38601., 35860., 32884.,\n",
  23. " 29817., 27346., 25018., 23023., 21451., 19626., 18003., 16743.,\n",
  24. " 15334., 14789., 13454., 12433., 11604., 10882., 10287., 9608.,\n",
  25. " 8794., 8285.]),\n",
  26. " array([-1200., -1152., -1104., -1056., -1008., -960., -912., -864.,\n",
  27. " -816., -768., -720., -672., -624., -576., -528., -480.,\n",
  28. " -432., -384., -336., -288., -240., -192., -144., -96.,\n",
  29. " -48., 0., 48., 96., 144., 192., 240., 288.,\n",
  30. " 336., 384., 432., 480., 528., 576., 624., 672.,\n",
  31. " 720., 768., 816., 864., 912., 960., 1008., 1056.,\n",
  32. " 1104., 1152., 1200.]),\n",
  33. " <BarContainer object of 50 artists>)"
  34. ]
  35. },
  36. "execution_count": 1,
  37. "metadata": {},
  38. "output_type": "execute_result"
  39. },
  40. {
  41. "data": {
  42. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGdCAYAAAAbudkLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA2eElEQVR4nO3df1BU973/8Rflx4oUzuVHYNlKrZ0q1aK5udgA2lYTFXREmsa5mpLZ0V6LyRglRJi0JnNvTKdK4s/01ltrvU5s1JTMXGvbXA0Fp5GUEfxBwhTU2HRiIjasGF0W9dKFkPP9I1/PZMUYURT5+HzMnBn3nPee/ZzPGveVzzmfc8Js27YFAABgoC8MdAMAAABuFoIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYEQPdgIH08ccf64MPPlBsbKzCwsIGujkAAOAa2Lat8+fPy+Px6AtfuPqYzR0ddD744AOlpaUNdDMAAMB1aGlp0bBhw65ac0cHndjYWEmfdFRcXNwAtwYAAFyLjo4OpaWlOb/jV3NHB51Lp6vi4uIIOgAADDLXctkJFyMDAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGCtioBsAALe79dV//dyaJ6aNugUtAdBXjOgAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiL6eUAbimmagO4lQg6ANAPCHDA7YlTVwAAwFiM6ADoN9cyqgEAtxIjOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxrqhoFNeXq6wsDCVlJQ462zb1vLly+XxeBQdHa3JkyfryJEjIe8LBoNasmSJkpKSFBMTo4KCAp06dSqkxu/3y+v1yrIsWZYlr9er9vb2kJqTJ09q1qxZiomJUVJSkoqLi9XV1XUjhwQAAAxy3UHn0KFD+tWvfqVx48aFrF+1apXWrVunDRs26NChQ3K73Zo2bZrOnz/v1JSUlGjXrl2qqKhQbW2tLly4oPz8fPX09Dg1hYWFamxsVGVlpSorK9XY2Civ1+ts7+np0cyZM3Xx4kXV1taqoqJCO3fuVGlp6fUeEgAAMMx1BZ0LFy7o4Ycf1ubNmxUfH++st21bL7zwgp5++mk9+OCDysjI0K9//Wv93//9n15++WVJUiAQ0JYtW7R27VpNnTpV99xzj7Zv366mpibt3btXknTs2DFVVlbqv//7v5WTk6OcnBxt3rxZ//u//6vjx49LkqqqqnT06FFt375d99xzj6ZOnaq1a9dq8+bN6ujouNF+AQAABriuoPPYY49p5syZmjp1asj6EydOyOfzKTc311nncrk0adIk7d+/X5LU0NCg7u7ukBqPx6OMjAynpq6uTpZlKSsry6nJzs6WZVkhNRkZGfJ4PE5NXl6egsGgGhoartjuYDCojo6OkAUAAJirz4+AqKio0JtvvqlDhw712ubz+SRJKSkpIetTUlL0/vvvOzVRUVEhI0GXai693+fzKTk5udf+k5OTQ2ou/5z4+HhFRUU5NZcrLy/Xs88+ey2HCQAADNCnEZ2WlhY9/vjj2r59u4YMGfKZdWFhYSGvbdvute5yl9dcqf56aj5t2bJlCgQCztLS0nLVNgEAgMGtT0GnoaFBbW1tyszMVEREhCIiIlRTU6P//M//VEREhDPCcvmISltbm7PN7Xarq6tLfr//qjWnT5/u9flnzpwJqbn8c/x+v7q7u3uN9FzicrkUFxcXsgAAAHP1KehMmTJFTU1NamxsdJbx48fr4YcfVmNjo7761a/K7XarurraeU9XV5dqamo0YcIESVJmZqYiIyNDalpbW9Xc3OzU5OTkKBAI6ODBg07NgQMHFAgEQmqam5vV2trq1FRVVcnlcikzM/M6ugIAAJimT9foxMbGKiMjI2RdTEyMEhMTnfUlJSVauXKlRo4cqZEjR2rlypUaOnSoCgsLJUmWZWnBggUqLS1VYmKiEhISVFZWprFjxzoXN48ePVrTp09XUVGRNm3aJElauHCh8vPzlZ6eLknKzc3VmDFj5PV6tXr1ap07d05lZWUqKipipAYAAEi6jouRP8+TTz6pzs5OLVq0SH6/X1lZWaqqqlJsbKxTs379ekVERGjOnDnq7OzUlClTtHXrVoWHhzs1O3bsUHFxsTM7q6CgQBs2bHC2h4eHa/fu3Vq0aJEmTpyo6OhoFRYWas2aNf19SAAAYJAKs23bHuhGDJSOjg5ZlqVAIMAoENAP1lf/tV/288S0Uf2yn/5i6nEBg1Vffr951hUAADAWQQcAABiLoAMAAIxF0AEAAMbq91lXAMzUXxfkAsCtRNABgFvkWsIiM7OA/sWpKwAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGNFDHQDAAy89dV/Hegm4P+7lu/iiWmjbkFLADMwogMAAIxF0AEAAMbqU9DZuHGjxo0bp7i4OMXFxSknJ0evvfaas33+/PkKCwsLWbKzs0P2EQwGtWTJEiUlJSkmJkYFBQU6depUSI3f75fX65VlWbIsS16vV+3t7SE1J0+e1KxZsxQTE6OkpCQVFxerq6urj4cPAABM1qegM2zYMD333HM6fPiwDh8+rPvvv1/f/e53deTIEadm+vTpam1tdZY9e/aE7KOkpES7du1SRUWFamtrdeHCBeXn56unp8epKSwsVGNjoyorK1VZWanGxkZ5vV5ne09Pj2bOnKmLFy+qtrZWFRUV2rlzp0pLS6+3HwAAgIH6dDHyrFmzQl6vWLFCGzduVH19vb7xjW9Iklwul9xu9xXfHwgEtGXLFm3btk1Tp06VJG3fvl1paWnau3ev8vLydOzYMVVWVqq+vl5ZWVmSpM2bNysnJ0fHjx9Xenq6qqqqdPToUbW0tMjj8UiS1q5dq/nz52vFihWKi4vrWy8AAAAjXfc1Oj09PaqoqNDFixeVk5PjrN+3b5+Sk5M1atQoFRUVqa2tzdnW0NCg7u5u5ebmOus8Ho8yMjK0f/9+SVJdXZ0sy3JCjiRlZ2fLsqyQmoyMDCfkSFJeXp6CwaAaGho+s83BYFAdHR0hCwAAMFefg05TU5O++MUvyuVy6dFHH9WuXbs0ZswYSdKMGTO0Y8cO/elPf9LatWt16NAh3X///QoGg5Ikn8+nqKgoxcfHh+wzJSVFPp/PqUlOTu71ucnJySE1KSkpIdvj4+MVFRXl1FxJeXm5c92PZVlKS0vr6+EDAIBBpM/30UlPT1djY6Pa29u1c+dOzZs3TzU1NRozZozmzp3r1GVkZGj8+PEaPny4du/erQcffPAz92nbtsLCwpzXn/7zjdRcbtmyZVq6dKnzuqOjg7ADAIDB+jyiExUVpa997WsaP368ysvLdffdd+tnP/vZFWtTU1M1fPhwvfPOO5Ikt9utrq4u+f3+kLq2tjZnhMbtduv06dO99nXmzJmQmstHbvx+v7q7u3uN9Hyay+VyZoxdWgAAgLlu+D46tm07p6Yud/bsWbW0tCg1NVWSlJmZqcjISFVXVzs1ra2tam5u1oQJEyRJOTk5CgQCOnjwoFNz4MABBQKBkJrm5ma1trY6NVVVVXK5XMrMzLzRQwIAAIbo06mrp556SjNmzFBaWprOnz+viooK7du3T5WVlbpw4YKWL1+u2bNnKzU1Ve+9956eeuopJSUl6Xvf+54kybIsLViwQKWlpUpMTFRCQoLKyso0duxYZxbW6NGjNX36dBUVFWnTpk2SpIULFyo/P1/p6emSpNzcXI0ZM0Zer1erV6/WuXPnVFZWpqKiIkZpAACAo09B5/Tp0/J6vWptbZVlWRo3bpwqKys1bdo0dXZ2qqmpSS+99JLa29uVmpqq++67T6+88opiY2O
  43. "text/plain": [
  44. "<Figure size 640x480 with 1 Axes>"
  45. ]
  46. },
  47. "metadata": {},
  48. "output_type": "display_data"
  49. }
  50. ],
  51. "source": [
  52. "import uproot\n",
  53. "import awkward as ak\n",
  54. "import matplotlib\n",
  55. "import matplotlib.pyplot as plt\n",
  56. "input_tree = uproot.open({\"/work/guenther/reco_tuner/data/param_data_selected_all_p.root\": \"Selected\"})\n",
  57. "array = input_tree.arrays()\n",
  58. "array[\"dSlope_fringe\"] = array[\"tx_ref\"] - array[\"tx\"]\n",
  59. "array[\"z_mag_x_fringe\"] = (array[\"x\"] - array[\"x_ref\"] - array[\"tx\"] * array[\"z\"] + array[\"tx_ref\"] * array[\"z_ref\"] ) / array[\"dSlope_fringe\"]\n",
  60. "array[\"x_straight_diff_ref\"] = array[\"x\"] + array[\"tx\"] * (array[\"z_ref\"] - array[\"z\"]) - array[\"x_ref\"]\n",
  61. "array[\"x_straight_diff_ref_abs\"] = abs(array[\"x_straight_diff_ref\"])\n",
  62. "array[\"inv_p_gev\"] = 1000. / array[\"p\"]\n",
  63. "array[\"pol_qop_gev\"] = array[\"signed_rel_current\"] * array[\"qop\"] * 1000.\n",
  64. "array[\"qop_gev\"] = array[\"qop\"] * 1000.\n",
  65. "array[\"deflection_sign\"] = array[\"pol_qop_gev\"] / array[\"inv_p_gev\"]\n",
  66. "print(array[\"deflection_sign\"])\n",
  67. "print(array[\"x_straight_diff_ref\"] / array[\"x_straight_diff_ref_abs\"])\n",
  68. "plt.figure()\n",
  69. "plt.hist(array[\"x_straight_diff_ref\"], bins=50,\n",
  70. " range=[-1200, 1200], alpha=0.5)"
  71. ]
  72. },
  73. {
  74. "cell_type": "code",
  75. "execution_count": 2,
  76. "metadata": {},
  77. "outputs": [
  78. {
  79. "name": "stdout",
  80. "output_type": "stream",
  81. "text": [
  82. "['inv_p_gev' 'ty^2 inv_p_gev' 'tx^2 inv_p_gev' 'tx inv_p_gev pol_qop_gev'\n",
  83. " 'inv_p_gev^3' 'tx^3 pol_qop_gev' 'tx^2 inv_p_gev^2'\n",
  84. " 'tx inv_p_gev^2 pol_qop_gev' 'inv_p_gev^4' 'ty^2 tx^2 inv_p_gev'\n",
  85. " 'ty^2 tx inv_p_gev pol_qop_gev' 'ty^2 inv_p_gev^3' 'tx^4 inv_p_gev']\n",
  86. "intercept= 0.0\n",
  87. "coef= {'inv_p_gev': 4018.896625676043, 'ty^2 inv_p_gev': 6724.789549369031, 'tx^2 inv_p_gev': 3970.9093976497766, 'tx inv_p_gev pol_qop_gev': -4363.5807241252905, 'inv_p_gev^3': 1421.1056758688073, 'tx^3 pol_qop_gev': 4934.07761471779, 'tx^2 inv_p_gev^2': 6985.252911263751, 'tx inv_p_gev^2 pol_qop_gev': -5538.28013195104, 'inv_p_gev^4': 1642.8616070452542, 'ty^2 tx^2 inv_p_gev': 106068.96918885755, 'ty^2 tx inv_p_gev pol_qop_gev': -94446.81037767915, 'ty^2 inv_p_gev^3': 26489.793756692892, 'tx^4 inv_p_gev': -23936.54391006025}\n",
  88. "r2 score= 0.999838251106393\n",
  89. "RMSE = 6.897104452420935\n",
  90. "['inv_p_gev', 'ty^2 inv_p_gev', 'tx^2 inv_p_gev', 'tx inv_p_gev pol_qop_gev', 'inv_p_gev^3', 'tx^3 pol_qop_gev', 'tx^2 inv_p_gev^2', 'tx inv_p_gev^2 pol_qop_gev', 'inv_p_gev^4', 'ty^2 tx^2 inv_p_gev', 'ty^2 tx inv_p_gev pol_qop_gev', 'ty^2 inv_p_gev^3', 'tx^4 inv_p_gev']\n"
  91. ]
  92. }
  93. ],
  94. "source": [
  95. "from sklearn.preprocessing import PolynomialFeatures\n",
  96. "from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
  97. "from sklearn.model_selection import train_test_split\n",
  98. "from sklearn.pipeline import Pipeline\n",
  99. "from sklearn.metrics import mean_squared_error\n",
  100. "import numpy as np\n",
  101. "\n",
  102. "features = [\n",
  103. " \"ty\", \n",
  104. " \"tx\",\n",
  105. " \"inv_p_gev\",\n",
  106. " \"pol_qop_gev\"\n",
  107. "]\n",
  108. "target_feat = \"x_straight_diff_ref_abs\"\n",
  109. "\n",
  110. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  111. "target = ak.to_numpy(array[target_feat])\n",
  112. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  113. "\n",
  114. "poly = PolynomialFeatures(degree=5, include_bias=False)\n",
  115. "X_train_model = poly.fit_transform( X_train )\n",
  116. "X_test_model = poly.fit_transform( X_test )\n",
  117. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  118. "reduce = True\n",
  119. "if reduce:\n",
  120. " keep = [\n",
  121. " 'inv_p_gev', \n",
  122. " 'ty^2 inv_p_gev', \n",
  123. " 'tx^2 inv_p_gev', \n",
  124. " 'tx inv_p_gev pol_qop_gev', \n",
  125. " 'inv_p_gev^3', \n",
  126. " 'tx^3 pol_qop_gev', \n",
  127. " 'tx^2 inv_p_gev^2',\n",
  128. " 'tx inv_p_gev^2 pol_qop_gev',\n",
  129. " 'inv_p_gev^4', \n",
  130. " 'ty^2 tx^2 inv_p_gev', \n",
  131. " 'ty^2 tx inv_p_gev pol_qop_gev',\n",
  132. " 'ty^2 inv_p_gev^3', \n",
  133. " 'tx^4 inv_p_gev', \n",
  134. " ]\n",
  135. " remove = [i for i, f in enumerate(poly_features) if (keep and f not in keep ) or \"p_gev\" not in f]\n",
  136. " X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  137. " X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  138. " poly_features = np.delete(poly_features, remove )\n",
  139. " print(poly_features)\n",
  140. "\n",
  141. "#lin_reg = Lasso( fit_intercept=False, alpha=0.00001)#Lasso(fit_intercept=False, alpha=0.001)\n",
  142. "lin_reg = LinearRegression( fit_intercept=False)\n",
  143. "lin_reg.fit( X_train_model, y_train)\n",
  144. "y_pred_test = lin_reg.predict( X_test_model )\n",
  145. "print(\"intercept=\", lin_reg.intercept_)\n",
  146. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  147. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  148. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  149. "print([key for key, val in dict(zip(poly_features, lin_reg.coef_)).items() if val != 0.0])"
  150. ]
  151. },
  152. {
  153. "cell_type": "code",
  154. "execution_count": 3,
  155. "metadata": {},
  156. "outputs": [
  157. {
  158. "data": {
  159. "text/plain": [
  160. "(array([ 165., 160., 184., 192., 194., 215., 235., 258.,\n",
  161. " 266., 249., 245., 284., 279., 318., 316., 339.,\n",
  162. " 359., 316., 383., 442., 415., 457., 456., 520.,\n",
  163. " 555., 561., 616., 614., 698., 743., 818., 883.,\n",
  164. " 957., 1053., 1153., 1285., 1539., 1671., 1931., 2327.,\n",
  165. " 2809., 3299., 4006., 4818., 5850., 6966., 8973., 11792.,\n",
  166. " 15176., 22655., 61283., 26651., 5480., 2436., 1872., 1632.,\n",
  167. " 1423., 1285., 1223., 1165., 1046., 1022., 928., 872.,\n",
  168. " 811., 742., 763., 741., 717., 712., 635., 649.,\n",
  169. " 614., 579., 544., 488., 473., 480., 431., 427.,\n",
  170. " 424., 413., 398., 362., 349., 306., 297., 295.,\n",
  171. " 269., 257., 231., 246., 208., 211., 221., 207.,\n",
  172. " 174., 187., 179., 188.]),\n",
  173. " array([-5. , -4.9, -4.8, -4.7, -4.6, -4.5, -4.4, -4.3, -4.2, -4.1, -4. ,\n",
  174. " -3.9, -3.8, -3.7, -3.6, -3.5, -3.4, -3.3, -3.2, -3.1, -3. , -2.9,\n",
  175. " -2.8, -2.7, -2.6, -2.5, -2.4, -2.3, -2.2, -2.1, -2. , -1.9, -1.8,\n",
  176. " -1.7, -1.6, -1.5, -1.4, -1.3, -1.2, -1.1, -1. , -0.9, -0.8, -0.7,\n",
  177. " -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0. , 0.1, 0.2, 0.3, 0.4,\n",
  178. " 0.5, 0.6, 0.7, 0.8, 0.9, 1. , 1.1, 1.2, 1.3, 1.4, 1.5,\n",
  179. " 1.6, 1.7, 1.8, 1.9, 2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6,\n",
  180. " 2.7, 2.8, 2.9, 3. , 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7,\n",
  181. " 3.8, 3.9, 4. , 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8,\n",
  182. " 4.9, 5. ]),\n",
  183. " <BarContainer object of 100 artists>)"
  184. ]
  185. },
  186. "execution_count": 3,
  187. "metadata": {},
  188. "output_type": "execute_result"
  189. },
  190. {
  191. "data": {
  192. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGdCAYAAAAbudkLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAqEElEQVR4nO3df1DU94H/8dcKskEKn6IEtjtBw/UYToJpGswgmlZzKpgBqb2bmh65vXjjYTyMhAqjsf2jNtMDq0ZzV0ajuZvaJsbt9DzT3qkUbu5Kj1HU0DINGnPNxRRUEI3rgg7Zpfj5/pHx8+2CNa6/Vt55PmY+f/D+vHY/788nafbV935212Xbti0AAAADjYv1BAAAAO4Uig4AADAWRQcAABiLogMAAIxF0QEAAMai6AAAAGNRdAAAgLEoOgAAwFjxsZ5ALF25ckVnzpxRcnKyXC5XrKcDAABugG3bGhgYkNfr1bhx11+z+VQXnTNnzigzMzPW0wAAADehu7tbDzzwwHUzn+qik5ycLOnjC5WSkhLj2QAAgBvR39+vzMxM53X8ej7VRefq21UpKSkUHQAAxpgbue2Em5EBAICxKDoAAMBYFB0AAGAsig4AADAWRQcAABiLogMAAIxF0QEAAMai6AAAAGNRdAAAgLEoOgAAwFgUHQAAYCyKDgAAMBZFBwAAGIuiAwAAjBUf6wkAwJ3y4Av7Ro19sL4kBjMBECus6AAAAGNRdAAAgLGiLjqnT5/WX//1X2vSpEmaMGGCHnnkEbW3tzv7bdvWunXr5PV6lZiYqDlz5ujYsWMRzxEKhbRy5UqlpaUpKSlJZWVlOnXqVEQmEAjI5/PJsixZliWfz6eLFy9GZLq6urRw4UIlJSUpLS1NVVVVCofD0Z4SAAAwVFRFJxAIaNasWRo/frwOHDig48eP66WXXtJnP/tZJ7NhwwZt3rxZDQ0NOnr0qDwej+bPn6+BgQEnU11drb1798rv96u1tVWXLl1SaWmphoeHnUx5ebk6OjrU2NioxsZGdXR0yOfzOfuHh4dVUlKiy5cvq7W1VX6/X3v27FFNTc0tXA4AAGAUOwpr1qyxH3/88T+6/8qVK7bH47HXr1/vjH300Ue2ZVn2K6+8Ytu2bV+8eNEeP3687ff7nczp06ftcePG2Y2NjbZt2/bx48dtSXZbW5uTOXTokC3JPnHihG3btr1//3573Lhx9unTp53M7t27bbfbbQeDwRs6n2AwaEu64TyAsWXKmv8YtQEY+6J5/Y5qRednP/uZpk+frq997WtKT0/XF7/4Rb366qvO/pMnT6q3t1dFRUXOmNvt1uzZs3Xw4EFJUnt7u4aGhiIyXq9XeXl5TubQoUOyLEsFBQVOZsaMGbIsKyKTl5cnr9frZIqLixUKhSLeSgMAAJ9eURWd999/X9u2bVN2drZ+/vOfa/ny5aqqqtKPfvQjSVJvb68kKSMjI+JxGRkZzr7e3l4lJCQoNTX1upn09PRRx09PT4/IjDxOamqqEhISnMxIoVBI/f39ERsAADBXVN+jc+XKFU2fPl11dXWSpC9+8Ys6duyYtm3bpr/5m79xci6XK+Jxtm2PGhtpZOZa+ZvJ/KH6+np95zvfue48AACAOaJa0fnc5z6n3NzciLGpU6eqq6tLkuTxeCRp1IpKX1+fs/ri8XgUDocVCASumzl79uyo4587dy4iM/I4gUBAQ0NDo1Z6rlq7dq2CwaCzdXd339B5AwCAsSmqojNr1iy9++67EWP/+7//qylTpkiSsrKy5PF41Nzc7OwPh8NqaWnRzJkzJUn5+fkaP358RKanp0ednZ1OprCwUMFgUEeOHHEyhw8fVjAYjMh0dnaqp6fHyTQ1Ncntdis/P/+a83e73UpJSYnYAACAuaJ66+ob3/iGZs6cqbq6Oi1evFhHjhzRjh07tGPHDkkfv5VUXV2turo6ZWdnKzs7W3V1dZowYYLKy8slSZZlaenSpaqpqdGkSZM0ceJE1dbWatq0aZo3b56kj1eJFixYoIqKCm3fvl2StGzZMpWWlionJ0eSVFRUpNzcXPl8Pm3cuFEXLlxQbW2tKioqKDAAAEBSlEXnscce0969e7V27Vq9+OKLysrK0ssvv6ynn37ayaxevVqDg4OqrKxUIBBQQUGBmpqalJyc7GS2bNmi+Ph4LV68WIODg5o7d6527typuLg4J7Nr1y5VVVU5n84qKytTQ0ODsz8uLk779u1TZWWlZs2apcTERJWXl2vTpk03fTEAAIBZXLZt27GeRKz09/fLsiwFg0FWgQAD8aOegJmief3mt64AAICxKDoAAMBYFB0AAGAsig4AADAWRQcAABiLogMAAIxF0QEAAMai6AAAAGNRdAAAgLEoOgAAwFgUHQAAYCyKDgAAMBZFBwAAGIuiAwAAjEXRAQAAxqLoAAAAY1F0AACAsSg6AADAWBQdAABgLIoOAAAwFkUHAAAYi6IDAACMRdEBAADGougAAABjUXQAAICxKDoAAMBYFB0AAGAsig4AADAWRQcAABiLogMAAIxF0QEAAMai6AAAAGNRdAAAgLEoOgAAwFgUHQAAYCyKDgAAMBZFBwAAGIuiAwAAjEXRAQAAxqLoAAAAY1F0AACAsSg6AADAWBQdAABgLIoOAAAwFkUHAAAYi6IDAACMRdEBAADGiqrorFu3Ti6XK2LzeDzOftu2tW7dOnm9XiUmJmrOnDk6duxYxHOEQiGtXLlSaWlpSkpKUllZmU6dOhWRCQQC8vl8sixLlmXJ5/Pp4sWLEZmuri4tXLhQSUlJSktLU1VVlcLhcJSnDwAATBb1is5DDz2knp4eZ3v77bedfRs2bNDmzZvV0NCgo0ePyuPxaP78+RoYGHAy1dXV2rt3r/x+v1pbW3Xp0iWVlpZqeHjYyZSXl6ujo0ONjY1qbGxUR0eHfD6fs394eFglJSW6fPmyWltb5ff7tWfPHtXU1NzsdQAAAAaKj/oB8fERqzhX2batl19+Wd/61rf0F3/xF5KkH/7wh8rIyNAbb7yhZ599VsFgUP/yL/+i1157TfPmzZMkvf7668rMzNR//ud/qri4WO+8844aGxvV1tamgoICSdKrr76qwsJCvfvuu8rJyVFTU5OOHz+u7u5ueb1eSdJLL72kJUuW6B/+4R+UkpJy0xcEAACYI+oVnd/+9rfyer3KysrS17/+db3//vuSpJMnT6q3t1dFRUVO1u12a/bs2Tp48KAkqb29XUNDQxEZr9ervLw8J3Po0CFZluWUHEmaMWOGLMuKyOTl5TklR5KKi4sVCoXU3t7+R+ceCoXU398fsQEAAHNFVXQKCgr0ox/9SD//+c/16quvqre3VzNnztSHH36o3t5eSVJGRkbEYzIyMpx9vb29SkhIUGpq6nUz6enpo46dnp4ekRl5nNTUVCUkJDiZa6mvr3fu+7EsS5mZmdGcPgAAGGOiKjpPPvmk/vIv/1LTpk3TvHnztG/fPkkfv0V1lcvliniMbdujxkYamblW/mYyI61du1bBYNDZuru7rzsvAAAwtt3Sx8uTkpI0bdo0/fa3v3Xu2xm5otLX1+esvng8HoXDYQUCgetmzp49O+pY586di8iMPE4gENDQ0NColZ4/5Ha7lZKSErEBAABz3VLRCYVCeuedd/S5z31OWVlZ8ng8am5udvaHw2G1tLRo5syZkqT8/HyNHz8+ItPT06POzk4nU1hYqGAwqCNHjjiZw4cPKxgMRmQ6OzvV09PjZJqamuR2u5Wfn38rpwQAAAwS1aeuamtrtXDhQk2ePFl9fX367ne/q/7+fj3zzDNyuVyqrq5WXV2dsrOzlZ2drbq6Ok2YMEHl5eWSJMuytHTpUtXU1GjSpEmaOHGiamtrnbfCJGnq1KlasGCBKioqtH37dknSsmXLVFpaqpycHElSUVGRcnNz5fP5tHHjRl24cEG1tbWqqKhglQYAADiiKjqnTp3SX/3VX+n8+fO6//77NWPGDLW1tWnKlCmSpNWrV2twcFCVlZUKBAIqKChQU1OTkpOTnefYsmWL4uPjtXjxYg0ODmru3LnauXOn4uLinMyuXbtUVVXlfDq
  193. "text/plain": [
  194. "<Figure size 640x480 with 1 Axes>"
  195. ]
  196. },
  197. "metadata": {},
  198. "output_type": "display_data"
  199. }
  200. ],
  201. "source": [
  202. "plt.hist(y_pred_test-y_test, bins=100, range=[-5,5])"
  203. ]
  204. },
  205. {
  206. "cell_type": "code",
  207. "execution_count": 4,
  208. "metadata": {},
  209. "outputs": [
  210. {
  211. "data": {
  212. "text/plain": [
  213. "(array([[nan, nan, nan, ..., nan, nan, nan],\n",
  214. " [nan, nan, nan, ..., nan, nan, nan],\n",
  215. " [nan, nan, nan, ..., nan, nan, nan],\n",
  216. " ...,\n",
  217. " [nan, nan, nan, ..., nan, nan, nan],\n",
  218. " [nan, nan, nan, ..., nan, nan, nan],\n",
  219. " [nan, nan, nan, ..., nan, nan, nan]]),\n",
  220. " array([-60. , -58.8, -57.6, -56.4, -55.2, -54. , -52.8, -51.6, -50.4,\n",
  221. " -49.2, -48. , -46.8, -45.6, -44.4, -43.2, -42. , -40.8, -39.6,\n",
  222. " -38.4, -37.2, -36. , -34.8, -33.6, -32.4, -31.2, -30. , -28.8,\n",
  223. " -27.6, -26.4, -25.2, -24. , -22.8, -21.6, -20.4, -19.2, -18. ,\n",
  224. " -16.8, -15.6, -14.4, -13.2, -12. , -10.8, -9.6, -8.4, -7.2,\n",
  225. " -6. , -4.8, -3.6, -2.4, -1.2, 0. , 1.2, 2.4, 3.6,\n",
  226. " 4.8, 6. , 7.2, 8.4, 9.6, 10.8, 12. , 13.2, 14.4,\n",
  227. " 15.6, 16.8, 18. , 19.2, 20.4, 21.6, 22.8, 24. , 25.2,\n",
  228. " 26.4, 27.6, 28.8, 30. , 31.2, 32.4, 33.6, 34.8, 36. ,\n",
  229. " 37.2, 38.4, 39.6, 40.8, 42. , 43.2, 44.4, 45.6, 46.8,\n",
  230. " 48. , 49.2, 50.4, 51.6, 52.8, 54. , 55.2, 56.4, 57.6,\n",
  231. " 58.8, 60. ]),\n",
  232. " array([-200., -196., -192., -188., -184., -180., -176., -172., -168.,\n",
  233. " -164., -160., -156., -152., -148., -144., -140., -136., -132.,\n",
  234. " -128., -124., -120., -116., -112., -108., -104., -100., -96.,\n",
  235. " -92., -88., -84., -80., -76., -72., -68., -64., -60.,\n",
  236. " -56., -52., -48., -44., -40., -36., -32., -28., -24.,\n",
  237. " -20., -16., -12., -8., -4., 0., 4., 8., 12.,\n",
  238. " 16., 20., 24., 28., 32., 36., 40., 44., 48.,\n",
  239. " 52., 56., 60., 64., 68., 72., 76., 80., 84.,\n",
  240. " 88., 92., 96., 100., 104., 108., 112., 116., 120.,\n",
  241. " 124., 128., 132., 136., 140., 144., 148., 152., 156.,\n",
  242. " 160., 164., 168., 172., 176., 180., 184., 188., 192.,\n",
  243. " 196., 200.]),\n",
  244. " <matplotlib.collections.QuadMesh at 0x7f92bc504160>)"
  245. ]
  246. },
  247. "execution_count": 4,
  248. "metadata": {},
  249. "output_type": "execute_result"
  250. },
  251. {
  252. "data": {
  253. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAGiCAYAAADjixw0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAABqqUlEQVR4nO3deZxU9Zkv/qf2raureq9uaGg2F4QoQqIQZ4BRFn9mm0w0xolX7jWORjAS8GWCRgMaIK7JfWFixowjJjExN854k4lOBKPRcXBBRG0xssjWdHd1Q3dXVW+1n98fXkurPw9ybLptOP15v179etEPp+qcOrX0t87zfL+PzTAMQ4iIiIgszD7SB0BEREQ03DjgISIiIsvjgIeIiIgsjwMeIiIisjwOeIiIiMjyOOAhIiIiy+OAh4iIiCyPAx4iIiKyPA54iIiIyPI44CEiIiLLG9YBz/r16+XTn/60BINBqa6uli996Uuyc+fOom0Mw5DVq1dLXV2d+Hw+mTdvnuzYsaNom1QqJdddd51UVlZKIBCQL3zhC3Lo0KHhPHQiIiKykGEd8Dz33HOydOlSeemll2Tz5s2SzWZl4cKF0tvbW9jmzjvvlHvvvVfuu+8+2bp1q0QiEVmwYIF0d3cXtlm+fLk8/vjj8uijj8oLL7wgPT098rnPfU5yudxwHj4RERFZhO2TbB56+PBhqa6ulueee07+9m//VgzDkLq6Olm+fLl85zvfEZH3rubU1NTIHXfcIVdffbXE43GpqqqSX/7yl/LVr35VRERaWlqkvr5ennzySVm0aNEndfhERER0knJ+kjuLx+MiIlJeXi4iIvv27ZNoNCoLFy4sbOPxeGTu3LmyZcsWufrqq2Xbtm2SyWSKtqmrq5Np06bJli1b1AFPKpWSVCpV+D2fz0tnZ6dUVFSIzWYbrodHREREQ8gwDOnu7pa6ujqx248vKfWJDXgMw5AVK1bIeeedJ9OmTRMRkWg0KiIiNTU1RdvW1NTIgQMHCtu43W4pKyuDbd6//UDr16+XNWvWDPVDICIiohHQ1NQkY8eOPa77+MQGPMuWLZM333xTXnjhBfi/gVddDMM45pWYj9pm1apVsmLFisLv8Xhcxo0bJ01NTVJaWjqIoycaPb4Y+h8Qi//jZyDWV4Xvv77TUhDzBTF2yaTXIJbKuyD2f94+G2JvXnwDxDQzfn8HxK449RWI9eQ8EHs2OgVi0ZYyiFW9iB+hoUdwH5rfx39hajui0SyRSEh9fb0Eg8Hjvq9PZMBz3XXXyR/+8Ad5/vnni0ZokUhERN67ilNbW1uIt7e3F676RCIRSafT0tXVVXSVp729XebMmaPuz+PxiMeDH2KlpaUc8BAdg9OGAw+H24sxDw547D6MOfy4D28J7kOUAY/dj/s1+x52KLf1luBHXiaH+3UG8PPD7lPOgRvvTzt/Gn4WEZk3FOUowzrgMQxDrrvuOnn88cflL3/5i0yYMKHo/ydMmCCRSEQ2b94sM2bMEBGRdDotzz33nNxxx3vfzmbOnCkul0s2b94sl1xyiYiItLa2yltvvSV33nnncB4+Ef0/4Y0vQiz7TfzC0ZPFHHttKAExlw1nWI7zHoFYaWm/2UMERh4/ILX9xjI4IpsY6oBYexd+w0xMwMFN2OTxEdEna1gHPEuXLpVf//rX8vvf/16CwWCh5iYUConP5xObzSbLly+XdevWyZQpU2TKlCmybt068fv9ctlllxW2vfLKK2XlypVSUVEh5eXlcsMNN8j06dPlggsuGM7DJyIiIosY1gHP/fffLyIi8+bNK4o/9NBDsmTJEhERufHGG6W/v1+uvfZa6erqknPOOUc2bdpUlK/70Y9+JE6nUy655BLp7++X888/XzZu3CgOh2M4D5+IiIgsYthTWsdis9lk9erVsnr16qNu4/V6ZcOGDbJhw4YhPDoiIiIaLT7RdXiI6OQUXY71Onk3bmdz5iHmd2YgptXStGZwFlQi4TN5hOa4bFmInR5ogdj/OTRz0PtwTD8NYrnGdyC2wH4xxDbnfzfo/RLRR2PzUCIiIrI8DniIiIjI8jjgISIiIstjDQ8RFdHqSOYvwlWLY5OxiCdXkoZY1sDvVd05ZRE/G9b/BAK4SrNZ2TTO4tRqhzIGblfmxfV/9qcqIWbzmuu9zNocopHHKzxERERkeRzwEBERkeUxpUVERbTp0qmvnAuxvgje1jCwnYPXgVPBtTTSqd5W3G/mOD6iOjHlpqW0OnMBiNltSqqqD4/FlsXH23x+uckDJKJPEq/wEBERkeVxwENERESWxwEPERERWR5reIhGscVn3mJqu/hE/G6UqsJ6GOnBupmxvhjEgo4kxFqU1hJGHmtkTCvHKfJ9Sj+MKmc3xPwOvG2wDrfr7w5BzBvDQ9Gm9T/71HdwQyIaNrzCQ0RERJbHAQ8RERFZHgc8REREZHms4SEaxf70xu0Qm3PJPRCr3o41LQdqXBBzVGBtTiKLbSTKnT0QswuufZM/jhoeI2futkE7HnPYha0lbMraPNnqDMSSrVgnVLENHy8RfbJ4hYeIiIgsjwMeIiIisjwOeIiIiMjyWMNDREVKd8Yg1nYe9odSltIRrwdrWmo8CYgdzgYhNtnTBrFccvAfUXZX3tR2TRl8bPt7MRbwYB1TdwLX4VGW+pGOmbjGEBF9sniFh4iIiCyPAx4iIiKyPKa0iKhIrvEdiPlOPRdifRH8vuR2ZSFW5uqFWJ0rBrFkHqe5hysHP53b6VZaXyiOZDC9dmppO8SyRgRih5M49V1Labl6cEr7AvvFENuc/93RDpOIjhOv8BAREZHlccBDRERElscBDxEREVkea3iIRrHFZ94Csd6vYL1O12n43Sg1DqdplzuwbqYliVOyx7s7IKZNVS/1KnPfTTKUthSd2RKI1bhw2nxnNgCx3gwW52TD+HgdylR6baq/uQojIhoqvMJDRERElscBDxEREVkeBzxERERkeazhIRoltHodjSeOa+nY01i/4g/1m7q/sKsPYlqNjEOwFUQ6O/iPqEyHF2KnelshljRw/R/t+FQ5rBNy4rJDkjg1DDFPZJa5fQzA9XuIBodXeIiIiMjyOOAhIiIiy2NKi2iU0FpGaKmQGUt/BDE7zkCXVM4BsboSnOLdmsKO4l47dlXXuqUn+j24Y5NcFTil/UC6EmKtaTy+SV5sLVHlw1zVATem4fJuPC9amtC5+VWImcH0FdHg8AoPERERWd6wDnief/55+fznPy91dXVis9nk//7f/1v0/0uWLBGbzVb0c+65xYuepVIpue6666SyslICgYB84QtfkEOHDg3nYRMREZHFDOuAp7e3V84880y57777jrrN4sWLpbW1tfDz5JNPFv3/8uXL5fHHH5dHH31UXnjhBenp6ZHPfe5zkstxnVIiIiIyZ1hreC688EK58MILP3Ibj8cjkUhE/b94PC4PPvig/PKXv5QLLrhARER+9atfSX19vTz99NOyaNGiIT9mIqtyTD8NYnMuuQdifdPxe5C7C+8vm8ZalQo31rlo09LHujsh1p3HaeSZzOA/onJZfBwuG35RGufBY2nLYF1PNo/3Z/fg/aXK8Jhjk3Faf1hwWjqnnBMNnxGv4fnLX/4i1dXVcsopp8hVV10l7e0fFAtu27ZNMpmMLFy4sBCrq6uTadOmyZYtW456n6lUShKJRNEPERERjV4jOuC58MIL5ZFHHpFnnnlG7rnnHtm6dav83d/9naRSKRERiUaj4na7paysuPlgTU2NRKPRo97v+vXrJRQKFX7q6+uH9XEQERHRiW1Ep6V/9atfLfx72rRpMmvWLBk/frw88cQT8uUvf/motzMMQ2w2XOH0fatWrZIVK1YUfk8kEhz0EBERjWIn1Do8tbW1Mn78eNm9e7eIiEQiEUmn09LV1VV0lae9vV3mzJlz1PvxeDzi8Qx+/Q6i0aJ0Zwxi3WPLIdZfa0BsUu1hiHWksSVDnRf3odHqazKxwb+PnW68v0qnufS2y6asm2OvhVikKg6x1mwZxESwhkej1VkR0dAY8RqeD+vo6JCmpiaprX3vg2XmzJnicrlk8+bNhW1
  254. "text/plain": [
  255. "<Figure size 640x480 with 1 Axes>"
  256. ]
  257. },
  258. "metadata": {},
  259. "output_type": "display_data"
  260. }
  261. ],
  262. "source": [
  263. "noise = np.random.normal(0, 0.15, X_test[:,2].shape)\n",
  264. "noisey_X_test = X_test.copy()\n",
  265. "noisey_X_test[:, 2] = 1./(1./noisey_X_test[:,2] + noise * (1./noisey_X_test[:,2]))\n",
  266. "noisey_X_test[:, 3] = noisey_X_test[:, 2] * np.sign( noisey_X_test[:, 3] )\n",
  267. "noisey_X_test_model = np.delete( poly.fit_transform( noisey_X_test ), remove, axis=1)\n",
  268. "noisy_y_pred_test = lin_reg.predict( noisey_X_test_model )\n",
  269. "plt.figure()\n",
  270. "plt.hist2d(np.sign(noisey_X_test[:, 3])/noisey_X_test[:, 2], noisy_y_pred_test - y_test, bins=100, range=[[-60,60], [-200,200]],cmin=1, norm=matplotlib.colors.LogNorm())"
  271. ]
  272. },
  273. {
  274. "cell_type": "code",
  275. "execution_count": null,
  276. "metadata": {},
  277. "outputs": [],
  278. "source": []
  279. }
  280. ],
  281. "metadata": {
  282. "kernelspec": {
  283. "display_name": "Python 3.10.6 (conda)",
  284. "language": "python",
  285. "name": "python3"
  286. },
  287. "language_info": {
  288. "codemirror_mode": {
  289. "name": "ipython",
  290. "version": 3
  291. },
  292. "file_extension": ".py",
  293. "mimetype": "text/x-python",
  294. "name": "python",
  295. "nbconvert_exporter": "python",
  296. "pygments_lexer": "ipython3",
  297. "version": "3.10.6"
  298. },
  299. "orig_nbformat": 4,
  300. "vscode": {
  301. "interpreter": {
  302. "hash": "a2eff8b4da8b8eebf5ee2e5f811f31a557e0a202b4d2f04f849b065340a6eda6"
  303. }
  304. }
  305. },
  306. "nbformat": 4,
  307. "nbformat_minor": 2
  308. }