You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

717 lines
46 KiB

10 months ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 3,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import uproot\n",
  10. "import awkward as ak\n",
  11. "import numpy as np\n",
  12. "# Load the selected ntuple and attach the derived columns that the fit cells below use as inputs and targets.\n",
  13. "# Disabled alternative input: the split files param_data_MD_selected_8520.root and param_data_MU_selected_8520.root (same directory, tree `Selected`).\n",
  14. "input_tree = uproot.open({\"/work/guenther/reco_tuner/data/param_data_selected.root\": \"Selected\"})\n",
  15. "# Disabled alternative: read both split files and merge them into one array:\n",
  16. "#   array_md = input_tree_md.arrays(); array_mu = input_tree_mu.arrays()\n",
  17. "#   array = ak.concatenate([array_md, array_mu])\n",
  18. "# `array` holds all branches of the tree as awkward arrays; the assignments below add new fields to it.\n",
  19. "array = input_tree.arrays()\n",
  20. "array[\"dSlope_fringe\"] = array[\"tx_ref\"] - array[\"tx\"]  # difference of the x-slopes: tx_ref minus tx\n",
  21. "array[\"dSlope_fringe_abs\"] = abs(array[\"dSlope_fringe\"])  # magnitude of that slope difference\n",
  22. "array[\"z_mag_x_fringe\"] = (array[\"x\"] - array[\"x_ref\"] - array[\"tx\"] * array[\"z\"] + array[\"tx_ref\"] * array[\"z_ref\"] ) / array[\"dSlope_fringe\"]  # z where the lines x + tx*(Z - z) and x_ref + tx_ref*(Z - z_ref) cross; not guarded against dSlope_fringe == 0\n",
  23. "array[\"yStraightRef\"] = array[\"y\"] + array[\"ty\"] * ( array[\"z_ref\"] - array[\"z\"])  # y extrapolated with constant slope ty from z to z_ref\n",
  24. "array[\"AY_straight_diff\"] = array[\"AY_ex\"] - array[\"yStraightRef\"]  # AY_ex minus the straight-line y at z_ref; NOTE(review): meaning of AY_ex is not visible here\n",
  25. "array[\"y_ref_straight_diff\"] = array[\"y_ref\"] - array[\"yStraightRef\"]  # y_ref minus the straight-line y at z_ref (used as a fit target below)\n",
  26. "array[\"y_straight_diff_l1\"] = array[\"y_l1\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l1\"] - array[\"z\"])  # y_l1 minus the straight-line y at z_l1; the next five lines repeat this for l2, l5, l6, l9, l10\n",
  27. "array[\"y_straight_diff_l2\"] = array[\"y_l2\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l2\"] - array[\"z\"])\n",
  28. "array[\"y_straight_diff_l5\"] = array[\"y_l5\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l5\"] - array[\"z\"])\n",
  29. "array[\"y_straight_diff_l6\"] = array[\"y_l6\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l6\"] - array[\"z\"])\n",
  30. "array[\"y_straight_diff_l9\"] = array[\"y_l9\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l9\"] - array[\"z\"])\n",
  31. "array[\"y_straight_diff_l10\"] = array[\"y_l10\"] - array[\"y\"] - array[\"ty\"] * ( array[\"z_l10\"] - array[\"z\"])\n",
  32. "array[\"BY_straight_diff\"] = array[\"BY_ex\"] - array[\"ty\"]  # BY_ex minus ty; NOTE(review): meaning of BY_ex is not visible here\n",
  33. "array[\"ty_ref_straight_diff\"] = array[\"ty_ref\"] - array[\"ty\"]  # ty_ref minus ty (used as a fit target below)\n",
  34. "def format_array(name, coef):\n",
  35. " coef = [str(c)+\"f\" for c in coef if c != 0.0]\n",
  36. " code = f\"constexpr std::array {name}\"\n",
  37. " code += \"{\" + \", \".join(list(coef)) +\"};\"\n",
  38. " return code"
  39. ]
  40. },
  41. {
  42. "cell_type": "code",
  43. "execution_count": 4,
  44. "metadata": {},
  45. "outputs": [
  46. {
  47. "name": "stdout",
  48. "output_type": "stream",
  49. "text": [
  50. "['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
  51. " 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
  52. " 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
  53. " 'ty^3 tx^2 dSlope_fringe_abs']\n",
  54. "intercept= 0.0\n",
  55. "coef= {'dSlope_fringe': 2.54927052921483, 'ty dSlope_fringe_abs': 65.18649309504633, 'ty tx dSlope_fringe': 4174.444641065072, 'ty^3 dSlope_fringe_abs': -10543.061353132687, 'ty tx^2 dSlope_fringe_abs': 812.6329282763543, 'ty^3 tx dSlope_fringe': 52906.81696296328, 'ty tx^3 dSlope_fringe': 37770.21841979088, 'ty^3 tx^2 dSlope_fringe_abs': 273401.3548544383}\n",
  56. "r2 score= 0.9424448007543189\n",
  57. "RMSE = 8.058017702810291\n",
  58. "straight RMSE = 33.93970007402008\n",
  59. "constexpr std::array y_ref_straight_diff{2.54927052921483f, 65.18649309504633f, 4174.444641065072f, -10543.061353132687f, 812.6329282763543f, 52906.81696296328f, 37770.21841979088f, 273401.3548544383f};\n"
  60. ]
  61. }
  62. ],
  63. "source": [
  64. "from sklearn.preprocessing import PolynomialFeatures\n",
  65. "from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
  66. "from sklearn.model_selection import train_test_split\n",
  67. "from sklearn.pipeline import Pipeline\n",
  68. "from sklearn.metrics import mean_squared_error\n",
  69. "\n",
  70. "features = [\n",
  71. " \"ty\", \n",
  72. " \"tx\",\n",
  73. " \"dSlope_fringe\",\n",
  74. " \"dSlope_fringe_abs\"\n",
  75. "]\n",
  76. "target_feat = \"y_ref_straight_diff\"\n",
  77. "\n",
  78. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  79. "target = ak.to_numpy(array[target_feat])\n",
  80. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  81. "\n",
  82. "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
  83. "X_train_model = poly.fit_transform( X_train )\n",
  84. "X_test_model = poly.fit_transform( X_test )\n",
  85. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  86. "#print(poly_features)\n",
  87. "keep = [\n",
  88. " \"ty dSlope_fringe_abs\", \n",
  89. " \"ty tx^2 dSlope_fringe_abs\", \n",
  90. " \"ty^3 dSlope_fringe_abs\", \n",
  91. " \"ty^3 tx^2 dSlope_fringe_abs\",\n",
  92. " \"dSlope_fringe\",\n",
  93. " \"ty tx dSlope_fringe\",\n",
  94. " \"ty tx^3 dSlope_fringe\",\n",
  95. " \"ty^3 tx dSlope_fringe\",\n",
  96. "]\n",
  97. "remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
  98. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  99. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  100. "poly_features = np.delete(poly_features, remove )\n",
  101. "print(poly_features)\n",
  102. "\n",
  103. "lin_reg = LinearRegression(fit_intercept=False)\n",
  104. "lin_reg.fit( X_train_model, y_train)\n",
  105. "y_pred_test = lin_reg.predict( X_test_model )\n",
  106. "print(\"intercept=\", lin_reg.intercept_)\n",
  107. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  108. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  109. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  110. "print(\"straight RMSE =\", mean_squared_error(array[\"y_ref\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_ref\"] - array[\"z\"] ), squared=False))\n",
  111. "print(format_array(\"y_ref_straight_diff\", lin_reg.coef_))"
  112. ]
  113. },
  114. {
  115. "cell_type": "code",
  116. "execution_count": 5,
  117. "metadata": {},
  118. "outputs": [
  119. {
  120. "name": "stdout",
  121. "output_type": "stream",
  122. "text": [
  123. "['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
  124. " 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
  125. " 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
  126. " 'ty^3 tx^2 dSlope_fringe_abs']\n",
  127. "intercept= 0.0\n",
  128. "coef= {'dSlope_fringe': 1.926395569816899, 'ty dSlope_fringe_abs': 155.7194258002827, 'ty tx dSlope_fringe': 3711.2012601369147, 'ty^3 dSlope_fringe_abs': -6986.454915191362, 'ty tx^2 dSlope_fringe_abs': -102.59383584048052, 'ty^3 tx dSlope_fringe': 42360.930984198254, 'ty tx^3 dSlope_fringe': 29857.060893721067, 'ty^3 tx^2 dSlope_fringe_abs': 209095.7911514973}\n",
  129. "r2 score= 0.9391433516368133\n",
  130. "RMSE = 6.578135946782935\n",
  131. "straight RMSE = 26.874058521627408\n",
  132. "constexpr std::array y_straight_diff_l1{1.926395569816899f, 155.7194258002827f, 3711.2012601369147f, -6986.454915191362f, -102.59383584048052f, 42360.930984198254f, 29857.060893721067f, 209095.7911514973f};\n"
  133. ]
  134. }
  135. ],
  136. "source": [
  137. "features = [\n",
  138. " \"ty\", \n",
  139. " \"tx\",\n",
  140. " \"dSlope_fringe\",\n",
  141. " \"dSlope_fringe_abs\"\n",
  142. "]\n",
  143. "target_feat = \"y_straight_diff_l1\"\n",
  144. "\n",
  145. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  146. "target = ak.to_numpy(array[target_feat])\n",
  147. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  148. "\n",
  149. "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
  150. "X_train_model = poly.fit_transform( X_train )\n",
  151. "X_test_model = poly.fit_transform( X_test )\n",
  152. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  153. "#print(poly_features)\n",
  154. "keep = [\n",
  155. " \"ty dSlope_fringe_abs\", \n",
  156. " \"ty tx^2 dSlope_fringe_abs\", \n",
  157. " \"ty^3 dSlope_fringe_abs\", \n",
  158. " \"ty^3 tx^2 dSlope_fringe_abs\",\n",
  159. " \"dSlope_fringe\",\n",
  160. " \"ty tx dSlope_fringe\",\n",
  161. " \"ty tx^3 dSlope_fringe\",\n",
  162. " \"ty^3 tx dSlope_fringe\",\n",
  163. "]\n",
  164. "remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
  165. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  166. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  167. "poly_features = np.delete(poly_features, remove )\n",
  168. "print(poly_features)\n",
  169. "\n",
  170. "lin_reg = LinearRegression(fit_intercept=False)\n",
  171. "lin_reg.fit( X_train_model, y_train)\n",
  172. "y_pred_test = lin_reg.predict( X_test_model )\n",
  173. "print(\"intercept=\", lin_reg.intercept_)\n",
  174. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  175. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  176. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  177. "print(\"straight RMSE =\", mean_squared_error(array[\"y_l1\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l1\"] - array[\"z\"] ), squared=False))\n",
  178. "print(format_array(\"y_straight_diff_l1\", lin_reg.coef_))"
  179. ]
  180. },
  181. {
  182. "cell_type": "code",
  183. "execution_count": 6,
  184. "metadata": {},
  185. "outputs": [
  186. {
  187. "name": "stdout",
  188. "output_type": "stream",
  189. "text": [
  190. "['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
  191. " 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
  192. " 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
  193. " 'ty^3 tx^2 dSlope_fringe_abs']\n",
  194. "intercept= 0.0\n",
  195. "coef= {'dSlope_fringe': 1.992023095115273, 'ty dSlope_fringe_abs': 147.53158307285184, 'ty tx dSlope_fringe': 3758.3638141105685, 'ty^3 dSlope_fringe_abs': -7387.838564546752, 'ty tx^2 dSlope_fringe_abs': -20.05844697530397, 'ty^3 tx dSlope_fringe': 43544.55794543203, 'ty tx^3 dSlope_fringe': 30729.099594836473, 'ty^3 tx^2 dSlope_fringe_abs': 216301.09830837924}\n",
  196. "r2 score= 0.939662275160689\n",
  197. "RMSE = 6.726003215973192\n",
  198. "straight RMSE = 27.60567927172973\n",
  199. "constexpr std::array y_straight_diff_l2{1.992023095115273f, 147.53158307285184f, 3758.3638141105685f, -7387.838564546752f, -20.05844697530397f, 43544.55794543203f, 30729.099594836473f, 216301.09830837924f};\n"
  200. ]
  201. }
  202. ],
  203. "source": [
  204. "features = [\n",
  205. " \"ty\", \n",
  206. " \"tx\",\n",
  207. " \"dSlope_fringe\",\n",
  208. " \"dSlope_fringe_abs\"\n",
  209. "]\n",
  210. "target_feat = \"y_straight_diff_l2\"\n",
  211. "\n",
  212. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  213. "target = ak.to_numpy(array[target_feat])\n",
  214. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  215. "\n",
  216. "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
  217. "X_train_model = poly.fit_transform( X_train )\n",
  218. "X_test_model = poly.fit_transform( X_test )\n",
  219. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  220. "#print(poly_features)\n",
  221. "keep = [\n",
  222. " \"ty dSlope_fringe_abs\", \n",
  223. " \"ty tx^2 dSlope_fringe_abs\", \n",
  224. " \"ty^3 dSlope_fringe_abs\", \n",
  225. " \"ty^3 tx^2 dSlope_fringe_abs\",\n",
  226. " \"dSlope_fringe\",\n",
  227. " \"ty tx dSlope_fringe\",\n",
  228. " \"ty tx^3 dSlope_fringe\",\n",
  229. " \"ty^3 tx dSlope_fringe\",\n",
  230. "]\n",
  231. "remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
  232. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  233. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  234. "poly_features = np.delete(poly_features, remove )\n",
  235. "print(poly_features)\n",
  236. "\n",
  237. "lin_reg = LinearRegression(fit_intercept=False)\n",
  238. "lin_reg.fit( X_train_model, y_train)\n",
  239. "y_pred_test = lin_reg.predict( X_test_model )\n",
  240. "print(\"intercept=\", lin_reg.intercept_)\n",
  241. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  242. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  243. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  244. "print(\"straight RMSE =\", mean_squared_error(array[\"y_l2\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l2\"] - array[\"z\"] ), squared=False))\n",
  245. "print(format_array(\"y_straight_diff_l2\", lin_reg.coef_))"
  246. ]
  247. },
  248. {
  249. "cell_type": "code",
  250. "execution_count": 7,
  251. "metadata": {},
  252. "outputs": [
  253. {
  254. "name": "stdout",
  255. "output_type": "stream",
  256. "text": [
  257. "['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
  258. " 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
  259. " 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
  260. " 'ty^3 tx^2 dSlope_fringe_abs']\n",
  261. "intercept= 0.0\n",
  262. "coef= {'dSlope_fringe': 2.6109097507814067, 'ty dSlope_fringe_abs': 55.23845097538265, 'ty tx dSlope_fringe': 4222.757834030979, 'ty^3 dSlope_fringe_abs': -10869.286047558253, 'ty tx^2 dSlope_fringe_abs': 913.7370293450639, 'ty^3 tx dSlope_fringe': 53875.65687892285, 'ty tx^3 dSlope_fringe': 38512.63398443023, 'ty^3 tx^2 dSlope_fringe_abs': 279346.89317804825}\n",
  263. "r2 score= 0.9426062193562865\n",
  264. "RMSE = 8.213743029204338\n",
  265. "straight RMSE = 34.65071255550859\n",
  266. "constexpr std::array y_straight_diff_l5{2.6109097507814067f, 55.23845097538265f, 4222.757834030979f, -10869.286047558253f, 913.7370293450639f, 53875.65687892285f, 38512.63398443023f, 279346.89317804825f};\n"
  267. ]
  268. }
  269. ],
  270. "source": [
  271. "features = [\n",
  272. " \"ty\", \n",
  273. " \"tx\",\n",
  274. " \"dSlope_fringe\",\n",
  275. " \"dSlope_fringe_abs\"\n",
  276. "]\n",
  277. "target_feat = \"y_straight_diff_l5\"\n",
  278. "\n",
  279. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  280. "target = ak.to_numpy(array[target_feat])\n",
  281. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  282. "\n",
  283. "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
  284. "X_train_model = poly.fit_transform( X_train )\n",
  285. "X_test_model = poly.fit_transform( X_test )\n",
  286. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  287. "#print(poly_features)\n",
  288. "keep = [\n",
  289. " \"ty dSlope_fringe_abs\", \n",
  290. " \"ty tx^2 dSlope_fringe_abs\", \n",
  291. " \"ty^3 dSlope_fringe_abs\", \n",
  292. " \"ty^3 tx^2 dSlope_fringe_abs\",\n",
  293. " \"dSlope_fringe\",\n",
  294. " \"ty tx dSlope_fringe\",\n",
  295. " \"ty tx^3 dSlope_fringe\",\n",
  296. " \"ty^3 tx dSlope_fringe\",\n",
  297. "]\n",
  298. "remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
  299. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  300. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  301. "poly_features = np.delete(poly_features, remove )\n",
  302. "print(poly_features)\n",
  303. "\n",
  304. "lin_reg = LinearRegression(fit_intercept=False)\n",
  305. "lin_reg.fit( X_train_model, y_train)\n",
  306. "y_pred_test = lin_reg.predict( X_test_model )\n",
  307. "print(\"intercept=\", lin_reg.intercept_)\n",
  308. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  309. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  310. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  311. "print(\"straight RMSE =\", mean_squared_error(array[\"y_l5\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l5\"] - array[\"z\"] ), squared=False))\n",
  312. "print(format_array(\"y_straight_diff_l5\", lin_reg.coef_))"
  313. ]
  314. },
  315. {
  316. "cell_type": "code",
  317. "execution_count": 8,
  318. "metadata": {},
  319. "outputs": [
  320. {
  321. "name": "stdout",
  322. "output_type": "stream",
  323. "text": [
  324. "['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
  325. " 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
  326. " 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
  327. " 'ty^3 tx^2 dSlope_fringe_abs']\n",
  328. "intercept= 0.0\n",
  329. "coef= {'dSlope_fringe': 2.6869036549491536, 'ty dSlope_fringe_abs': 42.86164139009395, 'ty tx dSlope_fringe': 4282.473673540287, 'ty^3 dSlope_fringe_abs': -11261.305925638624, 'ty tx^2 dSlope_fringe_abs': 1038.8682634234196, 'ty^3 tx dSlope_fringe': 55053.993311126746, 'ty tx^3 dSlope_fringe': 39412.35231667733, 'ty^3 tx^2 dSlope_fringe_abs': 286552.2319656697}\n",
  330. "r2 score= 0.9427768256184441\n",
  331. "RMSE = 8.406185401645494\n",
  332. "straight RMSE = 35.523013512241214\n",
  333. "constexpr std::array y_straight_diff_l6{2.6869036549491536f, 42.86164139009395f, 4282.473673540287f, -11261.305925638624f, 1038.8682634234196f, 55053.993311126746f, 39412.35231667733f, 286552.2319656697f};\n"
  334. ]
  335. }
  336. ],
  337. "source": [
  338. "features = [\n",
  339. " \"ty\", \n",
  340. " \"tx\",\n",
  341. " \"dSlope_fringe\",\n",
  342. " \"dSlope_fringe_abs\"\n",
  343. "]\n",
  344. "target_feat = \"y_straight_diff_l6\"\n",
  345. "\n",
  346. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  347. "target = ak.to_numpy(array[target_feat])\n",
  348. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  349. "\n",
  350. "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
  351. "X_train_model = poly.fit_transform( X_train )\n",
  352. "X_test_model = poly.fit_transform( X_test )\n",
  353. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  354. "#print(poly_features)\n",
  355. "keep = [\n",
  356. " \"ty dSlope_fringe_abs\", \n",
  357. " \"ty tx^2 dSlope_fringe_abs\", \n",
  358. " \"ty^3 dSlope_fringe_abs\", \n",
  359. " \"ty^3 tx^2 dSlope_fringe_abs\",\n",
  360. " \"dSlope_fringe\",\n",
  361. " \"ty tx dSlope_fringe\",\n",
  362. " \"ty tx^3 dSlope_fringe\",\n",
  363. " \"ty^3 tx dSlope_fringe\",\n",
  364. "]\n",
  365. "remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
  366. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  367. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  368. "poly_features = np.delete(poly_features, remove )\n",
  369. "print(poly_features)\n",
  370. "\n",
  371. "lin_reg = LinearRegression(fit_intercept=False)\n",
  372. "lin_reg.fit( X_train_model, y_train)\n",
  373. "y_pred_test = lin_reg.predict( X_test_model )\n",
  374. "print(\"intercept=\", lin_reg.intercept_)\n",
  375. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  376. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  377. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  378. "print(\"straight RMSE =\", mean_squared_error(array[\"y_l6\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l6\"] - array[\"z\"] ), squared=False))\n",
  379. "print(format_array(\"y_straight_diff_l6\", lin_reg.coef_))"
  380. ]
  381. },
  382. {
  383. "cell_type": "code",
  384. "execution_count": 9,
  385. "metadata": {},
  386. "outputs": [
  387. {
  388. "name": "stdout",
  389. "output_type": "stream",
  390. "text": [
  391. "['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
  392. " 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
  393. " 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
  394. " 'ty^3 tx^2 dSlope_fringe_abs']\n",
  395. "intercept= 0.0\n",
  396. "coef= {'dSlope_fringe': 3.3840782993219154, 'ty dSlope_fringe_abs': -73.66219618861146, 'ty tx dSlope_fringe': 4855.8672756860515, 'ty^3 dSlope_fringe_abs': -14629.483020343234, 'ty tx^2 dSlope_fringe_abs': 2208.9899749295746, 'ty^3 tx dSlope_fringe': 65168.34060362849, 'ty tx^3 dSlope_fringe': 47225.377082479885, 'ty^3 tx^2 dSlope_fringe_abs': 349003.88327916135}\n",
  397. "r2 score= 0.943344616193092\n",
  398. "RMSE = 10.238398557856689\n",
  399. "straight RMSE = 43.54352590748684\n",
  400. "constexpr std::array y_straight_diff_l9{3.3840782993219154f, -73.66219618861146f, 4855.8672756860515f, -14629.483020343234f, 2208.9899749295746f, 65168.34060362849f, 47225.377082479885f, 349003.88327916135f};\n"
  401. ]
  402. }
  403. ],
  404. "source": [
  405. "features = [\n",
  406. " \"ty\", \n",
  407. " \"tx\",\n",
  408. " \"dSlope_fringe\",\n",
  409. " \"dSlope_fringe_abs\"\n",
  410. "]\n",
  411. "target_feat = \"y_straight_diff_l9\"\n",
  412. "\n",
  413. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  414. "target = ak.to_numpy(array[target_feat])\n",
  415. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  416. "\n",
  417. "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
  418. "X_train_model = poly.fit_transform( X_train )\n",
  419. "X_test_model = poly.fit_transform( X_test )\n",
  420. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  421. "#print(poly_features)\n",
  422. "keep = [\n",
  423. " \"ty dSlope_fringe_abs\", \n",
  424. " \"ty tx^2 dSlope_fringe_abs\", \n",
  425. " \"ty^3 dSlope_fringe_abs\", \n",
  426. " \"ty^3 tx^2 dSlope_fringe_abs\",\n",
  427. " \"dSlope_fringe\",\n",
  428. " \"ty tx dSlope_fringe\",\n",
  429. " \"ty tx^3 dSlope_fringe\",\n",
  430. " \"ty^3 tx dSlope_fringe\",\n",
  431. "]\n",
  432. "remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
  433. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  434. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  435. "poly_features = np.delete(poly_features, remove )\n",
  436. "print(poly_features)\n",
  437. "\n",
  438. "lin_reg = LinearRegression(fit_intercept=False)\n",
  439. "lin_reg.fit( X_train_model, y_train)\n",
  440. "y_pred_test = lin_reg.predict( X_test_model )\n",
  441. "print(\"intercept=\", lin_reg.intercept_)\n",
  442. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  443. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  444. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  445. "print(\"straight RMSE =\", mean_squared_error(array[\"y_l9\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l9\"] - array[\"z\"] ), squared=False))\n",
  446. "print(format_array(\"y_straight_diff_l9\", lin_reg.coef_))"
  447. ]
  448. },
  449. {
  450. "cell_type": "code",
  451. "execution_count": 10,
  452. "metadata": {},
  453. "outputs": [
  454. {
  455. "name": "stdout",
  456. "output_type": "stream",
  457. "text": [
  458. "['dSlope_fringe' 'ty dSlope_fringe_abs' 'ty tx dSlope_fringe'\n",
  459. " 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs'\n",
  460. " 'ty^3 tx dSlope_fringe' 'ty tx^3 dSlope_fringe'\n",
  461. " 'ty^3 tx^2 dSlope_fringe_abs']\n",
  462. "intercept= 0.0\n",
  463. "coef= {'dSlope_fringe': 3.466479515860849, 'ty dSlope_fringe_abs': -87.54166562301657, 'ty tx dSlope_fringe': 4926.350778945553, 'ty^3 dSlope_fringe_abs': -15008.616026945787, 'ty tx^2 dSlope_fringe_abs': 2347.3262833969093, 'ty^3 tx dSlope_fringe': 66293.67978997533, 'ty tx^3 dSlope_fringe': 48106.815974854835, 'ty^3 tx^2 dSlope_fringe_abs': 356053.9012962991}\n",
  464. "r2 score= 0.9433372187768841\n",
  465. "RMSE = 10.46108881485266\n",
  466. "straight RMSE = 44.49300835626165\n",
  467. "constexpr std::array y_straight_diff_l10{3.466479515860849f, -87.54166562301657f, 4926.350778945553f, -15008.616026945787f, 2347.3262833969093f, 66293.67978997533f, 48106.815974854835f, 356053.9012962991f};\n"
  468. ]
  469. }
  470. ],
  471. "source": [
  472. "features = [\n",
  473. " \"ty\", \n",
  474. " \"tx\",\n",
  475. " \"dSlope_fringe\",\n",
  476. " \"dSlope_fringe_abs\"\n",
  477. "]\n",
  478. "target_feat = \"y_straight_diff_l10\"\n",
  479. "\n",
  480. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  481. "target = ak.to_numpy(array[target_feat])\n",
  482. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  483. "\n",
  484. "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
  485. "X_train_model = poly.fit_transform( X_train )\n",
  486. "X_test_model = poly.fit_transform( X_test )\n",
  487. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  488. "#print(poly_features)\n",
  489. "keep = [\n",
  490. " \"ty dSlope_fringe_abs\", \n",
  491. " \"ty tx^2 dSlope_fringe_abs\", \n",
  492. " \"ty^3 dSlope_fringe_abs\", \n",
  493. " \"ty^3 tx^2 dSlope_fringe_abs\",\n",
  494. " \"dSlope_fringe\",\n",
  495. " \"ty tx dSlope_fringe\",\n",
  496. " \"ty tx^3 dSlope_fringe\",\n",
  497. " \"ty^3 tx dSlope_fringe\",\n",
  498. "]\n",
  499. "remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
  500. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  501. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  502. "poly_features = np.delete(poly_features, remove )\n",
  503. "print(poly_features)\n",
  504. "\n",
  505. "lin_reg = LinearRegression(fit_intercept=False)\n",
  506. "lin_reg.fit( X_train_model, y_train)\n",
  507. "y_pred_test = lin_reg.predict( X_test_model )\n",
  508. "print(\"intercept=\", lin_reg.intercept_)\n",
  509. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  510. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  511. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  512. "print(\"straight RMSE =\", mean_squared_error(array[\"y_l10\"], array[\"y\"] + array[\"ty\"] * ( array[\"z_l10\"] - array[\"z\"] ), squared=False))\n",
  513. "print(format_array(\"y_straight_diff_l10\", lin_reg.coef_))"
  514. ]
  515. },
  516. {
  517. "cell_type": "code",
  518. "execution_count": 41,
  519. "metadata": {},
  520. "outputs": [
  521. {
  522. "name": "stdout",
  523. "output_type": "stream",
  524. "text": [
  525. "intercept= 0.0\n",
  526. "coef= {'ty tx dSlope_fringe': 0.9394190987843558, 'ty dSlope_fringe^2': -0.46480920487603206, 'ty^3 dSlope_fringe_abs': -4.137993544858158, 'ty tx^2 dSlope_fringe_abs': 2.980803780828937, 'ty tx^3 dSlope_fringe': 12.402177409386482, 'ty^3 tx^2 dSlope_fringe_abs': 38.238656269022954}\n",
  527. "r2 score= 0.9649625489427109\n",
  528. "RMSE = 0.0025237463237113896\n",
  529. "straight RMSE = 0.013705994187091751\n",
  530. "constexpr std::array ty_ref_straight_diff{0.9394190987843558f, -0.46480920487603206f, -4.137993544858158f, 2.980803780828937f, 12.402177409386482f, 38.238656269022954f};\n"
  531. ]
  532. }
  533. ],
  534. "source": [
  535. "features = [\n",
  536. " \"ty\", \n",
  537. " \"tx\",\n",
  538. " \"dSlope_fringe\",\n",
  539. " \"dSlope_fringe_abs\"\n",
  540. "]\n",
  541. "target_feat = \"ty_ref_straight_diff\"\n",
  542. "\n",
  543. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  544. "target = ak.to_numpy(array[target_feat])\n",
  545. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  546. "\n",
  547. "poly = PolynomialFeatures(degree=6, include_bias=False)\n",
  548. "X_train_model = poly.fit_transform( X_train )\n",
  549. "X_test_model = poly.fit_transform( X_test )\n",
  550. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  551. "#print(poly_features)\n",
  552. "keep = [\n",
  553. " \"ty dSlope_fringe^2\",\n",
  554. " #\"ty dSlope_fringe_abs\", \n",
  555. " \"ty tx^2 dSlope_fringe_abs\", \n",
  556. " \"ty^3 dSlope_fringe_abs\", \n",
  557. " \"ty^3 tx^2 dSlope_fringe_abs\",\n",
  558. " #\"dSlope_fringe\",\n",
  559. " \"ty tx dSlope_fringe\",\n",
  560. " \"ty tx^3 dSlope_fringe\",\n",
  561. " #\"ty^3 tx dSlope_fringe\",\n",
  562. "]\n",
  563. "remove = [i for i, f in enumerate(poly_features) if f not in keep]\n",
  564. "#remove = [i for i, f in enumerate(poly_features) if (\"dSlope_fringe\" not in f) or (\"dSlope_fringe^\" in f) or (\"dSlope_fringe_abs^\" in f) or (\"dSlope_fringe dSlope_fringe_abs\" in f)]\n",
  565. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  566. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  567. "poly_features = np.delete(poly_features, remove )\n",
  568. "#print(poly_features)\n",
  569. "\n",
  570. "lin_reg = LinearRegression(fit_intercept=False)\n",
  571. "#lin_reg = Lasso(fit_intercept=False, alpha=0.0000000001)\n",
  572. "lin_reg.fit( X_train_model, y_train)\n",
  573. "y_pred_test = lin_reg.predict( X_test_model )\n",
  574. "print(\"intercept=\", lin_reg.intercept_)\n",
  575. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  576. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  577. "print(\"RMSE =\", mean_squared_error(y_test, y_pred_test, squared=False))\n",
  578. "print(\"straight RMSE =\", mean_squared_error(array[\"ty_ref\"], array[\"ty\"], squared=False))\n",
  579. "print(format_array(\"ty_ref_straight_diff\", lin_reg.coef_))"
  580. ]
  581. },
  582. {
  583. "cell_type": "code",
  584. "execution_count": 12,
  585. "metadata": {},
  586. "outputs": [
  587. {
  588. "data": {
  589. "text/plain": [
  590. "<AxesSubplot: >"
  591. ]
  592. },
  593. "execution_count": 12,
  594. "metadata": {},
  595. "output_type": "execute_result"
  596. },
  597. {
  598. "data": {
  599. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiIAAAGsCAYAAADg5swfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAnZUlEQVR4nO3df3RU5YH/8c/MEIZfSWpISYwEEqw1hVDFxC1UUNmtgFvbcs4eLRZZYYUDp2jBnm4r5+sPUCv01O262iqI3WixLZ5Wpbtfd12w/QpaQpdAsEKQBSVI+ZGISBJEQsg83z/YjAyZzI9k7n3uzLxf58w5zJ1n7n3m8cp8eH6NzxhjBAAAYIHfdgUAAED2IogAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAa9ImiGzatElf+9rXVFJSIp/Pp3Xr1jl6vbNnz+ree+9VeXm5Bg4cqFGjRunBBx9UKBRy9LoAAGSTfrYrkKiPP/5YV1xxhebMmaO/+7u/c/x6P/rRj7Ry5Uo999xzGjNmjOrq6jRnzhzl5+dr0aJFjl8fAIBskDZB5MYbb9SNN97Y4+tnzpzRvffeq1/+8pc6ceKEKisr9aMf/UjXX399r65XW1urb3zjG/rqV78qSSorK9Ovf/1r1dXV9ep8AACgu7QZmolnzpw5+uMf/6i1a9fqz3/+s26++WZNmzZNe/fu7dX5Jk6cqN///vf6n//5H0nSW2+9pTfffFN/+7d/m8pqAwCQ1dKmRySWd999V7/+9a/1l7/8RSUlJZKk733ve3r11VdVU1OjRx55JOlz/uAHP1BLS4sqKioUCATU2dmpH/7wh7r11ltTXX0AALJWRgSR7du3yxijz3/+8xHH29vbNXToUElSY2OjysvLY55n4cKF+ulPfypJeuGFF/T888/rV7/6lcaMGaMdO3Zo8eLFKikp0e233+7MBwEAIMtkRBAJhUIKBALatm2bAoFAxGtDhgyRJF1yySXavXt3zPNcdNFF4T//4z/+o+655x7NmDFDkjR27FgdOHBAy5cvJ4gAAJAiGRFExo0bp87OTjU3N2vSpElRy+Tk5KiioiLhc546dUp+f+QUmkAgwPJdAABSKG2CyMmTJ7Vv377w8/3792vHjh0qKCjQ5z//ec2cOVN///d/r3/6p3/SuHHjdOzYMf3hD3/Q2LFjezXB9Gtf+5p++MMfasSIERozZozq6+v1k5/8RP/wD/+Qyo8FAEBW8xljjO1KJOL111/X5MmTux2//fbb9eyzz6qjo0MPP/ywfvGLX+jQoUMaOnSoJkyYoGXLlmns2LFJX6+trU333XefXn75ZTU3N6ukpES33nqr7r//fvXv3z8VHwkAgKyXNkEEAABknozZRwQAAKQfgggAALDG05NVQ6GQDh8+rNzcXPl8PtvVAQAACTDGqK2tTSUlJd1WoF7I00Hk8OHDKi0ttV0NAADQCwcPHtTw4cNjlvF0EMnNzZV07oPk5eVZrg0AAEhEa2urSktLw9/jsXg6iHQNx+Tl5RFEAABIM4lMq2CyKgAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGs8/VszTpm/pk6Nx05JksoKB2nVrGrLNQIAIDtlZRBpPHZKe5rabFcDAICsx9AMAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAA
Aa1wLIsuXL5fP59PixYvduiQAAPA4V4LI1q1b9fTTT+uLX/yiG5cDAABpwvEgcvLkSc2cOVOrV6/WRRdd5PTlAABAGnE8iCxcuFBf/epX9ZWvfCVu2fb2drW2tkY8AABA5urn5MnXrl2r7du3a+vWrQmVX758uZYtW+ZklQAAgIc41iNy8OBBLVq0SM8//7wGDBiQ0HuWLFmilpaW8OPgwYNOVQ8AAHiAYz0i27ZtU3Nzs6qqqsLHOjs7tWnTJv30pz9Ve3u7AoFAxHuCwaCCwaBTVQIAAB7jWBD5m7/5G7399tsRx+bMmaOKigr94Ac/6BZCAABA9nEsiOTm5qqysjLi2ODBgzV06NBux20KGWO7CgAAZK2s21l1a+NxHTrxSfj53uaTmr+mTnWNxy3WCgCA7OQzxrtdAq2trcrPz1dLS4vy8vL6fL41Ww7o/nU7deEHDvh9CoWMHppeqdvGj+zzdQAAyGbJfH9nTY/I1sbjUUOIJHWGjIyk+9btpGcEAAAXZU0QeeaN9+T3+2KW8ft9eubN/S7VCAAAZEUQOd3RqQ0NTeoMxR6F6gwZrd91VKc7Ol2qGQAA2S0rgkjb6bOKk0HCQuZceQAA4LysCCK5A/opzqhMmN93rjwAAHBeVgSRATkB3TC6SIE4aSTg92nKmGINyGGzNQAA3JAVQUSS5k4apVCc8ZlQyGjuxHKXagQAALImiFxdVqCHplcqWp9IwO+TT9JD0ytVXVbgdtUAAMhaWTUZ4rbxI1VRnKvZNVt1sv3TCak3jC7S3InlhBAAAFyWVUFEkqrLCnTJZwZqT1ObJOmyYUO08raqOO8CAABOyJqhmZ74fQkupwEAACmX9UEEAADYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFhDEAEAANYQRAAAgDUEEQAAYA1BBAAAWEMQAQAA1hBEAACANQQRAABgDUEEAABYQxABAADWEEQAAIA1BBEAAGANQQQAAFjTz3YFbCgrHBT1zwAAwF1ZGURWzaq2XQUAACCGZgAAgEUEEQAAYA1BBAAAWEMQAQAA1hBEAACANY4GkeXLl+vqq69Wbm6uhg0bpunTp2vPnj1OXhIAAKQRR4PIxo0btXDhQm3ZskUbNmzQ2bNnNWXKFH388cdOXhYAAKQJnzHGuHWxDz74QMOGDdPGjRt17bXXxi3f2tqq/Px8tbS0KC8vz4UaAgCAvkrm+9vVDc1aWlokSQUFBVFfb29vV3t7e/h5a2urK/UCAAB2uDZZ1Rij7373u5o4caIqKyujllm+fLny8/PDj9LSUreqBwAALHBtaGbhwoV65ZVX9Oabb2r48OFRy0TrESktLWVoBgCANOK5oZm77rpL//Zv/6ZNmzb1GEIkKRgMKhgMulElAADgAY4GEWOM7rrrLr388st6/fXXVV5e7uTlAABAmnE0iCxcuFC/+tWv9Lvf/U65ubk6evSoJCk/P18DBw508tIAACANODpHxOfzRT1eU1Oj2bNnx30/y3cBAEg/npkj4uIWJQAAIA3xWzMAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsMbVX99Fepi/pk6Nx05JksoKB2nVrGrLNQIAZCqCCLppPHZKe5rabFcDAJAFCCJIK/TWAEBmIYggrdBbAwCZhcmqAADAGoIIAACwhiACAACsIYgAAABrCCIAAMAagggAALCGIAIAAKwhiAAAAGsIIgAAwBqCCAAAsIYgAgAArCGIAAAAawgiAADAGoIIAACwhiACAACsIYgAAABr+tmuAAD01fw1dWo8dkqSVFY4SKtmVVuuEYB
EEUQApL3GY6e0p6nNdjUA9AJDMwAAwBqCCAAAsIYgAgAArCGIIG2FjLFdBQBAHxFEkDa2Nh7XoROfhJ/vbT6p+WvqVNd43GK
  600. "text/plain": [
  601. "<Figure size 640x480 with 1 Axes>"
  602. ]
  603. },
  604. "metadata": {},
  605. "output_type": "display_data"
  606. }
  607. ],
  608. "source": [
  609. "import seaborn as sns\n",
  610. "import numpy as np\n",
  611. "bins = 25  # int -> number of tx bins; alternative (explicit bin centres): np.linspace( -1.5, 1.5, 50 )\n",
  612. "sns.regplot(x=ak.to_numpy(array[\"tx\"]), y=ak.to_numpy(array[\"CY_ex\"]), x_bins=bins, fit_reg=False, x_estimator=np.mean)"
  613. ]
  614. },
  615. {
  616. "cell_type": "code",
  617. "execution_count": 26,
  618. "metadata": {},
  619. "outputs": [
  620. {
  621. "name": "stdout",
  622. "output_type": "stream",
  623. "text": [
  624. "['ty dSlope_fringe_abs' 'ty tx dSlope_fringe' 'ty dSlope_fringe^2'\n",
  625. " 'ty^3 dSlope_fringe_abs' 'ty tx^2 dSlope_fringe_abs']\n",
  626. "intercept= 0.0\n",
  627. "coef= {'ty dSlope_fringe_abs': -1.210948273291364e-05, 'ty tx dSlope_fringe': 8.351598715575842e-05, 'ty dSlope_fringe^2': -3.9073446027618556e-05, 'ty^3 dSlope_fringe_abs': 0.0002466815971481776, 'ty tx^2 dSlope_fringe_abs': 0.0001861876635962951}\n",
  628. "r2 score= 0.9704091284593364\n",
  629. "RMSE = 1.4317438911860726e-07\n",
  630. "constexpr std::array CY_ex{-1.210948273291364e-05f, 8.351598715575842e-05f, -3.9073446027618556e-05f, 0.0002466815971481776f, 0.0001861876635962951f};\n"
  631. ]
  632. }
  633. ],
  634. "source": [
  635. "features = [  # raw inputs that are expanded into polynomial terms for the CY_ex fit\n",
  636. " \"ty\", \n",
  637. " \"tx\",\n",
  638. " \"dSlope_fringe\",\n",
  639. " \"dSlope_fringe_abs\"\n",
  640. "]\n",
  641. "target_feat = \"CY_ex\"\n",
  642. "\n",
  643. "data = np.column_stack([ak.to_numpy(array[feat]) for feat in features])\n",
  644. "target = ak.to_numpy(array[target_feat])\n",
  645. "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)\n",
  646. "\n",
  647. "poly = PolynomialFeatures(degree=4, include_bias=False)  # all monomials of the inputs up to total degree 4, no constant column\n",
  648. "X_train_model = poly.fit_transform( X_train )\n",
  649. "X_test_model = poly.transform( X_test )  # reuse the expansion fitted on the training split; never refit on test data\n",
  650. "poly_features = poly.get_feature_names_out(input_features=features)\n",
  651. "#print(poly_features)\n",
  652. "keep = [  # polynomial terms retained in the model; commented entries are alternatives that were tried\n",
  653. " \"ty dSlope_fringe^2\",\n",
  654. " \"ty dSlope_fringe_abs\", \n",
  655. " \"ty tx^2 dSlope_fringe_abs\", \n",
  656. " \"ty^3 dSlope_fringe_abs\", \n",
  657. " #\"ty^3 tx^2 dSlope_fringe_abs\",\n",
  658. " #\"dSlope_fringe\",\n",
  659. " \"ty tx dSlope_fringe\",\n",
  660. " #\"ty tx^3 dSlope_fringe\",\n",
  661. " #\"ty^3 tx dSlope_fringe\",\n",
  662. "]\n",
  663. "remove = [i for i, f in enumerate(poly_features) if f not in keep]  # column indices of every term not listed in keep\n",
  664. "#remove = [i for i, f in enumerate(poly_features) if (\"dSlope_fringe\" not in f)]# or (\"dSlope_fringe^\" in f) or (\"dSlope_fringe_abs^\" in f) or (\"dSlope_fringe dSlope_fringe_abs\" in f)]\n",
  665. "X_train_model = np.delete( X_train_model, remove, axis=1)\n",
  666. "X_test_model = np.delete( X_test_model, remove, axis=1)\n",
  667. "poly_features = np.delete(poly_features, remove )  # surviving names stay in PolynomialFeatures order, matching the coefficient order below\n",
  668. "print(poly_features)\n",
  669. "\n",
  670. "lin_reg = LinearRegression(fit_intercept=False)  # no constant term is fitted\n",
  671. "#lin_reg = Lasso(fit_intercept=False, alpha=0.00000000001)\n",
  672. "lin_reg.fit( X_train_model, y_train)\n",
  673. "y_pred_test = lin_reg.predict( X_test_model )\n",
  674. "print(\"intercept=\", lin_reg.intercept_)\n",
  675. "print(\"coef=\", dict(zip(poly_features, lin_reg.coef_)))\n",
  676. "print(\"r2 score=\", lin_reg.score(X_test_model, y_test))\n",
  677. "print(\"RMSE =\", np.sqrt(mean_squared_error(y_test, y_pred_test)))  # explicit sqrt: the squared= keyword was deprecated in scikit-learn 1.4 and later removed\n",
  678. "#print(\"straight RMSE =\", mean_squared_error(array[\"ty_ref\"], array[\"ty\"], squared=False))\n",
  679. "print(format_array(\"CY_ex\", lin_reg.coef_))"
  680. ]
  681. },
  682. {
  683. "cell_type": "code",
  684. "execution_count": null,
  685. "metadata": {},
  686. "outputs": [],
  687. "source": []
  688. }
  689. ],
  690. "metadata": {
  691. "kernelspec": {
  692. "display_name": "Python 3.10.6 (conda)",
  693. "language": "python",
  694. "name": "python3"
  695. },
  696. "language_info": {
  697. "codemirror_mode": {
  698. "name": "ipython",
  699. "version": 3
  700. },
  701. "file_extension": ".py",
  702. "mimetype": "text/x-python",
  703. "name": "python",
  704. "nbconvert_exporter": "python",
  705. "pygments_lexer": "ipython3",
  706. "version": "3.10.6"
  707. },
  708. "orig_nbformat": 4,
  709. "vscode": {
  710. "interpreter": {
  711. "hash": "a2eff8b4da8b8eebf5ee2e5f811f31a557e0a202b4d2f04f849b065340a6eda6"
  712. }
  713. }
  714. },
  715. "nbformat": 4,
  716. "nbformat_minor": 2
  717. }