Compare commits: 2cedfd0e9f ... 7bbcc601b7

2 commits:
- 7bbcc601b7
- 36f3b3ede8
notebooks/01_intro_ex_1a_sol.ipynb | 139 lines added (new normal file)
@ -0,0 +1,139 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Exercise 1: Create numpy array and draw rgb color objects"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"create data array 2x2 as pixel position and 1x3 as rgb color data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"width, height = 200, 200\n",
|
||||||
|
"data = np.zeros((height, width, 3), dtype=np.uint8)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"draw blue cross"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"x = np.arange(width)\n",
|
||||||
|
"x_1 = np.arange(width)\n",
|
||||||
|
"x_2 = np.arange(width-1,-1,-1)\n",
|
||||||
|
"y = np.arange(height)\n",
|
||||||
|
"data[x_1,y] = [0,0,255]\n",
|
||||||
|
"data[x_2,y] = [0,0,255]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
" draw a square "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"lower = 55\n",
|
||||||
|
"upper = 75\n",
|
||||||
|
"data[lower:upper,lower:upper] = [0,255,0]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"create a mask of a circle using indexing\n",
|
||||||
|
"np.newaxis adds another dimension\n",
|
||||||
|
"we create a row and column vector and fill it using the condition"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"x_center = 100\n",
|
||||||
|
"y_center = 100\n",
|
||||||
|
"radius = 10\n",
|
||||||
|
"mask = (x[np.newaxis,:]-x_center)**2 + (y[:,np.newaxis]-y_center)**2 < radius**2\n",
|
||||||
|
"data[mask] = [255,0,0]\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# plot image\n",
|
||||||
|
"plt.figure(figsize=(4.,4.),dpi=100,facecolor='lightgrey')\n",
|
||||||
|
"plt.imshow(data)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
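A minimal standalone sketch of the boolean-mask technique this notebook uses (`np.newaxis` turns `x` and `y` into a row and a column vector so the comparison broadcasts to a 2D mask); array size and circle parameters are the notebook's own values:

```python
import numpy as np
import matplotlib.pyplot as plt

width, height = 200, 200
data = np.zeros((height, width, 3), dtype=np.uint8)

x = np.arange(width)
y = np.arange(height)

# row vector (1, width) + column vector (height, 1) -> (height, width) boolean mask
x_center, y_center, radius = 100, 100, 10
mask = (x[np.newaxis, :] - x_center)**2 + (y[:, np.newaxis] - y_center)**2 < radius**2
data[mask] = [255, 0, 0]   # fill the circle in red

plt.imshow(data)
plt.show()
```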
notebooks/01_intro_ex_1b_sol.ipynb | 133 lines added (new normal file)
@ -0,0 +1,133 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Exercise 1b: Read a binary file which contains pixel data and apply\n",
|
||||||
|
"transformations"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# load figure as 2D array \n",
|
||||||
|
"data = np.load('horse.npy')\n",
|
||||||
|
"print(data.shape)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# just scale the data by a factor and shift by trans\n",
|
||||||
|
"trans = np.ones(data.shape)\n",
|
||||||
|
"trans[0,:] *=0.6\n",
|
||||||
|
"trans[1,:] *=0.4\n",
|
||||||
|
"factor = 0.5 \n",
|
||||||
|
"data_scale = data * factor + trans"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#compression in x and y \n",
|
||||||
|
"sx = 0.4\n",
|
||||||
|
"sy = 0.9\n",
|
||||||
|
"t = np.array([[sx,0],[0,sy]])\n",
|
||||||
|
"data_comp = t@data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#rotation by an angle theta\n",
|
||||||
|
"theta = 0.5\n",
|
||||||
|
"data_rot = np.array([[np.cos(theta),-np.sin(theta)],[np.sin(theta), np.cos(theta)]])@data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#spiegelung an der x Achse\n",
|
||||||
|
"tx = np.array([[1,0],[0,-1]]) # mirror x axis\n",
|
||||||
|
"ty = np.array([[-1,0],[0,1]]) # mirror y axis\n",
|
||||||
|
"tp = np.array([[-1,0],[0,-1]]) # mirror (0,0)\n",
|
||||||
|
"data_mirror = tp@data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# create figure for the transformations\n",
|
||||||
|
"plt.figure(figsize=(10.0,10.0),dpi=100,facecolor='lightgrey')\n",
|
||||||
|
"plt.suptitle('Plot Transformations')\n",
|
||||||
|
"plt.subplot(2,2,1)\n",
|
||||||
|
"plt.title('original picture')\n",
|
||||||
|
"plt.plot(data[0,:],data[1,:],'.')\n",
|
||||||
|
"plt.axis([-1.2,1.2,-1.2,1.2])\n",
|
||||||
|
"plt.subplot(2,2,2)\n",
|
||||||
|
"plt.title('scaling and translation')\n",
|
||||||
|
"plt.plot(data_scale[0,:],data_scale[1,:],'.')\n",
|
||||||
|
"plt.axis([-1.2,1.2,-1.2,1.2])\n",
|
||||||
|
"plt.subplot(2,2,3)\n",
|
||||||
|
"plt.title('compression')\n",
|
||||||
|
"plt.plot(data_comp[0,:],data_comp[1,:],'.')\n",
|
||||||
|
"plt.axis([-1.2,1.2,-1.2,1.2])\n",
|
||||||
|
"plt.subplot(2,2,4)\n",
|
||||||
|
"plt.title('rotation and mirror at p(0,0)')\n",
|
||||||
|
"plt.plot(data_rot[0,:],data_rot[1,:],'.')\n",
|
||||||
|
"plt.plot(data_mirror[0,:],data_mirror[1,:],'.')\n",
|
||||||
|
"plt.axis([-1.2,1.2,-1.2,1.2])"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
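A minimal sketch of the 2x2 matrix transformations applied in this notebook; since `horse.npy` is not part of the diff, a generated ellipse stands in for the data, which is assumed to be a 2 x N array of x/y points:

```python
import numpy as np
import matplotlib.pyplot as plt

# stand-in for np.load('horse.npy'): a 2 x N array of points on an ellipse
phi = np.linspace(0, 2*np.pi, 200)
data = np.vstack([0.8*np.cos(phi), 0.3*np.sin(phi)])

theta = 0.5
rot = np.array([[np.cos(theta), -np.sin(theta)],
                [np.sin(theta),  np.cos(theta)]])
data_rot = rot @ data                  # rotation by theta

tp = np.array([[-1, 0], [0, -1]])      # point reflection at the origin
data_mirror = tp @ data

plt.plot(data[0, :], data[1, :], '.', label='original')
plt.plot(data_rot[0, :], data_rot[1, :], '.', label='rotated')
plt.plot(data_mirror[0, :], data_mirror[1, :], '.', label='mirrored')
plt.legend()
plt.show()
```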
notebooks/01_intro_ex_2_sol.ipynb | 559 lines added (new normal file; diff suppressed because one or more lines are too long)
notebooks/02_fit_fitGraph.ipynb | 314 lines added (new normal file; diff suppressed because one or more lines are too long)
notebooks/02_fit_iminuitFit.ipynb | 291 lines added (new normal file)
@ -0,0 +1,291 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Fit with the python interface to Minuit 2 called iminuit\n",
|
||||||
|
"https://iminuit.readthedocs.io/en/stable/"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"plt.rcParams[\"font.size\"] = 20\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Data "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='d')\n",
|
||||||
|
"dx = np.array([0.1,0.1,0.5,0.1,0.5,0.1,0.5,0.1,0.5,0.1], dtype='d')\n",
|
||||||
|
"y = np.array([1.1 ,2.3 ,2.7 ,3.2 ,3.1 ,2.4 ,1.7 ,1.5 ,1.5 ,1.7 ], dtype='d')\n",
|
||||||
|
"dy = np.array([0.15,0.22,0.29,0.39,0.31,0.21,0.13,0.15,0.19,0.13], dtype='d')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Define fit function"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def pol3(a0, a1, a2, a3):\n",
|
||||||
|
" return a0 + x*a1 + a2*x**2 + a3*x**3"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"least-squares function: sum of data residuals squared"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def LSQ(a0, a1, a2, a3):\n",
|
||||||
|
" return np.sum((y - pol3(a0, a1, a2, a3)) ** 2 / dy ** 2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"import Minuit object"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from iminuit import Minuit"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Minuit instance using LSQ function to minimize"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"LSQ.errordef = Minuit.LEAST_SQUARES\n",
|
||||||
|
"#LSQ.errordef = Minuit.LIKELIHOOD\n",
|
||||||
|
"m = Minuit(LSQ,a0=-1.3, a1=2.6 ,a2=-0.24 ,a3=0.005)\n",
|
||||||
|
"m.fixed[\"a3\"] = True \n",
|
||||||
|
"m.params"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"run migrad"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"m.fixed[\"a3\"] = False\n",
|
||||||
|
"m.params\n",
|
||||||
|
"m.migrad()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Get contour"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"m.draw_mncontour(\"a2\", \"a3\", cl=[1, 2, 3])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Improve the fit"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"m.hesse()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"m.minos()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"access fit results"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(m.values,m.errors)\n",
|
||||||
|
"a0_fit = m.values[\"a0\"]\n",
|
||||||
|
"a1_fit = m.values[\"a1\"]\n",
|
||||||
|
"a2_fit = m.values[\"a2\"]\n",
|
||||||
|
"a3_fit = m.values[\"a3\"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print (m.covariance)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"prepare data to display fitted function "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"x_plot = np.linspace( 0.5, 10.5 , 500 )\n",
|
||||||
|
"y_fit = a0_fit + a1_fit * x_plot + a2_fit * x_plot**2 + a3_fit * x_plot**3"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The Minos algorithm uses the profile likelihood method to compute (generally asymmetric) confidence intervals. This can be plotted"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"m.draw_profile(\"a2\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Get a 2D contour of the function around the minimum for 2 parameters"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"m.draw_mncontour(\"a2\", \"a3\" , cl=[1, 2, 3])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"lotlib"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"plt.figure()\n",
|
||||||
|
"plt.errorbar(x, y, dy , dx, fmt=\"o\")\n",
|
||||||
|
"plt.plot(x_plot, y_fit)\n",
|
||||||
|
"plt.title(\"iminuit Fit Test\")\n",
|
||||||
|
"plt.xlim(-0.1, 10.1)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
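The fit above is spread over many small cells; for reference, a compact self-contained version of the same least-squares fit (same data, same pol3 model, assuming the iminuit 2.x API the notebook uses):

```python
import numpy as np
from iminuit import Minuit

x  = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='d')
y  = np.array([1.1, 2.3, 2.7, 3.2, 3.1, 2.4, 1.7, 1.5, 1.5, 1.7], dtype='d')
dy = np.array([0.15, 0.22, 0.29, 0.39, 0.31, 0.21, 0.13, 0.15, 0.19, 0.13], dtype='d')

def pol3(a0, a1, a2, a3):
    return a0 + a1*x + a2*x**2 + a3*x**3

def LSQ(a0, a1, a2, a3):
    # chi2-like cost: squared residuals weighted by the y uncertainties
    return np.sum((y - pol3(a0, a1, a2, a3))**2 / dy**2)

LSQ.errordef = Minuit.LEAST_SQUARES
m = Minuit(LSQ, a0=-1.3, a1=2.6, a2=-0.24, a3=0.005)
m.migrad()    # find the minimum
m.hesse()     # parabolic uncertainties
print(m.values, m.errors)
```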
notebooks/02_fit_numpyFit.ipynb | 338 lines added (new normal file; diff suppressed because one or more lines are too long)
notebooks/03_ml_basics_display_Clothing.ipynb | 122 lines added (new normal file)
@ -0,0 +1,122 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "8f9f0e7b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Display fashion_mnist dataset of clothes from Zalando"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "cc829d9a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"from tensorflow import keras\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "63348efe",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Load the MNIST Fashion dataset\n",
|
||||||
|
"(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
|
||||||
|
"# Set the class names\n",
|
||||||
|
"class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', \n",
|
||||||
|
" 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a6c86027",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# print the shape of the numpy arrays\n",
|
||||||
|
"print ('Print shape of pixel data')\n",
|
||||||
|
"print(x_train.shape)\n",
|
||||||
|
"print ('Print shape of labels')\n",
|
||||||
|
"print(y_train.shape)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "cc58b142",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Normalize pixel values to between 0 and 1\n",
|
||||||
|
"x_train = x_train.astype(\"float32\") / 255.0\n",
|
||||||
|
"x_test = x_test.astype(\"float32\") / 255.0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c7976111",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# choose an image num to print\n",
|
||||||
|
"num = 20\n",
|
||||||
|
"image = x_train[num]\n",
|
||||||
|
"label = y_train[num]\n",
|
||||||
|
"\n",
|
||||||
|
"print ('Print normailzed pixel data of image ',num, ' :')\n",
|
||||||
|
"print(x_train[num])\n",
|
||||||
|
"print ('Print label of image ',num , ' :' )\n",
|
||||||
|
"print(y_train[num])\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "64a46625",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"plt.figure(figsize=(10,10))\n",
|
||||||
|
"for i in range(25):\n",
|
||||||
|
" plt.subplot(5,5,i+1)\n",
|
||||||
|
" plt.xticks([])\n",
|
||||||
|
" plt.yticks([])\n",
|
||||||
|
" plt.grid(False)\n",
|
||||||
|
" plt.imshow(x_train[i], cmap=plt.cm.binary)\n",
|
||||||
|
" plt.xlabel(class_names[y_train[i]])\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
notebooks/03_ml_basics_display_HandWrt.ipynb | 134 lines added (new normal file)
@ -0,0 +1,134 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "3644475e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Display hand writing dataset"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8125479b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "d45b964f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Load training dataset of 60000 images with greyscale values in 28 x 28\n",
|
||||||
|
"# and labels \n",
|
||||||
|
"(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "fa8ae2a6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# print the shape of the numpy arrays\n",
|
||||||
|
"print ('Print shape of pixel data')\n",
|
||||||
|
"print(x_train.shape)\n",
|
||||||
|
"print ('Print shape of labels')\n",
|
||||||
|
"print(y_train.shape)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "be70973e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# normalize pixel to 0-1\n",
|
||||||
|
"x_train = x_train / 255\n",
|
||||||
|
"x_test = x_test / 255"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "55f457d5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# choose an image num to display and print\n",
|
||||||
|
"num = 20\n",
|
||||||
|
"\n",
|
||||||
|
"image = x_train[num]\n",
|
||||||
|
"label = y_train[num]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "149788b7",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# plot the image using imshow\n",
|
||||||
|
"plt.imshow(image, cmap='gray')\n",
|
||||||
|
"# set the title\n",
|
||||||
|
"plt.title(\"Label: %d\" % label )\n",
|
||||||
|
"# remove the axis labels and ticks\n",
|
||||||
|
"plt.axis('off')\n",
|
||||||
|
"# show the plot\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "232ef6ca",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Plot 16 examples from the numpy array which was read in above\n",
|
||||||
|
"# and display it\n",
|
||||||
|
"fig, axes = plt.subplots(4, 4, figsize=(10, 10))\n",
|
||||||
|
"for i , ax in enumerate(axes.ravel()):\n",
|
||||||
|
" ax.imshow(x_train[num+i], cmap='gray')\n",
|
||||||
|
" ax.set_title(\"Label: %d\" % y_train[num+i])\n",
|
||||||
|
" ax.axis('off')\n",
|
||||||
|
"plt.suptitle(\"Examples of training set images\")\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
notebooks/03_ml_basics_display_HorseOrHuman.ipynb | 197 lines added (new normal file)
@ -0,0 +1,197 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2eaba66b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Read and Display Horse or Human machine learning dataset"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f1e48ac0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import tensorflow_datasets as tfds\n",
|
||||||
|
"from tensorflow.keras import regularizers\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "feda024e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Load the horse or human dataset\n",
|
||||||
|
"#(300, 300, 3) unint8\n",
|
||||||
|
"dataset, label = tfds.load('horses_or_humans', with_info=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "35991dec",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Extract the horse/human class\n",
|
||||||
|
"horse_ds = dataset['train'].filter(lambda x: x['label'] == 0)\n",
|
||||||
|
"human_ds = dataset['train'].filter(lambda x: x['label'] == 1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "fab03aa8",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Take a few examples < 16\n",
|
||||||
|
"n_examples = 5\n",
|
||||||
|
"horse_examples = horse_ds.take(n_examples)\n",
|
||||||
|
"human_examples = human_ds.take(n_examples)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c33f1acd",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Display the examples\n",
|
||||||
|
"fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n",
|
||||||
|
"for i, example in enumerate(human_examples):\n",
|
||||||
|
" image = example['image']\n",
|
||||||
|
" axes[i].imshow(image)\n",
|
||||||
|
" axes[i].set_title(f\"humans {i+1}\")\n",
|
||||||
|
"plt.show()\n",
|
||||||
|
"\n",
|
||||||
|
"fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n",
|
||||||
|
"for i, example in enumerate(horse_examples):\n",
|
||||||
|
" image = example['image']\n",
|
||||||
|
" axes[i].imshow(image)\n",
|
||||||
|
" axes[i].set_title(f\"horses {i+1}\")\n",
|
||||||
|
"plt.show()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "25f3eeb3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Split the dataset into training and validation sets\n",
|
||||||
|
"# as_supervised: Specifies whether to return the dataset as a tuple\n",
|
||||||
|
"# of (input, label) pairs.\n",
|
||||||
|
"train_dataset, valid_dataset = tfds.load('horses_or_humans', split=['train','test'], as_supervised=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "29dc0e62",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Get the number of elements in the training and validation dataset\n",
|
||||||
|
"train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n",
|
||||||
|
"valid_size = tf.data.experimental.cardinality(valid_dataset).numpy()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "db8aaf91",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"IMG_SIZE = 300\n",
|
||||||
|
"NUM_CLASSES = 2\n",
|
||||||
|
"\n",
|
||||||
|
"def preprocess(image, label):\n",
|
||||||
|
" image = tf.cast(image, tf.float32)\n",
|
||||||
|
"# # Resize the images to a fixed size\n",
|
||||||
|
" image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))\n",
|
||||||
|
"# # Rescale the pixel values to be between 0 and 1\n",
|
||||||
|
" image = image / 255.0\n",
|
||||||
|
" label = tf.one_hot(label, NUM_CLASSES)\n",
|
||||||
|
" return image, label"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "d59661c3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Apply the preprocessing function to the datasets\n",
|
||||||
|
"train_dataset = train_dataset.map(preprocess)\n",
|
||||||
|
"valid_dataset = valid_dataset.map(preprocess)\n",
|
||||||
|
"\n",
|
||||||
|
"# Batch and shuffle the datasets\n",
|
||||||
|
"train_dataset = train_dataset.shuffle(2000).batch(80)\n",
|
||||||
|
"valid_dataset = valid_dataset.batch(20)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "9399bc99",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Get the number of elements in the trainingand validation dataset\n",
|
||||||
|
"train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n",
|
||||||
|
"valid_size = tf.data.experimental.cardinality(valid_dataset).numpy()\n",
|
||||||
|
"print(\"Training dataset size:\", train_size)\n",
|
||||||
|
"print(\"Validation dataset size:\", valid_size)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "13af7d53",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Store images and labels of the validation data for predictions\n",
|
||||||
|
"for images, labels in valid_dataset:\n",
|
||||||
|
" x_val = images\n",
|
||||||
|
" y_val = labels\n",
|
||||||
|
" \n",
|
||||||
|
"print(x_val.shape, y_val.shape)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
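The resize / rescale / one-hot steps above can be read as one short tf.data pipeline; this sketch only condenses the notebook's own calls (it assumes tensorflow and tensorflow_datasets are installed and the dataset can be downloaded):

```python
import tensorflow as tf
import tensorflow_datasets as tfds

IMG_SIZE, NUM_CLASSES = 300, 2

def preprocess(image, label):
    image = tf.cast(image, tf.float32)
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))   # fixed input size
    image = image / 255.0                                   # rescale to [0, 1]
    return image, tf.one_hot(label, NUM_CLASSES)

train_ds, valid_ds = tfds.load('horses_or_humans',
                               split=['train', 'test'], as_supervised=True)
train_ds = train_ds.map(preprocess).shuffle(2000).batch(80)
valid_ds = valid_ds.map(preprocess).batch(20)
```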
notebooks/03_ml_basics_ex_4_mlp_clothing.ipynb | 236 lines added (new normal file)
@ -0,0 +1,236 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "6c180d4b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Exercise 3\n",
|
||||||
|
"# fashion mnist data\n",
|
||||||
|
"# MLP model with two hidden layers, each with a ReLU activation function.\n",
|
||||||
|
"# Input data is flattened to a 1D array and passed to the model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "b0e31b9c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"from tensorflow import keras\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1ae1412e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Load the MNIST Fashion dataset\n",
|
||||||
|
"(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f8814914",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Normalize pixel values to between 0 and 1\n",
|
||||||
|
"x_train = x_train.astype(\"float32\") / 255.0\n",
|
||||||
|
"x_test = x_test.astype(\"float32\") / 255.0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "2810da39",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# MNIST dataset images have a shape of (28, 28). The images are flattened\n",
|
||||||
|
"# into a 1D array of length 784 \n",
|
||||||
|
"x_train = x_train.reshape(-1, 784)\n",
|
||||||
|
"x_test = x_test.reshape(-1, 784)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "96f7ff8a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# The model is defined here with three dense (fully connected) layers\n",
|
||||||
|
"# The first layer is a Dense layer with 128 units and a ReLU activation\n",
|
||||||
|
"# function with an input shape of (784,). This layer serves as the input\n",
|
||||||
|
"# layer of the model.\n",
|
||||||
|
"# The second layer is also a Dense layer with 64 units and a ReLU activation\n",
|
||||||
|
"# function. This layer takes the output of the previous layer as input, and\n",
|
||||||
|
"# applies a non-linear transformation to it to produce a new set of features\n",
|
||||||
|
"# that the next layer can use.\n",
|
||||||
|
"# The third is another Dense layer, one for each class in the output. The\n",
|
||||||
|
"# output is raw scores or logits for each class since there is no activation\n",
|
||||||
|
"# function . This layer is responsible for producing the final output of the\n",
|
||||||
|
"# model, which can then be used to make predictions.\n",
|
||||||
|
"# With Dropout(0.2) 20 % of the input is randomly droped, this should reduce overfitting\n",
|
||||||
|
"model = keras.Sequential([\n",
|
||||||
|
" keras.layers.Dense(128, activation='relu', input_shape=(784,)),\n",
|
||||||
|
" # keras.layers.Dropout(0.2),\n",
|
||||||
|
" keras.layers.Dense(64, activation='relu'),\n",
|
||||||
|
" keras.layers.Dense(10)\n",
|
||||||
|
"])\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a3fe609c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Compile the model\n",
|
||||||
|
"# adam = specifies the optimizer to use during training\n",
|
||||||
|
"# loss function to use during training, SparseCategoricalCrossentropy loss\n",
|
||||||
|
"# is commonly used for multi-class classification problems.\n",
|
||||||
|
"# from_logits=True indicates that the model's output is a raw score\n",
|
||||||
|
"# for each class and not a probability distribution.\n",
|
||||||
|
"model.compile(optimizer='adam',\n",
|
||||||
|
" loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
|
||||||
|
" metrics=['accuracy'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "cf6c978d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Train the model\n",
|
||||||
|
"history = model.fit(x_train, y_train, epochs=10, validation_split=0.2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "97fc2313",
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Evaluate the model on the test set\n",
|
||||||
|
"test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)\n",
|
||||||
|
"print(\"Test accuracy:\", test_acc)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "ef5f19d0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Plot the training and validation accuracy and loss over time\n",
|
||||||
|
"plt.figure(figsize=(10, 4))\n",
|
||||||
|
"plt.subplot(1, 2, 1)\n",
|
||||||
|
"plt.plot(history.history[\"accuracy\"])\n",
|
||||||
|
"plt.plot(history.history[\"val_accuracy\"])\n",
|
||||||
|
"plt.title(\"Model accuracy\")\n",
|
||||||
|
"plt.ylabel(\"Accuracy\")\n",
|
||||||
|
"plt.xlabel(\"Epoch\")\n",
|
||||||
|
"plt.legend([\"Train\", \"Validation\"], loc=\"lower right\")\n",
|
||||||
|
"\n",
|
||||||
|
"plt.subplot(1, 2, 2)\n",
|
||||||
|
"plt.plot(history.history[\"loss\"])\n",
|
||||||
|
"plt.plot(history.history[\"val_loss\"])\n",
|
||||||
|
"plt.title(\"Model loss\")\n",
|
||||||
|
"plt.ylabel(\"Loss\")\n",
|
||||||
|
"plt.xlabel(\"Epoch\")\n",
|
||||||
|
"plt.legend([\"Train\", \"Validation\"], loc=\"upper right\")\n",
|
||||||
|
"\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c0ebddc4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Plot a confusion matrix of the test set predictions\n",
|
||||||
|
"test_preds = np.argmax(model.predict(x_test), axis=1)\n",
|
||||||
|
"conf_mat = tf.math.confusion_matrix(y_test, test_preds)\n",
|
||||||
|
"plt.imshow(conf_mat, cmap=\"Blues\")\n",
|
||||||
|
"plt.xlabel(\"Predicted labels\")\n",
|
||||||
|
"plt.ylabel(\"True labels\")\n",
|
||||||
|
"plt.xticks(np.arange(10))\n",
|
||||||
|
"plt.yticks(np.arange(10))\n",
|
||||||
|
"plt.colorbar()\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "9175d533",
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Make predictions on the test set\n",
|
||||||
|
"y_pred = model.predict(x_test)\n",
|
||||||
|
"y_pred = np.argmax(y_pred, axis=1)\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot some examples from the test set and their predictions\n",
|
||||||
|
"fig, axes = plt.subplots(4, 4, figsize=(18, 18))\n",
|
||||||
|
"for i, ax in enumerate(axes.ravel()):\n",
|
||||||
|
" ax.matshow(x_test[i].reshape(28, 28), cmap='gray')\n",
|
||||||
|
" ax.set_title(\"True: %d\\nPredict: %d\" % (y_test[i], y_pred[i]))\n",
|
||||||
|
" ax.axis(\"off\")\n",
|
||||||
|
"\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "4a6e85be",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
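Because the model is compiled with from_logits=True, model.predict returns raw scores rather than probabilities; a small sketch (reusing model and x_test from the cells above) of how to turn them into class probabilities when needed:

```python
import numpy as np
import tensorflow as tf

logits = model.predict(x_test)                    # raw scores, shape (N, 10)
probs = tf.nn.softmax(logits, axis=1).numpy()     # per-class probabilities
pred = np.argmax(probs, axis=1)                   # same classes as argmax on the logits
print(probs[0], pred[0])
```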
notebooks/03_ml_basics_minimizer.ipynb | 166 lines added (new normal file)
@ -0,0 +1,166 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "042acd49",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Test a minimizer"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "cb51a492",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from scipy.optimize import minimize\n",
|
||||||
|
"plt.style.use(\"ggplot\")\n",
|
||||||
|
"from matplotlib import colors, cm"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2ac3651a",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"plt.rcParams controls the appearance of your plots globally,\n",
|
||||||
|
"affecting all subsequent plots created in your session."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "97ef9933",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"plt.rcParams[\"axes.grid\"] = False\n",
|
||||||
|
"plt.rcParams.update({'font.size': 20})\n",
|
||||||
|
"plt.rcParams.update({'figure.figsize': (12,9)})\n",
|
||||||
|
"plt.rcParams['lines.markersize'] = 8"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f15200f9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Generate data points with gaussian smearing\n",
|
||||||
|
"data = np.random.uniform(size=100)\n",
|
||||||
|
"labels = 5.*data*data*data + 1 + np.random.normal(loc=0.0, scale=0.1, size=100)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7237f5ed",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# show plot\n",
|
||||||
|
"plt.scatter(data, labels, label=\"data\")\n",
|
||||||
|
"plt.legend()\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0d6e104c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# define chi2 like cost function\n",
|
||||||
|
"def cost(params):\n",
|
||||||
|
" W, b = params\n",
|
||||||
|
" return np.mean((labels - (W*data*data*data + b))**2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "8e00e16a",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"call minimizer\n",
|
||||||
|
"provides a collection of optimization algorithms for finding the minimum or maximum of a given function. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "433975c3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"res = minimize(cost, [1., 1.])\n",
|
||||||
|
"# returns an OptimizeResult object\n",
|
||||||
|
"# x :the solution (minimum) of the optimization problem, represented as an\n",
|
||||||
|
"# array.\n",
|
||||||
|
"# Results of the minimization\n",
|
||||||
|
"W, b = res.x\n",
|
||||||
|
"print ('function value at the minimum and fitted parameters',res.fun,' ',W,' ',b)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1e1f4e81",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"points = np.linspace(0, 1, 100)\n",
|
||||||
|
"prediction = W*points*points*points + b"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "d8de971e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# plot fit model\n",
|
||||||
|
"plt.scatter(data, labels, label=\"data\")\n",
|
||||||
|
"plt.plot(points, prediction, label=\"model\", color=\"green\")\n",
|
||||||
|
"plt.legend()\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "4a7d62c2",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
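scipy.optimize.minimize chooses a default algorithm (BFGS for an unconstrained problem without an explicit gradient); the method argument selects a specific one. A small sketch reusing the cost function defined above:

```python
from scipy.optimize import minimize

# derivative-free Nelder-Mead simplex instead of the default
res_nm = minimize(cost, [1., 1.], method='Nelder-Mead')
print(res_nm.success, res_nm.x, res_nm.fun)
```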
notebooks/03_ml_basics_tf_broadcasting.ipynb | 118 lines added (new normal file)
@ -0,0 +1,118 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "df1f5eb3",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# demonstration of broadcasting in tensorflow"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "1d61c70a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import tensorflow as tf"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "38bca1cf",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Define two tensors with different shapes\n",
|
||||||
|
"a = tf.constant([[1, 2, 3], [4, 5, 6]])\n",
|
||||||
|
"b = tf.constant([10, 20, 30])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c3f382e3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Perform element-wise multiplication using broadcasting\n",
|
||||||
|
"c = a * b\n",
|
||||||
|
"# Print the result\n",
|
||||||
|
"print(c)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "95683fe5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Broadcasting scalar to tensor\n",
|
||||||
|
"x = tf.constant([1, 2, 3])\n",
|
||||||
|
"y = 2\n",
|
||||||
|
"z = x + y # equivalent to tf.add(x, y)\n",
|
||||||
|
"print(z.numpy()) # [3 4 5]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8ed98565",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Broadcasting vector to matrix\n",
|
||||||
|
"x = tf.constant([[1, 2], [3, 4]])\n",
|
||||||
|
"y = tf.constant([1, 2])\n",
|
||||||
|
"z = x + y # equivalent to tf.add(x, y)\n",
|
||||||
|
"print(z.numpy()) # [[2 4], [4 6]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "41f4196f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Broadcasting matrix to tensor\n",
|
||||||
|
"x = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
|
||||||
|
"y = tf.constant([[1], [2]])\n",
|
||||||
|
"z = x + y # equivalent to tf.add(x, y)\n",
|
||||||
|
"print(z.numpy()) # [[[2 3], [4 5]], [[7 8], [9 10]]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "76a5108d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
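Broadcasting aligns shapes from the trailing dimension backwards; a quick check of the shapes involved in the last example above:

```python
import tensorflow as tf

x = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])   # shape (2, 2, 2)
y = tf.constant([[1], [2]])                              # shape (2, 1)
z = x + y                                                # y is broadcast to (2, 2, 2)
print(x.shape, y.shape, z.shape)
```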
notebooks/03_ml_basics_tf_differentiate.ipynb | 102 lines added (new normal file)
@ -0,0 +1,102 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "eefe7571",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# show differentiation in Tensorflow"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a9d7c185",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import tensorflow as tf"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "584384f1",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Define a function to differentiate\n",
|
||||||
|
"def f(x):\n",
|
||||||
|
" return x ** 2 + 2 * x + 1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "70430402",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Create a TensorFlow variable\n",
|
||||||
|
"x = tf.Variable(2.0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "45ea0a33",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Use tf.GradientTape to record the gradients\n",
|
||||||
|
"with tf.GradientTape() as tape:\n",
|
||||||
|
" y = f(x)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f6b1ff27",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Calculate the gradient of y with respect to x\n",
|
||||||
|
"dy_dx = tape.gradient(y, x)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "4f581817",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Print the result\n",
|
||||||
|
"print(dy_dx)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
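As a quick sanity check of the tape result: for f(x) = x^2 + 2x + 1 the analytic derivative is f'(x) = 2x + 2, so at x = 2 the gradient should come out as 6.0:

```python
import tensorflow as tf

x = tf.Variable(2.0)
with tf.GradientTape() as tape:
    y = x**2 + 2*x + 1
print(tape.gradient(y, x).numpy())   # 6.0, matches f'(2) = 2*2 + 2
```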
slides/01_intro_python.md | 988 lines added (new normal file)
@ -0,0 +1,988 @@
|
|||||||
|
% Introduction to Data Analysis and Machine Learning in Physics: \ 1. Introduction to python
|
||||||
|
% Day 1: 11. April 2023
|
||||||
|
% \underline{Jörg Marks}, Klaus Reygers
|
||||||
|
|
||||||
|
## Outline of the $1^{st}$ day
|
||||||
|
|
||||||
|
* Technical instructions for your interactions with the CIP pool, like
|
||||||
|
* using the jupyter hub
|
||||||
|
* using python locally in your own linux environment (anaconda)
|
||||||
|
* access the CIP pool from your own windows or linux system
|
||||||
|
* transfer data from and to the CIP pool
|
||||||
|
|
||||||
|
Can be found in [\textcolor{violet}{CIPpoolAccess.PDF}](https://www.physi.uni-heidelberg.de/~marks/root_einfuehrung/Folien/CIPpoolAccess.PDF)\normalsize
|
||||||
|
|
||||||
|
* Summary of NumPy
|
||||||
|
|
||||||
|
* Plotting with matplotlib
|
||||||
|
|
||||||
|
* Input / output of data
|
||||||
|
|
||||||
|
* Summary of pandas
|
||||||
|
|
||||||
|
* Fitting with iminuit and PyROOT
|
||||||
|
|
||||||
|
|
||||||
|
## A glimpse into python classes
|
||||||
|
|
||||||
|
The following python classes are important to \textcolor{red}{data analysis and machine
|
||||||
|
learning} and will be useful during the course
|
||||||
|
|
||||||
|
* [\textcolor{violet}{NumPy}](https://numpy.org/doc/stable/user/basics.html) - python library adding support for large,
|
||||||
|
multi-dimensional arrays and matrices, along with high-level
|
||||||
|
mathematical functions to operate on these arrays
|
||||||
|
|
||||||
|
* [\textcolor{violet}{matplotlib}](https://matplotlib.org/stable/tutorials/index.html) - a python plotting library
|
||||||
|
|
||||||
|
* [\textcolor{violet}{SciPy}](https://docs.scipy.org/doc/scipy/reference/tutorial/index.html) - extension of NumPy by a collection of
|
||||||
|
mathematical algorithms for minimization, regression,
|
||||||
|
fourier transformation, linear algebra and image processing
|
||||||
|
|
||||||
|
* [\textcolor{violet}{iminuit}](https://iminuit.readthedocs.io/en/stable/) -
|
||||||
|
python wrapper to the data fitting toolkit
|
||||||
|
[\textcolor{violet}{Minuit2}](https://root.cern.ch/doc/master/Minuit2Page.html)
|
||||||
|
developed at CERN by F. James in the 1970s
|
||||||
|
|
||||||
|
* [\textcolor{violet}{PyROOT}](https://root.cern/manual/python/) - python wrapper to the C++ data analysis toolkit
|
||||||
|
ROOT [\textcolor{violet}{(lecture WS 2021 / 22)}](https://www.physi.uni-heidelberg.de/~marks/root_einfuehrung/) used at the LHC
|
||||||
|
|
||||||
|
* [\textcolor{violet}{scikit-learn}](https://scikit-learn.org/stable/) - machine learning library written in
|
||||||
|
python, which makes extensive use of NumPy for high-performance
|
||||||
|
linear algebra algorithms
|
||||||
|
|
||||||
|
## NumPy

\textcolor{blue}{NumPy} (Numerical Python) is an open source python library,
which contains multidimensional array and matrix data structures and methods
to operate on these efficiently. The core object is
a homogeneous n-dimensional array object, \textcolor{blue}{ndarray}, which
allows for a wide variety of \textcolor{blue}{fast operations and mathematical calculations
with arrays and matrices} due to the extensive usage of compiled code.

* It is heavily used in numerous scientific python packages

* `ndarray`s have a fixed size at creation $\rightarrow$ changing the size
  leads to recreation of the array

* Array elements are all required to be of the same data type

* Facilitates advanced mathematical operations on large datasets

* See for a summary, e.g.
  \small
  [\textcolor{violet}{https://cs231n.github.io/python-numpy-tutorial/\#numpy}](https://cs231n.github.io/python-numpy-tutorial/#numpy) \normalsize

\vfill

::: columns
:::: {.column width=30%}
::::
:::

::: columns
:::: {.column width=35%}

`c = []`

`for i in range(len(a)):`

`    c.append(a[i]*b[i])`

::::

:::: {.column width=35%}

with NumPy

`c = a * b`

::::
:::

<!---
It seems we need to indent by hand.
I don't manage to align under the bullet text.
If we do it with columns the vertical space around the code sections is not good.
If we do it without code sections the vertical space is ok, but there is no
code highlighting.
See the different versions of the same page in the following.
-->
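For completeness, here is a runnable version of the comparison shown in the two columns above (a sketch; the arrays `a` and `b` are made-up examples, not part of the lecture material):

\footnotesize
```python
import numpy as np

a = np.array([1.0, 2.0, 3.0])
b = np.array([4.0, 5.0, 6.0])

# plain python loop: collect the element-wise products in a list
c_loop = []
for i in range(len(a)):
    c_loop.append(a[i] * b[i])

# vectorized with NumPy: the loop runs in compiled code
c_np = a * b

print(c_loop, c_np)   # both contain the element-wise products 4, 10, 18
```
\normalsize
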
## NumPy - array basics (1)

* numpy arrays build a grid of \textcolor{blue}{same type} values, which are indexed.
  The *rank* is the dimension of the array.
  There are methods to create and preset arrays.

\footnotesize

```python
myA = np.array([12, 5 , 11])      # create rank 1 array (vector like)
type(myA)                         # <class 'numpy.ndarray'>
myA.shape                         # (3,)
print(myA[2])                     # 11, access the 3rd element
myA[0] = 12                       # set the 1st element to 12
myB = np.array([[1,5],[7,9]])     # create rank 2 array
myB.shape                         # (2,2)
print(myB[0,0],myB[0,1],myB[1,1]) # 1 5 9
myC = np.arange(6)                # create rank 1 array set to 0 - 5
myC.reshape(2,3)                  # returns a (2,3) array; myC itself is unchanged

zero = np.zeros((2,5))            # 2 rows, 5 columns, set to 0
one = np.ones((2,2))              # 2 rows, 2 columns, set to 1
five = np.full((2,2), 5)          # 2 rows, 2 columns, set to 5
e = np.eye(2)                     # create 2x2 identity matrix
```
\normalsize
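A quick check of the `reshape` remark above (a minimal sketch, not from the original slides): `reshape` returns a new array object and leaves the original unchanged unless it is reassigned.

\footnotesize
```python
import numpy as np

myC = np.arange(6)           # shape (6,)
myD = myC.reshape(2, 3)      # new (2,3) view of the same data
print(myC.shape, myD.shape)  # (6,) (2, 3)
myC = myC.reshape(2, 3)      # reassign to actually change myC
```
\normalsize
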
## NumPy - array basics (2)

* Similar to a coordinate system numpy arrays also have \textcolor{blue}{axes}. numpy operations
  can be performed along these axes.

\footnotesize
::: columns
:::: {.column width=35%}
```python
# 2D arrays
five = np.full((2,3), 5)     # 2 rows, 3 columns, set to 5
seven = np.full((2,3), 7)    # 2 rows, 3 columns, set to 7
np.concatenate((five,seven), axis = 0)  # results in a 4 x 3 array
np.concatenate((five,seven), axis = 1)  # results in a 2 x 6 array
# 1D arrays
one = np.array([1, 1 , 1])   # 1D array of length 3, set to 1
four = np.array([4, 4 , 4])  # 1D array of length 3, set to 4
np.concatenate((one,four), axis = 0)    # concat. arrays horizontally -> length 6
```
::::
:::: {.column width=50%}
\vspace{3cm}
![](figures/numpy_axes.png)
::::
:::
\normalsize
## NumPy - array indexing (1)

* select slices of a numpy array

\footnotesize
```python
a = np.array([[1,2,3,4],
              [5,6,7,8],    # 3 rows 4 columns array
              [9,10,11,12]])
b = a[:2, 1:3]              # subarray of 2 rows and
array([[2, 3],              # columns 1 and 2
       [6, 7]])
```
\normalsize

* a slice of an array points into the same data, *modifying* it changes the original array!

\footnotesize
```python
b[0, 0] = 77                     # b[0,0] and a[0,1] are 77

r1_row = a[1, :]                 # get 2nd row -> rank 1
r1_row.shape                     # (4,)
r2_row = a[1:2, :]               # get 2nd row -> rank 2
r2_row.shape                     # (1,4)
a=np.array([[1,2],[3,4],[5,6]])  # set a , 3 rows 2 cols
d=a[[0, 1, 2], [0, 1, 1]]        # d contains [1 4 6]
e=a[[1, 2], [1, 1]]              # e contains [4 6]
np.array([a[0,0],a[1,1],a[2,0]]) # address elements explicitly
```
\normalsize
## NumPy - array indexing (2)

* integer array indexing by setting an array of indices $\rightarrow$ selecting/changing elements

\footnotesize
```python
a = np.array([[1,2,3,4],
              [5,6,7,8],    # 3 rows 4 columns array
              [9,10,11,12]])
p_a = np.array([0,2,0])     # create an array of indices
s = a[np.arange(3), p_a]    # number the rows, p_a points to cols
print (s)                   # s contains [1 7 9]
a[np.arange(3),p_a] += 10   # add 10 to corresponding elements
x=np.array([[8,2],[7,4]])   # create 2x2 array
sel = (x > 5)               # sel : array of booleans
                            # [[ True False]
                            #  [ True False]]
print(x[x>5])               # select elements, prints [8 7]
```
\normalsize

* data type in numpy - create according to input numbers or set explicitly

\footnotesize

```python
x = np.array([1.1, 2.1])             # create float64 array
print(x.dtype)                       # prints float64
y=np.array([1.1,2.9],dtype=np.int64) # create int64 array [1 2] (values truncated)
```
\normalsize
## NumPy - functions

* math functions operate elementwise either as operator overload or as methods

\footnotesize
```python
x=np.array([[1,2],[3,4]],dtype=np.float64)  # define 2x2 float array
y=np.array([[3,1],[5,1]],dtype=np.float64)  # define 2x2 float array
s = x + y                           # elementwise sum
s = np.add(x,y)
s = np.subtract(x,y)
s = np.multiply(x,y)                # no matrix multiplication!
s = np.divide(x,y)
s = np.sqrt(x), np.exp(x), ...      # more elementwise functions
x @ y                               # matrix product, same as np.dot(x, y)
np.sum(x, axis=0)                   # sum of each column
np.sum(x, axis=1)                   # sum of each row
xT = x.T                            # transpose of x
x = np.linspace(0,2*np.pi,100)      # get equally spaced points in x

r = np.random.default_rng(seed=42)  # construct a random number generator
b = r.random((2,3))                 # random 2x3 matrix
```
\normalsize
##

* broadcasting in numpy
\vspace{0.4cm}

  The term \textcolor{blue}{broadcasting} describes how numpy treats arrays
  with different shapes during arithmetic operations

* add a scalar $b$ to a 1D array $a = [a_1,a_2,a_3]$ $\rightarrow$ expand $b$ to
  $[b,b,b]$
\vspace{0.2cm}

* add a scalar $b$ to a 2D [2,3] array $a =[[a_{11},a_{12},a_{13}],[a_{21},a_{22},a_{23}]]$
  $\rightarrow$ expand $b$ to $b =[[b,b,b],[b,b,b]]$ and add elementwise
\vspace{0.2cm}

* add a 1D array $b = [b_1,b_2,b_3]$ to a 2D [2,3] array $a=[[a_{11},a_{12},a_{13}],[a_{21},a_{22},a_{23}]]$ $\rightarrow$ the 1D array is broadcast
  across each row of the 2D array, $b =[[b_1,b_2,b_3],[b_1,b_2,b_3]]$, and added elementwise
\vspace{0.2cm}

Arithmetic operations can only be performed when the sizes of the arrays
match in each dimension or one of the sizes is 1. Look
[\textcolor{violet}{here}](https://numpy.org/doc/stable/user/basics.broadcasting.html) for more details

\footnotesize
```python
# Add a vector to each row of a matrix
x = np.array([[1,2,3], [4,5,6]])  # x has shape (2, 3)
v = np.array([1,2,3])             # v has shape (3,)
x + v                             # [[2 4 6]
                                  #  [5 7 9]]
```
\normalsize
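A further case covered by the rules above (a small sketch, not from the original slides): a column vector of shape (2,1) and a 1D array of shape (3,) broadcast to a common (2,3) grid.

\footnotesize
```python
import numpy as np

col = np.array([[10], [20]])  # shape (2, 1)
row = np.array([1, 2, 3])     # shape (3,)
col + row                     # shape (2, 3):
                              # [[11 12 13]
                              #  [21 22 23]]
```
\normalsize
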
## Plot data

A popular library to present data is the `pyplot` module of `matplotlib`.

* Drawing a function in one plot

\footnotesize
::: columns
:::: {.column width=35%}
```python
import numpy as np
import matplotlib.pyplot as plt
# generate 100 points from 0 to 10 pi
x = np.linspace( 0, 10*np.pi, 100 )
f = np.sin(x)**2
# plot function
plt.plot(x,f,'blueviolet',label='sine')
plt.xlabel('x [radian]')
plt.ylabel('f(x)')
plt.title('Plot sin^2')
plt.legend(loc='upper right')
plt.axis([0,30,-0.1,1.2])  # limit the plot range
# show the plot
plt.show()
```
::::
:::: {.column width=40%}
![](figures/matplotlib_Figure_1.png)
::::
:::

\normalsize
##

* Drawing a scatter plot of data

\footnotesize
::: columns
:::: {.column width=35%}
```python
...

# create x,y data points
num = 75
x = np.arange(num)
y = x + np.random.randint(0, int(num/1.5), num)
z = -(x + np.random.randint(0, int(num/3), num)) + num
# create colored scatter plot, sample 1
plt.scatter(x, y, color = 'green',
            label='Sample 1')
# create colored scatter plot, sample 2
plt.scatter(x, z, color = 'orange',
            label='Sample 2')
plt.title('scatter plot')
plt.xlabel('x')
plt.ylabel('y')
# legend and plot
plt.legend()
plt.show()
```
::::
:::: {.column width=35%}
\vspace{3cm}
![](figures/matplotlib_Figure_6.png)
::::
:::
\normalsize
##

* Drawing a histogram of data

\footnotesize
::: columns
:::: {.column width=35%}
```python
...

# draw 10000 samples from a standard normal distribution
g = np.random.normal(size=10000)
# histogram the data
plt.hist(g,bins=40)
# plot rotated histogram
plt.hist(g,bins=40,orientation='horizontal')
# normalize the area to 1
plt.hist(g,bins=40,density=True)
# change colors
plt.hist(g,bins=40,density=True,
         edgecolor='lightgreen',color='orange')
plt.title('Gaussian Histogram')
plt.xlabel('bin')
plt.ylabel('entries')
# description and plot
plt.legend(['Normalized distribution'])
plt.show()
```
::::
:::: {.column width=35%}
\vspace{3.5cm}
![](figures/matplotlib_Figure_5.png)
::::
:::
\normalsize
##

* Drawing subplots in one canvas

\footnotesize
::: columns
:::: {.column width=35%}
```python
...
g = np.exp(-0.2*x)
# create figure
plt.figure(num=2,figsize=(10.0,7.5),dpi=150,facecolor='lightgrey')
plt.suptitle('1 x 2 Plot')
# create first subplot and plot into it
plt.subplot(1,2,1)
plt.title('exp(-0.2 x)')
plt.xlabel('x')
plt.ylabel('g(x)')
plt.plot(x,g,'blueviolet')
# create second subplot and plot into it
plt.subplot(1,2,2)
plt.plot(x,f,'orange')
plt.plot(x,f*g,'red')
plt.legend(['sine^2','exp*sine'])
# show the plot
plt.show()
```
::::
:::: {.column width=40%}
\vspace{3cm}
![](figures/matplotlib_Figure_2.png)
::::
:::
\normalsize
## Image data

The `image` module of the `matplotlib` library can be used to load images
into numpy arrays and to render them.

* There are 3 common formats for the numpy array

  * (M, N) scalar data used for greyscale images

  * (M, N, 3) for RGB images (each pixel has an array with the RGB color attached)

  * (M, N, 4) for RGBA images (each pixel has an array with the RGB color
    and transparency attached)


The method `imread` loads the image into an `ndarray`, which can be
manipulated.

The method `imshow` renders the image data.

\vspace{2cm}
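As a small illustration of the (M, N) greyscale format listed above (a sketch, not part of the original slides): a 2D array of scalars is rendered through a colormap.

\footnotesize
```python
import numpy as np
import matplotlib.pyplot as plt

grad = np.linspace(0.0, 1.0, 200)  # 200 grey values from 0 to 1
grey = np.tile(grad, (100, 1))     # (100, 200) array: horizontal gradient
plt.imshow(grey, cmap='gray', vmin=0.0, vmax=1.0)
plt.colorbar()
plt.show()
```
\normalsize
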
##

* Drawing pixel data and images

\footnotesize
::: columns
:::: {.column width=50%}

```python
....
# create data array with pixel position and RGB color code
width, height = 200, 200
data = np.zeros((height, width, 3), dtype=np.uint8)
# red patch in the center
data[75:125, 75:125] = [255, 0, 0]
x = np.random.randint(0,width-1,100)
y = np.random.randint(0,height-1,100)
data[x,y]= [0,255,0]  # 100 random green pixels
plt.imshow(data)
plt.show()
....
import matplotlib.image as mpimg
# read image into numpy array
pic = mpimg.imread('picture.jpg')
mod_pic = pic[:,:,0]  # grab the red channel (index 0)
plt.imshow(mod_pic)   # rendered with the default colormap
plt.colorbar()        # try cmap='hot'
plt.show()
```
::::
:::: {.column width=25%}
![](figures/matplotlib_Figure_3.png)
\vspace{1cm}
![](figures/matplotlib_Figure_4.png)
::::
:::
\normalsize
## Input / output

For the analysis of measured data efficient input / output plays an
important role. In numpy, `ndarrays` can be saved to and read in from files.
The `load()` and `save()` functions handle numpy binary files (.npy extension)
which contain data, shape, dtype and other information required to
reconstruct the `ndarray` from the disk file.

\footnotesize
```python
r = np.random.default_rng()       # instantiate a random number generator
a = r.random((4,3))               # random 4x3 array
np.save('myBinary.npy', a)        # write array a to binary file myBinary.npy
b = np.arange(12)
np.savez('myComp.npz', a=a, b=b)  # write a and b into one .npz archive
                                  # (np.savez_compressed also compresses it)
......
b = np.load('myBinary.npy')       # read content of myBinary.npy into b
```
\normalsize

The storage and retrieval of array data in text file format is done
with the `savetxt()` and `loadtxt()` functions. Parameters controlling the delimiter,
line separators, file header and footer can be specified.

\footnotesize
```python
x = np.array([1,2,3,4,5,6,7])                       # create ndarray
np.savetxt('myText.txt',x,fmt='%d', delimiter=',')  # write array x to file myText.txt
                                                    # with comma separation
```
\normalsize
## Input / output

Import tabular data from spreadsheet programs in office packages.

\vspace{0.4cm}

\footnotesize
::: columns
:::: {.column width=35%}
`Excel data` can be exported as a text file (myData_01.csv) with a comma as
delimiter.
::::
:::: {.column width=35%}
![](figures/numpy_excel.png)
::::
:::

\footnotesize
```python
.....
# read the content of myData_01.csv into data
data = np.loadtxt('myData_01.csv',dtype=int,delimiter=',')

print (data.shape)  # (12, 9)
print (data)        # [[1 1 1 1 0 0 0 0 0]
                    #  [0 0 1 1 0 0 1 1 0]
                    #  .....
                    #  [0 0 0 0 1 1 1 1 1]]
```
\normalsize
## Input / output

Import tabular data from spreadsheet programs in office packages.

\vspace{0.4cm}

\footnotesize
::: columns
:::: {.column width=35%}
`Excel data` can be exported as a text file (myData_01.csv) with a comma as
delimiter. \newline
$\color{blue}{Often~many~files~are~available~(myData\_*.csv)}$
::::
:::: {.column width=35%}
![](figures/numpy_multi_excel.png)
::::
:::

\footnotesize
```python
.....
# find files and directories with names matching a pattern
import glob
# read the content of all files myData_*.csv
file_list = sorted(glob.glob('myData_*.csv'))  # generate a sorted file list
for filename in file_list:
    data = np.loadtxt(fname=filename, dtype=int, delimiter=',')
    print(data[:,3])  # print column 3 of each file
                      # [1 1 1 1 1 1 1 1 1 1 1 0]
                      # ......
                      # [0 1 0 1 0 1 0 1 0 1 0 1]
```
\normalsize
## Exercise 1

i) Display a numpy array as a figure of a blue cross. The size should be 200
   by 200 pixels. Use the array format (M, N, 3), where the first two indices specify
   the pixel positions and the last dimension holds the rgb color values from 0 to 255.

   - Draw, in addition, a red square at an arbitrary position into the figure.
   - Draw a circle in the center of the figure. Try to create a mask which
     selects the inner part of the circle using indexing.

\small
[Solution: 01_intro_ex_1a_sol.ipynb](https://www.physi.uni-heidelberg.de/~reygers/lectures/2022/ml/solutions/01_intro_ex_1a_sol.ipynb) \normalsize

ii) Read the data which contains pixels from the binary file horse.py into a
    numpy array. Display the data and the following transformations in 4
    subplots: scaling and translation, compression in x and y, rotation
    and mirroring.

\small
[Solution: 01_intro_ex_1b_sol.ipynb](https://www.physi.uni-heidelberg.de/~reygers/lectures/2022/ml/solutions/01_intro_ex_1b_sol.ipynb) \normalsize
## Pandas

[\textcolor{violet}{pandas}](https://pandas.pydata.org/pandas-docs/stable/getting_started/index.html) is a software library written in python for
\textcolor{blue}{data manipulation and analysis}.

\vspace{0.4cm}

\setbeamertemplate{itemize item}{\color{red}\tiny$\blacksquare$}

* Offers data structures and operations for manipulating numerical tables with
  integrated indexing

* Imports data from various file formats, e.g. comma-separated values, JSON,
  SQL or Excel

* Tools for reading and writing data structures; allows analyzing, filtering,
  splitting, grouping and aggregating, merging and joining, and plotting

* Built on top of `NumPy`

* Visualize the data with `matplotlib`

* Most machine learning tools support `pandas` $\rightarrow$
  it is widely used to preprocess data sets for analysis and machine learning
  in various scientific fields
## Pandas micro introduction

Goal: exploring, cleaning, transforming, and visualizing data.
The basic indexable objects are

\setbeamertemplate{itemize item}{\color{red}\tiny$\blacksquare$}

* `Series` -> vector (list) of data elements of arbitrary type

* `DataFrame` -> tabular arrangement of data elements with an arbitrary
  type per column

Both allow cleaning data by removing `empty` or `NaN` data entries

\footnotesize
```python
import numpy as np
import pandas as pd                           # use together with numpy
s = pd.Series([1, 3, 5, np.nan, 6, 8])        # create a Series (float64, since NaN is present)
r = pd.Series(np.random.randn(4))             # Series of random numbers, float64
dates = pd.date_range("20130101", periods=3)  # index according to dates
df = pd.DataFrame(np.random.randn(3,4),index=dates,columns=list("ABCD"))
print (df)                                    # print the DataFrame
                   A         B         C         D
2013-01-01  1.618395  1.210263 -1.276586 -0.775545
2013-01-02  0.676783 -0.754161 -1.148029 -0.244821
2013-01-03 -0.359081  0.296019  1.541571  0.235337

new_s = s.dropna()  # returns a new Series without the NaN entries
```
\normalsize
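The cleaning step mentioned above also works on a `DataFrame`; a small sketch (the column names and values are invented for illustration):

\footnotesize
```python
import numpy as np
import pandas as pd

# build a DataFrame from a dict of columns; one cell is missing (NaN)
df_m = pd.DataFrame({"a": [1.0, 2.0, np.nan],
                     "b": [10.0, 20.0, 30.0]})
print(df_m.isna().sum())  # count NaN entries per column
print(df_m.dropna())      # new DataFrame without the rows containing NaN
print(df_m.fillna(0.0))   # or replace NaN by a default value
```
\normalsize
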
##

\setbeamertemplate{itemize item}{\color{red}\tiny$\blacksquare$}

* pandas data can be saved in different file formats (CSV, JSON, HTML, XML,
  Excel, OpenDocument, HDF5, .....). `NaN` entries are kept
  in the output file, except if they are removed with `dataframe.dropna()`

* csv file

\footnotesize
```python
df.to_csv("myFile.csv")  # write the DataFrame df to a csv file
```
\normalsize

* HDF5 output

\footnotesize
```python
df.to_hdf("myFile.h5",key='df',mode='w')  # write the DataFrame df to HDF5
s.to_hdf("myFile.h5", key='s',mode='a')
```
\normalsize

* Writing to an excel file

\footnotesize
```python
df.to_excel("myFile.xlsx", sheet_name="Sheet1")
```
\normalsize

* Deleting a data file in python

\footnotesize
```python
import os
os.remove('myFile.h5')
```
\normalsize
##

\setbeamertemplate{itemize item}{\color{red}\tiny$\blacksquare$}

* read in data from various formats

* csv file

\footnotesize

```python
.......
df = pd.read_csv('heart.csv')  # read csv data table
print(df.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   age       303 non-null    int64
 1   sex       303 non-null    int64
 2   cp        303 non-null    int64
print(df.head(5))     # prints the first 5 rows of the data table
print(df.describe())  # shows a quick statistic summary of your data
```
\normalsize

* Reading an excel file

\footnotesize
```python
df = pd.read_excel("myFile.xlsx","Sheet1", na_values=["NA"])
```
\normalsize

\textcolor{olive}{There are many options specifying details for IO.}
##

\setbeamertemplate{itemize item}{\color{red}\tiny$\blacksquare$}

* Various functions exist to select and view data from pandas objects

* Display columns and index

\footnotesize

```python
df.index     # show datetime index of df
DatetimeIndex(['2013-01-01','2013-01-02','2013-01-03'],
              dtype='datetime64[ns]',freq='D')
df.columns   # show columns info
Index(['A', 'B', 'C', 'D'], dtype='object')
```
\normalsize

* `DataFrame.to_numpy()` gives a `NumPy` representation of the underlying data

\footnotesize

```python
df.to_numpy()  # one dtype for the entire array, not per column!
[[-0.62660101 -0.67330526  0.23269168 -0.67403546]
 [-0.53033339  0.32872063 -0.09893568  0.44814084]
 [-0.60289996 -0.22352548 -0.43393248  0.47531456]]
```
\normalsize

  Does not include the index or column labels in the output

* more on viewing

\footnotesize

```python
df.T                                   # transpose the DataFrame df
df.sort_values(by="B")                 # sort by the values of column B of df
df.sort_index(axis=0)                  # sort by the index in ascending order
df.sort_index(axis=0,ascending=False)  # sort by the index in descending order

```
\normalsize
##

\setbeamertemplate{itemize item}{\color{red}\tiny$\blacksquare$}

* Selecting data of pandas objects $\rightarrow$ keep or reduce dimensions

* get a named column as a Series

\footnotesize

```python
df["A"]          # selects column A from df, similar to df.A
df.iloc[:, 0:1]  # slices column A explicitly from df, like df.loc[:, ["A"]]
```
\normalsize

* select rows of a DataFrame

\footnotesize

```python
df[0:2]                    # selects rows 0 and 1 from df
df["20130102":"20130103"]  # use index values, endpoints are included!
df.iloc[2]                 # select a row by its integer position
df.iloc[1:3, :]            # selects rows 1 and 2 from df
```
\normalsize

* select by label

\footnotesize

```python
df.loc["20130102":"20130103",["C","D"]]  # selects rows 1 and 2 and only columns C and D
df.loc[dates[0], "A"]                    # selects a single value (scalar)
```
\normalsize

* select by lists of integer positions (as in `NumPy`)

\footnotesize

```python
df.iloc[[0, 2], [1, 3]]  # select rows 0 and 2 and columns B and D (data only)
df.iloc[1, 1]            # get a value explicitly (data only, no index lines)

```
\normalsize

* select according to expressions

\footnotesize

```python
df.query('B<C')                    # select rows where B < C
df1=df[(df["B"]==0)&(df["D"]==0)]  # conditions on rows
```
\normalsize
##

\setbeamertemplate{itemize item}{\color{red}\tiny$\blacksquare$}

* Selecting data of pandas objects continued

* Boolean indexing

\footnotesize

```python
df[df["A"] > 0]  # select the rows of df where column A is > 0
df[df > 0]       # select the values > 0 from the entire DataFrame
```
\normalsize

  a more complex example

\footnotesize

```python
df2 = df.copy()                      # copy df
df2["E"] = ["eight","one","four"]    # add column E
df2[df2["E"].isin(["two", "four"])]  # test if the elements "two" and "four" are
                                     # contained in the Series column E
```
\normalsize

* Operations (missing data is in general excluded)

\footnotesize

```python
df[df > 0] = -df                       # all elements > 0 change sign
df.mean(0)                             # get the column wise mean (axis=0)
df.mean(1)                             # get the row wise mean (axis=1)
df.std(0)                              # standard deviation according to axis
df.cumsum()                            # cumulative sum of each column
df.apply(np.sin)                       # apply a function to each element of df
df.apply(lambda x: x.max() - x.min())  # apply a lambda function column wise
df + 10                                # add the scalar 10
df - [1, 2, 10 , 100]                  # subtract a value from each column
df.corr()                              # compute pairwise correlation of columns
```
\normalsize
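The feature list earlier also mentions grouping and aggregating; a minimal sketch of `groupby` (the column names and values are invented for illustration):

\footnotesize
```python
import numpy as np
import pandas as pd

# toy table: a categorical column and a numeric column
df_g = pd.DataFrame({"target": [0, 1, 0, 1, 1],
                     "age":    [51, 45, 60, 38, 49]})
print(df_g.groupby("target").mean())  # mean of each column per target value
print(df_g.groupby("target")["age"].agg(["mean", "std", "count"]))
```
\normalsize
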
## Pandas - plotting data

[\textcolor{violet}{Visualization}](https://pandas.pydata.org/pandas-docs/stable/user_guide/visualization.html) is integrated in pandas using matplotlib. Here are just two examples

* Plot random data in a histogram and a scatter plot

\footnotesize
```python
# create a DataFrame with normally distributed random data
df = pd.DataFrame(np.random.randn(1000,4),columns=["a","b","c","d"])
df = df + [1, 3, 8 , 10]  # shift the column wise means by 1, 3, 8 , 10
df.plot.hist(bins=20)     # histogram all 4 columns
g1 = df.plot.scatter(x="a",y="c",color="DarkBlue",label="Group 1")
df.plot.scatter(x="b",y="d",color="DarkGreen",label="Group 2",ax=g1)
```
\normalsize

::: columns
:::: {.column width=35%}
![](figures/pandas_histogramm.png)
::::
:::: {.column width=35%}
![](figures/pandas_scatterplot.png)
::::
:::
## Pandas - plotting data

The function crosstab() takes one or more array-like objects as indexes or
columns and constructs a new DataFrame of variable counts on the inputs

\footnotesize
```python
df = pd.DataFrame(  # create a DataFrame with 2 categories
    {"sex": np.array([0,0,0,0,1,1,1,1,0,0,0]),
     "heart": np.array([1,1,1,0,1,1,1,0,0,0,1])
    })              # closing brackets go on the next lines
pd.crosstab(df.sex,df.heart)  # create a cross table of the possible combinations
pd.crosstab(df.sex,df.heart).plot(kind="bar",color=['red','blue'])  # plot the counts
```
\normalsize

::: columns
:::: {.column width=38%}
![](figures/pandas_crosstabplot.png)
::::
:::
## Exercise 2

Read the file [\textcolor{violet}{heart.csv}](https://www.physi.uni-heidelberg.de/~reygers/lectures/2023/ml/exercises/heart.csv) into a DataFrame.
[\textcolor{violet}{Information on the dataset}](https://archive.ics.uci.edu/ml/datasets/heart+Disease)

\setbeamertemplate{itemize item}{\color{red}$\square$}

* Which columns do we have?

* Print the first 3 rows

* Print the statistics summary and the correlations

* Print the mean values for each column with and without disease (target)

* Select the data according to `sex` and `target` (heart disease 0=no, 1=yes)

* Plot the `age` distribution of male and female in one histogram

* Plot the heart disease distribution according to chest pain type `cp`

* Plot `thalach` according to `target` in one histogram

* Plot `sex` and `target` in a histogram figure

* Correlate `age` and `max heart rate` according to `target`

* Correlate `age` and `cholesterol` according to `target`

\small
[Solution: 01_intro_ex_2_sol.ipynb](https://www.physi.uni-heidelberg.de/~reygers/lectures/2023/ml/solutions/01_intro_ex_2_sol.ipynb) \normalsize