diff --git a/notebooks/01_intro_ex_1a_sol.ipynb b/notebooks/01_intro_ex_1a_sol.ipynb new file mode 100644 index 0000000..a910633 --- /dev/null +++ b/notebooks/01_intro_ex_1a_sol.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Exercise 1: Create numpy array and draw rgb color objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create data array 2x2 as pixel position and 1x3 as rgb color data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "width, height = 200, 200\n", + "data = np.zeros((height, width, 3), dtype=np.uint8)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "draw blue cross" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.arange(width)\n", + "x_1 = np.arange(width)\n", + "x_2 = np.arange(width-1,-1,-1)\n", + "y = np.arange(height)\n", + "data[x_1,y] = [0,0,255]\n", + "data[x_2,y] = [0,0,255]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " draw a square " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lower = 55\n", + "upper = 75\n", + "data[lower:upper,lower:upper] = [0,255,0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create a mask of a circle using indexing\n", + "np.newaxis adds another dimension\n", + "we create a row and column vector and fill it using the condition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_center = 100\n", + "y_center = 100\n", + "radius = 10\n", + "mask = (x[np.newaxis,:]-x_center)**2 + 
(y[:,np.newaxis]-y_center)**2 < radius**2\n", + "data[mask] = [255,0,0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# plot image\n", + "plt.figure(figsize=(4.,4.),dpi=100,facecolor='lightgrey')\n", + "plt.imshow(data)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/01_intro_ex_1b_sol.ipynb b/notebooks/01_intro_ex_1b_sol.ipynb new file mode 100644 index 0000000..3180256 --- /dev/null +++ b/notebooks/01_intro_ex_1b_sol.ipynb @@ -0,0 +1,133 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Exercise 1b: Read a binary file which contains pixel data and apply\n", + "transformations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load figure as 2D array \n", + "data = np.load('horse.npy')\n", + "print(data.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# just scale the data by a factor and shift by trans\n", + "trans = np.ones(data.shape)\n", + "trans[0,:] *=0.6\n", + "trans[1,:] *=0.4\n", + "factor = 0.5 \n", + "data_scale = data * factor + trans" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "#compression in x and y \n", + "sx = 0.4\n", + "sy = 0.9\n", + "t = np.array([[sx,0],[0,sy]])\n", + "data_comp = t@data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#rotation by an angle theta\n", + "theta = 0.5\n", + "data_rot = np.array([[np.cos(theta),-np.sin(theta)],[np.sin(theta), np.cos(theta)]])@data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#spiegelung an der x Achse\n", + "tx = np.array([[1,0],[0,-1]]) # mirror x axis\n", + "ty = np.array([[-1,0],[0,1]]) # mirror y axis\n", + "tp = np.array([[-1,0],[0,-1]]) # mirror (0,0)\n", + "data_mirror = tp@data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create figure for the transformations\n", + "plt.figure(figsize=(10.0,10.0),dpi=100,facecolor='lightgrey')\n", + "plt.suptitle('Plot Transformations')\n", + "plt.subplot(2,2,1)\n", + "plt.title('original picture')\n", + "plt.plot(data[0,:],data[1,:],'.')\n", + "plt.axis([-1.2,1.2,-1.2,1.2])\n", + "plt.subplot(2,2,2)\n", + "plt.title('scaling and translation')\n", + "plt.plot(data_scale[0,:],data_scale[1,:],'.')\n", + "plt.axis([-1.2,1.2,-1.2,1.2])\n", + "plt.subplot(2,2,3)\n", + "plt.title('compression')\n", + "plt.plot(data_comp[0,:],data_comp[1,:],'.')\n", + "plt.axis([-1.2,1.2,-1.2,1.2])\n", + "plt.subplot(2,2,4)\n", + "plt.title('rotation and mirror at p(0,0)')\n", + "plt.plot(data_rot[0,:],data_rot[1,:],'.')\n", + "plt.plot(data_mirror[0,:],data_mirror[1,:],'.')\n", + "plt.axis([-1.2,1.2,-1.2,1.2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/01_intro_ex_2_sol.ipynb b/notebooks/01_intro_ex_2_sol.ipynb new file mode 100644 index 0000000..75d9488 --- /dev/null +++ b/notebooks/01_intro_ex_2_sol.ipynb @@ -0,0 +1,559 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Exercise 2: Example for pandas using the heart.csv data set" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# read the csv Data \n", + "df = pd.read_csv('heart.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',\n", + " 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],\n", + " dtype='object')\n", + "\n", + "RangeIndex: 303 entries, 0 to 302\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 age 303 non-null int64 \n", + " 1 sex 303 non-null int64 \n", + " 2 cp 303 non-null int64 \n", + " 3 trestbps 303 non-null int64 \n", + " 4 chol 303 non-null int64 \n", + " 5 fbs 303 non-null int64 \n", + " 6 restecg 303 non-null int64 \n", + " 7 thalach 303 non-null int64 \n", + " 8 exang 303 non-null int64 \n", + " 9 oldpeak 303 non-null float64\n", + " 10 slope 303 non-null int64 \n", + " 11 ca 303 non-null int64 \n", + " 12 thal 303 non-null int64 \n", + " 13 target 303 non-null int64 \n", + "dtypes: float64(1), int64(13)\n", + "memory usage: 33.3 KB\n", + "None\n", + "age int64\n", + "sex int64\n", + "cp int64\n", + "trestbps int64\n", + 
"chol int64\n", + "fbs int64\n", + "restecg int64\n", + "thalach int64\n", + "exang int64\n", + "oldpeak float64\n", + "slope int64\n", + "ca int64\n", + "thal int64\n", + "target int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# What is the number of columns and rows\n", + "print(df.columns)\n", + "print (df.info())\n", + "print(df.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 0 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 0 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 2 \n", + "\n", + " ca thal target \n", + "0 0 1 1 \n", + "1 0 2 1 \n", + "2 0 2 1 \n" + ] + } + ], + "source": [ + "# get first 3 lines\n", + "print(df.head(3))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " age sex cp trestbps chol fbs \\\n", + "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", + "mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n", + "std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n", + "min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n", + "25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n", + "50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n", + "75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n", + "max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n", + "\n", + " restecg thalach exang oldpeak slope ca \\\n", + "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", + "mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n", + "std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n", + "min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 
133.500000 0.000000 0.000000 1.000000 0.000000 \n", + "50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n", + "75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n", + "max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n", + "\n", + " thal target \n", + "count 303.000000 303.000000 \n", + "mean 2.313531 0.544554 \n", + "std 0.612277 0.498835 \n", + "min 0.000000 0.000000 \n", + "25% 2.000000 0.000000 \n", + "50% 2.000000 1.000000 \n", + "75% 3.000000 1.000000 \n", + "max 3.000000 1.000000 \n" + ] + } + ], + "source": [ + "#display statistics summary\n", + "print(df.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " age sex cp trestbps chol fbs \\\n", + "age 1.000000 -0.098447 -0.068653 0.279351 0.213678 0.121308 \n", + "sex -0.098447 1.000000 -0.049353 -0.056769 -0.197912 0.045032 \n", + "cp -0.068653 -0.049353 1.000000 0.047608 -0.076904 0.094444 \n", + "trestbps 0.279351 -0.056769 0.047608 1.000000 0.123174 0.177531 \n", + "chol 0.213678 -0.197912 -0.076904 0.123174 1.000000 0.013294 \n", + "fbs 0.121308 0.045032 0.094444 0.177531 0.013294 1.000000 \n", + "restecg -0.116211 -0.058196 0.044421 -0.114103 -0.151040 -0.084189 \n", + "thalach -0.398522 -0.044020 0.295762 -0.046698 -0.009940 -0.008567 \n", + "exang 0.096801 0.141664 -0.394280 0.067616 0.067023 0.025665 \n", + "oldpeak 0.210013 0.096093 -0.149230 0.193216 0.053952 0.005747 \n", + "slope -0.168814 -0.030711 0.119717 -0.121475 -0.004038 -0.059894 \n", + "ca 0.276326 0.118261 -0.181053 0.101389 0.070511 0.137979 \n", + "thal 0.068001 0.210041 -0.161736 0.062210 0.098803 -0.032019 \n", + "target -0.225439 -0.280937 0.433798 -0.144931 -0.085239 -0.028046 \n", + "\n", + " restecg thalach exang oldpeak slope ca \\\n", + "age -0.116211 -0.398522 0.096801 0.210013 -0.168814 0.276326 \n", + "sex -0.058196 -0.044020 0.141664 0.096093 -0.030711 0.118261 \n", + 
"cp 0.044421 0.295762 -0.394280 -0.149230 0.119717 -0.181053 \n", + "trestbps -0.114103 -0.046698 0.067616 0.193216 -0.121475 0.101389 \n", + "chol -0.151040 -0.009940 0.067023 0.053952 -0.004038 0.070511 \n", + "fbs -0.084189 -0.008567 0.025665 0.005747 -0.059894 0.137979 \n", + "restecg 1.000000 0.044123 -0.070733 -0.058770 0.093045 -0.072042 \n", + "thalach 0.044123 1.000000 -0.378812 -0.344187 0.386784 -0.213177 \n", + "exang -0.070733 -0.378812 1.000000 0.288223 -0.257748 0.115739 \n", + "oldpeak -0.058770 -0.344187 0.288223 1.000000 -0.577537 0.222682 \n", + "slope 0.093045 0.386784 -0.257748 -0.577537 1.000000 -0.080155 \n", + "ca -0.072042 -0.213177 0.115739 0.222682 -0.080155 1.000000 \n", + "thal -0.011981 -0.096439 0.206754 0.210244 -0.104764 0.151832 \n", + "target 0.137230 0.421741 -0.436757 -0.430696 0.345877 -0.391724 \n", + "\n", + " thal target \n", + "age 0.068001 -0.225439 \n", + "sex 0.210041 -0.280937 \n", + "cp -0.161736 0.433798 \n", + "trestbps 0.062210 -0.144931 \n", + "chol 0.098803 -0.085239 \n", + "fbs -0.032019 -0.028046 \n", + "restecg -0.011981 0.137230 \n", + "thalach -0.096439 0.421741 \n", + "exang 0.206754 -0.436757 \n", + "oldpeak 0.210244 -0.430696 \n", + "slope -0.104764 0.345877 \n", + "ca 0.151832 -0.391724 \n", + "thal 1.000000 -0.344029 \n", + "target -0.344029 1.000000 \n" + ] + } + ], + "source": [ + "#display correlation\n", + "print (df.corr())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " age sex cp trestbps chol fbs \\\n", + "target \n", + "0 56.601449 0.826087 0.478261 134.398551 251.086957 0.159420 \n", + "1 52.496970 0.563636 1.375758 129.303030 242.230303 0.139394 \n", + "\n", + " restecg thalach exang oldpeak slope ca thal \n", + "target \n", + "0 0.449275 139.101449 0.550725 1.585507 1.166667 1.166667 2.543478 \n", + "1 0.593939 158.466667 0.139394 0.583030 1.593939 0.363636 2.121212 \n" + ] + } + 
], + "source": [ + "# Print mean values for each column with and without disease\n", + "print(df.groupby('target').mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "167 62 0 0 140 268 0 0 160 0 3.6 \n", + "181 65 0 0 150 225 0 0 114 0 1.0 \n", + "182 61 0 0 130 330 0 0 169 0 0.0 \n", + "190 51 0 0 130 305 0 1 142 1 1.2 \n", + "204 62 0 0 160 164 0 0 145 0 6.2 \n", + "\n", + " slope ca thal target \n", + "167 0 2 2 0 \n", + "181 1 3 3 0 \n", + "182 2 0 2 0 \n", + "190 1 0 3 0 \n", + "204 0 3 3 0 \n" + ] + } + ], + "source": [ + "# get table with selection on more than 1 column\n", + "df1 = df[(df[\"sex\"] == 0) & (df[\"target\"] == 0) ]\n", + "print (df1.head(5))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Plots" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 63\n", + "1 37\n", + "3 56\n", + "5 57\n", + "7 44\n", + " ..\n", + "295 63\n", + "297 59\n", + "299 45\n", + "300 68\n", + "301 57\n", + "Name: age, Length: 207, dtype: int64\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# age dirtibution group into male and female (1 = male; 0 = female)\n", + "# male\n", + "plt.title('age distribution according to Sex') \n", + "df[df[\"sex\"] == 1]['age'].plot.hist()\n", + "print(df[df[\"sex\"] > 0]['age'])\n", + "# female\n", + "df[df[\"sex\"] == 0]['age'].plot.hist()\n", + "plt.xlabel('age [years]')\n", + "plt.legend([\"male\", \"female\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'max heart rate')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure()\n", + "# Plot maximum heart rate\n", + "# Heart disease (0 = no, 1 = yes)\n", + "plt.title('maximum heart rate according to heart disease') \n", + "df[df[\"target\"] == 1]['thalach'].plot.hist()\n", + "# no disease\n", + "df[df[\"target\"] == 0]['thalach'].plot.hist()\n", + "plt.legend([\"disease\", \"no disease\"])\n", + "plt.xlabel('max heart rate')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot sex and target in one histogramm via crosstab\n", + "pd.crosstab(df.sex,df.target).plot(kind=\"bar\",color=['red','blue' ])\n", + "plt.title('Heart Disease distribution according to Sex')\n", + "plt.xlabel('Sex (0 = Female, 1 = Male)')\n", + "plt.legend([\"no disease\", \"disease\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Frequency of Disease or Not')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot target and cp in one histogramm via crosstab\n", + "pd.crosstab(df.cp,df.target).plot(kind=\"bar\",figsize=(15,6),color=['#11A5AA','#AA1190' ])\n", + "plt.title('Heart Disease Distribution According To Chest Pain Type')\n", + "plt.xlabel('Chest Pain Type')\n", + "plt.xticks(rotation = 0)\n", + "plt.ylabel('Frequency of Disease or Not')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# plot correlations for target\n", + "plt.figure()\n", + "plt.scatter(x=df.age[df.target==1], y=df.thalach[(df.target==1)], c=\"red\")\n", + "plt.scatter(x=df.age[df.target==0], y=df.thalach[(df.target==0)])\n", + "plt.title('Age-max Heart Rate Plot')\n", + "plt.xlabel('age[years]')\n", + "plt.ylabel('max. heart rate')\n", + "plt.legend([\"Disease\", \"No Disease\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure()\n", + "plt.scatter(x=df.age[df.target==1], y=df.chol[(df.target==1)], c=\"red\")\n", + "plt.scatter(x=df.age[df.target==0], y=df.chol[(df.target==0)])\n", + "plt.title('Age-Cholesterol Plot')\n", + "plt.xlabel('age[years]')\n", + "plt.ylabel('Cholesterol')\n", + "plt.legend([\"Disease\", \"No Disease\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/02_fit_fitGraph.ipynb b/notebooks/02_fit_fitGraph.ipynb new file mode 100644 index 0000000..a8537eb --- /dev/null +++ b/notebooks/02_fit_fitGraph.ipynb @@ -0,0 +1,314 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Graph fit example within pyROOT" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Welcome to JupyROOT 6.20/08\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import ROOT\n", + "from ROOT import TF1, TVirtualFitter, TGraphErrors, TCanvas, TMinuit, TFitResult, gROOT, TGraph" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "numDat = 10 \n", + "x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='d')\n", + "dx = np.array([0.1,0.1,0.5,0.1,0.5,0.1,0.5,0.1,0.5,0.1], dtype='d')\n", + "y = 
np.array([1.1 ,2.3 ,2.7 ,3.2 ,3.1 ,2.4 ,1.7 ,1.5 ,1.5 ,1.7 ], dtype='d')\n", + "dy = np.array([0.15,0.22,0.29,0.39,0.31,0.21,0.13,0.15,0.19,0.13], dtype='d')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define fit function" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def myNFunc(x, p):\n", + " return p[0] + x[0]*p[1] + p[2]*x[0]**2 + p[3]*x[0]**3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create ROOT function and set start parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "func = TF1('func',myNFunc, 0.5 ,10.5, 4)\n", + "func.SetParameters(-1.3, 2.6 , -0.24 , 0.005)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create Graph with error bars" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "Graph = TGraphErrors(numDat, x, y, dx, dy)\n", + "Graph.SetTitle( 'Fit Graph' )\n", + "Graph.SetMarkerColor( 4 )\n", + "Graph.SetMarkerStyle( 21 )\n", + "Graph.SetMaximum(4.)\n", + "Graph.SetMinimum(-1.)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Start fitter and return fit result for later processing" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " FCN=2.15982 FROM MINOS STATUS=SUCCESSFUL 90 CALLS 2039 TOTAL\n", + " EDM=4.56889e-09 STRATEGY= 1 ERROR MATRIX ACCURATE \n", + " EXT PARAMETER STEP FIRST \n", + " NO. 
NAME VALUE ERROR SIZE DERIVATIVE \n", + " 1 p0 -1.03327e+00 4.63548e-01 -1.99485e-03 1.64067e-03\n", + " 2 p1 2.54886e+00 3.59144e-01 2.70551e-03 -5.29683e-02\n", + " 3 p2 -4.84278e-01 7.01863e-02 -2.72411e-04 -7.70245e-01\n", + " 4 p3 2.56882e-02 3.97082e-03 3.97082e-03 -1.30073e+00\n" + ] + } + ], + "source": [ + "result = Graph.Fit(func, \"M E S\",\"\",0.5,10.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Confidence plot" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "values = result.GetConfidenceIntervals(0.66, False)\n", + "interval = TGraphErrors(Graph.GetN())\n", + "for i in range(Graph.GetN()):\n", + " interval.SetPoint(i, Graph.GetX()[i], func.Eval(Graph.GetX()[i] ))\n", + " interval.SetPointError(i, 0, values[i] )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Canvas for plotting" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "c = TCanvas( 'c', 'A Simple Fit',700, 500)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Draw canvas and graph and 1 sigma confidence" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Graph.Draw(\"AP\")\n", + "interval.SetFillColor(21)\n", + "interval.Draw(\"3\")\n", + "c.Draw()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create graph for confidence interval\n", + "\n", + "Compute the confidence intervals at the x points of the created graph" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "GraphConf = TGraphErrors(Graph.GetN())\n", + "GraphConf.SetTitle(\"Fit Polynomial with .95 confidence;x;f(x)\")\n", + "for i in 
range(Graph.GetN()):\n", + " GraphConf.SetPoint(i, Graph.GetX()[i],0 )" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + " \n", + "TVirtualFitter.GetFitter().GetConfidenceIntervals(GraphConf,0.95)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now the \"GraphConf\" graph contains function values as its y-coordinates\n", + "and confidence intervals as the errors on these coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "myC = TCanvas(\"myC\", \"Confidence Intervall\",800, 800);" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "GraphConf.SetLineColor(4)\n", + "GraphConf.Draw(\"AP\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "Graph.SetLineColor(2)\n", + "Graph.SetMarkerStyle(21)\n", + "Graph.Draw(\"P SAME\")\n", + "func.SetLineColor(4)\n", + "func.Draw(\"SAME\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "myC.Draw()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/02_fit_iminuitFit.ipynb b/notebooks/02_fit_iminuitFit.ipynb new file mode 100644 index 0000000..ff23941 --- /dev/null +++ b/notebooks/02_fit_iminuitFit.ipynb @@ -0,0 +1,291 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "Fit with the python interface to Minuit 2 called iminuit\n", + "https://iminuit.readthedocs.io/en/stable/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "plt.rcParams[\"font.size\"] = 20\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='d')\n", + "dx = np.array([0.1,0.1,0.5,0.1,0.5,0.1,0.5,0.1,0.5,0.1], dtype='d')\n", + "y = np.array([1.1 ,2.3 ,2.7 ,3.2 ,3.1 ,2.4 ,1.7 ,1.5 ,1.5 ,1.7 ], dtype='d')\n", + "dy = np.array([0.15,0.22,0.29,0.39,0.31,0.21,0.13,0.15,0.19,0.13], dtype='d')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define fit function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def pol3(a0, a1, a2, a3):\n", + " return a0 + x*a1 + a2*x**2 + a3*x**3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "least-squares function: sum of data residuals squared" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def LSQ(a0, a1, a2, a3):\n", + " return np.sum((y - pol3(a0, a1, a2, a3)) ** 2 / dy ** 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import Minuit object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from iminuit import Minuit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Minuit instance using LSQ function to minimize" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "LSQ.errordef = 
Minuit.LEAST_SQUARES\n", + "#LSQ.errordef = Minuit.LIKELIHOOD\n", + "m = Minuit(LSQ,a0=-1.3, a1=2.6 ,a2=-0.24 ,a3=0.005)\n", + "m.fixed[\"a3\"] = True \n", + "m.params" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "run migrad" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.fixed[\"a3\"] = False\n", + "m.params\n", + "m.migrad()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get contour" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.draw_mncontour(\"a2\", \"a3\", cl=[1, 2, 3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compute parameter uncertainties: HESSE (covariance matrix, symmetric errors) and MINOS (asymmetric errors)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.hesse()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.minos()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "access fit results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(m.values,m.errors)\n", + "a0_fit = m.values[\"a0\"]\n", + "a1_fit = m.values[\"a1\"]\n", + "a2_fit = m.values[\"a2\"]\n", + "a3_fit = m.values[\"a3\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print (m.covariance)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "prepare data to display fitted function " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_plot = np.linspace( 0.5, 10.5 , 500 )\n", + "y_fit = a0_fit + a1_fit * x_plot + a2_fit * x_plot**2 + a3_fit * x_plot**3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Minos algorithm uses the profile
likelihood method to compute (generally asymmetric) confidence intervals. This can be plotted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.draw_profile(\"a2\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a 2D contour of the function around the minimum for 2 parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.draw_mncontour(\"a2\", \"a3\" , cl=[1, 2, 3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the data with error bars and the fitted function using matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.errorbar(x, y, dy , dx, fmt=\"o\")\n", + "plt.plot(x_plot, y_fit)\n", + "plt.title(\"iminuit Fit Test\")\n", + "plt.xlim(-0.1, 10.1)\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/02_fit_numpyFit.ipynb b/notebooks/02_fit_numpyFit.ipynb new file mode 100644 index 0000000..b3a228b --- /dev/null +++ b/notebooks/02_fit_numpyFit.ipynb @@ -0,0 +1,338 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fit 3rd order Polynomial to graph data using numpy" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "plt.rcParams[\"font.size\"] = 20\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "data" + ] + }, + { +
"cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='d')\n", + "dx = np.array([0.1,0.1,0.5,0.1,0.5,0.1,0.5,0.1,0.5,0.1], dtype='d')\n", + "y = np.array([1.1 ,2.3 ,2.7 ,3.2 ,3.1 ,2.4 ,1.7 ,1.5 ,1.5 ,1.7 ], dtype='d')\n", + "dy = np.array([0.15,0.22,0.29,0.39,0.31,0.21,0.13,0.15,0.19,0.13], dtype='d')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create numpy array with weights as 1/error " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "ones = np.ones(10, dtype='d')\n", + "weight = ones/dy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "various fit options" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "n = 3 \n", + "#model = np.polyfit(x, y, n, 0 , 0, weight, cov='unscaled' )\n", + "#model, fitCov = np.polyfit(x, y, n , 0 , 0, weight, cov='unscaled' )\n", + "model, fitCov = np.polyfit(x, y, n , 0 , 0, weight, cov='unscaled' )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "monitor printout of coefficient polynomial vector (model) and covariance matrix (fitCov)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0.02590585 -0.48405665 2.52045767 -0.98073638]\n", + "[[ 1.19801683e-05 -2.07833174e-04 9.95675425e-04 -9.97263377e-04]\n", + " [-2.07833174e-04 3.65835615e-03 -1.78482005e-02 1.83096634e-02]\n", + " [ 9.95675425e-04 -1.78482005e-02 8.93452262e-02 -9.59887600e-02]\n", + " [-9.97263377e-04 1.83096634e-02 -9.59887600e-02 1.19001618e-01]]\n" + ] + } + ], + "source": [ + "print (model)\n", + "print (fitCov)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "prepare errors for plotting, see\n", + 
"https://stackoverflow.com/questions/28505008/numpy-polyfit-how-to-get-1-sigma-uncertainty-around-the-estimated-curve/28528966\n", + "write polynomial as np.dot(yy, model) with yy=[x**n, x**(n-1), ..., 1] and x can be a single value\n", + "or a vector and model as coefficient vector. Since this is a linear equation, with the covariance matrix\n", + "fitCov of model, the covariance matrix of the values is np.dot(yy, np.dot(fitCov, yy.T))." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Do the interpolation for plotting" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "x_plot = np.linspace(0.1, 10.5, 500)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Matrix with columns x**n, x**(n-1), ..., 1 (one row per x value)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "YY = np.vstack([x_plot**(n-i) for i in range(n+1)]).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "matrix multiplication calculates the polynomial values" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "y_plot = np.dot(YY, model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Covariance_y = YY*Covariance*YY.T" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "fitCov_y_plot = np.dot(YY, np.dot(fitCov, YY.T))\n", + "Sigma_y_plot = np.sqrt(np.diag(fitCov_y_plot))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "model contains parameters in order highest power first! 
Define fit function " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def fitpol3(model):\n", + " return model[3] + model[2]*x + model[1]*x**2 + model[0]*x**3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "construct polynomial from coefficients in model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "predict = np.poly1d(model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "r_2 coefficient of the determination of the regression score function\n", + "input are the y data values and the prediction from the fit with x data values \n", + "this is borrowed from scikit-learn" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9486643210376879\n" + ] + } + ], + "source": [ + "from sklearn.metrics import r2_score\n", + "p = r2_score(y, predict(x))\n", + "print (p)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "plot data" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure()\n", + "plt.errorbar(x, y, dy , dx, fmt=\"o\")\n", + "plt.plot(x, fitpol3(model) )\n", + "plt.title(\"numpy Fit Test\")\n", + "plt.xlim(-0.1, 10.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "plot data with covariance" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fg, ax = plt.subplots(1, 1)\n", + "ax.set_title(\"numpy Fit Test with $\\pm1\\sigma$ interval\")\n", + "ax.fill_between(x_plot, y_plot+Sigma_y_plot, y_plot-Sigma_y_plot, alpha=.25)\n", + "ax.plot(x_plot, y_plot,'-')\n", + "ax.errorbar(x, y,dy , dx, fmt='o')\n", + "ax.axis('tight')\n", + "\n", + "fg.canvas.draw()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/03_ml_basics_display_Clothing.ipynb b/notebooks/03_ml_basics_display_Clothing.ipynb new file mode 100644 index 0000000..36a5712 --- /dev/null +++ b/notebooks/03_ml_basics_display_Clothing.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8f9f0e7b", + "metadata": {}, + "source": [ + "Display fashion_mnist dataset of clothes from Zalando" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc829d9a", + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63348efe", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the MNIST Fashion dataset\n", + "(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", + "# Set the class names\n", + "class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', \n", + " 
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6c86027", + "metadata": {}, + "outputs": [], + "source": [ + "# print the shape of the numpy arrays\n", + "print ('Print shape of pixel data')\n", + "print(x_train.shape)\n", + "print ('Print shape of labels')\n", + "print(y_train.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc58b142", + "metadata": {}, + "outputs": [], + "source": [ + "# Normalize pixel values to between 0 and 1\n", + "x_train = x_train.astype(\"float32\") / 255.0\n", + "x_test = x_test.astype(\"float32\") / 255.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7976111", + "metadata": {}, + "outputs": [], + "source": [ + "# choose an image num to print\n", + "num = 20\n", + "image = x_train[num]\n", + "label = y_train[num]\n", + "\n", + "print ('Print normailzed pixel data of image ',num, ' :')\n", + "print(x_train[num])\n", + "print ('Print label of image ',num , ' :' )\n", + "print(y_train[num])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64a46625", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,10))\n", + "for i in range(25):\n", + " plt.subplot(5,5,i+1)\n", + " plt.xticks([])\n", + " plt.yticks([])\n", + " plt.grid(False)\n", + " plt.imshow(x_train[i], cmap=plt.cm.binary)\n", + " plt.xlabel(class_names[y_train[i]])\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_ml_basics_display_HandWrt.ipynb 
b/notebooks/03_ml_basics_display_HandWrt.ipynb new file mode 100644 index 0000000..03f65e6 --- /dev/null +++ b/notebooks/03_ml_basics_display_HandWrt.ipynb @@ -0,0 +1,134 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3644475e", + "metadata": {}, + "outputs": [], + "source": [ + "# Display hand writing dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8125479b", + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d45b964f", + "metadata": {}, + "outputs": [], + "source": [ + "# Load training dataset of 60000 images with greyscale values in 28 x 28\n", + "# and labels \n", + "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8ae2a6", + "metadata": {}, + "outputs": [], + "source": [ + "# print the shape of the numpy arrays\n", + "print ('Print shape of pixel data')\n", + "print(x_train.shape)\n", + "print ('Print shape of labels')\n", + "print(y_train.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be70973e", + "metadata": {}, + "outputs": [], + "source": [ + "# normalize pixel to 0-1\n", + "x_train = x_train / 255\n", + "x_test = x_test / 255" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55f457d5", + "metadata": {}, + "outputs": [], + "source": [ + "# choose an image num to display and print\n", + "num = 20\n", + "\n", + "image = x_train[num]\n", + "label = y_train[num]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "149788b7", + "metadata": {}, + "outputs": [], + "source": [ + "# plot the image using imshow\n", + "plt.imshow(image, cmap='gray')\n", + "# set the title\n", + "plt.title(\"Label: %d\" % label )\n", + "# remove the axis labels and ticks\n", + 
"plt.axis('off')\n", + "# show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "232ef6ca", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot 16 examples from the numpy array which was read in above\n", + "# and display it\n", + "fig, axes = plt.subplots(4, 4, figsize=(10, 10))\n", + "for i , ax in enumerate(axes.ravel()):\n", + " ax.imshow(x_train[num+i], cmap='gray')\n", + " ax.set_title(\"Label: %d\" % y_train[num+i])\n", + " ax.axis('off')\n", + "plt.suptitle(\"Examples of training set images\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_ml_basics_display_HorseOrHuman.ipynb b/notebooks/03_ml_basics_display_HorseOrHuman.ipynb new file mode 100644 index 0000000..7efff50 --- /dev/null +++ b/notebooks/03_ml_basics_display_HorseOrHuman.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2eaba66b", + "metadata": {}, + "source": [ + "Read and Display Horse or Human machine learning dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1e48ac0", + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import tensorflow_datasets as tfds\n", + "from tensorflow.keras import regularizers\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "feda024e", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the horse or human dataset\n", + "#(300, 300, 3) unint8\n", + "dataset, label = tfds.load('horses_or_humans', 
with_info=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35991dec", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract the horse/human class\n", + "horse_ds = dataset['train'].filter(lambda x: x['label'] == 0)\n", + "human_ds = dataset['train'].filter(lambda x: x['label'] == 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fab03aa8", + "metadata": {}, + "outputs": [], + "source": [ + "# Take a few examples < 16\n", + "n_examples = 5\n", + "horse_examples = horse_ds.take(n_examples)\n", + "human_examples = human_ds.take(n_examples)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c33f1acd", + "metadata": {}, + "outputs": [], + "source": [ + "# Display the examples\n", + "fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n", + "for i, example in enumerate(human_examples):\n", + " image = example['image']\n", + " axes[i].imshow(image)\n", + " axes[i].set_title(f\"humans {i+1}\")\n", + "plt.show()\n", + "\n", + "fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n", + "for i, example in enumerate(horse_examples):\n", + " image = example['image']\n", + " axes[i].imshow(image)\n", + " axes[i].set_title(f\"horses {i+1}\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25f3eeb3", + "metadata": {}, + "outputs": [], + "source": [ + "# Split the dataset into training and validation sets\n", + "# as_supervised: Specifies whether to return the dataset as a tuple\n", + "# of (input, label) pairs.\n", + "train_dataset, valid_dataset = tfds.load('horses_or_humans', split=['train','test'], as_supervised=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29dc0e62", + "metadata": {}, + "outputs": [], + "source": [ + "# Get the number of elements in the training and validation dataset\n", + "train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n", + "valid_size = 
tf.data.experimental.cardinality(valid_dataset).numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db8aaf91", + "metadata": {}, + "outputs": [], + "source": [ + "IMG_SIZE = 300\n", + "NUM_CLASSES = 2\n", + "\n", + "def preprocess(image, label):\n", + " image = tf.cast(image, tf.float32)\n", + "# # Resize the images to a fixed size\n", + " image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))\n", + "# # Rescale the pixel values to be between 0 and 1\n", + " image = image / 255.0\n", + " label = tf.one_hot(label, NUM_CLASSES)\n", + " return image, label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d59661c3", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply the preprocessing function to the datasets\n", + "train_dataset = train_dataset.map(preprocess)\n", + "valid_dataset = valid_dataset.map(preprocess)\n", + "\n", + "# Batch and shuffle the datasets\n", + "train_dataset = train_dataset.shuffle(2000).batch(80)\n", + "valid_dataset = valid_dataset.batch(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9399bc99", + "metadata": {}, + "outputs": [], + "source": [ + "# Get the number of elements in the trainingand validation dataset\n", + "train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n", + "valid_size = tf.data.experimental.cardinality(valid_dataset).numpy()\n", + "print(\"Training dataset size:\", train_size)\n", + "print(\"Validation dataset size:\", valid_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13af7d53", + "metadata": {}, + "outputs": [], + "source": [ + "# Store images and labels of the validation data for predictions\n", + "for images, labels in valid_dataset:\n", + " x_val = images\n", + " y_val = labels\n", + " \n", + "print(x_val.shape, y_val.shape)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_ml_basics_ex_4_mlp_clothing.ipynb b/notebooks/03_ml_basics_ex_4_mlp_clothing.ipynb new file mode 100644 index 0000000..a66819c --- /dev/null +++ b/notebooks/03_ml_basics_ex_4_mlp_clothing.ipynb @@ -0,0 +1,236 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "6c180d4b", + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3\n", + "# fashion mnist data\n", + "# MLP model with two hidden layers, each with a ReLU activation function.\n", + "# Input data is flattened to a 1D array and passed to the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0e31b9c", + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ae1412e", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the MNIST Fashion dataset\n", + "(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8814914", + "metadata": {}, + "outputs": [], + "source": [ + "# Normalize pixel values to between 0 and 1\n", + "x_train = x_train.astype(\"float32\") / 255.0\n", + "x_test = x_test.astype(\"float32\") / 255.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2810da39", + "metadata": {}, + "outputs": [], + "source": [ + "# MNIST dataset images have a shape of (28, 28). 
The images are flattened\n", + "# into a 1D array of length 784 \n", + "x_train = x_train.reshape(-1, 784)\n", + "x_test = x_test.reshape(-1, 784)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96f7ff8a", + "metadata": {}, + "outputs": [], + "source": [ + "# The model is defined here with three dense (fully connected) layers\n", + "# The first layer is a Dense layer with 128 units and a ReLU activation\n", + "# function with an input shape of (784,). This layer serves as the input\n", + "# layer of the model.\n", + "# The second layer is also a Dense layer with 64 units and a ReLU activation\n", + "# function. This layer takes the output of the previous layer as input, and\n", + "# applies a non-linear transformation to it to produce a new set of features\n", + "# that the next layer can use.\n", + "# The third is another Dense layer, one for each class in the output. The\n", + "# output is raw scores or logits for each class since there is no activation\n", + "# function . 
This layer is responsible for producing the final output of the\n", + "# model, which can then be used to make predictions.\n", + "# With Dropout(0.2) 20 % of the input is randomly droped, this should reduce overfitting\n", + "model = keras.Sequential([\n", + " keras.layers.Dense(128, activation='relu', input_shape=(784,)),\n", + " # keras.layers.Dropout(0.2),\n", + " keras.layers.Dense(64, activation='relu'),\n", + " keras.layers.Dense(10)\n", + "])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3fe609c", + "metadata": {}, + "outputs": [], + "source": [ + "# Compile the model\n", + "# adam = specifies the optimizer to use during training\n", + "# loss function to use during training, SparseCategoricalCrossentropy loss\n", + "# is commonly used for multi-class classification problems.\n", + "# from_logits=True indicates that the model's output is a raw score\n", + "# for each class and not a probability distribution.\n", + "model.compile(optimizer='adam',\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf6c978d", + "metadata": {}, + "outputs": [], + "source": [ + "# Train the model\n", + "history = model.fit(x_train, y_train, epochs=10, validation_split=0.2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97fc2313", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Evaluate the model on the test set\n", + "test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)\n", + "print(\"Test accuracy:\", test_acc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef5f19d0", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the training and validation accuracy and loss over time\n", + "plt.figure(figsize=(10, 4))\n", + "plt.subplot(1, 2, 1)\n", + "plt.plot(history.history[\"accuracy\"])\n", + 
"plt.plot(history.history[\"val_accuracy\"])\n", + "plt.title(\"Model accuracy\")\n", + "plt.ylabel(\"Accuracy\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.legend([\"Train\", \"Validation\"], loc=\"lower right\")\n", + "\n", + "plt.subplot(1, 2, 2)\n", + "plt.plot(history.history[\"loss\"])\n", + "plt.plot(history.history[\"val_loss\"])\n", + "plt.title(\"Model loss\")\n", + "plt.ylabel(\"Loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.legend([\"Train\", \"Validation\"], loc=\"upper right\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0ebddc4", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot a confusion matrix of the test set predictions\n", + "test_preds = np.argmax(model.predict(x_test), axis=1)\n", + "conf_mat = tf.math.confusion_matrix(y_test, test_preds)\n", + "plt.imshow(conf_mat, cmap=\"Blues\")\n", + "plt.xlabel(\"Predicted labels\")\n", + "plt.ylabel(\"True labels\")\n", + "plt.xticks(np.arange(10))\n", + "plt.yticks(np.arange(10))\n", + "plt.colorbar()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9175d533", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Make predictions on the test set\n", + "y_pred = model.predict(x_test)\n", + "y_pred = np.argmax(y_pred, axis=1)\n", + "\n", + "# Plot some examples from the test set and their predictions\n", + "fig, axes = plt.subplots(4, 4, figsize=(18, 18))\n", + "for i, ax in enumerate(axes.ravel()):\n", + " ax.matshow(x_test[i].reshape(28, 28), cmap='gray')\n", + " ax.set_title(\"True: %d\\nPredict: %d\" % (y_test[i], y_pred[i]))\n", + " ax.axis(\"off\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a6e85be", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_ml_basics_minimizer.ipynb b/notebooks/03_ml_basics_minimizer.ipynb new file mode 100644 index 0000000..21058c9 --- /dev/null +++ b/notebooks/03_ml_basics_minimizer.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "042acd49", + "metadata": {}, + "source": [ + "# Test a minimizer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb51a492", + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from scipy.optimize import minimize\n", + "plt.style.use(\"ggplot\")\n", + "from matplotlib import colors, cm" + ] + }, + { + "cell_type": "markdown", + "id": "2ac3651a", + "metadata": {}, + "source": [ + "plt.rcParams controls the appearance of your plots globally,\n", + "affecting all subsequent plots created in your session." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97ef9933", + "metadata": {}, + "outputs": [], + "source": [ + "plt.rcParams[\"axes.grid\"] = False\n", + "plt.rcParams.update({'font.size': 20})\n", + "plt.rcParams.update({'figure.figsize': (12,9)})\n", + "plt.rcParams['lines.markersize'] = 8" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f15200f9", + "metadata": {}, + "outputs": [], + "source": [ + "# Generate data points with gaussian smearing\n", + "data = np.random.uniform(size=100)\n", + "labels = 5.*data*data*data + 1 + np.random.normal(loc=0.0, scale=0.1, size=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7237f5ed", + "metadata": {}, + "outputs": [], + "source": [ + "# show plot\n", + "plt.scatter(data, labels, label=\"data\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d6e104c", + "metadata": {}, + "outputs": [], + "source": [ + "# define chi2 like cost function\n", + "def cost(params):\n", + " W, b = params\n", + " return np.mean((labels - (W*data*data*data + b))**2)" + ] + }, + { + "cell_type": "markdown", + "id": "8e00e16a", + "metadata": {}, + "source": [ + "call minimizer\n", + "provides a collection of optimization algorithms for finding the minimum or maximum of a given function. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "433975c3", + "metadata": {}, + "outputs": [], + "source": [ + "res = minimize(cost, [1., 1.])\n", + "# returns an OptimizeResult object\n", + "# x :the solution (minimum) of the optimization problem, represented as an\n", + "# array.\n", + "# Results of the minimization\n", + "W, b = res.x\n", + "print ('function value at the minimum and fitted parameters',res.fun,' ',W,' ',b)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e1f4e81", + "metadata": {}, + "outputs": [], + "source": [ + "points = np.linspace(0, 1, 100)\n", + "prediction = W*points*points*points + b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8de971e", + "metadata": {}, + "outputs": [], + "source": [ + "# plot fit model\n", + "plt.scatter(data, labels, label=\"data\")\n", + "plt.plot(points, prediction, label=\"model\", color=\"green\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a7d62c2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_ml_basics_tf_broadcasting.ipynb b/notebooks/03_ml_basics_tf_broadcasting.ipynb new file mode 100644 index 0000000..93ba24f --- /dev/null +++ b/notebooks/03_ml_basics_tf_broadcasting.ipynb @@ -0,0 +1,118 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "df1f5eb3", + "metadata": {}, + "source": [ + "# demonstration of broadcasting in tensorflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "1d61c70a", + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38bca1cf", + "metadata": {}, + "outputs": [], + "source": [ + "# Define two tensors with different shapes\n", + "a = tf.constant([[1, 2, 3], [4, 5, 6]])\n", + "b = tf.constant([10, 20, 30])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3f382e3", + "metadata": {}, + "outputs": [], + "source": [ + "# Perform element-wise multiplication using broadcasting\n", + "c = a * b\n", + "# Print the result\n", + "print(c)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95683fe5", + "metadata": {}, + "outputs": [], + "source": [ + "# Broadcasting scalar to tensor\n", + "x = tf.constant([1, 2, 3])\n", + "y = 2\n", + "z = x + y # equivalent to tf.add(x, y)\n", + "print(z.numpy()) # [3 4 5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ed98565", + "metadata": {}, + "outputs": [], + "source": [ + "# Broadcasting vector to matrix\n", + "x = tf.constant([[1, 2], [3, 4]])\n", + "y = tf.constant([1, 2])\n", + "z = x + y # equivalent to tf.add(x, y)\n", + "print(z.numpy()) # [[2 4], [4 6]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41f4196f", + "metadata": {}, + "outputs": [], + "source": [ + "# Broadcasting matrix to tensor\n", + "x = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n", + "y = tf.constant([[1], [2]])\n", + "z = x + y # equivalent to tf.add(x, y)\n", + "print(z.numpy()) # [[[2 3], [5 6]], [[6 7], [9 10]]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76a5108d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_ml_basics_tf_differentiate.ipynb b/notebooks/03_ml_basics_tf_differentiate.ipynb new file mode 100644 index 0000000..3df9807 --- /dev/null +++ b/notebooks/03_ml_basics_tf_differentiate.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "eefe7571", + "metadata": {}, + "outputs": [], + "source": [ + "# show differentiation in Tensorflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9d7c185", + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "584384f1", + "metadata": {}, + "outputs": [], + "source": [ + "# Define a function to differentiate\n", + "def f(x):\n", + " return x ** 2 + 2 * x + 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70430402", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a TensorFlow variable\n", + "x = tf.Variable(2.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45ea0a33", + "metadata": {}, + "outputs": [], + "source": [ + "# Use tf.GradientTape to record the gradients\n", + "with tf.GradientTape() as tape:\n", + " y = f(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6b1ff27", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate the gradient of y with respect to x\n", + "dy_dx = tape.gradient(y, x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f581817", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the result\n", + "print(dy_dx)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}