Browse Source

update files

master
Joerg Marks 2 years ago
parent
commit
7bbcc601b7
  1. 139
      notebooks/01_intro_ex_1a_sol.ipynb
  2. 133
      notebooks/01_intro_ex_1b_sol.ipynb
  3. 559
      notebooks/01_intro_ex_2_sol.ipynb
  4. 314
      notebooks/02_fit_fitGraph.ipynb
  5. 291
      notebooks/02_fit_iminuitFit.ipynb
  6. 338
      notebooks/02_fit_numpyFit.ipynb
  7. 122
      notebooks/03_ml_basics_display_Clothing.ipynb
  8. 134
      notebooks/03_ml_basics_display_HandWrt.ipynb
  9. 197
      notebooks/03_ml_basics_display_HorseOrHuman.ipynb
  10. 236
      notebooks/03_ml_basics_ex_4_mlp_clothing.ipynb
  11. 166
      notebooks/03_ml_basics_minimizer.ipynb
  12. 118
      notebooks/03_ml_basics_tf_broadcasting.ipynb
  13. 102
      notebooks/03_ml_basics_tf_differentiate.ipynb

139
notebooks/01_intro_ex_1a_sol.ipynb

@ -0,0 +1,139 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Exercise 1: Create numpy array and draw rgb color objects"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"create a data array of shape (height, width, 3): two axes for the pixel position and a third axis of length 3 for the rgb color data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"width, height = 200, 200\n",
"data = np.zeros((height, width, 3), dtype=np.uint8)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"draw blue cross"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = np.arange(width)\n",
"x_1 = np.arange(width)\n",
"x_2 = np.arange(width-1,-1,-1)\n",
"y = np.arange(height)\n",
"data[x_1,y] = [0,0,255]\n",
"data[x_2,y] = [0,0,255]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" draw a square "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lower = 55\n",
"upper = 75\n",
"data[lower:upper,lower:upper] = [0,255,0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"create a mask of a circle using indexing\n",
"np.newaxis adds another dimension\n",
"we create a row and column vector and fill it using the condition"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x_center = 100\n",
"y_center = 100\n",
"radius = 10\n",
"mask = (x[np.newaxis,:]-x_center)**2 + (y[:,np.newaxis]-y_center)**2 < radius**2\n",
"data[mask] = [255,0,0]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# plot image\n",
"plt.figure(figsize=(4.,4.),dpi=100,facecolor='lightgrey')\n",
"plt.imshow(data)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

133
notebooks/01_intro_ex_1b_sol.ipynb

@ -0,0 +1,133 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Exercise 1b: Read a binary file which contains pixel data and apply\n",
"transformations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load figure as 2D array \n",
"data = np.load('horse.npy')\n",
"print(data.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# just scale the data by a factor and shift by trans\n",
"trans = np.ones(data.shape)\n",
"trans[0,:] *=0.6\n",
"trans[1,:] *=0.4\n",
"factor = 0.5 \n",
"data_scale = data * factor + trans"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#compression in x and y \n",
"sx = 0.4\n",
"sy = 0.9\n",
"t = np.array([[sx,0],[0,sy]])\n",
"data_comp = t@data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#rotation by an angle theta\n",
"theta = 0.5\n",
"data_rot = np.array([[np.cos(theta),-np.sin(theta)],[np.sin(theta), np.cos(theta)]])@data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#mirroring at the x axis, the y axis and the origin (0,0)\n",
"tx = np.array([[1,0],[0,-1]]) # mirror x axis\n",
"ty = np.array([[-1,0],[0,1]]) # mirror y axis\n",
"tp = np.array([[-1,0],[0,-1]]) # mirror (0,0)\n",
"data_mirror = tp@data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# create figure for the transformations\n",
"plt.figure(figsize=(10.0,10.0),dpi=100,facecolor='lightgrey')\n",
"plt.suptitle('Plot Transformations')\n",
"plt.subplot(2,2,1)\n",
"plt.title('original picture')\n",
"plt.plot(data[0,:],data[1,:],'.')\n",
"plt.axis([-1.2,1.2,-1.2,1.2])\n",
"plt.subplot(2,2,2)\n",
"plt.title('scaling and translation')\n",
"plt.plot(data_scale[0,:],data_scale[1,:],'.')\n",
"plt.axis([-1.2,1.2,-1.2,1.2])\n",
"plt.subplot(2,2,3)\n",
"plt.title('compression')\n",
"plt.plot(data_comp[0,:],data_comp[1,:],'.')\n",
"plt.axis([-1.2,1.2,-1.2,1.2])\n",
"plt.subplot(2,2,4)\n",
"plt.title('rotation and mirror at p(0,0)')\n",
"plt.plot(data_rot[0,:],data_rot[1,:],'.')\n",
"plt.plot(data_mirror[0,:],data_mirror[1,:],'.')\n",
"plt.axis([-1.2,1.2,-1.2,1.2])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

559
notebooks/01_intro_ex_2_sol.ipynb
File diff suppressed because one or more lines are too long
View File

314
notebooks/02_fit_fitGraph.ipynb
File diff suppressed because one or more lines are too long
View File

291
notebooks/02_fit_iminuitFit.ipynb

@ -0,0 +1,291 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fit with the python interface to Minuit 2 called iminuit\n",
"https://iminuit.readthedocs.io/en/stable/"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
"plt.rcParams[\"font.size\"] = 20\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Data "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='d')\n",
"dx = np.array([0.1,0.1,0.5,0.1,0.5,0.1,0.5,0.1,0.5,0.1], dtype='d')\n",
"y = np.array([1.1 ,2.3 ,2.7 ,3.2 ,3.1 ,2.4 ,1.7 ,1.5 ,1.5 ,1.7 ], dtype='d')\n",
"dy = np.array([0.15,0.22,0.29,0.39,0.31,0.21,0.13,0.15,0.19,0.13], dtype='d')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define fit function"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def pol3(a0, a1, a2, a3):\n",
" return a0 + x*a1 + a2*x**2 + a3*x**3"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"least-squares function: sum of squared data residuals, each weighted by its uncertainty dy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def LSQ(a0, a1, a2, a3):\n",
" return np.sum((y - pol3(a0, a1, a2, a3)) ** 2 / dy ** 2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"import Minuit object"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from iminuit import Minuit"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Minuit instance using LSQ function to minimize"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"LSQ.errordef = Minuit.LEAST_SQUARES\n",
"#LSQ.errordef = Minuit.LIKELIHOOD\n",
"m = Minuit(LSQ,a0=-1.3, a1=2.6 ,a2=-0.24 ,a3=0.005)\n",
"m.fixed[\"a3\"] = True \n",
"m.params"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"run migrad"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m.fixed[\"a3\"] = False\n",
"m.params\n",
"m.migrad()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get contour"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m.draw_mncontour(\"a2\", \"a3\", cl=[1, 2, 3])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Improve the fit"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m.hesse()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m.minos()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"access fit results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(m.values,m.errors)\n",
"a0_fit = m.values[\"a0\"]\n",
"a1_fit = m.values[\"a1\"]\n",
"a2_fit = m.values[\"a2\"]\n",
"a3_fit = m.values[\"a3\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print (m.covariance)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"prepare data to display fitted function "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x_plot = np.linspace( 0.5, 10.5 , 500 )\n",
"y_fit = a0_fit + a1_fit * x_plot + a2_fit * x_plot**2 + a3_fit * x_plot**3"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The Minos algorithm uses the profile likelihood method to compute (generally asymmetric) confidence intervals. This can be plotted"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m.draw_profile(\"a2\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get a 2D contour of the function around the minimum for 2 parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"m.draw_mncontour(\"a2\", \"a3\" , cl=[1, 2, 3])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"plot the data and the fitted function with matplotlib"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure()\n",
"plt.errorbar(x, y, dy , dx, fmt=\"o\")\n",
"plt.plot(x_plot, y_fit)\n",
"plt.title(\"iminuit Fit Test\")\n",
"plt.xlim(-0.1, 10.1)\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

338
notebooks/02_fit_numpyFit.ipynb
File diff suppressed because one or more lines are too long
View File

122
notebooks/03_ml_basics_display_Clothing.ipynb

@ -0,0 +1,122 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8f9f0e7b",
"metadata": {},
"source": [
"Display fashion_mnist dataset of clothes from Zalando"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc829d9a",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63348efe",
"metadata": {},
"outputs": [],
"source": [
"# Load the MNIST Fashion dataset\n",
"(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
"# Set the class names\n",
"class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', \n",
" 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6c86027",
"metadata": {},
"outputs": [],
"source": [
"# print the shape of the numpy arrays\n",
"print ('Print shape of pixel data')\n",
"print(x_train.shape)\n",
"print ('Print shape of labels')\n",
"print(y_train.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc58b142",
"metadata": {},
"outputs": [],
"source": [
"# Normalize pixel values to between 0 and 1\n",
"x_train = x_train.astype(\"float32\") / 255.0\n",
"x_test = x_test.astype(\"float32\") / 255.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7976111",
"metadata": {},
"outputs": [],
"source": [
"# choose an image num to print\n",
"num = 20\n",
"image = x_train[num]\n",
"label = y_train[num]\n",
"\n",
"print ('Print normailzed pixel data of image ',num, ' :')\n",
"print(x_train[num])\n",
"print ('Print label of image ',num , ' :' )\n",
"print(y_train[num])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64a46625",
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(10,10))\n",
"for i in range(25):\n",
" plt.subplot(5,5,i+1)\n",
" plt.xticks([])\n",
" plt.yticks([])\n",
" plt.grid(False)\n",
" plt.imshow(x_train[i], cmap=plt.cm.binary)\n",
" plt.xlabel(class_names[y_train[i]])\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

134
notebooks/03_ml_basics_display_HandWrt.ipynb

@ -0,0 +1,134 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "3644475e",
"metadata": {},
"outputs": [],
"source": [
"# Display hand writing dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8125479b",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d45b964f",
"metadata": {},
"outputs": [],
"source": [
"# Load training dataset of 60000 images with greyscale values in 28 x 28\n",
"# and labels \n",
"(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa8ae2a6",
"metadata": {},
"outputs": [],
"source": [
"# print the shape of the numpy arrays\n",
"print ('Print shape of pixel data')\n",
"print(x_train.shape)\n",
"print ('Print shape of labels')\n",
"print(y_train.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be70973e",
"metadata": {},
"outputs": [],
"source": [
"# normalize pixel to 0-1\n",
"x_train = x_train / 255\n",
"x_test = x_test / 255"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55f457d5",
"metadata": {},
"outputs": [],
"source": [
"# choose an image num to display and print\n",
"num = 20\n",
"\n",
"image = x_train[num]\n",
"label = y_train[num]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "149788b7",
"metadata": {},
"outputs": [],
"source": [
"# plot the image using imshow\n",
"plt.imshow(image, cmap='gray')\n",
"# set the title\n",
"plt.title(\"Label: %d\" % label )\n",
"# remove the axis labels and ticks\n",
"plt.axis('off')\n",
"# show the plot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "232ef6ca",
"metadata": {},
"outputs": [],
"source": [
"# Plot 16 examples from the numpy array which was read in above\n",
"# and display it\n",
"fig, axes = plt.subplots(4, 4, figsize=(10, 10))\n",
"for i , ax in enumerate(axes.ravel()):\n",
" ax.imshow(x_train[num+i], cmap='gray')\n",
" ax.set_title(\"Label: %d\" % y_train[num+i])\n",
" ax.axis('off')\n",
"plt.suptitle(\"Examples of training set images\")\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

197
notebooks/03_ml_basics_display_HorseOrHuman.ipynb

@ -0,0 +1,197 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "2eaba66b",
"metadata": {},
"source": [
"Read and Display Horse or Human machine learning dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f1e48ac0",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import tensorflow_datasets as tfds\n",
"from tensorflow.keras import regularizers\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "feda024e",
"metadata": {},
"outputs": [],
"source": [
"# Load the horse or human dataset\n",
"#(300, 300, 3) uint8\n",
"dataset, label = tfds.load('horses_or_humans', with_info=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35991dec",
"metadata": {},
"outputs": [],
"source": [
"# Extract the horse/human class\n",
"horse_ds = dataset['train'].filter(lambda x: x['label'] == 0)\n",
"human_ds = dataset['train'].filter(lambda x: x['label'] == 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fab03aa8",
"metadata": {},
"outputs": [],
"source": [
"# Take a few examples < 16\n",
"n_examples = 5\n",
"horse_examples = horse_ds.take(n_examples)\n",
"human_examples = human_ds.take(n_examples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c33f1acd",
"metadata": {},
"outputs": [],
"source": [
"# Display the examples\n",
"fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n",
"for i, example in enumerate(human_examples):\n",
" image = example['image']\n",
" axes[i].imshow(image)\n",
" axes[i].set_title(f\"humans {i+1}\")\n",
"plt.show()\n",
"\n",
"fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n",
"for i, example in enumerate(horse_examples):\n",
" image = example['image']\n",
" axes[i].imshow(image)\n",
" axes[i].set_title(f\"horses {i+1}\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25f3eeb3",
"metadata": {},
"outputs": [],
"source": [
"# Split the dataset into training and validation sets\n",
"# as_supervised: Specifies whether to return the dataset as a tuple\n",
"# of (input, label) pairs.\n",
"train_dataset, valid_dataset = tfds.load('horses_or_humans', split=['train','test'], as_supervised=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29dc0e62",
"metadata": {},
"outputs": [],
"source": [
"# Get the number of elements in the training and validation dataset\n",
"train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n",
"valid_size = tf.data.experimental.cardinality(valid_dataset).numpy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db8aaf91",
"metadata": {},
"outputs": [],
"source": [
"IMG_SIZE = 300\n",
"NUM_CLASSES = 2\n",
"\n",
"def preprocess(image, label):\n",
" image = tf.cast(image, tf.float32)\n",
"# # Resize the images to a fixed size\n",
" image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))\n",
"# # Rescale the pixel values to be between 0 and 1\n",
" image = image / 255.0\n",
" label = tf.one_hot(label, NUM_CLASSES)\n",
" return image, label"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d59661c3",
"metadata": {},
"outputs": [],
"source": [
"# Apply the preprocessing function to the datasets\n",
"train_dataset = train_dataset.map(preprocess)\n",
"valid_dataset = valid_dataset.map(preprocess)\n",
"\n",
"# Batch and shuffle the datasets\n",
"train_dataset = train_dataset.shuffle(2000).batch(80)\n",
"valid_dataset = valid_dataset.batch(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9399bc99",
"metadata": {},
"outputs": [],
"source": [
"# Get the number of batches in the training and validation dataset\n",
"train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n",
"valid_size = tf.data.experimental.cardinality(valid_dataset).numpy()\n",
"print(\"Training dataset size:\", train_size)\n",
"print(\"Validation dataset size:\", valid_size)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13af7d53",
"metadata": {},
"outputs": [],
"source": [
"# Store images and labels of the validation data for predictions\n",
"for images, labels in valid_dataset:\n",
" x_val = images\n",
" y_val = labels\n",
" \n",
"print(x_val.shape, y_val.shape)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

236
notebooks/03_ml_basics_ex_4_mlp_clothing.ipynb

@ -0,0 +1,236 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "6c180d4b",
"metadata": {},
"outputs": [],
"source": [
"# Exercise 3\n",
"# fashion mnist data\n",
"# MLP model with two hidden layers, each with a ReLU activation function.\n",
"# Input data is flattened to a 1D array and passed to the model."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0e31b9c",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ae1412e",
"metadata": {},
"outputs": [],
"source": [
"# Load the MNIST Fashion dataset\n",
"(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8814914",
"metadata": {},
"outputs": [],
"source": [
"# Normalize pixel values to between 0 and 1\n",
"x_train = x_train.astype(\"float32\") / 255.0\n",
"x_test = x_test.astype(\"float32\") / 255.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2810da39",
"metadata": {},
"outputs": [],
"source": [
"# MNIST dataset images have a shape of (28, 28). The images are flattened\n",
"# into a 1D array of length 784 \n",
"x_train = x_train.reshape(-1, 784)\n",
"x_test = x_test.reshape(-1, 784)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96f7ff8a",
"metadata": {},
"outputs": [],
"source": [
"# The model is defined here with three dense (fully connected) layers\n",
"# The first layer is a Dense layer with 128 units and a ReLU activation\n",
"# function with an input shape of (784,). This layer serves as the input\n",
"# layer of the model.\n",
"# The second layer is also a Dense layer with 64 units and a ReLU activation\n",
"# function. This layer takes the output of the previous layer as input, and\n",
"# applies a non-linear transformation to it to produce a new set of features\n",
"# that the next layer can use.\n",
"# The third is another Dense layer with 10 units, one for each class in the\n",
"# output. The output is raw scores or logits for each class since there is no\n",
"# activation function. This layer is responsible for producing the final output\n",
"# of the model, which can then be used to make predictions.\n",
"# With Dropout(0.2) 20 % of the input is randomly dropped, this should reduce overfitting\n",
"model = keras.Sequential([\n",
" keras.layers.Dense(128, activation='relu', input_shape=(784,)),\n",
" # keras.layers.Dropout(0.2),\n",
" keras.layers.Dense(64, activation='relu'),\n",
" keras.layers.Dense(10)\n",
"])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a3fe609c",
"metadata": {},
"outputs": [],
"source": [
"# Compile the model\n",
"# adam = specifies the optimizer to use during training\n",
"# loss function to use during training, SparseCategoricalCrossentropy loss\n",
"# is commonly used for multi-class classification problems.\n",
"# from_logits=True indicates that the model's output is a raw score\n",
"# for each class and not a probability distribution.\n",
"model.compile(optimizer='adam',\n",
" loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
" metrics=['accuracy'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf6c978d",
"metadata": {},
"outputs": [],
"source": [
"# Train the model\n",
"history = model.fit(x_train, y_train, epochs=10, validation_split=0.2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97fc2313",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Evaluate the model on the test set\n",
"test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)\n",
"print(\"Test accuracy:\", test_acc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef5f19d0",
"metadata": {},
"outputs": [],
"source": [
"# Plot the training and validation accuracy and loss over time\n",
"plt.figure(figsize=(10, 4))\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(history.history[\"accuracy\"])\n",
"plt.plot(history.history[\"val_accuracy\"])\n",
"plt.title(\"Model accuracy\")\n",
"plt.ylabel(\"Accuracy\")\n",
"plt.xlabel(\"Epoch\")\n",
"plt.legend([\"Train\", \"Validation\"], loc=\"lower right\")\n",
"\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(history.history[\"loss\"])\n",
"plt.plot(history.history[\"val_loss\"])\n",
"plt.title(\"Model loss\")\n",
"plt.ylabel(\"Loss\")\n",
"plt.xlabel(\"Epoch\")\n",
"plt.legend([\"Train\", \"Validation\"], loc=\"upper right\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0ebddc4",
"metadata": {},
"outputs": [],
"source": [
"# Plot a confusion matrix of the test set predictions\n",
"test_preds = np.argmax(model.predict(x_test), axis=1)\n",
"conf_mat = tf.math.confusion_matrix(y_test, test_preds)\n",
"plt.imshow(conf_mat, cmap=\"Blues\")\n",
"plt.xlabel(\"Predicted labels\")\n",
"plt.ylabel(\"True labels\")\n",
"plt.xticks(np.arange(10))\n",
"plt.yticks(np.arange(10))\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9175d533",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Make predictions on the test set\n",
"y_pred = model.predict(x_test)\n",
"y_pred = np.argmax(y_pred, axis=1)\n",
"\n",
"# Plot some examples from the test set and their predictions\n",
"fig, axes = plt.subplots(4, 4, figsize=(18, 18))\n",
"for i, ax in enumerate(axes.ravel()):\n",
" ax.matshow(x_test[i].reshape(28, 28), cmap='gray')\n",
" ax.set_title(\"True: %d\\nPredict: %d\" % (y_test[i], y_pred[i]))\n",
" ax.axis(\"off\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a6e85be",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

166
notebooks/03_ml_basics_minimizer.ipynb

@ -0,0 +1,166 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "042acd49",
"metadata": {},
"source": [
"# Test a minimizer"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb51a492",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from scipy.optimize import minimize\n",
"plt.style.use(\"ggplot\")\n",
"from matplotlib import colors, cm"
]
},
{
"cell_type": "markdown",
"id": "2ac3651a",
"metadata": {},
"source": [
"plt.rcParams controls the appearance of your plots globally,\n",
"affecting all subsequent plots created in your session."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97ef9933",
"metadata": {},
"outputs": [],
"source": [
"plt.rcParams[\"axes.grid\"] = False\n",
"plt.rcParams.update({'font.size': 20})\n",
"plt.rcParams.update({'figure.figsize': (12,9)})\n",
"plt.rcParams['lines.markersize'] = 8"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f15200f9",
"metadata": {},
"outputs": [],
"source": [
"# Generate data points with gaussian smearing\n",
"data = np.random.uniform(size=100)\n",
"labels = 5.*data*data*data + 1 + np.random.normal(loc=0.0, scale=0.1, size=100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7237f5ed",
"metadata": {},
"outputs": [],
"source": [
"# show plot\n",
"plt.scatter(data, labels, label=\"data\")\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d6e104c",
"metadata": {},
"outputs": [],
"source": [
"# define chi2 like cost function\n",
"def cost(params):\n",
" W, b = params\n",
" return np.mean((labels - (W*data*data*data + b))**2)"
]
},
{
"cell_type": "markdown",
"id": "8e00e16a",
"metadata": {},
"source": [
"call the minimizer\n",
"scipy.optimize provides a collection of optimization algorithms for finding the minimum or maximum of a given function. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "433975c3",
"metadata": {},
"outputs": [],
"source": [
"res = minimize(cost, [1., 1.])\n",
"# returns an OptimizeResult object\n",
"# x :the solution (minimum) of the optimization problem, represented as an\n",
"# array.\n",
"# Results of the minimization\n",
"W, b = res.x\n",
"print ('function value at the minimum and fitted parameters',res.fun,' ',W,' ',b)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e1f4e81",
"metadata": {},
"outputs": [],
"source": [
"points = np.linspace(0, 1, 100)\n",
"prediction = W*points*points*points + b"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8de971e",
"metadata": {},
"outputs": [],
"source": [
"# plot fit model\n",
"plt.scatter(data, labels, label=\"data\")\n",
"plt.plot(points, prediction, label=\"model\", color=\"green\")\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a7d62c2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

118
notebooks/03_ml_basics_tf_broadcasting.ipynb

@ -0,0 +1,118 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "df1f5eb3",
"metadata": {},
"source": [
"# demonstration of broadcasting in tensorflow"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d61c70a",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38bca1cf",
"metadata": {},
"outputs": [],
"source": [
"# Define two tensors with different shapes\n",
"a = tf.constant([[1, 2, 3], [4, 5, 6]])\n",
"b = tf.constant([10, 20, 30])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3f382e3",
"metadata": {},
"outputs": [],
"source": [
"# Perform element-wise multiplication using broadcasting\n",
"c = a * b\n",
"# Print the result\n",
"print(c)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95683fe5",
"metadata": {},
"outputs": [],
"source": [
"# Broadcasting scalar to tensor\n",
"x = tf.constant([1, 2, 3])\n",
"y = 2\n",
"z = x + y # equivalent to tf.add(x, y)\n",
"print(z.numpy()) # [3 4 5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ed98565",
"metadata": {},
"outputs": [],
"source": [
"# Broadcasting vector to matrix\n",
"x = tf.constant([[1, 2], [3, 4]])\n",
"y = tf.constant([1, 2])\n",
"z = x + y # equivalent to tf.add(x, y)\n",
"print(z.numpy()) # [[2 4], [4 6]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41f4196f",
"metadata": {},
"outputs": [],
"source": [
"# Broadcasting matrix to tensor\n",
"x = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
"y = tf.constant([[1], [2]])\n",
"z = x + y # equivalent to tf.add(x, y)\n",
"print(z.numpy()) # [[[2 3], [5 6]], [[6 7], [9 10]]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76a5108d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

102
notebooks/03_ml_basics_tf_differentiate.ipynb

@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "eefe7571",
"metadata": {},
"outputs": [],
"source": [
"# show differentiation in Tensorflow"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9d7c185",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "584384f1",
"metadata": {},
"outputs": [],
"source": [
"# Define a function to differentiate\n",
"def f(x):\n",
" return x ** 2 + 2 * x + 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70430402",
"metadata": {},
"outputs": [],
"source": [
"# Create a TensorFlow variable\n",
"x = tf.Variable(2.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45ea0a33",
"metadata": {},
"outputs": [],
"source": [
"# Use tf.GradientTape to record the gradients\n",
"with tf.GradientTape() as tape:\n",
" y = f(x)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6b1ff27",
"metadata": {},
"outputs": [],
"source": [
"# Calculate the gradient of y with respect to x\n",
"dy_dx = tape.gradient(y, x)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f581817",
"metadata": {},
"outputs": [],
"source": [
"# Print the result\n",
"print(dy_dx)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading…
Cancel
Save