Machine Learning course as part of the Studierendentage in summer semester 2023

{
"cells": [
{
"cell_type": "markdown",
"id": "69c093f5",
"metadata": {},
"source": [
"# Convolutional neural network\n",
"\n",
"input data is shaped as 4D tensors of shape (samples, height, width, channels) and passed through several convolutional layers and max \n",
"pooling layers before being flattened and passed.\n",
"The convolutional layer applies the convolution operation to the input image using a set of learned filters, generating a feature map that captures patterns and features of the input image. The activation function then applies a non-linear transformation to the feature map.\n",
"In convolutional neural networks, it is common to increase the number\n",
"of filters in each convolutional layer to capture more complex and higher-level features in the data. The deeper layers of a convolutional neural network typically have more filters than the earlier layers. The exact number of filters used for each layer depends on the complexity of the problem and the size of the dataset."
]
},
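{
"cell_type": "markdown",
"id": "7f2a1c0d",
"metadata": {},
"source": [
"As a quick illustration of this shape convention (added here purely as a demonstration, separate from the model built below): a single `Conv2D` layer with 32 filters of size 3x3 turns a batch of shape (samples, height, width, channels) into (samples, new_height, new_width, 32), one feature map per filter."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f2a1c0e",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: apply one Conv2D layer to a random dummy batch and\n",
"# inspect the resulting shape (the dummy data is not used anywhere else).\n",
"import tensorflow as tf\n",
"\n",
"dummy_batch = tf.random.normal((4, 28, 28, 1))  # 4 random 28x28 greyscale images\n",
"conv_layer = tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu')\n",
"print(conv_layer(dummy_batch).shape)  # (4, 26, 26, 32) with 'valid' padding"
]
},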
{
"cell_type": "code",
"execution_count": null,
"id": "3a5fe3c5",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d86a19a",
"metadata": {},
"outputs": [],
"source": [
"# Load the MNIST Fashion dataset\n",
"(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d7180c4",
"metadata": {},
"outputs": [],
"source": [
"# Normalize pixel values to between 0 and 1\n",
"x_train = x_train.astype(\"float32\") / 255.0\n",
"x_test = x_test.astype(\"float32\") / 255.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12cc2bc2",
"metadata": {},
"outputs": [],
"source": [
"# Reshape the input data to (samples, height, width, channels)\n",
"x_train = x_train.reshape(-1, 28, 28, 1)\n",
"x_test = x_test.reshape(-1, 28, 28, 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8dc840b9",
"metadata": {},
"outputs": [],
"source": [
"# Define the CNN model\n",
"# the Conv2D layer in the model expects input data with four dimensions,\n",
"# where the last dimension is the number of color channels, in this case\n",
"# normalized greyscale values.\n",
"model = keras.Sequential([\n",
"# create a class for a 2D convolutional layer in Keras with a 3x3 2D kernel\n",
"# size. The first argument 32 is the number of filters in the layer.\n",
"# the 32 filters are separate 2D kernels that slide over the input image or\n",
"# feature map to produce 32 output feature maps. The filters are applied\n",
"# to non-overlapping regions of the input. \n",
"# As activation function ReLU (Rectified Linear Unit) is used. The input\n",
"# shape is a 3D tensor with shape of 28x28x1 \n",
" keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),\n",
" # Max pooling is a type of down-sampling operation by partitioning the\n",
"# input image into a set of non-overlapping rectangular regions and,\n",
"# for each region, selecting the maximum value. The output of this operation\n",
"# is a feature map with reduced spatial dimensions, but with retained\n",
"# important features.\n",
" keras.layers.MaxPooling2D(pool_size=(2, 2)), \n",
"# include dropping of layers to prevent overfitting\n",
" keras.layers.Dropout(0.25),\n",
" keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),\n",
" keras.layers.MaxPooling2D(pool_size=(2, 2)),\n",
" keras.layers.Dropout(0.25),\n",
" keras.layers.Flatten(),\n",
"# input a vector of size 784 (the flattened image), multiplies it by a weight\n",
"# matrix of size (784, 128), adds a bias vector of size 128, and applies the\n",
"# ReLU activation function element-wise to the resulting vector of size 128. \n",
" keras.layers.Dense(128, activation='relu'),\n",
" keras.layers.Dropout(0.5),\n",
" keras.layers.Dense(10)\n",
"])"
]
},
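{
"cell_type": "markdown",
"id": "9b4d2e10",
"metadata": {},
"source": [
"As an optional check (not required for training), `model.summary()` prints the output shape and parameter count of each layer; it confirms, for example, that the `Flatten` layer produces a vector of size 5x5x64 = 1600, which feeds the `Dense(128)` layer."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b4d2e11",
"metadata": {},
"outputs": [],
"source": [
"# Print per-layer output shapes and parameter counts.\n",
"model.summary()"
]
},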
{
"cell_type": "code",
"execution_count": null,
"id": "5007a3c3",
"metadata": {},
"outputs": [],
"source": [
"# Compile the model\n",
"model.compile(optimizer='adam',\n",
" loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
" metrics=['accuracy'])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "620cfa48",
"metadata": {},
"outputs": [],
"source": [
"# Train the model\n",
"history = model.fit(x_train, y_train, epochs=25, validation_split=0.2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "056668a2",
"metadata": {},
"outputs": [],
"source": [
"# Evaluate the model on the test set\n",
"test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)\n",
"print(\"Test accuracy:\", test_acc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be3465f5",
"metadata": {},
"outputs": [],
"source": [
"# Plot the training and validation accuracy and loss over time\n",
"plt.figure(figsize=(10, 4))\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(history.history[\"accuracy\"])\n",
"plt.plot(history.history[\"val_accuracy\"])\n",
"plt.title(\"Model accuracy\")\n",
"plt.ylabel(\"Accuracy\")\n",
"plt.xlabel(\"Epoch\")\n",
"plt.legend([\"Train\", \"Validation\"], loc=\"lower right\")\n",
"\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(history.history[\"loss\"])\n",
"plt.plot(history.history[\"val_loss\"])\n",
"plt.title(\"Model loss\")\n",
"plt.ylabel(\"Loss\")\n",
"plt.xlabel(\"Epoch\")\n",
"plt.legend([\"Train\", \"Validation\"], loc=\"upper right\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53b81651",
"metadata": {},
"outputs": [],
"source": [
"# Plot a confusion matrix of the test set predictions\n",
"test_preds = np.argmax(model.predict(x_test), axis=-1)\n",
"conf_mat = tf.math.confusion_matrix(y_test, test_preds)\n",
"plt.imshow(conf_mat, cmap=\"Blues\")\n",
"plt.xlabel(\"Predicted labels\")\n",
"plt.ylabel(\"True labels\")\n",
"plt.xticks(np.arange(10))\n",
"plt.yticks(np.arange(10))\n",
"plt.colorbar()\n",
"plt.show()"
]
},
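{
"cell_type": "markdown",
"id": "c1d0a9f2",
"metadata": {},
"source": [
"For reference, the numeric labels shown in the confusion matrix above and in the sample plot below correspond to the standard Fashion-MNIST class names; the `class_names` list here is added only as a reading aid."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1d0a9f3",
"metadata": {},
"outputs": [],
"source": [
"# Mapping from numeric label to Fashion-MNIST class name.\n",
"class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',\n",
"               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']\n",
"for label, name in enumerate(class_names):\n",
"    print(label, name)"
]
},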
{
"cell_type": "code",
"execution_count": null,
"id": "3bfc89c6",
"metadata": {},
"outputs": [],
"source": [
"# Plot a random sample of test set images and their predicted labels\n",
"random_indices = np.random.choice(x_test.shape[0], 16, replace=False)\n",
"test_preds = np.argmax(model.predict(x_test), axis=-1)\n",
"plt.figure(figsize=(10, 10))\n",
"for i, index in enumerate(random_indices):\n",
" plt.subplot(4, 4, i+1)\n",
" plt.xticks([])\n",
" plt.yticks([])\n",
" plt.grid(False)\n",
" plt.imshow(x_test[index].reshape(28, 28), cmap=plt.cm.binary)\n",
" plt.xlabel(f\"Predicted: {test_preds[index]}\\nTrue: {y_test[index]}\")\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}