ML-Kurs-SS2023/notebooks/03_ml_basics_ex_4_mlp_cloth...


								{

								 "cells": [

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "6c180d4b",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Exercise 3\n",

								    "# Dataset: Fashion-MNIST\n",

								    "# Model: an MLP with two hidden layers, each using a ReLU activation function.\n",

								    "# Each input image is flattened to a 1D array before it is passed to the model."

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "b0e31b9c",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "import tensorflow as tf\n",

								    "from tensorflow import keras\n",

								    "import matplotlib.pyplot as plt\n",

								    "import numpy as np"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "1ae1412e",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Load the MNIST Fashion dataset\n",

								    "(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "f8814914",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Normalize pixel values to between 0 and 1\n",

								    "x_train = x_train.astype(\"float32\") / 255.0\n",

								    "x_test = x_test.astype(\"float32\") / 255.0"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "2810da39",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Every Fashion-MNIST image is a 28x28 grid; unroll each one into a flat\n",

								    "# vector of 784 values (-1 lets NumPy infer the number of samples).\n",

								    "flat_shape = (-1, 784)\n",

								    "x_train = x_train.reshape(flat_shape)\n",

								    "x_test = x_test.reshape(flat_shape)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "96f7ff8a",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Seed Python, NumPy and TensorFlow before any weights are created so that\n",

								    "# weight initialisation and training-time randomness are repeatable after\n",

								    "# Restart & Run All.\n",

								    "keras.utils.set_random_seed(42)\n",

								    "\n",

								    "# The model is a stack of three dense (fully connected) layers:\n",

								    "# 1. Dense, 128 units, ReLU activation. input_shape=(784,) declares the\n",

								    "#    flattened image as the input of the model.\n",

								    "# 2. Dense, 64 units, ReLU activation. It takes the output of the previous\n",

								    "#    layer and applies a non-linear transformation to it, producing a new\n",

								    "#    set of features that the next layer can use.\n",

								    "# 3. Dense, 10 units, one for each class. There is no activation function,\n",

								    "#    so the outputs are raw scores (logits) for each class, which can then\n",

								    "#    be used to make predictions.\n",

								    "# Optional: the Dropout(0.2) layer below is currently disabled. When enabled,\n",

								    "# 20 % of its inputs are randomly dropped during training, which should\n",

								    "# reduce overfitting.\n",

								    "model = keras.Sequential([\n",

								    "    keras.layers.Dense(128, activation='relu', input_shape=(784,)),\n",

								    "    # keras.layers.Dropout(0.2),\n",

								    "    keras.layers.Dense(64, activation='relu'),\n",

								    "    keras.layers.Dense(10)\n",

								    "])"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "a3fe609c",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Configure training:\n",

								    "# - optimizer: Adam\n",

								    "# - loss: SparseCategoricalCrossentropy, commonly used for multi-class\n",

								    "#   classification; from_logits=True because the model outputs a raw score\n",

								    "#   for each class rather than a probability distribution\n",

								    "# - metric: accuracy\n",

								    "logit_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n",

								    "model.compile(optimizer='adam', loss=logit_loss, metrics=['accuracy'])"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "cf6c978d",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Fit for 10 epochs; validation_split=0.2 reserves 20 % of the training\n",

								    "# data for validation. The returned History object feeds the plots below.\n",

								    "history = model.fit(\n",

								    "    x=x_train,\n",

								    "    y=y_train,\n",

								    "    epochs=10,\n",

								    "    validation_split=0.2,\n",

								    ")"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "97fc2313",

								   "metadata": {

								    "scrolled": true

								   },

								   "outputs": [],

								   "source": [

								    "# Score the trained model on the test set; evaluate() yields the loss\n",

								    "# followed by the compiled accuracy metric.\n",

								    "test_metrics = model.evaluate(x_test, y_test, verbose=2)\n",

								    "test_loss, test_acc = test_metrics\n",

								    "print(\"Test accuracy:\", test_acc)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "ef5f19d0",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Training vs. validation curves per epoch: accuracy on the left, loss on the right\n",

								    "fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(10, 4))\n",

								    "\n",

								    "# (axes, history key, title, y-label, legend position) for each panel\n",

								    "panels = [\n",

								    "    (acc_ax, \"accuracy\", \"Model accuracy\", \"Accuracy\", \"lower right\"),\n",

								    "    (loss_ax, \"loss\", \"Model loss\", \"Loss\", \"upper right\"),\n",

								    "]\n",

								    "for ax, metric, title, ylabel, legend_loc in panels:\n",

								    "    ax.plot(history.history[metric])\n",

								    "    ax.plot(history.history[\"val_\" + metric])\n",

								    "    ax.set_title(title)\n",

								    "    ax.set_ylabel(ylabel)\n",

								    "    ax.set_xlabel(\"Epoch\")\n",

								    "    ax.legend([\"Train\", \"Validation\"], loc=legend_loc)\n",

								    "\n",

								    "plt.show()"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "c0ebddc4",

								   "metadata": {},

								   "outputs": [],

								   "source": [

								    "# Confusion matrix of the test set predictions:\n",

								    "# true labels on the y-axis, predicted labels on the x-axis\n",

								    "test_preds = model.predict(x_test).argmax(axis=1)\n",

								    "conf_mat = tf.math.confusion_matrix(y_test, test_preds)\n",

								    "\n",

								    "class_ticks = np.arange(10)  # one tick per class\n",

								    "fig, ax = plt.subplots()\n",

								    "heatmap = ax.imshow(conf_mat, cmap=\"Blues\")\n",

								    "ax.set_xlabel(\"Predicted labels\")\n",

								    "ax.set_ylabel(\"True labels\")\n",

								    "ax.set_xticks(class_ticks)\n",

								    "ax.set_yticks(class_ticks)\n",

								    "fig.colorbar(heatmap, ax=ax)\n",

								    "plt.show()"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "9175d533",

								   "metadata": {

								    "scrolled": true

								   },

								   "outputs": [],

								   "source": [

								    "# Predicted class for each test image = index of the largest output score\n",

								    "y_pred = model.predict(x_test).argmax(axis=1)\n",

								    "\n",

								    "# Show the first 16 test images in a 4x4 grid, each titled with its true\n",

								    "# and predicted label\n",

								    "fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(18, 18))\n",

								    "for idx, panel in enumerate(axes.flat):\n",

								    "    # undo the flattening so the sample can be drawn as a 28x28 image\n",

								    "    panel.matshow(x_test[idx].reshape(28, 28), cmap='gray')\n",

								    "    panel.set_title(\"True: %d\\nPredict: %d\" % (y_test[idx], y_pred[idx]))\n",

								    "    panel.axis(\"off\")\n",

								    "\n",

								    "plt.show()"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "id": "4a6e85be",

								   "metadata": {},

								   "outputs": [],

								   "source": []

								  }

								 ],

								 "metadata": {

								  "kernelspec": {

								   "display_name": "Python 3 (ipykernel)",

								   "language": "python",

								   "name": "python3"

								  },

								  "language_info": {

								   "codemirror_mode": {

								    "name": "ipython",

								    "version": 3

								   },

								   "file_extension": ".py",

								   "mimetype": "text/x-python",

								   "name": "python",

								   "nbconvert_exporter": "python",

								   "pygments_lexer": "ipython3",

								   "version": "3.8.16"

								  }

								 },

								 "nbformat": 4,

								 "nbformat_minor": 5

								}