# %% [markdown]
# # Exercise 3: MLP classifier for Fashion-MNIST
#
# - Data: Fashion-MNIST (28x28 grayscale clothing images, 10 classes).
# - Model: MLP with two hidden layers, each with a ReLU activation function.
# - Input: each image is flattened to a 1D array before it is passed to the model.

# %%
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np

# %%
# Configuration: everything a reader might want to tune lives here.
SEED = 42
IMAGE_SHAPE = (28, 28)
N_PIXELS = IMAGE_SHAPE[0] * IMAGE_SHAPE[1]  # 784 values per flattened image
NUM_CLASSES = 10
EPOCHS = 10
VALIDATION_SPLIT = 0.2

# Seed the random generators so weight initialisation and shuffling are more
# repeatable between "Restart Kernel -> Run All" runs.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# %%
# Load the Fashion-MNIST dataset.
# The raw arrays are kept under their own names so that the preprocessing
# cells below can be re-run without transforming the data twice.
(x_train_raw, y_train), (x_test_raw, y_test) = keras.datasets.fashion_mnist.load_data()

# %%
# Normalize pixel values to between 0 and 1.
x_train_scaled = x_train_raw.astype("float32") / 255.0
x_test_scaled = x_test_raw.astype("float32") / 255.0

# %%
# Fashion-MNIST images have a shape of (28, 28). The images are flattened
# into a 1D array of length 784 because Dense layers expect vectors.
x_train = x_train_scaled.reshape(-1, N_PIXELS)
x_test = x_test_scaled.reshape(-1, N_PIXELS)

assert x_train.shape[1:] == (N_PIXELS,), "unexpected flattened image size"
assert x_test.shape[1:] == (N_PIXELS,), "unexpected flattened image size"

# %%
# The model is defined here with three dense (fully connected) layers.
# 1. Dense layer with 128 units and a ReLU activation; it receives the
#    flattened image of shape (784,) and acts as the first hidden layer.
# 2. Dense layer with 64 units and a ReLU activation; it applies a further
#    non-linear transformation to the features produced by the first layer.
# 3. Dense output layer with one unit per class. It has no activation
#    function, so its output is a raw score (logit) for each class.
#
# Dropout is currently DISABLED. Uncommenting the Dropout(0.2) line would
# randomly drop 20 % of the first layer's outputs during training, which
# should reduce overfitting.
model = keras.Sequential([
    keras.layers.Dense(128, activation='relu', input_shape=(N_PIXELS,)),
    # keras.layers.Dropout(0.2),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(NUM_CLASSES),
])

# %%
# Compile the model.
# - optimizer='adam' selects the optimizer used during training.
# - SparseCategoricalCrossentropy is the loss for multi-class classification
#   when the labels are integer class ids (not one-hot vectors).
# - from_logits=True tells the loss that the model outputs raw scores for
#   each class and not a probability distribution.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# %%
# Train the model; a fraction of the training data is held out for validation.
history = model.fit(
    x_train,
    y_train,
    epochs=EPOCHS,
    validation_split=VALIDATION_SPLIT,
)

# %%
# Evaluate the model on the test set.
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print("Test accuracy:", test_acc)

# %%
# Plot the training and validation accuracy and loss over time.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 4))

ax_acc.plot(history.history["accuracy"])
ax_acc.plot(history.history["val_accuracy"])
ax_acc.set_title("Model accuracy")
ax_acc.set_ylabel("Accuracy")
ax_acc.set_xlabel("Epoch")
ax_acc.legend(["Train", "Validation"], loc="lower right")

ax_loss.plot(history.history["loss"])
ax_loss.plot(history.history["val_loss"])
ax_loss.set_title("Model loss")
ax_loss.set_ylabel("Loss")
ax_loss.set_xlabel("Epoch")
ax_loss.legend(["Train", "Validation"], loc="upper right")

plt.show()

# %%
# Predict a class for every test image: the predicted class is the index of
# the largest logit. Computed once and reused by the cells below.
test_preds = np.argmax(model.predict(x_test), axis=1)

# %%
# Plot a confusion matrix of the test set predictions.
# num_classes pins the matrix to NUM_CLASSES x NUM_CLASSES even if some class
# never appears in the labels or predictions.
conf_mat = tf.math.confusion_matrix(y_test, test_preds, num_classes=NUM_CLASSES)

fig, ax = plt.subplots()
heatmap = ax.imshow(conf_mat, cmap="Blues")
ax.set_xlabel("Predicted labels")
ax.set_ylabel("True labels")
ax.set_xticks(np.arange(NUM_CLASSES))
ax.set_yticks(np.arange(NUM_CLASSES))
fig.colorbar(heatmap, ax=ax)
plt.show()

# %%
# Plot some examples from the test set and their predictions.
# The labels are integer class ids, so y_test[i] is used directly: taking
# np.argmax of a scalar label always yields 0 and would mislabel every image.
fig, axes = plt.subplots(4, 4, figsize=(14, 14))
for i, ax in enumerate(axes.ravel()):
    ax.matshow(x_test[i].reshape(IMAGE_SHAPE), cmap='gray')
    ax.set_title("True: %d\nPredict: %d" % (y_test[i], test_preds[i]))
    ax.axis("off")

plt.show()