Machine Learning course as part of the Studierendentage in the 2023 summer semester
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "13fa2f64",
"metadata": {},
"outputs": [],
"source": [
"# In this example, we used the TensorFlow library to load the MNIST data,\n",
"# define an MLP model with three dense layers, compile the model, train it\n",
"# for 10 epochs, evaluate it on the test set, and make predictions on\n",
"# the test set. Finally, we plot some examples of the predictions made\n",
"# by the model."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c4405f8",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()"
]
},
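{
"cell_type": "code",
"execution_count": null,
"id": "a0c1d2e3",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (added sketch, not part of the original flow):\n",
"# inspect the shapes and value range of the raw MNIST arrays before\n",
"# any preprocessing.\n",
"print('x_train:', x_train.shape, x_train.dtype)\n",
"print('y_train:', y_train.shape, y_train.dtype)\n",
"print('x_test: ', x_test.shape, x_test.dtype)\n",
"print('pixel range:', x_train.min(), '-', x_train.max())"
]
},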
{
"cell_type": "code",
"execution_count": null,
"id": "1e7e58fb",
"metadata": {},
"outputs": [],
"source": [
"# Normalize the pixel values to be between 0 and 1\n",
"x_train = x_train / 255\n",
"x_test = x_test / 255"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d8a7370",
"metadata": {},
"outputs": [],
"source": [
"# Flatten the 2D images into 1D arrays\n",
"x_train = x_train.reshape(x_train.shape[0], -1)\n",
"x_test = x_test.reshape(x_test.shape[0], -1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2df0f54",
"metadata": {},
"outputs": [],
"source": [
"# Convert the labels into one-hot encoded arrays\n",
"y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)\n",
"y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)"
]
},
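{
"cell_type": "code",
"execution_count": null,
"id": "b1d2e3f4",
"metadata": {},
"outputs": [],
"source": [
"# Small illustration (added sketch): to_categorical turns an integer class\n",
"# label into a length-10 one-hot vector, e.g. the digit 3 becomes\n",
"# [0, 0, 0, 1, 0, 0, 0, 0, 0, 0].\n",
"print(tf.keras.utils.to_categorical(3, num_classes=10))\n",
"print(y_train[0])  # first training label, now one-hot encoded"
]
},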
{
"cell_type": "code",
"execution_count": null,
"id": "e2b249ea",
"metadata": {},
"outputs": [],
"source": [
"# Define the model\n",
"# The number of parameters depends on the shapes and sizes of the layers.\n",
"# In the given model, the first layer Dense(512, activation='relu',\n",
"# input_shape=(784,)) has 784 input nodes and 512 output nodes. Therefore,\n",
"# the number of parameters in this layer would be (784 * 512) + 512 = 401920,\n",
"# where the +512 term is for the bias terms.\n",
"# The second layer also has 512 input nodes and 512 output nodes, which makes\n",
"# 512512 = 262,144 parameters. The third and last layer has 512 input nodes\n",
"# and 10 output nodes, which makes 512*10 = 5,120 parameters.\n",
"model = tf.keras.models.Sequential()\n",
"model.add(tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)))\n",
"model.add(tf.keras.layers.Dense(512, activation='relu'))\n",
"model.add(tf.keras.layers.Dense(10, activation='softmax'))"
]
},
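{
"cell_type": "code",
"execution_count": null,
"id": "c2e3f4a5",
"metadata": {},
"outputs": [],
"source": [
"# Added check: the per-layer parameter counts from the comment above can be\n",
"# verified directly, since every Dense layer has (inputs * units) + units\n",
"# trainable parameters (weights plus biases).\n",
"model.summary()\n",
"for layer in model.layers:\n",
"    print(layer.name, layer.count_params())"
]
},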
{
"cell_type": "code",
"execution_count": null,
"id": "bab8730a",
"metadata": {},
"outputs": [],
"source": [
"# over come overfitting by regularization\n",
"#model = tf.keras.models.Sequential([\n",
"# tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001),input_shape=(784,)),\n",
"# tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),\n",
"# tf.keras.layers.Dense(10, activation='softmax')\n",
"#])"
]
},
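{
"cell_type": "code",
"execution_count": null,
"id": "d3f4a5b6",
"metadata": {},
"outputs": [],
"source": [
"# Another common way to fight overfitting is dropout; this is a sketch of an\n",
"# alternative model, also kept commented out so it does not replace the model\n",
"# defined above. The rate 0.2 is an arbitrary example value.\n",
"#model = tf.keras.models.Sequential([\n",
"#    tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)),\n",
"#    tf.keras.layers.Dropout(0.2),\n",
"#    tf.keras.layers.Dense(512, activation='relu'),\n",
"#    tf.keras.layers.Dropout(0.2),\n",
"#    tf.keras.layers.Dense(10, activation='softmax')\n",
"#])"
]
},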
{
"cell_type": "code",
"execution_count": null,
"id": "e3223c61",
"metadata": {},
"outputs": [],
"source": [
"# Compile the model\n",
"model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])"
]
},
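{
"cell_type": "code",
"execution_count": null,
"id": "e4a5b6c7",
"metadata": {},
"outputs": [],
"source": [
"# Added illustration: categorical cross-entropy for a single example is\n",
"# -sum(y_true * log(y_pred)); with a one-hot y_true this reduces to the\n",
"# negative log of the probability assigned to the correct class.\n",
"y_true_example = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0])\n",
"y_pred_example = np.array([0.05, 0.05, 0.05, 0.6, 0.05, 0.05, 0.05, 0.04, 0.03, 0.03])\n",
"print(-np.sum(y_true_example * np.log(y_pred_example)))  # approx 0.51, i.e. -log(0.6)"
]
},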
{
"cell_type": "code",
"execution_count": null,
"id": "51e7758f",
"metadata": {},
"outputs": [],
"source": [
"# Train the model and record the history\n",
"history = model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))"
]
},
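{
"cell_type": "code",
"execution_count": null,
"id": "f5b6c7d8",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: early stopping is another way to limit overfitting. If\n",
"# used, the callback would be passed to model.fit; it is shown here only as\n",
"# a commented-out example and is not part of the training run above.\n",
"#early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2,\n",
"#                                              restore_best_weights=True)\n",
"#history = model.fit(x_train, y_train, epochs=10, batch_size=64,\n",
"#                    validation_data=(x_test, y_test), callbacks=[early_stop])"
]
},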
{
"cell_type": "code",
"execution_count": null,
"id": "e3240069",
"metadata": {},
"outputs": [],
"source": [
"# Get the weights of the Dense layer\n",
"# plot the weights as a heatmap or image, where the weights are represented\n",
"# as pixel values.\n",
"# model.layers[2].get_weights()[0] returns only the weights of the third\n",
"# layer. If you wanted to get the biases, you would use\n",
"# model.layers[2].get_weights()[1].\n",
"dense_weights = model.layers[2].get_weights()[0]\n",
"\n",
"# Plot the weights as a heatmap\n",
"plt.imshow(dense_weights, cmap='coolwarm')\n",
"plt.colorbar()\n",
"plt.title('weights in the output layer')\n",
"plt.show()"
]
},
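{
"cell_type": "code",
"execution_count": null,
"id": "a6c7d8e9",
"metadata": {},
"outputs": [],
"source": [
"# Added visualization sketch: the first Dense layer has a 784 x 512 weight\n",
"# matrix, so each column can be reshaped to 28 x 28 and viewed as an image of\n",
"# the input pattern that hidden unit responds to. Here we look at the first\n",
"# 16 units.\n",
"first_weights = model.layers[0].get_weights()[0]\n",
"fig, axes = plt.subplots(4, 4, figsize=(8, 8))\n",
"for i, ax in enumerate(axes.ravel()):\n",
"    ax.imshow(first_weights[:, i].reshape(28, 28), cmap='coolwarm')\n",
"    ax.axis('off')\n",
"plt.suptitle('weights of the first 16 hidden units (first layer)')\n",
"plt.show()"
]
},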
{
"cell_type": "code",
"execution_count": null,
"id": "13118686",
"metadata": {},
"outputs": [],
"source": [
"# Evaluate the model on the test set\n",
"test_loss, test_acc = model.evaluate(x_test, y_test)\n",
"print('Test accuracy:', test_acc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40f35fe5",
"metadata": {},
"outputs": [],
"source": [
"# Plot loss and accuracy\n",
"plt.figure(figsize=(12, 4))\n",
"\n",
"# Plot the loss and accuracy for training and validation data\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(history.history['loss'], label='training loss')\n",
"plt.plot(history.history['val_loss'], label='validation loss')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Loss')\n",
"plt.legend()\n",
"\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(history.history['accuracy'])\n",
"plt.plot(history.history['val_accuracy'])\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Accuracy')\n",
"plt.legend()\n",
"\n",
"plt.show()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6176ce1e",
"metadata": {},
"outputs": [],
"source": [
"# Make predictions on the test set\n",
"y_pred = model.predict(x_test)\n",
"y_pred = np.argmax(y_pred, axis=1)"
]
},
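{
"cell_type": "code",
"execution_count": null,
"id": "b7d8e9f0",
"metadata": {},
"outputs": [],
"source": [
"# Added sketch: a confusion matrix summarizes which digits get mixed up.\n",
"# y_test is one-hot encoded at this point, so it is converted back to class\n",
"# indices with argmax before comparing with y_pred.\n",
"y_true = np.argmax(y_test, axis=1)\n",
"conf_mat = tf.math.confusion_matrix(y_true, y_pred, num_classes=10)\n",
"plt.imshow(conf_mat, cmap='Blues')\n",
"plt.colorbar()\n",
"plt.xlabel('Predicted digit')\n",
"plt.ylabel('True digit')\n",
"plt.title('Confusion matrix on the test set')\n",
"plt.show()"
]
},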
{
"cell_type": "code",
"execution_count": null,
"id": "3635ded5",
"metadata": {},
"outputs": [],
"source": [
"# Plot some examples from the test set and their predictions\n",
"fig, axes = plt.subplots(4, 4, figsize=(14, 14))\n",
"for i, ax in enumerate(axes.ravel()):\n",
" ax.matshow(x_test[i].reshape(28, 28), cmap='gray')\n",
" ax.set_title(\"True: %d\\nPredict: %d\" % (np.argmax(y_test[i]), y_pred[i]))\n",
" ax.axis(\"off\")\n",
"\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}