{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "13fa2f64", "metadata": {}, "outputs": [], "source": [ "# In this example, we used the TensorFlow library to load the MNIST data,\n", "# define an MLP model with three dense layers, compile the model, train it\n", "# for 10 epochs, evaluate it on the test set, and make predictions on\n", "# the test set. Finally, we plot some examples of the predictions made\n", "# by the model." ] }, { "cell_type": "code", "execution_count": null, "id": "1c4405f8", "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", "from tensorflow import keras\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()" ] }, { "cell_type": "code", "execution_count": null, "id": "1e7e58fb", "metadata": {}, "outputs": [], "source": [ "# Normalize the pixel values to be between 0 and 1\n", "x_train = x_train / 255\n", "x_test = x_test / 255" ] }, { "cell_type": "code", "execution_count": null, "id": "3d8a7370", "metadata": {}, "outputs": [], "source": [ "# Flatten the 2D images into 1D arrays\n", "x_train = x_train.reshape(x_train.shape[0], -1)\n", "x_test = x_test.reshape(x_test.shape[0], -1)" ] }, { "cell_type": "code", "execution_count": null, "id": "c2df0f54", "metadata": {}, "outputs": [], "source": [ "# Convert the labels into one-hot encoded arrays\n", "y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)\n", "y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)" ] }, { "cell_type": "code", "execution_count": null, "id": "e2b249ea", "metadata": {}, "outputs": [], "source": [ "# Define the model\n", "# The number of parameters depends on the shapes and sizes of the layers.\n", "# In the given model, the first layer Dense(512, activation='relu',\n", "# input_shape=(784,)) has 784 input nodes and 512 output nodes. Therefore,\n", "# the number of parameters in this layer would be (784 * 512) + 512 = 401920,\n", "# where the +512 term is for the bias terms.\n", "# The second layer also has 512 input nodes and 512 output nodes, which makes\n", "# 512512 = 262,144 parameters. The third and last layer has 512 input nodes\n", "# and 10 output nodes, which makes 512*10 = 5,120 parameters.\n", "model = tf.keras.models.Sequential()\n", "model.add(tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)))\n", "model.add(tf.keras.layers.Dense(512, activation='relu'))\n", "model.add(tf.keras.layers.Dense(10, activation='softmax'))" ] }, { "cell_type": "code", "execution_count": null, "id": "bab8730a", "metadata": {}, "outputs": [], "source": [ "# over come overfitting by regularization\n", "#model = tf.keras.models.Sequential([\n", "# tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001),input_shape=(784,)),\n", "# tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),\n", "# tf.keras.layers.Dense(10, activation='softmax')\n", "#])" ] }, { "cell_type": "code", "execution_count": null, "id": "e3223c61", "metadata": {}, "outputs": [], "source": [ "# Compile the model\n", "model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])" ] }, { "cell_type": "code", "execution_count": null, "id": "51e7758f", "metadata": {}, "outputs": [], "source": [ "# Train the model and record the history\n", "history = model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))" ] }, { "cell_type": "code", "execution_count": null, "id": "e3240069", "metadata": {}, "outputs": [], "source": [ "# Get the weights of the Dense layer\n", "# plot the weights as a heatmap or image, where the weights are represented\n", "# as pixel values.\n", "# model.layers[2].get_weights()[0] returns only the weights of the third\n", "# layer. If you wanted to get the biases, you would use\n", "# model.layers[2].get_weights()[1].\n", "dense_weights = model.layers[2].get_weights()[0]\n", "\n", "# Plot the weights as a heatmap\n", "fig, ax = plt.subplots(figsize=(12, 36))\n", "im = ax.imshow(dense_weights, cmap='coolwarm')\n", "plt.colorbar(im, ax=ax)\n", "ax.set_title('Weights in the Output Layer')\n", "ax.set_xlabel('Neurons in the Output Layer')\n", "ax.set_ylabel('Neurons in the Previous Layer')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "13118686", "metadata": {}, "outputs": [], "source": [ "# Evaluate the model on the test set\n", "test_loss, test_acc = model.evaluate(x_test, y_test)\n", "print('Test accuracy:', test_acc)" ] }, { "cell_type": "code", "execution_count": null, "id": "40f35fe5", "metadata": {}, "outputs": [], "source": [ "# Plot loss and accuracy\n", "plt.figure(figsize=(12, 4))\n", "\n", "# Plot the loss and accuracy for training and validation data\n", "plt.subplot(1, 2, 1)\n", "plt.plot(history.history['loss'], label='training loss')\n", "plt.plot(history.history['val_loss'], label='validation loss')\n", "plt.xlabel('Epoch')\n", "plt.ylabel('Loss')\n", "plt.legend()\n", "\n", "plt.subplot(1, 2, 2)\n", "plt.plot(history.history['accuracy'])\n", "plt.plot(history.history['val_accuracy'])\n", "plt.xlabel('Epoch')\n", "plt.ylabel('Accuracy')\n", "plt.legend()\n", "\n", "plt.show()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "e882afea", "metadata": {}, "outputs": [], "source": [ "# Plot a confusion matrix of the test set predictions\n", "test_preds = np.argmax(model.predict(x_test), axis=1)\n", "conf_mat = tf.math.confusion_matrix(y_test.argmax(axis=1), test_preds)\n", "plt.imshow(conf_mat, cmap=\"Blues\")\n", "plt.xlabel(\"Predicted labels\")\n", "plt.ylabel(\"True labels\")\n", "plt.xticks(np.arange(10))\n", "plt.yticks(np.arange(10))\n", "plt.colorbar()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "6176ce1e", "metadata": {}, "outputs": [], "source": [ "# Make predictions on the test set\n", "y_pred = model.predict(x_test)\n", "y_pred = np.argmax(y_pred, axis=1)" ] }, { "cell_type": "code", "execution_count": null, "id": "3635ded5", "metadata": {}, "outputs": [], "source": [ "# Plot some examples from the test set and their predictions\n", "fig, axes = plt.subplots(4, 4, figsize=(14, 14))\n", "for i, ax in enumerate(axes.ravel()):\n", " ax.matshow(x_test[i].reshape(28, 28), cmap='gray')\n", " ax.set_title(\"True: %d\\nPredict: %d\" % (np.argmax(y_test[i]), y_pred[i]))\n", " ax.axis(\"off\")\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "71f1cb93", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }