{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "13fa2f64", "metadata": {}, "outputs": [], "source": [ "# In this example, we use the TensorFlow library to load the MNIST data,\n", "# define an MLP model with three dense layers, compile the model, train it\n", "# for 10 epochs, evaluate it on the test set, and make predictions on\n", "# the test set. Finally, we plot some examples of the predictions made\n", "# by the model." ] }, { "cell_type": "code", "execution_count": null, "id": "1c4405f8", "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", "from tensorflow import keras\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()" ] }, { "cell_type": "code", "execution_count": null, "id": "1e7e58fb", "metadata": {}, "outputs": [], "source": [ "# Normalize the pixel values to be between 0 and 1\n", "x_train = x_train / 255\n", "x_test = x_test / 255" ] }, { "cell_type": "code", "execution_count": null, "id": "3d8a7370", "metadata": {}, "outputs": [], "source": [ "# Flatten the 2D images into 1D arrays\n", "x_train = x_train.reshape(x_train.shape[0], -1)\n", "x_test = x_test.reshape(x_test.shape[0], -1)" ] }, { "cell_type": "code", "execution_count": null, "id": "c2df0f54", "metadata": {}, "outputs": [], "source": [ "# Convert the labels into one-hot encoded arrays\n", "y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)\n", "y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)" ] }, { "cell_type": "code", "execution_count": null, "id": "e2b249ea", "metadata": {}, "outputs": [], "source": [ "# Define the model\n", "# The number of parameters depends on the shapes and sizes of the layers.\n", "# In the given model, the first layer Dense(512, activation='relu',\n", "# input_shape=(784,)) has 784 input nodes and 512 output nodes. 
Therefore,\n", "# the number of parameters in this layer would be (784 * 512) + 512 = 401,920,\n", "# where the +512 term is for the bias terms.\n", "# The second layer also has 512 input nodes and 512 output nodes, which makes\n", "# (512 * 512) + 512 = 262,656 parameters. The third and last layer has 512 input nodes\n", "# and 10 output nodes, which makes (512 * 10) + 10 = 5,130 parameters.\n", "model = tf.keras.models.Sequential()\n", "model.add(tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)))\n", "model.add(tf.keras.layers.Dense(512, activation='relu'))\n", "model.add(tf.keras.layers.Dense(10, activation='softmax'))" ] }, { "cell_type": "code", "execution_count": null, "id": "bab8730a", "metadata": {}, "outputs": [], "source": [ "# Overcome overfitting with L2 regularization\n", "#model = tf.keras.models.Sequential([\n", "# tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001),input_shape=(784,)),\n", "# tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),\n", "# tf.keras.layers.Dense(10, activation='softmax')\n", "#])" ] }, { "cell_type": "code", "execution_count": null, "id": "e3223c61", "metadata": {}, "outputs": [], "source": [ "# Compile the model\n", "model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])" ] }, { "cell_type": "code", "execution_count": null, "id": "51e7758f", "metadata": {}, "outputs": [], "source": [ "# Train the model and record the history\n", "history = model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))" ] }, { "cell_type": "code", "execution_count": null, "id": "e3240069", "metadata": {}, "outputs": [], "source": [ "# Get the weights of the Dense layer\n", "# plot the weights as a heatmap or image, where the weights are represented\n", "# as pixel values.\n", "# model.layers[2].get_weights()[0] returns only the weights of the third\n", "# layer. 