In [None]:
# Exercise 3
# Data: the Fashion-MNIST dataset (28x28 images, 10 classes).
# MLP model with two hidden layers, each with a ReLU activation function.
# Input data is flattened to a 1D array and passed to the model.

In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Fetch Fashion-MNIST through Keras. load_data() yields a (train, test) pair,
# each holding the images and their class labels.
train_split, test_split = keras.datasets.fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Rescale the pixel values to the range 0..1, stored as float32.
# NOTE: re-running this cell without reloading the data divides by 255 again.
x_train, x_test = (
    images.astype("float32") / 255.0 for images in (x_train, x_test)
)

In [None]:
# Each Fashion-MNIST image is 28x28. Collapse the two spatial axes so that
# every sample becomes a single row of 784 values, as the dense layers expect.
n_pixels = 28 * 28
x_train = x_train.reshape(-1, n_pixels)
x_test = x_test.reshape(-1, n_pixels)

In [None]:
# The model is a multilayer perceptron: three dense (fully connected) layers
# stacked in a Sequential container.
# keras.Input declares the shape of one flattened sample, (784,). Declaring it
# as its own entry replaces passing input_shape= to the first Dense layer,
# which newer Keras releases warn against for Sequential models.
# The first Dense layer has 128 units and a ReLU activation function.
# The second Dense layer has 64 units and a ReLU activation function. It takes
# the output of the previous layer as input and applies a non-linear
# transformation to it, producing a new set of features for the next layer.
# The third Dense layer has 10 units, one for each class in the output. It has
# no activation function, so the model emits raw scores (logits) per class;
# the loss is configured with from_logits=True to match.
# Optional: uncommenting Dropout(0.2) randomly zeroes 20 % of the first hidden
# layer's outputs during training, which should reduce overfitting. It is
# disabled in this version of the model.
model = keras.Sequential([
    keras.Input(shape=(784,)),
    keras.layers.Dense(128, activation='relu'),
    # keras.layers.Dropout(0.2),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(10)
])


In [None]:
# Configure training.
# - optimizer: Adam, selected by its string name.
# - loss: sparse categorical cross-entropy, commonly used for multi-class
#   classification. from_logits=True tells the loss that the model outputs a
#   raw score per class rather than a probability distribution.
# - metrics: accuracy is reported alongside the loss.
logit_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=logit_loss, metrics=['accuracy'])

In [None]:
# Fit for 10 epochs. validation_split=0.2 holds out 20 % of the training data
# for validation; the per-epoch metrics are kept in `history` for plotting.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Score the trained model on the test set.
# evaluate() returns the loss followed by the compiled metric (accuracy here).
test_metrics = model.evaluate(x_test, y_test, verbose=2)
test_loss, test_acc = test_metrics
print("Test accuracy:", test_acc)

In [None]:
# Show the learning curves side by side: accuracy on the left, loss on the
# right, each with the training and validation series recorded in `history`.
curve_specs = [
    # (history key, panel title, y-axis label, legend position)
    ("accuracy", "Model accuracy", "Accuracy", "lower right"),
    ("loss", "Model loss", "Loss", "upper right"),
]

plt.figure(figsize=(10, 4))
for panel, (metric, title, y_label, legend_loc) in enumerate(curve_specs, start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[metric])
    # Validation series are stored under the same key with a "val_" prefix.
    plt.plot(history.history["val_" + metric])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel("Epoch")
    plt.legend(["Train", "Validation"], loc=legend_loc)

plt.show()

In [None]:
# Confusion matrix for the test set: rows are the true labels, columns the
# predicted labels (argmax over the logits of each sample).
test_logits = model.predict(x_test)
test_preds = np.argmax(test_logits, axis=1)
conf_mat = tf.math.confusion_matrix(labels=y_test, predictions=test_preds)

class_ticks = np.arange(10)  # one tick per class index, 0..9
plt.imshow(conf_mat, cmap="Blues")
plt.ylabel("True labels")
plt.xlabel("Predicted labels")
plt.xticks(class_ticks)
plt.yticks(class_ticks)
plt.colorbar()
plt.show()

In [None]:
# Predict a class for every test image: argmax over the logits of each row.
y_pred = np.argmax(model.predict(x_test), axis=1)

# Draw the first 16 test images in a 4x4 grid, each titled with its true and
# predicted class index. zip() stops once the 16 axes are used up.
fig, axes = plt.subplots(4, 4, figsize=(18, 18))
for panel, image, truth, guess in zip(axes.flat, x_test, y_test, y_pred):
    # The samples were flattened to 784 values; restore 28x28 for display.
    panel.matshow(image.reshape(28, 28), cmap='gray')
    panel.set_title("True: %d\nPredict: %d" % (truth, guess))
    panel.axis("off")

plt.show()