237 lines
6.7 KiB
Plaintext
237 lines
6.7 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "d63563a8",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Exercise 3\n",
|
||
|
"# fashion mnist data\n",
|
||
|
"# MLP model with two hidden layers, each with a ReLU activation function.\n",
|
||
|
"# Input data is flattened to a 1D array and passed to the model."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "062f7519",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import tensorflow as tf\n",
|
||
|
"from tensorflow import keras\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import numpy as np"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "4ccb3a5e",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Load the Fashion-MNIST dataset\n",
|
||
|
"(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "e5ede535",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Normalize pixel values to between 0 and 1\n",
|
||
|
"x_train = x_train.astype(\"float32\") / 255.0\n",
|
||
|
"x_test = x_test.astype(\"float32\") / 255.0"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "39c100c1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Fashion-MNIST images have a shape of (28, 28). The images are flattened\n",
|
||
|
"# into a 1D array of length 784 \n",
|
||
|
"x_train = x_train.reshape(-1, 784)\n",
|
||
|
"x_test = x_test.reshape(-1, 784)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "e267a290",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# The model is defined here with three dense (fully connected) layers\n",
|
||
|
"# The first layer is a Dense layer with 128 units and a ReLU activation\n",
|
||
|
"# function with an input shape of (784,). This layer serves as the input\n",
|
||
|
"# layer of the model.\n",
|
||
|
"# The second layer is also a Dense layer with 64 units and a ReLU activation\n",
|
||
|
"# function. This layer takes the output of the previous layer as input, and\n",
|
||
|
"# applies a non-linear transformation to it to produce a new set of features\n",
|
||
|
"# that the next layer can use.\n",
|
||
|
"# The third is another Dense layer with 10 units, one for each class. The\n",
|
||
|
"# output is raw scores or logits for each class since there is no activation\n",
|
||
|
"# function. This layer is responsible for producing the final output of the\n",
|
||
|
"# model, which can then be used to make predictions.\n",
|
||
|
"# With Dropout(0.2), 20% of the inputs are randomly dropped during training to reduce overfitting (the layer is commented out below).\n",
|
||
|
"model = keras.Sequential([\n",
|
||
|
" keras.layers.Dense(128, activation='relu', input_shape=(784,)),\n",
|
||
|
" # keras.layers.Dropout(0.2),\n",
|
||
|
" keras.layers.Dense(64, activation='relu'),\n",
|
||
|
" keras.layers.Dense(10)\n",
|
||
|
"])\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "7dae353a",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Compile the model\n",
|
||
|
"# optimizer='adam' specifies the optimizer to use during training.\n",
|
||
|
"# loss function to use during training, SparseCategoricalCrossentropy loss\n",
|
||
|
"# is commonly used for multi-class classification problems.\n",
|
||
|
"# from_logits=True indicates that the model's output is a raw score\n",
|
||
|
"# for each class and not a probability distribution.\n",
|
||
|
"model.compile(optimizer='adam',\n",
|
||
|
" loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
|
||
|
" metrics=['accuracy'])"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "bd237a4b",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Train the model\n",
|
||
|
"history = model.fit(x_train, y_train, epochs=10, validation_split=0.2)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "169fc8c4",
|
||
|
"metadata": {
|
||
|
"scrolled": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Evaluate the model on the test set\n",
|
||
|
"test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)\n",
|
||
|
"print(\"Test accuracy:\", test_acc)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "7f2e657a",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Plot the training and validation accuracy and loss over time\n",
|
||
|
"plt.figure(figsize=(10, 4))\n",
|
||
|
"plt.subplot(1, 2, 1)\n",
|
||
|
"plt.plot(history.history[\"accuracy\"])\n",
|
||
|
"plt.plot(history.history[\"val_accuracy\"])\n",
|
||
|
"plt.title(\"Model accuracy\")\n",
|
||
|
"plt.ylabel(\"Accuracy\")\n",
|
||
|
"plt.xlabel(\"Epoch\")\n",
|
||
|
"plt.legend([\"Train\", \"Validation\"], loc=\"lower right\")\n",
|
||
|
"\n",
|
||
|
"plt.subplot(1, 2, 2)\n",
|
||
|
"plt.plot(history.history[\"loss\"])\n",
|
||
|
"plt.plot(history.history[\"val_loss\"])\n",
|
||
|
"plt.title(\"Model loss\")\n",
|
||
|
"plt.ylabel(\"Loss\")\n",
|
||
|
"plt.xlabel(\"Epoch\")\n",
|
||
|
"plt.legend([\"Train\", \"Validation\"], loc=\"upper right\")\n",
|
||
|
"\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "298ac3bc",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Plot a confusion matrix of the test set predictions\n",
|
||
|
"test_preds = np.argmax(model.predict(x_test), axis=1)\n",
|
||
|
"conf_mat = tf.math.confusion_matrix(y_test, test_preds)\n",
|
||
|
"plt.imshow(conf_mat, cmap=\"Blues\")\n",
|
||
|
"plt.xlabel(\"Predicted labels\")\n",
|
||
|
"plt.ylabel(\"True labels\")\n",
|
||
|
"plt.xticks(np.arange(10))\n",
|
||
|
"plt.yticks(np.arange(10))\n",
|
||
|
"plt.colorbar()\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "9a0355ec",
|
||
|
"metadata": {
|
||
|
"scrolled": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Make predictions on the test set\n",
|
||
|
"y_pred = model.predict(x_test)\n",
|
||
|
"y_pred = np.argmax(y_pred, axis=1)\n",
|
||
|
"\n",
|
||
|
"# Plot some examples from the test set and their predictions\n",
|
||
|
"fig, axes = plt.subplots(4, 4, figsize=(14, 14))\n",
|
||
|
"for i, ax in enumerate(axes.ravel()):\n",
|
||
|
" ax.matshow(x_test[i].reshape(28, 28), cmap='gray')\n",
|
||
|
" # y_test holds integer class labels (not one-hot vectors), so the label is used directly;\n",
" # np.argmax on a scalar label always returns 0 and would mislabel every image as class 0.\n",
" ax.set_title(\"True: %d\\nPredict: %d\" % (y_test[i], y_pred[i]))\n",
|
||
|
" ax.axis(\"off\")\n",
|
||
|
"\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "facda3d1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.16"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|