Machine Learning course as part of the Studierendentage, summer semester 2023 (SS 2023)

{
"cells": [
{
"cell_type": "markdown",
"id": "2eaba66b",
"metadata": {},
"source": [
"Read and display the Horses or Humans machine learning dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f1e48ac0",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import tensorflow_datasets as tfds\n",
"from tensorflow.keras import regularizers\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "feda024e",
"metadata": {},
"outputs": [],
"source": [
"# Load the horses_or_humans dataset; images are (300, 300, 3) uint8\n",
"# with_info=True also returns a tfds.core.DatasetInfo object\n",
"dataset, info = tfds.load('horses_or_humans', with_info=True)"
]
},
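{
"cell_type": "markdown",
"id": "3f0a1b2c",
"metadata": {},
"source": [
"Optional check: inspect the `tfds.core.DatasetInfo` object returned by `tfds.load(..., with_info=True)` to confirm the image shape, the classes, and the split sizes."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f0a1b2d",
"metadata": {},
"outputs": [],
"source": [
"# Inspect the dataset metadata (feature spec and split sizes)\n",
"print(info.features)\n",
"print(\"train examples:\", info.splits['train'].num_examples)\n",
"print(\"test examples:\", info.splits['test'].num_examples)"
]
},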
{
"cell_type": "code",
"execution_count": null,
"id": "35991dec",
"metadata": {},
"outputs": [],
"source": [
"# Split off the horse (label 0) and human (label 1) examples\n",
"horse_ds = dataset['train'].filter(lambda x: x['label'] == 0)\n",
"human_ds = dataset['train'].filter(lambda x: x['label'] == 1)"
]
},
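{
"cell_type": "markdown",
"id": "7c4d9e01",
"metadata": {},
"source": [
"Optional sanity check (a minimal sketch using `tf.data.Dataset.reduce`): count how many examples each filtered dataset contains; this iterates over the full train split once."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c4d9e02",
"metadata": {},
"outputs": [],
"source": [
"# Count the examples per class by folding over each filtered dataset\n",
"n_horses = horse_ds.reduce(0, lambda count, _: count + 1).numpy()\n",
"n_humans = human_ds.reduce(0, lambda count, _: count + 1).numpy()\n",
"print(\"horses:\", n_horses, \"humans:\", n_humans)"
]
},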
{
"cell_type": "code",
"execution_count": null,
"id": "fab03aa8",
"metadata": {},
"outputs": [],
"source": [
"# Take a few examples per class (keep n_examples small, e.g. below 16)\n",
"n_examples = 5\n",
"horse_examples = horse_ds.take(n_examples)\n",
"human_examples = human_ds.take(n_examples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c33f1acd",
"metadata": {},
"outputs": [],
"source": [
"# Display the examples\n",
"fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n",
"for i, example in enumerate(human_examples):\n",
"    image = example['image']\n",
"    axes[i].imshow(image)\n",
"    axes[i].set_title(f\"humans {i+1}\")\n",
"plt.show()\n",
"\n",
"fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n",
"for i, example in enumerate(horse_examples):\n",
"    image = example['image']\n",
"    axes[i].imshow(image)\n",
"    axes[i].set_title(f\"horses {i+1}\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25f3eeb3",
"metadata": {},
"outputs": [],
"source": [
"# Split the dataset into training and validation sets\n",
"# as_supervised=True returns each element as an (input, label) tuple\n",
"train_dataset, valid_dataset = tfds.load('horses_or_humans', split=['train','test'], as_supervised=True)"
]
},
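{
"cell_type": "markdown",
"id": "5b2e8f10",
"metadata": {},
"source": [
"With `as_supervised=True` each element is an `(image, label)` tuple; this can be confirmed via `element_spec`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b2e8f11",
"metadata": {},
"outputs": [],
"source": [
"# Each element is an (image, label) tuple of tensors\n",
"print(train_dataset.element_spec)\n",
"print(valid_dataset.element_spec)"
]
},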
{
"cell_type": "code",
"execution_count": null,
"id": "29dc0e62",
"metadata": {},
"outputs": [],
"source": [
"# Get the number of elements in the training and validation datasets\n",
"train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n",
"valid_size = tf.data.experimental.cardinality(valid_dataset).numpy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db8aaf91",
"metadata": {},
"outputs": [],
"source": [
"IMG_SIZE = 300\n",
"NUM_CLASSES = 2\n",
"\n",
"def preprocess(image, label):\n",
"    image = tf.cast(image, tf.float32)\n",
"    # Resize the images to a fixed size\n",
"    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))\n",
"    # Rescale the pixel values to be between 0 and 1\n",
"    image = image / 255.0\n",
"    # One-hot encode the label\n",
"    label = tf.one_hot(label, NUM_CLASSES)\n",
"    return image, label"
]
},
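{
"cell_type": "markdown",
"id": "9d7a4c22",
"metadata": {},
"source": [
"Quick sanity check (a minimal sketch, not tied to the later pipeline): apply `preprocess` to a single training example and confirm the shape, dtype, and one-hot label."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d7a4c23",
"metadata": {},
"outputs": [],
"source": [
"# Apply preprocess to one example (eager mode) and inspect the result\n",
"sample_image, sample_label = next(iter(train_dataset.map(preprocess)))\n",
"print(sample_image.shape, sample_image.dtype)\n",
"print(sample_label.numpy())"
]
},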
{
"cell_type": "code",
"execution_count": null,
"id": "d59661c3",
"metadata": {},
"outputs": [],
"source": [
"# Apply the preprocessing function to the datasets\n",
"train_dataset = train_dataset.map(preprocess)\n",
"valid_dataset = valid_dataset.map(preprocess)\n",
"\n",
"# Batch and shuffle the datasets\n",
"train_dataset = train_dataset.shuffle(2000).batch(80)\n",
"valid_dataset = valid_dataset.batch(20)"
]
},
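{
"cell_type": "markdown",
"id": "6e1f0b33",
"metadata": {},
"source": [
"Optionally, prefetching lets the `tf.data` pipeline prepare the next batches while the model trains; it does not change the data itself (assumes a TF version that provides `tf.data.AUTOTUNE`, i.e. TF 2.4 or newer)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e1f0b34",
"metadata": {},
"outputs": [],
"source": [
"# Overlap input preprocessing with training; purely a performance optimization\n",
"train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)\n",
"valid_dataset = valid_dataset.prefetch(tf.data.AUTOTUNE)"
]
},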
{
"cell_type": "code",
"execution_count": null,
"id": "9399bc99",
"metadata": {},
"outputs": [],
"source": [
"# After batching, cardinality gives the number of batches in the\n",
"# training and validation datasets (not the number of images)\n",
"train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n",
"valid_size = tf.data.experimental.cardinality(valid_dataset).numpy()\n",
"print(\"Training dataset size:\", train_size)\n",
"print(\"Validation dataset size:\", valid_size)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13af7d53",
"metadata": {},
"outputs": [],
"source": [
"# Store images and labels of the validation data for predictions\n",
"# Note: each iteration overwrites x_val/y_val, so only the last batch is kept\n",
"for images, labels in valid_dataset:\n",
"    x_val = images\n",
"    y_val = labels\n",
"\n",
"print(x_val.shape, y_val.shape)"
]
}
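,
{
"cell_type": "markdown",
"id": "e5a9c301",
"metadata": {},
"source": [
"The loop above keeps only the last validation batch. If the whole validation split is needed as single tensors, one option is to concatenate the batches as sketched below; `x_val_full` and `y_val_full` are illustrative names."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5a9c302",
"metadata": {},
"outputs": [],
"source": [
"# Concatenate all validation batches into single tensors (eager mode)\n",
"x_val_full = tf.concat([images for images, _ in valid_dataset], axis=0)\n",
"y_val_full = tf.concat([labels for _, labels in valid_dataset], axis=0)\n",
"print(x_val_full.shape, y_val_full.shape)"
]
}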
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}