Machine Learning course as part of the Studierendentage in summer semester 2023

{
"cells": [
{
"cell_type": "markdown",
"id": "2eaba66b",
"metadata": {},
"source": [
"Read and display the Horses or Humans machine learning dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f1e48ac0",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import tensorflow_datasets as tfds\n",
"from tensorflow.keras import regularizers\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "feda024e",
"metadata": {},
"outputs": [],
"source": [
"# Load the horses_or_humans dataset together with its DatasetInfo\n",
"# Images are (300, 300, 3) uint8\n",
"dataset, info = tfds.load('horses_or_humans', with_info=True)"
]
},
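{
"cell_type": "markdown",
"id": "a1f4c9d2",
"metadata": {},
"source": [
"An optional sanity check: the `DatasetInfo` object returned by `with_info=True` describes the feature spec and the split sizes, so the image shape, the number of classes and the number of examples per split can be confirmed before going further."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7e3d5a1",
"metadata": {},
"outputs": [],
"source": [
"# Inspect the dataset metadata via the DatasetInfo object loaded above\n",
"print(info.features)\n",
"print(\"Number of classes:\", info.features['label'].num_classes)\n",
"print(\"Train examples:\", info.splits['train'].num_examples)\n",
"print(\"Test examples:\", info.splits['test'].num_examples)"
]
},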
{
"cell_type": "code",
"execution_count": null,
"id": "35991dec",
"metadata": {},
"outputs": [],
"source": [
"# Split the training data into the horse class (label 0) and the human class (label 1)\n",
"horse_ds = dataset['train'].filter(lambda x: x['label'] == 0)\n",
"human_ds = dataset['train'].filter(lambda x: x['label'] == 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fab03aa8",
"metadata": {},
"outputs": [],
"source": [
"# Take a few examples (fewer than 16) from each class\n",
"n_examples = 5\n",
"horse_examples = horse_ds.take(n_examples)\n",
"human_examples = human_ds.take(n_examples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c33f1acd",
"metadata": {},
"outputs": [],
"source": [
"# Display the examples\n",
"fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n",
"for i, example in enumerate(human_examples):\n",
"    image = example['image']\n",
"    axes[i].imshow(image)\n",
"    axes[i].set_title(f\"humans {i+1}\")\n",
"plt.show()\n",
"\n",
"fig, axes = plt.subplots(1, n_examples, figsize=(15, 15))\n",
"for i, example in enumerate(horse_examples):\n",
"    image = example['image']\n",
"    axes[i].imshow(image)\n",
"    axes[i].set_title(f\"horses {i+1}\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25f3eeb3",
"metadata": {},
"outputs": [],
"source": [
"# Split the dataset into training and validation sets\n",
"# as_supervised: Specifies whether to return the dataset as a tuple\n",
"# of (input, label) pairs.\n",
"train_dataset, valid_dataset = tfds.load('horses_or_humans', split=['train', 'test'], as_supervised=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29dc0e62",
"metadata": {},
"outputs": [],
"source": [
"# Get the number of elements in the training and validation dataset\n",
"train_size = tf.data.experimental.cardinality(train_dataset).numpy()\n",
"valid_size = tf.data.experimental.cardinality(valid_dataset).numpy()\n",
"print(\"Training dataset size:\", train_size)\n",
"print(\"Validation dataset size:\", valid_size)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db8aaf91",
"metadata": {},
"outputs": [],
"source": [
"IMG_SIZE = 300\n",
"NUM_CLASSES = 2\n",
"\n",
"def preprocess(image, label):\n",
"    image = tf.cast(image, tf.float32)\n",
"    # Resize the images to a fixed size\n",
"    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))\n",
"    # Rescale the pixel values to be between 0 and 1\n",
"    image = image / 255.0\n",
"    label = tf.one_hot(label, NUM_CLASSES)\n",
"    return image, label"
]
},
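{
"cell_type": "markdown",
"id": "c2d8e4f6",
"metadata": {},
"source": [
"A small optional sketch before mapping the whole pipeline: apply `preprocess` to a single raw `(image, label)` pair to confirm that it yields a `(300, 300, 3)` float image with values in `[0, 1]` and a one-hot label."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4f1a8c3",
"metadata": {},
"outputs": [],
"source": [
"# Try the preprocessing on one example (the dataset still yields raw (image, label) pairs here)\n",
"for image, label in train_dataset.take(1):\n",
"    img, lab = preprocess(image, label)\n",
"    print(img.shape, img.dtype, tf.reduce_min(img).numpy(), tf.reduce_max(img).numpy())\n",
"    print(lab.numpy())"
]
},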
{
"cell_type": "code",
"execution_count": null,
"id": "d59661c3",
"metadata": {},
"outputs": [],
"source": [
"# Apply the preprocessing function to the datasets\n",
"train_dataset = train_dataset.map(preprocess)\n",
"valid_dataset = valid_dataset.map(preprocess)\n",
"\n",
"# Batch and shuffle the datasets\n",
"train_dataset = train_dataset.shuffle(1000).batch(80)\n",
"valid_dataset = valid_dataset.batch(20)"
]
},
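{
"cell_type": "markdown",
"id": "e5b2c7d9",
"metadata": {},
"source": [
"Optionally, the batched datasets can be prefetched so that preprocessing of the next batch overlaps with work on the current one; this is a common `tf.data` pattern and does not change the data itself."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6a3d8e2",
"metadata": {},
"outputs": [],
"source": [
"# Optional: prefetch batches; tf.data.AUTOTUNE lets TensorFlow choose the buffer size\n",
"train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)\n",
"valid_dataset = valid_dataset.prefetch(tf.data.AUTOTUNE)"
]
},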
{
"cell_type": "code",
"execution_count": null,
"id": "13af7d53",
"metadata": {},
"outputs": [],
"source": [
"# Store the images and labels of the last validation batch for predictions\n",
"for images, labels in valid_dataset:\n",
"    x_val = images\n",
"    y_val = labels\n",
"\n",
"print(x_val.shape, y_val.shape)"
]
},
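{
"cell_type": "markdown",
"id": "a9c4e1b7",
"metadata": {},
"source": [
"The loop above keeps only the last validation batch. If the full validation set is needed as arrays, one possible variant (using the hypothetical names `x_val_full` and `y_val_full`) is to concatenate all batches with NumPy."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b8d5f2c4",
"metadata": {},
"outputs": [],
"source": [
"# Optional sketch: collect every validation batch into single arrays\n",
"x_val_full = np.concatenate([images.numpy() for images, _ in valid_dataset])\n",
"y_val_full = np.concatenate([labels.numpy() for _, labels in valid_dataset])\n",
"print(x_val_full.shape, y_val_full.shape)"
]
},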
{
"cell_type": "code",
"execution_count": null,
"id": "67e152ff-0713-4629-8471-1afbb1bf22a6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:ML]",
"language": "python",
"name": "conda-env-ML-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}