Machine Learning Kurs im Rahmen der Studierendentage im SS 2023
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

226 lines
5.3 KiB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "Exercise 2: Example for pandas using the heart.csv data set"
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": null,
  13. "metadata": {},
  14. "outputs": [],
  15. "source": [
  16. "import numpy as np\n",
  17. "import pandas as pd\n",
  18. "import matplotlib.pyplot as plt"
  19. ]
  20. },
  21. {
  22. "cell_type": "code",
  23. "execution_count": null,
  24. "metadata": {},
  25. "outputs": [],
  26. "source": [
  27. "# Read the CSV data\n",
  28. "df = pd.read_csv('heart.csv')"
  29. ]
  30. },
  31. {
  32. "cell_type": "code",
  33. "execution_count": null,
  34. "metadata": {},
  35. "outputs": [],
  36. "source": [
  37. "# Inspect the column names, the number of rows/columns and the data types\n",
  38. "print(df.columns)\n",
  39. "print (df.info())\n",
  40. "print(df.dtypes)"
  41. ]
  42. },
  43. {
  44. "cell_type": "code",
  45. "execution_count": null,
  46. "metadata": {},
  47. "outputs": [],
  48. "source": [
  49. "# get first 3 lines\n",
  50. "print(df.head(3))"
  51. ]
  52. },
  53. {
  54. "cell_type": "code",
  55. "execution_count": null,
  56. "metadata": {},
  57. "outputs": [],
  58. "source": [
  59. "#display statistics summary\n",
  60. "print(df.describe())"
  61. ]
  62. },
  63. {
  64. "cell_type": "code",
  65. "execution_count": null,
  66. "metadata": {},
  67. "outputs": [],
  68. "source": [
  69. "#display correlation\n",
  70. "print (df.corr())"
  71. ]
  72. },
  73. {
  74. "cell_type": "code",
  75. "execution_count": null,
  76. "metadata": {},
  77. "outputs": [],
  78. "source": [
  79. "# Print mean values for each column with and without disease\n",
  80. "print(df.groupby('target').mean())"
  81. ]
  82. },
  83. {
  84. "cell_type": "code",
  85. "execution_count": null,
  86. "metadata": {},
  87. "outputs": [],
  88. "source": [
  89. "# get table with selection on more than 1 column\n",
  90. "df1 = df[(df[\"sex\"] == 0) & (df[\"target\"] == 0) ]\n",
  91. "print (df1.head(5))"
  92. ]
  93. },
  94. {
  95. "cell_type": "markdown",
  96. "metadata": {},
  97. "source": [
  98. " Plots"
  99. ]
  100. },
  101. {
  102. "cell_type": "code",
  103. "execution_count": null,
  104. "metadata": {},
  105. "outputs": [],
  106. "source": [
  107. "# age distribution grouped by sex (1 = male; 0 = female)\n",
  108. "# male\n",
  109. "plt.title('age distribution according to Sex') \n",
  110. "df[df[\"sex\"] == 1]['age'].plot.hist()\n",
  111. "print(df[df[\"sex\"] > 0]['age'])\n",
  112. "# female\n",
  113. "df[df[\"sex\"] == 0]['age'].plot.hist()\n",
  114. "plt.xlabel('age [years]')\n",
  115. "plt.legend([\"male\", \"female\"])"
  116. ]
  117. },
  118. {
  119. "cell_type": "code",
  120. "execution_count": null,
  121. "metadata": {},
  122. "outputs": [],
  123. "source": [
  124. "plt.figure()\n",
  125. "# Plot maximum heart rate\n",
  126. "# Heart disease (0 = no, 1 = yes)\n",
  127. "plt.title('maximum heart rate according to heart disease') \n",
  128. "df[df[\"target\"] == 1]['thalach'].plot.hist()\n",
  129. "# no disease\n",
  130. "df[df[\"target\"] == 0]['thalach'].plot.hist()\n",
  131. "plt.legend([\"disease\", \"no disease\"])\n",
  132. "plt.xlabel('max heart rate')"
  133. ]
  134. },
  135. {
  136. "cell_type": "code",
  137. "execution_count": null,
  138. "metadata": {
  139. "scrolled": true
  140. },
  141. "outputs": [],
  142. "source": [
  143. "# Plot sex and target in one grouped bar chart via crosstab\n",
  144. "pd.crosstab(df.sex,df.target).plot(kind=\"bar\",color=['red','blue' ])\n",
  145. "plt.title('Heart Disease distribution according to Sex')\n",
  146. "plt.xlabel('Sex (0 = Female, 1 = Male)')\n",
  147. "plt.legend([\"no disease\", \"disease\"])"
  148. ]
  149. },
  150. {
  151. "cell_type": "code",
  152. "execution_count": null,
  153. "metadata": {
  154. "scrolled": true
  155. },
  156. "outputs": [],
  157. "source": [
  158. "# Plot target and cp in one grouped bar chart via crosstab\n",
  159. "pd.crosstab(df.cp,df.target).plot(kind=\"bar\",figsize=(15,6),color=['#11A5AA','#AA1190' ])\n",
  160. "plt.title('Heart Disease Distribution According To Chest Pain Type')\n",
  161. "plt.xlabel('Chest Pain Type')\n",
  162. "plt.xticks(rotation = 0)\n",
  163. "plt.ylabel('Frequency of Disease or Not')\n"
  164. ]
  165. },
  166. {
  167. "cell_type": "code",
  168. "execution_count": null,
  169. "metadata": {},
  170. "outputs": [],
  171. "source": [
  172. "# scatter plot of age vs. maximum heart rate, split by target\n",
  173. "plt.figure()\n",
  174. "plt.scatter(x=df.age[df.target==1], y=df.thalach[(df.target==1)], c=\"red\")\n",
  175. "plt.scatter(x=df.age[df.target==0], y=df.thalach[(df.target==0)])\n",
  176. "plt.title('Age-max Heart Rate Plot')\n",
  177. "plt.xlabel('age[years]')\n",
  178. "plt.ylabel('max. heart rate')\n",
  179. "plt.legend([\"Disease\", \"No Disease\"])"
  180. ]
  181. },
  182. {
  183. "cell_type": "code",
  184. "execution_count": null,
  185. "metadata": {},
  186. "outputs": [],
  187. "source": [
  188. "plt.figure()\n",
  189. "plt.scatter(x=df.age[df.target==1], y=df.chol[(df.target==1)], c=\"red\")\n",
  190. "plt.scatter(x=df.age[df.target==0], y=df.chol[(df.target==0)])\n",
  191. "plt.title('Age-Cholesterol Plot')\n",
  192. "plt.xlabel('age[years]')\n",
  193. "plt.ylabel('Cholesterol')\n",
  194. "plt.legend([\"Disease\", \"No Disease\"])"
  195. ]
  196. },
  197. {
  198. "cell_type": "code",
  199. "execution_count": null,
  200. "metadata": {},
  201. "outputs": [],
  202. "source": []
  203. }
  204. ],
  205. "metadata": {
  206. "kernelspec": {
  207. "display_name": "Python 3 (ipykernel)",
  208. "language": "python",
  209. "name": "python3"
  210. },
  211. "language_info": {
  212. "codemirror_mode": {
  213. "name": "ipython",
  214. "version": 3
  215. },
  216. "file_extension": ".py",
  217. "mimetype": "text/x-python",
  218. "name": "python",
  219. "nbconvert_exporter": "python",
  220. "pygments_lexer": "ipython3",
  221. "version": "3.8.16"
  222. }
  223. },
  224. "nbformat": 4,
  225. "nbformat_minor": 4
  226. }