{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise 2: Example for pandas using the heart.csv data set\n",
    "\n",
    "Load `heart.csv` with pandas, inspect it, and plot a few distributions and correlations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and inspect the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the CSV data\n",
    "df = pd.read_csv('heart.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of rows and columns, column names and dtypes\n",
    "print(df.shape)\n",
    "print(df.columns)\n",
    "# info() writes its report itself and returns None, so it is not wrapped in print()\n",
    "df.info()\n",
    "print(df.dtypes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First 3 rows\n",
    "df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pairwise correlation between the columns\n",
    "df.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mean value of each column without (target == 0) and with (target == 1) disease\n",
    "df.groupby('target').mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select rows with a condition on more than one column (sex == 0 and target == 0)\n",
    "df1 = df[(df['sex'] == 0) & (df['target'] == 0)]\n",
    "df1.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_hist_by_group(data, column, group_column, labels, title, xlabel, bins=10):\n",
    "    \"\"\"Overlay one histogram of `column` for each group in `group_column`.\n",
    "\n",
    "    data         -- DataFrame holding both columns\n",
    "    column       -- name of the numeric column to histogram\n",
    "    group_column -- name of the column whose values define the groups\n",
    "    labels       -- dict {group value: legend label}; groups are drawn in this order\n",
    "    title/xlabel -- plot title and x-axis label\n",
    "    bins         -- number of bins (or explicit bin edges) shared by all groups\n",
    "\n",
    "    Returns the matplotlib Axes that was drawn on.\n",
    "    \"\"\"\n",
    "    # Common bin edges keep the bars of all groups aligned and comparable.\n",
    "    bin_edges = np.histogram_bin_edges(data[column].dropna(), bins=bins)\n",
    "    _, ax = plt.subplots()\n",
    "    for value, label in labels.items():\n",
    "        # alpha < 1 keeps the histogram drawn first visible underneath the later ones.\n",
    "        data.loc[data[group_column] == value, column].plot.hist(\n",
    "            ax=ax, bins=bin_edges, alpha=0.6, label=label\n",
    "        )\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel(xlabel)\n",
    "    ax.legend()\n",
    "    return ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Age distribution grouped into male and female (1 = male; 0 = female)\n",
    "plot_hist_by_group(\n",
    "    df, 'age', 'sex', {1: 'male', 0: 'female'},\n",
    "    title='age distribution according to Sex',\n",
    "    xlabel='age [years]',\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Maximum heart rate grouped by heart disease (0 = no, 1 = yes)\n",
    "plot_hist_by_group(\n",
    "    df, 'thalach', 'target', {1: 'disease', 0: 'no disease'},\n",
    "    title='maximum heart rate according to heart disease',\n",
    "    xlabel='max heart rate',\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Plot sex and target in one bar chart via crosstab\n",
    "# Renaming the columns binds each legend label to its target value instead of its position.\n",
    "sex_by_target = pd.crosstab(df.sex, df.target).rename(columns={0: 'no disease', 1: 'disease'})\n",
    "ax = sex_by_target.plot(kind='bar', color=['red', 'blue'])\n",
    "ax.set_title('Heart Disease distribution according to Sex')\n",
    "ax.set_xlabel('Sex (0 = Female, 1 = Male)')\n",
    "ax.set_ylabel('Frequency')\n",
    "ax.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Plot target and cp in one bar chart via crosstab\n",
    "cp_by_target = pd.crosstab(df.cp, df.target).rename(columns={0: 'no disease', 1: 'disease'})\n",
    "ax = cp_by_target.plot(kind='bar', figsize=(15, 6), color=['#11A5AA', '#AA1190'], rot=0)\n",
    "ax.set_title('Heart Disease Distribution According To Chest Pain Type')\n",
    "ax.set_xlabel('Chest Pain Type')\n",
    "ax.set_ylabel('Frequency of Disease or Not')\n",
    "ax.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_scatter_by_target(data, x, y, title, xlabel, ylabel):\n",
    "    \"\"\"Scatter column `y` against column `x`, split by the `target` column.\n",
    "\n",
    "    Rows with target == 1 are drawn in red and labelled 'Disease'; rows with\n",
    "    target == 0 use the default colour and are labelled 'No Disease'.\n",
    "\n",
    "    Returns the matplotlib Axes that was drawn on.\n",
    "    \"\"\"\n",
    "    _, ax = plt.subplots()\n",
    "    for value, label, colour in ((1, 'Disease', 'red'), (0, 'No Disease', None)):\n",
    "        rows = data['target'] == value\n",
    "        ax.scatter(data.loc[rows, x], data.loc[rows, y], c=colour, label=label)\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel(xlabel)\n",
    "    ax.set_ylabel(ylabel)\n",
    "    ax.legend()\n",
    "    return ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Age against maximum heart rate, split by target\n",
    "plot_scatter_by_target(\n",
    "    df, 'age', 'thalach',\n",
    "    title='Age-max Heart Rate Plot',\n",
    "    xlabel='age[years]',\n",
    "    ylabel='max. heart rate',\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Age against cholesterol, split by target\n",
    "plot_scatter_by_target(\n",
    "    df, 'age', 'chol',\n",
    "    title='Age-Cholesterol Plot',\n",
    "    xlabel='age[years]',\n",
    "    ylabel='Cholesterol',\n",
    ");"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}