2023-04-03 13:08:49 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"Exercise 2: Example for pandas using the heart.csv data set"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 1,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"import matplotlib.pyplot as plt"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 2,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# read the csv Data \n",
|
|
|
|
"df = pd.read_csv('heart.csv')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 3,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',\n",
|
|
|
|
" 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],\n",
|
|
|
|
" dtype='object')\n",
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
"RangeIndex: 303 entries, 0 to 302\n",
|
|
|
|
"Data columns (total 14 columns):\n",
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
" 0 age 303 non-null int64 \n",
|
|
|
|
" 1 sex 303 non-null int64 \n",
|
|
|
|
" 2 cp 303 non-null int64 \n",
|
|
|
|
" 3 trestbps 303 non-null int64 \n",
|
|
|
|
" 4 chol 303 non-null int64 \n",
|
|
|
|
" 5 fbs 303 non-null int64 \n",
|
|
|
|
" 6 restecg 303 non-null int64 \n",
|
|
|
|
" 7 thalach 303 non-null int64 \n",
|
|
|
|
" 8 exang 303 non-null int64 \n",
|
|
|
|
" 9 oldpeak 303 non-null float64\n",
|
|
|
|
" 10 slope 303 non-null int64 \n",
|
|
|
|
" 11 ca 303 non-null int64 \n",
|
|
|
|
" 12 thal 303 non-null int64 \n",
|
|
|
|
" 13 target 303 non-null int64 \n",
|
|
|
|
"dtypes: float64(1), int64(13)\n",
|
|
|
|
"memory usage: 33.3 KB\n",
|
|
|
|
"None\n",
|
|
|
|
"age int64\n",
|
|
|
|
"sex int64\n",
|
|
|
|
"cp int64\n",
|
|
|
|
"trestbps int64\n",
|
|
|
|
"chol int64\n",
|
|
|
|
"fbs int64\n",
|
|
|
|
"restecg int64\n",
|
|
|
|
"thalach int64\n",
|
|
|
|
"exang int64\n",
|
|
|
|
"oldpeak float64\n",
|
|
|
|
"slope int64\n",
|
|
|
|
"ca int64\n",
|
|
|
|
"thal int64\n",
|
|
|
|
"target int64\n",
|
|
|
|
"dtype: object\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"# What is the number of columns and rows\n",
|
|
|
|
"print(df.columns)\n",
|
|
|
|
"print (df.info())\n",
|
|
|
|
"print(df.dtypes)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 4,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
|
|
|
|
"0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
|
|
|
|
"1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
|
|
|
|
"2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
|
|
|
|
"\n",
|
|
|
|
" ca thal target \n",
|
|
|
|
"0 0 1 1 \n",
|
|
|
|
"1 0 2 1 \n",
|
|
|
|
"2 0 2 1 \n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"# get first 3 lines\n",
|
|
|
|
"print(df.head(3))"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 5,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
" age sex cp trestbps chol fbs \\\n",
|
|
|
|
"count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n",
|
|
|
|
"mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n",
|
|
|
|
"std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n",
|
|
|
|
"min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n",
|
|
|
|
"25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n",
|
|
|
|
"50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n",
|
|
|
|
"75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n",
|
|
|
|
"max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n",
|
|
|
|
"\n",
|
|
|
|
" restecg thalach exang oldpeak slope ca \\\n",
|
|
|
|
"count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n",
|
|
|
|
"mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n",
|
|
|
|
"std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n",
|
|
|
|
"min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n",
|
|
|
|
"25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n",
|
|
|
|
"50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n",
|
|
|
|
"75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n",
|
|
|
|
"max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n",
|
|
|
|
"\n",
|
|
|
|
" thal target \n",
|
|
|
|
"count 303.000000 303.000000 \n",
|
|
|
|
"mean 2.313531 0.544554 \n",
|
|
|
|
"std 0.612277 0.498835 \n",
|
|
|
|
"min 0.000000 0.000000 \n",
|
|
|
|
"25% 2.000000 0.000000 \n",
|
|
|
|
"50% 2.000000 1.000000 \n",
|
|
|
|
"75% 3.000000 1.000000 \n",
|
|
|
|
"max 3.000000 1.000000 \n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"#display statistics summary\n",
|
|
|
|
"print(df.describe())"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 6,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
" age sex cp trestbps chol fbs \\\n",
|
|
|
|
"age 1.000000 -0.098447 -0.068653 0.279351 0.213678 0.121308 \n",
|
|
|
|
"sex -0.098447 1.000000 -0.049353 -0.056769 -0.197912 0.045032 \n",
|
|
|
|
"cp -0.068653 -0.049353 1.000000 0.047608 -0.076904 0.094444 \n",
|
|
|
|
"trestbps 0.279351 -0.056769 0.047608 1.000000 0.123174 0.177531 \n",
|
|
|
|
"chol 0.213678 -0.197912 -0.076904 0.123174 1.000000 0.013294 \n",
|
|
|
|
"fbs 0.121308 0.045032 0.094444 0.177531 0.013294 1.000000 \n",
|
|
|
|
"restecg -0.116211 -0.058196 0.044421 -0.114103 -0.151040 -0.084189 \n",
|
|
|
|
"thalach -0.398522 -0.044020 0.295762 -0.046698 -0.009940 -0.008567 \n",
|
|
|
|
"exang 0.096801 0.141664 -0.394280 0.067616 0.067023 0.025665 \n",
|
|
|
|
"oldpeak 0.210013 0.096093 -0.149230 0.193216 0.053952 0.005747 \n",
|
|
|
|
"slope -0.168814 -0.030711 0.119717 -0.121475 -0.004038 -0.059894 \n",
|
|
|
|
"ca 0.276326 0.118261 -0.181053 0.101389 0.070511 0.137979 \n",
|
|
|
|
"thal 0.068001 0.210041 -0.161736 0.062210 0.098803 -0.032019 \n",
|
|
|
|
"target -0.225439 -0.280937 0.433798 -0.144931 -0.085239 -0.028046 \n",
|
|
|
|
"\n",
|
|
|
|
" restecg thalach exang oldpeak slope ca \\\n",
|
|
|
|
"age -0.116211 -0.398522 0.096801 0.210013 -0.168814 0.276326 \n",
|
|
|
|
"sex -0.058196 -0.044020 0.141664 0.096093 -0.030711 0.118261 \n",
|
|
|
|
"cp 0.044421 0.295762 -0.394280 -0.149230 0.119717 -0.181053 \n",
|
|
|
|
"trestbps -0.114103 -0.046698 0.067616 0.193216 -0.121475 0.101389 \n",
|
|
|
|
"chol -0.151040 -0.009940 0.067023 0.053952 -0.004038 0.070511 \n",
|
|
|
|
"fbs -0.084189 -0.008567 0.025665 0.005747 -0.059894 0.137979 \n",
|
|
|
|
"restecg 1.000000 0.044123 -0.070733 -0.058770 0.093045 -0.072042 \n",
|
|
|
|
"thalach 0.044123 1.000000 -0.378812 -0.344187 0.386784 -0.213177 \n",
|
|
|
|
"exang -0.070733 -0.378812 1.000000 0.288223 -0.257748 0.115739 \n",
|
|
|
|
"oldpeak -0.058770 -0.344187 0.288223 1.000000 -0.577537 0.222682 \n",
|
|
|
|
"slope 0.093045 0.386784 -0.257748 -0.577537 1.000000 -0.080155 \n",
|
|
|
|
"ca -0.072042 -0.213177 0.115739 0.222682 -0.080155 1.000000 \n",
|
|
|
|
"thal -0.011981 -0.096439 0.206754 0.210244 -0.104764 0.151832 \n",
|
|
|
|
"target 0.137230 0.421741 -0.436757 -0.430696 0.345877 -0.391724 \n",
|
|
|
|
"\n",
|
|
|
|
" thal target \n",
|
|
|
|
"age 0.068001 -0.225439 \n",
|
|
|
|
"sex 0.210041 -0.280937 \n",
|
|
|
|
"cp -0.161736 0.433798 \n",
|
|
|
|
"trestbps 0.062210 -0.144931 \n",
|
|
|
|
"chol 0.098803 -0.085239 \n",
|
|
|
|
"fbs -0.032019 -0.028046 \n",
|
|
|
|
"restecg -0.011981 0.137230 \n",
|
|
|
|
"thalach -0.096439 0.421741 \n",
|
|
|
|
"exang 0.206754 -0.436757 \n",
|
|
|
|
"oldpeak 0.210244 -0.430696 \n",
|
|
|
|
"slope -0.104764 0.345877 \n",
|
|
|
|
"ca 0.151832 -0.391724 \n",
|
|
|
|
"thal 1.000000 -0.344029 \n",
|
|
|
|
"target -0.344029 1.000000 \n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"#display correlation\n",
|
|
|
|
"print (df.corr())"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 7,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
" age sex cp trestbps chol fbs \\\n",
|
|
|
|
"target \n",
|
|
|
|
"0 56.601449 0.826087 0.478261 134.398551 251.086957 0.159420 \n",
|
|
|
|
"1 52.496970 0.563636 1.375758 129.303030 242.230303 0.139394 \n",
|
|
|
|
"\n",
|
|
|
|
" restecg thalach exang oldpeak slope ca thal \n",
|
|
|
|
"target \n",
|
|
|
|
"0 0.449275 139.101449 0.550725 1.585507 1.166667 1.166667 2.543478 \n",
|
|
|
|
"1 0.593939 158.466667 0.139394 0.583030 1.593939 0.363636 2.121212 \n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"# Print mean values for each column with and without disease\n",
|
|
|
|
"print(df.groupby('target').mean())"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 8,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
" age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n",
|
|
|
|
"167 62 0 0 140 268 0 0 160 0 3.6 \n",
|
|
|
|
"181 65 0 0 150 225 0 0 114 0 1.0 \n",
|
|
|
|
"182 61 0 0 130 330 0 0 169 0 0.0 \n",
|
|
|
|
"190 51 0 0 130 305 0 1 142 1 1.2 \n",
|
|
|
|
"204 62 0 0 160 164 0 0 145 0 6.2 \n",
|
|
|
|
"\n",
|
|
|
|
" slope ca thal target \n",
|
|
|
|
"167 0 2 2 0 \n",
|
|
|
|
"181 1 3 3 0 \n",
|
|
|
|
"182 2 0 2 0 \n",
|
|
|
|
"190 1 0 3 0 \n",
|
|
|
|
"204 0 3 3 0 \n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"# get table with selection on more than 1 column\n",
|
|
|
|
"df1 = df[(df[\"sex\"] == 0) & (df[\"target\"] == 0) ]\n",
|
|
|
|
"print (df1.head(5))"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
" Plots"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 9,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"0 63\n",
|
|
|
|
"1 37\n",
|
|
|
|
"3 56\n",
|
|
|
|
"5 57\n",
|
|
|
|
"7 44\n",
|
|
|
|
" ..\n",
|
|
|
|
"295 63\n",
|
|
|
|
"297 59\n",
|
|
|
|
"299 45\n",
|
|
|
|
"300 68\n",
|
|
|
|
"301 57\n",
|
|
|
|
"Name: age, Length: 207, dtype: int64\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"<matplotlib.legend.Legend at 0x7fc9f07ea340>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 9,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAHFCAYAAAAHcXhbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABA0ElEQVR4nO3deXyNZ/7/8fchyZHIgpANlahYY6nqqKUNVRQ1VFtKq9YpZVpLtaqoaNU6UmYMtbTBV1HVUq2x1ZIuhoZSmnRUbUETaa0RFUuu3x9+OY8eSUgicXLzej4eeUzv677u+/6c6yTOe677OufYjDFGAAAAFlXM1QUAAADcCsIMAACwNMIMAACwNMIMAACwNMIMAACwNMIMAACwNMIMAACwNMIMAACwNMIMAACwNMIMUAC2bNkim82mLVu2ONp69uyp0NDQPJ3n119/VVRUlHbv3p2n47K7ls1m09///vc8nedmZs6cqfnz52dpP3z4sGw2W7b7kHvz58+XzWbT4cOHHW3NmjVTs2bNXFJPTs/3rbp8+bJmz56tBx54QGXKlJGXl5cqVaqkDh06aMWKFQV+Pdz5CDNAIRk9enSe/2H+9ddfNXbs2DyHmfxcKz9yenELDg7Wf//7X7Vr167Qa7jbzJw5UzNnznTZtQsjzHTv3l0vvfSSmjdvrkWLFunzzz/XqFGj5ObmpnXr1hX49XDnc3N1AcCd6t577y30a1y4cEFeXl635Vo3Yrfb9eCDD7q0BiswxujixYvy9PTM9TE1a9YsxIpuv0OHDumjjz7Sm2++qbFjxzraW7Roob/97W/KyMhwYXWwKmZmUKT98ssv6tWrl8LDw+Xl5aXy5curffv22rt3b5a+8fHxatWqlby8vFSuXDkNHDhQq1evznL7R5K+/PJLtWjRQr6+vvLy8lKTJk20cePGXNX0v//9T4899pi8vLxUtmxZ9e/fX6mpqVn6ZXfr5+OPP1bDhg3l5+cnLy8vVa5cWb1795Z07VbVAw88IEnq1auXbDabbDaboqKiHOfz9vbW3r171apVK/n4+KhFixY5XivT7NmzVbVqVdntdtWsWVNLly512h8VFSWbzZbluOtveYSGhio+Pl6xsbGO2jKvmdNtpm+++UYtWrSQj4+PvLy81LhxY61evTrb62zevFkvvviiypYtK39/f3Xq1Em//vprto/pz3bs2KFnnnlGoaGh8vT0VGhoqLp27aojR45k6Xv8+HG98MILqlixojw8PBQSEqKnnnpKJ06ccPQ5c+aMXnnlFVWuXFl2u10BAQFq27at/ve//zn6nDp1SgMGDFD58uXl4eGhypUra+TIkUpPT3e6Xuatvvfee081atSQ3W7XggULJEnbtm1TkyZNVKJECYWEhGjEiBG6fPlylpqvv82UOdb/+Mc/FB0drbCwMHl7e6tRo0batm1bluPnzp3r9PwvXrw4V7dAb/R8S1JiYqKee+45BQQEyG63q0aNGpo6depNw8jJkyclXZvNy06xYs4vS+fOndOwYcMUFhYmDw8PlS9fXoMHD1ZaWpqjT//+/VWiRAnt3LnT0ZaRkaEWLVooMDBQSUlJN6wJ1sfMDIq0X3/9Vf7+/po4caLKlSunU6dOacGCBWrYsKF27dqlatWqSZKSkpIUGRmpkiVLatasWQoICNCSJUuyXTOyaNEiPf/88+rQoYMWLFggd3d3zZ49W61bt9a6descASE7J06cUGRkpNzd3TVz5kwFBgbqww8/zNXalP/+97/q0qWLunTpoqioKJUoUUJHjhzRpk2bJEn169dXTEyMevXqpVGjRjlu2VSoUMFxjkuXLumvf/2r+vXrp9dff11Xrly54TVXrVqlzZs366233lLJkiU1c+ZMde3aVW5ubnrqqaduWvOfrVixQk899ZT8/Pwctz3sdnuO/WNjY9WyZUvVqVNH77//vux2u2bOnKn27dtryZIl6tKli1P/vn37ql27dlq8eLGOHj2qV199Vc8995xjfHJy+PBhVatWTc8884zKlCmjpKQkzZo1Sw888IASEhJUtmxZSdeCzAMPPKDLly/rjTfeUJ06dXTy5EmtW7dOp0+fVmBgoFJTU9W0aVMdPnxYw4cPV8OGDXX+/Hl99dVXSkpKUvXq1XXx4kU1b95cBw4c0NixY1WnTh19/fXXmjBhgnbv3p0lrK1cuVJff/213nzzTQUFBSkgIEAJCQlq0aKFQkNDNX/+fHl5eWnmzJlavHhxrp+Pf//736pevbqmTZsm6dqtxrZt2+rQoUPy8/OTJM2ZM0f9+vXTk08+qXfffVdnz57V2LFjs4Su7Nzo+f7tt9/UuHFjXbp0SW+//bZCQ0P1xRdfaNiwYTpw4MANb4vVqFFDpUqV0tixY1WsWDG1atUqx2B14cIFRUZG6tixY47nLD4+Xm+++ab27t2rL7/8UjabTdOmTdP27dvVuXNn7dy503H+LVu2aO3atTkGJ9xBDGAhV65cMZcuXTLh4eFmyJAhjvZXX33V2Gw2Ex8f79S/devWRpLZvHmzMcaYtLQ0U6ZMGdO+fXunflevXjV169Y1f/nLX254/eHDhxubzWZ2797t1N6yZUun6xhjTI8ePUylSpUc2//4xz+MJHPmzJkczx8XF2ckmZiYmCz7evToYSSZDz74INt9f76WMcZIMp6eniY5OdnRduXKFVO9enVTpUoVR9uYMWNMdv8UxMTEGEnm0KFDjrZatWqZyMjILH0PHTqUpe4HH3zQBAQEmNTUVKfrR0REmAoVKpiMjAyn6wwYMMDpnJMnTzaSTFJSUpbr3ciVK1fM+fPnTcmSJc306dMd7b179zbu7u4mISEhx2PfeustI8ls2LAhxz7vvfeekWSWLVvm1D5p0iQjyaxfv97RJsn4+fmZU6dOOfXt0qVLjs/N9WMeGRnpNOaZY127dm1z5coVR/t3331nJJklS5YYY679TgcFBZmGDRs6XfvIkSPG3d09y+9LdnJ6vl9//XUjyWzfvt2p/cUXXzQ2m83s27fvhuddvXq1KVu2rJFkJBl/f3/z9NNPm1WrVjn1mzBhgilWrJiJi4tzal++fLmRZP7zn/842vbv3298fX1Nx44dzZdffmmKFStmRo0addPHiDsDt5lQpF25ckXjx49XzZo15eHhITc3N3l4eGj//v366aefHP1iY2MVERGRZX1B165dnba3bt2qU6dOqUePHrpy5YrjJyMjQ4899pji4uKcpq+vt3nzZtWqVUt169Z1au/WrdtNH0vmLaTOnTtr2bJlOn78+E2Pyc6TTz6Z676Z0+yZihcvri5duuiXX37RsWPH8nX93EhLS9P27dv11FNPydvb2+n63bt317Fjx7Rv3z6nY/761786bdepU0eSsr1d9Gfnz5/X8OHDVaVKFbm5ucnNzU3e3t5KS0tz+h1Zs2aNmjdvrho1auR4rjVr1qhq1ap69NFHc+yzadMmlSxZMsvMVs+ePSUpy+3KRx55RKVLl3Zq27x5c47PTW61a9dOxYsXd2xfP1779u1TcnKyOnfu7HTcPffcoyZNmuT6OtnZtGmTatasqb/85S9O7T179pQx5qazaW3btlViYqJWrFihYcOGqVatWlq5cqX++te/Os1yfvHFF4qIiFC9evWc/l5bt26d5fZxlSpVNHfuXK1cuVKPP/64HnroIcctWtz5CDMo0oYOHarRo0erY8eO+vzzz7V9+3bFxcWpbt26+uOPPxz9Tp486fTCkOn6tsy1EU899ZTc3d2dfiZNmiRjjE6dOpVjPSdPnlRQUFCW9uzarvfwww9r5cqVunLlip5//nlVqFBBERERWrJkyU2PzeTl5SVfX99c979RrZlrFwrD6dOnZYzJdno/JCQk2+v7+/s7bWfe0vjz85ydbt26acaMGerbt6/WrVun7777TnFxcSpXrpzTsb/99pvTLbvs5KZP5u/A9euMAgIC5ObmluVxZTcGt/J7lOlm45VZR27+LvLq5MmTeXpus+Pp6amOHTtqypQpio2N1S+//KKaNWvq3//+t+Lj4yVd+3vds2dPlr9VHx8fGWP0+++/O52zXbt
|
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"# age dirtibution group into male and female (1 = male; 0 = female)\n",
|
|
|
|
"# male\n",
|
|
|
|
"plt.title('age distribution according to Sex') \n",
|
|
|
|
"df[df[\"sex\"] == 1]['age'].plot.hist()\n",
|
|
|
|
"print(df[df[\"sex\"] > 0]['age'])\n",
|
|
|
|
"# female\n",
|
|
|
|
"df[df[\"sex\"] == 0]['age'].plot.hist()\n",
|
|
|
|
"plt.xlabel('age [years]')\n",
|
|
|
|
"plt.legend([\"male\", \"female\"])"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 10,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"Text(0.5, 0, 'max heart rate')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 10,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAHFCAYAAAAHcXhbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABSbklEQVR4nO3dd1gUV/828HtpS18EBRZFwIhYQCPRIMaIvVf0sfcSDZYotqgxYvQRSzQkMdYY1BhFE0vMY0VFNFEjdo3GCmIiSESlqUs77x++zC8rHcHdgftzXXtdzpkzM985u+zeTtlVCCEEiIiIiGTKQNcFEBEREb0OhhkiIiKSNYYZIiIikjWGGSIiIpI1hhkiIiKSNYYZIiIikjWGGSIiIpI1hhkiIiKSNYYZIiIikjWGGSoxhUKBoKAgXZdRamJiYqBQKPD555/ruhQtDx48QFBQEC5evFiutylHrq6uGDZsmDR97NgxKBQKHDt27I3Xcu3aNQQFBSEmJqZU17thwwYoFAqcPXu2VNf7uk6ePImgoCA8ffr0tdYzbNgwuLq6arWVt/e2ioBhhkrs1KlTGDVqlK7LKPcePHiAefPmvfEw86a3WR54e3vj1KlT8Pb2fuPbvnbtGubNm1fqYUZfnTx5EvPmzXvtMJMXvrfJj5GuCyD5atKkia5LKNeysrKQmZlZKut6/vw5TE1NoVAoSmV9FcWzZ89gbm5e5P7W1tb8uyhjOa/lssTnUH54ZEYmgoKCoFAocPnyZfznP/+BSqWCra0tAgMDkZmZiRs3bqBDhw6wsrKCq6srlixZorX8ixcvMGXKFLz99tvSsr6+vvj555+1+oWFhUGhUGDFihVa7XPnzoWhoSHCw8OltlcPxeYcjj569ChGjx4NOzs7WFtbY8iQIUhLS0N8fDz69OkDGxsbqNVqTJ06FRkZGdLy+R2izzn9s2HDBqlt2LBhsLS0xJ9//on27dvDwsICarUaixYtAgCcPn0azZo1g4WFBWrVqoWNGzcWa7yXL18ONzc3WFpawtfXF6dPn87V5+zZs+jWrRtsbW1hamqKhg0bYvv27Vp9/vnnHwQEBKBu3bqwtLSEvb09WrVqhRMnTuS5j0uWLMGCBQvg5uYGpVKJiIgING7cGAAwfPhwKBSKQg+B5zwPhw4dwogRI1ClShWYm5tDo9Hg9u3bGD58ONzd3WFubo6qVauia9euuHLlirT8sWPHCt1mUfY9P/PmzYOPjw9sbW1hbW0Nb29vrF+/Hnn95u2WLVvg6+sLS0tLWFpa4u2338b69eu1+hw4cACtW7eGSqWCubk56tSpg+DgYK0+e/bsga+vL8zNzWFlZYW2bdvi1KlTWn1y/sbOnz+P3r17o1KlSnjrrbcAABkZGZg+fTocHR1hbm6OZs2a4cyZM7nqzes1nPNavX37Njp16gRLS0s4OztjypQp0Gg0Wsv/9ddf6N27N6ysrGBjY4OBAwciKioq1+v/VRs2bMB//vMfAEDLli2l5+zfy3z33Xdo0KABTE1NYWtri549e+L69ev5rvNVKSkp+PDDD1G5cmXY2dnB398fDx48yNVv27Zt8PX1hYWFBSwtLdG+fXtcuHBBq8/Zs2fRr18/uLq6wszMDK6urujfvz/u3buXa7/yei3PnDkT06ZNAwC4ublJ+1vY6b0NGzbAw8MDSqUSderUwaZNm/Ls9+rr/dmzZ5g6dSrc3Nyk8WvUqBG2bt2aa79K6z0BAFatWoUGDRrA0tISVlZWqF27NmbNmqXVJz4+HmPGjEG1atVgYmICNzc3zJs3r9T+IyQXPDIjM3369MGgQYMwZswYhIeHY8mSJcjIyMDhw4cREBCAqVOnYsuWLZgxYwZq1qwJf39/AIBGo8Hjx48xdepUVK1aFenp6Th8+DD8/f0RGhqKIUOGAAD69euHyMhITJkyBU2aNEGjRo1w9OhRLFiwALNmzULbtm0LrXHUqFHw9/dHWFgYLly4gFmzZkmBy9/fHx988AEOHz6MxYsXw8nJCYGBgSUai4yMDPj7+2Ps2LGYNm0atmzZgpkzZyI5ORk7duzAjBkzUK1aNXz99dcYNmwYPD098c477xS63m+++Qa1a9dGSEgIAGDOnDno1KkToqOjoVKpAAARERHo0KEDfHx8sHr1aqhUKoSFhaFv37549uyZdB3F48ePAbwMg46OjkhNTcWuXbvQokULHDlyBC1atNDa9ldffYVatWrh888/h7W1NRwcHBAaGorhw4fjk08+QefOnQEA1apVK3Q/RowYgc6dO+P7779HWloajI2N8eDBA9jZ2WHRokWoUqUKHj9+jI0bN8LHxwcXLlyAh4cHvL29C9xmUfc9PzExMRgzZgyqV68O4GXwnDBhAv7++298+umnUr9PP/0U8+fPh7+/P6ZMmQKVSoWrV69qfeCtX78eo0ePhp+fH1avXg17e3vcvHkTV69elfps2bIFAwcORLt27bB161ZoNBosWbJEeg6aNWumVZ+/vz/69euHsWPHIi0tDQAwevRobNq0CVOnTkXbtm1x9epV+Pv7IyUlpdDnAXj5Wu3WrRtGjhyJKVOm4Pjx45g/fz5UKpW0z2lpaWjZsiUeP36MxYsXo2bNmjhw4AD69u1b6Po7d+6MhQsXYtasWfjmm2+k01w5YSw4OBizZs1C//79ERwcjMTERAQFBcHX1xdRUVFwd3cvdBujRo1C586dsWXLFty/fx/Tpk3DoEGDcPToUanPwoUL8cknn0ivnfT0dCxduhTvv/8+zpw5g7p16wJ4+Rrw8PBAv379YGtri7i4OKxatQqNGzfGtWvXULlyZa1tv/pabtSoEZ49e4avv/4aO3fuhFqtBgBp/XnZsGEDhg8fju7du2PZsmVISkpCUFAQNBoNDAwK/n99YGAgvv/+eyxYsAANGzZEWloarl69isTERKlPab8nhIWFISAgABMmTMDnn38OAwMD3L59G9euXZO2GR8fj3fffRcGBgb49NNP8dZbb+HUqVNYsGABYmJiEBoaWsizWo4IkoW5c+cKAGLZsmVa7W+//bYAIHbu3Cm1ZWRkiCpVqgh/f/9815eZmSkyMjLEyJEjRcOGDbXmvXjxQjRs2FC4ubmJa9euCQcHB+Hn5ycyMzO1+gEQc+fOlaZDQ0MFADFhwgStfj169BAAxPLly3PV7u3tLU1HREQIACIiIkKrX3R0tAAgQkNDpbahQ4cKAGLHjh259huAOH/+vNSemJgoDA0NRWBgYL7j8e/teHl5ae3rmTNnBACxdetWqa127dqiYcOGIiMjQ2sdXbp0EWq1WmRlZeW5jZxxb926tejZs2eubb/11lsiPT1da5moqKhc+1+QnOdhyJAhhfbNzMwU6enpwt3dXUyePLlI2yzpvuclKytLZGRkiM8++0zY2dmJ7OxsIYQQd+/eFYaGhmLgwIH5LpuSkiKsra1Fs2bNpOXyWr+Tk5Pw8vLSqislJUXY29uLpk2bSm05f2Offvqp1jquX78uAGiNjxBC/PDDDwKAGDp0qNSW12s457W6fft2reU7deokPDw8pOlvvvlGABD79+/X6jdmzJgiPf8//vhjnn8/T548EWZmZqJTp05a7bGxsUKpVIoBAwYUuN6c11NAQIBW+5IlSwQAERcXJ63PyMgo199/SkqKcHR0FH369Ml3G5mZmSI1NVVYWFiIL7/8Mte283otL126VAAQ0dHRBdYvxP+9Dry9vbVeKzExMcLY2Fi4uLho9X/1vc3T01P06NGjwG2U9nvC+PHjhY2NTYHbHDNmjLC0tBT37t3Tav/8888FAPHHH38UuHx5wtNMMtOlSxet6Tp16kChUKBjx45Sm5GREWrWrJnrkO2PP/6I9957D5aWljAyMoKxsTHWr1+f61CzUqnE9u3bkZiYCG9vbwghsHXrVhgaGpa4RgDS//D/3f5qjcWhUCjQqVMnaTpnv9VqNRo2bCi129rawt7evsjb6ty5s9a+1q9fHwCk5W/fvo0///wTAwcOBABkZmZKj06dOiEuLg43btyQll+9ejW8vb1hamoqjfuRI0fyPMTfrVs3GBsbF2M
|
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"plt.figure()\n",
|
|
|
|
"# Plot maximum heart rate\n",
|
|
|
|
"# Heart disease (0 = no, 1 = yes)\n",
|
|
|
|
"plt.title('maximum heart rate according to heart disease') \n",
|
|
|
|
"df[df[\"target\"] == 1]['thalach'].plot.hist()\n",
|
|
|
|
"# no disease\n",
|
|
|
|
"df[df[\"target\"] == 0]['thalach'].plot.hist()\n",
|
|
|
|
"plt.legend([\"disease\", \"no disease\"])\n",
|
|
|
|
"plt.xlabel('max heart rate')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 11,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {
|
|
|
|
"scrolled": true
|
|
|
|
},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"<matplotlib.legend.Legend at 0x7fca5819bd30>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 11,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAHACAYAAACI8pP3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABH70lEQVR4nO3dd3QU5f7H8c8mpFeIpGmoUqWDIlFMFAjSsYGAIFUwKAIiykUhKBKJgl5BxEaxAHotiPeCEgXpHUGaIBAglBgpJhBCAsnz+8OT/blpgAYySd6vc+Yc9plnZr4zs5P9MDM7azPGGAEAAFiIU3EXAAAAkBsBBQAAWA4BBQAAWA4BBQAAWA4BBQAAWA4BBQAAWA4BBQAAWA4BBQAAWA4BBQAAWA4BpZSYM2eObDabNm/enO/4jh07qkqVKte3qL9YvHixYmJirrh/3759ZbPZ7IOXl5eqVKmizp07a/bs2crIyMgzTWRkpCIjI4uu6BKmb9++efaxzWa7qu0uXf2+KmhZl3tP/h3Hjx9XTEyMtm3blmdcTEyMbDZbkS2rrMp9HB06dEg2m01z5sy57rUUtr//qcTEREVHR6tmzZry8PBQhQoVVL9+fQ0aNEiJiYlFvjxcvXLFXQDKhsWLF+utt966qg8+Dw8PLVu2TJKUnp6uxMRELVmyRIMGDdKUKVP07bff6qabbrL3nzFjRlGXXeKtW7fOYRtdib+zr/7usq7W8ePHNWHCBFWpUkWNGjVyGDdw4EDde++913T5ZVFISIjWrVun6tWrX/dlF7a//4mjR4+qSZMm8vf319NPP61atWopJSVFu3fv1meffaaDBw8qLCysyJaHv4eAgmvq/Pnz8vT0/FvTOjk56fbbb3do69Onj/r166eOHTvqwQcf1Pr16+3j6tat+49qLY1yb7+iZozRhQsX5OHhcc2XdTk33XTTNQ9IpUF6errc3d2v+GyTm5tbse/bovbee+/p5MmT2rhxo6pWrWpv79q1q/71r38pOzu7GKtDDi7xlGHGGM2YMUONGjWSh4eHypcvrwcffFAHDx506BcfH68uXbropptukru7u26++WYNHjxYJ0+edOiXc4p969atevDBB1W+fHlVr15dffv21VtvvSVJDpdtDh069LfqjoqK0qBBg7RhwwatXLnS3p7fJZ63335bDRs2lLe3t3x8fFS7dm3961//cuiTlJSkwYMH66abbpKrq6uqVq2qCRMm6NKlSw79JkyYoObNm6tChQry9fVVkyZN9MEHHyj3720uW7ZMkZGRCggIkIeHhypVqqQHHnhA58+ft/fJzMzUxIkTVbt2bbm5ualixYrq16+ffv/99yvaBnPmzFGtWrXk5uamOnXq6MMPP8y3X+7LLufPn9eoUaNUtWpVubu7q0KFCmrWrJnmz58vSZfdVzabTU888YRmzpypOnXqyM3NTXPnzs13WTnOnDmjfv36qUKFCvLy8lKnTp3yvMeqVKmivn375pn2r/v0xx9/1K233ipJ6tevn722nGXmd4knOztbcXFx9u0cGBioPn366OjRo3mWU69ePW3atEktW7aUp6enqlWrpldeeeWKPqzeeust3XXXXQoMDJSXl5fq16+vuLg4Xbx4MU/fb7/9Vq1atZKfn588PT1Vp04dxcbGOvTZsGGDOnXqpICAALm7u6t69eoaPny4Q5/Vq1erVatW8vHxkaenp8LDw/W///3PoU/OZbalS5eqf//+qlixojw9PZWRkSFjjOLi4lS5cmW5u7urSZMmWrJkSZ5687vEk7Otd+3apR49esjPz09BQUHq37+/UlJSHKb/448/NGDAAFWoUEHe3t7q0KGDDh48eNnLj5fb35K0aNEitWjRQp6envLx8VGbNm20bt26AueZ49SpU3JyclJgYGC+452cHD8aN2/erM6dO6tChQpyd3dX48aN9dlnn9nHnzx5UmFhYQoPD3fY57t375aXl5d69+592ZqQF2dQSpmsrKw8H6yS8nyIStLgwYM1Z84cDRs2TJMnT9bp06f14osvKjw8XNu3b1dQUJAk6cCBA2rRooUGDhwoPz8/HTp0SFOnTtWdd96pHTt2yMXFxWG+999/vx5++GENGTJEaWlpqlevntLS0vT55587/PEICQn52+vZuXNnzZgxQytXrtRdd92Vb58FCxYoOjpaTz75pF577TU5OTlp//792r17t71PUlKSbrvtNjk5OWncuHGqXr261q1bp4kTJ+rQoUOaPXu2ve+hQ4c0ePBgVapUSZK0fv16Pfnkkzp27JjGjRtn79OhQwe1bNlSs2bNkr+/v44dO6Zvv/1WmZmZ8vT0VHZ2trp06aJVq1Zp9OjRCg8P1+HDhzV+/HhFRkZq8+bN8vDwKHDd58yZo379+qlLly6aMmWKUlJSFBMTo4yMjDx/WHMbOXKkPvroI02cOFGNGzdWWlqadu7cqVOnTkmSXnjhhcvuq4ULF2rVqlUaN26cgoODC/wjn2PAgAFq06aN5s2bp8TERD3//POKjIzUzz//LH9//0Kn/asmTZpo9uzZ6tevn55//nl16NBBkgo9a/L444/r3Xff1RNPPKGOHTvq0KFDeuGFF/Tjjz9q69atuuGGG+x9k5KS1KtXLz399NMaP368vvrqK40ZM0ahoaHq06dPobUdOHBAPXv2VNWqVeXq6qrt27fr5Zdf1i+//KJZs2bZ+33wwQcaNGiQIiIiNHPmTAUGBmrfvn3auXOnvc93332nTp06qU6dOpo6daoqVaqkQ4cOaenSpfY+K1asUJs2bdSgQQN98MEHcnNz04wZM9SpUyfNnz9f3bt3d6ivf//+6tChgz766COlpaXJxcVFEyZM0IQJEzRgwAA9+OCDSkxM1KBBg5SVlaVatWpd0T554IEH1L17dw0YMEA7duzQmDFjJMm+ztnZ2erUqZM2b96smJgYNWnSROvWrbuiS3GX29/z5s1Tr169FBUVpfnz5ysjI0NxcXGKjIzUDz/8oDvvvLPAebdo0UJvvfWW7r//fo0cOVItWrSQr69vvn2XL1+ue++9V82bN9fMmTPl5+enBQsWqHv37jp//rz69u2rG264QQsWLFBkZKSeffZZTZ06VefPn9dDDz2kSpUqaebMmVe0PZGLQakwe/ZsI6nQoXLlyvb+69atM5LMlClTHOaTmJhoPDw8zOjRo/NdTnZ2trl48aI5fPiwkWS+/vpr+7jx48cbSWbcuHF5phs6dKi5mrfbo48+ary8vAocv2fPHiPJPP744/a2iIgIExERYX/9xBNPGH9//0KXM3jwYOPt7W0OHz7s0P7aa68ZSWbXrl35TpeVlWUuXrxoXnzxRRMQEGCys7ONMcZ8/vnnRpLZtm1bgcucP3++kWS++OILh/ZNmzYZSWbGjBkFTpuVlWVCQ0NNkyZN7Ms0xphDhw4ZFxcXh31sjDGSzPjx4+2v69WrZ7p27Vrg/I0pfF9JMn5+fub06dP5jvvrsnLek/fdd59DvzVr1hhJZuLEifa2ypUrm0cffTTPPHPv05xtNHv27Dx9c95/OXLeI9HR0Q79NmzYYCSZf/3rXw7LkWQ2bNjg0Ldu3bqmbdu2eZZVmJz3xocffmicnZ3t2+rs2bPG19fX3HnnnQ77Lrfq1aub6tWrm/T09AL73H777SYwMNCcPXvW3nbp0iVTr149c9NNN9nnn7MP+vTp4zD9mTNnjLu7e4H75q/bPCEhIc82z9nWcXFxDtNHR0cbd3d3+/L/97//GUnm7bffdugXGxub5/2Sn4L2d85xUL9+fZOVlWVvP3v2rAkMDDTh4eGFzjc7O9sMHjzYODk5GUnGZrOZOnXqmBEjRpiEhASHvrVr1zaNGzc2Fy9edGjv2LGjCQkJcVj+5MmTjSTz1VdfmUcffdR4eHiYn3/+udBaUDAu8ZQyH374oTZt2pRnyP2/if/+97+y2Wx65JFHdOnSJfsQHByshg0b6scff7T3TU5O1pAhQxQWFqZy5crJxcVFlStXliTt2bMnTw0PPPDANV1HKf8zQrnddttt+uOPP9SjRw99/fXXeS5JSX9uh7vvvlu
|
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"# Plot sex and target in one histogramm via crosstab\n",
|
|
|
|
"pd.crosstab(df.sex,df.target).plot(kind=\"bar\",color=['red','blue' ])\n",
|
|
|
|
"plt.title('Heart Disease distribution according to Sex')\n",
|
|
|
|
"plt.xlabel('Sex (0 = Female, 1 = Male)')\n",
|
|
|
|
"plt.legend([\"no disease\", \"disease\"])"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-11 13:16:53 +02:00
|
|
|
"execution_count": 12,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {
|
|
|
|
"scrolled": true
|
|
|
|
},
|
2023-04-11 13:16:53 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"Text(0, 0.5, 'Frequency of Disease or Not')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABNYAAAIhCAYAAACCMjckAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABg0klEQVR4nO3deZxO9f//8edl9p2xzJhmMBhbtiwJFcKUsZWiorIvIYSkfBOS3UT5WJIsRZbsFZnssu8lHyKMbbLvzDBzfn/0m+vjamaY67jGdeFxv92u2815n/c553Wd65ozzbP3OW+LYRiGAAAAAAAAANglm7MLAAAAAAAAAB5EBGsAAAAAAACACQRrAAAAAAAAgAkEawAAAAAAAIAJBGsAAAAAAACACQRrAAAAAAAAgAkEawAAAAAAAIAJBGsAAAAAAACACQRrAAAAAAAAgAkEawCA+2rKlCmyWCzaunVruuvr1aunAgUK3N+ibvPTTz+pX79+me7fokULWSwW68vPz08FChRQgwYNNHnyZCUmJqbZpnr16qpevbrjinZR/fr1szk3vr6+Cg8P1/PPP68vvvhCly9fTrNNixYt7P78T5w4oX79+mnnzp12bZfesSwWizp37mzXfu5m7NixmjJlSpr2w4cPy2KxpLvufurevbssFovq1avn1DruRep37XYFChRQixYtnFLH3V73+vP/999/q3fv3ipVqpT8/f3l7e2tqKgode3aVX/++ae1X4sWLeTv73+P78o+165dU79+/bRq1apM9U/9OUh9ZcuWTTlz5lRMTIw2bNhg9/HT+y6Y9e/re0av+/09AwC4FndnFwAAgCv56aef9J///MeucM3Hx0crVqyQJF2/fl1Hjx7VkiVL1LZtW40cOVJLly5VeHi4tf/YsWMdXbZLW7p0qYKCgpSUlKQTJ05o+fLl6tWrl4YPH67FixerTJky1r4fffSRunbtatf+T5w4of79+6tAgQIqW7Zsprczcywzxo4dq1y5cqX54ztv3rzasGGDChUqlOU1ZOTmzZv69ttvJf3zOR0/flyPPfaY0+pxpPnz5yswMPC+HrNNmzZ64YUXrMsnT55Uo0aN9M4776hp06bW9nupa/PmzapXr54Mw1Dnzp1VuXJleXp6at++ffr222/15JNP6vz58/f0Pu7FtWvX1L9/f0myK0BMPUfJycnas2eP+vfvrxo1amjDhg164oknMr2ff38G9+Kjjz5Shw4drMvbt29Xp06dNGjQINWoUcPanjt3boccDwDwYCJYAwBA//wx6Ovra2rbbNmy6amnnrJpe+utt9SyZUvVq1dPr7zyijZu3GhdV6JEiXuq9UFTvnx55cqVy7r82muvqXPnzqpWrZoaNGig/fv3y8vLS5LuS8iU+lk7M9CSJC8vrzTfm/tt4cKFOn36tOrWrasff/xRU6dO1YcffujUmtKTnJysW7duWb8nmWFPGOMo4eHhNiH64cOHJUn58uVzyGd96dIlNWzYUN7e3lq/fr3NsapXr6727dvr+++/v+fjOMPt56hq1aoqXLiwatasqbFjx2rixImZ3s+/P4N7UahQIZvrxI0bNyRJUVFRTv/ZBQC4Dm4FBQC4PMMwNHbsWJUtW1Y+Pj7KkSOHXnnlFf311182/eLi4tSwYUOFh4fL29tbhQsXVvv27XXmzBmbfqm3Cm3fvl2vvPKKcuTIoUKFCqlFixb6z3/+I0k2t/mk/nFsr+joaLVt21abNm3SmjVrrO3p3Qo6btw4lSlTRv7+/goICFCxYsXSBBwJCQlq3769wsPD5enpqcjISPXv31+3bt2y6de/f39VqlRJwcHBCgwMVLly5TRp0iQZhmHTb8WKFapevbpy5swpHx8f5cuXTy+//LKuXbtm7ZOUlKSBAweqWLFi8vLyUu7cudWyZUudPn3a1DlJVaZMGfXp00fx8fGaNWuWtT292zPnzJmjSpUqKSgoSL6+vipYsKBatWolSVq1apUqVqwoSWrZsqX1M0sdcZh6K9xvv/2m6OhoBQQEqGbNmhkeK9WECRNUpEgReXl5qUSJEpo5c6bN+oxuN0u91Tn1O1OgQAHt2bNHq1evttaWesyMbgVdt26datasqYCAAPn6+qpKlSr68ccf0z3OypUr9fbbbytXrlzKmTOnGjVqpBMnTqT7ntIzadIkeXp6avLkyYqIiNDkyZPTfE8k6b///a9ef/11hYSEyMvLS/ny5dNbb71lc6vz8ePH1a5dO0VERMjT01NhYWF65ZVX9Pfff1v7xMfH64033lCePHnk5eWl4sWLa+TIkUpJSbH2ST0vw4YN08CBAxUZGSkvLy+tXLlSkvTjjz+qbNmy8vLyUmRkpEaMGJHue/v3raCrVq2SxWLRd999pz59+igsLEyBgYGqVauW9u3bZ7OtYRgaNGiQ8ufPL29vb1WoUEFxcXEOu4170aJFqly5snx9fRUQEKDatWtn6rbHiRMnKiEhQcOGDcswPHrllVfStB04cEAxMTHy9/dXRESEevTokeY29cz+rN/punH48GHr6K3+/fvf062SqcHVkSNHJEmzZs1SdHS08ubNKx8fHxUvXly9e/fW1atXbbbL6LbgevXqaenSpSpXrpx8fHxUrFgxff3113bXdbu1a9dav1P/Nm3aNFksFm3ZskXS/65Fe/bsUc2aNeXn56fcuXOrc+fONtdcKfO/8wAAroFgDQDgFKkjUP79Su+P+vbt26tbt26qVauWFixYoLFjx2rPnj2qUqWKzR/tBw8eVOXKlTVu3DgtW7ZMffv21aZNm/T000/r5s2bafbbqFEjFS5cWHPmzNH48eP10UcfWf8o3bBhg/WVN29e0++zQYMGkmQTrP3bzJkz1bFjR1WrVk3z58/XggUL9O6779r8wZiQkKAnn3xSP//8s/r27aslS5aodevWGjx4sNq2bWuzv8OHD6t9+/aaPXu25s2bZ70V7ZNPPrHpU7duXXl6eurrr7/W0qVLNWTIEPn5+SkpKUmSlJKSooYNG2rIkCFq2rSpfvzxRw0ZMsQaMFy/ft30ecnsudmwYYNeffVVFSxYUDNnztSPP/6ovn37WsPEcuXKafLkyZKk//u//7N+Zm3atLHuIykpSQ0aNNBzzz2nhQsXWm9Ty8iiRYv0+eefa8CAAfr++++VP39+vf7666ZGAs2fP18FCxbUE088Ya1t/vz5GfZfvXq1nnvuOV28eFGTJk3Sd999p4CAANWvX98mgEzVpk0beXh4aMaMGRo2bJhWrVqlN954I1O1HTt2TMuWLVPDhg2VO3duNW/eXAcOHEjzeezatUsVK1bUxo0bNWDAAC1ZskSDBw9WYmKi9bty/PhxVaxYUfPnz1f37t21ZMkSjRo1SkFBQdbbEk+fPq0qVapo2bJl+uSTT7Ro0SLVqlVLPXv2TPe5dp9//rlWrFihESNGaMmSJSpWrJiWL1+uhg0bKiAgQDNnztTw4cM1e/Zs63cgMz788EMdOXJEX331lb788kv9+eefql+/vpKTk619+vTpoz59+uiFF17QwoUL1aFDB7Vp00b79+/P9HEyMmPGDDVs2FCBgYH67rvvNGnSJJ0/f17Vq1fXunXr7rjtsmXL5Obmpvr162f6eDdv3lSDBg1Us2ZNLVy4UK1atdJnn32moUOHWvtk9mf9bteNvHnzaunSpZKk1q1bW7/zH330kd3n6cCBA5L+d5vln3/+qZiYGE2aNElLly5Vt27dNHv27Eyfi127dqlHjx569913tXDhQpUuXVqtW7e+4/Xnbp555hk98cQT1v8hc7sxY8aoYsWK1uBf+ueziImJUc2aNbVgwQJ17txZEyZM0KuvvmqzbWZ/5wEAXIQBAMB9NHnyZEPSHV/58+e39t+wYYMhyRg5cqTNfo4ePWr4+PgYvXr1Svc4KSkpxs2bN40jR44YkoyFCxda13388ceGJKNv375ptuvUqZNhz6/H5s2bG35+fhmu37t3ryHJePvtt61t1apVM6pVq2Zd7ty5s5E9e/Y7Hqd9+/aGv7+/ceTIEZv2ESNGGJKMPXv2pLtdcnKycfPmTWPAgAF
|
|
|
|
"text/plain": [
|
|
|
|
"<Figure size 1500x600 with 1 Axes>"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"# Plot target and cp in one histogramm via crosstab\n",
|
|
|
|
"pd.crosstab(df.cp,df.target).plot(kind=\"bar\",figsize=(15,6),color=['#11A5AA','#AA1190' ])\n",
|
|
|
|
"plt.title('Heart Disease Distribution According To Chest Pain Type')\n",
|
|
|
|
"plt.xlabel('Chest Pain Type')\n",
|
|
|
|
"plt.xticks(rotation = 0)\n",
|
|
|
|
"plt.ylabel('Frequency of Disease or Not')\n"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-09 13:23:19 +02:00
|
|
|
"execution_count": null,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-09 13:23:19 +02:00
|
|
|
"outputs": [],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"# plot correlations for target\n",
|
|
|
|
"plt.figure()\n",
|
|
|
|
"plt.scatter(x=df.age[df.target==1], y=df.thalach[(df.target==1)], c=\"red\")\n",
|
|
|
|
"plt.scatter(x=df.age[df.target==0], y=df.thalach[(df.target==0)])\n",
|
|
|
|
"plt.title('Age-max Heart Rate Plot')\n",
|
|
|
|
"plt.xlabel('age[years]')\n",
|
|
|
|
"plt.ylabel('max. heart rate')\n",
|
|
|
|
"plt.legend([\"Disease\", \"No Disease\"])"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2023-04-09 13:23:19 +02:00
|
|
|
"execution_count": null,
|
2023-04-03 13:08:49 +02:00
|
|
|
"metadata": {},
|
2023-04-09 13:23:19 +02:00
|
|
|
"outputs": [],
|
2023-04-03 13:08:49 +02:00
|
|
|
"source": [
|
|
|
|
"plt.figure()\n",
|
|
|
|
"plt.scatter(x=df.age[df.target==1], y=df.chol[(df.target==1)], c=\"red\")\n",
|
|
|
|
"plt.scatter(x=df.age[df.target==0], y=df.chol[(df.target==0)])\n",
|
|
|
|
"plt.title('Age-Cholesterol Plot')\n",
|
|
|
|
"plt.xlabel('age[years]')\n",
|
|
|
|
"plt.ylabel('Cholesterol')\n",
|
|
|
|
"plt.legend([\"Disease\", \"No Disease\"])"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.8.16"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 4
|
|
|
|
}
|