{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Logistic regression with scikit-learn: heart disease data set" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read data " ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
.............................................
29857001402410112310.21030
29945131102640113201.21030
30068101441931114103.41230
30157101301310111511.21130
30257011302360017400.01120
\n", "

303 rows × 14 columns

\n", "
" ], "text/plain": [ " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", "0 63 1 3 145 233 1 0 150 0 2.3 \n", "1 37 1 2 130 250 0 1 187 0 3.5 \n", "2 41 0 1 130 204 0 0 172 0 1.4 \n", "3 56 1 1 120 236 0 1 178 0 0.8 \n", "4 57 0 0 120 354 0 1 163 1 0.6 \n", ".. ... ... .. ... ... ... ... ... ... ... \n", "298 57 0 0 140 241 0 1 123 1 0.2 \n", "299 45 1 3 110 264 0 1 132 0 1.2 \n", "300 68 1 0 144 193 1 1 141 0 3.4 \n", "301 57 1 0 130 131 0 1 115 1 1.2 \n", "302 57 0 1 130 236 0 0 174 0 0.0 \n", "\n", " slope ca thal target \n", "0 0 0 1 1 \n", "1 0 0 2 1 \n", "2 2 0 2 1 \n", "3 2 0 2 1 \n", "4 2 0 2 1 \n", ".. ... .. ... ... \n", "298 1 0 3 0 \n", "299 1 0 3 0 \n", "300 1 2 3 0 \n", "301 1 1 3 0 \n", "302 1 1 2 0 \n", "\n", "[303 rows x 14 columns]" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Remote copy of the data set; a local copy (e.g. filename = \"heart.csv\") can be used instead\n", "filename = \"https://www.physi.uni-heidelberg.de/~reygers/lectures/2021/ml/data/heart.csv\"\n", "df = pd.read_csv(filename)\n", "df" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [], "source": [ "# Label vector: the 'target' column; feature matrix: every other column\n", "y = df['target'].values\n", "feature_cols = [name for name in df.columns if name != \"target\"]\n", "X = df[feature_cols]" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "# 50/50 split; the fixed random_state keeps the partition identical between runs\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    X, y, test_size=0.5, shuffle=True, random_state=42\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Fit the model" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 427 ms, sys: 14.1 ms, total: 441 ms\n", "Wall time: 587 ms\n" ] }, { "data": { "text/plain": [ "LogisticRegression(max_iter=5000, penalty='none', tol=1e-05)" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import 
LogisticRegression\n", "\n", "# penalty='none' requests an unregularised fit.\n", "# NOTE(review): recent scikit-learn releases spell this penalty=None - confirm against the installed version.\n", "lr = LogisticRegression(penalty='none', fit_intercept=True, max_iter=5000, tol=1E-5)\n", "%time lr.fit(X_train, y_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test predictions on test data set" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.83 0.75 0.79 69\n", " 1 0.81 0.87 0.84 83\n", "\n", " accuracy 0.82 152\n", " macro avg 0.82 0.81 0.81 152\n", "weighted avg 0.82 0.82 0.81 152\n", "\n" ] } ], "source": [ "from sklearn.metrics import classification_report\n", "\n", "# Hard 0/1 class labels predicted for the test set\n", "y_pred_lr = lr.predict(X_test)\n", "print(classification_report(y_test, y_pred_lr))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Compare two classifiers using the ROC curve" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "\n", "# Fixed seed (same value as the train/test split) so the forest, and the ROC curve and AUC\n", "# computed from it, are reproducible under Restart and Run All\n", "rf = RandomForestClassifier(max_depth=3, random_state=42)\n", "rf.fit(X_train, y_train)\n", "y_pred_rf = rf.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYoAAAEOCAYAAACXX1DeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAruElEQVR4nO3deXRUVfrv//fDmEREJIwGNMhgA9KiRMDhpyAiijY4IIptK+hVHFeriDi1DH7FAVG+9mpF/Iq0MygK9DUttop69YIQrzYCKqZpZJAZBW1meH5/nEpRqYRKpZJUZfi81qplzjm7znlOEfPU3vvsvc3dEREROZRaqQ5AREQqNyUKERGJSYlCRERiUqIQEZGYlChERCSmOqkOoLw1adLEs7OzUx2GiEiV8sUXX2x296bFHat2iSI7O5u8vLxUhyEiUqWY2Q+HOqamJxERiUmJQkREYlKiEBGRmJQoREQkJiUKERGJKWWJwsymmtlGM1tyiONmZk+ZWb6ZLTazk5Ido4iIpLZGMQ04N8bx84D2odf1wDNJiElERKKkbByFu39iZtkxigwEXvRgHvQFZtbIzFq6+7qKiGfB09dx+M/fVMSpq51f219Ej0tHpDoMEUmSytxHkQWsjtheE9pXhJldb2Z5Zpa3adOmpARXU7Xe8y8afP92qsMQkSSqFiOz3X0KMAUgJycnoZWYet70XLnGVF0tHX96qkMQkSSrzIliLdA6YrtVaJ+kWOs9/zpkwmjSoD7ND08r+0W6DIKcYWU/j4iUWWVuepoDXBV6+qknsK2i+ickfr+2v4jV9doWe2zHnv1s/nV32S+y/mv4+s2yn0dEykXKahRm9hrQC2hiZmuA0UBdAHefDOQC/YF8YAegr5eVQNCJXXxH9mXPzgdg+rBTynaRF84v2/tFpFyl8qmnISUcd+DmJIUjlc36r5OTMNTEJVKiytxHITVVl0HJuc76r4P/KlGIxKREIZVPzrDk/PFWE5dIXJQopFwtW7c93FdRWQzsmsUVPY4u/mCymriqMjXP1XhKFFJuBnYtdjxkSi1btx2g+ESRrCauqkzNc4IShZSjK3ocfehv7ikSs3aTrCauqky1LUGJQmqAytYcFrMpTKQSUqKQaq2yNYfFbAoTqaSUKKRaq2zNYZWpZiMSLyUKkSSrbE1hUMonw/QUVI2jRCGSRJWtKQxK+WSYnoKqkSyYKaP6yMnJ8by8vFSHIVJlXPbsfJat206nlg1LLPvAlpFk713ByrrHhveVasZg1UYqLTP7wt1zijumGoVIDVeaWs5n6b0LbRfMGBxXolBtpMpSohCp4UrX4V94ZuBSzRisMRlVlhKFiJRJrM55jRmpHpQoRCRhsZqtNGak+lCiEJGExWq2qmyPAEvilChEpMJENks9sGVb8MRUaL31Ep+W0hNSlYYShYhUiOhmqcgnpkp8WkpPSFUqShQiUiGKNksdfDKqxKel9IRUpaJEISIpEetpqRKbqdQslVRKFCKSdCUN8ovZTKVmqaRTohCRpCt5kF+MZio1SyWdEoWIVD2x1jpXs1S5U6IQkaol1lrnapaqEEoUIlK1xFrrXM1SFUKJQkQqvegnpEq10FI0NU2VmhKFiFRq0U9IlWqhpWhqmkqIEoWIVGrRT0jFnEMqVrMUqGkqQUoUIlLllGbd8SLNVJFNU2qGiosShYhUKaVZka9IM1Vk05SaoeKmNbNFpNqKtR74A1tG0v7ASuplnXBwZw2uYcRaM7tWsoOJZGbnmtl3ZpZvZncXc/xoM5tnZl+a2WIz65+KOEWkahrYNavYJAHw5p5T+L5W9sEd67+Gr99MTmBVTMpqFGZWG1gO9AXWAIuAIe6+LKLMFOBLd3/GzDoBue6eHeu8qlGISDzCU4MMj5oaZNg7KYootWLVKFLZR9EdyHf3FQBm9jowEFgWUcaBgq8DRwA/JjVCEanWohdWan9gJfU0NUgRqWx6ygJWR2yvCe2LNAa40szWALnArcW
dyMyuN7M8M8vbtGlTRcQqItVMdLNUkaaoSDW8WaqyP/U0BJjm7hPN7BTgJTM73t0PRBZy9ynAFAianlIQp4hUMUXHZ8A4Li5+MaUaPv4ilTWKtUDriO1WoX2RrgVmALj7fCANaJKU6EREBEhtolgEtDezNmZWD7gcmBNVZhXQB8DMOhIkCrUtiYgkUcoShbvvA24B5gLfADPcfamZjTOzAaFiI4DrzOyfwGvAUK9uAz9ERCq5lPZRuHsuQSd15L4HIn5eBpyW7LhEpGYq1Sy1NUhl78wWEUmKUs1SW8MoUYiIUMpZamsYJQoRkUMoaIp6YMs2AMbV0GYpJQoRkWLEmqW2pjVLKVGIiBSjUFPUC0cAhAfj1bRmKSUKEZF4RCx49MCWbezYs5+l42uHD//a/iJ6XDoiVdFVqJROMy4iUiV0GQQtuoQ3mzSoT0a9g0mi9Z5/0eD7t1MRWVKoRiEiUpKotbibh14Flo4/PekhJZMShYhIOcjeu6LarsWtRCEiUkafpfcGoDNUy7W41UchIlJGH2T0Z1zmhGB1vIi+jOpCNQoRkXIQa3BetKo2WE+JQkSkjGINzotWFQfrKVGIiJRRrMF50ariYD31UYiISExKFCIiEpMShYiIxFSmPgozywAyAYs+5u6rynJuERGpHEqdKMysFnAXcCvQIkbR2jGOiYhIFZFIjeIR4E5gKTAT2FKuEYmIVHURM80CVX5Kj0QSxZXAu+7ev7yDERGp8roMKrxdDab0SCRRHAnMLu9ARESqhaiZZgvVLKqoRJ56+hpoWd6BiIhI5ZRIohgL3GBmrcs7GBERqXwSaXrqBvwALDOzt4F/A/ujyri7P1jW4EREJPUSSRRjIn6+8hBlHFCiEBGBIuttB+tXFD8XVGWUSKJoU+5RiIhUV1FPQWXvXZGiQBJX6kTh7j9URCAiItVS1FNQK6vg+tplncIjk4M1jH+7uwbfiYhUMwlNCmhmJ5jZx8BG4PPQa6OZfWRmvy3PAEVEJLVKnSjM7HjgU+BUgoF340Ov2cBpwP8xs85xnutcM/vOzPLN7O5DlBlsZsvMbKmZvVraeEVEpGwSaXoaB+wFTnP3xZEHQknkk1CZS2KdxMxqA38B+gJrgEVmNsfdl0WUaQ/cE7rWT2bWLIF4RUSkDBJpejoD+Et0kgBw9yXA08CZcZynO5Dv7ivcfQ/wOjAwqsx1oWv9FDr/xgTiFRGRMkgkURwGrI9xfF2oTEmygNUR22tC+yJ1ADqY2WdmtsDMzi3uRGZ2vZnlmVnepk2b4ri0iIjEK5FEsQK4IMbxC0JlykMdoD3QCxgCPGdmjaILufsUd89x95ymTZuW06VFRAQSSxQvAv3M7FUz62xmtUOv483sFeAcYFoc51kLRM4X1Sq0L9IaYI6773X3fwPLCRKHiIgkSSKJ4nHgDeByYDGwK/T6J8G3/jeAiXGcZxHQ3szamFm90PnmRJWZRVCbwMyaEDRFVb1hjSIiVVgiI7P3A5eZ2f8AF3JwwN0KYJa7vx/nefaZ2S3AXIJlU6e6+1IzGwfkufuc0LFzzGwZwcSDIzWoT0QkuRIeme3u/wD+UZaLu3sukBu174GInx24I/QSEZEUSGhktoiI1Bwl1ijM7AGCacMfcvcDoe2SaD0KEZFqIp6mpzEEieJRYA+F16M4FK1HISJSTcSTKNoAhEZPh7dFRCQx2XtXhBcyosugQtOQV0YlJoro9Se0HoWISOKC1e2gMwQr30GlTxTl1pltZk1Ck/iJiMghfJDRn3GZE2DYO9CiS6rDiUsi04xfZWZTovY9DGwAvg3Ny3R4eQUoIiKplUiNYjgRTVZmlgOMAv4P8BzBrLAa9yAiUk0kMuCuHcE0HQUuBbYC57j7HjNzYDAwthziExGRFEukRnEEsC1iuw/wfsRTUXnA0WUNTEREKodEEsV6QjO4mllToCtBs1OBBgTzMomISDWQSNPTh8DNZrYV6E0wuO6diOPHUXS6cBERqaI
SSRQPAKcCj4W2/8vdVwKYWR2CtbJnlkt0IiLV3fqvDw6+g0o5AC+RacbXmFlnoBOwzd1XRRzOAK4nWJtCRERi6TKo8HYlHYCX0DTjoTUpvi5m/3ZgdlmDEhGpEXKGFU4KkTWLSiTh9ShERKQCVMKmqHimGT8AHAAyQuMkDhB0YMfi7q4kJCJSGpW0KSqeP+YvEiSG/VHbIiKSgGXrtnPZs/OLOfIbBnZ9hit6hIaivXB+0RpGpCTVNuKZPXZorG0REYnfwK5Zhzy2bN12gIOJIrqGESmJtQ01D4mIJNEVPY4+mAiiFKllRHd2R0pix3epE4WZnQ30cfd7DnH8YeA9d59X1uBERGqa6GapgV2zDplYkiWRGsVdFJ7rKVobgtlklShEREohulmqSFNUiiSSKE7g4Kjs4nxOkExERKQUopuliu/wTr5EZ4/9T4zjO4EjEwtHREQqm0QSxVqgW4zj3QhmmBURkWogkUTxDnB1qFO7EDPrA1wN5JY1MBERqRwS6aN4iGCG2Llm9nfgq9D+rsB5BLWJB8sjOBERSb1EZo/dYGanAs8QJIb+BYeAvwO3uPu68gtRRERSKdHZY38A+pvZkQRraAPku/tP5RaZiIhUCmUamR1KDIvKKRYREamEEunMxsxqm9lVZvaymf3DzE4M7T8ytP/Qk5mIiEiVUupEYWYZwMfANGAgcBYHx01sBx4BbozzXOea2Xdmlm9md8cod4mZuZnllDZeEREpm0RqFGOAHOAi4FjACg6EVr57C+hX0knMrDbwF4IO8U7AEDPrVEy5w4E/Eoz4FhGRJEskUVwKTHH32QQLGkXLB7LjOE93gg7wFe6+B3idoIYS7UHgUWBXArGKiEgZJZIojgL+GeP4DuDwOM6TBayO2F4T2hdmZicBrd39nVgnMrPrzSzPzPI2bdoUx6VFRCReiTz1tIWoP+hROgM/JhbOQWZWC3gCGFpSWXefAkwByMnJ0ep7IlJtRE47nqopxxOpUXwADAt1ahdiZm2Aa4B34zjPWqB1xHar0L4ChwPHAx+Z2UqgJzBHHdoiUlMM7JpFp5YNgSBhzP5qbQnvqBjmXrov4GbWDsgj+KP+GjAWmEiwpvYNof+e6O6rD3mS4Dx1gOVAn9C5FgFXuPvSQ5T/CLjT3fNinTcnJ8fz8mIWERGpci57dj7L1m0PJ44HtowEYFzmhHCZstQ4zOwLdy/2i3giU3jkhyb/mwqMC+2+M/TfJcAfSkoSofPsM7NbgLlAbWCquy81s3FAnrvPKW1sIiLVVay1tqFiFzkqdY2i0JvNjgc6Ejwi+727f1legSVKNQoRqREK1sweFjzrU9CPMX34KQmdrtxqFGbWgOCJpz+7+yR3X0JQixARkWqqVJ3Z7v4rkAn8WjHhiIhIZZPIU08LCEZmi4hIDZBIorgbGGxmw8zMSiwtIiJVWiID7p4AfgL+B3jMzP5FMBo7krt7n7IGJyIiqZdIojiWYDW7VaHt5uUXjoiIVDalfeqpKXAZsNnd/1UxIYmISGUSVx+FmdUys8nAOuD/AsvN7NNQ4hARkWos3s7sW4DrgfUE6018DZwKPFtBcYmISCURb9PTVcA3QE93/wXAzJ4DhppZI3f/uYLiExGRFIu3RnEcMK0gSYT8mWCOpg7lHpWIiFQa8SaKwyi6xsSPEcdERKSaKs2Au+jZAwu2NehORKQaK83jsf3NrEXEdgZBsrjUzLpGlXV3f7KswYmISOqVJlFcEXpFG17MPgeUKEREqoF4E0XvCo1CREQqrbgShbt/XNGBiIhI5ZTIXE8iIlIZrP86vNLdA1u28Vl6byCxFe5iUaIQEamKugwqtJm9d0WFXUqJQkSkKsoZFrxCVo4/vcIulcjCRSIiUoMoUYiISExKFCIiEpMShYiIxKREISIiMSlRiIhITDXy8djt27ezceNG9u7dm+pQpAarW7cuzZo1o2HDhqkORSSmGpcotm/fzoYNG8jKyiI9PR0zzZIuyef
u7Ny5k7Vr1wIoWUilVuOanjZu3EhWVhYZGRlKEpIyZkZGRgZZWVls3Lgx1eGIxFTjEsXevXtJT09PdRgiAKSnp6sJVCq9lCYKMzvXzL4zs3wzu7uY43eY2TIzW2xmH5jZMeV03fI4jUiZ6XdRqoKUJQozqw38BTgP6AQMMbNOUcW+BHLc/bfAm8BjyY1SRERSWaPoDuS7+wp33wO8DgyMLODu89x9R2hzAdAqyTGKiNR4qUwUWcDqiO01oX2Hci3w9+IOmNn1ZpZnZnmbNm0qxxArp6FDh5KTk5PqMKqtadOmYWb8+uuvqQ5FpFKoEp3ZZnYlkANMKO64u09x9xx3z2natGlyg5Nq5/zzz2f+/PlkZGSkOhSRSiGV4yjWAq0jtluF9hViZmcD9wFnuvvuJMUmpbRz584Kf5osGdcAaNq0KfrCIXJQKmsUi4D2ZtbGzOoBlwNzIguY2YnAs8AAd9fD5jF89dVX9OnTh4yMDI488kh+//vfs2HDhkJlVq1axXnnnUd6ejpt2rRh2rRpDBo0iF69epX6embGE088wW233UbTpk3p0qULALt27eKuu+6idevW1K9fnxNOOIHc3NxC7929ezc33ngjjRo1IjMzk5EjRzJp0qRCTwB99NFHmBlz585lwIABNGjQgFtuuSV8H5dffjmNGzcmIyODfv368d133xW6xsMPP0y7du1IS0ujefPmnHvuuaxfvx4IHpG+8847Ofroo6lfvz5HHXUUF110EXv27AGKb3ravHkzV199NZmZmWRkZNCrVy/y8vIKXTM7O5s777yTJ598klatWnHkkUdy+eWX8/PPP5f68xWpTFJWo3D3fWZ2CzAXqA1MdfelZjYOyHP3OQRNTQ2AN0J/RFa5+4BUxVxZbdq0iV69etGxY0deffVVfv31V+6++2769u1LXl4e9erVw90ZMGAAP//8M1OnTiUtLY0HH3yQTZs20bZt24SuO2HCBM444wxeeuklDhw4AMCgQYNYuHAhY8eOpW3btsyYMYMBAwaQl5dH165dAbjrrruYNm0a48ePp2PHjrzwwgu8/vrrxV7j2muvZdiwYdx2222kpaWxdetWTj/9dDIzM5k8eTIZGRk88sgjnH322Sxfvpz09HRefPFFxo8fz6OPPkrnzp3ZsmULH374If/5z3+AIIm88sorPPLII7Rp04b169eTm5vL/v37D3mvF154Ifn5+Tz++OM0adKECRMm0Lt3b7788kvatWsXLjdjxgx++9vfMmXKFNasWcMdd9zBvffey9NPP53QZyxSGaR0Cg93zwVyo/Y9EPHz2cmIY+zflrLsx+3JuFQRnY5qyOjfdS7TOSZOnAjA3Llzw1NBtG/fnp49ezJz5kyGDBlCbm4u//znP1m4cCEnn3wyAN27dyc7OzvhRNGyZUumT58e3v7ggw945513+OijjzjzzDMBOOecc1i+fDkPPfQQb7zxBlu2bGHKlCmMGzeO22+/HYB+/fpx/PHHF3uNSy+9lAcffDC8/ac//Yn//Oc/fPXVVzRu3BiA0047jezsbKZOncrNN9/MwoULOeecc7jpppvC77v44ovDPy9cuJArrriCq6++Orxv8ODBh7zPd999l88++6zQfZ111llkZ2czYcIEnn322XDZunXrMmvWLOrUCf7XWrZsGa+//roShVRpVaIzW2Ir+MMYOV9Qjx49yM7O5tNPPwVg0aJFtGjRIpwkALKysujWrVvC1+3fv3+h7ffff58WLVpw2mmnsW/fvvCrT58+4Waar7/+ml27djFgwMGKoZnxu9/9rthrnH/++UWu0bdvXxo2bBg+/+GHH063bt3C1+jatSu5ubmMHj2ahQsXFqkpdO3alWnTpvHYY4+xePFi3D3mfS5cuJBmzZqFkwTAYYcdxgUXXBD+fAv07t07nCQAOnXqpAkopcqrcZMCFqes3+hTbd26dXTuXPQemjdvztatWwFYv359sR20TZs25Zdffknous2bNy+
0vXnzZtavX0/dunWLlK1du3Y4joLrRscR7zUWLFhQqCZToE+fPgBcc801/PLLL+GaS2ZmJjfccANjx46ldu3a3H///dSqVYunn36aUaNGkZWVxciRI/njH/9YbAzr1q2jWbNmxcZW8PkWaNSoUaHtgma/3bt3F/u5iFQFShTVQMuWLYudWG7Dhg3hGkOLFi0obozJpk2bSEtLS+i60dNPNG7cmKysLGbNmnXI97Ro0SJ83YKmo4LteK8xYMAA/vSnPxUpe/jhhwNQq1Ytbr/9dm6//XZWr17NK6+8wn333UerVq244YYbSEtLY9y4cYwbN47vv/+eyZMnc9ttt3Hcccdx7rnnFjlvrM838h5Eqis1PVUDPXr0YO7cuYVqBosWLWLlypWcfvrpAJx88smsX7+ehQsXhsusXbuWL774otzi6NOnD+vXr6dBgwbk5OQUeQF06dKFtLQ0Zs+eHX6fu/O3v/0t7mssXbqUzp07Fzn/cccdV6R869atufvuu2nXrh3Lli0rcrx9+/Y8/vjj1K9fv9jjEHy+Gzdu5JNPPgnv27FjB++880748xWpzlSjqAbuuOMOnnnmGfr168eoUaPCTz116dKFSy65BAj6E0444QQGDx7Mww8/THp6OmPHjqV58+bUqnXw+8LHH39Mnz59+OCDDwq1ycejb9++9OvXj759+zJq1Cg6d+7M9u3b+eqrr9i1axcPP/wwmZmZXHfddYwePZq6deuGn3ravn17XBPk3XHHHbz88sucddZZ3HrrrWRlZbFhwwY+/vhjTj/9dIYMGcLw4cNp3LgxPXv25IgjjmDevHl8//33PProowBcdNFFdOvWjRNPPJH09HTefPNN9u3bxxlnnFHsNfv168epp57KZZddxiOPPEJmZiaPP/44O3fuZOTIkaX6jESqIiWKaqBp06bMmzePESNGMGTIEOrVq0f//v158sknqVevHhA04cyePZvhw4czbNgwmjdvzn333cebb75ZaASyu7N///4SO3iLY2a89dZbjB8/nkmTJrFq1SoaN25M165dufXWW8PlHnvsMfbu3cuYMWOoVasWf/jDH7j22muZNGlSiddo0qQJCxYs4L777uP222/n559/pmXLlpx++un89re/BeCUU07hueee49lnn2XXrl20a9eO5557jgsvvBCAU089lenTpzNhwgQOHDhAp06dmDlzZsxpUWbNmsWIESO47bbb2LVrF927d+fDDz8s9GisSHVlifxBqMxycnI8eiBUpG+++YaOHTsmMaLKa9u2bRx77LHccsstjB07NqWxnH322ezdu5ePP/44pXGkgn4npTwsHR80g3a+99MSShbPzL5w92K/LalGUYNMnjyZWrVq0b59ezZt2sQTTzzB7t27ueaaa5Iax7x58/j888856aST2Lt3L9OnT+eDDz7gjTfeSGocIhIfJYoaJC0tjUcffZQffvgBM6N79+68//77HHNMuawHFbcGDRowa9YsHn74YXbt2kX79u3D04mISOWjRFGDDB06lKFDh6Y6DE4++WQWLFiQ6jBEJE56PFZERGJSohARkZiUKEREJCYlChERiUmJQkREYlKiEBGRmJQoJG6//vorZsa0adNSGsdnn33GSSedRFpaWlzzQyXDjBkzUv65iFQUjaOQKmf48OE0a9aMuXPnUr9+/VSHAwSJYvPmzZVinIpIeVOiqAb279/P/v37wxMAVnfffvst119/falnt41W0z43kUSp6akKGjp0KDk5OcyaNYvOnTuTlpbG559/zrp167jmmms49thjSU9Pp0OHDtx///3s2bMn/N6VK1diZsyYMYPhw4dzxBFH0KpVK0aPHs2BAwcKXWfmzJl06NCB9PR0zjjjDL799tsisezfv58xY8Zw9NFHU79+fTp37syrr75abLzvvPMOnTp1IiMjg/PPP5+tW7eSn59P7969Oeyww8jJyWHx4sWHvO+PPvoIM2P//v388Y9/xMzC3+BLE0f
05wYwe/ZscnJySEtLo0WLFtx1112Fli9ds2YNgwcPplmzZqSnp9O2bdvw4klDhw5l5syZfPzxx5gZZsaYMWNK/ocUqSrcvVq9unXr5rEsW7Ys5vGq4Oqrr/bMzExv3769v/TSS/6Pf/zDV69e7YsXL/YRI0b422+/7R999JFPmTLFjzrqKL/++uvD7/33v//tgB9zzDF+xx13+HvvveejRo1ywKdPnx4u98UXX3jt2rV90KBBnpub64899pi3adPGAX/hhRfC5e69916vU6eOP/jgg/7uu+/6dddd54C/+uqrheJt2rSpn3TSST5z5kx/6aWXvFGjRn7JJZd4t27dfPLkyZ6bm+snnHCCd+zY0Q8cOFDsfW/bts3nz5/vgI8YMcLnz5/v+fn5pYqjuM9t+vTpXqtWLb/xxht97ty5/vTTT/sRRxzhI0aMCL+3d+/e3rNnT3/77bd93rx5/vzzz/vIkSPd3T0/P9979+7tJ554os+fP9/nz5/vq1evjvvfszr8TkrqLXnoNF/y0GkJvx/I80P8XVXTE8Df74b1X6fm2i26wHmPlPptW7Zs4f3336dr167hfa1ateLxxx8Pb5922mkcdthhXHPNNfz5z38u1MRyxhlnMHHiRCBYcOjdd9/lrbfeYvDgwQA88sgjdOjQgRkzZmBmnHfeeezZs4f7778/fI6tW7cyadIk7r///vD+fv36sWbNGsaMGcOQIUMKlZ0/fz5t27YFYPHixUyYMIG//vWvXHXVVUDwpeX888/n22+/LXba7YYNG9KzZ08AsrOzwz+XJo7oz83dGTlyJFdddRVPP/10uFz9+vW5+eabueeee8jMzGThwoW89tpr/O53vwOgV69e4bJt27alcePGHDhwIByTSHWipqcqKisrq1CSgOCP3qRJk+jUqRPp6enUrVuX3//+9+zevZtVq1YVKnvOOecU2u7UqRNr1qwJby9cuJABAwYUeqro4osvLvSeJUuWsGPHDi699NJC+y+77DKWL19eaB3s7OzscJIAwgv+nHXWWUX2rV27tsT7TzSO6M9t+fLlrFq1isGDB7Nv377w66yzzmLXrl0sWbIEgK5du3LPPfcwbdq0Ip+lSHWnGgUk9I0+1Zo3b15k36RJkxg5ciSjRo3izDPP5Mgjj2TRokXcfPPN7Nq1q1DZRo0aFdquV69eoTLr16+nWbNmhcpEb69bt67YWAq2t27dStOmTQ95vej9BfuiYy1JaeKILrN582YgWCq2OKtXrwZg+vTphVbVO+GEE5g4cSJ9+vQpVawiVZESRRVV3PiBN954g0GDBvHQQw+F9y1btiyh87do0YKNGzcW2he93bJly/D+zMzM8P4NGzYA0Lhx44SuXVqliSP6cys4NmXKFE488cQi527Tpg0Q1ESmTZvGgQMHWLhwIWPGjGHAgAGsWrWq0DVFqiM1PVUjO3fuLDKu4JVXXknoXCeffDJz5swptHb2W2+9VajM8ccfT0ZGRpGV6WbMmEGHDh3C3+IrWlniOO6448jKymLlypXk5OQUeUUngVq1atGzZ09Gjx7Njh07+OGHH4CiNTKR6kQ1imqkb9++PPXUU/To0YO2bdvyyiuvkJ+fn9C5Ro0aRY8ePRg8eDDXXnstS5Ys4fnnny9UpnHjxtx2223813/9F3Xq1CEnJ4e33nqL3NxcXnvttfK4pbiUJY5atWoxceJE/vCHP7B9+3bOO+886tWrx4oVK5g1axZvvvkme/fupV+/flx11VV06NCB3bt3M3HiRFq0aBHudP/Nb37D7NmzmTVrFq1ateKoo47iqKOOSsbti1Q4JYpq5IEHHmDTpk3hJ38uvvhinnrqqfCTOqWRk5PD66+/zj333MOFF15ITk4O06dPp3v37oXKjRs3jjp16vDMM8+wYcMG2rVrx8svv8zll19eLvcUr7LEcdlll9GwYUPGjx/P1KlTqV27NsceeywXXHAB9erVo3bt2nTp0oX//u//ZvXq1WR
kZNCzZ0/ee+890tPTAbjpppv48ssvueaaa/jpp58YPXq0xlJItWGRTQvVQU5Ojufl5R3y+DfffFPso5ciqaLfSSkPS8efDkDnez9N6P1m9oW75xR3TH0UIiISkxKFiIjElNJEYWbnmtl3ZpZvZncXc7y+mU0PHf/czLJTEKaISI2WskRhZrWBvwDnAZ2AIWbWKarYtcBP7t4OeBJ4NLlRiohIKmsU3YF8d1/h7nuA14GBUWUGAn8N/fwm0MfKYaWa6taBL1WXfhelKkhlosgCVkdsrwntK7aMu+8DtgFFhsGa2fVmlmdmeZHz+hSnbt267Ny5syxxi5SbnTt3Urdu3VSHIdXAL4068kujinl6rlqMo3D3KcAUCB6PjVW2WbNmrF27lqysLNLT0yvNUppSs7g7O3fuZO3atcXO2yVSWj1veq7Czp3KRLEWaB2x3Sq0r7gya8ysDnAEsKUsF23YsCEAP/74Y6GFaUSSrW7dujRv3jz8OylSWaUyUSwC2ptZG4KEcDlwRVSZOcDVwHxgEPChl0OjbsOGDfU/p4hInFKWKNx9n5ndAswFagNT3X2pmY0jWGlpDvA88JKZ5QNbCZKJiIgkUUr7KNw9F8iN2vdAxM+7gEuj3yciIsmjkdkiIhKTEoWIiMSkRCEiIjEpUYiISEzVbj0KM9sE/JDg25sAm8sxnKpA91wz6J5rhrLc8zHuXuy6wdUuUZSFmeUdauGO6kr3XDPonmuGirpnNT2JiEhMShQiIhKTEkVhU1IdQAronmsG3XPNUCH3rD4KERGJSTUKERGJSYlCRERiqpGJwszONbPvzCzfzO4u5nh9M5seOv65mWWnIMxyFcc932Fmy8xssZl9YGbHpCLO8lTSPUeUu8TM3Myq/KOU8dyzmQ0O/VsvNbNXkx1jeYvjd/toM5tnZl+Gfr/7pyLO8mJmU81so5ktOcRxM7OnQp/HYjM7qcwXdfca9SKY0vxfwLFAPeCfQKeoMjcBk0M/Xw5MT3XcSbjn3kBG6Ocba8I9h8odDnwCLAByUh13Ev6d2wNfAkeGtpulOu4k3PMU4MbQz52AlamOu4z3fAZwErDkEMf7A38HDOgJfF7Wa9bEGkV3IN/dV7j7HuB1YGBUmYHAX0M/vwn0saq9ZmqJ9+zu89x9R2hzAcGKg1VZPP/OAA8CjwK7khlcBYnnnq8D/uLuPwG4+8Ykx1je4rlnBwpWKjsC+DGJ8ZU7d/+EYH2eQxkIvOiBBUAjM2tZlmvWxESRBayO2F4T2ldsGXffB2wDMpMSXcWI554jXUvwjaQqK/GeQ1Xy1u7+TjIDq0Dx/Dt3ADqY2WdmtsDMzk1adBUjnnseA1xpZmsI1r+5NTmhpUxp/38vUUoXLpLKx8yuBHKAM1MdS0Uys1rAE8DQFIeSbHUImp96EdQaPzGzLu7+cyqDqmBDgGnuPtHMTiFYNfN4dz+Q6sCqippYo1gLtI7YbhXaV2wZM6tDUF3dkpToKkY894yZnQ3cBwxw991Jiq2ilHTPhwPHAx+Z2UqCttw5VbxDO55/5zXAHHff6+7/BpYTJI6qKp57vhaYAeDu84E0gsnzqqu4/n8vjZqYKBYB7c2sjZnVI+isnhNVZg5wdejnQcCHHuolqqJKvGczOxF4liBJVPV2ayjhnt19m7s3cfdsd88m6JcZ4O55qQm3XMTzuz2LoDaBmTUhaIpakcQYy1s897wK6ANgZh0JEsWmpEaZXHOAq0JPP/UEtrn7urKcsMY1Pbn7PjO7BZhL8MTEVHdfambjgDx3nwM8T1A9zSfoNLo8dRGXXZz3PAFoALwR6rdf5e4DUhZ0GcV5z9VKnPc8FzjHzJYB+4GR7l5la8tx3vMI4Dkzu52gY3toVf7iZ2avEST7JqF+l9FAXQB3n0zQD9MfyAd2AMPKfM0q/HmJiEgS1MSmJxERKQUlChERiUmJQkREYlKiEBGRmJQoREQkJiUKkSrGzIaGZrvtFbGvV2jf0JQFJtW
WEoUIhf7QRr5+NbP/Z2a3h0boi9RI+uUXKew1ggFLBrQAriKYE6ojcH0K4xJJGSUKkcL+n7u/XLBhZk8D3wL/y8zuc/fqPPWDSLHU9CQSg7v/h2AeKAPaFuw3s5Zm9oyZrTKzPWb2o5lNMbNm0ecws4Zm9pCZfWNmu8xsi5l9amaXR5T5jZk9HVp17hcz22FmX5jZ/0rKjYrEoBqFSMkKEsRWCJbWBOYTrKj2PMEKa+0IVgbsbWY57r4tVLYR8CnQmWARrGcI5iQ6EbiAYKEdCObuOQP438C/gcOASwnmKGrq7g9X6B2KxKBEIVJYRmhW1YI+ihsI/qgvdPfloTJ/JpiE7UR3X1PwRjN7g6D2cTvBYjkA4wmSxHB3nxJ5odCaGAVeCk3oFnn8SeBD4G4ze9zd95bPLYqUjpqeRAobSzAF9UZgMcH66W8RWl7TzI4gqAnMAXaZWZOCF7CSYMbOc0JlaxHMPPxNdJIAiFw4J9TEReh9aWaWCTQG3iNYxvM35X6nInFSjUKksCnAGwQ1hi7AKIKFXwrW1D6O4AvWtaFXcQrWd2gCHAm8W9JFzawBQS1kMIUXnSlwZFzRi1QAJQqRwr539/dDP//dzD4l6GOYTFA7sNCxl4G/HuIcOxO47qsENZUpwCcEKyruJ1hX4HZU+5cUUqIQicHd/6+ZvUSwYthTwHcEi9/Ui0goh7IZ+Ak4IVahUIf3BQT9FDdEHTs70dhFyou+pYiU7EGCb/fjQqvB5QIXh5aZLCS0/GRTCPdBvAZ0MrMizVQWWkowdG44WFspON4S0OOxknKqUYiUwN3zzex14Pdm9v8RPAb7KfCJmb0IfEnwpetYgk7vFzn41NP9wFnA/5jZOaH3GcGTVHWAP7j7L2b2HnClme0kWAf6GGA4waOymUm5UZFDUKIQic9DwBCCWkVvM+tG0NE9ELiSoLN7NfA3YEbBm9z9JzM7BbgXuBi4CPgFWEbwmG2BK4FHgN8BVwPfA/cBe4EXKvTOREqgNbNFRCQm9VGIiEhMShQiIhKTEoWIiMSkRCEiIjEpUYiISExKFCIiEpMShYiIxKREISIiMSlRiIhITP8/OPsvw2LPtM8AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from sklearn.metrics import roc_curve\n", "\n", "# Column 1 of predict_proba holds the predicted probability of class 1\n", "y_pred_prob_lr = lr.predict_proba(X_test)\n", "fpr_lr, tpr_lr, _ = roc_curve(y_test, y_pred_prob_lr[:,1])\n", "\n", "y_pred_prob_rf = rf.predict_proba(X_test)\n", "fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_prob_rf[:,1])\n", "\n", "# x axis: true-positive rate (recall); y axis: 1 - false-positive rate (specificity)\n", "plt.plot(tpr_lr, 1-fpr_lr, label=\"log. regression\")\n", "plt.plot(tpr_rf, 1-fpr_rf, label=\"random forest\")\n", "\n", "plt.xlabel('Recall', fontsize=18)\n", "plt.ylabel('Specificity (1 - FPR)', fontsize=18)\n", "plt.legend(fontsize=15)\n", "\n", "plt.savefig(\"03_ml_basics_log_regr_heart_disease.pdf\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import roc_auc_score\n", "\n", "# The AUC must be computed from the continuous class-1 scores; hard 0/1 predictions\n", "# reduce the ROC curve to a single operating point and do not match the curves plotted above\n", "auc_lr = roc_auc_score(y_test, y_pred_prob_lr[:,1])\n", "auc_rf = roc_auc_score(y_test, y_pred_prob_rf[:,1])\n", "print(f\"Area under Curve (AUC) scores: {auc_lr:.2f}, {auc_rf:.2f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Check whether data preprocessing makes a difference in this case" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 22.4 ms, sys: 2.89 ms, total: 25.3 ms\n", "Wall time: 27.6 ms\n", " precision recall f1-score support\n", "\n", " 0 0.80 0.81 0.81 70\n", " 1 0.84 0.83 0.83 82\n", "\n", " accuracy 0.82 152\n", " macro avg 0.82 0.82 0.82 152\n", "weighted avg 0.82 0.82 0.82 152\n", "\n" ] } ], "source": [ "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "\n", "# Standardise the features, then fit the same kind of unregularised logistic regression.\n", "# NOTE(review): fit_intercept=False here, whereas the unscaled model uses fit_intercept=True,\n", "# so scaling is not the only difference between the two fits - confirm this is intended.\n", "pipe = make_pipeline(StandardScaler(), LogisticRegression(penalty='none', fit_intercept=False, max_iter=5000, tol=1E-5))\n", "%time pipe.fit(X_train, y_train)\n", "y_pred_pipe = 
pipe.predict(X_test)\n", "report_pipe = classification_report(y_test, y_pred_pipe)\n", "print(report_pipe)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.83 0.75 0.79 69\n", " 1 0.81 0.87 0.84 83\n", "\n", " accuracy 0.82 152\n", " macro avg 0.82 0.81 0.81 152\n", "weighted avg 0.82 0.82 0.81 152\n", "\n" ] } ], "source": [ "# Report of the unscaled logistic regression, printed again for side-by-side comparison\n", "report_lr = classification_report(y_test, y_pred_lr)\n", "print(report_lr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }