{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple example of logistic regression with scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.linear_model import LogisticRegression"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read data\n",
    "Data are from the [Wikipedia article on logistic regression](https://en.wikipedia.org/wiki/Logistic_regression)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data: 1. hours studied, 2. passed (0/1)\n",
    "filename = \"https://www.physi.uni-heidelberg.de/~reygers/lectures/2021/ml/data/exam.txt\"\n",
    "# raw string: '\\s+' in a normal string literal is an invalid escape sequence\n",
    "df = pd.read_csv(filename, engine='python', sep=r'\\s+')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reshape the single feature into a column, i.e. shape (n_samples, 1)\n",
    "x = df['hours_studied'].to_numpy().reshape(-1, 1)\n",
    "y = df['passed'].to_numpy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fit the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# C is the inverse regularization strength: C=np.inf turns the penalty off,\n",
    "# giving a plain maximum-likelihood fit. This replaces penalty='none', which\n",
    "# was deprecated in scikit-learn 1.2 and removed in 1.4.\n",
    "clf = LogisticRegression(C=np.inf, fit_intercept=True)\n",
    "clf.fit(x, y);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Calculate predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# evaluate the fitted model on a fine grid from 0 to 6 hours\n",
    "hours_studied = np.linspace(0., 6., 1000).reshape(-1, 1)\n",
    "# one column per class; column 1 (class passed = 1) is the one plotted below\n",
    "y_pred = clf.predict_proba(hours_studied)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Plot result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "\n"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# overlay the fitted probability curve on the observed pass/fail points\n",
    "fig, ax = plt.subplots()\n",
    "df.plot.scatter(x='hours_studied', y='passed', ax=ax)\n",
    "ax.plot(hours_studied, y_pred[:, 1])\n",
    "ax.set_xlabel(\"preparation time in hours\", fontsize=14)\n",
    "ax.set_ylabel(\"probability of passing exam\", fontsize=14)\n",
    "fig.savefig(\"03_ml_basics_logistic_regression.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'C': inf,\n",
       " 'class_weight': None,\n",
       " 'dual': False,\n",
       " 'fit_intercept': True,\n",
       " 'intercept_scaling': 1,\n",
       " 'l1_ratio': None,\n",
       " 'max_iter': 100,\n",
       " 'multi_class': 'auto',\n",
       " 'n_jobs': None,\n",
       " 'penalty': 'l2',\n",
       " 'random_state': None,\n",
       " 'solver': 'lbfgs',\n",
       " 'tol': 0.0001,\n",
       " 'verbose': 0,\n",
       " 'warm_start': False}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Coefficient: [[1.50464522]]\n",
      "Intercept: [-4.07771764]\n"
     ]
    }
   ],
   "source": [
    "print('Coefficient: ', clf.coef_)\n",
    "print('Intercept: ', clf.intercept_)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}