From 530e313b90632818201f99a5e32964fdd9d41a58 Mon Sep 17 00:00:00 2001 From: Joerg Marks Date: Mon, 3 Apr 2023 12:26:38 +0200 Subject: [PATCH] updates --- notebooks/02_fit_exp_fit_iMinuit.ipynb | 284 ++++++++++ notebooks/02_fit_histFit.ipynb | 126 +++++ notebooks/02_fit_scikitFit.ipynb | 147 ++++++ notebooks/03_ml_basics_activation.ipynb | 148 ++++++ ...sics_ex_2_display_Cifar10_Greyscale.ipynb} | 0 .../03_ml_basics_simple_neural_network.ipynb | 493 ++++++++++++++++++ ...le_neural_network_exercise_solution.ipynb} | 0 7 files changed, 1198 insertions(+) create mode 100644 notebooks/02_fit_exp_fit_iMinuit.ipynb create mode 100644 notebooks/02_fit_histFit.ipynb create mode 100644 notebooks/02_fit_scikitFit.ipynb create mode 100644 notebooks/03_ml_basics_activation.ipynb rename notebooks/{03_ml_basics_ex_1_display_Cifar10_Greyscale.ipynb => 03_ml_basics_ex_2_display_Cifar10_Greyscale.ipynb} (100%) create mode 100755 notebooks/03_ml_basics_simple_neural_network.ipynb rename notebooks/{simple_neural_network_exercise_solution.ipynb => 03_ml_basics_simple_neural_network_exercise_solution.ipynb} (100%) diff --git a/notebooks/02_fit_exp_fit_iMinuit.ipynb b/notebooks/02_fit_exp_fit_iMinuit.ipynb new file mode 100644 index 0000000..9eb6b7d --- /dev/null +++ b/notebooks/02_fit_exp_fit_iMinuit.ipynb @@ -0,0 +1,284 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Example fit for the usage of iminuit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "plt.rcParams[\"font.size\"] = 20\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.array([0.2,0.4,0.6,0.8,1.,1.2,1.4,1.6,1.8,2.,2.2,2.4,2.6,2.8,3.,3.2,3.4,3.6,3.8,4.],dtype='d')\n", + "dy = 
np.array([0.04,0.021,0.035,0.03,0.029,0.019,0.024,0.018,0.019,0.022,0.02,0.025,0.018,0.024,0.019,0.021,0.03,0.019,0.03,0.024 ], dtype='d')\n", + "y = np.array([1.792,1.695,1.541,1.514,1.427,1.399,1.388,1.270,1.262,1.228,1.189,1.182,1.121,1.129,1.124,1.089,1.092,1.084,1.058,1.057 ], dtype='d')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define fit functions -an exponential" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def xp(a, b , c):\n", + " return a * np.exp(b*x) + c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "least-squares function: sum of data residuals squared" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def LS(a,b,c):\n", + " return np.sum((y - xp(a,b,c)) ** 2 / dy ** 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import Minuit object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from iminuit import Minuit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Minuit instance using LS function to minimize" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "LS.errordef = Minuit.LEAST_SQUARES\n", + "m = Minuit(LS, a=0.9, b=-0.7 , c=0.95)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run migrad , parameter c is now fixed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.migrad()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "release fix on \"c\" and minimize again" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.fixed[\"c\"] = False\n", + "m.migrad()" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "Get covariance information" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.hesse()\n", + "m.params\n", + "m.covariance" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy covariance information to numpy arrays" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a 2D contour of the function around the minimum for 2 parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.minos()\n", + "print (m.merrors['a']) # Print control information of parameter a\n", + "m.draw_profile('b', subtract_min=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Minos algorithm uses the profile likelihood method to compute (generally asymmetric) confidence intervals. This can be plotted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.draw_mncontour('a', 'b')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Access fit results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(m.values,m.errors)\n", + "a_fit = m.values[\"a\"]\n", + "b_fit = m.values[\"b\"]\n", + "c_fit = m.values[\"c\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prepare data to display fitted function " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_plot = np.linspace( 0.1, 4.5 , 100 )\n", + "y_fit = a_fit * np.exp(b_fit*x_plot) + c_fit " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "plot data and fit results with matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"plt.figure()\n", + "plt.errorbar(x, y, dy , fmt=\"o\")\n", + "plt.plot(x_plot, y_fit)\n", + "plt.title(\"iminuit exponential Fit\")\n", + "plt.xlim(-0.1, 4.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/02_fit_histFit.ipynb b/notebooks/02_fit_histFit.ipynb new file mode 100644 index 0000000..b7dac4e --- /dev/null +++ b/notebooks/02_fit_histFit.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Test Histogramm Fitting in pyROOT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import math\n", + "#import ROOT\n", + "from ROOT import TCanvas, TFile, TProfile, TNtuple, TH1D, TH2D, TF1 \n", + "from ROOT import gROOT, gBenchmark, gRandom, gSystem" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Read data previously used from text file and put it in a numpy array" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = np.genfromtxt('D0Mass.txt', dtype='d')\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create histogramm and draw" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d0 = TH1D( 'd0', 'D0 Mass ', 200, 1700. , 2000. 
)\n", + "for x in data :\n", + " d0.Fill(x)\n", + "c = TCanvas( 'myC', 'Dynamic Filling Example',700, 500 )\n", + "d0.Draw()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To display the notebook draw the canvas." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %jsroot on\n", + "c.Draw()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "perform Fit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "func = TF1(\"func\", 'gaus', 1840. , 1880.)\n", + "myfit = d0.Fit(func,\"S\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c.Draw()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/02_fit_scikitFit.ipynb b/notebooks/02_fit_scikitFit.ipynb new file mode 100644 index 0000000..ccfc503 --- /dev/null +++ b/notebooks/02_fit_scikitFit.ipynb @@ -0,0 +1,147 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fit 3rd order Polynomial to graph data using scikit-learn, more infos\n", + "https://www.datatechnotes.com/2018/06/polynomial-regression-fitting-in-python.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "plt.rcParams[\"font.size\"] = 20\n", + "\n", + "import numpy as np\n", + "\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.preprocessing 
import PolynomialFeatures" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='d')\n", + "dx = np.array([0.1,0.1,0.5,0.1,0.5,0.1,0.5,0.1,0.5,0.1], dtype='d')\n", + "y = np.array([1.1 ,2.3 ,2.7 ,3.2 ,3.1 ,2.4 ,1.7 ,1.5 ,1.5 ,1.7 ], dtype='d')\n", + "dy = np.array([0.15,0.22,0.29,0.39,0.31,0.21,0.13,0.15,0.19,0.13], dtype='d')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " building polynomial model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "polyModel = PolynomialFeatures(degree = 3)\n", + "xpol = polyModel.fit_transform(x.reshape(-1, 1))\n", + "preg = polyModel.fit(xpol,y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Building linear model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "linearModel = LinearRegression(fit_intercept = True)\n", + "linearModel.fit(xpol, y[:, np.newaxis])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plotting\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_plot = np.linspace(0.1,10.1,200)\n", + "polyfit = linearModel.predict(preg.fit_transform(x_plot.reshape(-1, 1)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " plot data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.errorbar(x, y, dy , dx, fmt=\"o\")\n", + "plt.plot(x_plot, polyfit )\n", + "plt.title(\"scikit-learn Fit Test\")\n", + "plt.xlim(-0.1, 10.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/03_ml_basics_activation.ipynb b/notebooks/03_ml_basics_activation.ipynb new file mode 100644 index 0000000..9ee5845 --- /dev/null +++ b/notebooks/03_ml_basics_activation.ipynb @@ -0,0 +1,148 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d0ce4228", + "metadata": {}, + "source": [ + "# plot activation functions\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3c8093a", + "metadata": {}, + "outputs": [], + "source": [ + "# Importing the required libraries\n", + "import math\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a134fb45", + "metadata": {}, + "outputs": [], + "source": [ + "# The definition of activation functions mathematically\n", + "# Sigmoid Function\n", + "def sigmoid(x):\n", + " a = []\n", + " for i in x:\n", + " a.append(1/(1+math.exp(-i)))\n", + " return a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "954c32ed", + "metadata": {}, + "outputs": [], + "source": [ + "# Hyperbolic Tanjant Function\n", + "def tanh(x, derivative=False):\n", + " if (derivative == True):\n", + " return (1 - (x ** 2))\n", + " return np.tanh(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c0e5136", + "metadata": {}, + "outputs": [], + "source": [ + "# ReLU Function\n", + "def re(x):\n", + " b = []\n", + " for i in x:\n", + " if i<0:\n", + " b.append(0)\n", + " else:\n", + " b.append(i)\n", + " return b" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "f29cac64", + "metadata": {}, + "outputs": [], + "source": [ + "# Leaky ReLU Function\n", + "def lr(x):\n", + " b = []\n", + " for i in x:\n", + " if i<0:\n", + " b.append(i/10)\n", + " else:\n", + " b.append(i)\n", + " return b\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1854dc7b", + "metadata": {}, + "outputs": [], + "source": [ + "# Determining the intervals to be created for the graph\n", + "x = np.arange(-3., 3., 0.1)\n", + "sig = sigmoid(x)\n", + "tanh = tanh(x)\n", + "relu = re(x)\n", + "leaky_relu = lr(x)\n", + "swish = sig*x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c535ccb", + "metadata": {}, + "outputs": [], + "source": [ + "# Displaying the functions\n", + "line_1, = plt.plot(x,sig, label='Sigmoid')\n", + "line_2, = plt.plot(x,tanh, label='Tanh')\n", + "line_3, = plt.plot(x,relu, label='ReLU')\n", + "line_4, = plt.plot(x,leaky_relu, label='Leaky ReLU')\n", + "line_5, = plt.plot(x,swish, label='Swish')\n", + "plt.legend(handles=[line_1, line_2, line_3, line_4, line_5])\n", + "plt.axhline(y=0, color='k')\n", + "plt.axvline(x=0, color='k')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03_ml_basics_ex_1_display_Cifar10_Greyscale.ipynb b/notebooks/03_ml_basics_ex_2_display_Cifar10_Greyscale.ipynb similarity index 100% rename from notebooks/03_ml_basics_ex_1_display_Cifar10_Greyscale.ipynb rename to notebooks/03_ml_basics_ex_2_display_Cifar10_Greyscale.ipynb diff --git a/notebooks/03_ml_basics_simple_neural_network.ipynb 
b/notebooks/03_ml_basics_simple_neural_network.ipynb new file mode 100755 index 0000000..8fbd480 --- /dev/null +++ b/notebooks/03_ml_basics_simple_neural_network.ipynb @@ -0,0 +1,493 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# A simple neural network with one hidden layer in pure Python\n", + "\n", + "## Introduction\n", + "We consider a simple feed-forward neural network with one hidden layer:" + ] + }, + { + "attachments": { + "48b1ed6e-8e2b-4883-82ac-a2bbed6e2885.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![nn.png](attachment:48b1ed6e-8e2b-4883-82ac-a2bbed6e2885.png)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example the input vector of the neural network has two features, i.e., the input is a two-dimensional vector:\n", + "\n", + "$$\n", + "\\mathbf x = (x_0, x_1).\n", + "$$\n", + "\n", + "We consider a set of $n$ vectors as training data. The training data can therefore be written as an $n \\times 2$ matrix where each row represents a feature vector:\n", + "\n", + "$$ \n", + "X = \n", + "\\begin{pmatrix}\n", + "x_{00} & x_{01} \\\\\n", + "x_{10} & x_{11} \\\\\n", + "\\vdots & \\vdots \\\\\n", + "x_{n-1\\,0} & x_{n-1\\,1} \n", + "\\end{pmatrix} $$\n", + "\n", + "The known labels (1 = 'signal', 0 = 'background') are stored in an $n$-dimensional column vector $\\mathbf y$.\n", + "\n", + "In the following, $n_1$ denotes the number of neurons in the hidden layer. 
The weights for the connections from the input layer (layer 0) to the hidden layer (layer 1) are given by the following matrix:\n", + "\n", + "$$\n", + "W^{(1)} = \n", + "\\begin{pmatrix}\n", + "w_{00}^{(1)} & \\dots & w_{0 \\, n_1-1}^{(1)} \\\\\n", + "w_{10}^{(1)} & \\dots & w_{1 \\, n_1-1}^{(1)} \n", + "\\end{pmatrix}\n", + "$$\n", + "\n", + "Each neuron in the hidden layer is assigned a bias $\\mathbf b^{(1)} = (b^{(1)}_0, \\ldots, b^{(1)}_{n_1-1})$. The neuron in the output layer has the bias $\\mathbf b^{(2)}$ and is connected to the hidden layer by the $n_1 \\times 1$ weight matrix $W^{(2)}$. With $\\sigma$ denoting the sigmoid activation function, the output values of the network for the matrix $X$ of input feature vectors are given by\n", + "\n", + "$$\n", + "\\begin{align}\n", + "Z^{(1)} &= X W^{(1)} + \\mathbf b^{(1)} \\\\\n", + "A^{(1)} &= \\sigma(Z^{(1)}) \\\\\n", + "Z^{(2)} &= A^{(1)} W^{(2)} + \\mathbf b^{(2)} \\\\\n", + "A^{(2)} &= \\sigma(Z^{(2)})\n", + "\\end{align}\n", + "$$\n", + "\n", + "The loss function for a given set of weights is given by\n", + "\n", + "$$ L = \\sum_{i=0}^{n-1} (y_{\\mathrm{pred},i} - y_{\\mathrm{true},i})^2 $$\n", + "\n", + "We can now calculate the gradient of the loss function w.r.t. the weights. 
With the definition $\\hat L = (y_\\mathrm{pred} - y_\\mathrm{true})^2$, the gradients for the weights from the output layer to the hidden layer are given by: \n", + "\n", + "$$ \\frac{\\partial \\tilde L}{\\partial w_i^{(2)}} = \\frac{\\partial \\tilde L}{a_k^{(2)}} \\frac{a_k^{(2)}}{\\partial w_i^{(2)}} = \\frac{\\partial \\tilde L}{a_k^{(2)}} \\frac{a_k^{(2)}}{z_k^{(2)}} \\frac{z_k^{(2)}}{\\partial w_i^{(2)}} = 2 (a_k^{(2)} - y_k) a_k^{(2)} (1 - a_k^{(2)}) a_{k,i}^{(1)}$$\n", + "\n", + "Applying the chain rule further, we also obtain the gradient for the weights from the input layer to the hidden layer read: \n", + "\n", + "$$ \\frac{\\partial \\tilde L}{\\partial w_{ij}^{(1)}} = \\frac{\\partial \\tilde L}{\\partial a_k^{(2)}} \\frac{\\partial a_k^{(2)}}{\\partial z_k^{(2)}} \\frac{\\partial z_k^{(2)}}{\\partial a_{k,j}^{(1)}} \\frac{\\partial a_{k,j}^{(1)}}{\\partial z_{k,j}^{(1)}} \\frac{\\partial z_{k,j}^{(1)}}{\\partial w_{ij}^{(1)}} $$" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## A simple neural network class" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# A simple feed-forward neutral network with on hidden layer\n", + "# see also https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6\n", + "\n", + "import numpy as np\n", + "\n", + "class NeuralNetwork:\n", + " def __init__(self, x, y):\n", + " n1 = 3 # number of neurons in the hidden layer\n", + " self.input = x\n", + " self.weights1 = np.random.rand(self.input.shape[1],n1)\n", + " self.bias1 = np.random.rand(n1)\n", + " self.weights2 = np.random.rand(n1,1)\n", + " self.bias2 = np.random.rand(1) \n", + " self.y = y\n", + " self.output = np.zeros(y.shape)\n", + " self.learning_rate = 0.01\n", + " self.n_train = 0\n", + " self.loss_history = []\n", + "\n", + " def sigmoid(self, x):\n", + " return 1/(1+np.exp(-x))\n", + "\n", + " def 
sigmoid_derivative(self, x):\n", + " return x * (1 - x)\n", + "\n", + " def feedforward(self):\n", + " self.layer1 = self.sigmoid(self.input @ self.weights1 + self.bias1)\n", + " self.output = self.sigmoid(self.layer1 @ self.weights2 + self.bias2)\n", + "\n", + " def backprop(self):\n", + "\n", + " # delta1: [m, 1], m = number of training data\n", + " delta1 = 2 * (self.y - self.output) * self.sigmoid_derivative(self.output)\n", + "\n", + " # Gradient w.r.t. weights from hidden to output layer: [n1, 1] matrix, n1 = # neurons in hidden layer\n", + " d_weights2 = self.layer1.T @ delta1\n", + " d_bias2 = np.sum(delta1) \n", + " \n", + " # shape of delta2: [m, n1], m = number of training data, n1 = # neurons in hidden layer\n", + " delta2 = (delta1 @ self.weights2.T) * self.sigmoid_derivative(self.layer1)\n", + " d_weights1 = self.input.T @ delta2\n", + " d_bias1 = np.ones(delta2.shape[0]) @ delta2 \n", + " \n", + " # update weights and biases\n", + " self.weights1 += self.learning_rate * d_weights1\n", + " self.weights2 += self.learning_rate * d_weights2\n", + "\n", + " self.bias1 += self.learning_rate * d_bias1\n", + " self.bias2 += self.learning_rate * d_bias2\n", + "\n", + " def train(self, X, y):\n", + " self.output = np.zeros(y.shape)\n", + " self.input = X\n", + " self.y = y\n", + " self.feedforward()\n", + " self.backprop()\n", + " self.n_train += 1\n", + " if (self.n_train %1000 == 0):\n", + " loss = np.sum((self.y - self.output)**2)\n", + " print(\"loss: \", loss)\n", + " self.loss_history.append(loss)\n", + " \n", + " def predict(self, X):\n", + " self.output = np.zeros(y.shape)\n", + " self.input = X\n", + " self.feedforward()\n", + " return self.output\n", + " \n", + " def loss_history(self):\n", + " return self.loss_history\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create toy data\n", + "We create three toy data sets\n", + "1. two moon-like distributions\n", + "2. circles\n", + "3. 
linearly separable data sets" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#sphx-glr-auto-examples-classification-plot-classifier-comparison-py\n", + "import numpy as np\n", + "from sklearn.datasets import make_moons, make_circles, make_classification\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "X, y = make_classification(\n", + " n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1\n", + ")\n", + "rng = np.random.RandomState(2)\n", + "X += 2 * rng.uniform(size=X.shape)\n", + "linearly_separable = (X, y)\n", + "\n", + "datasets = [\n", + " make_moons(n_samples=200, noise=0.1, random_state=0),\n", + " make_circles(n_samples=200, noise=0.1, factor=0.5, random_state=1),\n", + " linearly_separable,\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create training and test data set" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# datasets: 0 = moons, 1 = circles, 2 = linearly separable\n", + "X, y = datasets[1]\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.4, random_state=42\n", + ")\n", + "\n", + "x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5\n", + "y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "loss: 28.591431249971087\n", + "loss: 19.174944855091578\n", + "loss: 18.300519116661075\n", + "loss: 5.44035901972833\n", + "loss: 2.2654992441410906\n", + "loss: 1.6923656607186892\n", + "loss: 1.3715971480249087\n", + "loss: 
1.1473150221090382\n", + "loss: 0.9774346378363713\n", + "loss: 0.8457117685917934\n", + "loss: 0.7429652120737472\n", + "loss: 0.6621808985042399\n", + "loss: 0.5977165926831687\n", + "loss: 0.545283043346378\n", + "loss: 0.5017902977940301\n", + "loss: 0.46506515287723293\n", + "loss: 0.4335772706016494\n", + "loss: 0.40623169342909965\n", + "loss: 0.3822273847227754\n", + "loss: 0.36096446182458697\n", + "loss: 0.3419836665195889\n", + "loss: 0.3249263905044797\n", + "loss: 0.3095077414631703\n", + "loss: 0.29549797484687557\n", + "loss: 0.282709394404349\n", + "loss: 0.27098690712728085\n", + "loss: 0.2602010759266338\n", + "loss: 0.2502429170283057\n", + "loss: 0.24101994107129043\n", + "loss: 0.23245309736167535\n", + "loss: 0.2244743850815736\n", + "loss: 0.2170249645242441\n", + "loss: 0.21005364833790718\n", + "loss: 0.2035156851277511\n", + "loss: 0.19737177048767093\n", + "loss: 0.19158723674048994\n", + "loss: 0.18613138439559326\n", + "loss: 0.1809769269368725\n", + "loss: 0.17609952694233805\n", + "loss: 0.17147740633361158\n", + "loss: 0.16709101719249247\n", + "loss: 0.16292276236867193\n", + "loss: 0.15895675725575403\n", + "loss: 0.15517862578972155\n", + "loss: 0.1515753250400271\n", + "loss: 0.1481349938036084\n", + "loss: 0.1448468214395504\n", + "loss: 0.1417009338445398\n", + "loss: 0.13868829400248622\n", + "loss: 0.13580061497353096\n", + "loss: 0.1330302835389617\n", + "loss: 0.1303702930059422\n", + "loss: 0.127814183912036\n", + "loss: 0.12535599156436183\n", + "loss: 0.12299019950967911\n", + "loss: 0.1207116981660866\n", + "loss: 0.11851574795923153\n", + "loss: 0.11639794640004714\n", + "loss: 0.11435419862018448\n", + "loss: 0.11238069094815246\n", + "loss: 0.11047386716576169\n", + "loss: 0.10863040713256283\n", + "loss: 0.10684720750694056\n", + "loss: 0.1051213643275356\n", + "loss: 0.10345015724866172\n", + "loss: 0.10183103524916717\n", + "loss: 0.10026160365640727\n", + "loss: 0.09873961234613571\n", + "loss: 
0.09726294499576621\n", + "loss: 0.09582960928281026\n", + "loss: 0.09443772793285743\n", + "loss: 0.09308553053235864\n", + "loss: 0.0917713460310074\n", + "loss: 0.09049359586685884\n", + "loss: 0.08925078765463015\n", + "loss: 0.08804150938406197\n", + "loss: 0.08686442408087547\n", + "loss: 0.08571826488782217\n", + "loss: 0.08460183052776402\n", + "loss: 0.08351398111459095\n", + "loss: 0.08245363428124586\n", + "loss: 0.08141976159718958\n", + "loss: 0.08041138525037475\n", + "loss: 0.07942757497120295\n", + "loss: 0.07846744517812901\n", + "loss: 0.07753015232648876\n", + "loss: 0.0766148924438704\n", + "loss: 0.0757208988368859\n", + "loss: 0.07484743995559193\n", + "loss: 0.07399381740306453\n", + "loss: 0.07315936407873515\n", + "loss: 0.07234344244512234\n", + "loss: 0.07154544290849069\n", + "loss: 0.07076478230479659\n", + "loss: 0.07000090248301835\n", + "loss: 0.06925326897863726\n", + "loss: 0.06852136977065329\n", + "loss: 0.06780471411604974\n", + "loss: 0.06710283145614956\n", + "loss: 0.06641527038972668\n" + ] + } + ], + "source": [ + "y_train = y_train.reshape(-1, 1)\n", + "\n", + "nn = NeuralNetwork(X_train, y_train)\n", + "\n", + "for i in range(100000):\n", + " nn.train(X_train, y_train)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot the loss vs. the number of epochs" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'loss')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.plot(nn.loss_history)\n", + "plt.xlabel(\"# epochs / 1000\")\n", + "plt.ylabel(\"loss\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "from matplotlib.colors import ListedColormap\n", + "\n", + "cm = plt.cm.RdBu\n", + "cm_bright = ListedColormap([\"#FF0000\", \"#0000FF\"])\n", + "\n", + "xv = np.linspace(x_min, x_max, 10)\n", + "yv = np.linspace(y_min, y_max, 10)\n", + "Xv, Yv = np.meshgrid(xv, yv)\n", + "XYpairs = np.vstack([ Xv.reshape(-1), Yv.reshape(-1)])\n", + "zv = nn.predict(XYpairs.T)\n", + "Zv = zv.reshape(Xv.shape)\n", + "\n", + "fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 7))\n", + "ax.set_aspect(1)\n", + "cn = ax.contourf(Xv, Yv, Zv, cmap=\"coolwarm_r\", alpha=0.4)\n", + "\n", + "ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors=\"k\")\n", + "\n", + "# Plot the testing points\n", + "ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.4, edgecolors=\"k\")\n", + "\n", + "ax.set_xlim(x_min, x_max)\n", + "ax.set_ylim(y_min, y_max)\n", + "# ax.set_xticks(())\n", + "# ax.set_yticks(())\n", + "\n", + "fig.colorbar(cn)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/simple_neural_network_exercise_solution.ipynb b/notebooks/03_ml_basics_simple_neural_network_exercise_solution.ipynb similarity index 100% rename from notebooks/simple_neural_network_exercise_solution.ipynb rename to 
notebooks/03_ml_basics_simple_neural_network_exercise_solution.ipynb