# simpleClassifier.py
# G. Cowan / RHUL Physics / November 2021
# Simple program to illustrate classification with scikit-learn
#
# The "# %%" markers delimit the cells of the original notebook.

# %%
import scipy as sp
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn import metrics

# %%
# read the data in from files,
# assign target values 1 for signal, 0 for background
sigData = np.loadtxt('signal.txt')
nSig = sigData.shape[0]
sigTargets = np.ones(nSig)

bkgData = np.loadtxt('background.txt')
nBkg = bkgData.shape[0]
bkgTargets = np.zeros(nBkg)

# concatenate arrays into data X and targets y
# (signal rows first, then background rows, so X and y stay aligned)
X = np.concatenate((sigData, bkgData), 0)
y = np.concatenate((sigTargets, bkgTargets))

# split into equal-sized training and test samples; the fixed random_state
# makes the split reproducible from run to run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1)

# %%
# Create classifier object and train
# Add code here to include other classifiers (MLP, BDT,...)
clf = LDA()
clf.fit(X_train, y_train)

# %%
# Evaluate accuracy using the test data.
# If available, use the decision function, else (e.g. for MLP) use predict_proba
# Adjust threshold value tCut or pCut as appropriate

X_bkg_test = X_test[y_test==0]
X_sig_test = X_test[y_test==1]
y_bkg_test = y_test[y_test==0]
y_sig_test = y_test[y_test==1]
if hasattr(clf, "decision_function"):
    tCut = 0.
    # the comparison already yields boolean arrays (True = classified as signal)
    y_bkg_pred = clf.decision_function(X_bkg_test) >= tCut
    y_sig_pred = clf.decision_function(X_sig_test) >= tCut
else:
    pCut = 0.5
    # column 1 of predict_proba is the probability of the class with target 1 (signal)
    y_bkg_pred = clf.predict_proba(X_bkg_test)[:,1] >= pCut
    y_sig_pred = clf.predict_proba(X_sig_test)[:,1] >= pCut

# every entry of y_sig_test is 1, so the accuracy on this subsample is the
# fraction of signal events that pass the cut
power = metrics.accuracy_score(y_sig_test, y_sig_pred)   # = Prob(t >= tCut|sig)
print('power of test with respect to signal = ', power)

# Add code here to obtain the background efficiency
# = size of test alpha = Prob(t >= tCut|bkg)

# %%
# make histogram of decision function
matplotlib.rcParams.update({'font.size':14})   # set all font sizes (before any text is drawn)
plt.figure()                                   # new window

# Pick the test statistic once. predict_proba must only be called in the
# fallback branch: calling it unconditionally would fail for classifiers that
# provide decision_function but no predict_proba.
if hasattr(clf, "decision_function"):
    tTest = clf.decision_function(X_test)      # if available use decision_function
else:
    tTest = clf.predict_proba(X_test)[:,1]     # for e.g. MLP need to use predict_proba
tBkg = tTest[y_test==0]
tSig = tTest[y_test==1]

# common binning for both samples, spanning the full range of t rounded
# outwards to integers
nBins = 50
tMin = np.floor(np.min(tTest))
tMax = np.ceil(np.max(tTest))
bins = np.linspace(tMin, tMax, nBins+1)

plt.xlabel('decision function $t$', labelpad=3)
plt.ylabel('$f(t)$', labelpad=3)
# density=True normalises each histogram to unit area so the shapes can be
# compared even when the sample sizes differ
n, bins, patches = plt.hist(tSig, bins=bins, density=True, histtype='step', fill=False,
                            color='dodgerblue', label='signal')
n, bins, patches = plt.hist(tBkg, bins=bins, density=True, histtype='step', fill=False,
                            color='red', alpha=0.5, label='background')
plt.legend()   # identify the two curves so the figure can be read on its own
plt.savefig("decision_function_hist.pdf", format='pdf')

plt.show()