# simpleClassifier.py
# G. Cowan / RHUL Physics / October 2017
# Simple program to illustrate classification with scikit-learn

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

# read the data in from files,
# assign target values 1 for signal, 0 for background
sigData = np.loadtxt('signal.txt')
nSig = sigData.shape[0]
sigTargets = np.ones(nSig)
bkgData = np.loadtxt('background.txt')
nBkg = bkgData.shape[0]
bkgTargets = np.zeros(nBkg)

# concatenate arrays into data X and targets y
X = np.concatenate((sigData, bkgData), 0)
X = X[:, 0:2]                # at first, only use x1 and x2
y = np.concatenate((sigTargets, bkgTargets))

# split into equal-sized training and test samples
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5,
                                                    random_state=1)

# create classifier object and train
clf = MLPClassifier(hidden_layer_sizes=(5,), activation='tanh',
                    max_iter=2000, random_state=0)
clf.fit(X_train, y_train)

# evaluate its accuracy using the test data
y_pred = clf.predict(X_test)
print('classification accuracy =', metrics.accuracy_score(y_test, y_pred))

# make a scatter plot
fig, ax = plt.subplots(1, 1)
plt.gcf().subplots_adjust(bottom=0.15)
plt.gcf().subplots_adjust(left=0.15)
ax.set_xlim((-2.5, 3.5))
ax.set_ylim((-2, 4))
x0, x1 = ax.get_xlim()
y0, y1 = ax.get_ylim()
ax.set_aspect(abs(x1-x0)/abs(y1-y0))        # make square plot
xtick_spacing = 0.5
ytick_spacing = 2.0
ax.xaxis.set_major_locator(ticker.MultipleLocator(xtick_spacing))
ax.yaxis.set_major_locator(ticker.MultipleLocator(ytick_spacing))
plt.scatter(sigData[:, 0], sigData[:, 1], s=3, color='dodgerblue', marker='o')
plt.scatter(bkgData[:, 0], bkgData[:, 1], s=3, color='red', marker='o')

# add decision boundary to scatter plot:
# evaluate the classifier on a fine grid covering the plane
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = .01        # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# depending on the classifier, use decision_function or predict_proba
if hasattr(clf, "decision_function"):
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
else:
    Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
Z = Z.reshape(xx.shape)
plt.contour(xx, yy, Z, 1, colors='k')
plt.xlabel(r'$x_{1}$', labelpad=0)
plt.ylabel(r'$x_{2}$', labelpad=15)
plt.savefig("scatterplot.pdf", format='pdf')

# make histogram of decision function
plt.figure()                                    # new window
matplotlib.rcParams.update({'font.size': 14})   # set all font sizes

# depending on the classifier, use decision_function or predict_proba
if hasattr(clf, "decision_function"):
    tTest = clf.decision_function(X_test)
else:
    tTest = clf.predict_proba(X_test)[:, 1]
tBkg = tTest[y_test == 0]
tSig = tTest[y_test == 1]
nBins = 50
tMin = np.floor(np.min(tTest))
tMax = np.ceil(np.max(tTest))
bins = np.linspace(tMin, tMax, nBins+1)
plt.xlabel('decision function $t$', labelpad=3)
plt.ylabel('$f(t)$', labelpad=3)
n, bins, patches = plt.hist(tSig, bins=bins, density=True, histtype='step',
                            fill=False, color='dodgerblue')
n, bins, patches = plt.hist(tBkg, bins=bins, density=True, histtype='step',
                            fill=False, color='red', alpha=0.5)
plt.savefig("decision_function_hist.pdf", format='pdf')
plt.show()
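
# ---------------------------------------------------------------------
# Optional extension (not part of the original script): a minimal sketch
# of how the same test-sample scores tTest could also be summarized with
# an ROC curve and its area (AUC), using the standard sklearn.metrics
# functions roc_curve and roc_auc_score.  Note that, placed here, it
# runs only after plt.show() returns, i.e. once the plot windows above
# have been closed.
fpr, tpr, thresholds = metrics.roc_curve(y_test, tTest)   # false/true positive rates
print('area under ROC curve =', metrics.roc_auc_score(y_test, tTest))
plt.figure()
plt.plot(tpr, 1. - fpr, color='k')   # signal efficiency vs. background rejection
plt.xlabel('signal efficiency')
plt.ylabel('background rejection')
plt.savefig("roc_curve.pdf", format='pdf')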