import numpy as np
from scipy.optimize import minimize
from scipy.special import expit, xlogy
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.neural_network import MLPClassifier as skMLPClassifier


class MLPClassifier():
    """Multi-layer perceptron classifier with logistic (sigmoid) hidden
    activations, trained by minimizing cross-entropy plus an L2 penalty
    with L-BFGS.

    Mirrors sklearn.neural_network.MLPClassifier(activation="logistic",
    solver="lbfgs"); the comparison cells below assert numeric parity
    (coefficients, intercepts, predictions, probabilities) with it.
    """

    def __init__(self, hidden_layer_sizes=(100,), alpha=0.0001, max_iter=200, random_state=0):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.alpha = alpha              # L2 regularization strength
        self.max_iter = max_iter        # max L-BFGS iterations
        self.random_state = random_state

    def _encode(self, y):
        """One-hot encode labels.

        Binary problems keep a single column (probability of the positive
        class), matching scikit-learn's convention.
        Returns (classes, y_train) where classes is the sorted unique labels.
        """
        classes = np.unique(y)
        y_train = np.zeros((y.shape[0], len(classes)))
        for i, c in enumerate(classes):
            y_train[y == c, i] = 1
        if len(classes) == 2:
            y_train = y_train[:, 1].reshape(-1, 1)
        return classes, y_train

    def _softmax(self, X):
        # Numerically stable row-wise softmax (subtract the row max before
        # exponentiating). Mutates X in place — X is a scratch activation
        # buffer that is being overwritten anyway, so this is intentional.
        X -= X.max(axis=1)[:, np.newaxis]
        X = np.exp(X)
        X /= X.sum(axis=1)[:, np.newaxis]
        return X

    def _pack(self, coefs, intercepts):
        # Flatten all weight matrices followed by all bias vectors into one
        # 1-D vector for scipy.optimize.minimize.
        return np.hstack([cur.ravel() for cur in coefs + intercepts])

    def _unpack(self, packed_coef):
        # Inverse of _pack: restore coefs_/intercepts_ from the flat vector
        # using the index ranges recorded in fit().
        for i in range(self.n_layers_ - 1):
            start, end, shape = self._coef_indptr[i]
            self.coefs_[i] = np.reshape(packed_coef[start:end], shape)
            start, end = self._intercept_indptr[i]
            self.intercepts_[i] = packed_coef[start:end]

    def _forward_pass(self, activations):
        """Fill activations[1:] in place and return the list.

        Hidden layers use the logistic function; the output layer uses
        sigmoid (binary) or softmax (multiclass).
        """
        for i in range(self.n_layers_ - 1):
            activations[i + 1] = np.dot(activations[i], self.coefs_[i]) + self.intercepts_[i]
            if i + 1 != self.n_layers_ - 1:
                activations[i + 1] = expit(activations[i + 1])
        # Output-layer activation. (The original indexed this with the loop
        # variable leaked out of the for-loop; activations[-1] is the same
        # element, stated explicitly.)
        if len(self.classes_) == 2:
            activations[-1] = expit(activations[-1])
        else:
            activations[-1] = self._softmax(activations[-1])
        return activations

    def _cost_grad(self, packed_coef, X, y_train, activations, deltas, coef_grads, intercept_grads):
        """Return (loss, gradient) at the flattened parameter vector.

        Loss is mean cross-entropy plus 0.5 * alpha * ||W||^2 / n_samples
        (intercepts are not penalized); gradients come from standard
        backpropagation. The scratch buffers are reused across calls.
        """
        self._unpack(packed_coef)
        # forward pass
        activations = self._forward_pass(activations)
        if len(self.classes_) == 2:
            loss = -np.sum(xlogy(y_train, activations[-1]) + xlogy(1 - y_train, 1 - activations[-1])) / X.shape[0]
        else:
            loss = -np.sum(xlogy(y_train, activations[-1])) / X.shape[0]
        loss += 0.5 * self.alpha * np.sum([np.dot(c.ravel(), c.ravel()) for c in self.coefs_]) / X.shape[0]
        # backward pass: with sigmoid+BCE or softmax+CE the output delta
        # simplifies to (prediction - target)
        deltas[self.n_layers_ - 2] = activations[-1] - y_train
        coef_grads[self.n_layers_ - 2] = np.dot(activations[self.n_layers_ - 2].T, deltas[self.n_layers_ - 2])
        coef_grads[self.n_layers_ - 2] += (self.alpha * self.coefs_[self.n_layers_ - 2])
        coef_grads[self.n_layers_ - 2] /= X.shape[0]
        intercept_grads[self.n_layers_ - 2] = np.mean(deltas[self.n_layers_ - 2], axis=0)
        for i in range(self.n_layers_ - 2, 0, -1):
            # propagate delta through W, times the logistic derivative a*(1-a)
            deltas[i - 1] = np.dot(deltas[i], self.coefs_[i].T)
            deltas[i - 1] *= activations[i] * (1 - activations[i])
            coef_grads[i - 1] = np.dot(activations[i - 1].T, deltas[i - 1])
            coef_grads[i - 1] += (self.alpha * self.coefs_[i - 1])
            coef_grads[i - 1] /= X.shape[0]
            intercept_grads[i - 1] = np.mean(deltas[i - 1], axis=0)
        grad = self._pack(coef_grads, intercept_grads)
        return loss, grad

    def fit(self, X, y):
        """Train the network with L-BFGS and return self."""
        self.classes_, y_train = self._encode(y)
        self.n_outputs_ = y_train.shape[1]
        layer_units = ([X.shape[1]] + list(self.hidden_layer_sizes) + [self.n_outputs_])
        self.n_layers_ = len(layer_units)
        # Glorot-style uniform init with factor 2 (what scikit-learn uses
        # for the logistic activation); same RandomState draw order as
        # scikit-learn so parameters match exactly.
        self.coefs_, self.intercepts_ = [], []
        rng = np.random.RandomState(self.random_state)
        for i in range(self.n_layers_ - 1):
            init_bound = np.sqrt(2 / (layer_units[i] + layer_units[i + 1]))
            self.coefs_.append(rng.uniform(-init_bound, init_bound, (layer_units[i], layer_units[i + 1])))
            self.intercepts_.append(rng.uniform(-init_bound, init_bound, layer_units[i + 1]))
        # Preallocate scratch buffers reused by every _cost_grad call.
        activations = [X]
        activations.extend(np.empty((X.shape[0], n_fan_out)) for n_fan_out in layer_units[1:])
        deltas = [np.empty_like(a_layer) for a_layer in activations[1:]]
        coef_grads = [np.empty((n_fan_in, n_fan_out)) for n_fan_in, n_fan_out in zip(layer_units[:-1], layer_units[1:])]
        intercept_grads = [np.empty(n_fan_out) for n_fan_out in layer_units[1:]]
        # Record where each parameter lives inside the packed vector so
        # _unpack can restore it.
        self._coef_indptr, self._intercept_indptr = [], []
        start = 0
        for i in range(self.n_layers_ - 1):
            end = start + (self.coefs_[i].shape[0] * self.coefs_[i].shape[1])
            self._coef_indptr.append((start, end, (self.coefs_[i].shape[0], self.coefs_[i].shape[1])))
            start = end
        for i in range(self.n_layers_ - 1):
            end = start + self.intercepts_[i].shape[0]
            self._intercept_indptr.append((start, end))
            start = end
        packed_coef = self._pack(self.coefs_, self.intercepts_)
        res = minimize(fun=self._cost_grad, jac=True, x0=packed_coef,
                       args=(X, y_train, activations, deltas, coef_grads, intercept_grads), method='L-BFGS-B',
                       options={"maxiter": self.max_iter})
        self._unpack(res.x)
        return self

    def _predict(self, X):
        # Run a forward pass and return the raw output-layer activations.
        layer_units = ([X.shape[1]] + list(self.hidden_layer_sizes) + [self.n_outputs_])
        activations = [X]
        activations.extend(np.empty((X.shape[0], n_fan_out)) for n_fan_out in layer_units[1:])
        self._forward_pass(activations)
        y_pred = activations[-1]
        return y_pred

    def predict(self, X):
        """Return the predicted class label for each row of X."""
        y_pred = self._predict(X)
        if len(self.classes_) == 2:
            # single output column = P(positive class); threshold at 0.5
            y_pred = y_pred.ravel()
            indices = (y_pred > 0.5).astype(int)
        else:
            indices = np.argmax(y_pred, axis=1)
        return self.classes_[indices]

    def predict_proba(self, X):
        """Return class probabilities, one column per class in classes_."""
        y_pred = self._predict(X)
        if len(self.classes_) == 2:
            # expand the single positive-class column to [P(neg), P(pos)]
            y_pred = y_pred.ravel()
            return np.vstack([1 - y_pred, y_pred]).T
        else:
            return y_pred


# Comparison cell: iris, one hidden layer (5,) — multiclass parity check.
X, y = load_iris(return_X_y=True)
clf1 = MLPClassifier(hidden_layer_sizes=(5,), max_iter=10, random_state=0).fit(X, y)
clf2 = skMLPClassifier(hidden_layer_sizes=(5,), max_iter=10, activation="logistic", solver="lbfgs",
                       random_state=0, tol=1e-5).fit(X, y)
assert len(clf1.coefs_) == len(clf2.coefs_)
for i in range(len(clf1.coefs_)):
    assert np.allclose(clf1.coefs_[i], clf2.coefs_[i])
# FIX: original asserted len(clf2.intercepts_) == len(clf2.intercepts_)
# (clf2 compared with itself — always true); intended clf1 vs clf2.
assert len(clf1.intercepts_) == len(clf2.intercepts_)
for i in range(len(clf1.intercepts_)):
    assert np.allclose(clf1.intercepts_[i], clf2.intercepts_[i])
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
# Comparison cell: iris, two hidden layers (5, 5) — multiclass parity check.
X, y = load_iris(return_X_y=True)
clf1 = MLPClassifier(hidden_layer_sizes=(5, 5), max_iter=10, random_state=0).fit(X, y)
clf2 = skMLPClassifier(hidden_layer_sizes=(5, 5), max_iter=10, activation="logistic", solver="lbfgs",
                       random_state=0, tol=1e-5).fit(X, y)
assert len(clf1.coefs_) == len(clf2.coefs_)
for i in range(len(clf1.coefs_)):
    assert np.allclose(clf1.coefs_[i], clf2.coefs_[i])
# FIX: original asserted len(clf2.intercepts_) == len(clf2.intercepts_)
# (clf2 compared with itself — always true); intended clf1 vs clf2.
assert len(clf1.intercepts_) == len(clf2.intercepts_)
for i in range(len(clf1.intercepts_)):
    assert np.allclose(clf1.intercepts_[i], clf2.intercepts_[i])
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)

# Comparison cell: breast_cancer, one hidden layer (5,) — binary parity check.
X, y = load_breast_cancer(return_X_y=True)
clf1 = MLPClassifier(hidden_layer_sizes=(5,), max_iter=10, random_state=0).fit(X, y)
clf2 = skMLPClassifier(hidden_layer_sizes=(5,), max_iter=10, activation="logistic", solver="lbfgs",
                       random_state=0, tol=1e-5).fit(X, y)
assert len(clf1.coefs_) == len(clf2.coefs_)
for i in range(len(clf1.coefs_)):
    assert np.allclose(clf1.coefs_[i], clf2.coefs_[i])
# FIX: same self-comparison bug as above; intended clf1 vs clf2.
assert len(clf1.intercepts_) == len(clf2.intercepts_)
for i in range(len(clf1.intercepts_)):
    assert np.allclose(clf1.intercepts_[i], clf2.intercepts_[i])
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
# Comparison cell: breast_cancer, two hidden layers (5, 5) — binary parity check.
X, y = load_breast_cancer(return_X_y=True)
clf1 = MLPClassifier(hidden_layer_sizes=(5, 5), max_iter=10, random_state=0).fit(X, y)
clf2 = skMLPClassifier(hidden_layer_sizes=(5, 5), max_iter=10, activation="logistic", solver="lbfgs",
                       random_state=0, tol=1e-5).fit(X, y)
assert len(clf1.coefs_) == len(clf2.coefs_)
for i in range(len(clf1.coefs_)):
    assert np.allclose(clf1.coefs_[i], clf2.coefs_[i])
# FIX: original asserted len(clf2.intercepts_) == len(clf2.intercepts_)
# (clf2 compared with itself — always true); intended clf1 vs clf2.
assert len(clf1.intercepts_) == len(clf2.intercepts_)
for i in range(len(clf1.intercepts_)):
    assert np.allclose(clf1.intercepts_[i], clf2.intercepts_[i])
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)