{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from sklearn.datasets import load_iris\n", "from sklearn.dummy import DummyClassifier as skDummyClassifier" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "class DummyClassifier():\n", " def __init__(self, strategy=\"stratified\", random_state=0, constant=None):\n", " self.strategy = strategy\n", " self.constant = constant\n", " self.random_state = random_state\n", "\n", " def fit(self, X, y):\n", " self.classes_, y_train = np.unique(y, return_inverse=True)\n", " self.n_classes_ = self.classes_.shape[0]\n", " self.class_prior_ = np.bincount(y_train) / y.shape[0]\n", " return self\n", "\n", " def predict(self, X):\n", " if self.strategy == \"most_frequent\" or self.strategy == \"prior\":\n", " y = np.full(X.shape[0], self.classes_[np.argmax(self.class_prior_)])\n", " elif self.strategy == \"constant\":\n", " y = np.full(X.shape[0], self.constant)\n", " elif self.strategy == \"uniform\":\n", " rng = np.random.RandomState(self.random_state)\n", " y = self.classes_[rng.randint(self.n_classes_, size=X.shape[0])]\n", " elif self.strategy == \"stratified\":\n", " y = self.classes_[np.argmax(self.predict_proba(X), axis=1)]\n", " return y\n", "\n", " def predict_proba(self, X):\n", " if self.strategy == \"most_frequent\":\n", " p = np.zeros((X.shape[0], self.n_classes_))\n", " p[:, np.argmax(self.class_prior_)] = 1\n", " elif self.strategy == \"prior\":\n", " p = np.tile(self.class_prior_, (X.shape[0], 1))\n", " elif self.strategy == \"constant\":\n", " p = np.zeros((X.shape[0], self.n_classes_))\n", " p[:, self.classes_ == self.constant] = 1\n", " elif self.strategy == \"uniform\":\n", " p = np.full((X.shape[0], self.n_classes_), 1 / self.n_classes_)\n", " elif self.strategy == \"stratified\":\n", " rng = np.random.RandomState(self.random_state)\n", " p = rng.multinomial(1, self.class_prior_, size=X.shape[0])\n", " return p" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "X, y = load_iris(return_X_y=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "clf1 = DummyClassifier(strategy=\"most_frequent\").fit(X, y)\n", "clf2 = skDummyClassifier(strategy=\"most_frequent\").fit(X, y)\n", "assert clf1.n_classes_ == clf2.n_classes_\n", "assert np.array_equal(clf1.classes_, clf2.classes_)\n", "assert np.array_equal(clf1.class_prior_, clf2.class_prior_)\n", "pred1 = clf1.predict(X)\n", "pred2 = clf2.predict(X)\n", "assert np.array_equal(pred1, pred2)\n", "prob1 = clf1.predict_proba(X)\n", "prob2 = clf2.predict_proba(X)\n", "assert np.array_equal(prob1, prob2)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "clf1 = DummyClassifier(strategy=\"prior\").fit(X, y)\n", "clf2 = skDummyClassifier(strategy=\"prior\").fit(X, y)\n", "assert clf1.n_classes_ == clf2.n_classes_\n", "assert np.array_equal(clf1.classes_, clf2.classes_)\n", "assert np.array_equal(clf1.class_prior_, clf2.class_prior_)\n", "pred1 = clf1.predict(X)\n", "pred2 = clf2.predict(X)\n", "assert np.array_equal(pred1, pred2)\n", "prob1 = clf1.predict_proba(X)\n", "prob2 = clf2.predict_proba(X)\n", "assert np.array_equal(prob1, prob2)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "clf1 = DummyClassifier(strategy=\"constant\", constant=0).fit(X, y)\n", "clf2 = skDummyClassifier(strategy=\"constant\", constant=0).fit(X, y)\n", "assert clf1.n_classes_ == clf2.n_classes_\n", "assert np.array_equal(clf1.classes_, clf2.classes_)\n", "assert np.array_equal(clf1.class_prior_, clf2.class_prior_)\n", "pred1 = clf1.predict(X)\n", "pred2 = clf2.predict(X)\n", "assert np.array_equal(pred1, pred2)\n", "prob1 = clf1.predict_proba(X)\n", "prob2 = clf2.predict_proba(X)\n", "assert np.array_equal(prob1, prob2)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "clf1 = DummyClassifier(strategy=\"uniform\", random_state=0).fit(X, y)\n", "clf2 = skDummyClassifier(strategy=\"uniform\", random_state=0).fit(X, y)\n", "assert clf1.n_classes_ == clf2.n_classes_\n", "assert np.array_equal(clf1.classes_, clf2.classes_)\n", "assert np.array_equal(clf1.class_prior_, clf2.class_prior_)\n", "pred1 = clf1.predict(X)\n", "pred2 = clf2.predict(X)\n", "assert np.array_equal(pred1, pred2)\n", "prob1 = clf1.predict_proba(X)\n", "prob2 = clf2.predict_proba(X)\n", "assert np.array_equal(prob1, prob2)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "clf1 = DummyClassifier(strategy=\"stratified\", random_state=0).fit(X, y)\n", "clf2 = skDummyClassifier(strategy=\"stratified\", random_state=0).fit(X, y)\n", "assert clf1.n_classes_ == clf2.n_classes_\n", "assert np.array_equal(clf1.classes_, clf2.classes_)\n", "assert np.array_equal(clf1.class_prior_, clf2.class_prior_)\n", "pred1 = clf1.predict(X)\n", "pred2 = clf2.predict(X)\n", "#assert np.array_equal(pred1, pred2)\n", "prob1 = clf1.predict_proba(X)\n", "prob2 = clf2.predict_proba(X)\n", "assert np.array_equal(prob1, prob2)" ] } ], "metadata": { "kernelspec": { "display_name": "dev", "language": "python", "name": "dev" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }