{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.datasets import load_boston\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.ensemble import AdaBoostRegressor as skAdaBoostRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AdaBoostRegressor():\n",
    "    def __init__(self, n_estimators=50, random_state=0):\n",
    "        self.n_estimators = n_estimators\n",
    "        self.random_state = 0\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        sample_weight = np.full(X.shape[0], 1 / X.shape[0])\n",
    "        self.estimators_ = []\n",
    "        self.estimator_weights_ = np.zeros(self.n_estimators)\n",
    "        self.estimator_errors_ = np.ones(self.n_estimators)\n",
    "        MAX_INT = np.iinfo(np.int32).max\n",
    "        rng = np.random.RandomState(self.random_state)\n",
    "        for i in range(self.n_estimators):\n",
    "            est = DecisionTreeRegressor(max_depth=3,\n",
    "                                        random_state=rng.randint(MAX_INT))\n",
    "            cdf = np.cumsum(sample_weight)\n",
    "            cdf /= cdf[-1]\n",
    "            uniform_samples = rng.random_sample(X.shape[0])\n",
    "            bootstrap_idx = cdf.searchsorted(uniform_samples, side='right')\n",
    "            est.fit(X[bootstrap_idx], y[bootstrap_idx])\n",
    "            y_predict = est.predict(X)\n",
    "            error_vect = np.abs(y_predict - y)\n",
    "            error_vect /= error_vect.max()\n",
    "            estimator_error = (sample_weight * error_vect).sum()\n",
    "            beta = estimator_error / (1 - estimator_error)\n",
    "            estimator_weight = np.log(1 / beta)\n",
    "            sample_weight *= np.power(beta, 1 - error_vect)\n",
    "            sample_weight /= np.sum(sample_weight)\n",
    "            self.estimators_.append(est)\n",
    "            self.estimator_errors_[i] = estimator_error\n",
    "            self.estimator_weights_[i] = estimator_weight\n",
    "        return self\n",
    "\n",
    "    def predict(self, X):\n",
    "        predictions = np.array([est.predict(X) for est in self.estimators_]).T\n",
    "        sorted_idx = np.argsort(predictions, axis=1)\n",
    "        weight_cdf = np.cumsum(self.estimator_weights_[sorted_idx], axis=1)\n",
    "        median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis]\n",
    "        median_idx = median_or_above.argmax(axis=1)\n",
    "        median_estimators = sorted_idx[np.arange(X.shape[0]), median_idx]\n",
    "        return predictions[np.arange(X.shape[0]), median_estimators]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = load_boston(return_X_y=True)\n",
    "clf1 = AdaBoostRegressor(random_state=0).fit(X, y)\n",
    "clf2 = skAdaBoostRegressor(random_state=0).fit(X, y)\n",
    "assert np.allclose(clf1.estimator_errors_, clf2.estimator_errors_)\n",
    "assert np.allclose(clf1.estimator_weights_, clf2.estimator_weights_)\n",
    "pred1 = clf1.predict(X)\n",
    "pred2 = clf2.predict(X)\n",
    "assert np.array_equal(pred1, pred2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dev",
   "language": "python",
   "name": "dev"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}