{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.19.1\n",
      "1.14.2\n"
     ]
    }
   ],
   "source": [
    "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
    "import sklearn\n",
    "import numpy as np\n",
    "np.random.seed(10)\n",
    "\n",
    "print sklearn.__version__\n",
    "print np.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Data Generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rows = 5\n",
    "column = 2\n",
    "data = np.random.rand(rows, column)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Standardisation \n",
    "\n",
    "* Mean = 0; Stdev = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "\n",
      "[[ 1.3365329  -1.08044519]\n",
      " [ 0.7564286   1.1806552 ]\n",
      " [ 0.18698985 -0.44674652]\n",
      " [-1.07897872  1.21707505]\n",
      " [-1.20097264 -0.87053854]]\n"
     ]
    }
   ],
   "source": [
    "def py_standardisation(X):\n",
    "    rows, features = X.shape\n",
    "    data = np.zeros((rows, ), dtype='float')\n",
    "    for f in xrange(features):\n",
    "        X[:, f] = (X[:,f] - X[:,f].mean(axis=0)) / X[:,f].std()\n",
    "    return X\n",
    "\n",
    "def skl_standardisation(X):\n",
    "    scaler = StandardScaler()\n",
    "    return scaler.fit_transform(data)\n",
    "\n",
    "\n",
    "print skl_standardisation(data).all() == py_standardisation(data).all()\n",
    "print \n",
    "print skl_standardisation(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mean Normalisation\n",
    "\n",
    "* Mean = 0; Range = [-1, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.52671132 -0.4702658 ]\n",
      " [ 0.29809929  0.51388239]\n",
      " [ 0.07369042 -0.19444726]\n",
      " [-0.42521236  0.5297342 ]\n",
      " [-0.47328868 -0.37890353]]\n"
     ]
    }
   ],
   "source": [
    "def py_mean_normalisation(X):\n",
    "    rows, features = X.shape\n",
    "    data = np.zeros((rows, ), dtype='float')\n",
    "    for f in xrange(features):\n",
    "        X[:, f] = (X[:,f] - X[:,f].mean(axis=0)) / (X[:,f].max() - X[:,f].min())\n",
    "    return X\n",
    "\n",
    "print py_mean_normalisation(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Min-Max Scaling\n",
    "\n",
    "* Range = [0, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "\n",
      "[[1.         0.        ]\n",
      " [0.77138797 0.98414819]\n",
      " [0.5469791  0.27581854]\n",
      " [0.04807631 1.        ]\n",
      " [0.         0.09136227]]\n"
     ]
    }
   ],
   "source": [
    "def py_min_max_scaling(X):\n",
    "    rows , features = X.shape\n",
    "    data = np.zeros((rows, ), dtype='float')\n",
    "    for f in xrange(features):\n",
    "        X[:, f] = (X[:,f] - X[:,f].min()) / (X[:,f].max() - X[:,f].min())\n",
    "    return X\n",
    "\n",
    "def skl_min_max_scaling(X):\n",
    "    min_max_scaler = MinMaxScaler()\n",
    "    return min_max_scaler.fit_transform(X)\n",
    "\n",
    "print skl_min_max_scaling(data).all() == py_min_max_scaling(data).all()\n",
    "print \n",
    "print skl_min_max_scaling(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### I would recommend the reader/practitioner to solve them on copy pen once to get a feel of it"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}