{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.19.1\n",
      "1.14.2\n"
     ]
    }
   ],
   "source": [
    "from __future__ import print_function  # print() behaves the same on Python 2 and 3\n",
    "\n",
    "import numpy as np\n",
    "import sklearn\n",
    "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
    "\n",
    "# Fixed seed so the random data (and every output below) is reproducible.\n",
    "np.random.seed(10)\n",
    "\n",
    "print(sklearn.__version__)\n",
    "print(np.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Data Generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "rows = 5\n",
    "column = 2\n",
    "# Raw input shared by every section below; none of the scaling functions modify it.\n",
    "data = np.random.rand(rows, column)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Standardisation \n",
    "\n",
    "* Mean = 0; Stdev = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "\n",
      "[[ 1.3365329 -1.08044519]\n",
      " [ 0.7564286 1.1806552 ]\n",
      " [ 0.18698985 -0.44674652]\n",
      " [-1.07897872 1.21707505]\n",
      " [-1.20097264 -0.87053854]]\n"
     ]
    }
   ],
   "source": [
    "def py_standardisation(X):\n",
    "    \"\"\"Standardise each column of X to zero mean and unit standard deviation.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : array-like of shape (rows, features)\n",
    "        Input values; never modified.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        New float array holding (X - column mean) / column std, where std is\n",
    "        the population standard deviation (NumPy's default, ddof=0).\n",
    "    \"\"\"\n",
    "    X = np.asarray(X, dtype=float)\n",
    "    std = X.std(axis=0)\n",
    "    # A constant column has zero spread: divide by 1 so it maps to 0 instead of NaN.\n",
    "    std = np.where(std == 0, 1.0, std)\n",
    "    return (X - X.mean(axis=0)) / std\n",
    "\n",
    "\n",
    "def skl_standardisation(X):\n",
    "    \"\"\"Standardise X with scikit-learn's StandardScaler and return the result.\n",
    "\n",
    "    The scaler is fitted on the X that is passed in, not on any global array.\n",
    "    \"\"\"\n",
    "    scaler = StandardScaler()\n",
    "    return scaler.fit_transform(X)\n",
    "\n",
    "\n",
    "# Element-wise comparison with a floating-point tolerance.\n",
    "print(np.allclose(skl_standardisation(data), py_standardisation(data)))\n",
    "print()\n",
    "print(skl_standardisation(data))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mean Normalisation\n",
    "\n",
    "* Mean = 0; Range = [-1, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.52671132 -0.4702658 ]\n",
      " [ 0.29809929 0.51388239]\n",
      " [ 0.07369042 -0.19444726]\n",
      " [-0.42521236 0.5297342 ]\n",
      " [-0.47328868 -0.37890353]]\n"
     ]
    }
   ],
   "source": [
    "def py_mean_normalisation(X):\n",
    "    \"\"\"Centre each column of X on its mean and divide by the column's range.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : array-like of shape (rows, features)\n",
    "        Input values; never modified.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        New float array holding (X - column mean) / (column max - column min).\n",
    "    \"\"\"\n",
    "    X = np.asarray(X, dtype=float)\n",
    "    value_range = X.max(axis=0) - X.min(axis=0)\n",
    "    # A constant column has zero range: divide by 1 so it maps to 0 instead of NaN.\n",
    "    value_range = np.where(value_range == 0, 1.0, value_range)\n",
    "    return (X - X.mean(axis=0)) / value_range\n",
    "\n",
    "\n",
    "print(py_mean_normalisation(data))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Min-Max Scaling\n",
    "\n",
    "* Range = [0, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "\n",
      "[[1. 0. ]\n",
      " [0.77138797 0.98414819]\n",
      " [0.5469791 0.27581854]\n",
      " [0.04807631 1. ]\n",
      " [0. 0.09136227]]\n"
     ]
    }
   ],
   "source": [
    "def py_min_max_scaling(X):\n",
    "    \"\"\"Rescale each column of X linearly so its minimum is 0 and its maximum is 1.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : array-like of shape (rows, features)\n",
    "        Input values; never modified.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        New float array holding (X - column min) / (column max - column min).\n",
    "    \"\"\"\n",
    "    X = np.asarray(X, dtype=float)\n",
    "    col_min = X.min(axis=0)\n",
    "    value_range = X.max(axis=0) - col_min\n",
    "    # A constant column has zero range: divide by 1 so it maps to 0 instead of NaN.\n",
    "    value_range = np.where(value_range == 0, 1.0, value_range)\n",
    "    return (X - col_min) / value_range\n",
    "\n",
    "\n",
    "def skl_min_max_scaling(X):\n",
    "    \"\"\"Rescale X with scikit-learn's MinMaxScaler (default settings) and return the result.\"\"\"\n",
    "    min_max_scaler = MinMaxScaler()\n",
    "    return min_max_scaler.fit_transform(X)\n",
    "\n",
    "\n",
    "# Element-wise comparison with a floating-point tolerance.\n",
    "print(np.allclose(skl_min_max_scaling(data), py_min_max_scaling(data)))\n",
    "print()\n",
    "print(skl_min_max_scaling(data))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### I would recommend that the reader/practitioner work through these once with pen and paper to get a feel for them"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}