{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Import necessary dependencies and settings"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler\n",
"import numpy as np\n",
"import pandas as pd\n",
"np.set_printoptions(suppress=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load sample data of video views"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" views | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1295.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 25.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 19000.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 300.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" views\n",
"0 1295.0\n",
"1 25.0\n",
"2 19000.0\n",
"3 5.0\n",
"4 1.0\n",
"5 300.0"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"views = pd.DataFrame([1295., 25., 19000., 5., 1., 300.], columns=['views'])\n",
"views"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Standard Scaler $\\frac{x_i - \\mu}{\\sigma}$"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" views | \n",
" zscore | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1295.0 | \n",
" -0.307214 | \n",
"
\n",
" \n",
" 1 | \n",
" 25.0 | \n",
" -0.489306 | \n",
"
\n",
" \n",
" 2 | \n",
" 19000.0 | \n",
" 2.231317 | \n",
"
\n",
" \n",
" 3 | \n",
" 5.0 | \n",
" -0.492173 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.0 | \n",
" -0.492747 | \n",
"
\n",
" \n",
" 5 | \n",
" 300.0 | \n",
" -0.449877 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" views zscore\n",
"0 1295.0 -0.307214\n",
"1 25.0 -0.489306\n",
"2 19000.0 2.231317\n",
"3 5.0 -0.492173\n",
"4 1.0 -0.492747\n",
"5 300.0 -0.449877"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ss = StandardScaler()\n",
"views['zscore'] = ss.fit_transform(views[['views']])\n",
"views"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"-0.30721413311687235"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vw = np.array(views['views'])\n",
"(vw[0] - np.mean(vw)) / np.std(vw)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Min-Max Scaler $\\frac{x_i - min(x)}{max(x) - min(x)}$"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" views | \n",
" zscore | \n",
" minmax | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1295.0 | \n",
" -0.307214 | \n",
" 0.068109 | \n",
"
\n",
" \n",
" 1 | \n",
" 25.0 | \n",
" -0.489306 | \n",
" 0.001263 | \n",
"
\n",
" \n",
" 2 | \n",
" 19000.0 | \n",
" 2.231317 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 5.0 | \n",
" -0.492173 | \n",
" 0.000211 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.0 | \n",
" -0.492747 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 5 | \n",
" 300.0 | \n",
" -0.449877 | \n",
" 0.015738 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" views zscore minmax\n",
"0 1295.0 -0.307214 0.068109\n",
"1 25.0 -0.489306 0.001263\n",
"2 19000.0 2.231317 1.000000\n",
"3 5.0 -0.492173 0.000211\n",
"4 1.0 -0.492747 0.000000\n",
"5 300.0 -0.449877 0.015738"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mms = MinMaxScaler()\n",
"views['minmax'] = mms.fit_transform(views[['views']])\n",
"views"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.068108847834096528"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(vw[0] - np.min(vw)) / (np.max(vw) - np.min(vw))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Robust Scaler $\\frac{x_i - median(x)}{IQR_{(1,3)}(x)}$"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" views | \n",
" zscore | \n",
" minmax | \n",
" robust | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1295.0 | \n",
" -0.307214 | \n",
" 0.068109 | \n",
" 1.092883 | \n",
"
\n",
" \n",
" 1 | \n",
" 25.0 | \n",
" -0.489306 | \n",
" 0.001263 | \n",
" -0.132690 | \n",
"
\n",
" \n",
" 2 | \n",
" 19000.0 | \n",
" 2.231317 | \n",
" 1.000000 | \n",
" 18.178528 | \n",
"
\n",
" \n",
" 3 | \n",
" 5.0 | \n",
" -0.492173 | \n",
" 0.000211 | \n",
" -0.151990 | \n",
"
\n",
" \n",
" 4 | \n",
" 1.0 | \n",
" -0.492747 | \n",
" 0.000000 | \n",
" -0.155850 | \n",
"
\n",
" \n",
" 5 | \n",
" 300.0 | \n",
" -0.449877 | \n",
" 0.015738 | \n",
" 0.132690 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" views zscore minmax robust\n",
"0 1295.0 -0.307214 0.068109 1.092883\n",
"1 25.0 -0.489306 0.001263 -0.132690\n",
"2 19000.0 2.231317 1.000000 18.178528\n",
"3 5.0 -0.492173 0.000211 -0.151990\n",
"4 1.0 -0.492747 0.000000 -0.155850\n",
"5 300.0 -0.449877 0.015738 0.132690"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rs = RobustScaler()\n",
"views['robust'] = rs.fit_transform(views[['views']])\n",
"views"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1.0928829915560916"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"quartiles = np.percentile(vw, (25., 75.))\n",
"iqr = quartiles[1] - quartiles[0]\n",
"(vw[0] - np.median(vw)) / iqr"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}