{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 1 数据的标准化" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.1 scale:sklearn.preprocessing.scale(X, axis=0, with_mean=True, with_std=True, copy=True)\n", "- with_mean : boolean, True by default, If True, center the data before scaling. 即使得对应axis上的均值为0\n", "- with_std : boolean, True by default,If True, scale the data to unit variance. 即使得对应axis上的方差为1" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean: [0 0 0], \n", "Std: [1. 1. 1.]\n" ] } ], "source": [ "import numpy as np\n", "from sklearn.preprocessing import * \n", "\n", "rg = np.random.RandomState(2017)\n", "X_train = rg.uniform(0, 5, (4,3))\n", "X_scaled = scale(X_train)\n", "print('Mean: {}, \\nStd: {}'.format(X_scaled.mean(axis=0, dtype=np.int), X_scaled.std(axis=0)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### scale的参数axis=0,表示对每列进行标准化,即每列减去此列均值再除以其方差" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [], "source": [ "def f(array):\n", " result = (array - np.mean(array)) / np.std(array, ddof=0) # ddof默认为0\n", " return result\n", "\n", "scale_result = np.apply_along_axis(f, axis=0, arr=X_train)\n", "assert np.allclose(X_scaled, scale_result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.2 StandardScaler:sklearn.preprocessing.StandardScaler(copy=True, with_mean=True, with_std=True)\n", "\n", "可通过fit方法获取某特征的均值与方差,再运用transform方法标准化其他特征\n", "\n", "#### 优点:\n", "1)提升模型的收敛速度\n", "\n", "2)使得各指标值都处于同一个量纲上,提升模型的精度" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>sepal length (cm)</th>\n", " <th>sepal width (cm)</th>\n", " <th>petal length (cm)</th>\n", " <th>petal width (cm)</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>143</th>\n", " <td>6.8</td>\n", " <td>3.2</td>\n", " <td>5.9</td>\n", " <td>2.3</td>\n", " </tr>\n", " <tr>\n", " <th>115</th>\n", " <td>6.4</td>\n", " <td>3.2</td>\n", " <td>5.3</td>\n", " <td>2.3</td>\n", " </tr>\n", " <tr>\n", " <th>102</th>\n", " <td>7.1</td>\n", " <td>3.0</td>\n", " <td>5.9</td>\n", " <td>2.1</td>\n", " </tr>\n", " <tr>\n", " <th>51</th>\n", " <td>6.4</td>\n", " <td>3.2</td>\n", " <td>4.5</td>\n", " <td>1.5</td>\n", " </tr>\n", " <tr>\n", " <th>76</th>\n", " <td>6.8</td>\n", " <td>2.8</td>\n", " <td>4.8</td>\n", " <td>1.4</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)\n", "143 6.8 3.2 5.9 2.3\n", "115 6.4 3.2 5.3 2.3\n", "102 7.1 3.0 5.9 2.1\n", "51 6.4 3.2 4.5 1.5\n", "76 6.8 2.8 4.8 1.4" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.datasets import load_iris \n", "\n", "dataset = load_iris()\n", "np.random.seed(2017)\n", "iris = pd.DataFrame(dataset.data, columns=dataset.feature_names).sample(5)\n", "iris" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "#### 对某数据框直接调用fit_transform时,等价于单独对每列分别进行scale操作" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.372678 , 0.75 , 1.0932857 , 0.96958969],\n", " [-1.11803399, 0.75 , 0.03526728, 0.96958969],\n", " [ 1.49071198, -0.5 , 1.0932857 , 0.45927933],\n", " [-1.11803399, 0.75 , -1.37542395, -1.07165176],\n", " [ 0.372678 , -1.75 , -0.84641474, -1.32680694]])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scaler = StandardScaler()\n", "iris_scaled = scaler.fit_transform(iris) \n", "iris_scaled" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([0, 0, 0, 0]), array([1., 1., 1., 1.]))" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris_scaled.mean(axis=0, dtype=np.int), iris_scaled.std(axis=0)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 等价于对每列单独调用scale\n", "np.allclose(scaler.fit_transform(iris), scale(iris))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.2 数据的归一化:将数据映射到指定的范围,用于去除不同维度数据的量纲以及量纲单位" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1), copy=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 转换过程\n", "X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n", "\n", "X_scaled = X_std * (max - min) + min" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MinMaxScaler(copy=True, feature_range=(0, 1))" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n", "scaler = MinMaxScaler()\n", "scaler.fit(data)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0. , 0. ],\n", " [0.25, 0.25],\n", " [0.5 , 0.5 ],\n", " [1. , 1. ]])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scaler.transform(data)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([[0. , 0. ],\n", " [0.25, 0.25],\n", " [0.5 , 0.5 ],\n", " [1. , 1. ]])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 或者直接调用fit_transform \n", "scaler.fit_transform(data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3 MaxAbsScaler" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([[ 0.5, -1. , 1. ],\n", " [ 1. , 0. , 0. ],\n", " [ 0. , 1. 
, -0.5]])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train = np.array([[ 1., -1., 2.],\n", " [ 2., 0., 0.],\n", " [ 0., 1., -1.]])\n", "\n", "max_abs_scaler = MaxAbsScaler()\n", "X_train_maxabs = max_abs_scaler.fit_transform(X_train)\n", "X_train_maxabs " ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([2., 1., 2.])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "max_abs_scaler.scale_ " ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([[-1.5, -1. , 2. ]])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test = np.array([[ -3., -1., 4.]])\n", "X_test_maxabs = max_abs_scaler.transform(X_test)\n", "X_test_maxabs " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.4 RobustScaler" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 转化过程:(x-median) / IQR, IQR等于75分位点减去25分位点处的值" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[-0.12669367, 0.61018033, 1.22235048],\n", " [-2.02310438, -0.03205827, 0.34615926],\n", " [ 0.12669367, -3.19747007, -0.70069397],\n", " [ 1.2167336 , 0.03205827, -0.34615926]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.random.seed(2018)\n", "X_train = np.random.randn(4,3)\n", "\n", "max_abs_scaler = RobustScaler()\n", "X_train_maxabs = max_abs_scaler.fit_transform(X_train)\n", "X_train_maxabs " ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-0.20977884, 0.50624895, 0.34544916])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 求各列的中位数\n", "max_abs_scaler.center_ " ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "array([0.52874591, 0.12390117, 1.47498622])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 求各列IQR值\n", "max_abs_scaler.scale_" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 验证max_abs_scaler.scale_返回的是否为IQR值\n", "IQR = np.percentile(X_train, 75, axis=0) - np.percentile(X_train, 25, axis=0)\n", "np.allclose(max_abs_scaler.scale_ ,IQR)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2 正则化" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.1 L1正则化:每行各元素除以每行的L1范数" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "L1正则化:\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>0</th>\n", " <th>1</th>\n", " <th>2</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " 
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 2 Normalization" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.1 L1 normalization: divide each element of a row by the row's L1 norm" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "L1 normalization:\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>0</th>\n", " <th>1</th>\n", " <th>2</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.25</td>\n", " <td>-0.25</td>\n", " <td>0.5</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1.00</td>\n", " <td>0.00</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.00</td>\n", " <td>0.50</td>\n", " <td>-0.5</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " 0 1 2\n", "0 0.25 -0.25 0.5\n", "1 1.00 0.00 0.0\n", "2 0.00 0.50 -0.5" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = [[1, -1, 2], [2, 0, 0], [0, 1, -1]]\n", "df = pd.DataFrame(x, columns=list('ABC'))\n", "\n", "x_norm1 = normalize(x, norm='l1')\n", "df_norm1 = pd.DataFrame(x_norm1)\n", "print('L1 normalization:')\n", "df_norm1" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>A</th>\n", " <th>B</th>\n", " <th>C</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.25</td>\n", " <td>-0.25</td>\n", " <td>0.5</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1.00</td>\n", " <td>0.00</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.00</td>\n", " <td>0.50</td>\n", " <td>-0.5</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " A B C\n", "0 0.25 -0.25 0.5\n", "1 1.00 0.00 0.0\n", "2 0.00 0.50 -0.5" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# manual L1 normalization: divide each row by the sum of its absolute values\n", "df_norm1 = df.copy()\n", "for idx in df.index:\n", "    l1_row = sum(abs(df.iloc[idx]))\n", "    df_norm1.iloc[idx] = df.iloc[idx] / l1_row\n", "\n", "df_norm1" ] },
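{ "cell_type": "markdown", "metadata": {}, "source": [ "#### As a side note, normalize has an estimator counterpart, sklearn.preprocessing.Normalizer (norm='l1', 'l2' or 'max'), which is convenient inside a Pipeline; a minimal sketch:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import Normalizer\n", "\n", "# Normalizer is stateless: fit learns nothing, transform rescales each row\n", "normalizer = Normalizer(norm='l1')\n", "np.allclose(normalizer.fit_transform(x), x_norm1)  # expected to be True" ] },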
right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>0</th>\n", " <th>1</th>\n", " <th>2</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.408248</td>\n", " <td>-0.408248</td>\n", " <td>0.816497</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.000000</td>\n", " <td>0.707107</td>\n", " <td>-0.707107</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " 0 1 2\n", "0 0.408248 -0.408248 0.816497\n", "1 1.000000 0.000000 0.000000\n", "2 0.000000 0.707107 -0.707107" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_norm2 = normalize(x, norm='l2')\n", "df_norm2 = pd.DataFrame(x_norm2)\n", "df_norm2" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>A</th>\n", " <th>B</th>\n", " <th>C</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.408248</td>\n", " <td>-0.408248</td>\n", " <td>0.816497</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.000000</td>\n", " <td>0.707107</td>\n", " <td>-0.707107</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " A B C\n", "0 0.408248 -0.408248 0.816497\n", "1 1.000000 0.000000 0.000000\n", "2 0.000000 0.707107 -0.707107" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_norm2 = df.copy()\n", "for idx in df.index:\n", " l2_row = np.sqrt(sum(np.square(df.iloc[idx])))\n", " df_norm2.iloc[idx] = df.iloc[idx] / l2_row\n", "\n", "df_norm2 " ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }