{
 "metadata": {
  "name": "",
  "signature": "sha256:1c6173a6f0cec1737eb8ab133f3b3570ba765d9d236a56e865e46f9c36df1cea"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Binning Data In Pandas\n",
      "\n",
      "- **Author:** [Chris Albon](http://www.chrisalbon.com/), [@ChrisAlbon](https://twitter.com/chrisalbon)\n",
      "- **Date:** -\n",
      "- **Repo:** [Python 3 code snippets for data science](https://github.com/chrisalbon/code_py)\n",
      "- **Note:**"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Import modules"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Create dataframe"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], \n",
      "        'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], \n",
      "        'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], \n",
      "        'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n",
      "        'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}\n",
      "df = pd.DataFrame(raw_data, columns = ['regiment', 'company', 'name', 'preTestScore', 'postTestScore'])\n",
      "df"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>regiment</th>\n",
        "      <th>company</th>\n",
        "      <th>name</th>\n",
        "      <th>preTestScore</th>\n",
        "      <th>postTestScore</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0 </th>\n",
        "      <td> Nighthawks</td>\n",
        "      <td> 1st</td>\n",
        "      <td>   Miller</td>\n",
        "      <td>  4</td>\n",
        "      <td> 25</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1 </th>\n",
        "      <td> Nighthawks</td>\n",
        "      <td> 1st</td>\n",
        "      <td> Jacobson</td>\n",
        "      <td> 24</td>\n",
        "      <td> 94</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2 </th>\n",
        "      <td> Nighthawks</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>      Ali</td>\n",
        "      <td> 31</td>\n",
        "      <td> 57</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3 </th>\n",
        "      <td> Nighthawks</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>   Milner</td>\n",
        "      <td>  2</td>\n",
        "      <td> 62</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4 </th>\n",
        "      <td>   Dragoons</td>\n",
        "      <td> 1st</td>\n",
        "      <td>    Cooze</td>\n",
        "      <td>  3</td>\n",
        "      <td> 70</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5 </th>\n",
        "      <td>   Dragoons</td>\n",
        "      <td> 1st</td>\n",
        "      <td>    Jacon</td>\n",
        "      <td>  4</td>\n",
        "      <td> 25</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6 </th>\n",
        "      <td>   Dragoons</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>   Ryaner</td>\n",
        "      <td> 24</td>\n",
        "      <td> 94</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7 </th>\n",
        "      <td>   Dragoons</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>     Sone</td>\n",
        "      <td> 31</td>\n",
        "      <td> 57</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8 </th>\n",
        "      <td>     Scouts</td>\n",
        "      <td> 1st</td>\n",
        "      <td>    Sloan</td>\n",
        "      <td>  2</td>\n",
        "      <td> 62</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9 </th>\n",
        "      <td>     Scouts</td>\n",
        "      <td> 1st</td>\n",
        "      <td>    Piger</td>\n",
        "      <td>  3</td>\n",
        "      <td> 70</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>     Scouts</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>    Riani</td>\n",
        "      <td>  2</td>\n",
        "      <td> 62</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
        "      <td>     Scouts</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>      Ali</td>\n",
        "      <td>  3</td>\n",
        "      <td> 70</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>12 rows \u00d7 5 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "      regiment company      name  preTestScore  postTestScore\n",
        "0   Nighthawks     1st    Miller             4             25\n",
        "1   Nighthawks     1st  Jacobson            24             94\n",
        "2   Nighthawks     2nd       Ali            31             57\n",
        "3   Nighthawks     2nd    Milner             2             62\n",
        "4     Dragoons     1st     Cooze             3             70\n",
        "5     Dragoons     1st     Jacon             4             25\n",
        "6     Dragoons     2nd    Ryaner            24             94\n",
        "7     Dragoons     2nd      Sone            31             57\n",
        "8       Scouts     1st     Sloan             2             62\n",
        "9       Scouts     1st     Piger             3             70\n",
        "10      Scouts     2nd     Riani             2             62\n",
        "11      Scouts     2nd       Ali             3             70\n",
        "\n",
        "[12 rows x 5 columns]"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Define bins as 0 to 25, 25 to 50, 50 to 75, 75 to 100"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "bins = [0, 25, 50, 75, 100]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 22
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Create names for the four groups"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "group_names = ['Low', 'Okay', 'Good', 'Great']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 23
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Cut postTestScore"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "categories = pd.cut(df['postTestScore'], bins, labels=group_names)\n",
      "df['categories'] = pd.cut(df['postTestScore'], bins, labels=group_names)\n",
      "categories"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 27,
       "text": [
        "   Low\n",
        " Great\n",
        "  Good\n",
        "  Good\n",
        "  Good\n",
        "   Low\n",
        " Great\n",
        "  Good\n",
        "  Good\n",
        "  Good\n",
        "  Good\n",
        "  Good\n",
        "Name: postTestScore, Levels (4): Index(['Low', 'Okay', 'Good', 'Great'], dtype=object)"
       ]
      }
     ],
     "prompt_number": 27
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Count the number of observations in each category"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.value_counts(df['categories'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### View the dataframe"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>regiment</th>\n",
        "      <th>company</th>\n",
        "      <th>name</th>\n",
        "      <th>preTestScore</th>\n",
        "      <th>postTestScore</th>\n",
        "      <th>scoresBinned</th>\n",
        "      <th>categories</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0 </th>\n",
        "      <td> Nighthawks</td>\n",
        "      <td> 1st</td>\n",
        "      <td>   Miller</td>\n",
        "      <td>  4</td>\n",
        "      <td> 25</td>\n",
        "      <td>   (0, 25]</td>\n",
        "      <td>   Low</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1 </th>\n",
        "      <td> Nighthawks</td>\n",
        "      <td> 1st</td>\n",
        "      <td> Jacobson</td>\n",
        "      <td> 24</td>\n",
        "      <td> 94</td>\n",
        "      <td> (75, 100]</td>\n",
        "      <td> Great</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2 </th>\n",
        "      <td> Nighthawks</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>      Ali</td>\n",
        "      <td> 31</td>\n",
        "      <td> 57</td>\n",
        "      <td>  (50, 75]</td>\n",
        "      <td>  Good</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3 </th>\n",
        "      <td> Nighthawks</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>   Milner</td>\n",
        "      <td>  2</td>\n",
        "      <td> 62</td>\n",
        "      <td>  (50, 75]</td>\n",
        "      <td>  Good</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4 </th>\n",
        "      <td>   Dragoons</td>\n",
        "      <td> 1st</td>\n",
        "      <td>    Cooze</td>\n",
        "      <td>  3</td>\n",
        "      <td> 70</td>\n",
        "      <td>  (50, 75]</td>\n",
        "      <td>  Good</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5 </th>\n",
        "      <td>   Dragoons</td>\n",
        "      <td> 1st</td>\n",
        "      <td>    Jacon</td>\n",
        "      <td>  4</td>\n",
        "      <td> 25</td>\n",
        "      <td>   (0, 25]</td>\n",
        "      <td>   Low</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6 </th>\n",
        "      <td>   Dragoons</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>   Ryaner</td>\n",
        "      <td> 24</td>\n",
        "      <td> 94</td>\n",
        "      <td> (75, 100]</td>\n",
        "      <td> Great</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7 </th>\n",
        "      <td>   Dragoons</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>     Sone</td>\n",
        "      <td> 31</td>\n",
        "      <td> 57</td>\n",
        "      <td>  (50, 75]</td>\n",
        "      <td>  Good</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8 </th>\n",
        "      <td>     Scouts</td>\n",
        "      <td> 1st</td>\n",
        "      <td>    Sloan</td>\n",
        "      <td>  2</td>\n",
        "      <td> 62</td>\n",
        "      <td>  (50, 75]</td>\n",
        "      <td>  Good</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9 </th>\n",
        "      <td>     Scouts</td>\n",
        "      <td> 1st</td>\n",
        "      <td>    Piger</td>\n",
        "      <td>  3</td>\n",
        "      <td> 70</td>\n",
        "      <td>  (50, 75]</td>\n",
        "      <td>  Good</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>     Scouts</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>    Riani</td>\n",
        "      <td>  2</td>\n",
        "      <td> 62</td>\n",
        "      <td>  (50, 75]</td>\n",
        "      <td>  Good</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
        "      <td>     Scouts</td>\n",
        "      <td> 2nd</td>\n",
        "      <td>      Ali</td>\n",
        "      <td>  3</td>\n",
        "      <td> 70</td>\n",
        "      <td>  (50, 75]</td>\n",
        "      <td>  Good</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>12 rows \u00d7 7 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 40,
       "text": [
        "      regiment company      name  preTestScore  postTestScore scoresBinned  \\\n",
        "0   Nighthawks     1st    Miller             4             25      (0, 25]   \n",
        "1   Nighthawks     1st  Jacobson            24             94    (75, 100]   \n",
        "2   Nighthawks     2nd       Ali            31             57     (50, 75]   \n",
        "3   Nighthawks     2nd    Milner             2             62     (50, 75]   \n",
        "4     Dragoons     1st     Cooze             3             70     (50, 75]   \n",
        "5     Dragoons     1st     Jacon             4             25      (0, 25]   \n",
        "6     Dragoons     2nd    Ryaner            24             94    (75, 100]   \n",
        "7     Dragoons     2nd      Sone            31             57     (50, 75]   \n",
        "8       Scouts     1st     Sloan             2             62     (50, 75]   \n",
        "9       Scouts     1st     Piger             3             70     (50, 75]   \n",
        "10      Scouts     2nd     Riani             2             62     (50, 75]   \n",
        "11      Scouts     2nd       Ali             3             70     (50, 75]   \n",
        "\n",
        "   categories  \n",
        "0         Low  \n",
        "1       Great  \n",
        "2        Good  \n",
        "3        Good  \n",
        "4        Good  \n",
        "5         Low  \n",
        "6       Great  \n",
        "7        Good  \n",
        "8        Good  \n",
        "9        Good  \n",
        "10       Good  \n",
        "11       Good  \n",
        "\n",
        "[12 rows x 7 columns]"
       ]
      }
     ],
     "prompt_number": 40
    }
   ],
   "metadata": {}
  }
 ]
}