{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SF Salaries\n",
"In this notebook i explore San Francisco city employee salary data, which can be found on kaggle: \n",
"https://www.kaggle.com/kaggle/sf-salaries"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Importing libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"\n",
"import seaborn as sns\n",
"sns.set_style(\"whitegrid\")\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"plt.rcParams['patch.force_edgecolor']=True\n",
"\n",
"from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n",
"import cufflinks as cf\n",
"init_notebook_mode(connected=True)\n",
"cf.go_offline()\n",
"import plotly.graph_objs as go"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Doing the imports"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\gilew\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2698: DtypeWarning:\n",
"\n",
"Columns (3,4,5,6,12) have mixed types. Specify dtype option on import or set low_memory=False.\n",
"\n"
]
}
],
"source": [
"salaries = pd.read_csv('Salaries.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Because of the DtypeWarning, I have to convert the types of the following columns: 3,4,5,6. I will take care of column 12 later."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Converting types"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"for column in ['BasePay', 'OvertimePay', 'OtherPay', 'Benefits']:\n",
" salaries[column]=pd.to_numeric(salaries[column], errors='coerce')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Checking the head of the data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" EmployeeName | \n",
" JobTitle | \n",
" BasePay | \n",
" OvertimePay | \n",
" OtherPay | \n",
" Benefits | \n",
" TotalPay | \n",
" TotalPayBenefits | \n",
" Year | \n",
" Notes | \n",
" Agency | \n",
" Status | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" NATHANIEL FORD | \n",
" GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
" 167411.18 | \n",
" 0.00 | \n",
" 400184.25 | \n",
" NaN | \n",
" 567595.43 | \n",
" 567595.43 | \n",
" 2011 | \n",
" NaN | \n",
" San Francisco | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" GARY JIMENEZ | \n",
" CAPTAIN III (POLICE DEPARTMENT) | \n",
" 155966.02 | \n",
" 245131.88 | \n",
" 137811.38 | \n",
" NaN | \n",
" 538909.28 | \n",
" 538909.28 | \n",
" 2011 | \n",
" NaN | \n",
" San Francisco | \n",
" NaN | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" ALBERT PARDINI | \n",
" CAPTAIN III (POLICE DEPARTMENT) | \n",
" 212739.13 | \n",
" 106088.18 | \n",
" 16452.60 | \n",
" NaN | \n",
" 335279.91 | \n",
" 335279.91 | \n",
" 2011 | \n",
" NaN | \n",
" San Francisco | \n",
" NaN | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" CHRISTOPHER CHONG | \n",
" WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
" 77916.00 | \n",
" 56120.71 | \n",
" 198306.90 | \n",
" NaN | \n",
" 332343.61 | \n",
" 332343.61 | \n",
" 2011 | \n",
" NaN | \n",
" San Francisco | \n",
" NaN | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" PATRICK GARDNER | \n",
" DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
" 134401.60 | \n",
" 9737.00 | \n",
" 182234.59 | \n",
" NaN | \n",
" 326373.19 | \n",
" 326373.19 | \n",
" 2011 | \n",
" NaN | \n",
" San Francisco | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id EmployeeName JobTitle \\\n",
"0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
"1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
"2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
"3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
"4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
"\n",
" BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n",
"0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n",
"1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n",
"2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n",
"3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n",
"4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n",
"\n",
" Year Notes Agency Status \n",
"0 2011 NaN San Francisco NaN \n",
"1 2011 NaN San Francisco NaN \n",
"2 2011 NaN San Francisco NaN \n",
"3 2011 NaN San Francisco NaN \n",
"4 2011 NaN San Francisco NaN "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"salaries.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cleaning the data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I create a heatmap, to see the distribution of Nans in our data."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAE3CAYAAAAJ55KLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHlFJREFUeJzt3XucnVV59vHfBIiJHCpgFBAFRLxF\nUCiCRUSQ6uuJ1sOLchCUg0pFKeCpKAKtFYpYtaJoEUmEcJSDCgoFDwgipQiIYN7qJXKqJoJEoYgJ\nISHz/nGvnezEDBiynrWzJ9f385nP7JnJPOvJ7JlrP8863GtkdHQUMzNrY8KgT8DMbFXi0DUza8ih\na2bWkEPXzKwhh66ZWUMOXTOzhlZ/nK+Prj7xGZ2ewIJHZtJ1G25n5W2j18782Xd03s4aT322n5sn\n0M54e24a/X9Gxvqar3TNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05dM3M\nGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8iha2bW\nkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OG\nHLpmZg05dM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk\n0DUza8iha2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNraPVBn4AZwOSNXtZ5\nGwsemdl5G2aPx1e6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05\ndM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8ih\na2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5d\nM7OGHLpmZg05dM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuia\nmTXk0DUza8iha2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fM\nrCGHrplZQw5dM7OGHLpmZg05dM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZm\nDTl0zcwacuiamTXk0DUza8iha2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNr\nyKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05dM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlD\nDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8iha2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy\n6JqZNeTQNTNryKFrZtaQQ9fMrKHVB30C49HcWdcM+hTMbCXl0O3A5I1e1nkbCx6Z2XkbZlafuxfM\nzBpy6JqZNeTQNTNryKFrZtaQB9LMbJUy6IFuX+mamTXk0DUza8iha2bWkEPXzKwhh66ZWUMOXTOz\nhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05dM3MGnLompk1\n5NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8iha2bWkEPXzKwh\nh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05\ndM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8ih\na2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNjYyOjg76HMzMVhm+0jUza8iha2bWkEPXzKwhh67ZOBYR\nfzHoc7AlOXStiohYY9DnYMt06aBPoKaIePqgz2FFrdDshYg4dqyvSfrnJ3zgsdtbGzgS2JD8ZbpV\n0i8rt7EacADwLOD7wAxJs2u20dfWjcBZwHRJv++ojc8DUyX9pIvj97VzK3AlcJqkGV221bWIOBw4\ns6vnpK+dpwPrAfOBDwFfkHRr5TYuAb4HCFgIIOnbNdtoKSJ+CNwHTAUuk7Sww7YmACPATsD1kh6p\ncdzVV/D77y3v3wjcCVwL7EAGVhemAf8B7Er+0KeWxzV9CZgF/B/gRmA68LrKbfS8Engr8M2I+BUZ\nWN+t3MalwFERsTFwJnC2pAcrtwGwLfAa4B8jYgr5YnKepIdqNhIRuwBPJu/SPg8cI+mcmm0AjwKX\nRsTd5AvWdyofv+dM4BPAIcDFwGeBv67cxu/I52bb8vEo0EnotnhuJO0cEVsCBwFHR8T3yOfojprt\nRMSJwB3AJsB2ZNbtX+PYK9S9IOlLkr4ETJD0HklnSzoCWLvGyS3D+pKmAfMl/Sf5KlTb5pKOBeZK\n+ibQWZ+YpAckfRF4J/mHfk5EXB8Ru1ds43JJewJvAF4G/CYiTo+ITWq1UdpZSL4gTiP/0P8euCIi\nDq7ZDvBJ4DbgMOClwLsrHx9JJ0t6CXACcGBE3BkRR0fEOpWbWgO4ClhX0lms+EXQn5B0YP8b8NHa\nbfTp/LkpZpGBOAfYGjgpImrfWe9csu0lkl4DbFzrwLWe5PUjYnNJt0fE84Dav5yLlONTrtwe7aCJ\n1SPiqaWNtSm3ZF2IiPcAbwceBE4juzXWAP6LSn1x5argAOBvyT/wncnn/SJg+xptlHY+SQb71cCJ\nkn5Ubs9uAk6t1Q4wl7zqWCDpnoh4UsVjA4ue973IK5s5wD+QP7OLgd0qNjUR+FfgmnKVWL1fPCI+\nBryntPVk4BfAVrXbKVo8N+eTQXsWsJ+kWeXzNwJjdnc+AatFxIuBuyJiIjCl1oFrhe7h5FXaM4B7\ngP0qHXdphwFfAbYELiR/mWo7muwm2ZAMvyM6aKPnGcA+ku7s+9z8iPi7im2cRobeP0ma2/tkRHyl\nYhuQVzgv6u9OkLQwIt5UuZ0/AN8FvhgR7wX+p/LxAW4GzgXe3v/cRMQ2lds5CHg1+fy8EXhb5eMD\nvJa8Svs34DPAFztoo+dBun9uvgxcJ+mhiNiw7/M7V25nOtlFchB5BX9SrQOv6EDadWQfESx5qz8q\naacVObFBK/2SsyV1tk46ItYHXkVe4YwAG0k6oYN2Nlyqjes6aGML4M1LtVPzxaPXztrAMyX9d0Rs\nTT5H91RuYzVJj/Z9PEXSfTXbKMc9SdLhfR9Pk3RQ5Tb+Q9JrI+JMSW+LiB9Kqh1QvbaeRHbP9Z6b\n2yTNq9zGPwLrSPpARFwA3CTpEzXb6NqKXunuXeUs/kwRcTz5yrMoCCVtVOnY/S8g/Z+nwxeQC8jb\nvReSt2ZzajcQEVOBlwBrkreXtwM71m4HOAP4JnnFMQtYq+bBI2IDsttqOvC2iHgu2b10CfDimm0B\nH42IQ8kXkDXJn9mWtQ5eupU+AkwpdwIj5a3qTJzi1xFxEPDHiDiBDrv+gKeSg7ZTyDvRNYHrK7fx\nekkvApD0loi4lhyMrCoi7mTJPHhQ0rZj/fvlsUKhK+nuGiexHHYHNq396ln0XkAmAv1TQ9broK1F\nJL07IqaRg2k/6KCJLck+vC8BR5F/DF2YI+mEiNhC0kERcU3l4+9IdmMFi/uIFwJXVG4H8vfsWcCn\ngc+Vt2okfTEiTiWfj+PLpxd2dFf1d2T3wgVk336XF0qnkj+zY8jf5TOo/wK/MCImSnqkzA3vaq3B\n88r7EeBFwFtqHXjYFkfcDEzq6NjzgCeR03gmlseTybDqTERMIq8IRql8dVj8ofwxr1nmG0/soA2A\nkXI1ulZErEnlFytJ35C0G7CvpN3K2yskHVOzneJ+SQ+Tt7Ei7xCqiYjXSloAzCQH6/YnZ0lU7Voo\n1gQOJqejzWTJC4raJkm6kuxeFPBwB22cAsyIiIuAn5SPq5M0r7w9LOlactpYFdWnqHRsBjnl6R7y\nFWhU0rMrHbvllVTPF8iBum8DvwJ+2EEbN0XEB4FZEXEesFoHbQB8DHgTOap8J9kNUE1EHC3pOLJr\nYYmBWklvrdkWMDMiDgDmRsTHgadUPv5zyvtNKx93WVrMbe+ZFxGvJkf+d6SD0JU0tSz4eDZwe4cL\nl05gcffChlScxTRsobsXsBnwQO0DS/oG8I2IeJ2ky2off4w2L+o9jogLuli0IOmoMvg0lxzJ/lHt\nNsr81Rsl9bpHnla7DXJkHPKWde5j/cMKDia7F74GvINcwFLTW8iR8XUlHVr52EtbX9K0iNhP0n9G\nRBdz23sOBj5F9u1+kA7m6UbEtqWdSeVjag8+Fj/ve3wL+cJVxbCF7t3AH7vo0215JRURbyGn78wh\n5xreUDtwy/zc48gpVkdK+gM50FVVGXD6ALAgIg6V1NWdwX5lUPDD5GrB6uEREU8D3g88BHxG0hxy\nqlVtD5eB2+dFxAv7vyBpl9qNNZjb3vMaSYv6jCPiMCr3hwOnAyeTd4Zd2qH/BTEippNz6lfYsIXu\nM4HbI6K35K/m1LRLyvtO+oiWcgQ5Y2Fdsq/t9R20cQq5omo9cp5hlSWMy/BWsktmHbI/vKvQ/Q7Z\nh7cxWUegF7qj5K1mDdOBb5HPy4nkqrouvI78f3yJXALcxQvICyT9lOwy63Rue0TsQ/4O7xYRvWXM\nE4AXUD9075F0WuVjLlLmFx8NrBsR/7d8egT471ptDFvo7tXhsT8K7CXp6g7b6Jkn6X7g/jLo1IVH\nJV0O0NEATc/DykIgs8vKnU5I+ig5lesYSR/vqJmJkk4GiIjaNTAWKYNod0XE68iFRJuQxZX+X8Vm\nLoiIUyR9lpwy2KXLgd8A67N44HkhOdWutrsi4sPkoPoo1C3gI+kLwBci4ihJ/1LruP2GLXTXIPvD\nFk3AJ6fE1FBtmd9y6rKPrafVLJUW/5evRMRZ5PN1IVlprtZc0P4pWy1+Zv8OzCaL3NxC3inUqrvx\nIuBTEXEFsH/tBST9ygXEVcBV5YVkK+AXkmpPG4ScVRTlDbor4HNKuYKvvnBp2EJ3Ot1NwN88Ipb5\nyibpqIrt9Lc1snS7FdtaPyJeVdpYrzzutVHzl3SriDintNN73Gun9gAU5JVUV3NBnxwRm5GBO7k8\nHgFQ5SpWxRaSDo6I70u6OCL+odaBJf0ROKTUdLg2Iq7v+1oXz0tvxP+5wDXA/hGxi6QP1GxD0oFl\nYczmwE/JHOjChXS0cGnYQrfLCfhzyL7CFo4d43FNPwb2KY9v7ntc+8pgz/J+Ndr0h0+SdGUZ+FRE\n1JyW9CiLp7ot6Hs8ClQf4ALWiIj1gNHSzVS1uFIZQDuBvAqtOoVvDLtIemlp+ySydklVZeD2TeRY\nxenAFkAnM0C6Wrg0bKHb5QT8eySdUfF4Y+pvJyJeQQ4EXU++stZq48C+NtYh+w3vKFdA1fT6wCPi\n25Je9Xj/voLO5oJ2VZPgMRxLBtPTyal81a4KI+JIcsrWoZJa7R6xRkRMUJb5HGEZy+or2JssUXql\npJMi4oYO2gC6W7g0bKHb5QT8myoe689SuhU2JkeWHyHX4+/zmN+0/G3sQY7Grg6cHxGjZWpcbQ9E\nxBtYcoeCai8ifZaeC3pIrQOXO6dlBkUXU7nKFXsAGwD3qu4uCNsD20v6XcVjPp7zyK6M/wL+qnxc\nW6+vvfc8dVESADpcuDRUoVsm33cyAV/SB2FRpaR3kR31M8jdHLqa27izpF1Kn94ZEVEtQPq8n+zz\nvJyct3tjeV/bFJYsgzlK/V0QkPRruqsfcEBHx12mcpdzOEtO9K9ytyCpWq2AxxMRvfmrs4Gzyf/P\nOSxe0FLTuWQGbBIRlwHf6KANgLt7i5ciq5n9Za0DD0XoRsT3WfYVyKikV1Ru7gxyEcZ3yduYaXQ3\nx3X1cgszGrk3WxfhvlDSvHKFOxoRVbsXeiTtFrnzbK8bo+o2PT0RcRRZVHwOi5eCV6k0J+n20sZm\nLFWmEnhvjTaW8llyb7SuJ/p3bekKbCPAgeRzVLUvWdLny3S+rYGfl7nI1UTEy4DnA++LiM+UT69G\nPv9b12hjKEKXP11OuA1ZVLj23lgAG/Stqrk4Irqct/tvZLfGFLJPt4vVT9dExLnAxhFxCtBJH1jD\nbow9yek71ctg9jmLvDPYhdwJYXJH7fyqN5d6mEn6SO9xRDyHHOD6Fh1sAFAGtnpeGxHzyRetL5Sp\nayvqfrK750nl/Xrk9lPVZpYMReiWikWUdeMfJpfj7V1zIUPfxP47I2IHSTeUJZpd9EsCIOmC8qr9\nHPLqsHr/W6m98BpyNsPPJH2rdhtFq26Mu+i+9sJcSR+PUlS88iyZfvdGxMksOdF/2mN/y8qrrOY6\nAnhfh79nk8lFF9eQv287AL8l71BrrOycSI4b7Ur2i/87WevllgrHBoYkdGHRzgTTgVvJddG1b19F\n/uKPAC+PiF6pxy7K0wF/snvqeWW1VdWr93Kr/Fzy//X8iHi+pE/WbKNo0o1B/lH8NCJ+yuKgqj3v\ndCSyEPdaETGZ7moq/7q837Sj4zcRuU3XV4DfAy+udMU5limSeoPNV5RZM8dERK0pXceT2zT9T0Rc\nThaJ+iVZ8OaSx/zOP9NQhG6Zm/f+8nZZ+dxEAFXai17SZn3tjZC3/LMrjygv7ZPAvuRI6UuB86nf\nZXIxWS2ryz8EaNSNQdZE6Npx5JLzc8l9vs7uopESFi9n8ZTBLnaOaGEGOfvmSnIJ7aIvdPCCuE5E\nPE/SzyOLOq0Vue1VrSldEyTdGhEbkTWofwwQEatcacfe/MXe5npdFDsBoPwRTAP+lyx68S5J36nZ\nRp/Od08l+w3/qYPjLqFhN8aPgSPJGqeXknc+tU3q1WAAvl76q6uLrNW7OXknMkruJLFvF2117I0N\n2zoUODty37+5ZP/xXizegWNF9aakvYYcTO/NaFq70vGHI3T7r0IbOI6cyjWr3DZ9jaxw1YUWu6d+\nMyI+QV+VJEnVVydFxKbk6O5kYLuI2E7SP9duhw6LckfE7mQ/4X4RcWb59ARgD3LL+tp27Zsy+JWo\nuwt0M42KRPXa+lGZWnkouanr0ysXQPpu5L5rzwReHxGbk/26X63VwFCEbk9EbEUuNX0Kecs3o4Mr\nqkclzQKQNLPyMtOl7cmSu6d+uYM29gZ+xuJpPV3tbnwuOYjWWWGVosui3DPIEet55LRByIUe+435\nHStm9XIV1eWUwXGhdCfuQ07dmkeWEt1MUtVBVUknRu5M8VtJv+uFrqSv12pjqEKXrM15IBlOU8kr\nntqh+2BE/D05AXsXcnCgK88E3hARb6Z+1bSeeZK6WHSxtDmSPtagnc6Kcis3Wp0aWbB6c/KF6jZJ\nM2q1sZTPkbM8ngZcRxbntmW7i3xh31fSbZFby3cyi0XSz/oe307lEpXDFrpI+mUZIb8vIv5Q67iR\nlYsguxc+SPbp/IYM+a50WTWt5+6I+AjZF1q9/mjfz+3eyFJ4/e10Md3uMHKkfFuyElQXLyjvIp/3\n64GjI+IsSdXnUEs6r0wZ3AL4paT7arcxjpxEFszfNCJOo00Z0U4MW+j+vvR7rRkRe1N3r7Rl7fr7\nEnKPrE9XbKdf19uWQ66qem55g/pVxvp/bgf3Pa66DLiE+6fJmhsfBr5OhtXW5DzXmt4G7CRpfrmt\nvZaKC1ci4slk5aq7yGA/liwW8yFJtf8v44KkE4ETI2JX8me3Q0ScCJzZ4Z1IJ4YtdN9BjvDOJicu\nv6PWgZXbey8hIiaQZfG6Ct1e1bS1o3LVtIhYXblDQaeDM72fW0T8TX//ekTsOfZ3PSHTyIJH65Fd\nStsB95H9yGc+xvc9ESOS5kNOSYyI2tuWn06+eGxFFu/5FLmq6nPk0nMbQxm0uzoinkK+OJ5JxboI\nLQxV6Ep6sNyO3U5eIXS2FLQMbOxMd0tAIUPkjeQvzh3UDY/p5O1Yb9EHLC63V22aXUT8DTnHeJ+I\n6O1Xtxq5Ouj8Wu2Q0+q+U9o8XNJt5XEXNR6ui9yu/hryd6DWzhQ9G0raEyAibpV0anlcteD3eCbp\nAXJH5c8P+lyW11CFbjQohdhnErkx4WG1D7zUrfKF5K0yVJxz2jcpfU9JixYqlHnINd1C7o21Iblt\n9QSyAPi5ldvpn5zeP6Ok2rY6EfFVSXtJel9kmcotgfMkXVyrjWJ+3+PZfY9bbatkAzRUoUubUojA\nou1O3tzR4ftvlS9lyVvlKnNoI2Jn8va1v1rSBHJ+Y5VqScX95M/pFnLwcQtyLXztF8NlbQs0QlaE\nqmXRPnklaGuHbc9GkZuFjiz1eMOO2rOVyLCFbotSiC20uFV+gMXVknp/zAupWC2p+ARwQf+Ci4h4\nB/Cv1O1P7u8jPmWMxyuq1T55FwCbLePxhRXbsJXUsIXu0qUQP/PY/3yl1fmtchnRnVGm13WxMqxn\nG0lL7FElaWpEvLNmI41WPTXZJ0/SMQARcQQwXVKXc8FtJTNsofsDcmDjOcCdkmY/zr9fWbW4Ve55\neUQcr+52v5g/xucXdNRel5rtk1csAC6NiLuBqR3W+LCVyLCF7kVk3+dUBrCnWUUtbpV7pgCzIuJO\ncubCqKSdHud7lsfvI2J7STf2PhER29PtSr6uNP2dKkV1To6IbYAjI+JU8nf7c5K62OrGVgIjo6Nd\nLcXvRinndhA5n/F75BXCHYM9q5VXRGyy9OfKctdax9+UrDN6FTmVbzPglcDfSrqzVjvjUUSsTVbI\n2p/s2jiNvBA6eFnzxm18GMYpKrPIOa1zyFH4kyKiyz7LYbeALHt3KlmVaYOaB5d0F/Bi4GqywPiP\ngL9y4P5Zbib3lHu7pFdLukDSudSfF2wrkaG60o2I88mgPQs4vVcNLCJulLT9QE9uJRURl5Jzgo8h\n95o7Q9KOgz0rg1yA09/XHhFTXH9h/Bu2Pt0vjzHYsHPzMxkekyRdGRFHS1LHpSpt+Xw0cleUNYA1\nye6ZpXfWtXFm2LoX7o2IGyLiNxFxc0RsByDJQTK2eRHxamC1iNiRDvd8s+W2O/AscoumF9BNEXtb\nyQxb6J4EvFPShmTpvaFbdz0AB5M/q6eSJStb1Na1P8/95YJhHUkiNym1cW7YuhcmSLoFQNJPImIY\n54K2tgdwSMc7tNoTMzMiDgDmlv3SnjLg87EGhi1055eqVteQuzrMG/D5DIM1gO9EhMg+8asGfD62\n2MFk98LXyDKltXfOtZXQsM1e2ISsPboludHih2rOOR3PImIH4EPAX0raYtDnsyqLiKcB7wceAj4j\nqbMSpbbyGYor3VK9H3L7nH1ZXBfWHkdETCa7GPYnf27HDvaMjKwk9y1gXeBEsoSorSKGInRZXIi7\nP2yrF+Qepx4iFyy8vVfNzAZuYlkCTCnKb6uQoQhdSb3Sd70dHaaQWyQvHPu7Vm0RsRZZSPxmslj6\nORHxW2Afr+sfuP67tGGbQWQraChCtyci3kSWc7wfWCciDnFlpjEtq87tO6lf59aW35MjYjMycCeX\nxyMAriMy/g3bq+yx5Lr+7ch9uY4f8PmszLbpD1wASacBLxzQ+dhij5L9uqeTtTGmA2eUj22cG6or\nXeB3kn4LIOneiPBt8tjGU53bcUWSl62vwoYtdB+MiCvIilbbk7dp/wLVt1MZD8ZTndtxJSKuYYzZ\nN5J2aXw61tiwhW7/RoEzB3YWw+GDwCURcRVL1bkd5EkZAAcM+gRscIZtccRfALuS26MDIOn8wZ3R\nyq1s4rk7Oa1uJnBx2eXYVgJlAO3N5KrBEWAjSe8d7FlZ14btSvfb5Eq0B8rHo4BDdwylmMpFgz4P\nG9NZwOXkkvZ7gcmDPR1rYdhC938lHTjokzCrZK6kj0fENEkHlb5eG+eGLXSviIh3k1e7AEj6wQDP\nx2xFjETEFGCtslx7vUGfkHVv2EL3ZcCTyH5dyO4Fh64Nq+PIjSnPJQuYnz3Y07EWhi1015L0ykGf\nhFklk3o1GICvR8QeAz0ba2LYZi98ltwp9ceUeY6SfjHQkzJbThGxO7AjsB9wZvn0BGAPSd4jbZwb\ntivdbchlrCPl40nASwZ3OmZPyAxgA7IIf68e9EIyhG2cG4raCxHxVQBJuwGXSdqtPPYmizZ0JN0t\naSq5GeW15CrBGyTdNNgzsxaGInSBp/U9fl3f4+HpGzH7U+8iuxdeAZwREe8b8PlYA8MSuv1GHv+f\nmA2FtwE7STqU7CbzHmmrgGEJ3dExHpsNsxFJ8wEkPQI8MuDzsQaGZSBtq4g4h7zK7X/8/MGeltkK\nuS4iziN3t96ZnJlj49xQTBmLiF3H+pqkq1uei9mKioivStqrPH4Dubv1zyRd/NjfaePBUISu2XgS\nEVdK+utBn4cNhkPXrLGIuJsxlvy6GP/4Nyx9umbjyRxAgz4JGwyHrll790g6Y9AnYYMxLFPGzMYT\nrzxbhblP18ysIV/pmpk15NA1M2vIoWtm1pBD18ysIYeumVlD/x9X2OljIz+wwQAAAABJRU5ErkJg\ngg==\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.heatmap(salaries.isnull(), yticklabels=False, cbar=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There are two columns which are practically filled with Nans, so I decided to drop them. \n",
"I also drop the Nan values in BasePay, since there is only few of them and Agency, since I do not need it.\n",
"Next I will try to analize the Benefits column and decide whether I can fill in the values or drop the column."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dropping columns, which have too many nans (Notes, Status)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"salaries.drop(['Notes', 'Agency', 'Status'], axis=1, inplace=True)\n",
"salaries = salaries[np.isfinite(salaries['BasePay'])]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting a distribution of benefits"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEBCAYAAAB4wNK4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xl4k9eB7/GvNq/YBluADWFzICcm\nCRDIAklolmZvkibt3JlO2pvbtGlJl+l2e7tMp03bJ22mz0wmbbrO0HbSjS7TNG3SNGuzQxIaAwkE\nc4CACYsNtsGbbMm2pPvH+5oII9sSyJYt/z7PwwM675F0jgH99J5z3vN64vE4IiIi3mw3QERExgYF\ngoiIAAoEERFxKRBERARQIIiIiEuBICIiAPiz3YCTUVtbqzWzIiJpWrZsmSdZ+bgOBIBly5aN6vvV\n1dVRU1Mzqu852tTH3DER+qk+pqe2tnbQYxoyEhERQIEgIiIuBYKIiAAKBBERcSkQREQEUCCIiIhL\ngSAiIoACQUREXOP+wrTR9MOf3McW+wbBiuAx5ZXBMj7ywfdnpU0iIpmiQEhDY3Mb1RdeR1Vl1THl\n9pkHstQiEZHM0ZCRiIgACgQREXEpEEREBFAgiIiIS4EgIiKAAkFERFwKBBERARQIIiLiUiCIiAig\nQBAREdewW1cYY7zAD4DFQAS4zVq7M+H4h4BVQB9wp7X2z8aYILAGKAQOALdaa7vSrPtZ4B+BGPBN\na632hxARGUGpnCHcCBRYa1cAXwDu7j9gjKkEPgFcCFwF3GWMyQe+Aqyx1q4ENgKr0qw72a27ArgS\n+HYmOisiIoNLJRAuAh4FsNa+BJyTcOw8YK21NmKtbQN2AosSnwM8AlyeZt0QsAcodn/FTrSDIiKS\nmlR2Oy0F2hIeR40xfmttX5JjHUDZgPJkZcPVBdgLbAV8wF2DNa6uri6FLmRGc0szRb29NDQ2HFc+\nmu0YaeFwOKf6k8xE6CNMjH6qj5mTSiC0AyUJj71uGCQ7VgK0JpR3JylLpe41QBUwz637mDFmrbV2\n/cDG1dTUpNCFzAhWBPEHAsdtf91eERzVdoy0urq6nOpPMhOhjzAx+qk+pqe2tnbQY6kMGa0FrgUw\nxiwHNiccWw+sNMYUGGPKgBpgS+JzcD7cn0+z7hGcgIhYa8M4ITE5hbaKiMgJSiUQHgDCxph1wD3A\np40xnzHG3GCtbQTuxfkQfwr4kvsBfifwHmPMWpyJ4e+lWfd54G/AS8aYF4HtwBOZ67aIiAw07JCR\ntTYG3D6geFvC8dXA6gHPOQhcneS10ql7B3DHcO0TEZHM0IVpIiICKBBERMSlQBAREUCBICIiLgWC\niIgACgQREXEpEEREBFAgiIiIS4EgIiKAAkFERFwKBBERARQIIiLiUiCIiAigQBAREZcCQUREAAWC\niIi4FAgiIgIoEERExKVAEBERQIEgIiIuBYKIiAAKBBERcSkQREQEUCCIiIhLgSAiIoACQUREXAoE\nEREBFAgiIuJSIIiICKBAEBERlwJBREQABYKIiLgUCCIiAigQRETEpUAQERFAgSAiIi4FgoiIAAoE\nERFxKRBERARQIIiIiMs/XAVjjBf4AbAYiAC3WWt3Jhz/ELAK6APutNb+2RgTBNYAhcAB4FZrbVea\nda8B7nDfZgPwMWttPCO9FhGR46RyhnAjUGCtXQF8Abi7/4AxphL4BHAhcBVwlzEmH/gKsMZauxLY\nCKxKs24J8G/Addba5UA9EMxAf0VEZBCpBMJFwKMA1tqXgHMSjp0HrLXWRqy1bcBOYFHic4BHgMvT\nrHsBsBm42xjzPHDQWtt0wr0UEZFhDTtkBJQCbQmPo8YYv7W2L8mxDqBsQHmysuHqBoFLgSVAJ/C8\nMeZFa+32gY2rq6tLoQuZ0dzSTFFvLw2NDceVj2Y7Rlo4HM6p/iQzEfoIE6Of6mPmpBII7UBJwmOv\nGwbJjpUArQnl3UnKUqnbAvzNWtsIYIx5DiccjguEmpqaFLqQGcGKIP5AgKrKqmPK2yuCo9qOkVZX\nV5dT/UlmIvQRJkY/1cf01NbWDnoslSGjtcC1AMaY5ThDOf3WAyuNMQXGmDKgBtiS+BzgGuD5NOvW\nAmcaY4LGGD+wHNiaQltFROQEpRIIDwBhY8w64B7g08aYzxhjbnC/wd+L8yH+FPAla20YuBN4jzFm\nLbAC+F6adZuALwKPAS8Df7DWbslct0VEZKBhh4ystTHg9gHF2xKOrwZWD3jOQeDqJK+VTt3fAL8Z\nrn0iIpIZujBNREQABYKIiLgUCCIiAigQRETEpUAQERFAgSAiIi4FgoiIAAoEERFxKRBERARQIIiI\niEuBICIigAJBRERcCgQREQEUCCIi4lIgiIgIoEAQERGXAkFERAAFgoiIuBQIIiICKBBERMSlQBAR\nEUCBICIiLgWCiIgACgQREXEpEEREBFAgiIiIS4EgIiKAAkFERFwKBBERARQIIiLiUiCIiAigQBAR\nEZcCQUREAAWCiIi4FAgiIgIoEERExKVAEBERQIEgIiIuBUIamvryebWhO9vNEBEZEQqENGyLTOa5\n+hBvNHVmuykiIhnnH66CMcYL/ABYDESA26y1OxOOfwhYBfQBd1pr/2yMCQJrgELgAHCrtbYrnboJ\n7/0w8Cdr7Y8y1ekT1R4NAPDwaw18/LL5eD2eLLdIRCRzhg0E4EagwFq7whizHLgbeCeAMaYS+ARw\nDlAAvGCMeQL4CrDGWnufMeYLwCpjzK9TrQvc4773nUB5hvp60jpiAUrzvTS2h6mtP8K588ZM08a0\nH/7kPhqb244rrwyW8ZEPvn/U2yMiyaUSCBcBjwJYa18yxpyTcOw8YK21NgJEjDE7gUXuc77p1nnE\n/fMbadS9xxjzd0DMLcu61q4eInEf51QWsK8THt/ayFmnlFEQ8GW7aWNeY3Mb5pKbjiu3zzyQhdaI\nyGBSCYRSIPHrXdQY47fW9iU51gGUDShPVjZkXWPMmcDNwN/hnEEMqq6uLoUunDzbFAagJADnVAa4\nv6WLl+1eTgvm09zSPGrtGA3hcDij/Wluaaa0sSFpebZ+bpnu41g1EfqpPmZOKoHQDpQkPPa6YZDs\nWAnQmlDenaQslbq3ADOBp4C5QI8xpt5a++jAxtXU1KTQhZO3PbIfOEBFcR7zZ8/g/tfbIK+Yqsqp\ntFcER60do6Guri6j/QlWBKmqrDquPJs/t0z3cayaCP1UH9NTW1s76LFUAmEtcD3wO3cOYXPCsfXA\nN4wxBUA+UANscZ9zLXAfcA3wfDp1rbXf6n8DY8xXgcZkYTCadjeHgDilBT7y/T4KAz5au3qy2aQJ\nJdk8hOYgRDIrlUB4ALjCGLMO8AC3GmM+A+y01j5ojLkX5wPfC3zJWhs2xtwJ/MxdVdQM3GytDaVa\nN+O9zIA9LV0Ue/vwe52VRZOLArR29Wa5VRNHsnkIzUGIZNawgWCtjQG3DyjelnB8NbB6wHMOAlcn\nea2U6yYc/+pwbRwN9S0hSrxvBcDkojyOhHSGICK5Qxempai+OURpYiAUBjiiISMRySGpDBlNeG1d\nvRzp6qW6MPEMIUCkL0Z3TzSLLRs/DrWH+eu2QxxsD9PW3cvy6gpmx1N/fiwOOw524PF4qJ5arIsC\nRUaAAiEFew6HAI49QyjKA6C1W2cJw4nH4Q8b93OwPcy8YDEVk/J5dnsTM/1VfLa7l7LCwKDPbevu\n5TtP7uB/2uYSXlcPwNSSfN62IEhxGoEiIsNTIKTAWWEEJb5jh4wAWrt6Ne42jP19RbzZ2cWNS2Zy\nnnt198u7W3hw035u+v5a/uuWc5g/bdJxz6vdc4RP/mYjDW1hZvm7uWjpmUT6oryws5n7N+znnMKy\n0e6KSE7TZ1kK9rR0AQyYVHYDoVsrjYYSj8fZ2F3OlKIAy+ZMOVp+/rwKrpq0n7buXm76/lqe3HqQ\neNz5yn+oPczXH9rK3//niwD8z+0ruHjSQRbOKOXs2VP4+KXzWVhVSm13kNo9R7LSL5FcpDOEFNS3\nhKgqK8DveWuMojjfj9/robWrZ+xstjQGPfZ6I4ejBfzd6dPxeY8d958eCPPgqov48M9f4bafv8KU\nogALppWwaV8r0Vicdy+dyZfesZCywgB/Sniex+Ph3UtP4Z5HWvj4mg08/ImVlBfnjW7HRHKQAiEF\n9c0h5lYUQ8KXUa/HQ1mhrkUYzvee3kmpt4fFsyYnPT5zciG/v/0C7t+wjy3726hr7OBdZ8/kI5ec\nypyK4kFftzDPxyXFjTzemc+df97Kf/zDkpHqgsiEoUBIwZ6WLq48Y/oxgQD9F6dpUnkwh9rDbNnf\nzrLC9uPODhIV5vl43/I5gHtF8s42frrz6WPq1G569bgL0yr8Ed5/4Vx+/PwuPnrp/KTzECKSOgXC\nMCJ9UVpCPVSVFdIy4Njkwjx2HOpw7uQgx3lxl/MTq/Inv8tc7YaN3PGt7xxbtulVbv7UV4+r+9yL\n65O+xqq3VfPLl/Zw7193cO8/nn1yDRaZ4BQIwzgScoaEyovzjg+EogAd4T6iBaPfrvFg3c4WSgv8\nTPFFkh4PRXqP+9Y/2Ad/MrUbNnLv939EtbeCB1/tw7P7RSb7erXHkcgJUiAMoyXkfJgFJ+WxY8Cx\nyUUB4kAoph9jv8RN6B5um8MUX4SNr75KzaXH3w/hZPUHyqxIH9sft7xRsoibz5utPY5ETpCWnQ7j\nsLtfUXlx/nHH+i9OC8UGv7BqounfhG7aee+gMxZgcc0CQt3JzxAypSjfz4rqCl7f33b070tE0qdA\nGMZbgXD8ssb+i9N0hnC8XU3OxXzVU0dnond5dQUeD6zffXhU3k8kFykQhtHS6QRCRZJAKFMgDGpX\nUyfFeT6mlxx/ZjUSygoD1FSV8sqew0Tj2udI5EQoEIZxONSDz+tJut+O3+elJN9Pp4aMjhGPx9nV\nHKJ66iQ8o7gJ3fnzKujqiVLfM/j1CyIyOAXCMFpCPUwpCuAdZB19WVFAZwgDtIR6aOvupXrq6H4w\nnzq1mOCkfGxEexyJnAgFwjBaOiNDbotQWhCgK+YbxRaNfXsPO3s/zR3iSuOR4PF4OH9eOU3RQrbs\nbxv+CSJyDAXCMA6HeqhIssKoX2lhgK64zhASNbSF8Xs9BCeNzvxBoqWzp+Alxv0b9o36e4uMdwqE\nYRwO9VA+afAzhLICP71xH109faPYqrHtQFs3lWUFQ25XMVIK83zMCnTx0KsH6IvGRv39RcYzBcIw\nWkI9SVcY9StxJ5sPto/sWvvxIh6HhtYwVWXZu3x7Xl4HzZ09rH1j4LXlIjIUjXUMoTcao627d9g5\nBIDGNuduYBNdV9xPd2+UqrLsbfB0aMs68qpv5Gu/fJKVxYcAtJ2FSAoUCEM40jX4NQj9SgudH+HB\n9vCotGmsO9zn/KyyeYbQHYmwaHaQ1/YHmHfRCvL8Xm1nIZICDRkNYahtK/qVFfQPGSkQAA5H8/EA\nlaXZ3fFvyazJ9PTFqGtoz2o7RMYTBcIQDvdfpTzEpHJ+wIefGI0KBACORPMpL84jP5Ddpbhzg8WU\nFQbYtLc1q+0QGU8UCENoDg0/ZARQ5O3TGYLrcDSPqsnZv0GE1+PhrJll7GzqJNwbzXZzRMYFBcIQ\nDnc6K4eGu1+vEwhaZdQR7qUjlpfV+YNEC6tKicbi2IMd2W6KyLigQBjC4VAPHs9b21wPpsjbR2Ob\nzhC2NTofvGMlEGZXFDEp38/rBzSPIJIKBcIQnH2M8oa9wKrIE+VQR5hYLD5KLRubtrofvNlccprI\n6/GwsKqU7Y0d2gFVJAUKhCEcDvUMO1wEzhlCbzR+dJnqRLX1QDv5niilBWNnNfPCGaX0RGMc6B0b\nISUylikQhtCSRiAAE36l0daGdsp9kVHd8no41VOLKQh42dM7OjfqERnPFAhDOBzqITjEktN+/YEw\nkVca9UZj2IMdlPvG1uS63+vl9MpS9vUWa28jkWEoEIaQ+pCRs6xxIq802tUUoqcvNuYCAZzVRpG4\nT7fXFBmGAmEQ0ZgzJzDUVcr9Cj19eDxM6JVG/VcET/GPvXmU06aX4CPGo683ZrspImOaAmEQR7p6\niMeHvygNwOuBiuL8CT1ktLWhnTy/lzLv2AuEPL+XmYEuHn/94IRfCSYyFAXCIN7ax2j4QACoLJvg\ngXCgHTO9hCzcAiElswOdNLaHeXWftrIQGYwCYRAtnaltW9FvekkBjRN0DiEej7O1oZ2FVaXZbsqg\nTgl04fd6NGwkMgQFwiCOniGksMoIYHpZwYQ9QzjYHuFwqIeFM8ZuIOR7Y6w4tYLHtjQSj2vYSCQZ\nBcIgWkLOt/2h7qecqLK0gMOhHiJ9E28jta0Nzg3tx3IgAFx9ZiX1LV1sP9iZ7aaIjEkKhEE0d0Tw\netKZQ3D275mIK436t6w4vbIkyy0Z2hULp+PxwKNbNGwkkowCYRBNnc41CKneKH5OeREAe1q6RrJZ\nY9LWhnbmVBRR4t4saKyaVlLAstlTeEzzCCJJDbvpjDHGC/wAWAxEgNustTsTjn8IWAX0AXdaa/9s\njAkCa4BC4ABwq7W2K826nwbe477NX6y1X8tMl1PT3BkhOCm14SJwbsgCsKclBEwdoVaNTXUNHdRU\nju3hon5Xn1nJnQ/X0dBRRk22GyMyxqRyhnAjUGCtXQF8Abi7/4AxphL4BHAhcBVwlzEmH/gKsMZa\nuxLYCKxKs2418F7gAmAFcKUxZlEmOpyqdANhWkk+hQEfu5sn1hlCZ6SP+pYQNWN4hVGiq86oBGDd\nm6Est0Rk7EklEC4CHgWw1r4EnJNw7DxgrbU2Yq1tA3YCixKfAzwCXJ5m3b3A1dbaqLU2BgSAUR2c\ndwIhtfkDAI/Hw5yKIvcMYeLYvK+NeBwWzSrLdlNSMqu8iIVVpQoEkSRS2ae4FGhLeBw1xvittX1J\njnUAZQPKk5UNWdda2ws0G2M8wL8BG62125M1rq6uLoUupK+pPYy3J3TM6ze3NFPU20tDY8MxdZtb\nmqmrq6MiL8r2hiMj1qbREg6HU+7D45udC70Kuw5RV9dCc0szpQN+Pl3d3cf9zAYrH6m6/X9HAMum\n+/jlpghrazdTXjR2tuoeCen8XY5X6mPmpPK/oR1IXD7idcMg2bESoDWhvDtJWSp1McYUAD/FCYmP\nDta4mprMjwR39fQR7tvFgtlV1NScerQ8WBHEHwhQVVl1TP32iiA1NTWcWQ9/e6Ge08zpKU9Gj0V1\ndXUp/1wbXqllTkURy88+E3B+RgN/PkWFhceVDVY+UnWf3rOX3zz4OABt0TzizOa7D7/CxTM9fOSD\n7x+2n+NVOn+X45X6mJ7a2tpBj6USCGuB64HfGWOWA5sTjq0HvuF+eOcDNcAW9znXAvcB1wDPp1PX\nPTP4E/CUtfZbKfYzY5o7nIvS0hkyAphXUezcjKW1m1nuqqNct2lvK+fNK892M4YVivRiLrkJcK6s\nfu7RrbRMqqaxeVOWWyYydqQyh/AAEDbGrAPuAT5tjPmMMeYGa20jcC/OB/5TwJestWHgTuA9xpi1\nOJPC30unLs5E9sXANcaYZ9xfKzLY7yE1dToXpQVLUp9UBphT0b/SaGJMLDe2hWlsD7Nk1uRsNyUt\nHo+HU8vzeaOpk66YL9vNERkzhj1DcCd1bx9QvC3h+Gpg9YDnHASuTvJaqdZ9AMjandqb3UCYmsYq\nI4C5QeesoL4lxEULghlv11jTv1Hc4nEWCAALpxVQe6CbHZHxsTpKZDTowrQk+gMhnWWn4Gxwl+/3\nTpiVRpv2tuL3ejhjjG9ZkczkQh8Lpk1ie6RMd1ITcSkQkuifQ6hIcw7B6/Uwt6J4wlyL8OreVmqq\nSikIjM9hl/PnVdAV9/Nk3aFsN0VkTFAgJNHcGWFyUYCAL/0fz0S5FiEai/PavjYWj5PrD5IxlSUU\ne3r51ct7st0UkTFBgZBEulcpJ5obLGbP4a6cvzPXrqZOOiN9LJk1JdtNOWE+r4cF+e08v6OZXU3a\nAVVEgZBES2dP2ktO+82pKKKnL0Zjjt8bYeNeZ0J5yTg+QwA4Ld+59eePnn0j200RyToFQhIndYbg\nLj2tb87tYaMX32ihvDiP6uCkbDflpBR6o7z3/Nncv2F/zv+diQwnt6/bP0FNJzlkBFDf0sUF8zPZ\nqrEjGovz6Gt7qfJ18rV/u/eYY7WbXj16Adh4ULthIwtjPojN4QP3PsjK4kNUBsty+uplkcEoEAYI\n90bpCPed8JBRVWkBRXk+tjW2Z7hlY8dr+1rpjno59+wzMLMuPObYcy+uz1KrTkwo0suSy26gYUsD\nL+zwcsPKZTRueCTbzRLJCg0ZDdAS6t+24sTOELxeD8vmTOHlXYcz2awx5WnbhIc4C6aN7+GiRCsX\nTCXg8/Jk3cFsN0UkaxQIAzR3nNhFaYmWV1dgD3bQ4l7glmuesYcI+sIU5efOCeakfD8rTwuy5UA7\nDb2F2W6OSFYoEAZoPsF9jBKtOLUCgJd3595ZQlNHhNf2tXFKIPcuvnvbgqlMKQrwcleQXl29LBOQ\nAmGAt7atOLE5BICzZpZRlOfjpV0tmWrWmPHs9iYAZgZyb0VOwOflukUzaIvlc9/a+mw3R2TUKRAG\naO48uTkEcD5YzplbnpOB8LQ9xLSSfMp9PdluyoioqSrllECIbz+5nYM5fi2JyEAKhAGaOyOU5PtP\nen+e5dXlbD/YefSMIxe0dfXy9LZDvL1mGp7xe/+fYZ1b2ExvNM43/5Lbd+ESGSh3ZgUzpLmzJ+35\ng9oNG7njW985pqypLx+Yxfrdh7n2rOPv6jUerVn/Jl09Uf738rn8dke2WzNySn29rLq4mu8+tZN/\nPG82y6srst0kkVGhQBiguSNCRXF68weJd+PqNz8W5/E/vcaLb7TkRCD09MW4b91uLpxfwcJxuN11\nOmo3bGRx3EOxdza3//hZri/di9eDLliTnKchowH2Huli5pSTX3bo83qY7u/mhZ3NxOPjf6O7hzcf\n4GB7hNtWVme7KSMuFOnljEtv5MZzT6U1lk/LKRdhLrmJxua2bDdNZEQpEBJE+qIcaO0+uh/RyZqb\n18nu5hCPbx3fFzvF43FWP7eb+dMmcfGCqdluzqhZWFWKmV7CE3UHORzKzUl0kUQKhAR7D3cRi8O8\nYGYCoTqvg3nBYu55Yvu43g77kS2NbG1o57aL5uH15vBs8gAej4d3LpmB1+PhDxv3kQMneiJDUiAk\n6L/T2dwMBYLXA598+wK2NXbwyJbGjLzmaNvTEuLzv3+NxaeU8a6lp2S7OaNuclEeV59Zya6mEDt6\ncnvuRESBkKB/++O5FUUZe83rF89g/rRJfPvJ7UTH2VlCuDfKR3+1Aa/Xw/duXkqef2L+czl3bjnV\nwWJe6QpOiLvhycSlVUYJ6ltCTC4KMLnoxK9SHsjn9fCpyxfw8TUb+ddH6vjna2vwjINF/O3hXj77\nu1d5/UA7lxUf4Merj12TP962uT4ZXo+Hdy87he88vpWPrdnA/R+5gHz/+LyPtMhQFAgJ6ltCGZtQ\nTvSOs6pYv+Iwq5/fTU9fjDuuP2NMj8Xb5jCrHnqB/a3dnFvYxOVXXnVcnfG2zfXJmlKUx4VFh3h6\nv4+7/rKNr95wRrabJJJxCoQE9c1dnDs3c/cIPuaCtTgszK/gZy9CY3uYf3nHQmaVZ25o6mTF43HW\nvdHCf6+t5691B6kqK+C3H17OQ7/9RbabNmbMzgvxgSXz+Ona3SyaoHMqktsUCK5wb5QDbd3MDWbu\nP/nAC9ZMPM4fHnuGZ7d7eXrbs9x8/mzet3wO87NwX4FoLM7u5k4272/jhR0tPLejiaaOCPmeKKfG\nGziHHh767ZYJNTSUii9cczrbGtv5f79/jbLCAG+vmZ7tJolkjALBtfdwF/EMLjlNxuPxENn6NNct\nOYdN3RX8bF2U+9bVE/SFOWNyH595z1WcObMM3zDDSd09UepbQuxudn7tagqx90gXvdEYsVgcv89L\nUZ6PgoCPwoCPgoCXaAx6ojFau3o40NrN/tZuwr3OFs9lhQFWLgjStnMjl1/+dpqbplNV6VxdPdGG\nhoZSu2Ej37j7u8yNe7CemXz4Z+v5+5nt3PVP781200QyQoHg2n10hdHIBQI4Zw1LL7uBpUBHuJdN\ne1vZtLeVZ1vCPPv9tRQEvMwLTqI6WExpYYCiPB990Rit3b00d0aob+5if2v3Ma9Z5OnDF2mjpKgQ\nD3FieOiLe+jDS3ekF2+gAI8njo84AU+MeNcRqksLmVIUocIfoczbg3cv1Ne9SuCqK0a0/+NZ4hnf\nvEgf//X8Ln67H87fuJ8bz56Z5daJnDwFgqu+ZXQCIVFJQYCVC6aycsFUNj31IBdd+y5e3dvKrqZO\ntja00xHuo6unj4DPS1lhgPLiPM6bV87uutc4/awlBCflUzEpj3y/j9V3fZEPffGu494jWfnqu77I\n+5LU1dlA6orz/Xx4ZTU/fnwDn/rtJuzBDj5zxWkEfBNzaa7kBgWCa3dzF1OKApQVBbLy/ls3vUKh\nNwrAbPfX9t3bOO20050KcaDT+dVnX2XRNZdkpZ3yluJ8P1dM2k/0tMv44TNv8Nz2Jr717kWcObMs\n200TOSEKBFd9cyhjVyifiGQ7pj734he5PsmErr7Jjx2bNm5kmQcuKS7m5YY+rv/u8yzIa+ftM/r4\n/O23ZLt5ImlRILj2tIS0772krT/IDbCyJ8oTdY38bbeXnfUx2h7YzC0r5nB6pba8kPFBgUD/ktMw\nc0Zx/kByT2GejxsWz2Tlgqn86en1/L52H2tefpOlsydzw+IZXHlGJTMmn/zW6iIjRYEA2MYOAKqn\nKhDk5E0pyiPPPs5NS5bxRk8pO/ZF+OqbrXz1oa1My+vjunMXsLy6nPPnVWRtzkokGQUC8MTWg3g9\ncOH8YLabIjkiFOll8WXvZLH7+FBHmK0H2nlt+25+9fIefrp2Nx6Pc8+FFdUVLK+u4Nx55ZQVKiAk\nexQIwGOvN3L+vArK07x1pkiqppUUMM0UcODh73H52Utp6svnYF8hjYe6+Mn+I/z4hd14iFPui1Dp\n72ZOXidnTi/go7e9P9tNlwlkwgfCzkOd7DjUyfuWz8l2U2QCCEV6WXjpjceU/ee/fokrbvsiu5qc\nK8+3He7i9cgUXgj10PvkDm60j5/CAAAImElEQVQ8e4bmt2RUTPhAeOx158Y1V56hPWkkO7zxGNXB\nSVQHnT2twr1RtuxvY93mHXz7r9u558ntLJ09mZuWnsJ1Z1UxRWeyMkIUCK83snjWZKrKtPpDxoaC\ngI9z5paz/Q8Pc9GSc9nVM4ld+yJ8+c1W7vjjZi46bRpXLJzOFTXTqSwryHZzJYdM6EDY39rNa/va\n+PzVp2e7KSLHcfa9up6lONuTN7SFeXrd39jTUsyX/7iFL/9xC4tOKeOKmulcMD/IolPKtHWGnJQJ\nHQh/3LgfgKs0XCRjnMfjYcbkQjzbnuTis8+mtTSPvT3F7G0Ic/e+Nu5+YjuFAR/L5kzh/HnlnDuv\nHDO9JNvNlnFm2EAwxniBHwCLgQhwm7V2Z8LxDwGrgD7gTmvtn40xQWANUAgcAG611nadbN2M9Rp4\ndEsD//HEdi4+bSrVU0f/fgQiJyIU6eX0S53tTJa7ZZ2RPn7x3z+hePZCXt/dxQs7m4/WLyvwcnpV\nK/OnTWJesJgZkwupKitgxuRCpk7KH9N37pPRl8oZwo1AgbV2hTFmOXA38E4AY0wl8AngHKAAeMEY\n8wTwFWCNtfY+Y8wXgFXGmF+fbF1rbSQTnX562yH+6dcbWXxKGT9479JMvKRI1kzK9+M/Us8tt68C\noKunj31Hunn0oT8SKK9i15shNtbn0RM/9j7Qfq+H6aUFzJhcQGVZIRXFeZQX5zGlOI+K4jzKCgMU\n5/spzvNR1P97np88v4alclUqgXAR8CiAtfYlY8w5CcfOA9a6H9QRY8xOYJH7nG+6dR5x//xGBur+\n7YR7muCuR+owlSX8963nUZw/oUfNJAcV5fk5bXoJzzZt5brrLqeqsop4PE53b5S27l7+tOY+qqpP\noyvmJ9Tl583OAJt3xogGCugdEBrJBHweCgM+8vw+Aj4PAZ/36O95fu8xj/1eD16PB6/Xg8/jwesF\nr8eDr7/c48HnBZ/Xg8fjof+ExYPzB8/Rx2/xeI49qzly5DDlO18f9DlHH7t/OPrsYd7LM1xbBrze\nYPWHagsDy5O8VxzY33CE0je30dMXo6cvxtmzp4zIPTg88Xh8yArGmB8D91trH3EfvwlUW2v7jDHv\nA86y1n7ePfZz4OfAj9zybmNM9YCyE65rrX0ysW21tbVDN15ERI6zbNmypGOFqXw9bgcSZ6e81tq+\nQY6VAK0J5d1Jyk6mbkqdEhGR9KUyGLgWuBbAnUPYnHBsPbDSGFNgjCkDaoAtic8BrgGez1BdEREZ\nIakMGfWvMlqEM6R1K84H+E5r7YPuaqAP44TLN6219xtjpgM/w/lm3wzcbK0NnWzdTHdeRETeMmwg\nyPBLb8cqY0wA+CkwF8gH7gS2AvfhzFVtAT5mrY0ZY+4A3oGzzPdT1tr1xpj5qdYdzX4NxhgzDagF\nrsBp233kUD+NMV8EbgDycP49Pkvu9TGA8wVxLhAFPkQO/V0aY84HvmWtvSSdtmaibirt0/qx1Bxd\negt8AWfp7XjwPqDFWrsSZzjue8B/AP/ilnmAdxpjlgIXA+cD7wG+7z4/nbpZ5X6Q/CfOXBTkWD+N\nMZcAFwAX4rRrFjnWR9e1gN9aewHwdeAb5Eg/jTGfA36Ms5Q+3baeVN1U26hASM0xS29xro8YD/4H\n+HLC4z5gGc43S3CW+V6O07/HrbVxa+2bgN8YMzXNutn27zir0w64j3Otn1fhzN89ADwE/Jnc6yPA\ndrcdXqAU6CV3+vkG8K6ExyPVr2R1U6JASE0p0JbwOGqMGfMXMFhrO621HcaYEuD3wL8AHmtt/zhh\nB1DG8f3rL0+nbtYYY94PNFlrH0sozrV+BnG+iPwv4HbgVzgr/nKpjwCdOMNF24DVwL3kyN+lOw/a\nm1A0Uv1KVjclCoTUDLX0dkwzxswCngZ+Ya1dAySOJQ63zDedutn0AeAKY8wzwBKca1mmJRzPhX62\nAI9Za3ustRYIc+x/9FzoI8Cncfp5Gs6c3c9w5kz65Uo/YeT+LyarmxIFQmqGWno7ZrkruB4HPm+t\n/albvNEdj4a3lvmuBa4yxniNMbNxAq85zbpZY619m7X2YmvtJcAm4BbgkRzr5wvA1cYYjzFmBlAM\n/DXH+ghwhLe+9R4GAuTgv1nXSPUrWd2UjPlhjzHiAZxvoOt4a+ntePDPwBTgy8aY/rmETwL3GmPy\ngDrg99baqDHmeeBFnC8JH3Pr/l9gdYp1x5p02j7m++luBPk2nGt0+tuzmxzqo+se4Kduu/Jw/g2/\nQu71E0bu3+hxdVNtkJadiogIoCEjERFxKRBERARQIIiIiEuBICIigAJBRERcWnYqMoC7hvt3OBsB\nenDWwn/AWrstA6/9DZxtKH4JlFprv26MuQl42Vp7YOhni4wsBYJIck9Za98DYIy5EmevpOsy8Lr/\nAJxtre1IKPskznYUCgTJKgWCyPCmAPXGmLNw99bB2UriA8DZwOeBHmAe8Ftr7TfcLUP+C2dnyzDO\nvT1uBU4BHjbG3AX8H+AXuNttGGMuxzlzKAMKgc9Za58ZrU6KaA5BJLnLjDHPGGNexLmnxO9xNlv7\nmLtFxl+Az7l15wDvBlYklP07cK+19lL3z/9qrf060AhcibtNt7X2Yd7abmMWUAlcD9wMFI1wH0WO\noTMEkeQSh4wMzvYAxcAPnIcEcLZqBtjsbnbYZ4zpvx/DWcA/G2M+j3NG0TPcG1prXzfGfB/4tfv6\n92awPyLD0hmCyPAOur+/BtziniF8DnjYLU+2/8s2nE0FLwFWMfR+MjHA6w5JlVhr34EznPTdk2+6\nSOp0hiCS3GXudtpRnC2EP4Ozy+3PjTE+t84HgRmDPP+zwA+NMQU48wGfHOK91uFs2X0DcIkx5hac\nM4qvnGwnRNKhze1ERATQkJGIiLgUCCIiAigQRETEpUAQERFAgSAiIi4FgoiIAAoEERFxKRBERASA\n/w9bXBIMnZoHRgAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.distplot(salaries['Benefits'].dropna())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We decite, that with a distribution like this, it is a good idea to fill the Nan benefits values with the mean values."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Filling nans with mean values of benefits"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"salaries['Benefits']=salaries['Benefits'].fillna(value=salaries['Benefits'].mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Checking our data once again"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I decide to take a look at the datato make sure I got rid of all the Nan values."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAE3CAYAAAAJ55KLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAGc1JREFUeJzt3Xu0ZVV15/Fv8ZK3CkIEH2AjziBG\nbMQ0GoKixgckbRwYkAgiiESUiBoNhlfHCCEaJT6IDQRKq0BAEBUNRtBWkSAiEHzQsWcUkDYgKAJB\nBHne/mPtQx2KKrC768512PX9jHFHnTq3Lmty77m/s/faa8+1YG5uDklSjVV6FyBJKxNDV5IKGbqS\nVMjQlaRChq4kFTJ0JanQag/z+bnV1nhCSSHLc89d19K7hlmpYxZqmJU6ZqGGWaljFmqYlTpmoYah\njgXL+5xHupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWp\nkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJU\nyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkq\nZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKGriQV\nMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFDV5IK\nGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKGriQVMnQlqZChK0mF\nDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRC\nhq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIh\nQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQ\noStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI\n0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk\n6EpSIUNXkgoZupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUy\ndCWpkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZ\nupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUM\nXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKG\nriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFD\nV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgotmJub612DJK00PNKVpEKGriQV\nMnQlqZChK2mlFxGPrhrL0NVKJSJW712DZtI5VQOttiL/YxFxxPI+l5l/tSLH+nVExHrAwcAmtG/q\ndzPzh8U1rAq8Dngy8FXgisy8sbKGoY5LgVOAxZl5U/X4Qw0fAU7KzG/3GH9wWUR8BTgxM6/oWEd3\nEXEQcHKv18NUHb8BbADcDbwT+PvM/G5xGTcN348E7gPIzPPmY6AVfaR7w/DxXODxwJW0b+azVvA4\nv66FwFXA04DrgZM61HA8sBnwEmA9YHGHGgBeDNwFfD4iTo+IF3eo4RzgkIj4RkQcEBHrd6jhWcB5\nwH+LiK9FxH4RsW51ERGxY0S8LCJ2jogrI+KPq2sA7gXOGV4Pv9dh/ImTaQdGRwMXAB/sUMPPaa+N\n3YE9gFfP10ArNHQz8/jMPB5YJTPflJmfyMy30sKmhw0zcyFwd2Z+A1jQoYYtMvMI4I7M/DxQNnc0\nLTNvycyPAvvRftlOjYiLI2KXwhq+mJm7Aa8Afhf4SUR8PCI2K6zhPuCfaG/IPwf+FDg3IvavqmHw\nPuAHwFuA3wHeWDw+mXlsZj6XFnb7RMTVEXFYhzfD1YGvAY/NzFNYwWfgv47M3Gf6Azh0vsaar/+5\nDSNii8y8MiJ+E+hxRAPAMD4R8URa2FRbLSIeN9SwHsOpS7WIeBPwWuBW4ETalMfqwDcpms+KiK2G\ncf+A9ku2A+01eBawXVEN76OF/vnAezPzWxGxCnAZcEJFDYM7aGeF92Tm9RHxqMKxgftfj7sDewO3\nA39O+3mcDexUWMoawN8CF0TEjrTXZamIeDfwpqGWtYF/A7aej7HmK3QPoh1JPYF2Wr/nPI3zcN4C\nfAzYCvgU7Zta7TDgQtrp0zeBt3aoAeAJwB6ZefXUc3dHxJ8U1nAiLdj+MjPvmDwZER8rrOEHwLMz\n87bJE5l5X0S8srAGgF8AXwY+GhFvBv538fgAlwOnAa+dfl1ExDbFdewLvJT22vhDYK/i8QFeDjwR\n+DvgGOCj8zXQCr0NOCIuAib/welT+bnMfN4KG+gRKCI2Am7MzC73XUfEhrR55dVpP5tNM/PoDnVs\nslQNFxWPvyXwqqVqqHzjmdSxHvCkzPzXiHgG7bVxfXENq2bmvVN/3ygzf1ZZwzDuhzLzoKm/L8zM\nfYtr+KfMfHlEnJyZe0XEP2fmDvMx1oo+0p23yef/FxFxFO1d9P6gy8xNi8aefgOafp5Ob0Bn0k6Z\nnkk7tb29uoCIOIl2kXUd2inclcD2xWUsAj5Pm9q4Dii9iBYRj6dNty0G9oqIp9GmvT4H/HZlLcCh\nEXEg7Q1oHdrPY6uqwYcpr78ANhrONBYMH6UrjAb/HhH7Ar+MiKOZxynRFRq6mXnNivzvrQC7AJtn\n5p0dxp68Aa1BWzUwsUGHWgDIzDdGxELaxbSvdyhhK9o82fHAIbQpn2q3Z+bREbFlZu4bERcUj789\nbfotWDKHfB9wbnEd0H4/ngx8APjw8FEmMz8aESfQXgtHDU/f1+ls8E9o0wtn0q47PDJWL8ygy4E1\nO419J/Ao2nKYNYbHa9ECp4uIWJN2RDNH8RHe4BfDL9Q6w1rlNTrUsGA42lw3Itah+E0wMz+bmTsB\nr8nMnYaPF2Xm4ZV1DG7OzF8B62dm0s4+ykTEyzPzHuBa2sW8vWmrKEqnFgbrAPvTlqtdywMPlFao\n8qUZxa6gLUu6nnbaMpeZ/6lo7Fk6ogH4e9pFvPOAHwP/3KGGyyLiHcB1EXE6sGqHGt4NvJJ2o8jV\nFK+bjojDMvNI2tTCAy4wZ2b1Wt1rI+J1wB0R8R7gMcXjP3X4c/PicZdlIW0p4fNp6/lPGh6vcGMP\n3d2BpwC3VA+cmZ8FPhsRO2fmF6rHX0Y9Z00eR8SZmXlrhxoOGS4g3UG7WvytyvGH9aeXZuZkamXj\nyvEHk+/7Itr3oaf9adMLnwZeD1SH/h8BH6Gtzz2weOylbZiZCyNiz8z8RkTM25r+sYfuNcAve8zp\nzsoRTUT8EW0JzO3Anpl5SXXgDutzj6Qtkzo4M39Bu5hVWcOBwJ8B90TEgZnZ64xjz+GC4ruA36PD\nDTsRsTHwduA24JjMvJ22VKrar4YLzr8ZEc+c/kRm7lhdTNWa/rGH7pOAKyPiquHvlUvXPjf8eVzR\neMvzVtqKhcfS5qv+a4cajqPd9bQB7U6svTvU8Me0qZ71afPsvUL3S8C3aRdtkiWhOwdUTX0tBv6R\n9pp4L+2uvB52pn0fjgcOoM8b0G9l5vdoU4Ela/rHHrq7dxz7UGD3zDy/Yw0Ad2bmzcDNw4WjHu7N\nzC8CdLpIAvCrzLwLuDEielzAAyAzD6Ut1To8M9/TqYw1MvNYgIj4cqcaGC6i/SgidqbdQLUZrSnU\n/yws48yIOC4zP0hbzjjvxh66q9Pmje5fCE9bGlJho6Jx/m/06D2xtFlYMTML34ePRcQptNfJp2gd\n8C4uGnt6SdYs/Dz+O3Aj8ELgO7QzkaqeIM8G3h8R5wJ7V9ygMvbQXUy/hfBbRMRfL+sTmXlIhzoW\nLF1TYR0bRsRLhho2GB5PapiX9nnLsHVEnDrUMHk8qaFHh6/jaetjD6etmV5E3Y0ia0fEU2iBu9bw\neAFAZl71kF85P7bMzP0j4quZeXZE/HnVwJn5S+CAoefDhRFx8dTn5uV1MfbQ7bkQ/nbanF1vRyzn\ncaV/obXLg7Z2evJ4jraErcJuw5+r0n+eHWDNzPzKcME1I+JXhWPfy5KlcvdMPZ4Dyi9gAatHxAbA\n3DAFVtoUariAdjStCdO8LyEce+j2XAh/fWYuKhxvmaZriIgX0S7WXEy7Jbiqhn2malifNnd31XCU\nUVXD+cP452XmSx7u3xe4MyJeCqwaEdsDZaE7Xz0F/j8cQWsG9Ru0ZYR/VjVwRBxMa6t5YGaWdNsb\ne+j2XAh/WeFYD2uYVngi7ersXbR73vd4yC9a8TXsSuu6thpwRkTMDcvqKt0SEa/ggTsElL0BTdkf\neD/wOOAdtKv3JYYzvmXeattjqdZwxB+0jQ9uGHoeV9kO2C4zf1414ArtMqYHG/qkvoG2XOkK2jYx\n5X19I+LrmbnjMG+2U0R8MzNLm81ExIW0iyVfHP68NDOfXVzDV5d6ai4zX1hZQ28RscXyPpeZV1bW\nAvefgR3E1C37M3I2Mi9GeaQ7/GIt691kLjNfVFzOItpNGl+m7ZawkD7rVFcbei/MRdu3rUdD9/sy\n887hCHcuIsqmFyaGN5xHs2SK47aH+5r5EBGH0JqG386SW9RLOuBNgnW4gPaANpfAmytqWMoHaXuj\n/bjD2OVGGbo8eOuTbYAPAacu49/Ot8dn5qRj0dkR0Wvd7t/Rpjw2os3p9rgD6YKIOA14YkQcB1xS\nXcCMTHFAu7C36XA3WC+n0M46dqTtYrFWpzp+PFnHvTIYZegOHZMY7p9+F22bmldX3qgwtQD/6oh4\nTmZeMtzq2GP+kMw8c1gI/1TaEV7ZHNZUDYdExMtoqxm+n5n/WF0D7fbX7WlhcyRw6fBntR/Rv/fC\nHZn5nknT8A5tLiduiIhjaStb5gCy7W04SqMMXbh/h4DFwHeB53Q4jUzaC2gB8IKImLR6rFwadL9h\nHeLatLWZpw93RJUe+Q+ns0+jfU+eHhFPz8z3VdbADExxDNYAvhcR32NJ0FSvF14QbUeTdSNiLfr1\nev734c/NO41fapShOzQ3efvw8YXhuTUAhltB511mPmWqngW00/obi6/MTnsf8Bpai8ffAc6gfrrl\nbFpHq5uLx53WfYpj8N5O4047knar/Gm0Pdo+0aOIzDw8Il7AkuWMPXaOKDPK0GXJOr/JJnM9mooA\nMLyYFgL/ATw2It6QmV+qrGHQffdZ2tzdX3YY934zMsXBMP7BtA1Lz6GdkVVbc9KDAfjMMN9dbujl\nuwXtLGiOtpPEa3rUUmGUoTt9lDkDjgR2yMzrou2O/Glap6lqt9J/99nPR8TfAP86eSIzq5uIbw48\ng3bRaNuI2DYz/6qyhkFZ0+ylRcQutHntPSPi5OHpVYBdgbOW+4Xz5/lTyxk/FrU7VJcbZehORMTW\ntFs+H0M7dbqiw5HNvZl5HUBmXlt8u+e03YAtcsnus//QoYZXA99nyeaHPRaJn0a7iFa68+4ylDXN\nXoYraDci3ElbzgjtRpE9l/sV82u14cyr53LGMqMOXdpGe/vQAuYk2pFFdejeGhF/SmtqsiNwU/H4\nE08CXhERr6K+49rEnZlZdufVctyeme/uXANQ1zR7adk2kD0pIhbTTuu3An6QmVdU1bCUD9NWkWwM\nXAQc+9D//JFt7KFLZv5wuFL9s4j4RdW40bbWhja98A7gZcBPaG8CPfTsuDZxTUT8BW0+c3LFvqTh\nzdTP44aI2GOpGnos43sLrWn2s2itHXu8Gb2B9nq8GDgsIk7JzPL125l5+rCccUvgh5n5s+oaKo09\ndG8a5ofWiYhXU7tX2rJ2/X0ubS+qDxTWMdF763Fodz49bfiA2i5j0z+P/acez9FuSS4xhP8HaL1A\n3gV8hhY2z6CtU620F/C8zLx7WN1zIYU3zUTE2sB+tDXLF9Ma36weEe/MzOrvRZmxh+7raVdCb6Q1\ntnh91cDZttl+gIhYhdY+rkfoTjqurVfdcS0iVht2Ceh2gWTy84iI35+e14+I3Zb/VfNiIa0R0wa0\nqa5tgZ/R5plPfoivmw8LMvNuaEspI6JkOeWUj9PefLamNf95P+1W4A/TbpkfpVGHbmbeOpy2XEl7\nJ+12y+VwgWAH+t1q+W7gD2m/2FdR+wu+mLZH2eSGERj6DVC0hC8ifp+2PnmPiJjsk7cqbc+4Mypq\nGNwzWTIYEQdl5g+Gxz16QFwUEacDF9Bem1U7V0xskpm7AUTEdzPzhOFxWWvHHkYdurPQznDKmrQN\nAN9SOehSp7Ofop3OQuG60Kk7rXbLzPtvRhjWMFf5DrAhbV3s/6ItkbqHtpqh0vTNMdMrWcq2zYmI\nT2bm7pn5tqHN5VbA6Zl5dlUNg7unHt849XgWthCaN6MOXdr62Mn6v0UR0e3K+dCw+1Udhp4+nT2H\nB57OlqyRjYgdaKeQb4uIY4anVwEOpM1lVriZ9v3/Du2i5pbAT6l/E17WtkELgKcX1nD//n1D0FaH\n7cSm0TYqXbDU40061VNi7KE7C+0Me5uF09lbaOtCH8WSX6j7aK0Nq/wNcOb0zRgR8Xrgb6mda56e\nQz5uOY/n26zs33cm8JRlPP5UYQ3lxh66S7czPOah//kodT+dHdZ/XjEs3etx9xfANpl54FJ1nRQR\n+1UWUdnp7iHMxP59mXk4QES8FVicmb3WsJcae+h+nXaB4KnA1Zl548P8+zGahdPZiRdExFE9ds7g\ngfOH0+4prWI2zMT+fVPuAc6JiGuAkzr1Jikz9tA9izZ/eRIztmdZoVk4nZ3YCLguIq6mrVyYy8zn\nPczXrCg3RcR2mXnp5ImI2I5+dwj2NFO/C0PTnWMjYhvg4Ig4gfY7++HMvLVvdSve6PdIi4itgH1p\n6/7+B+2d9Kq+Va2cImKzpZ8bbkmtGHtz4HO0ddJX0uYPXwz8QWZeXVGDli0i1qO1mNybNvVxIu2A\ncP9lrXd/pBv10ozBdbR1qbfTrpR/KCJ6zSuu7O4BjgJOAF5Cu7hWIjN/BPw2cD6tgfi3gP9i4M6E\ny2l71r02M1+amWdm5mnUrxsuMeoj3Yg4gxa0pwAfn3T7iohLM3O7rsWthCLiHNqa4cNp+9gtqt6R\nWLMnIladnuePiI3G3H9h7HO6/7CcSfkdyisRtKbZX4mIwzIzO7a51Gw5dNjtZXVgHdr0z1YP/SWP\nXGOfXrghIi6JiJ9ExOURsS1AZvrL3sedEfFSYNWI2J5O+8Vp5uwCPJm2fdRv0afBfpmxh+6HgP0y\ncxNaC7uPdK5nZbc/7efwOFq7y969dTUbbh4OhNYfdvJeu3dB82ns0wurZOZ3ADLz2xGxMq7JnCW7\nAgdkZs+NKTV7ro2I1wF3DPulPaZzPfNq7KF799Bd6gLarg13dq5nZbc68KWISNp8+9c616PZsD9t\neuHTtPar1VvRlxr76oXNaD06t6JthvjOqnWhWr6IeA7wTuA/Z+aWvetRHxGxMfB24DbgmMzs1nq1\n0iiPdIcu+NC2x3kNS3q3qqOIWIs2xbA37WdyRN+K1NliWiP3xwLvpbU+Hb1Rhi5LmmVPh21p02wt\n0220mxJeO+l2ppXaGsMtwAybDawURhm6mTlpETfZsWEj4KeZed/yv0rzJSLWpTULv5zWTP3UiPgp\nsMcY763Xr2367HPsK6nuN8rQnYiIV9LaOd4MrB8RB4y9g9GMWlYv2/2o72Wr2bJ2RDyFFrhrDY8X\nAIy5P8rY312OoN1fvy1tf6yjOtezstpmOnABMvNE4Jmd6tFsuJc2r/txWl+OxcCi4e+jNeojXeDn\nmflTgMy8ISI8le3DXrZ6kMxcKW/HH3vo3hoR59I6S21HO535ayjflmRlZy9bPUhEXMByVhVl5o7F\n5ZQZe+hOb7h3bbcq9A7gcxHxNZbqZduzKHX3ut4F9DD2myMeDTyftv05AJl5Rr+KVl7DBqG70Jbs\nXQucPeyQrJXccAHtVbQ7FhcAm2bmm/tWNX/GfqR7Hu1OtFuGv88Bhm4HQ0OTs3rXoZl0CvBF2q36\nNwBr9S1nfo09dP8jM/fpXYSkh3RHZr4nIhZm5r7DXO9ojT10z42IN9KOdgHIzK93rEfSgy2IiI2A\ndYdbxTfoXdB8Gnvo/i7wKNq8LrTpBUNXmi1H0jamPI3WwPwTfcuZX2MP3XUz88W9i5D0kNac9GAA\nPhMRu3atZp6NffXCB2k7iv4Lw3rAzPy3rkVJAiAidgG2B/YETh6eXgXYNTNHu0fa2I90t6Hdarpg\n+PuawHP7lSNpyhXA42mbC0z6XN9HC+HRGmXvhYj4JEBm7gR8ITN3Gh67EaI0IzLzmsw8ibYZ5YW0\nOxQvyczL+lY2v0YZusDGU493nno83rkU6ZHrDbTphRcBiyLibZ3rmVdjDd1pCx7+n0jqaC/geZl5\nIG36b9R7pI01dOeW81jS7FmQmXcDZOZdwF2d65lXY72QtnVEnEo7yp1+/PS+ZUlahosi4nTart07\n0FYcjdYol4xFxPOX97nMPL+yFknLFhGfzMzdh8evoO3a/f3MPPuhv/KRbZShK2n2RcRXMvOFveuo\nZuhK6iIirmE5t/yOeZOBsc7pSpp9twPZu4hqhq6kXq7PzEW9i6g21iVjkmbfqO88Wx7ndCWpkEe6\nklTI0JWkQoauJBUydCWpkKErSYX+Dyc/AXubTNhbAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.heatmap(salaries.isnull(), yticklabels=False, cbar=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Looking at our clean data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now I take a look at the data, to get familiar with it."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" EmployeeName | \n",
" JobTitle | \n",
" BasePay | \n",
" OvertimePay | \n",
" OtherPay | \n",
" Benefits | \n",
" TotalPay | \n",
" TotalPayBenefits | \n",
" Year | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" NATHANIEL FORD | \n",
" GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
" 167411.18 | \n",
" 0.00 | \n",
" 400184.25 | \n",
" 25114.34451 | \n",
" 567595.43 | \n",
" 567595.43 | \n",
" 2011 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" GARY JIMENEZ | \n",
" CAPTAIN III (POLICE DEPARTMENT) | \n",
" 155966.02 | \n",
" 245131.88 | \n",
" 137811.38 | \n",
" 25114.34451 | \n",
" 538909.28 | \n",
" 538909.28 | \n",
" 2011 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" ALBERT PARDINI | \n",
" CAPTAIN III (POLICE DEPARTMENT) | \n",
" 212739.13 | \n",
" 106088.18 | \n",
" 16452.60 | \n",
" 25114.34451 | \n",
" 335279.91 | \n",
" 335279.91 | \n",
" 2011 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" CHRISTOPHER CHONG | \n",
" WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
" 77916.00 | \n",
" 56120.71 | \n",
" 198306.90 | \n",
" 25114.34451 | \n",
" 332343.61 | \n",
" 332343.61 | \n",
" 2011 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" PATRICK GARDNER | \n",
" DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
" 134401.60 | \n",
" 9737.00 | \n",
" 182234.59 | \n",
" 25114.34451 | \n",
" 326373.19 | \n",
" 326373.19 | \n",
" 2011 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id EmployeeName JobTitle \\\n",
"0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
"1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
"2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
"3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
"4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
"\n",
" BasePay OvertimePay OtherPay Benefits TotalPay \\\n",
"0 167411.18 0.00 400184.25 25114.34451 567595.43 \n",
"1 155966.02 245131.88 137811.38 25114.34451 538909.28 \n",
"2 212739.13 106088.18 16452.60 25114.34451 335279.91 \n",
"3 77916.00 56120.71 198306.90 25114.34451 332343.61 \n",
"4 134401.60 9737.00 182234.59 25114.34451 326373.19 \n",
"\n",
" TotalPayBenefits Year \n",
"0 567595.43 2011 \n",
"1 538909.28 2011 \n",
"2 335279.91 2011 \n",
"3 332343.61 2011 \n",
"4 326373.19 2011 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"salaries.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Int64Index: 148045 entries, 0 to 148653\n",
"Data columns (total 10 columns):\n",
"Id 148045 non-null int64\n",
"EmployeeName 148045 non-null object\n",
"JobTitle 148045 non-null object\n",
"BasePay 148045 non-null float64\n",
"OvertimePay 148045 non-null float64\n",
"OtherPay 148045 non-null float64\n",
"Benefits 148045 non-null float64\n",
"TotalPay 148045 non-null float64\n",
"TotalPayBenefits 148045 non-null float64\n",
"Year 148045 non-null int64\n",
"dtypes: float64(6), int64(2), object(2)\n",
"memory usage: 12.4+ MB\n"
]
}
],
"source": [
"salaries.info()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" BasePay | \n",
" OvertimePay | \n",
" OtherPay | \n",
" Benefits | \n",
" TotalPay | \n",
" TotalPayBenefits | \n",
" Year | \n",
"
\n",
" \n",
" \n",
" \n",
" | count | \n",
" 148045.000000 | \n",
" 148045.000000 | \n",
" 148045.000000 | \n",
" 148045.000000 | \n",
" 148045.000000 | \n",
" 148045.000000 | \n",
" 148045.000000 | \n",
"
\n",
" \n",
" | mean | \n",
" 66325.448841 | \n",
" 5086.740800 | \n",
" 3645.661408 | \n",
" 25114.344510 | \n",
" 75057.851048 | \n",
" 94038.185067 | \n",
" 2012.520653 | \n",
"
\n",
" \n",
" | std | \n",
" 42764.635495 | \n",
" 11473.181557 | \n",
" 8019.498636 | \n",
" 13364.208347 | \n",
" 50409.761170 | \n",
" 62683.477905 | \n",
" 1.119390 | \n",
"
\n",
" \n",
" | min | \n",
" -166.010000 | \n",
" -0.010000 | \n",
" -7058.590000 | \n",
" -13.800000 | \n",
" -618.130000 | \n",
" -618.130000 | \n",
" 2011.000000 | \n",
"
\n",
" \n",
" | 25% | \n",
" 33588.200000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 22747.200000 | \n",
" 36958.160000 | \n",
" 45077.410000 | \n",
" 2012.000000 | \n",
"
\n",
" \n",
" | 50% | \n",
" 65007.450000 | \n",
" 0.000000 | \n",
" 817.170000 | \n",
" 25114.344510 | \n",
" 71644.710000 | \n",
" 92675.510000 | \n",
" 2013.000000 | \n",
"
\n",
" \n",
" | 75% | \n",
" 94691.050000 | \n",
" 4695.710000 | \n",
" 4242.900000 | \n",
" 33507.310000 | \n",
" 106028.000000 | \n",
" 133085.410000 | \n",
" 2014.000000 | \n",
"
\n",
" \n",
" | max | \n",
" 319275.010000 | \n",
" 245131.880000 | \n",
" 400184.250000 | \n",
" 96570.660000 | \n",
" 567595.430000 | \n",
" 567595.430000 | \n",
" 2014.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" BasePay OvertimePay OtherPay Benefits \\\n",
"count 148045.000000 148045.000000 148045.000000 148045.000000 \n",
"mean 66325.448841 5086.740800 3645.661408 25114.344510 \n",
"std 42764.635495 11473.181557 8019.498636 13364.208347 \n",
"min -166.010000 -0.010000 -7058.590000 -13.800000 \n",
"25% 33588.200000 0.000000 0.000000 22747.200000 \n",
"50% 65007.450000 0.000000 817.170000 25114.344510 \n",
"75% 94691.050000 4695.710000 4242.900000 33507.310000 \n",
"max 319275.010000 245131.880000 400184.250000 96570.660000 \n",
"\n",
" TotalPay TotalPayBenefits Year \n",
"count 148045.000000 148045.000000 148045.000000 \n",
"mean 75057.851048 94038.185067 2012.520653 \n",
"std 50409.761170 62683.477905 1.119390 \n",
"min -618.130000 -618.130000 2011.000000 \n",
"25% 36958.160000 45077.410000 2012.000000 \n",
"50% 71644.710000 92675.510000 2013.000000 \n",
"75% 106028.000000 133085.410000 2014.000000 \n",
"max 567595.430000 567595.430000 2014.000000 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"salaries.drop('Id', axis=1).describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Putting people into different groups based on their job"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I use the very convenient job title bin function from https://www.kaggle.com/mevanoff24/data-exploration-predicting-salaries"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def findJobTitle(row):\n",
" \n",
" police_title = ['police', 'sherif', 'probation', 'sergeant', 'officer', 'lieutenant']\n",
" fire_title = ['fire']\n",
" transit_title = ['mta', 'transit']\n",
" medical_title = ['anesth', 'medical', 'nurs', 'health', 'physician', 'orthopedic', 'pharm', 'care']\n",
" court_title = ['court', 'legal']\n",
" automotive_title = ['automotive', 'mechanic', 'truck']\n",
" engineer_title = ['engineer', 'engr', 'eng', 'program']\n",
" general_laborer_title = ['general laborer', 'painter', 'inspector', 'carpenter', \n",
" 'electrician', 'plumber', 'maintenance']\n",
" aide_title = ['aide', 'assistant', 'secretary', 'attendant']\n",
" \n",
" for police in police_title:\n",
" if police in row.lower():\n",
" return 'police' \n",
" for fire in fire_title:\n",
" if fire in row.lower():\n",
" return 'fire'\n",
" for aide in aide_title:\n",
" if aide in row.lower():\n",
" return 'assistant'\n",
" for transit in transit_title:\n",
" if transit in row.lower():\n",
" return 'transit'\n",
" for medical in medical_title:\n",
" if medical in row.lower():\n",
" return 'medical'\n",
" if 'airport' in row.lower():\n",
" return 'airport'\n",
" if 'worker' in row.lower():\n",
" return 'social worker'\n",
" if 'architect' in row.lower():\n",
" return 'architect'\n",
" for court in court_title:\n",
" if court in row.lower():\n",
" return 'court'\n",
" if 'major' in row.lower():\n",
" return 'mayor'\n",
" if 'librar' in row.lower():\n",
" return 'library'\n",
" if 'guard' in row.lower():\n",
" return 'guard'\n",
" if 'public' in row.lower():\n",
" return 'public works'\n",
" if 'attorney' in row.lower():\n",
" return 'attorney'\n",
" if 'custodian' in row.lower():\n",
" return 'custodian'\n",
" if 'account' in row.lower():\n",
" return 'account'\n",
" if 'garden' in row.lower():\n",
" return 'gardener'\n",
" if 'recreation' in row.lower():\n",
" return 'recreation leader'\n",
" for automotive in automotive_title:\n",
" if automotive in row.lower():\n",
" return 'automotive'\n",
" for engineer in engineer_title:\n",
" if engineer in row.lower():\n",
" return 'engineer'\n",
" for general_laborer in general_laborer_title:\n",
" if general_laborer in row.lower():\n",
" return 'general laborer'\n",
" if 'food serv' in row.lower():\n",
" return 'food service'\n",
" if 'clerk' in row.lower():\n",
" return 'clerk'\n",
" if 'porter' in row.lower():\n",
" return 'porter' \n",
" if 'analy' in row.lower():\n",
" return 'analyst'\n",
" if 'manager' in row.lower():\n",
" return 'manager'\n",
" else:\n",
" return 'other'\n",
" \n",
"salaries['GeneralJobTitle'] = salaries['JobTitle'].map(findJobTitle)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exploring the data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Changes in salaries over time between different groups of people"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" TotalPay | \n",
"
\n",
" \n",
" | Year | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2011 | \n",
" 71744.103871 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 74113.262265 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 78808.285041 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 75471.836912 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" TotalPay\n",
"Year \n",
"2011 71744.103871\n",
"2012 74113.262265\n",
"2013 78808.285041\n",
"2014 75471.836912"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pay_by_year=salaries[['Year', 'TotalPay']].groupby('Year').mean()\n",
"pay_by_year"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEBCAYAAABv4kJxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFCNJREFUeJzt3X+MXfV55/H3DMYxU9uQSdnQpCC2\ncfpoolZIcbKQiBRH2QQDraialVK1oSWkTZN65ZBSUZqaQhS2VVaE3bosSzcBkZ8lWyIalYCLqpbI\npdCEqalCmT54aIK3cRMRDw44EwL2zP5xz4TJdDzc75x77o+575eEuPfM95z7nMf2fO73nHPPHZmf\nn0eSpHaN9roASdJgMTgkSUUMDklSEYNDklTE4JAkFTE4JElF1vW6gG6YnJz0mmNJWoWtW7eOLF02\nFMEBsHXr1l6XIEkDZXJyctnlHqqSJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSkaH5HIck\n1bVnzx7uvvvuWtuYmZkBYHx8vNZ2LrzwQrZv315rG6tlcEhSFx06dAioHxy9ZHBIUpu2b99e+13+\nzp07Adi9e3cnSuoJg0Nawzy0oiYYHJJWtBYOraizDA5pDfPQiprg5biSpCIGhySpiMEhSSpicEiS\nihgckqQiBockqYjBIUkqYnBIkor4AUD1nbq3yfAWGVKzOh4cEXEicCtwJvAS4DrgUeA2YB54BNiR\nmXMRcQ1wEXAUuDwzvxwRW+qO7fQ+abB4iwypWU3MON4JHMrMSyLiZcA+4GFgV2beFxE3AxdHxBPA\necDZwOnA54HXAzfUGQvc2cA+qYvq3ibDW2RIzWoiOP4MuGPR86PAVuBL1fN7gLcBCdybmfPAgYhY\nFxGndmBs14OjX+5A6qEVSd3Q8eDIzCMAEbGJVoDsAq6vfukDPAOcDGwGDi1adWH5SM2xy5qamqqx\nVys7ePAgs7Oztbbx5JNPArBhw4ZadTS5n4Ni4c/CXnSG/eystdDPRk6OR8TptN7535SZn42I/77o\nx5uAw8DT1eOly+dqjl3WxMRE+Y60aWJigssuu6zWNjy80jljY2NAs3/mw8R+dtYg9XNycnLZ5R2/\nHDciXg7cC/xOZt5aLd4XEduqxxcAe4H7gfMjYjQizgBGM/PbHRgrSWpQEzOODwIvBa6OiKurZe8H\ndkfEemAKuCMzj0XEXuABWgG2oxp7BfCx1Y5tYH8kSYs0cY7j/bSCYqnzlhl7LXDtkmWP1R0rSWqO\nnxyXJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JU\nxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JU\nxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JU\nxOCQJBVZ19SGI+Js4COZuS0iXgv8BbC/+vH/zszPRcQ1wEXAUeDyzPxyRGwBbgPmgUeAHZk5VzK2\nqX2SJDU044iIK4GPAxuqRa8FbsjMbdV/n6vC5DzgbOAXgf9Vjb0B2JWZbwJGgItLxjaxP5KkFzQ1\n43gc+AXgU9XzrUBExMW0Zh2XA+cC92bmPHAgItZFxKnV2C9V690DvA3IgrF3NrRPkiQaCo7M/HxE\nnLlo0ZeBj2fmZET8HnANcBg4tGjMM8DJwEgVEIuXbS4Yu6ypqanV71AXzM7OAv1f5yCwl51lPztr\nLfSzsXMcS9yZmYcXHgN/DHwB2LRozCZaYTK3zLKnC8Yua2JiYrW1d8XY2BjQ/3UOAnvZWfazswap\nn5OTk8su79ZVVX8ZEf+pevwWYBK4Hzg/IkYj4gxgNDO/DeyLiG3V2AuAvYVjJUkN6taM433AjRHx\nHPBN4D2Z+XRE7AUeoBVgO6qxVwAfi4j1wBRwR2Yea3dsl/ZHkoZWY8GRmV8Hzqke/wPwxmXGXAtc\nu2TZY7SuoFr1WElSc/wAoCSpiMEhSSpicEiSihgckqQiBockqYjBIUkqYnBIkooYHJKkIgaHJKmI\nwSFJKmJwSJKKGBySpCIGhySpiMEhSSryosEREe+PiPFuFCNJ6n/tzDiOAV+MiNsj4q1NFyRJ6m8v\nGhyZeWNmvgH4Q+BdEfG1iNgVEZubL0+S1G9e9BsAI2IT8A7gV4FZ4MpqvS8Ab260OklS32nnq2P3\nAX8K/Epmfm1hYUSc1VhVkqS+1U5wRGYe+8GTiFMz88nMvKrBuiRJfaqd4Pi9iPivwInAjwCPAxON\nViVJ6lvtXFV1EXAG8Fngp4EDjVYkSepr7QTHU5n5LLA5MxMYa7gmSVIfayc4vhERlwLfi4gPA6c0\nW5IkqZ+1ExzvAb4E/DYwA/xSoxVJkvracU+OR8R/AH4LOALckJmzwP/oVmGSpP600ozjk8C/AiPA\nR7pTjiSp3610Oe76zLwRICL+qkv1SJL63Eozjvk2x0mShshKM46xiPiPtELjpOrxCEBm/ks3ipMk\n9Z+VguMYrfMcAEcXPZ4HfqbJoiS17N69m+np6Z7WsH//fgB27tzZ0zoAtmzZ0hd1DLvjBkdmntvN\nQiT9e9PT0+z7p329/fRUdaB63zf29bAI4HBvX14vWOly3L388HmOH8hMZxxSt5wCc9vmel1Fz43e\nV+9Uaz/M3qB/ZnB1Zm8rHaq6dFVblKQ+ND09zT8//DCn9biOk6r/H3744Z7V8M2a6690qOpxgOqk\n+H+hdXfcEeAVwI6arytJXXca8O7WNT5D7ZblDya1rZ3bqn8a2EPrhPi3eCEwpX+nHw4H9MuhAPBk\nrtamdoLje5n54Yi4NTMvq859SMuanp7msUf+gTM2HnvxwQ3ZPN96R/ns17/SsxoADhw5oaevLzWl\nneAYiYhTgY0RcRIw3s6GI+Js4COZuS0itgC30TrZ/giwIzPnIuIaWt/3cRS4PDO/3Imxbe470B/v\nkKF/3iV34h3yGRuPset1RzpU0eC67qGNvS5BakQ7wXEd8A5a3zt+APjMi60QEVcClwDfrRbdAOzK\nzPsi4mbg4oh4AjgPOBs4Hfg88Pq6Y4E729rzyvT0NPu++ihzY23lYWNGjrX+KCYfr3vaavVGZ2d6\n9tqSBkc7wbFh4Z5VwJ0R8fY21nkc+AXgU9XzrbRuzQ5wD/A2IIF7M3MeOBAR66qZTd2xRcEBMDc2\nzrOv+dnS1dacDY/e1esSJA2AlT7HcRFwDvDOiFgIgFHg7bTe8R9XZn4+Is5ctGik+qUP8AxwMrAZ\nOLRozMLyumOXNTU1tezy2dnZlXZl6MzOzh63V+2u743NXtCJfuoFdfppL39YnV6uNON4hNbVa98H\nnqiWzQHvXMXrLD7vsInWZ0Cfrh4vXV537LImJiaWXT42NlZtXtDqx/F61e76z3awnkHXiX7yVAcL\nGnB1+jk2NsZzHa5nkLXTy8nJyWWXH/fNYWY+kZm3AD8N3E/r2/++kpnLb2ll+yJiW/X4AmBvtc3z\nI2I0Is4ARjPz2x0YK0lqUDtHFX6d1rmKtwCfiIgPrOJ1rgA+FBEPAOuBO6oA2gs8QOvQ145OjF1F\nbZKkAu2cHL8EeGNmPh8R62m9+3/Rr5DNzK/TOkdCZj5G66qopWOuBa5dsqz2WElSc9qZcYxk5vMA\nmfkceJhQkoZZOzOOByLidlqHis4F/r7ZkiRJ/ey4M46I+BxAZn6A1of/NgG3Z+Zvdak2SVIfWmnG\ncerCg8z8AvCF5suRJPW7lYLjVRHxB8v9IDM/2FA9kqQ+t1JwzNK61YckST+wUnB8MzM/0bVKJEkD\nYaXLcVfzCXFJ0hq30i1HfrubhUiSBoM3MpUkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwOSVKR\ndm6rLqlHZmZm4DCM3ud7PA7DzEkzva5COOOQJBVyxiH1sfHxcZ743hPMbZvrdSk9N3rfKOPj470u\nQxgckobEzMwM3wJuYb7XpfTcvwFzM6s/7GdwqKNmZmZ48pkTuO6hjb0upeeeeOYETq3xj1PqVwaH\npKEwPj7O6IEDvJuRXpfSc7cwzyk1DvsZHOqo8fFxxp5+nF2vO9LrUnruuoc2ssFj8lqDhj44ZmZm\nGJ09xIZH7+p1KT03OnuImZn1vS5DUp/zclxJUpGhn3GMj4/ztaee49nX/GyvS+m5DY/e5eWOkl6U\nMw5JUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFenqvaoi\nYh/wnerp14A/Af4IOArcm5kfiohR4CbgLOD7wK9l5nREnNPu2G7ukyQNm64FR0RsAMjMbYuWPQy8\nHfgX4IsR8VrgTGBDZr6hCouPAhcDNxeMlSQ1pJszjrOAsYi4t3rda4GXZObjABHxl8BbgB8D9gBk\n5oMR8bqI2Nzu2C7ujyQNpW4GxyxwPfBx4NXAPcDhRT9/BvgJYDMvHM4COFYte7qdsRGxLjOPLn3x\nqamp5YuanS3djzVtdnb2uL1qd31PnL2gE/3UC+r0017+sDq97GZwPAZMZ+Y88FhEfAdY/OUPm2gF\nyVj1eMEordDY1M7Y5UIDYGJiYtmixsbG+OFMGm5jY2PH7VW76z/bwXoGXSf6yVMdLGjA1enn2NgY\nz3W4nkHWTi8nJyeXXd7NN4eX0ToHQUS8gtYv/e9GxKsiYgQ4H9gL3A9cWI07B/hqZj4NPNfO2C7u\njyQNpW7OOG4BbouIvwXmaQXJHPAZ4ARaV0r9fUR8BXhrRPwdMAK8q1r/vQVjJUkN6VpwZOZzwC8t\n86NzloyboxUSS9d/sN2xkqTmeB5TklTE4JAkFTE4JElFunrLEUmrcBhG7+vhe7yF66s39K4EoHUB\n/it7XIMAg0Pqa1u2bOl1Cezfvx+AV7/y1b0t5JX90Q8ZHGrAgSMncN1DG3v2+t95bgSAk9fP96wG\naPXhJ2tuY+fOnR2ppRM17N69u8eVqF8YHMDo7AwbHr2rpzWMPP89AOZPPKlnNYzOzgCn1dpGP7wj\n/H/VO+SXn9nbd8g/SX/0Q+q0oQ+OfvmH/YPDAa+q94u7ntNq98N3yNLaN/TB0Q+/6MBfdpIGx9AH\nh6Th8U3gFnp77utI9f/enQVs9eGUGusbHJKGQr8cln6yOiz946/u3Tm4U6jXD4ND0lDwsHTn+Mlx\nSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwO\nSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwO\nSVKRdb0uoK6IGAVuAs4Cvg/8WmZO97YqSVq71sKM4+eBDZn5BuAq4KM9rkeS1rSBn3EA5wJ7ADLz\nwYh4XbcL2LNnD3fffXetbezfvx+AnTt3rnobF154Idu3b69VRz+o289O9BLWRj/75e8m2M8Fa6Gf\nayE4NgPfWfT8WESsy8yjiwdNTU01VsDBgweZnZ2ttY2NGzcC1NrOwYMHG93Pbqnbz070cqGOQe9n\nv/zdXKjFfq6Nfo7Mz8/35IU7JSJuAB7MzP9bPf/XzPzxxWMmJyfnt27d2pP6JGlQTU5OsnXr1pGl\ny9fCOY77gQsBIuIc4Ku9LUeS1ra1cKjqTuCtEfF3wAjwrh7XI0lr2sAHR2bOAe/tdR2SNCzWwqEq\nSVIXGRySpCIGhySpiMEhSSpicEiSigz8VVXtmpyc7HUJkrQmDPwnxyVJ3eWhKklSEYNDklRkaM5x\n9EpEnAjcCpwJvAS4DngUuA2YBx4BdlSfgCcitgB/npk/tWQ7lwOnZeZVXSu+z9TtZUScUa2/jtbt\nad6TmdndvegfHejnacBngPXAvwGXZma9W74OsA7+W/8Z4DOZeXrXii/kjKN57wQOZeabgAuAG4Eb\ngF3VshHgYoCIuAS4HfjRhZUj4qSI+DSwo9uF96FavQQ+DNyYmduAPwD+sHul96W6/bwK+EQ19lHg\nN7pYez+q208i4nTgCuDELtZdzOBo3p8BVy96fhTYCnypen4P8J+rx08B5y1ZfwPwSeC/NVjjoKjb\nyyuAL1aP1wHPNlPmwKjbzw8An66+vvl04FvNlToQavUzIjYANwO/2WyZ9RkcDcvMI5n5TERsAu4A\ndgEjmblwOdszwMnV2Lsy87tL1n8qM+/tatF9qgO9/HZmPh8RAVwPfKiL5fedDvRzHjiB1iGYN9P6\nioOhVbeftGYo12fmN7pW9CoZHF1QTT//BvhUZn4WmFv0403A4Z4UNoDq9jIi3gz8OXDJMJ/fWFC3\nn5n5fGa+BngPrZnxUFttPyPiFcCbgGsi4j5gPCJub7jcVTM4GhYRLwfuBX4nM2+tFu+LiG3V4wuA\nvb2obdDU7WUVGn8EbM/Mh5qsdRB0oJ83VT2F1rvpueONHQZ1+pmZBzMzMnNbdQ5uJjN/semaV8ur\nqpr3QeClwNURsXD88/3A7ohYD0zRmtbqxdXt5f+kdQXQJ1pHq8jMHOYTunX7uRu4OSJ+n1Zo9P2x\n+YYNzb91PzkuSSrioSpJUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0NqSETcERFXLXq+MSIyIs7qZV1S\nXQaH1Jz3Au+LiNdUz68H/k9m/mMPa5Jq83McUoMi4ueA363+2wW8DfgpWh+eGwEOAZcBR4A/oXWz\nwJcB92Tm1RFxW/X8ZcBFmflUt/dBWsoZh9SgzPwL4J9pfSfDpdUN7z5G63sZtgF3A1fSCowHM/N8\n4FzgfYs289eZ+UZDQ/3CW45IzfskMLborqcTwE3VbU9OBB4DZoDXV/d+eprWFwEtGPqbMaq/OOOQ\nui+BX6lmHFfS+o6QS4HDmfnLwEeBsYgYqcYP9c0D1X+ccUjd9z7gkxFxQvX83bRugHd7RLwJ+C6w\nH3hFj+qTVuTJcUlSEQ9VSZKKGBySpCIGhySpiMEhSSpicEiSihgckqQiBockqYjBIUkq8v8BXgyX\n5FTFelEAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.boxplot(x='Year', y='TotalPay', data=salaries, showfliers=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I can notice the upward trend in the TotalPay in years 2011-2013, but in 2014 the TotalPay mean slightly decreased."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Checking how the pays changed over the years in groups of people"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" TotalPay | \n",
"
\n",
" \n",
" | GeneralJobTitle | \n",
" Year | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | account | \n",
" 2011 | \n",
" 56177.342558 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 63186.929962 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 66857.307417 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 63641.837870 | \n",
"
\n",
" \n",
" | airport | \n",
" 2011 | \n",
" 92273.242673 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 94104.593235 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 98160.944660 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 94067.513077 | \n",
"
\n",
" \n",
" | analyst | \n",
" 2011 | \n",
" 79492.329936 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 81830.453873 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 87654.166634 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 83168.412823 | \n",
"
\n",
" \n",
" | architect | \n",
" 2011 | \n",
" 96071.390972 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 113270.148969 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 125333.457143 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 124302.629417 | \n",
"
\n",
" \n",
" | assistant | \n",
" 2011 | \n",
" 43198.079421 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 40129.112925 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 40410.167382 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 39600.355957 | \n",
"
\n",
" \n",
" | attorney | \n",
" 2011 | \n",
" 126626.434486 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 133973.170860 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 144234.833244 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 138208.046967 | \n",
"
\n",
" \n",
" | automotive | \n",
" 2011 | \n",
" 78395.460507 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 81830.392473 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 84965.658302 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 83083.261233 | \n",
"
\n",
" \n",
" | clerk | \n",
" 2011 | \n",
" 40999.051714 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 42632.039917 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | medical | \n",
" 2013 | \n",
" 79212.934761 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 77210.463099 | \n",
"
\n",
" \n",
" | other | \n",
" 2011 | \n",
" 66222.015069 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 75053.523929 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 80126.012794 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 78234.947040 | \n",
"
\n",
" \n",
" | police | \n",
" 2011 | \n",
" 109707.702262 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 110355.916599 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 119961.317699 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 113887.308085 | \n",
"
\n",
" \n",
" | porter | \n",
" 2011 | \n",
" 40985.904961 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 42379.496088 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 46010.055586 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 45553.476173 | \n",
"
\n",
" \n",
" | public works | \n",
" 2011 | \n",
" 36133.666306 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 37266.939199 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 35909.203407 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 34340.963239 | \n",
"
\n",
" \n",
" | recreation leader | \n",
" 2011 | \n",
" 10438.966533 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 11370.921782 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 12207.075927 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 12449.075208 | \n",
"
\n",
" \n",
" | social worker | \n",
" 2011 | \n",
" 60714.044061 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 61365.984920 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 63954.010630 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 62010.384171 | \n",
"
\n",
" \n",
" | transit | \n",
" 2011 | \n",
" 70427.436251 | \n",
"
\n",
" \n",
" | 2012 | \n",
" 74160.880798 | \n",
"
\n",
" \n",
" | 2013 | \n",
" 77607.464635 | \n",
"
\n",
" \n",
" | 2014 | \n",
" 71134.737660 | \n",
"
\n",
" \n",
"
\n",
"
104 rows × 1 columns
\n",
"
"
],
"text/plain": [
" TotalPay\n",
"GeneralJobTitle Year \n",
"account 2011 56177.342558\n",
" 2012 63186.929962\n",
" 2013 66857.307417\n",
" 2014 63641.837870\n",
"airport 2011 92273.242673\n",
" 2012 94104.593235\n",
" 2013 98160.944660\n",
" 2014 94067.513077\n",
"analyst 2011 79492.329936\n",
" 2012 81830.453873\n",
" 2013 87654.166634\n",
" 2014 83168.412823\n",
"architect 2011 96071.390972\n",
" 2012 113270.148969\n",
" 2013 125333.457143\n",
" 2014 124302.629417\n",
"assistant 2011 43198.079421\n",
" 2012 40129.112925\n",
" 2013 40410.167382\n",
" 2014 39600.355957\n",
"attorney 2011 126626.434486\n",
" 2012 133973.170860\n",
" 2013 144234.833244\n",
" 2014 138208.046967\n",
"automotive 2011 78395.460507\n",
" 2012 81830.392473\n",
" 2013 84965.658302\n",
" 2014 83083.261233\n",
"clerk 2011 40999.051714\n",
" 2012 42632.039917\n",
"... ...\n",
"medical 2013 79212.934761\n",
" 2014 77210.463099\n",
"other 2011 66222.015069\n",
" 2012 75053.523929\n",
" 2013 80126.012794\n",
" 2014 78234.947040\n",
"police 2011 109707.702262\n",
" 2012 110355.916599\n",
" 2013 119961.317699\n",
" 2014 113887.308085\n",
"porter 2011 40985.904961\n",
" 2012 42379.496088\n",
" 2013 46010.055586\n",
" 2014 45553.476173\n",
"public works 2011 36133.666306\n",
" 2012 37266.939199\n",
" 2013 35909.203407\n",
" 2014 34340.963239\n",
"recreation leader 2011 10438.966533\n",
" 2012 11370.921782\n",
" 2013 12207.075927\n",
" 2014 12449.075208\n",
"social worker 2011 60714.044061\n",
" 2012 61365.984920\n",
" 2013 63954.010630\n",
" 2014 62010.384171\n",
"transit 2011 70427.436251\n",
" 2012 74160.880798\n",
" 2013 77607.464635\n",
" 2014 71134.737660\n",
"\n",
"[104 rows x 1 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"salaries_jobs_years=salaries[['GeneralJobTitle', 'TotalPay', 'Year']].groupby(['GeneralJobTitle', 'Year']).mean()\n",
"salaries_jobs_years"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"data": [
{
"name": "2011",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
56177.342558139564,
92273.2426732673,
79492.32993576021,
96071.3909722222,
43198.07942143331,
126626.43448616585,
78395.46050724639,
40999.051714459914,
68546.73194782622,
40521.552776381934,
81251.4860434056,
142172.54780640136,
42686.877323943656,
53697.759342105266,
79013.0795906433,
19500.85446393766,
34683.7253180212,
115523.68841196792,
69088.4893665589,
66222.01506878574,
109707.70226150741,
40985.90496062993,
36133.66630560935,
10438.966532663315,
60714.0440606705,
70427.43625082301
]
},
{
"name": "2012",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
63186.92996240612,
94104.59323529412,
81830.45387337073,
113270.14896907205,
40129.112925414505,
133973.17086021506,
81830.39247334749,
42632.03991691398,
64781.44596685088,
43639.09918787882,
84647.20845991567,
148795.11949920497,
45434.13726027398,
51507.85037617552,
86891.14136308804,
19211.737847358123,
39617.340386740296,
120077.08837209288,
74049.6819703387,
75053.52392894666,
110355.91659884396,
42379.49608808286,
37266.93919866444,
11370.92178176795,
61365.98491978606,
74160.88079762689
]
},
{
"name": "2013",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
66857.30741697413,
98160.94466019416,
87654.16663392863,
125333.45714285708,
40410.16738194091,
144234.8332439679,
84965.65830227743,
48565.42210256415,
71246.31403571427,
49467.61480769224,
90204.47242597395,
158751.63883199994,
49410.07479452056,
56403.07606811142,
92613.47836024873,
19129.360374999982,
43176.11009469695,
127630.41192307706,
79212.93476139763,
80126.01279387012,
119961.3176992791,
46010.05558583111,
35909.20340694003,
12207.07592692829,
63954.010629515025,
77607.46463486848
]
},
{
"name": "2014",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
63641.83786971834,
94067.51307692307,
83168.41282264778,
124302.62941747568,
39600.35595712471,
138208.04696658088,
83083.2612326044,
45449.74066744737,
62198.70878378374,
48750.94450248754,
87231.11888122212,
144454.13814253244,
47433.87616438354,
54012.492067901265,
88374.48909538469,
18247.304958263805,
46201.31738241313,
123976.42938106811,
77210.46309862063,
78234.9470399353,
113887.30808534652,
45553.476172506766,
34340.96323903821,
12449.075208070622,
62010.38417070799,
71134.7376604809
]
}
],
"layout": {
"barmode": "group"
}
},
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"salaries_jobs_by_year=salaries[['GeneralJobTitle', 'TotalPay', 'Year']].groupby(['Year', 'GeneralJobTitle']).mean()\n",
"job_names=salaries_jobs_by_year.xs(2011, level='Year').index.values.tolist()\n",
"\n",
"trace1 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_jobs_by_year.xs(2011, level='Year')['TotalPay'],\n",
" name='2011'\n",
")\n",
"trace2 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_jobs_by_year.xs(2012, level='Year')['TotalPay'],\n",
" name='2012'\n",
")\n",
"trace3 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_jobs_by_year.xs(2013, level='Year')['TotalPay'],\n",
" name='2013'\n",
")\n",
"trace4 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_jobs_by_year.xs(2014, level='Year')['TotalPay'],\n",
" name='2014'\n",
")\n",
"\n",
"data = [trace1, trace2, trace3, trace4]\n",
"layout = go.Layout(\n",
" barmode='group'\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"cf.iplot(fig, filename='grouped-bar')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now I made sure that the Total Pay was building up in almost every group until 2014, when it dropped a little bit.
\n",
"The job groups which were affected the most by whatever happened in 2014 are: Court and Firefighters.
\n",
"We can also notice, that only the Library and Recreation Leader group maintained it's upward trend.
\n",
"The jobs which remained almost unaffected were: Assistants and Guards."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Looking at how base pay, overtime pay and benefits are allocated between job groups"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" BasePay | \n",
" OvertimePay | \n",
" Benefits | \n",
"
\n",
" \n",
" | GeneralJobTitle | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | account | \n",
" 61613.504421 | \n",
" 287.140250 | \n",
" 25708.762000 | \n",
"
\n",
" \n",
" | airport | \n",
" 84780.208073 | \n",
" 6566.248195 | \n",
" 30316.527111 | \n",
"
\n",
" \n",
" | analyst | \n",
" 82264.012407 | \n",
" 166.780416 | \n",
" 29064.369347 | \n",
"
\n",
" \n",
" | architect | \n",
" 114872.575135 | \n",
" 73.958757 | \n",
" 35758.150797 | \n",
"
\n",
" \n",
" | assistant | \n",
" 37802.116122 | \n",
" 1831.490932 | \n",
" 16931.972103 | \n",
"
\n",
" \n",
" | attorney | \n",
" 132583.724982 | \n",
" 54.810232 | \n",
" 36687.825837 | \n",
"
\n",
" \n",
" | automotive | \n",
" 68174.963729 | \n",
" 9124.607881 | \n",
" 27314.619298 | \n",
"
\n",
" \n",
" | clerk | \n",
" 42546.542254 | \n",
" 1090.232306 | \n",
" 20997.084503 | \n",
"
\n",
" \n",
" | court | \n",
" 64585.883368 | \n",
" 199.010068 | \n",
" 25710.071452 | \n",
"
\n",
" \n",
" | custodian | \n",
" 41832.585984 | \n",
" 1526.100917 | \n",
" 22156.731947 | \n",
"
\n",
" \n",
" | engineer | \n",
" 80059.337953 | \n",
" 2414.459648 | \n",
" 27963.838882 | \n",
"
\n",
" \n",
" | fire | \n",
" 107372.900930 | \n",
" 25623.372684 | \n",
" 34720.246376 | \n",
"
\n",
" \n",
" | food service | \n",
" 42747.673862 | \n",
" 1746.415862 | \n",
" 23226.094070 | \n",
"
\n",
" \n",
" | gardener | \n",
" 50754.503024 | \n",
" 2010.765047 | \n",
" 23284.807426 | \n",
"
\n",
" \n",
" | general laborer | \n",
" 75538.164022 | \n",
" 6700.997761 | \n",
" 27975.448840 | \n",
"
\n",
" \n",
" | guard | \n",
" 16689.208360 | \n",
" 1791.476922 | \n",
" 9832.407203 | \n",
"
\n",
" \n",
" | library | \n",
" 39851.052761 | \n",
" 1.457051 | \n",
" 19297.451219 | \n",
"
\n",
" \n",
" | manager | \n",
" 118890.980502 | \n",
" 37.074164 | \n",
" 39049.842377 | \n",
"
\n",
" \n",
" | medical | \n",
" 68053.483246 | \n",
" 2123.112259 | \n",
" 23100.226239 | \n",
"
\n",
" \n",
" | other | \n",
" 70091.195464 | \n",
" 2890.044988 | \n",
" 26482.241173 | \n",
"
\n",
" \n",
" | police | \n",
" 93186.880378 | \n",
" 11636.756749 | \n",
" 30192.136074 | \n",
"
\n",
" \n",
" | porter | \n",
" 38968.082877 | \n",
" 3260.083322 | \n",
" 20179.401089 | \n",
"
\n",
" \n",
" | public works | \n",
" 30809.095124 | \n",
" 3133.818901 | \n",
" 14508.889048 | \n",
"
\n",
" \n",
" | recreation leader | \n",
" 11374.130731 | \n",
" 2.426723 | \n",
" 8759.502923 | \n",
"
\n",
" \n",
" | social worker | \n",
" 57299.132002 | \n",
" 2809.181354 | \n",
" 24439.990860 | \n",
"
\n",
" \n",
" | transit | \n",
" 57626.474168 | \n",
" 12847.276152 | \n",
" 27828.666833 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" BasePay OvertimePay Benefits\n",
"GeneralJobTitle \n",
"account 61613.504421 287.140250 25708.762000\n",
"airport 84780.208073 6566.248195 30316.527111\n",
"analyst 82264.012407 166.780416 29064.369347\n",
"architect 114872.575135 73.958757 35758.150797\n",
"assistant 37802.116122 1831.490932 16931.972103\n",
"attorney 132583.724982 54.810232 36687.825837\n",
"automotive 68174.963729 9124.607881 27314.619298\n",
"clerk 42546.542254 1090.232306 20997.084503\n",
"court 64585.883368 199.010068 25710.071452\n",
"custodian 41832.585984 1526.100917 22156.731947\n",
"engineer 80059.337953 2414.459648 27963.838882\n",
"fire 107372.900930 25623.372684 34720.246376\n",
"food service 42747.673862 1746.415862 23226.094070\n",
"gardener 50754.503024 2010.765047 23284.807426\n",
"general laborer 75538.164022 6700.997761 27975.448840\n",
"guard 16689.208360 1791.476922 9832.407203\n",
"library 39851.052761 1.457051 19297.451219\n",
"manager 118890.980502 37.074164 39049.842377\n",
"medical 68053.483246 2123.112259 23100.226239\n",
"other 70091.195464 2890.044988 26482.241173\n",
"police 93186.880378 11636.756749 30192.136074\n",
"porter 38968.082877 3260.083322 20179.401089\n",
"public works 30809.095124 3133.818901 14508.889048\n",
"recreation leader 11374.130731 2.426723 8759.502923\n",
"social worker 57299.132002 2809.181354 24439.990860\n",
"transit 57626.474168 12847.276152 27828.666833"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"salaries_jobs_benefits=salaries[['GeneralJobTitle', 'BasePay', 'OvertimePay', 'Benefits']].groupby(['GeneralJobTitle']).mean()\n",
"salaries_jobs_benefits"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"data": [
{
"name": "Base Pay",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
61613.50442071801,
84780.20807317077,
82264.01240702174,
114872.57513513515,
37802.11612181428,
132583.72498170714,
68174.96372926715,
42546.54225424753,
64585.88336758029,
41832.58598439947,
80059.33795344448,
107372.90092991511,
42747.67386206894,
50754.50302362204,
75538.16402207575,
16689.208360055003,
39851.05276105357,
118890.98050200132,
68053.48324613085,
70091.19546367193,
93186.88037768792,
38968.08287707646,
30809.09512413508,
11374.130730668434,
57299.13200154768,
57626.47416807743
]
},
{
"name": "Overtime Pay",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
287.1402498864152,
6566.24819512195,
166.78041580041574,
73.95875675675676,
1831.4909315654986,
54.81023170731708,
9124.607881219921,
1090.232305825244,
199.01006849315067,
1526.1009173166924,
2414.45964822806,
25623.372684025027,
1746.4158620689648,
2010.7650472440937,
6700.997761194034,
1791.4769216674306,
1.4570507996237065,
37.07416384354789,
2123.1122593411055,
2890.0449882478647,
11636.756748889124,
3260.083322259147,
3133.8189010988967,
2.4267234600262126,
2809.181353723061,
12847.27615210895
]
},
{
"name": "Benefits",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
25708.761999550567,
30316.52711096509,
29064.36934677926,
35758.150796512986,
16931.972102684962,
36687.82583657839,
27314.61929752882,
20997.084502678703,
25710.071451580254,
22156.73194691025,
27963.83888246543,
34720.24637571046,
23226.09406965497,
23284.807425981882,
27975.448840278812,
9832.407202729017,
19297.451219463772,
39049.84237729196,
23100.226238798936,
26482.24117326679,
30192.136074445927,
20179.401088542254,
14508.88904826899,
8759.50292262364,
24439.990859984708,
27828.66683288528
]
}
],
"layout": {
"barmode": "group"
}
},
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"trace1 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_jobs_benefits['BasePay'],\n",
" name='Base Pay'\n",
")\n",
"trace2 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_jobs_benefits['OvertimePay'],\n",
" name='Overtime Pay'\n",
")\n",
"trace3 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_jobs_benefits['Benefits'],\n",
" name='Benefits'\n",
")\n",
"\n",
"data = [trace1, trace2, trace3]\n",
"layout = go.Layout(\n",
" barmode='group'\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"cf.iplot(fig, filename='grouped-bar')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The most important insights from these plots are:
\n",
"-Firefighters, Policeman and Transit are groups which work overtime the most.
\n",
"-Attorney, Manager, Architect and Firefighters are gropus which earn the most Base Pay.
\n",
"-The groups which earn the least are Guards and Recreation Leader.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Checking if there is any evidence of pay discrimination based on gender"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating gender column with some help of gender guesser library"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import gender_guesser.detector as gender\n",
"d = gender.Detector(case_sensitive=False)\n",
"salaries['Gender']=salaries['EmployeeName'].apply(lambda x: x.split()[0]).apply(lambda x: d.get_gender(x.lower()))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"salaries_job_gender=salaries[(salaries['Gender']=='male') | (salaries['Gender']=='female')][['GeneralJobTitle', 'TotalPay', 'Gender']].groupby(['GeneralJobTitle', 'Gender']).mean()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Taking a look at data and plotting it"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" TotalPay | \n",
"
\n",
" \n",
" | GeneralJobTitle | \n",
" Gender | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | account | \n",
" female | \n",
" 62720.881249 | \n",
"
\n",
" \n",
" | male | \n",
" 61554.983552 | \n",
"
\n",
" \n",
" | airport | \n",
" female | \n",
" 89959.928741 | \n",
"
\n",
" \n",
" | male | \n",
" 97818.146010 | \n",
"
\n",
" \n",
" | analyst | \n",
" female | \n",
" 81847.317095 | \n",
"
\n",
" \n",
" | male | \n",
" 84359.228566 | \n",
"
\n",
" \n",
" | architect | \n",
" female | \n",
" 113353.711455 | \n",
"
\n",
" \n",
" | male | \n",
" 114559.383938 | \n",
"
\n",
" \n",
" | assistant | \n",
" female | \n",
" 45963.404888 | \n",
"
\n",
" \n",
" | male | \n",
" 37698.158883 | \n",
"
\n",
" \n",
" | attorney | \n",
" female | \n",
" 134018.130759 | \n",
"
\n",
" \n",
" | male | \n",
" 138007.626231 | \n",
"
\n",
" \n",
" | automotive | \n",
" female | \n",
" 72252.454815 | \n",
"
\n",
" \n",
" | male | \n",
" 81672.705192 | \n",
"
\n",
" \n",
" | clerk | \n",
" female | \n",
" 47233.568245 | \n",
"
\n",
" \n",
" | male | \n",
" 37452.355623 | \n",
"
\n",
" \n",
" | court | \n",
" female | \n",
" 69189.877951 | \n",
"
\n",
" \n",
" | male | \n",
" 65668.484733 | \n",
"
\n",
" \n",
" | custodian | \n",
" female | \n",
" 45257.969013 | \n",
"
\n",
" \n",
" | male | \n",
" 45336.296070 | \n",
"
\n",
" \n",
" | engineer | \n",
" female | \n",
" 71264.697426 | \n",
"
\n",
" \n",
" | male | \n",
" 89319.615244 | \n",
"
\n",
" \n",
" | fire | \n",
" female | \n",
" 139360.142473 | \n",
"
\n",
" \n",
" | male | \n",
" 149907.709934 | \n",
"
\n",
" \n",
" | food service | \n",
" female | \n",
" 39156.752889 | \n",
"
\n",
" \n",
" | male | \n",
" 54430.824388 | \n",
"
\n",
" \n",
" | gardener | \n",
" female | \n",
" 50016.638519 | \n",
"
\n",
" \n",
" | male | \n",
" 55155.304686 | \n",
"
\n",
" \n",
" | general laborer | \n",
" female | \n",
" 80573.823189 | \n",
"
\n",
" \n",
" | male | \n",
" 87667.235885 | \n",
"
\n",
" \n",
" | guard | \n",
" female | \n",
" 16436.266490 | \n",
"
\n",
" \n",
" | male | \n",
" 23580.319757 | \n",
"
\n",
" \n",
" | library | \n",
" female | \n",
" 46529.025088 | \n",
"
\n",
" \n",
" | male | \n",
" 36206.884371 | \n",
"
\n",
" \n",
" | manager | \n",
" female | \n",
" 121272.730786 | \n",
"
\n",
" \n",
" | male | \n",
" 122426.874990 | \n",
"
\n",
" \n",
" | medical | \n",
" female | \n",
" 73540.648174 | \n",
"
\n",
" \n",
" | male | \n",
" 80181.323528 | \n",
"
\n",
" \n",
" | other | \n",
" female | \n",
" 70579.012167 | \n",
"
\n",
" \n",
" | male | \n",
" 79993.082152 | \n",
"
\n",
" \n",
" | police | \n",
" female | \n",
" 95458.979664 | \n",
"
\n",
" \n",
" | male | \n",
" 120337.144226 | \n",
"
\n",
" \n",
" | porter | \n",
" female | \n",
" 47195.294830 | \n",
"
\n",
" \n",
" | male | \n",
" 44687.008480 | \n",
"
\n",
" \n",
" | public works | \n",
" female | \n",
" 41433.817635 | \n",
"
\n",
" \n",
" | male | \n",
" 32639.754413 | \n",
"
\n",
" \n",
" | recreation leader | \n",
" female | \n",
" 11693.305312 | \n",
"
\n",
" \n",
" | male | \n",
" 12549.577932 | \n",
"
\n",
" \n",
" | social worker | \n",
" female | \n",
" 63067.066715 | \n",
"
\n",
" \n",
" | male | \n",
" 62496.266117 | \n",
"
\n",
" \n",
" | transit | \n",
" female | \n",
" 63954.873872 | \n",
"
\n",
" \n",
" | male | \n",
" 76903.350026 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" TotalPay\n",
"GeneralJobTitle Gender \n",
"account female 62720.881249\n",
" male 61554.983552\n",
"airport female 89959.928741\n",
" male 97818.146010\n",
"analyst female 81847.317095\n",
" male 84359.228566\n",
"architect female 113353.711455\n",
" male 114559.383938\n",
"assistant female 45963.404888\n",
" male 37698.158883\n",
"attorney female 134018.130759\n",
" male 138007.626231\n",
"automotive female 72252.454815\n",
" male 81672.705192\n",
"clerk female 47233.568245\n",
" male 37452.355623\n",
"court female 69189.877951\n",
" male 65668.484733\n",
"custodian female 45257.969013\n",
" male 45336.296070\n",
"engineer female 71264.697426\n",
" male 89319.615244\n",
"fire female 139360.142473\n",
" male 149907.709934\n",
"food service female 39156.752889\n",
" male 54430.824388\n",
"gardener female 50016.638519\n",
" male 55155.304686\n",
"general laborer female 80573.823189\n",
" male 87667.235885\n",
"guard female 16436.266490\n",
" male 23580.319757\n",
"library female 46529.025088\n",
" male 36206.884371\n",
"manager female 121272.730786\n",
" male 122426.874990\n",
"medical female 73540.648174\n",
" male 80181.323528\n",
"other female 70579.012167\n",
" male 79993.082152\n",
"police female 95458.979664\n",
" male 120337.144226\n",
"porter female 47195.294830\n",
" male 44687.008480\n",
"public works female 41433.817635\n",
" male 32639.754413\n",
"recreation leader female 11693.305312\n",
" male 12549.577932\n",
"social worker female 63067.066715\n",
" male 62496.266117\n",
"transit female 63954.873872\n",
" male 76903.350026"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"salaries_job_gender"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"data": [
{
"name": "Male",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
61554.98355191258,
97818.14600985224,
84359.22856594122,
114559.38393805308,
37698.15888322724,
138007.6262314542,
81672.70519206935,
37452.35562259317,
65668.48473282444,
45336.29606965172,
89319.61524365652,
149907.7099344591,
54430.82438775509,
55155.304686098614,
87667.23588492218,
23580.31975659227,
36206.88437125747,
122426.8749904398,
80181.32352803125,
79993.08215249506,
120337.1442256307,
44687.00847988083,
32639.754412903196,
12549.577932053193,
62496.266117136576,
76903.3500260415
]
},
{
"name": "Female",
"type": "bar",
"x": [
"account",
"airport",
"analyst",
"architect",
"assistant",
"attorney",
"automotive",
"clerk",
"court",
"custodian",
"engineer",
"fire",
"food service",
"gardener",
"general laborer",
"guard",
"library",
"manager",
"medical",
"other",
"police",
"porter",
"public works",
"recreation leader",
"social worker",
"transit"
],
"y": [
62720.881248808415,
89959.92874125874,
81847.3170949434,
113353.71145454547,
45963.404887693934,
134018.1307586208,
72252.4548148148,
47233.56824512543,
69189.87795107042,
45257.96901345292,
71264.69742566509,
139360.14247295228,
39156.75288888888,
50016.638518518535,
80573.82318944839,
16436.266490066213,
46529.0250882353,
121272.7307857143,
73540.64817394386,
70579.01216710889,
95458.97966369026,
47195.29482972139,
41433.817634782645,
11693.305311653117,
63067.066714723864,
63954.87387221697
]
}
],
"layout": {
"barmode": "group"
}
},
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"salaries_gender=salaries[(salaries['Gender']=='male') | (salaries['Gender']=='female')][['GeneralJobTitle', 'TotalPay', 'Gender']].groupby(['Gender', 'GeneralJobTitle']).mean()\n",
"\n",
"trace1 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_gender.xs('male', level='Gender')['TotalPay'],\n",
" name='Male'\n",
")\n",
"trace2 = go.Bar(\n",
" x=job_names,\n",
" y=salaries_gender.xs('female', level='Gender')['TotalPay'],\n",
" name='Female'\n",
")\n",
"\n",
"data = [trace1, trace2]\n",
"layout = go.Layout(\n",
" barmode='group'\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"cf.iplot(fig, filename='grouped-bar')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I can easily notice that there is evidence of pay discrimination based on gender.
\n",
"In only 4 gropus (15%) women earn noticeably more than men.
\n",
"In 9 groups (35%) women and men earn more or less the same amount of money.
\n",
"In remaining 13 (50%!) groups men earn significantly more than women.
\n",
"The biggest difference can be seen in the engineer group."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Looking at the number of people in every job, that earn more than $200k"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"data": [
{
"marker": {
"color": "rgba(255, 153, 51, 0.6)",
"line": {
"color": "rgba(255, 153, 51, 1.0)",
"width": 1
}
},
"name": "GeneralJobTitle",
"orientation": "v",
"text": "",
"type": "bar",
"x": [
"fire",
"police",
"medical",
"assistant",
"manager",
"general laborer",
"transit",
"attorney",
"public works",
"architect",
"automotive"
],
"y": [
638,
470,
322,
74,
58,
46,
44,
14,
6,
2,
1
]
}
],
"layout": {
"legend": {
"bgcolor": "#F5F6F9",
"font": {
"color": "#4D5663"
}
},
"paper_bgcolor": "#F5F6F9",
"plot_bgcolor": "#F5F6F9",
"titlefont": {
"color": "#4D5663"
},
"xaxis1": {
"gridcolor": "#E1E5ED",
"showgrid": true,
"tickfont": {
"color": "#4D5663"
},
"title": "",
"titlefont": {
"color": "#4D5663"
},
"zerolinecolor": "#E1E5ED"
},
"yaxis1": {
"gridcolor": "#E1E5ED",
"showgrid": true,
"tickfont": {
"color": "#4D5663"
},
"title": "",
"titlefont": {
"color": "#4D5663"
},
"zerolinecolor": "#E1E5ED"
}
}
},
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"salaries_more_than_100k=salaries[salaries['TotalPay']>200000]['GeneralJobTitle'].value_counts().drop('other')\n",
"salaries_more_than_100k.iplot(kind='bar')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The job groups in which the most people earn supranormal amount of moneyare clearly Firemen, Policemen and Medical."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}