{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# SF Salaries\n", "In this notebook i explore San Francisco city employee salary data, which can be found on kaggle: \n", "https://www.kaggle.com/kaggle/sf-salaries" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Importing libraries" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "\n", "import pandas as pd\n", "\n", "import seaborn as sns\n", "sns.set_style(\"whitegrid\")\n", "\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "plt.rcParams['patch.force_edgecolor']=True\n", "\n", "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot\n", "import cufflinks as cf\n", "init_notebook_mode(connected=True)\n", "cf.go_offline()\n", "import plotly.graph_objs as go" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Getting data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Doing the imports" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\gilew\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2698: DtypeWarning:\n", "\n", "Columns (3,4,5,6,12) have mixed types. Specify dtype option on import or set low_memory=False.\n", "\n" ] } ], "source": [ "salaries = pd.read_csv('Salaries.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Because of the DtypeWarning, I have to convert the types of the following columns: 3,4,5,6. I will take care of column 12 later." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Converting types" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "for column in ['BasePay', 'OvertimePay', 'OtherPay', 'Benefits']:\n", " salaries[column]=pd.to_numeric(salaries[column], errors='coerce')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Checking the head of the data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", "
" ], "text/plain": [ " Id EmployeeName JobTitle \\\n", "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", "\n", " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", "\n", " Year Notes Agency Status \n", "0 2011 NaN San Francisco NaN \n", "1 2011 NaN San Francisco NaN \n", "2 2011 NaN San Francisco NaN \n", "3 2011 NaN San Francisco NaN \n", "4 2011 NaN San Francisco NaN " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "salaries.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cleaning the data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I create a heatmap, to see the distribution of Nans in our data." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAE3CAYAAAAJ55KLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHlFJREFUeJzt3XucnVV59vHfBIiJHCpgFBAFRLxF\nUCiCRUSQ6uuJ1sOLchCUg0pFKeCpKAKtFYpYtaJoEUmEcJSDCgoFDwgipQiIYN7qJXKqJoJEoYgJ\nISHz/nGvnezEDBiynrWzJ9f385nP7JnJPOvJ7JlrP8863GtkdHQUMzNrY8KgT8DMbFXi0DUza8ih\na2bWkEPXzKwhh66ZWUMOXTOzhlZ/nK+Prj7xGZ2ewIJHZtJ1G25n5W2j18782Xd03s4aT322n5sn\n0M54e24a/X9Gxvqar3TNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05dM3M\nGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8iha2bW\nkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OG\nHLpmZg05dM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk\n0DUza8iha2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNraPVBn4AZwOSNXtZ5\nGwsemdl5G2aPx1e6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05\ndM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8ih\na2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5d\nM7OGHLpmZg05dM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuia\nmTXk0DUza8iha2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fM\nrCGHrplZQw5dM7OGHLpmZg05dM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZm\nDTl0zcwacuiamTXk0DUza8iha2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNr\nyKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05dM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlD\nDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8iha2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNOXTNzBpy\n6JqZNeTQNTNryKFrZtaQQ9fMrKHVB30C49HcWdcM+hTMbCXl0O3A5I1e1nkbCx6Z2XkbZlafuxfM\nzBpy6JqZNeTQNTNryKFrZtaQB9LMbJUy6IFuX+mamTXk0DUza8iha2bWkEPXzKwhh66ZWUMOXTOz\nhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05dM3MGnLompk1\n5NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8iha2bWkEPXzKwh\nh66ZWUMOXTOzhhy6ZmYNOXTNzBpy6JqZNeTQNTNryKFrZtaQQ9fMrCGHrplZQw5dM7OGHLpmZg05\ndM3MGnLompk15NA1M2vIoWtm1pBD18ysIYeumVlDDl0zs4YcumZmDTl0zcwacuiamTXk0DUza8ih\na2bWkEPXzKwhh66ZWUMOXTOzhhy6ZmYNjYyOjg76HMzMVhm+0jUza8iha2bWkEPXzKwhh67ZOBYR\nfzHoc7AlOXStiohYY9DnYMt06aBPoKaIePqgz2FFrdDshYg4dqyvSfrnJ3zgsdtbGzgS2JD8ZbpV\n0i8rt7EacADwLOD7wAxJs2u20dfWjcBZwHRJv++ojc8DUyX9pIvj97VzK3AlcJqkGV221bWIOBw4\ns6vnpK+dpwPrAfOBDwFfkHRr5TYuAb4HCFgIIOnbNdtoKSJ+CNwHTAUuk7Sww7YmACPATsD1kh6p\ncdzVV/D77y3v3wjcCVwL7EAGVhemAf8B7Er+0KeWxzV9CZgF/B/gRmA68LrKbfS8Engr8M2I+BUZ\nWN+t3MalwFERsTFwJnC2pAcrtwGwLfAa4B8jYgr5YnKepIdqNhIRuwBPJu/SPg8cI+mcmm0AjwKX\nRsTd5AvWdyofv+dM4BPAIcDFwGeBv67cxu/I52bb8vEo0EnotnhuJO0cEVsCBwFHR8T3yOfojprt\nRMSJwB3AJsB2ZNbtX+PYK9S9IOlLkr4ETJD0HklnSzoCWLvGyS3D+pKmAfMl/Sf5KlTb5pKOBeZK\n+ibQWZ+YpAckfRF4J/mHfk5EXB8Ru1ds43JJewJvAF4G/CYiTo+ITWq1UdpZSL4gTiP/0P8euCIi\nDq7ZDvBJ4DbgMOClwLsrHx9JJ0t6CXACcGBE3BkRR0fEOpWbWgO4ClhX0lms+EXQn5B0YP8b8NHa\nbfTp/LkpZpGBOAfYGjgpImrfWe9csu0lkl4DbFzrwLWe5PUjYnNJt0fE84Dav5yLlONTrtwe7aCJ\n1SPiqaWNtSm3ZF2IiPcAbwceBE4juzXWAP6LSn1x5argAOBvyT/wncnn/SJg+xptlHY+SQb71cCJ\nkn5Ubs9uAk6t1Q4wl7zqWCDpnoh4UsVjA4ue973IK5s5wD+QP7OLgd0qNjUR+FfgmnKVWL1fPCI+\nBryntPVk4BfAVrXbKVo8N+eTQXsWsJ+kWeXzNwJjdnc+AatFxIuBuyJiIjCl1oFrhe7h5FXaM4B7\ngP0qHXdphwFfAbYELiR/mWo7muwm2ZAMvyM6aKPnGcA+ku7s+9z8iPi7im2cRobeP0ma2/tkRHyl\nYhuQVzgv6u9OkLQwIt5UuZ0/AN8FvhgR7wX+p/LxAW4GzgXe3v/cRMQ2lds5CHg1+fy8EXhb5eMD\nvJa8Svs34DPAFztoo+dBun9uvgxcJ+mhiNiw7/M7V25nOtlFchB5BX9SrQOv6EDadWQfESx5qz8q\naacVObFBK/2SsyV1tk46ItYHXkVe4YwAG0k6oYN2Nlyqjes6aGML4M1LtVPzxaPXztrAMyX9d0Rs\nTT5H91RuYzVJj/Z9PEXSfTXbKMc9SdLhfR9Pk3RQ5Tb+Q9JrI+JMSW+LiB9Kqh1QvbaeRHbP9Z6b\n2yTNq9zGPwLrSPpARFwA3CTpEzXb6NqKXunuXeUs/kwRcTz5yrMoCCVtVOnY/S8g/Z+nwxeQC8jb\nvReSt2ZzajcQEVOBlwBrkreXtwM71m4HOAP4JnnFMQtYq+bBI2IDsttqOvC2iHgu2b10CfDimm0B\nH42IQ8kXkDXJn9mWtQ5eupU+AkwpdwIj5a3qTJzi1xFxEPDHiDiBDrv+gKeSg7ZTyDvRNYHrK7fx\nekkvApD0loi4lhyMrCoi7mTJPHhQ0rZj/fvlsUKhK+nuGiexHHYHNq396ln0XkAmAv1TQ9broK1F\nJL07IqaRg2k/6KCJLck+vC8BR5F/DF2YI+mEiNhC0kERcU3l4+9IdmMFi/uIFwJXVG4H8vfsWcCn\ngc+Vt2okfTEiTiWfj+PLpxd2dFf1d2T3wgVk336XF0qnkj+zY8jf5TOo/wK/MCImSnqkzA3vaq3B\n88r7EeBFwFtqHXjYFkfcDEzq6NjzgCeR03gmlseTybDqTERMIq8IRql8dVj8ofwxr1nmG0/soA2A\nkXI1ulZErEnlFytJ35C0G7CvpN3K2yskHVOzneJ+SQ+Tt7Ei7xCqiYjXSloAzCQH6/YnZ0lU7Voo\n1gQOJqejzWTJC4raJkm6kuxeFPBwB22cAsyIiIuAn5SPq5M0r7w9LOlactpYFdWnqHRsBjnl6R7y\nFWhU0rMrHbvllVTPF8iBum8DvwJ+2EEbN0XEB4FZEXEesFoHbQB8DHgTOap8J9kNUE1EHC3pOLJr\nYYmBWklvrdkWMDMiDgDmRsTHgadUPv5zyvtNKx93WVrMbe+ZFxGvJkf+d6SD0JU0tSz4eDZwe4cL\nl05gcffChlScxTRsobsXsBnwQO0DS/oG8I2IeJ2ky2off4w2L+o9jogLuli0IOmoMvg0lxzJ/lHt\nNsr81Rsl9bpHnla7DXJkHPKWde5j/cMKDia7F74GvINcwFLTW8iR8XUlHVr52EtbX9K0iNhP0n9G\nRBdz23sOBj5F9u1+kA7m6UbEtqWdSeVjag8+Fj/ve3wL+cJVxbCF7t3AH7vo0215JRURbyGn78wh\n5xreUDtwy/zc48gpVkdK+gM50FVVGXD6ALAgIg6V1NWdwX5lUPDD5GrB6uEREU8D3g88BHxG0hxy\nqlVtD5eB2+dFxAv7vyBpl9qNNZjb3vMaSYv6jCPiMCr3hwOnAyeTd4Zd2qH/BTEippNz6lfYsIXu\nM4HbI6K35K/m1LRLyvtO+oiWcgQ5Y2Fdsq/t9R20cQq5omo9cp5hlSWMy/BWsktmHbI/vKvQ/Q7Z\nh7cxWUegF7qj5K1mDdOBb5HPy4nkqrouvI78f3yJXALcxQvICyT9lOwy63Rue0TsQ/4O7xYRvWXM\nE4AXUD9075F0WuVjLlLmFx8NrBsR/7d8egT471ptDFvo7tXhsT8K7CXp6g7b6Jkn6X7g/jLo1IVH\nJV0O0NEATc/DykIgs8vKnU5I+ig5lesYSR/vqJmJkk4GiIjaNTAWKYNod0XE68iFRJuQxZX+X8Vm\nLoiIUyR9lpwy2KXLgd8A67N44HkhOdWutrsi4sPkoPoo1C3gI+kLwBci4ihJ/1LruP2GLXTXIPvD\nFk3AJ6fE1FBtmd9y6rKPrafVLJUW/5evRMRZ5PN1IVlprtZc0P4pWy1+Zv8OzCaL3NxC3inUqrvx\nIuBTEXEFsH/tBST9ygXEVcBV5YVkK+AXkmpPG4ScVRTlDbor4HNKuYKvvnBp2EJ3Ot1NwN88Ipb5\nyibpqIrt9Lc1snS7FdtaPyJeVdpYrzzutVHzl3SriDintNN73Gun9gAU5JVUV3NBnxwRm5GBO7k8\nHgFQ5SpWxRaSDo6I70u6OCL+odaBJf0ROKTUdLg2Iq7v+1oXz0tvxP+5wDXA/hGxi6QP1GxD0oFl\nYczmwE/JHOjChXS0cGnYQrfLCfhzyL7CFo4d43FNPwb2KY9v7ntc+8pgz/J+Ndr0h0+SdGUZ+FRE\n1JyW9CiLp7ot6Hs8ClQf4ALWiIj1gNHSzVS1uFIZQDuBvAqtOoVvDLtIemlp+ySydklVZeD2TeRY\nxenAFkAnM0C6Wrg0bKHb5QT8eySdUfF4Y+pvJyJeQQ4EXU++stZq48C+NtYh+w3vKFdA1fT6wCPi\n25Je9Xj/voLO5oJ2VZPgMRxLBtPTyal81a4KI+JIcsrWoZJa7R6xRkRMUJb5HGEZy+or2JssUXql\npJMi4oYO2gC6W7g0bKHb5QT8myoe689SuhU2JkeWHyHX4+/zmN+0/G3sQY7Grg6cHxGjZWpcbQ9E\nxBtYcoeCai8ifZaeC3pIrQOXO6dlBkUXU7nKFXsAGwD3qu4uCNsD20v6XcVjPp7zyK6M/wL+qnxc\nW6+vvfc8dVESADpcuDRUoVsm33cyAV/SB2FRpaR3kR31M8jdHLqa27izpF1Kn94ZEVEtQPq8n+zz\nvJyct3tjeV/bFJYsgzlK/V0QkPRruqsfcEBHx12mcpdzOEtO9K9ytyCpWq2AxxMRvfmrs4Gzyf/P\nOSxe0FLTuWQGbBIRlwHf6KANgLt7i5ciq5n9Za0DD0XoRsT3WfYVyKikV1Ru7gxyEcZ3yduYaXQ3\nx3X1cgszGrk3WxfhvlDSvHKFOxoRVbsXeiTtFrnzbK8bo+o2PT0RcRRZVHwOi5eCV6k0J+n20sZm\nLFWmEnhvjTaW8llyb7SuJ/p3bekKbCPAgeRzVLUvWdLny3S+rYGfl7nI1UTEy4DnA++LiM+UT69G\nPv9b12hjKEKXP11OuA1ZVLj23lgAG/Stqrk4Irqct/tvZLfGFLJPt4vVT9dExLnAxhFxCtBJH1jD\nbow9yek71ctg9jmLvDPYhdwJYXJH7fyqN5d6mEn6SO9xRDyHHOD6Fh1sAFAGtnpeGxHzyRetL5Sp\nayvqfrK750nl/Xrk9lPVZpYMReiWikWUdeMfJpfj7V1zIUPfxP47I2IHSTeUJZpd9EsCIOmC8qr9\nHPLqsHr/W6m98BpyNsPPJH2rdhtFq26Mu+i+9sJcSR+PUlS88iyZfvdGxMksOdF/2mN/y8qrrOY6\nAnhfh79nk8lFF9eQv287AL8l71BrrOycSI4b7Ur2i/87WevllgrHBoYkdGHRzgTTgVvJddG1b19F\n/uKPAC+PiF6pxy7K0wF/snvqeWW1VdWr93Kr/Fzy//X8iHi+pE/WbKNo0o1B/lH8NCJ+yuKgqj3v\ndCSyEPdaETGZ7moq/7q837Sj4zcRuU3XV4DfAy+udMU5limSeoPNV5RZM8dERK0pXceT2zT9T0Rc\nThaJ+iVZ8OaSx/zOP9NQhG6Zm/f+8nZZ+dxEAFXai17SZn3tjZC3/LMrjygv7ZPAvuRI6UuB86nf\nZXIxWS2ryz8EaNSNQdZE6Npx5JLzc8l9vs7uopESFi9n8ZTBLnaOaGEGOfvmSnIJ7aIvdPCCuE5E\nPE/SzyOLOq0Vue1VrSldEyTdGhEbkTWofwwQEatcacfe/MXe5npdFDsBoPwRTAP+lyx68S5J36nZ\nRp/Od08l+w3/qYPjLqFhN8aPgSPJGqeXknc+tU3q1WAAvl76q6uLrNW7OXknMkruJLFvF2117I0N\n2zoUODty37+5ZP/xXizegWNF9aakvYYcTO/NaFq70vGHI3T7r0IbOI6cyjWr3DZ9jaxw1YUWu6d+\nMyI+QV+VJEnVVydFxKbk6O5kYLuI2E7SP9duhw6LckfE7mQ/4X4RcWb59ARgD3LL+tp27Zsy+JWo\nuwt0M42KRPXa+lGZWnkouanr0ysXQPpu5L5rzwReHxGbk/26X63VwFCEbk9EbEUuNX0Kecs3o4Mr\nqkclzQKQNLPyMtOl7cmSu6d+uYM29gZ+xuJpPV3tbnwuOYjWWWGVosui3DPIEet55LRByIUe+435\nHStm9XIV1eWUwXGhdCfuQ07dmkeWEt1MUtVBVUknRu5M8VtJv+uFrqSv12pjqEKXrM15IBlOU8kr\nntqh+2BE/D05AXsXcnCgK88E3hARb6Z+1bSeeZK6WHSxtDmSPtagnc6Kcis3Wp0aWbB6c/KF6jZJ\nM2q1sZTPkbM8ngZcRxbntmW7i3xh31fSbZFby3cyi0XSz/oe307lEpXDFrpI+mUZIb8vIv5Q67iR\nlYsguxc+SPbp/IYM+a50WTWt5+6I+AjZF1q9/mjfz+3eyFJ4/e10Md3uMHKkfFuyElQXLyjvIp/3\n64GjI+IsSdXnUEs6r0wZ3AL4paT7arcxjpxEFszfNCJOo00Z0U4MW+j+vvR7rRkRe1N3r7Rl7fr7\nEnKPrE9XbKdf19uWQ66qem55g/pVxvp/bgf3Pa66DLiE+6fJmhsfBr5OhtXW5DzXmt4G7CRpfrmt\nvZaKC1ci4slk5aq7yGA/liwW8yFJtf8v44KkE4ETI2JX8me3Q0ScCJzZ4Z1IJ4YtdN9BjvDOJicu\nv6PWgZXbey8hIiaQZfG6Ct1e1bS1o3LVtIhYXblDQaeDM72fW0T8TX//ekTsOfZ3PSHTyIJH65Fd\nStsB95H9yGc+xvc9ESOS5kNOSYyI2tuWn06+eGxFFu/5FLmq6nPk0nMbQxm0uzoinkK+OJ5JxboI\nLQxV6Ep6sNyO3U5eIXS2FLQMbOxMd0tAIUPkjeQvzh3UDY/p5O1Yb9EHLC63V22aXUT8DTnHeJ+I\n6O1Xtxq5Ouj8Wu2Q0+q+U9o8XNJt5XEXNR6ui9yu/hryd6DWzhQ9G0raEyAibpV0anlcteD3eCbp\nAXJH5c8P+lyW11CFbjQohdhnErkx4WG1D7zUrfKF5K0yVJxz2jcpfU9JixYqlHnINd1C7o21Iblt\n9QSyAPi5ldvpn5zeP6Ok2rY6EfFVSXtJel9kmcotgfMkXVyrjWJ+3+PZfY9bbatkAzRUoUubUojA\nou1O3tzR4ftvlS9lyVvlKnNoI2Jn8va1v1rSBHJ+Y5VqScX95M/pFnLwcQtyLXztF8NlbQs0QlaE\nqmXRPnklaGuHbc9GkZuFjiz1eMOO2rOVyLCFbotSiC20uFV+gMXVknp/zAupWC2p+ARwQf+Ci4h4\nB/Cv1O1P7u8jPmWMxyuq1T55FwCbLePxhRXbsJXUsIXu0qUQP/PY/3yl1fmtchnRnVGm13WxMqxn\nG0lL7FElaWpEvLNmI41WPTXZJ0/SMQARcQQwXVKXc8FtJTNsofsDcmDjOcCdkmY/zr9fWbW4Ve55\neUQcr+52v5g/xucXdNRel5rtk1csAC6NiLuBqR3W+LCVyLCF7kVk3+dUBrCnWUUtbpV7pgCzIuJO\ncubCqKSdHud7lsfvI2J7STf2PhER29PtSr6uNP2dKkV1To6IbYAjI+JU8nf7c5K62OrGVgIjo6Nd\nLcXvRinndhA5n/F75BXCHYM9q5VXRGyy9OfKctdax9+UrDN6FTmVbzPglcDfSrqzVjvjUUSsTVbI\n2p/s2jiNvBA6eFnzxm18GMYpKrPIOa1zyFH4kyKiyz7LYbeALHt3KlmVaYOaB5d0F/Bi4GqywPiP\ngL9y4P5Zbib3lHu7pFdLukDSudSfF2wrkaG60o2I88mgPQs4vVcNLCJulLT9QE9uJRURl5Jzgo8h\n95o7Q9KOgz0rg1yA09/XHhFTXH9h/Bu2Pt0vjzHYsHPzMxkekyRdGRFHS1LHpSpt+Xw0cleUNYA1\nye6ZpXfWtXFm2LoX7o2IGyLiNxFxc0RsByDJQTK2eRHxamC1iNiRDvd8s+W2O/AscoumF9BNEXtb\nyQxb6J4EvFPShmTpvaFbdz0AB5M/q6eSJStb1Na1P8/95YJhHUkiNym1cW7YuhcmSLoFQNJPImIY\n54K2tgdwSMc7tNoTMzMiDgDmlv3SnjLg87EGhi1055eqVteQuzrMG/D5DIM1gO9EhMg+8asGfD62\n2MFk98LXyDKltXfOtZXQsM1e2ISsPboludHih2rOOR3PImIH4EPAX0raYtDnsyqLiKcB7wceAj4j\nqbMSpbbyGYor3VK9H3L7nH1ZXBfWHkdETCa7GPYnf27HDvaMjKwk9y1gXeBEsoSorSKGInRZXIi7\nP2yrF+Qepx4iFyy8vVfNzAZuYlkCTCnKb6uQoQhdSb3Sd70dHaaQWyQvHPu7Vm0RsRZZSPxmslj6\nORHxW2Afr+sfuP67tGGbQWQraChCtyci3kSWc7wfWCciDnFlpjEtq87tO6lf59aW35MjYjMycCeX\nxyMAriMy/g3bq+yx5Lr+7ch9uY4f8PmszLbpD1wASacBLxzQ+dhij5L9uqeTtTGmA2eUj22cG6or\nXeB3kn4LIOneiPBt8tjGU53bcUWSl62vwoYtdB+MiCvIilbbk7dp/wLVt1MZD8ZTndtxJSKuYYzZ\nN5J2aXw61tiwhW7/RoEzB3YWw+GDwCURcRVL1bkd5EkZAAcM+gRscIZtccRfALuS26MDIOn8wZ3R\nyq1s4rk7Oa1uJnBx2eXYVgJlAO3N5KrBEWAjSe8d7FlZ14btSvfb5Eq0B8rHo4BDdwylmMpFgz4P\nG9NZwOXkkvZ7gcmDPR1rYdhC938lHTjokzCrZK6kj0fENEkHlb5eG+eGLXSviIh3k1e7AEj6wQDP\nx2xFjETEFGCtslx7vUGfkHVv2EL3ZcCTyH5dyO4Fh64Nq+PIjSnPJQuYnz3Y07EWhi1015L0ykGf\nhFklk3o1GICvR8QeAz0ba2LYZi98ltwp9ceUeY6SfjHQkzJbThGxO7AjsB9wZvn0BGAPSd4jbZwb\ntivdbchlrCPl40nASwZ3OmZPyAxgA7IIf68e9EIyhG2cG4raCxHxVQBJuwGXSdqtPPYmizZ0JN0t\naSq5GeW15CrBGyTdNNgzsxaGInSBp/U9fl3f4+HpGzH7U+8iuxdeAZwREe8b8PlYA8MSuv1GHv+f\nmA2FtwE7STqU7CbzHmmrgGEJ3dExHpsNsxFJ8wEkPQI8MuDzsQaGZSBtq4g4h7zK7X/8/MGeltkK\nuS4iziN3t96ZnJlj49xQTBmLiF3H+pqkq1uei9mKioivStqrPH4Dubv1zyRd/NjfaePBUISu2XgS\nEVdK+utBn4cNhkPXrLGIuJsxlvy6GP/4Nyx9umbjyRxAgz4JGwyHrll790g6Y9AnYYMxLFPGzMYT\nrzxbhblP18ysIV/pmpk15NA1M2vIoWtm1pBD18ysIYeumVlD/x9X2OljIz+wwQAAAABJRU5ErkJg\ngg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.heatmap(salaries.isnull(), yticklabels=False, cbar=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There are two columns which are practically filled with Nans, so I decided to drop them. \n", "I also drop the Nan values in BasePay, since there is only few of them and Agency, since I do not need it.\n", "Next I will try to analize the Benefits column and decide whether I can fill in the values or drop the column." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dropping columns, which have too many nans (Notes, Status)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "salaries.drop(['Notes', 'Agency', 'Status'], axis=1, inplace=True)\n", "salaries = salaries[np.isfinite(salaries['BasePay'])]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plotting a distribution of benefits" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEBCAYAAAB4wNK4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xl4k9eB7/GvNq/YBluADWFzICcm\nCRDIAklolmZvkibt3JlO2pvbtGlJl+l2e7tMp03bJ22mz0wmbbrO0HbSjS7TNG3SNGuzQxIaAwkE\nc4CACYsNtsGbbMm2pPvH+5oII9sSyJYt/z7PwwM675F0jgH99J5z3vN64vE4IiIi3mw3QERExgYF\ngoiIAAoEERFxKRBERARQIIiIiEuBICIiAPiz3YCTUVtbqzWzIiJpWrZsmSdZ+bgOBIBly5aN6vvV\n1dVRU1Mzqu852tTH3DER+qk+pqe2tnbQYxoyEhERQIEgIiIuBYKIiAAKBBERcSkQREQEUCCIiIhL\ngSAiIoACQUREXOP+wrTR9MOf3McW+wbBiuAx5ZXBMj7ywfdnpU0iIpmiQEhDY3Mb1RdeR1Vl1THl\n9pkHstQiEZHM0ZCRiIgACgQREXEpEEREBFAgiIiIS4EgIiKAAkFERFwKBBERARQIIiLiUiCIiAig\nQBAREdewW1cYY7zAD4DFQAS4zVq7M+H4h4BVQB9wp7X2z8aYILAGKAQOALdaa7vSrPtZ4B+BGPBN\na632hxARGUGpnCHcCBRYa1cAXwDu7j9gjKkEPgFcCFwF3GWMyQe+Aqyx1q4ENgKr0qw72a27ArgS\n+HYmOisiIoNLJRAuAh4FsNa+BJyTcOw8YK21NmKtbQN2AosSnwM8AlyeZt0QsAcodn/FTrSDIiKS\nmlR2Oy0F2hIeR40xfmttX5JjHUDZgPJkZcPVBdgLbAV8wF2DNa6uri6FLmRGc0szRb29NDQ2HFc+\nmu0YaeFwOKf6k8xE6CNMjH6qj5mTSiC0AyUJj71uGCQ7VgK0JpR3JylLpe41QBUwz637mDFmrbV2\n/cDG1dTUpNCFzAhWBPEHAsdtf91eERzVdoy0urq6nOpPMhOhjzAx+qk+pqe2tnbQY6kMGa0FrgUw\nxiwHNiccWw+sNMYUGGPKgBpgS+JzcD7cn0+z7hGcgIhYa8M4ITE5hbaKiMgJSiUQHgDCxph1wD3A\np40xnzHG3GCtbQTuxfkQfwr4kvsBfifwHmPMWpyJ4e+lWfd54G/AS8aYF4HtwBOZ67aIiAw07JCR\ntTYG3D6geFvC8dXA6gHPOQhcneS10ql7B3DHcO0TEZHM0IVpIiICKBBERMSlQBAREUCBICIiLgWC\niIgACgQREXEpEEREBFAgiIiIS4EgIiKAAkFERFwKBBERARQIIiLiUiCIiAigQBAREZcCQUREAAWC\niIi4FAgiIgIoEERExKVAEBERQIEgIiIuBYKIiAAKBBERcSkQREQEUCCIiIhLgSAiIoACQUREXAoE\nEREBFAgiIuJSIIiICKBAEBERlwJBREQABYKIiLgUCCIiAigQRETEpUAQERFAgSAiIi4FgoiIAAoE\nERFxKRBERARQIIiIiMs/XAVjjBf4AbAYiAC3WWt3Jhz/ELAK6APutNb+2RgTBNYAhcAB4FZrbVea\nda8B7nDfZgPwMWttPCO9FhGR46RyhnAjUGCtXQF8Abi7/4AxphL4BHAhcBVwlzEmH/gKsMZauxLY\nCKxKs24J8G/Addba5UA9EMxAf0VEZBCpBMJFwKMA1tqXgHMSjp0HrLXWRqy1bcBOYFHic4BHgMvT\nrHsBsBm42xjzPHDQWtt0wr0UEZFhDTtkBJQCbQmPo8YYv7W2L8mxDqBsQHmysuHqBoFLgSVAJ/C8\nMeZFa+32gY2rq6tLoQuZ0dzSTFFvLw2NDceVj2Y7Rlo4HM6p/iQzEfoIE6Of6mPmpBII7UBJwmOv\nGwbJjpUArQnl3UnKUqnbAvzNWtsIYIx5DiccjguEmpqaFLqQGcGKIP5AgKrKqmPK2yuCo9qOkVZX\nV5dT/UlmIvQRJkY/1cf01NbWDnoslSGjtcC1AMaY5ThDOf3WAyuNMQXGmDKgBtiS+BzgGuD5NOvW\nAmcaY4LGGD+wHNiaQltFROQEpRIIDwBhY8w64B7g08aYzxhjbnC/wd+L8yH+FPAla20YuBN4jzFm\nLbAC+F6adZuALwKPAS8Df7DWbslct0VEZKBhh4ystTHg9gHF2xKOrwZWD3jOQeDqJK+VTt3fAL8Z\nrn0iIpIZujBNREQABYKIiLgUCCIiAigQRETEpUAQERFAgSAiIi4FgoiIAAoEERFxKRBERARQIIiI\niEuBICIigAJBRERcCgQREQEUCCIi4lIgiIgIoEAQERGXAkFERAAFgoiIuBQIIiICKBBERMSlQBAR\nEUCBICIiLgWCiIgACgQREXEpEEREBFAgiIiIS4EgIiKAAkFERFwKBBERARQIIiLiUiCIiAigQBAR\nEZcCQUREAAWCiIi4FAgiIgIoEERExKVAEBERQIEgIiIuBUIamvryebWhO9vNEBEZEQqENGyLTOa5\n+hBvNHVmuykiIhnnH66CMcYL/ABYDESA26y1OxOOfwhYBfQBd1pr/2yMCQJrgELgAHCrtbYrnboJ\n7/0w8Cdr7Y8y1ekT1R4NAPDwaw18/LL5eD2eLLdIRCRzhg0E4EagwFq7whizHLgbeCeAMaYS+ARw\nDlAAvGCMeQL4CrDGWnufMeYLwCpjzK9TrQvc4773nUB5hvp60jpiAUrzvTS2h6mtP8K588ZM08a0\nH/7kPhqb244rrwyW8ZEPvn/U2yMiyaUSCBcBjwJYa18yxpyTcOw8YK21NgJEjDE7gUXuc77p1nnE\n/fMbadS9xxjzd0DMLcu61q4eInEf51QWsK8THt/ayFmnlFEQ8GW7aWNeY3Mb5pKbjiu3zzyQhdaI\nyGBSCYRSIPHrXdQY47fW9iU51gGUDShPVjZkXWPMmcDNwN/hnEEMqq6uLoUunDzbFAagJADnVAa4\nv6WLl+1eTgvm09zSPGrtGA3hcDij/Wluaaa0sSFpebZ+bpnu41g1EfqpPmZOKoHQDpQkPPa6YZDs\nWAnQmlDenaQslbq3ADOBp4C5QI8xpt5a++jAxtXU1KTQhZO3PbIfOEBFcR7zZ8/g/tfbIK+Yqsqp\ntFcER60do6Guri6j/QlWBKmqrDquPJs/t0z3cayaCP1UH9NTW1s76LFUAmEtcD3wO3cOYXPCsfXA\nN4wxBUA+UANscZ9zLXAfcA3wfDp1rbXf6n8DY8xXgcZkYTCadjeHgDilBT7y/T4KAz5au3qy2aQJ\nJdk8hOYgRDIrlUB4ALjCGLMO8AC3GmM+A+y01j5ojLkX5wPfC3zJWhs2xtwJ/MxdVdQM3GytDaVa\nN+O9zIA9LV0Ue/vwe52VRZOLArR29Wa5VRNHsnkIzUGIZNawgWCtjQG3DyjelnB8NbB6wHMOAlcn\nea2U6yYc/+pwbRwN9S0hSrxvBcDkojyOhHSGICK5Qxempai+OURpYiAUBjiiISMRySGpDBlNeG1d\nvRzp6qW6MPEMIUCkL0Z3TzSLLRs/DrWH+eu2QxxsD9PW3cvy6gpmx1N/fiwOOw524PF4qJ5arIsC\nRUaAAiEFew6HAI49QyjKA6C1W2cJw4nH4Q8b93OwPcy8YDEVk/J5dnsTM/1VfLa7l7LCwKDPbevu\n5TtP7uB/2uYSXlcPwNSSfN62IEhxGoEiIsNTIKTAWWEEJb5jh4wAWrt6Ne42jP19RbzZ2cWNS2Zy\nnnt198u7W3hw035u+v5a/uuWc5g/bdJxz6vdc4RP/mYjDW1hZvm7uWjpmUT6oryws5n7N+znnMKy\n0e6KSE7TZ1kK9rR0AQyYVHYDoVsrjYYSj8fZ2F3OlKIAy+ZMOVp+/rwKrpq0n7buXm76/lqe3HqQ\neNz5yn+oPczXH9rK3//niwD8z+0ruHjSQRbOKOXs2VP4+KXzWVhVSm13kNo9R7LSL5FcpDOEFNS3\nhKgqK8DveWuMojjfj9/robWrZ+xstjQGPfZ6I4ejBfzd6dPxeY8d958eCPPgqov48M9f4bafv8KU\nogALppWwaV8r0Vicdy+dyZfesZCywgB/Sniex+Ph3UtP4Z5HWvj4mg08/ImVlBfnjW7HRHKQAiEF\n9c0h5lYUQ8KXUa/HQ1mhrkUYzvee3kmpt4fFsyYnPT5zciG/v/0C7t+wjy3726hr7OBdZ8/kI5ec\nypyK4kFftzDPxyXFjTzemc+df97Kf/zDkpHqgsiEoUBIwZ6WLq48Y/oxgQD9F6dpUnkwh9rDbNnf\nzrLC9uPODhIV5vl43/I5gHtF8s42frrz6WPq1G569bgL0yr8Ed5/4Vx+/PwuPnrp/KTzECKSOgXC\nMCJ9UVpCPVSVFdIy4Njkwjx2HOpw7uQgx3lxl/MTq/Inv8tc7YaN3PGt7xxbtulVbv7UV4+r+9yL\n65O+xqq3VfPLl/Zw7193cO8/nn1yDRaZ4BQIwzgScoaEyovzjg+EogAd4T6iBaPfrvFg3c4WSgv8\nTPFFkh4PRXqP+9Y/2Ad/MrUbNnLv939EtbeCB1/tw7P7RSb7erXHkcgJUiAMoyXkfJgFJ+WxY8Cx\nyUUB4kAoph9jv8RN6B5um8MUX4SNr75KzaXH3w/hZPUHyqxIH9sft7xRsoibz5utPY5ETpCWnQ7j\nsLtfUXlx/nHH+i9OC8UGv7BqounfhG7aee+gMxZgcc0CQt3JzxAypSjfz4rqCl7f33b070tE0qdA\nGMZbgXD8ssb+i9N0hnC8XU3OxXzVU0dnond5dQUeD6zffXhU3k8kFykQhtHS6QRCRZJAKFMgDGpX\nUyfFeT6mlxx/ZjUSygoD1FSV8sqew0Tj2udI5EQoEIZxONSDz+tJut+O3+elJN9Pp4aMjhGPx9nV\nHKJ66iQ8o7gJ3fnzKujqiVLfM/j1CyIyOAXCMFpCPUwpCuAdZB19WVFAZwgDtIR6aOvupXrq6H4w\nnzq1mOCkfGxEexyJnAgFwjBaOiNDbotQWhCgK+YbxRaNfXsPO3s/zR3iSuOR4PF4OH9eOU3RQrbs\nbxv+CSJyDAXCMA6HeqhIssKoX2lhgK64zhASNbSF8Xs9BCeNzvxBoqWzp+Alxv0b9o36e4uMdwqE\nYRwO9VA+afAzhLICP71xH109faPYqrHtQFs3lWUFQ25XMVIK83zMCnTx0KsH6IvGRv39RcYzBcIw\nWkI9SVcY9StxJ5sPto/sWvvxIh6HhtYwVWXZu3x7Xl4HzZ09rH1j4LXlIjIUjXUMoTcao627d9g5\nBIDGNuduYBNdV9xPd2+UqrLsbfB0aMs68qpv5Gu/fJKVxYcAtJ2FSAoUCEM40jX4NQj9SgudH+HB\n9vCotGmsO9zn/KyyeYbQHYmwaHaQ1/YHmHfRCvL8Xm1nIZICDRkNYahtK/qVFfQPGSkQAA5H8/EA\nlaXZ3fFvyazJ9PTFqGtoz2o7RMYTBcIQDvdfpTzEpHJ+wIefGI0KBACORPMpL84jP5Ddpbhzg8WU\nFQbYtLc1q+0QGU8UCENoDg0/ZARQ5O3TGYLrcDSPqsnZv0GE1+PhrJll7GzqJNwbzXZzRMYFBcIQ\nDnc6K4eGu1+vEwhaZdQR7qUjlpfV+YNEC6tKicbi2IMd2W6KyLigQBjC4VAPHs9b21wPpsjbR2Ob\nzhC2NTofvGMlEGZXFDEp38/rBzSPIJIKBcIQnH2M8oa9wKrIE+VQR5hYLD5KLRubtrofvNlccprI\n6/GwsKqU7Y0d2gFVJAUKhCEcDvUMO1wEzhlCbzR+dJnqRLX1QDv5niilBWNnNfPCGaX0RGMc6B0b\nISUylikQhtCSRiAAE36l0daGdsp9kVHd8no41VOLKQh42dM7OjfqERnPFAhDOBzqITjEktN+/YEw\nkVca9UZj2IMdlPvG1uS63+vl9MpS9vUWa28jkWEoEIaQ+pCRs6xxIq802tUUoqcvNuYCAZzVRpG4\nT7fXFBmGAmEQ0ZgzJzDUVcr9Cj19eDxM6JVG/VcET/GPvXmU06aX4CPGo683ZrspImOaAmEQR7p6\niMeHvygNwOuBiuL8CT1ktLWhnTy/lzLv2AuEPL+XmYEuHn/94IRfCSYyFAXCIN7ax2j4QACoLJvg\ngXCgHTO9hCzcAiElswOdNLaHeXWftrIQGYwCYRAtnaltW9FvekkBjRN0DiEej7O1oZ2FVaXZbsqg\nTgl04fd6NGwkMgQFwiCOniGksMoIYHpZwYQ9QzjYHuFwqIeFM8ZuIOR7Y6w4tYLHtjQSj2vYSCQZ\nBcIgWkLOt/2h7qecqLK0gMOhHiJ9E28jta0Nzg3tx3IgAFx9ZiX1LV1sP9iZ7aaIjEkKhEE0d0Tw\netKZQ3D275mIK436t6w4vbIkyy0Z2hULp+PxwKNbNGwkkowCYRBNnc41CKneKH5OeREAe1q6RrJZ\nY9LWhnbmVBRR4t4saKyaVlLAstlTeEzzCCJJDbvpjDHGC/wAWAxEgNustTsTjn8IWAX0AXdaa/9s\njAkCa4BC4ABwq7W2K826nwbe477NX6y1X8tMl1PT3BkhOCm14SJwbsgCsKclBEwdoVaNTXUNHdRU\nju3hon5Xn1nJnQ/X0dBRRk22GyMyxqRyhnAjUGCtXQF8Abi7/4AxphL4BHAhcBVwlzEmH/gKsMZa\nuxLYCKxKs2418F7gAmAFcKUxZlEmOpyqdANhWkk+hQEfu5sn1hlCZ6SP+pYQNWN4hVGiq86oBGDd\nm6Est0Rk7EklEC4CHgWw1r4EnJNw7DxgrbU2Yq1tA3YCixKfAzwCXJ5m3b3A1dbaqLU2BgSAUR2c\ndwIhtfkDAI/Hw5yKIvcMYeLYvK+NeBwWzSrLdlNSMqu8iIVVpQoEkSRS2ae4FGhLeBw1xvittX1J\njnUAZQPKk5UNWdda2ws0G2M8wL8BG62125M1rq6uLoUupK+pPYy3J3TM6ze3NFPU20tDY8MxdZtb\nmqmrq6MiL8r2hiMj1qbREg6HU+7D45udC70Kuw5RV9dCc0szpQN+Pl3d3cf9zAYrH6m6/X9HAMum\n+/jlpghrazdTXjR2tuoeCen8XY5X6mPmpPK/oR1IXD7idcMg2bESoDWhvDtJWSp1McYUAD/FCYmP\nDta4mprMjwR39fQR7tvFgtlV1NScerQ8WBHEHwhQVVl1TP32iiA1NTWcWQ9/e6Ge08zpKU9Gj0V1\ndXUp/1wbXqllTkURy88+E3B+RgN/PkWFhceVDVY+UnWf3rOX3zz4OABt0TzizOa7D7/CxTM9fOSD\n7x+2n+NVOn+X45X6mJ7a2tpBj6USCGuB64HfGWOWA5sTjq0HvuF+eOcDNcAW9znXAvcB1wDPp1PX\nPTP4E/CUtfZbKfYzY5o7nIvS0hkyAphXUezcjKW1m1nuqqNct2lvK+fNK892M4YVivRiLrkJcK6s\nfu7RrbRMqqaxeVOWWyYydqQyh/AAEDbGrAPuAT5tjPmMMeYGa20jcC/OB/5TwJestWHgTuA9xpi1\nOJPC30unLs5E9sXANcaYZ9xfKzLY7yE1dToXpQVLUp9UBphT0b/SaGJMLDe2hWlsD7Nk1uRsNyUt\nHo+HU8vzeaOpk66YL9vNERkzhj1DcCd1bx9QvC3h+Gpg9YDnHASuTvJaqdZ9AMjandqb3UCYmsYq\nI4C5QeesoL4lxEULghlv11jTv1Hc4nEWCAALpxVQe6CbHZHxsTpKZDTowrQk+gMhnWWn4Gxwl+/3\nTpiVRpv2tuL3ejhjjG9ZkczkQh8Lpk1ie6RMd1ITcSkQkuifQ6hIcw7B6/Uwt6J4wlyL8OreVmqq\nSikIjM9hl/PnVdAV9/Nk3aFsN0VkTFAgJNHcGWFyUYCAL/0fz0S5FiEai/PavjYWj5PrD5IxlSUU\ne3r51ct7st0UkTFBgZBEulcpJ5obLGbP4a6cvzPXrqZOOiN9LJk1JdtNOWE+r4cF+e08v6OZXU3a\nAVVEgZBES2dP2ktO+82pKKKnL0Zjjt8bYeNeZ0J5yTg+QwA4Ld+59eePnn0j200RyToFQhIndYbg\nLj2tb87tYaMX32ihvDiP6uCkbDflpBR6o7z3/Nncv2F/zv+diQwnt6/bP0FNJzlkBFDf0sUF8zPZ\nqrEjGovz6Gt7qfJ18rV/u/eYY7WbXj16Adh4ULthIwtjPojN4QP3PsjK4kNUBsty+uplkcEoEAYI\n90bpCPed8JBRVWkBRXk+tjW2Z7hlY8dr+1rpjno59+wzMLMuPObYcy+uz1KrTkwo0suSy26gYUsD\nL+zwcsPKZTRueCTbzRLJCg0ZDdAS6t+24sTOELxeD8vmTOHlXYcz2awx5WnbhIc4C6aN7+GiRCsX\nTCXg8/Jk3cFsN0UkaxQIAzR3nNhFaYmWV1dgD3bQ4l7glmuesYcI+sIU5efOCeakfD8rTwuy5UA7\nDb2F2W6OSFYoEAZoPsF9jBKtOLUCgJd3595ZQlNHhNf2tXFKIPcuvnvbgqlMKQrwcleQXl29LBOQ\nAmGAt7atOLE5BICzZpZRlOfjpV0tmWrWmPHs9iYAZgZyb0VOwOflukUzaIvlc9/a+mw3R2TUKRAG\naO48uTkEcD5YzplbnpOB8LQ9xLSSfMp9PdluyoioqSrllECIbz+5nYM5fi2JyEAKhAGaOyOU5PtP\nen+e5dXlbD/YefSMIxe0dfXy9LZDvL1mGp7xe/+fYZ1b2ExvNM43/5Lbd+ESGSh3ZgUzpLmzJ+35\ng9oNG7njW985pqypLx+Yxfrdh7n2rOPv6jUerVn/Jl09Uf738rn8dke2WzNySn29rLq4mu8+tZN/\nPG82y6srst0kkVGhQBiguSNCRXF68weJd+PqNz8W5/E/vcaLb7TkRCD09MW4b91uLpxfwcJxuN11\nOmo3bGRx3EOxdza3//hZri/di9eDLliTnKchowH2Huli5pSTX3bo83qY7u/mhZ3NxOPjf6O7hzcf\n4GB7hNtWVme7KSMuFOnljEtv5MZzT6U1lk/LKRdhLrmJxua2bDdNZEQpEBJE+qIcaO0+uh/RyZqb\n18nu5hCPbx3fFzvF43FWP7eb+dMmcfGCqdluzqhZWFWKmV7CE3UHORzKzUl0kUQKhAR7D3cRi8O8\nYGYCoTqvg3nBYu55Yvu43g77kS2NbG1o57aL5uH15vBs8gAej4d3LpmB1+PhDxv3kQMneiJDUiAk\n6L/T2dwMBYLXA598+wK2NXbwyJbGjLzmaNvTEuLzv3+NxaeU8a6lp2S7OaNuclEeV59Zya6mEDt6\ncnvuRESBkKB/++O5FUUZe83rF89g/rRJfPvJ7UTH2VlCuDfKR3+1Aa/Xw/duXkqef2L+czl3bjnV\nwWJe6QpOiLvhycSlVUYJ6ltCTC4KMLnoxK9SHsjn9fCpyxfw8TUb+ddH6vjna2vwjINF/O3hXj77\nu1d5/UA7lxUf4Merj12TP962uT4ZXo+Hdy87he88vpWPrdnA/R+5gHz/+LyPtMhQFAgJ6ltCGZtQ\nTvSOs6pYv+Iwq5/fTU9fjDuuP2NMj8Xb5jCrHnqB/a3dnFvYxOVXXnVcnfG2zfXJmlKUx4VFh3h6\nv4+7/rKNr95wRrabJJJxCoQE9c1dnDs3c/cIPuaCtTgszK/gZy9CY3uYf3nHQmaVZ25o6mTF43HW\nvdHCf6+t5691B6kqK+C3H17OQ7/9RbabNmbMzgvxgSXz+Ona3SyaoHMqktsUCK5wb5QDbd3MDWbu\nP/nAC9ZMPM4fHnuGZ7d7eXrbs9x8/mzet3wO87NwX4FoLM7u5k4272/jhR0tPLejiaaOCPmeKKfG\nGziHHh767ZYJNTSUii9cczrbGtv5f79/jbLCAG+vmZ7tJolkjALBtfdwF/EMLjlNxuPxENn6NNct\nOYdN3RX8bF2U+9bVE/SFOWNyH595z1WcObMM3zDDSd09UepbQuxudn7tagqx90gXvdEYsVgcv89L\nUZ6PgoCPwoCPgoCXaAx6ojFau3o40NrN/tZuwr3OFs9lhQFWLgjStnMjl1/+dpqbplNV6VxdPdGG\nhoZSu2Ej37j7u8yNe7CemXz4Z+v5+5nt3PVP781200QyQoHg2n10hdHIBQI4Zw1LL7uBpUBHuJdN\ne1vZtLeVZ1vCPPv9tRQEvMwLTqI6WExpYYCiPB990Rit3b00d0aob+5if2v3Ma9Z5OnDF2mjpKgQ\nD3FieOiLe+jDS3ekF2+gAI8njo84AU+MeNcRqksLmVIUocIfoczbg3cv1Ne9SuCqK0a0/+NZ4hnf\nvEgf//X8Ln67H87fuJ8bz56Z5daJnDwFgqu+ZXQCIVFJQYCVC6aycsFUNj31IBdd+y5e3dvKrqZO\ntja00xHuo6unj4DPS1lhgPLiPM6bV87uutc4/awlBCflUzEpj3y/j9V3fZEPffGu494jWfnqu77I\n+5LU1dlA6orz/Xx4ZTU/fnwDn/rtJuzBDj5zxWkEfBNzaa7kBgWCa3dzF1OKApQVBbLy/ls3vUKh\nNwrAbPfX9t3bOO20050KcaDT+dVnX2XRNZdkpZ3yluJ8P1dM2k/0tMv44TNv8Nz2Jr717kWcObMs\n200TOSEKBFd9cyhjVyifiGQ7pj734he5PsmErr7Jjx2bNm5kmQcuKS7m5YY+rv/u8yzIa+ftM/r4\n/O23ZLt5ImlRILj2tIS0772krT/IDbCyJ8oTdY38bbeXnfUx2h7YzC0r5nB6pba8kPFBgUD/ktMw\nc0Zx/kByT2GejxsWz2Tlgqn86en1/L52H2tefpOlsydzw+IZXHlGJTMmn/zW6iIjRYEA2MYOAKqn\nKhDk5E0pyiPPPs5NS5bxRk8pO/ZF+OqbrXz1oa1My+vjunMXsLy6nPPnVWRtzkokGQUC8MTWg3g9\ncOH8YLabIjkiFOll8WXvZLH7+FBHmK0H2nlt+25+9fIefrp2Nx6Pc8+FFdUVLK+u4Nx55ZQVKiAk\nexQIwGOvN3L+vArK07x1pkiqppUUMM0UcODh73H52Utp6svnYF8hjYe6+Mn+I/z4hd14iFPui1Dp\n72ZOXidnTi/go7e9P9tNlwlkwgfCzkOd7DjUyfuWz8l2U2QCCEV6WXjpjceU/ee/fokrbvsiu5qc\nK8+3He7i9cgUXgj10PvkDm60j5/CAAAImElEQVQ8e4bmt2RUTPhAeOx158Y1V56hPWkkO7zxGNXB\nSVQHnT2twr1RtuxvY93mHXz7r9u558ntLJ09mZuWnsJ1Z1UxRWeyMkIUCK83snjWZKrKtPpDxoaC\ngI9z5paz/Q8Pc9GSc9nVM4ld+yJ8+c1W7vjjZi46bRpXLJzOFTXTqSwryHZzJYdM6EDY39rNa/va\n+PzVp2e7KSLHcfa9up6lONuTN7SFeXrd39jTUsyX/7iFL/9xC4tOKeOKmulcMD/IolPKtHWGnJQJ\nHQh/3LgfgKs0XCRjnMfjYcbkQjzbnuTis8+mtTSPvT3F7G0Ic/e+Nu5+YjuFAR/L5kzh/HnlnDuv\nHDO9JNvNlnFm2EAwxniBHwCLgQhwm7V2Z8LxDwGrgD7gTmvtn40xQWANUAgcAG611nadbN2M9Rp4\ndEsD//HEdi4+bSrVU0f/fgQiJyIU6eX0S53tTJa7ZZ2RPn7x3z+hePZCXt/dxQs7m4/WLyvwcnpV\nK/OnTWJesJgZkwupKitgxuRCpk7KH9N37pPRl8oZwo1AgbV2hTFmOXA38E4AY0wl8AngHKAAeMEY\n8wTwFWCNtfY+Y8wXgFXGmF+fbF1rbSQTnX562yH+6dcbWXxKGT9479JMvKRI1kzK9+M/Us8tt68C\noKunj31Hunn0oT8SKK9i15shNtbn0RM/9j7Qfq+H6aUFzJhcQGVZIRXFeZQX5zGlOI+K4jzKCgMU\n5/spzvNR1P97np88v4alclUqgXAR8CiAtfYlY8w5CcfOA9a6H9QRY8xOYJH7nG+6dR5x//xGBur+\n7YR7muCuR+owlSX8963nUZw/oUfNJAcV5fk5bXoJzzZt5brrLqeqsop4PE53b5S27l7+tOY+qqpP\noyvmJ9Tl583OAJt3xogGCugdEBrJBHweCgM+8vw+Aj4PAZ/36O95fu8xj/1eD16PB6/Xg8/jwesF\nr8eDr7/c48HnBZ/Xg8fjof+ExYPzB8/Rx2/xeI49qzly5DDlO18f9DlHH7t/OPrsYd7LM1xbBrze\nYPWHagsDy5O8VxzY33CE0je30dMXo6cvxtmzp4zIPTg88Xh8yArGmB8D91trH3EfvwlUW2v7jDHv\nA86y1n7ePfZz4OfAj9zybmNM9YCyE65rrX0ysW21tbVDN15ERI6zbNmypGOFqXw9bgcSZ6e81tq+\nQY6VAK0J5d1Jyk6mbkqdEhGR9KUyGLgWuBbAnUPYnHBsPbDSGFNgjCkDaoAtic8BrgGez1BdEREZ\nIakMGfWvMlqEM6R1K84H+E5r7YPuaqAP44TLN6219xtjpgM/w/lm3wzcbK0NnWzdTHdeRETeMmwg\nyPBLb8cqY0wA+CkwF8gH7gS2AvfhzFVtAT5mrY0ZY+4A3oGzzPdT1tr1xpj5qdYdzX4NxhgzDagF\nrsBp233kUD+NMV8EbgDycP49Pkvu9TGA8wVxLhAFPkQO/V0aY84HvmWtvSSdtmaibirt0/qx1Bxd\negt8AWfp7XjwPqDFWrsSZzjue8B/AP/ilnmAdxpjlgIXA+cD7wG+7z4/nbpZ5X6Q/CfOXBTkWD+N\nMZcAFwAX4rRrFjnWR9e1gN9aewHwdeAb5Eg/jTGfA36Ms5Q+3baeVN1U26hASM0xS29xro8YD/4H\n+HLC4z5gGc43S3CW+V6O07/HrbVxa+2bgN8YMzXNutn27zir0w64j3Otn1fhzN89ADwE/Jnc6yPA\ndrcdXqAU6CV3+vkG8K6ExyPVr2R1U6JASE0p0JbwOGqMGfMXMFhrO621HcaYEuD3wL8AHmtt/zhh\nB1DG8f3rL0+nbtYYY94PNFlrH0sozrV+BnG+iPwv4HbgVzgr/nKpjwCdOMNF24DVwL3kyN+lOw/a\nm1A0Uv1KVjclCoTUDLX0dkwzxswCngZ+Ya1dAySOJQ63zDedutn0AeAKY8wzwBKca1mmJRzPhX62\nAI9Za3ustRYIc+x/9FzoI8Cncfp5Gs6c3c9w5kz65Uo/YeT+LyarmxIFQmqGWno7ZrkruB4HPm+t\n/albvNEdj4a3lvmuBa4yxniNMbNxAq85zbpZY619m7X2YmvtJcAm4BbgkRzr5wvA1cYYjzFmBlAM\n/DXH+ghwhLe+9R4GAuTgv1nXSPUrWd2UjPlhjzHiAZxvoOt4a+ntePDPwBTgy8aY/rmETwL3GmPy\ngDrg99baqDHmeeBFnC8JH3Pr/l9gdYp1x5p02j7m++luBPk2nGt0+tuzmxzqo+se4Kduu/Jw/g2/\nQu71E0bu3+hxdVNtkJadiogIoCEjERFxKRBERARQIIiIiEuBICIigAJBRERcWnYqMoC7hvt3OBsB\nenDWwn/AWrstA6/9DZxtKH4JlFprv26MuQl42Vp7YOhni4wsBYJIck9Za98DYIy5EmevpOsy8Lr/\nAJxtre1IKPskznYUCgTJKgWCyPCmAPXGmLNw99bB2UriA8DZwOeBHmAe8Ftr7TfcLUP+C2dnyzDO\nvT1uBU4BHjbG3AX8H+AXuNttGGMuxzlzKAMKgc9Za58ZrU6KaA5BJLnLjDHPGGNexLmnxO9xNlv7\nmLtFxl+Az7l15wDvBlYklP07cK+19lL3z/9qrf060AhcibtNt7X2Yd7abmMWUAlcD9wMFI1wH0WO\noTMEkeQSh4wMzvYAxcAPnIcEcLZqBtjsbnbYZ4zpvx/DWcA/G2M+j3NG0TPcG1prXzfGfB/4tfv6\n92awPyLD0hmCyPAOur+/BtziniF8DnjYLU+2/8s2nE0FLwFWMfR+MjHA6w5JlVhr34EznPTdk2+6\nSOp0hiCS3GXudtpRnC2EP4Ozy+3PjTE+t84HgRmDPP+zwA+NMQU48wGfHOK91uFs2X0DcIkx5hac\nM4qvnGwnRNKhze1ERATQkJGIiLgUCCIiAigQRETEpUAQERFAgSAiIi4FgoiIAAoEERFxKRBERASA\n/w9bXBIMnZoHRgAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.distplot(salaries['Benefits'].dropna())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We decite, that with a distribution like this, it is a good idea to fill the Nan benefits values with the mean values." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Filling nans with mean values of benefits" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "salaries['Benefits']=salaries['Benefits'].fillna(value=salaries['Benefits'].mean())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Checking our data once again" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I decide to take a look at the datato make sure I got rid of all the Nan values." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAE3CAYAAAAJ55KLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAGc1JREFUeJzt3Xu0ZVV15/Fv8ZK3CkIEH2AjziBG\nbMQ0GoKixgckbRwYkAgiiESUiBoNhlfHCCEaJT6IDQRKq0BAEBUNRtBWkSAiEHzQsWcUkDYgKAJB\nBHne/mPtQx2KKrC768512PX9jHFHnTq3Lmty77m/s/faa8+1YG5uDklSjVV6FyBJKxNDV5IKGbqS\nVMjQlaRChq4kFTJ0JanQag/z+bnV1nhCSSHLc89d19K7hlmpYxZqmJU6ZqGGWaljFmqYlTpmoYah\njgXL+5xHupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWp\nkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJU\nyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkq\nZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKGriQV\nMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFDV5IK\nGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKGriQVMnQlqZChK0mF\nDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRC\nhq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIh\nQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQ\noStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI\n0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk\n6EpSIUNXkgoZupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUy\ndCWpkKErSYUMXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZ\nupJUyNCVpEKGriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUM\nXUkqZOhKUiFDV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgoZupJUyNCVpEKG\nriQVMnQlqZChK0mFDF1JKmToSlIhQ1eSChm6klTI0JWkQoauJBUydCWpkKErSYUMXUkqZOhKUiFD\nV5IKGbqSVMjQlaRChq4kFTJ0JamQoStJhQxdSSpk6EpSIUNXkgotmJub612DJK00PNKVpEKGriQV\nMnQlqZChK2mlFxGPrhrL0NVKJSJW712DZtI5VQOttiL/YxFxxPI+l5l/tSLH+nVExHrAwcAmtG/q\ndzPzh8U1rAq8Dngy8FXgisy8sbKGoY5LgVOAxZl5U/X4Qw0fAU7KzG/3GH9wWUR8BTgxM6/oWEd3\nEXEQcHKv18NUHb8BbADcDbwT+PvM/G5xGTcN348E7gPIzPPmY6AVfaR7w/DxXODxwJW0b+azVvA4\nv66FwFXA04DrgZM61HA8sBnwEmA9YHGHGgBeDNwFfD4iTo+IF3eo4RzgkIj4RkQcEBHrd6jhWcB5\nwH+LiK9FxH4RsW51ERGxY0S8LCJ2jogrI+KPq2sA7gXOGV4Pv9dh/ImTaQdGRwMXAB/sUMPPaa+N\n3YE9gFfP10ArNHQz8/jMPB5YJTPflJmfyMy30sKmhw0zcyFwd2Z+A1jQoYYtMvMI4I7M/DxQNnc0\nLTNvycyPAvvRftlOjYiLI2KXwhq+mJm7Aa8Afhf4SUR8PCI2K6zhPuCfaG/IPwf+FDg3IvavqmHw\nPuAHwFuA3wHeWDw+mXlsZj6XFnb7RMTVEXFYhzfD1YGvAY/NzFNYwWfgv47M3Gf6Azh0vsaar/+5\nDSNii8y8MiJ+E+hxRAPAMD4R8URa2FRbLSIeN9SwHsOpS7WIeBPwWuBW4ETalMfqwDcpms+KiK2G\ncf+A9ku2A+01eBawXVEN76OF/vnAezPzWxGxCnAZcEJFDYM7aGeF92Tm9RHxqMKxgftfj7sDewO3\nA39O+3mcDexUWMoawN8CF0TEjrTXZamIeDfwpqGWtYF/A7aej7HmK3QPoh1JPYF2Wr/nPI3zcN4C\nfAzYCvgU7Zta7TDgQtrp0zeBt3aoAeAJwB6ZefXUc3dHxJ8U1nAiLdj+MjPvmDwZER8rrOEHwLMz\n87bJE5l5X0S8srAGgF8AXwY+GhFvBv538fgAlwOnAa+dfl1ExDbFdewLvJT22vhDYK/i8QFeDjwR\n+DvgGOCj8zXQCr0NOCIuAib/welT+bnMfN4KG+gRKCI2Am7MzC73XUfEhrR55dVpP5tNM/PoDnVs\nslQNFxWPvyXwqqVqqHzjmdSxHvCkzPzXiHgG7bVxfXENq2bmvVN/3ygzf1ZZwzDuhzLzoKm/L8zM\nfYtr+KfMfHlEnJyZe0XEP2fmDvMx1oo+0p23yef/FxFxFO1d9P6gy8xNi8aefgOafp5Ob0Bn0k6Z\nnkk7tb29uoCIOIl2kXUd2inclcD2xWUsAj5Pm9q4Dii9iBYRj6dNty0G9oqIp9GmvT4H/HZlLcCh\nEXEg7Q1oHdrPY6uqwYcpr78ANhrONBYMH6UrjAb/HhH7Ar+MiKOZxynRFRq6mXnNivzvrQC7AJtn\n5p0dxp68Aa1BWzUwsUGHWgDIzDdGxELaxbSvdyhhK9o82fHAIbQpn2q3Z+bREbFlZu4bERcUj789\nbfotWDKHfB9wbnEd0H4/ngx8APjw8FEmMz8aESfQXgtHDU/f1+ls8E9o0wtn0q47PDJWL8ygy4E1\nO419J/Ao2nKYNYbHa9ECp4uIWJN2RDNH8RHe4BfDL9Q6w1rlNTrUsGA42lw3Itah+E0wMz+bmTsB\nr8nMnYaPF2Xm4ZV1DG7OzF8B62dm0s4+ykTEyzPzHuBa2sW8vWmrKEqnFgbrAPvTlqtdywMPlFao\n8qUZxa6gLUu6nnbaMpeZ/6lo7Fk6ogH4e9pFvPOAHwP/3KGGyyLiHcB1EXE6sGqHGt4NvJJ2o8jV\nFK+bjojDMvNI2tTCAy4wZ2b1Wt1rI+J1wB0R8R7gMcXjP3X4c/PicZdlIW0p4fNp6/lPGh6vcGMP\n3d2BpwC3VA+cmZ8FPhsRO2fmF6rHX0Y9Z00eR8SZmXlrhxoOGS4g3UG7WvytyvGH9aeXZuZkamXj\nyvEHk+/7Itr3oaf9adMLnwZeD1SH/h8BH6Gtzz2weOylbZiZCyNiz8z8RkTM25r+sYfuNcAve8zp\nzsoRTUT8EW0JzO3Anpl5SXXgDutzj6Qtkzo4M39Bu5hVWcOBwJ8B90TEgZnZ64xjz+GC4ruA36PD\nDTsRsTHwduA24JjMvJ22VKrar4YLzr8ZEc+c/kRm7lhdTNWa/rGH7pOAKyPiquHvlUvXPjf8eVzR\neMvzVtqKhcfS5qv+a4cajqPd9bQB7U6svTvU8Me0qZ71afPsvUL3S8C3aRdtkiWhOwdUTX0tBv6R\n9pp4L+2uvB52pn0fjgcOoM8b0G9l5vdoU4Ela/rHHrq7dxz7UGD3zDy/Yw0Ad2bmzcDNw4WjHu7N\nzC8CdLpIAvCrzLwLuDEielzAAyAzD6Ut1To8M9/TqYw1MvNYgIj4cqcaGC6i/SgidqbdQLUZrSnU\n/yws48yIOC4zP0hbzjjvxh66q9Pmje5fCE9bGlJho6Jx/m/06D2xtFlYMTML34ePRcQptNfJp2gd\n8C4uGnt6SdYs/Dz+O3Aj8ELgO7QzkaqeIM8G3h8R5wJ7V9ygMvbQXUy/hfBbRMRfL+sTmXlIhzoW\nLF1TYR0bRsRLhho2GB5PapiX9nnLsHVEnDrUMHk8qaFHh6/jaetjD6etmV5E3Y0ia0fEU2iBu9bw\neAFAZl71kF85P7bMzP0j4quZeXZE/HnVwJn5S+CAoefDhRFx8dTn5uV1MfbQ7bkQ/nbanF1vRyzn\ncaV/obXLg7Z2evJ4jraErcJuw5+r0n+eHWDNzPzKcME1I+JXhWPfy5KlcvdMPZ4Dyi9gAatHxAbA\n3DAFVtoUariAdjStCdO8LyEce+j2XAh/fWYuKhxvmaZriIgX0S7WXEy7Jbiqhn2malifNnd31XCU\nUVXD+cP452XmSx7u3xe4MyJeCqwaEdsDZaE7Xz0F/j8cQWsG9Ru0ZYR/VjVwRBxMa6t5YGaWdNsb\ne+j2XAh/WeFYD2uYVngi7ersXbR73vd4yC9a8TXsSuu6thpwRkTMDcvqKt0SEa/ggTsElL0BTdkf\neD/wOOAdtKv3JYYzvmXeattjqdZwxB+0jQ9uGHoeV9kO2C4zf1414ArtMqYHG/qkvoG2XOkK2jYx\n5X19I+LrmbnjMG+2U0R8MzNLm81ExIW0iyVfHP68NDOfXVzDV5d6ai4zX1hZQ28RscXyPpeZV1bW\nAvefgR3E1C37M3I2Mi9GeaQ7/GIt691kLjNfVFzOItpNGl+m7ZawkD7rVFcbei/MRdu3rUdD9/sy\n887hCHcuIsqmFyaGN5xHs2SK47aH+5r5EBGH0JqG386SW9RLOuBNgnW4gPaANpfAmytqWMoHaXuj\n/bjD2OVGGbo8eOuTbYAPAacu49/Ot8dn5qRj0dkR0Wvd7t/Rpjw2os3p9rgD6YKIOA14YkQcB1xS\nXcCMTHFAu7C36XA3WC+n0M46dqTtYrFWpzp+PFnHvTIYZegOHZMY7p9+F22bmldX3qgwtQD/6oh4\nTmZeMtzq2GP+kMw8c1gI/1TaEV7ZHNZUDYdExMtoqxm+n5n/WF0D7fbX7WlhcyRw6fBntR/Rv/fC\nHZn5nknT8A5tLiduiIhjaStb5gCy7W04SqMMXbh/h4DFwHeB53Q4jUzaC2gB8IKImLR6rFwadL9h\nHeLatLWZpw93RJUe+Q+ns0+jfU+eHhFPz8z3VdbADExxDNYAvhcR32NJ0FSvF14QbUeTdSNiLfr1\nev734c/NO41fapShOzQ3efvw8YXhuTUAhltB511mPmWqngW00/obi6/MTnsf8Bpai8ffAc6gfrrl\nbFpHq5uLx53WfYpj8N5O4047knar/Gm0Pdo+0aOIzDw8Il7AkuWMPXaOKDPK0GXJOr/JJnM9mooA\nMLyYFgL/ATw2It6QmV+qrGHQffdZ2tzdX3YY934zMsXBMP7BtA1Lz6GdkVVbc9KDAfjMMN9dbujl\nuwXtLGiOtpPEa3rUUmGUoTt9lDkDjgR2yMzrou2O/Glap6lqt9J/99nPR8TfAP86eSIzq5uIbw48\ng3bRaNuI2DYz/6qyhkFZ0+ylRcQutHntPSPi5OHpVYBdgbOW+4Xz5/lTyxk/FrU7VJcbZehORMTW\ntFs+H0M7dbqiw5HNvZl5HUBmXlt8u+e03YAtcsnus//QoYZXA99nyeaHPRaJn0a7iFa68+4ylDXN\nXoYraDci3ElbzgjtRpE9l/sV82u14cyr53LGMqMOXdpGe/vQAuYk2pFFdejeGhF/SmtqsiNwU/H4\nE08CXhERr6K+49rEnZlZdufVctyeme/uXANQ1zR7adk2kD0pIhbTTuu3An6QmVdU1bCUD9NWkWwM\nXAQc+9D//JFt7KFLZv5wuFL9s4j4RdW40bbWhja98A7gZcBPaG8CPfTsuDZxTUT8BW0+c3LFvqTh\nzdTP44aI2GOpGnos43sLrWn2s2itHXu8Gb2B9nq8GDgsIk7JzPL125l5+rCccUvgh5n5s+oaKo09\ndG8a5ofWiYhXU7tX2rJ2/X0ubS+qDxTWMdF763Fodz49bfiA2i5j0z+P/acez9FuSS4xhP8HaL1A\n3gV8hhY2z6CtU620F/C8zLx7WN1zIYU3zUTE2sB+tDXLF9Ma36weEe/MzOrvRZmxh+7raVdCb6Q1\ntnh91cDZttl+gIhYhdY+rkfoTjqurVfdcS0iVht2Ceh2gWTy84iI35+e14+I3Zb/VfNiIa0R0wa0\nqa5tgZ/R5plPfoivmw8LMvNuaEspI6JkOeWUj9PefLamNf95P+1W4A/TbpkfpVGHbmbeOpy2XEl7\nJ+12y+VwgWAH+t1q+W7gD2m/2FdR+wu+mLZH2eSGERj6DVC0hC8ifp+2PnmPiJjsk7cqbc+4Mypq\nGNwzWTIYEQdl5g+Gxz16QFwUEacDF9Bem1U7V0xskpm7AUTEdzPzhOFxWWvHHkYdurPQznDKmrQN\nAN9SOehSp7Ofop3OQuG60Kk7rXbLzPtvRhjWMFf5DrAhbV3s/6ItkbqHtpqh0vTNMdMrWcq2zYmI\nT2bm7pn5tqHN5VbA6Zl5dlUNg7unHt849XgWthCaN6MOXdr62Mn6v0UR0e3K+dCw+1Udhp4+nT2H\nB57OlqyRjYgdaKeQb4uIY4anVwEOpM1lVriZ9v3/Du2i5pbAT6l/E17WtkELgKcX1nD//n1D0FaH\n7cSm0TYqXbDU40061VNi7KE7C+0Me5uF09lbaOtCH8WSX6j7aK0Nq/wNcOb0zRgR8Xrgb6mda56e\nQz5uOY/n26zs33cm8JRlPP5UYQ3lxh66S7czPOah//kodT+dHdZ/XjEs3etx9xfANpl54FJ1nRQR\n+1UWUdnp7iHMxP59mXk4QES8FVicmb3WsJcae+h+nXaB4KnA1Zl548P8+zGahdPZiRdExFE9ds7g\ngfOH0+4prWI2zMT+fVPuAc6JiGuAkzr1Jikz9tA9izZ/eRIztmdZoVk4nZ3YCLguIq6mrVyYy8zn\nPczXrCg3RcR2mXnp5ImI2I5+dwj2NFO/C0PTnWMjYhvg4Ig4gfY7++HMvLVvdSve6PdIi4itgH1p\n6/7+B+2d9Kq+Va2cImKzpZ8bbkmtGHtz4HO0ddJX0uYPXwz8QWZeXVGDli0i1qO1mNybNvVxIu2A\ncP9lrXd/pBv10ozBdbR1qbfTrpR/KCJ6zSuu7O4BjgJOAF5Cu7hWIjN/BPw2cD6tgfi3gP9i4M6E\ny2l71r02M1+amWdm5mnUrxsuMeoj3Yg4gxa0pwAfn3T7iohLM3O7rsWthCLiHNqa4cNp+9gtqt6R\nWLMnIladnuePiI3G3H9h7HO6/7CcSfkdyisRtKbZX4mIwzIzO7a51Gw5dNjtZXVgHdr0z1YP/SWP\nXGOfXrghIi6JiJ9ExOURsS1AZvrL3sedEfFSYNWI2J5O+8Vp5uwCPJm2fdRv0afBfpmxh+6HgP0y\ncxNaC7uPdK5nZbc/7efwOFq7y969dTUbbh4OhNYfdvJeu3dB82ns0wurZOZ3ADLz2xGxMq7JnCW7\nAgdkZs+NKTV7ro2I1wF3DPulPaZzPfNq7KF799Bd6gLarg13dq5nZbc68KWISNp8+9c616PZsD9t\neuHTtPar1VvRlxr76oXNaD06t6JthvjOqnWhWr6IeA7wTuA/Z+aWvetRHxGxMfB24DbgmMzs1nq1\n0iiPdIcu+NC2x3kNS3q3qqOIWIs2xbA37WdyRN+K1NliWiP3xwLvpbU+Hb1Rhi5LmmVPh21p02wt\n0220mxJeO+l2ppXaGsMtwAybDawURhm6mTlpETfZsWEj4KeZed/yv0rzJSLWpTULv5zWTP3UiPgp\nsMcY763Xr2367HPsK6nuN8rQnYiIV9LaOd4MrB8RB4y9g9GMWlYv2/2o72Wr2bJ2RDyFFrhrDY8X\nAIy5P8rY312OoN1fvy1tf6yjOtezstpmOnABMvNE4Jmd6tFsuJc2r/txWl+OxcCi4e+jNeojXeDn\nmflTgMy8ISI8le3DXrZ6kMxcKW/HH3vo3hoR59I6S21HO535ayjflmRlZy9bPUhEXMByVhVl5o7F\n5ZQZe+hOb7h3bbcq9A7gcxHxNZbqZduzKHX3ut4F9DD2myMeDTyftv05AJl5Rr+KVl7DBqG70Jbs\nXQucPeyQrJXccAHtVbQ7FhcAm2bmm/tWNX/GfqR7Hu1OtFuGv88Bhm4HQ0OTs3rXoZl0CvBF2q36\nNwBr9S1nfo09dP8jM/fpXYSkh3RHZr4nIhZm5r7DXO9ojT10z42IN9KOdgHIzK93rEfSgy2IiI2A\ndYdbxTfoXdB8Gnvo/i7wKNq8LrTpBUNXmi1H0jamPI3WwPwTfcuZX2MP3XUz88W9i5D0kNac9GAA\nPhMRu3atZp6NffXCB2k7iv4Lw3rAzPy3rkVJAiAidgG2B/YETh6eXgXYNTNHu0fa2I90t6Hdarpg\n+PuawHP7lSNpyhXA42mbC0z6XN9HC+HRGmXvhYj4JEBm7gR8ITN3Gh67EaI0IzLzmsw8ibYZ5YW0\nOxQvyczL+lY2v0YZusDGU493nno83rkU6ZHrDbTphRcBiyLibZ3rmVdjDd1pCx7+n0jqaC/geZl5\nIG36b9R7pI01dOeW81jS7FmQmXcDZOZdwF2d65lXY72QtnVEnEo7yp1+/PS+ZUlahosi4nTart07\n0FYcjdYol4xFxPOX97nMPL+yFknLFhGfzMzdh8evoO3a/f3MPPuhv/KRbZShK2n2RcRXMvOFveuo\nZuhK6iIirmE5t/yOeZOBsc7pSpp9twPZu4hqhq6kXq7PzEW9i6g21iVjkmbfqO88Wx7ndCWpkEe6\nklTI0JWkQoauJBUydCWpkKErSYX+Dyc/AXubTNhbAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.heatmap(salaries.isnull(), yticklabels=False, cbar=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Looking at our clean data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now I take a look at the data, to get familiar with it." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYear
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.2525114.34451567595.43567595.432011
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.3825114.34451538909.28538909.282011
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.6025114.34451335279.91335279.912011
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.9025114.34451332343.61332343.612011
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.5925114.34451326373.19326373.192011
\n", "
" ], "text/plain": [ " Id EmployeeName JobTitle \\\n", "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", "\n", " BasePay OvertimePay OtherPay Benefits TotalPay \\\n", "0 167411.18 0.00 400184.25 25114.34451 567595.43 \n", "1 155966.02 245131.88 137811.38 25114.34451 538909.28 \n", "2 212739.13 106088.18 16452.60 25114.34451 335279.91 \n", "3 77916.00 56120.71 198306.90 25114.34451 332343.61 \n", "4 134401.60 9737.00 182234.59 25114.34451 326373.19 \n", "\n", " TotalPayBenefits Year \n", "0 567595.43 2011 \n", "1 538909.28 2011 \n", "2 335279.91 2011 \n", "3 332343.61 2011 \n", "4 326373.19 2011 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "salaries.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 148045 entries, 0 to 148653\n", "Data columns (total 10 columns):\n", "Id 148045 non-null int64\n", "EmployeeName 148045 non-null object\n", "JobTitle 148045 non-null object\n", "BasePay 148045 non-null float64\n", "OvertimePay 148045 non-null float64\n", "OtherPay 148045 non-null float64\n", "Benefits 148045 non-null float64\n", "TotalPay 148045 non-null float64\n", "TotalPayBenefits 148045 non-null float64\n", "Year 148045 non-null int64\n", "dtypes: float64(6), int64(2), object(2)\n", "memory usage: 12.4+ MB\n" ] } ], "source": [ "salaries.info()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYear
count148045.000000148045.000000148045.000000148045.000000148045.000000148045.000000148045.000000
mean66325.4488415086.7408003645.66140825114.34451075057.85104894038.1850672012.520653
std42764.63549511473.1815578019.49863613364.20834750409.76117062683.4779051.119390
min-166.010000-0.010000-7058.590000-13.800000-618.130000-618.1300002011.000000
25%33588.2000000.0000000.00000022747.20000036958.16000045077.4100002012.000000
50%65007.4500000.000000817.17000025114.34451071644.71000092675.5100002013.000000
75%94691.0500004695.7100004242.90000033507.310000106028.000000133085.4100002014.000000
max319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000
\n", "
" ], "text/plain": [ " BasePay OvertimePay OtherPay Benefits \\\n", "count 148045.000000 148045.000000 148045.000000 148045.000000 \n", "mean 66325.448841 5086.740800 3645.661408 25114.344510 \n", "std 42764.635495 11473.181557 8019.498636 13364.208347 \n", "min -166.010000 -0.010000 -7058.590000 -13.800000 \n", "25% 33588.200000 0.000000 0.000000 22747.200000 \n", "50% 65007.450000 0.000000 817.170000 25114.344510 \n", "75% 94691.050000 4695.710000 4242.900000 33507.310000 \n", "max 319275.010000 245131.880000 400184.250000 96570.660000 \n", "\n", " TotalPay TotalPayBenefits Year \n", "count 148045.000000 148045.000000 148045.000000 \n", "mean 75057.851048 94038.185067 2012.520653 \n", "std 50409.761170 62683.477905 1.119390 \n", "min -618.130000 -618.130000 2011.000000 \n", "25% 36958.160000 45077.410000 2012.000000 \n", "50% 71644.710000 92675.510000 2013.000000 \n", "75% 106028.000000 133085.410000 2014.000000 \n", "max 567595.430000 567595.430000 2014.000000 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "salaries.drop('Id', axis=1).describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Putting people into different groups based on their job" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I use the very convenient job title bin function from https://www.kaggle.com/mevanoff24/data-exploration-predicting-salaries" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def findJobTitle(row):\n", " \n", " police_title = ['police', 'sherif', 'probation', 'sergeant', 'officer', 'lieutenant']\n", " fire_title = ['fire']\n", " transit_title = ['mta', 'transit']\n", " medical_title = ['anesth', 'medical', 'nurs', 'health', 'physician', 'orthopedic', 'pharm', 'care']\n", " court_title = ['court', 'legal']\n", " automotive_title = ['automotive', 'mechanic', 'truck']\n", " engineer_title = ['engineer', 'engr', 'eng', 'program']\n", " general_laborer_title = ['general laborer', 'painter', 'inspector', 'carpenter', \n", " 'electrician', 'plumber', 'maintenance']\n", " aide_title = ['aide', 'assistant', 'secretary', 'attendant']\n", " \n", " for police in police_title:\n", " if police in row.lower():\n", " return 'police' \n", " for fire in fire_title:\n", " if fire in row.lower():\n", " return 'fire'\n", " for aide in aide_title:\n", " if aide in row.lower():\n", " return 'assistant'\n", " for transit in transit_title:\n", " if transit in row.lower():\n", " return 'transit'\n", " for medical in medical_title:\n", " if medical in row.lower():\n", " return 'medical'\n", " if 'airport' in row.lower():\n", " return 'airport'\n", " if 'worker' in row.lower():\n", " return 'social worker'\n", " if 'architect' in row.lower():\n", " return 'architect'\n", " for court in court_title:\n", " if court in row.lower():\n", " return 'court'\n", " if 'major' in row.lower():\n", " return 'mayor'\n", " if 'librar' in row.lower():\n", " return 'library'\n", " if 'guard' in row.lower():\n", " return 'guard'\n", " if 'public' in row.lower():\n", " return 'public works'\n", " if 'attorney' in row.lower():\n", " return 'attorney'\n", " if 'custodian' in row.lower():\n", " return 'custodian'\n", " if 'account' in row.lower():\n", " return 'account'\n", " if 'garden' in row.lower():\n", " return 'gardener'\n", " if 'recreation' in row.lower():\n", " return 'recreation leader'\n", " for automotive in automotive_title:\n", " if automotive in row.lower():\n", " return 'automotive'\n", " for engineer in engineer_title:\n", " if engineer in row.lower():\n", " return 'engineer'\n", " for general_laborer in general_laborer_title:\n", " if general_laborer in row.lower():\n", " return 'general laborer'\n", " if 'food serv' in row.lower():\n", " return 'food service'\n", " if 'clerk' in row.lower():\n", " return 'clerk'\n", " if 'porter' in row.lower():\n", " return 'porter' \n", " if 'analy' in row.lower():\n", " return 'analyst'\n", " if 'manager' in row.lower():\n", " return 'manager'\n", " else:\n", " return 'other'\n", " \n", "salaries['GeneralJobTitle'] = salaries['JobTitle'].map(findJobTitle)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exploring the data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Changes in salaries over time between different groups of people" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TotalPay
Year
201171744.103871
201274113.262265
201378808.285041
201475471.836912
\n", "
" ], "text/plain": [ " TotalPay\n", "Year \n", "2011 71744.103871\n", "2012 74113.262265\n", "2013 78808.285041\n", "2014 75471.836912" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pay_by_year=salaries[['Year', 'TotalPay']].groupby('Year').mean()\n", "pay_by_year" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEBCAYAAABv4kJxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFCNJREFUeJzt3X+MXfV55/H3DMYxU9uQSdnQpCC2\ncfpoolZIcbKQiBRH2QQDraialVK1oSWkTZN65ZBSUZqaQhS2VVaE3bosSzcBkZ8lWyIalYCLqpbI\npdCEqalCmT54aIK3cRMRDw44EwL2zP5xz4TJdDzc75x77o+575eEuPfM95z7nMf2fO73nHPPHZmf\nn0eSpHaN9roASdJgMTgkSUUMDklSEYNDklTE4JAkFTE4JElF1vW6gG6YnJz0mmNJWoWtW7eOLF02\nFMEBsHXr1l6XIEkDZXJyctnlHqqSJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSkaH5HIck\n1bVnzx7uvvvuWtuYmZkBYHx8vNZ2LrzwQrZv315rG6tlcEhSFx06dAioHxy9ZHBIUpu2b99e+13+\nzp07Adi9e3cnSuoJg0Nawzy0oiYYHJJWtBYOraizDA5pDfPQiprg5biSpCIGhySpiMEhSSpicEiS\nihgckqQiBockqYjBIUkqYnBIkor4AUD1nbq3yfAWGVKzOh4cEXEicCtwJvAS4DrgUeA2YB54BNiR\nmXMRcQ1wEXAUuDwzvxwRW+qO7fQ+abB4iwypWU3MON4JHMrMSyLiZcA+4GFgV2beFxE3AxdHxBPA\necDZwOnA54HXAzfUGQvc2cA+qYvq3ibDW2RIzWoiOP4MuGPR86PAVuBL1fN7gLcBCdybmfPAgYhY\nFxGndmBs14OjX+5A6qEVSd3Q8eDIzCMAEbGJVoDsAq6vfukDPAOcDGwGDi1adWH5SM2xy5qamqqx\nVys7ePAgs7Oztbbx5JNPArBhw4ZadTS5n4Ni4c/CXnSG/eystdDPRk6OR8TptN7535SZn42I/77o\nx5uAw8DT1eOly+dqjl3WxMRE+Y60aWJigssuu6zWNjy80jljY2NAs3/mw8R+dtYg9XNycnLZ5R2/\nHDciXg7cC/xOZt5aLd4XEduqxxcAe4H7gfMjYjQizgBGM/PbHRgrSWpQEzOODwIvBa6OiKurZe8H\ndkfEemAKuCMzj0XEXuABWgG2oxp7BfCx1Y5tYH8kSYs0cY7j/bSCYqnzlhl7LXDtkmWP1R0rSWqO\nnxyXJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JU\nxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JU\nxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JU\nxOCQJBVZ19SGI+Js4COZuS0iXgv8BbC/+vH/zszPRcQ1wEXAUeDyzPxyRGwBbgPmgUeAHZk5VzK2\nqX2SJDU044iIK4GPAxuqRa8FbsjMbdV/n6vC5DzgbOAXgf9Vjb0B2JWZbwJGgItLxjaxP5KkFzQ1\n43gc+AXgU9XzrUBExMW0Zh2XA+cC92bmPHAgItZFxKnV2C9V690DvA3IgrF3NrRPkiQaCo7M/HxE\nnLlo0ZeBj2fmZET8HnANcBg4tGjMM8DJwEgVEIuXbS4Yu6ypqanV71AXzM7OAv1f5yCwl51lPztr\nLfSzsXMcS9yZmYcXHgN/DHwB2LRozCZaYTK3zLKnC8Yua2JiYrW1d8XY2BjQ/3UOAnvZWfazswap\nn5OTk8su79ZVVX8ZEf+pevwWYBK4Hzg/IkYj4gxgNDO/DeyLiG3V2AuAvYVjJUkN6taM433AjRHx\nHPBN4D2Z+XRE7AUeoBVgO6qxVwAfi4j1wBRwR2Yea3dsl/ZHkoZWY8GRmV8Hzqke/wPwxmXGXAtc\nu2TZY7SuoFr1WElSc/wAoCSpiMEhSSpicEiSihgckqQiBockqYjBIUkqYnBIkooYHJKkIgaHJKmI\nwSFJKmJwSJKKGBySpCIGhySpiMEhSSryosEREe+PiPFuFCNJ6n/tzDiOAV+MiNsj4q1NFyRJ6m8v\nGhyZeWNmvgH4Q+BdEfG1iNgVEZubL0+S1G9e9BsAI2IT8A7gV4FZ4MpqvS8Ab260OklS32nnq2P3\nAX8K/Epmfm1hYUSc1VhVkqS+1U5wRGYe+8GTiFMz88nMvKrBuiRJfaqd4Pi9iPivwInAjwCPAxON\nViVJ6lvtXFV1EXAG8Fngp4EDjVYkSepr7QTHU5n5LLA5MxMYa7gmSVIfayc4vhERlwLfi4gPA6c0\nW5IkqZ+1ExzvAb4E/DYwA/xSoxVJkvracU+OR8R/AH4LOALckJmzwP/oVmGSpP600ozjk8C/AiPA\nR7pTjiSp3610Oe76zLwRICL+qkv1SJL63Eozjvk2x0mShshKM46xiPiPtELjpOrxCEBm/ks3ipMk\n9Z+VguMYrfMcAEcXPZ4HfqbJoiS17N69m+np6Z7WsH//fgB27tzZ0zoAtmzZ0hd1DLvjBkdmntvN\nQiT9e9PT0+z7p329/fRUdaB63zf29bAI4HBvX14vWOly3L388HmOH8hMZxxSt5wCc9vmel1Fz43e\nV+9Uaz/M3qB/ZnB1Zm8rHaq6dFVblKQ+ND09zT8//DCn9biOk6r/H3744Z7V8M2a6690qOpxgOqk\n+H+hdXfcEeAVwI6arytJXXca8O7WNT5D7ZblDya1rZ3bqn8a2EPrhPi3eCEwpX+nHw4H9MuhAPBk\nrtamdoLje5n54Yi4NTMvq859SMuanp7msUf+gTM2HnvxwQ3ZPN96R/ns17/SsxoADhw5oaevLzWl\nneAYiYhTgY0RcRIw3s6GI+Js4COZuS0itgC30TrZ/giwIzPnIuIaWt/3cRS4PDO/3Imxbe470B/v\nkKF/3iV34h3yGRuPset1RzpU0eC67qGNvS5BakQ7wXEd8A5a3zt+APjMi60QEVcClwDfrRbdAOzK\nzPsi4mbg4oh4AjgPOBs4Hfg88Pq6Y4E729rzyvT0NPu++ihzY23lYWNGjrX+KCYfr3vaavVGZ2d6\n9tqSBkc7wbFh4Z5VwJ0R8fY21nkc+AXgU9XzrbRuzQ5wD/A2IIF7M3MeOBAR66qZTd2xRcEBMDc2\nzrOv+dnS1dacDY/e1esSJA2AlT7HcRFwDvDOiFgIgFHg7bTe8R9XZn4+Is5ctGik+qUP8AxwMrAZ\nOLRozMLyumOXNTU1tezy2dnZlXZl6MzOzh63V+2u743NXtCJfuoFdfppL39YnV6uNON4hNbVa98H\nnqiWzQHvXMXrLD7vsInWZ0Cfrh4vXV537LImJiaWXT42NlZtXtDqx/F61e76z3awnkHXiX7yVAcL\nGnB1+jk2NsZzHa5nkLXTy8nJyWWXH/fNYWY+kZm3AD8N3E/r2/++kpnLb2ll+yJiW/X4AmBvtc3z\nI2I0Is4ARjPz2x0YK0lqUDtHFX6d1rmKtwCfiIgPrOJ1rgA+FBEPAOuBO6oA2gs8QOvQ145OjF1F\nbZKkAu2cHL8EeGNmPh8R62m9+3/Rr5DNzK/TOkdCZj5G66qopWOuBa5dsqz2WElSc9qZcYxk5vMA\nmfkceJhQkoZZOzOOByLidlqHis4F/r7ZkiRJ/ey4M46I+BxAZn6A1of/NgG3Z+Zvdak2SVIfWmnG\ncerCg8z8AvCF5suRJPW7lYLjVRHxB8v9IDM/2FA9kqQ+t1JwzNK61YckST+wUnB8MzM/0bVKJEkD\nYaXLcVfzCXFJ0hq30i1HfrubhUiSBoM3MpUkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwOSVKR\ndm6rLqlHZmZm4DCM3ud7PA7DzEkzva5COOOQJBVyxiH1sfHxcZ743hPMbZvrdSk9N3rfKOPj470u\nQxgckobEzMwM3wJuYb7XpfTcvwFzM6s/7GdwqKNmZmZ48pkTuO6hjb0upeeeeOYETq3xj1PqVwaH\npKEwPj7O6IEDvJuRXpfSc7cwzyk1DvsZHOqo8fFxxp5+nF2vO9LrUnruuoc2ssFj8lqDhj44ZmZm\nGJ09xIZH7+p1KT03OnuImZn1vS5DUp/zclxJUpGhn3GMj4/ztaee49nX/GyvS+m5DY/e5eWOkl6U\nMw5JUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFenqvaoi\nYh/wnerp14A/Af4IOArcm5kfiohR4CbgLOD7wK9l5nREnNPu2G7ukyQNm64FR0RsAMjMbYuWPQy8\nHfgX4IsR8VrgTGBDZr6hCouPAhcDNxeMlSQ1pJszjrOAsYi4t3rda4GXZObjABHxl8BbgB8D9gBk\n5oMR8bqI2Nzu2C7ujyQNpW4GxyxwPfBx4NXAPcDhRT9/BvgJYDMvHM4COFYte7qdsRGxLjOPLn3x\nqamp5YuanS3djzVtdnb2uL1qd31PnL2gE/3UC+r0017+sDq97GZwPAZMZ+Y88FhEfAdY/OUPm2gF\nyVj1eMEordDY1M7Y5UIDYGJiYtmixsbG+OFMGm5jY2PH7VW76z/bwXoGXSf6yVMdLGjA1enn2NgY\nz3W4nkHWTi8nJyeXXd7NN4eX0ToHQUS8gtYv/e9GxKsiYgQ4H9gL3A9cWI07B/hqZj4NPNfO2C7u\njyQNpW7OOG4BbouIvwXmaQXJHPAZ4ARaV0r9fUR8BXhrRPwdMAK8q1r/vQVjJUkN6VpwZOZzwC8t\n86NzloyboxUSS9d/sN2xkqTmeB5TklTE4JAkFTE4JElFunrLEUmrcBhG7+vhe7yF66s39K4EoHUB\n/it7XIMAg0Pqa1u2bOl1Cezfvx+AV7/y1b0t5JX90Q8ZHGrAgSMncN1DG3v2+t95bgSAk9fP96wG\naPXhJ2tuY+fOnR2ppRM17N69u8eVqF8YHMDo7AwbHr2rpzWMPP89AOZPPKlnNYzOzgCn1dpGP7wj\n/H/VO+SXn9nbd8g/SX/0Q+q0oQ+OfvmH/YPDAa+q94u7ntNq98N3yNLaN/TB0Q+/6MBfdpIGx9AH\nh6Th8U3gFnp77utI9f/enQVs9eGUGusbHJKGQr8cln6yOiz946/u3Tm4U6jXD4ND0lDwsHTn+Mlx\nSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwO\nSVIRg0OSVMTgkCQVMTgkSUUMDklSEYNDklTE4JAkFTE4JElFDA5JUhGDQ5JUxOCQJBUxOCRJRQwO\nSVKRdb0uoK6IGAVuAs4Cvg/8WmZO97YqSVq71sKM4+eBDZn5BuAq4KM9rkeS1rSBn3EA5wJ7ADLz\nwYh4XbcL2LNnD3fffXetbezfvx+AnTt3rnobF154Idu3b69VRz+o289O9BLWRj/75e8m2M8Fa6Gf\nayE4NgPfWfT8WESsy8yjiwdNTU01VsDBgweZnZ2ttY2NGzcC1NrOwYMHG93Pbqnbz070cqGOQe9n\nv/zdXKjFfq6Nfo7Mz8/35IU7JSJuAB7MzP9bPf/XzPzxxWMmJyfnt27d2pP6JGlQTU5OsnXr1pGl\ny9fCOY77gQsBIuIc4Ku9LUeS1ra1cKjqTuCtEfF3wAjwrh7XI0lr2sAHR2bOAe/tdR2SNCzWwqEq\nSVIXGRySpCIGhySpiMEhSSpicEiSigz8VVXtmpyc7HUJkrQmDPwnxyVJ3eWhKklSEYNDklRkaM5x\n9EpEnAjcCpwJvAS4DngUuA2YBx4BdlSfgCcitgB/npk/tWQ7lwOnZeZVXSu+z9TtZUScUa2/jtbt\nad6TmdndvegfHejnacBngPXAvwGXZma9W74OsA7+W/8Z4DOZeXrXii/kjKN57wQOZeabgAuAG4Eb\ngF3VshHgYoCIuAS4HfjRhZUj4qSI+DSwo9uF96FavQQ+DNyYmduAPwD+sHul96W6/bwK+EQ19lHg\nN7pYez+q208i4nTgCuDELtZdzOBo3p8BVy96fhTYCnypen4P8J+rx08B5y1ZfwPwSeC/NVjjoKjb\nyyuAL1aP1wHPNlPmwKjbzw8An66+vvl04FvNlToQavUzIjYANwO/2WyZ9RkcDcvMI5n5TERsAu4A\ndgEjmblwOdszwMnV2Lsy87tL1n8qM+/tatF9qgO9/HZmPh8RAVwPfKiL5fedDvRzHjiB1iGYN9P6\nioOhVbeftGYo12fmN7pW9CoZHF1QTT//BvhUZn4WmFv0403A4Z4UNoDq9jIi3gz8OXDJMJ/fWFC3\nn5n5fGa+BngPrZnxUFttPyPiFcCbgGsi4j5gPCJub7jcVTM4GhYRLwfuBX4nM2+tFu+LiG3V4wuA\nvb2obdDU7WUVGn8EbM/Mh5qsdRB0oJ83VT2F1rvpueONHQZ1+pmZBzMzMnNbdQ5uJjN/semaV8ur\nqpr3QeClwNURsXD88/3A7ohYD0zRmtbqxdXt5f+kdQXQJ1pHq8jMHOYTunX7uRu4OSJ+n1Zo9P2x\n+YYNzb91PzkuSSrioSpJUhGDQ5JUxOCQJBUxOCRJRQwOSVIRg0NqSETcERFXLXq+MSIyIs7qZV1S\nXQaH1Jz3Au+LiNdUz68H/k9m/mMPa5Jq83McUoMi4ueA363+2wW8DfgpWh+eGwEOAZcBR4A/oXWz\nwJcB92Tm1RFxW/X8ZcBFmflUt/dBWsoZh9SgzPwL4J9pfSfDpdUN7z5G63sZtgF3A1fSCowHM/N8\n4FzgfYs289eZ+UZDQ/3CW45IzfskMLborqcTwE3VbU9OBB4DZoDXV/d+eprWFwEtGPqbMaq/OOOQ\nui+BX6lmHFfS+o6QS4HDmfnLwEeBsYgYqcYP9c0D1X+ccUjd9z7gkxFxQvX83bRugHd7RLwJ+C6w\nH3hFj+qTVuTJcUlSEQ9VSZKKGBySpCIGhySpiMEhSSpicEiSihgckqQiBockqYjBIUkq8v8BXgyX\n5FTFelEAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.boxplot(x='Year', y='TotalPay', data=salaries, showfliers=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I can notice the upward trend in the TotalPay in years 2011-2013, but in 2014 the TotalPay mean slightly decreased." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Checking how the pays changed over the years in groups of people" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TotalPay
GeneralJobTitleYear
account201156177.342558
201263186.929962
201366857.307417
201463641.837870
airport201192273.242673
201294104.593235
201398160.944660
201494067.513077
analyst201179492.329936
201281830.453873
201387654.166634
201483168.412823
architect201196071.390972
2012113270.148969
2013125333.457143
2014124302.629417
assistant201143198.079421
201240129.112925
201340410.167382
201439600.355957
attorney2011126626.434486
2012133973.170860
2013144234.833244
2014138208.046967
automotive201178395.460507
201281830.392473
201384965.658302
201483083.261233
clerk201140999.051714
201242632.039917
.........
medical201379212.934761
201477210.463099
other201166222.015069
201275053.523929
201380126.012794
201478234.947040
police2011109707.702262
2012110355.916599
2013119961.317699
2014113887.308085
porter201140985.904961
201242379.496088
201346010.055586
201445553.476173
public works201136133.666306
201237266.939199
201335909.203407
201434340.963239
recreation leader201110438.966533
201211370.921782
201312207.075927
201412449.075208
social worker201160714.044061
201261365.984920
201363954.010630
201462010.384171
transit201170427.436251
201274160.880798
201377607.464635
201471134.737660
\n", "

104 rows × 1 columns

\n", "
" ], "text/plain": [ " TotalPay\n", "GeneralJobTitle Year \n", "account 2011 56177.342558\n", " 2012 63186.929962\n", " 2013 66857.307417\n", " 2014 63641.837870\n", "airport 2011 92273.242673\n", " 2012 94104.593235\n", " 2013 98160.944660\n", " 2014 94067.513077\n", "analyst 2011 79492.329936\n", " 2012 81830.453873\n", " 2013 87654.166634\n", " 2014 83168.412823\n", "architect 2011 96071.390972\n", " 2012 113270.148969\n", " 2013 125333.457143\n", " 2014 124302.629417\n", "assistant 2011 43198.079421\n", " 2012 40129.112925\n", " 2013 40410.167382\n", " 2014 39600.355957\n", "attorney 2011 126626.434486\n", " 2012 133973.170860\n", " 2013 144234.833244\n", " 2014 138208.046967\n", "automotive 2011 78395.460507\n", " 2012 81830.392473\n", " 2013 84965.658302\n", " 2014 83083.261233\n", "clerk 2011 40999.051714\n", " 2012 42632.039917\n", "... ...\n", "medical 2013 79212.934761\n", " 2014 77210.463099\n", "other 2011 66222.015069\n", " 2012 75053.523929\n", " 2013 80126.012794\n", " 2014 78234.947040\n", "police 2011 109707.702262\n", " 2012 110355.916599\n", " 2013 119961.317699\n", " 2014 113887.308085\n", "porter 2011 40985.904961\n", " 2012 42379.496088\n", " 2013 46010.055586\n", " 2014 45553.476173\n", "public works 2011 36133.666306\n", " 2012 37266.939199\n", " 2013 35909.203407\n", " 2014 34340.963239\n", "recreation leader 2011 10438.966533\n", " 2012 11370.921782\n", " 2013 12207.075927\n", " 2014 12449.075208\n", "social worker 2011 60714.044061\n", " 2012 61365.984920\n", " 2013 63954.010630\n", " 2014 62010.384171\n", "transit 2011 70427.436251\n", " 2012 74160.880798\n", " 2013 77607.464635\n", " 2014 71134.737660\n", "\n", "[104 rows x 1 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "salaries_jobs_years=salaries[['GeneralJobTitle', 'TotalPay', 'Year']].groupby(['GeneralJobTitle', 'Year']).mean()\n", "salaries_jobs_years" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "2011", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 56177.342558139564, 92273.2426732673, 79492.32993576021, 96071.3909722222, 43198.07942143331, 126626.43448616585, 78395.46050724639, 40999.051714459914, 68546.73194782622, 40521.552776381934, 81251.4860434056, 142172.54780640136, 42686.877323943656, 53697.759342105266, 79013.0795906433, 19500.85446393766, 34683.7253180212, 115523.68841196792, 69088.4893665589, 66222.01506878574, 109707.70226150741, 40985.90496062993, 36133.66630560935, 10438.966532663315, 60714.0440606705, 70427.43625082301 ] }, { "name": "2012", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 63186.92996240612, 94104.59323529412, 81830.45387337073, 113270.14896907205, 40129.112925414505, 133973.17086021506, 81830.39247334749, 42632.03991691398, 64781.44596685088, 43639.09918787882, 84647.20845991567, 148795.11949920497, 45434.13726027398, 51507.85037617552, 86891.14136308804, 19211.737847358123, 39617.340386740296, 120077.08837209288, 74049.6819703387, 75053.52392894666, 110355.91659884396, 42379.49608808286, 37266.93919866444, 11370.92178176795, 61365.98491978606, 74160.88079762689 ] }, { "name": "2013", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 66857.30741697413, 98160.94466019416, 87654.16663392863, 125333.45714285708, 40410.16738194091, 144234.8332439679, 84965.65830227743, 48565.42210256415, 71246.31403571427, 49467.61480769224, 90204.47242597395, 158751.63883199994, 49410.07479452056, 56403.07606811142, 92613.47836024873, 19129.360374999982, 43176.11009469695, 127630.41192307706, 79212.93476139763, 80126.01279387012, 119961.3176992791, 46010.05558583111, 35909.20340694003, 12207.07592692829, 63954.010629515025, 77607.46463486848 ] }, { "name": "2014", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 63641.83786971834, 94067.51307692307, 83168.41282264778, 124302.62941747568, 39600.35595712471, 138208.04696658088, 83083.2612326044, 45449.74066744737, 62198.70878378374, 48750.94450248754, 87231.11888122212, 144454.13814253244, 47433.87616438354, 54012.492067901265, 88374.48909538469, 18247.304958263805, 46201.31738241313, 123976.42938106811, 77210.46309862063, 78234.9470399353, 113887.30808534652, 45553.476172506766, 34340.96323903821, 12449.075208070622, 62010.38417070799, 71134.7376604809 ] } ], "layout": { "barmode": "group" } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "salaries_jobs_by_year=salaries[['GeneralJobTitle', 'TotalPay', 'Year']].groupby(['Year', 'GeneralJobTitle']).mean()\n", "job_names=salaries_jobs_by_year.xs(2011, level='Year').index.values.tolist()\n", "\n", "trace1 = go.Bar(\n", " x=job_names,\n", " y=salaries_jobs_by_year.xs(2011, level='Year')['TotalPay'],\n", " name='2011'\n", ")\n", "trace2 = go.Bar(\n", " x=job_names,\n", " y=salaries_jobs_by_year.xs(2012, level='Year')['TotalPay'],\n", " name='2012'\n", ")\n", "trace3 = go.Bar(\n", " x=job_names,\n", " y=salaries_jobs_by_year.xs(2013, level='Year')['TotalPay'],\n", " name='2013'\n", ")\n", "trace4 = go.Bar(\n", " x=job_names,\n", " y=salaries_jobs_by_year.xs(2014, level='Year')['TotalPay'],\n", " name='2014'\n", ")\n", "\n", "data = [trace1, trace2, trace3, trace4]\n", "layout = go.Layout(\n", " barmode='group'\n", ")\n", "\n", "fig = go.Figure(data=data, layout=layout)\n", "cf.iplot(fig, filename='grouped-bar')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now I made sure that the Total Pay was building up in almost every group until 2014, when it dropped a little bit.
\n", "The job groups which were affected the most by whatever happened in 2014 are: Court and Firefighters.
\n", "We can also notice, that only the Library and Recreation Leader group maintained it's upward trend.
\n", "The jobs which remained almost unaffected were: Assistants and Guards." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Looking at how base pay, overtime pay and benefits are allocated between job groups" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BasePayOvertimePayBenefits
GeneralJobTitle
account61613.504421287.14025025708.762000
airport84780.2080736566.24819530316.527111
analyst82264.012407166.78041629064.369347
architect114872.57513573.95875735758.150797
assistant37802.1161221831.49093216931.972103
attorney132583.72498254.81023236687.825837
automotive68174.9637299124.60788127314.619298
clerk42546.5422541090.23230620997.084503
court64585.883368199.01006825710.071452
custodian41832.5859841526.10091722156.731947
engineer80059.3379532414.45964827963.838882
fire107372.90093025623.37268434720.246376
food service42747.6738621746.41586223226.094070
gardener50754.5030242010.76504723284.807426
general laborer75538.1640226700.99776127975.448840
guard16689.2083601791.4769229832.407203
library39851.0527611.45705119297.451219
manager118890.98050237.07416439049.842377
medical68053.4832462123.11225923100.226239
other70091.1954642890.04498826482.241173
police93186.88037811636.75674930192.136074
porter38968.0828773260.08332220179.401089
public works30809.0951243133.81890114508.889048
recreation leader11374.1307312.4267238759.502923
social worker57299.1320022809.18135424439.990860
transit57626.47416812847.27615227828.666833
\n", "
" ], "text/plain": [ " BasePay OvertimePay Benefits\n", "GeneralJobTitle \n", "account 61613.504421 287.140250 25708.762000\n", "airport 84780.208073 6566.248195 30316.527111\n", "analyst 82264.012407 166.780416 29064.369347\n", "architect 114872.575135 73.958757 35758.150797\n", "assistant 37802.116122 1831.490932 16931.972103\n", "attorney 132583.724982 54.810232 36687.825837\n", "automotive 68174.963729 9124.607881 27314.619298\n", "clerk 42546.542254 1090.232306 20997.084503\n", "court 64585.883368 199.010068 25710.071452\n", "custodian 41832.585984 1526.100917 22156.731947\n", "engineer 80059.337953 2414.459648 27963.838882\n", "fire 107372.900930 25623.372684 34720.246376\n", "food service 42747.673862 1746.415862 23226.094070\n", "gardener 50754.503024 2010.765047 23284.807426\n", "general laborer 75538.164022 6700.997761 27975.448840\n", "guard 16689.208360 1791.476922 9832.407203\n", "library 39851.052761 1.457051 19297.451219\n", "manager 118890.980502 37.074164 39049.842377\n", "medical 68053.483246 2123.112259 23100.226239\n", "other 70091.195464 2890.044988 26482.241173\n", "police 93186.880378 11636.756749 30192.136074\n", "porter 38968.082877 3260.083322 20179.401089\n", "public works 30809.095124 3133.818901 14508.889048\n", "recreation leader 11374.130731 2.426723 8759.502923\n", "social worker 57299.132002 2809.181354 24439.990860\n", "transit 57626.474168 12847.276152 27828.666833" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "salaries_jobs_benefits=salaries[['GeneralJobTitle', 'BasePay', 'OvertimePay', 'Benefits']].groupby(['GeneralJobTitle']).mean()\n", "salaries_jobs_benefits" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Base Pay", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 61613.50442071801, 84780.20807317077, 82264.01240702174, 114872.57513513515, 37802.11612181428, 132583.72498170714, 68174.96372926715, 42546.54225424753, 64585.88336758029, 41832.58598439947, 80059.33795344448, 107372.90092991511, 42747.67386206894, 50754.50302362204, 75538.16402207575, 16689.208360055003, 39851.05276105357, 118890.98050200132, 68053.48324613085, 70091.19546367193, 93186.88037768792, 38968.08287707646, 30809.09512413508, 11374.130730668434, 57299.13200154768, 57626.47416807743 ] }, { "name": "Overtime Pay", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 287.1402498864152, 6566.24819512195, 166.78041580041574, 73.95875675675676, 1831.4909315654986, 54.81023170731708, 9124.607881219921, 1090.232305825244, 199.01006849315067, 1526.1009173166924, 2414.45964822806, 25623.372684025027, 1746.4158620689648, 2010.7650472440937, 6700.997761194034, 1791.4769216674306, 1.4570507996237065, 37.07416384354789, 2123.1122593411055, 2890.0449882478647, 11636.756748889124, 3260.083322259147, 3133.8189010988967, 2.4267234600262126, 2809.181353723061, 12847.27615210895 ] }, { "name": "Benefits", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 25708.761999550567, 30316.52711096509, 29064.36934677926, 35758.150796512986, 16931.972102684962, 36687.82583657839, 27314.61929752882, 20997.084502678703, 25710.071451580254, 22156.73194691025, 27963.83888246543, 34720.24637571046, 23226.09406965497, 23284.807425981882, 27975.448840278812, 9832.407202729017, 19297.451219463772, 39049.84237729196, 23100.226238798936, 26482.24117326679, 30192.136074445927, 20179.401088542254, 14508.88904826899, 8759.50292262364, 24439.990859984708, 27828.66683288528 ] } ], "layout": { "barmode": "group" } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "trace1 = go.Bar(\n", " x=job_names,\n", " y=salaries_jobs_benefits['BasePay'],\n", " name='Base Pay'\n", ")\n", "trace2 = go.Bar(\n", " x=job_names,\n", " y=salaries_jobs_benefits['OvertimePay'],\n", " name='Overtime Pay'\n", ")\n", "trace3 = go.Bar(\n", " x=job_names,\n", " y=salaries_jobs_benefits['Benefits'],\n", " name='Benefits'\n", ")\n", "\n", "data = [trace1, trace2, trace3]\n", "layout = go.Layout(\n", " barmode='group'\n", ")\n", "\n", "fig = go.Figure(data=data, layout=layout)\n", "cf.iplot(fig, filename='grouped-bar')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The most important insights from these plots are:
\n", "-Firefighters, Policeman and Transit are groups which work overtime the most.
\n", "-Attorney, Manager, Architect and Firefighters are gropus which earn the most Base Pay.
\n", "-The groups which earn the least are Guards and Recreation Leader.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Checking if there is any evidence of pay discrimination based on gender" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Creating gender column with some help of gender guesser library" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import gender_guesser.detector as gender\n", "d = gender.Detector(case_sensitive=False)\n", "salaries['Gender']=salaries['EmployeeName'].apply(lambda x: x.split()[0]).apply(lambda x: d.get_gender(x.lower()))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "salaries_job_gender=salaries[(salaries['Gender']=='male') | (salaries['Gender']=='female')][['GeneralJobTitle', 'TotalPay', 'Gender']].groupby(['GeneralJobTitle', 'Gender']).mean()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Taking a look at data and plotting it" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TotalPay
GeneralJobTitleGender
accountfemale62720.881249
male61554.983552
airportfemale89959.928741
male97818.146010
analystfemale81847.317095
male84359.228566
architectfemale113353.711455
male114559.383938
assistantfemale45963.404888
male37698.158883
attorneyfemale134018.130759
male138007.626231
automotivefemale72252.454815
male81672.705192
clerkfemale47233.568245
male37452.355623
courtfemale69189.877951
male65668.484733
custodianfemale45257.969013
male45336.296070
engineerfemale71264.697426
male89319.615244
firefemale139360.142473
male149907.709934
food servicefemale39156.752889
male54430.824388
gardenerfemale50016.638519
male55155.304686
general laborerfemale80573.823189
male87667.235885
guardfemale16436.266490
male23580.319757
libraryfemale46529.025088
male36206.884371
managerfemale121272.730786
male122426.874990
medicalfemale73540.648174
male80181.323528
otherfemale70579.012167
male79993.082152
policefemale95458.979664
male120337.144226
porterfemale47195.294830
male44687.008480
public worksfemale41433.817635
male32639.754413
recreation leaderfemale11693.305312
male12549.577932
social workerfemale63067.066715
male62496.266117
transitfemale63954.873872
male76903.350026
\n", "
" ], "text/plain": [ " TotalPay\n", "GeneralJobTitle Gender \n", "account female 62720.881249\n", " male 61554.983552\n", "airport female 89959.928741\n", " male 97818.146010\n", "analyst female 81847.317095\n", " male 84359.228566\n", "architect female 113353.711455\n", " male 114559.383938\n", "assistant female 45963.404888\n", " male 37698.158883\n", "attorney female 134018.130759\n", " male 138007.626231\n", "automotive female 72252.454815\n", " male 81672.705192\n", "clerk female 47233.568245\n", " male 37452.355623\n", "court female 69189.877951\n", " male 65668.484733\n", "custodian female 45257.969013\n", " male 45336.296070\n", "engineer female 71264.697426\n", " male 89319.615244\n", "fire female 139360.142473\n", " male 149907.709934\n", "food service female 39156.752889\n", " male 54430.824388\n", "gardener female 50016.638519\n", " male 55155.304686\n", "general laborer female 80573.823189\n", " male 87667.235885\n", "guard female 16436.266490\n", " male 23580.319757\n", "library female 46529.025088\n", " male 36206.884371\n", "manager female 121272.730786\n", " male 122426.874990\n", "medical female 73540.648174\n", " male 80181.323528\n", "other female 70579.012167\n", " male 79993.082152\n", "police female 95458.979664\n", " male 120337.144226\n", "porter female 47195.294830\n", " male 44687.008480\n", "public works female 41433.817635\n", " male 32639.754413\n", "recreation leader female 11693.305312\n", " male 12549.577932\n", "social worker female 63067.066715\n", " male 62496.266117\n", "transit female 63954.873872\n", " male 76903.350026" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "salaries_job_gender" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Male", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 61554.98355191258, 97818.14600985224, 84359.22856594122, 114559.38393805308, 37698.15888322724, 138007.6262314542, 81672.70519206935, 37452.35562259317, 65668.48473282444, 45336.29606965172, 89319.61524365652, 149907.7099344591, 54430.82438775509, 55155.304686098614, 87667.23588492218, 23580.31975659227, 36206.88437125747, 122426.8749904398, 80181.32352803125, 79993.08215249506, 120337.1442256307, 44687.00847988083, 32639.754412903196, 12549.577932053193, 62496.266117136576, 76903.3500260415 ] }, { "name": "Female", "type": "bar", "x": [ "account", "airport", "analyst", "architect", "assistant", "attorney", "automotive", "clerk", "court", "custodian", "engineer", "fire", "food service", "gardener", "general laborer", "guard", "library", "manager", "medical", "other", "police", "porter", "public works", "recreation leader", "social worker", "transit" ], "y": [ 62720.881248808415, 89959.92874125874, 81847.3170949434, 113353.71145454547, 45963.404887693934, 134018.1307586208, 72252.4548148148, 47233.56824512543, 69189.87795107042, 45257.96901345292, 71264.69742566509, 139360.14247295228, 39156.75288888888, 50016.638518518535, 80573.82318944839, 16436.266490066213, 46529.0250882353, 121272.7307857143, 73540.64817394386, 70579.01216710889, 95458.97966369026, 47195.29482972139, 41433.817634782645, 11693.305311653117, 63067.066714723864, 63954.87387221697 ] } ], "layout": { "barmode": "group" } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "salaries_gender=salaries[(salaries['Gender']=='male') | (salaries['Gender']=='female')][['GeneralJobTitle', 'TotalPay', 'Gender']].groupby(['Gender', 'GeneralJobTitle']).mean()\n", "\n", "trace1 = go.Bar(\n", " x=job_names,\n", " y=salaries_gender.xs('male', level='Gender')['TotalPay'],\n", " name='Male'\n", ")\n", "trace2 = go.Bar(\n", " x=job_names,\n", " y=salaries_gender.xs('female', level='Gender')['TotalPay'],\n", " name='Female'\n", ")\n", "\n", "data = [trace1, trace2]\n", "layout = go.Layout(\n", " barmode='group'\n", ")\n", "\n", "fig = go.Figure(data=data, layout=layout)\n", "cf.iplot(fig, filename='grouped-bar')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I can easily notice that there is evidence of pay discrimination based on gender.
\n", "In only 4 gropus (15%) women earn noticeably more than men.
\n", "In 9 groups (35%) women and men earn more or less the same amount of money.
\n", "In remaining 13 (50%!) groups men earn significantly more than women.
\n", "The biggest difference can be seen in the engineer group." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Looking at the number of people in every job, that earn more than $200k" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "marker": { "color": "rgba(255, 153, 51, 0.6)", "line": { "color": "rgba(255, 153, 51, 1.0)", "width": 1 } }, "name": "GeneralJobTitle", "orientation": "v", "text": "", "type": "bar", "x": [ "fire", "police", "medical", "assistant", "manager", "general laborer", "transit", "attorney", "public works", "architect", "automotive" ], "y": [ 638, 470, 322, 74, 58, 46, 44, 14, 6, 2, 1 ] } ], "layout": { "legend": { "bgcolor": "#F5F6F9", "font": { "color": "#4D5663" } }, "paper_bgcolor": "#F5F6F9", "plot_bgcolor": "#F5F6F9", "titlefont": { "color": "#4D5663" }, "xaxis1": { "gridcolor": "#E1E5ED", "showgrid": true, "tickfont": { "color": "#4D5663" }, "title": "", "titlefont": { "color": "#4D5663" }, "zerolinecolor": "#E1E5ED" }, "yaxis1": { "gridcolor": "#E1E5ED", "showgrid": true, "tickfont": { "color": "#4D5663" }, "title": "", "titlefont": { "color": "#4D5663" }, "zerolinecolor": "#E1E5ED" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "salaries_more_than_100k=salaries[salaries['TotalPay']>200000]['GeneralJobTitle'].value_counts().drop('other')\n", "salaries_more_than_100k.iplot(kind='bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The job groups in which the most people earn supranormal amount of moneyare clearly Firemen, Policemen and Medical." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }