{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Combine data files with state populations\n", "The first data file has 2000-2010" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:34:12.487554Z", "start_time": "2017-10-30T16:34:10.713622Z" } }, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import pandas as pd\n", "import numpy as np\n", "import os\n", "from os.path import join\n", "cwd = os.getcwd()\n", "data_directory = join(cwd, '..', 'Data storage')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import 2000-2010 data\n", "The sex, origin, race, and age columns are ALL when they have values of 0\n", "\n", "Not clear if these are beginning or end of year values.\n", "\n", "https://www2.census.gov/programs-surveys/popest/datasets/2000-2010/intercensal/state/" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:34:16.891879Z", "start_time": "2017-10-30T16:34:16.674736Z" }, "collapsed": true }, "outputs": [], "source": [ "path = os.path.join(data_directory, 'Population data',\n", " 'st-est00int-alldata.csv')\n", "pop1 = pd.read_csv(path)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:34:18.341992Z", "start_time": "2017-10-30T16:34:18.316636Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
REGIONDIVISIONSTATENAMESEXORIGINRACEAGEGRPESTIMATESBASE2000POPESTIMATE2000...POPESTIMATE2002POPESTIMATE2003POPESTIMATE2004POPESTIMATE2005POPESTIMATE2006POPESTIMATE2007POPESTIMATE2008POPESTIMATE2009CENSUS2010POPPOPESTIMATE2010
0000United States0000281424600282162411...287625193290107933292805298295516599298379912301231207304093966306771529308745538309349689
1000United States00011917615419178293...19429192195924461978588519917400199388832012596220271127202445182020136220200529
2000United States00022054985520463852...19872417196208511945423719389067195446881971461119929602201824992034865720382409
3000United States00032052842520637696...21261421214153532141168021212579210331382084104220706655206605642067719420694011
4000United States00042021878220294955...20610370207971662110255221486214218077092206781622210880221928102204034321959087
\n", "

5 rows × 21 columns

\n", "
" ], "text/plain": [ " REGION DIVISION STATE NAME SEX ORIGIN RACE AGEGRP \\\n", "0 0 0 0 United States 0 0 0 0 \n", "1 0 0 0 United States 0 0 0 1 \n", "2 0 0 0 United States 0 0 0 2 \n", "3 0 0 0 United States 0 0 0 3 \n", "4 0 0 0 United States 0 0 0 4 \n", "\n", " ESTIMATESBASE2000 POPESTIMATE2000 ... POPESTIMATE2002 \\\n", "0 281424600 282162411 ... 287625193 \n", "1 19176154 19178293 ... 19429192 \n", "2 20549855 20463852 ... 19872417 \n", "3 20528425 20637696 ... 21261421 \n", "4 20218782 20294955 ... 20610370 \n", "\n", " POPESTIMATE2003 POPESTIMATE2004 POPESTIMATE2005 POPESTIMATE2006 \\\n", "0 290107933 292805298 295516599 298379912 \n", "1 19592446 19785885 19917400 19938883 \n", "2 19620851 19454237 19389067 19544688 \n", "3 21415353 21411680 21212579 21033138 \n", "4 20797166 21102552 21486214 21807709 \n", "\n", " POPESTIMATE2007 POPESTIMATE2008 POPESTIMATE2009 CENSUS2010POP \\\n", "0 301231207 304093966 306771529 308745538 \n", "1 20125962 20271127 20244518 20201362 \n", "2 19714611 19929602 20182499 20348657 \n", "3 20841042 20706655 20660564 20677194 \n", "4 22067816 22210880 22192810 22040343 \n", "\n", " POPESTIMATE2010 \n", "0 309349689 \n", "1 20200529 \n", "2 20382409 \n", "3 20694011 \n", "4 21959087 \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop1.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:34:40.634078Z", "start_time": "2017-10-30T16:34:40.613118Z" }, "collapsed": true }, "outputs": [], "source": [ "pop1 = pop1.loc[(pop1['SEX'] == 0) &\n", " (pop1['ORIGIN'] == 0) &\n", " (pop1['RACE'] == 0) &\n", " (pop1['AGEGRP'] == 0), :]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:35:15.240643Z", "start_time": "2017-10-30T16:35:15.236029Z" }, "collapsed": true }, "outputs": [], "source": [ "# Column names for population estimate\n", "est_cols = ['POPESTIMATE{}'.format(x) for x in range(2000, 2011)]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:35:32.342354Z", "start_time": "2017-10-30T16:35:32.332791Z" }, "collapsed": true }, "outputs": [], "source": [ "# Melt the wide-form data into a tidy dataframe\n", "pop1_tidy = pd.melt(pop1, id_vars='NAME',\n", " value_vars=est_cols, var_name='Year',\n", " value_name='Population')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:35:35.480574Z", "start_time": "2017-10-30T16:35:35.470689Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEYearPopulation
0United StatesPOPESTIMATE2000282162411
1AlabamaPOPESTIMATE20004452173
2AlaskaPOPESTIMATE2000627963
3ArizonaPOPESTIMATE20005160586
4ArkansasPOPESTIMATE20002678588
\n", "
" ], "text/plain": [ " NAME Year Population\n", "0 United States POPESTIMATE2000 282162411\n", "1 Alabama POPESTIMATE2000 4452173\n", "2 Alaska POPESTIMATE2000 627963\n", "3 Arizona POPESTIMATE2000 5160586\n", "4 Arkansas POPESTIMATE2000 2678588" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop1_tidy.head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:36:04.134970Z", "start_time": "2017-10-30T16:36:04.126266Z" }, "collapsed": true }, "outputs": [], "source": [ "def map_year(x):\n", " 'Return last 4 characters (the year)'\n", " year = x[-4:]\n", " return int(year)\n", "\n", "pop1_tidy['Year'] = pop1_tidy['Year'].map(map_year)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The values shown below are ever slightly different than those listed in the later dataset." ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:36:10.619297Z", "start_time": "2017-10-30T16:36:10.606318Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEYearPopulation
520United States2010309349689
521Alabama20104785298
522Alaska2010713985
523Arizona20106413737
524Arkansas20102921606
\n", "
" ], "text/plain": [ " NAME Year Population\n", "520 United States 2010 309349689\n", "521 Alabama 2010 4785298\n", "522 Alaska 2010 713985\n", "523 Arizona 2010 6413737\n", "524 Arkansas 2010 2921606" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop1_tidy.loc[pop1_tidy['Year'] == 2010].head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:36:29.041098Z", "start_time": "2017-10-30T16:36:29.031243Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEYearPopulation
0United States2000282162411
1Alabama20004452173
2Alaska2000627963
3Arizona20005160586
4Arkansas20002678588
\n", "
" ], "text/plain": [ " NAME Year Population\n", "0 United States 2000 282162411\n", "1 Alabama 2000 4452173\n", "2 Alaska 2000 627963\n", "3 Arizona 2000 5160586\n", "4 Arkansas 2000 2678588" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop1_tidy.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:36:29.457716Z", "start_time": "2017-10-30T16:36:29.447824Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEYearPopulation
567Virginia20108024617
568Washington20106744496
569West Virginia20101853973
570Wisconsin20105691047
571Wyoming2010564460
\n", "
" ], "text/plain": [ " NAME Year Population\n", "567 Virginia 2010 8024617\n", "568 Washington 2010 6744496\n", "569 West Virginia 2010 1853973\n", "570 Wisconsin 2010 5691047\n", "571 Wyoming 2010 564460" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop1_tidy.tail()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:36:29.850270Z", "start_time": "2017-10-30T16:36:29.846689Z" }, "collapsed": true }, "outputs": [], "source": [ "pop1_tidy.columns = ['State', 'Year', 'Population']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import 2010-2016 data\n", "https://www.census.gov/data/tables/2016/demo/popest/state-total.html" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:36:43.458638Z", "start_time": "2017-10-30T16:36:43.408276Z" }, "collapsed": true }, "outputs": [], "source": [ "path = os.path.join(data_directory, 'Population data', 'nst-est2016-01.xlsx')\n", "pop2 = pd.read_excel(path, header=3, parse_cols='A, D:J', skip_footer=7)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:36:45.940900Z", "start_time": "2017-10-30T16:36:45.928511Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2010201120122013201420152016
United States309348193311663358313998379316204908318563456320896618323127513
Northeast55388056556327665582905955988771561167915618473756209510
Midwest66978602671533316733232067543948677263686783838767941429
South114863114116061801117299171118424320119696311121039206122319574
West72118421728154607353782974247869750239867583428876657000
\n", "
" ], "text/plain": [ " 2010 2011 2012 2013 2014 \\\n", "United States 309348193 311663358 313998379 316204908 318563456 \n", "Northeast 55388056 55632766 55829059 55988771 56116791 \n", "Midwest 66978602 67153331 67332320 67543948 67726368 \n", "South 114863114 116061801 117299171 118424320 119696311 \n", "West 72118421 72815460 73537829 74247869 75023986 \n", "\n", " 2015 2016 \n", "United States 320896618 323127513 \n", "Northeast 56184737 56209510 \n", "Midwest 67838387 67941429 \n", "South 121039206 122319574 \n", "West 75834288 76657000 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop2.head()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:36:55.381981Z", "start_time": "2017-10-30T16:36:55.369945Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2010201120122013201420152016
.Virginia8025773811003581920488262692831737283675878411808
.Washington6743226682252068952266968006705419671602907288000
.West Virginia1854230185497218565601853231184851418410531831102
.Wisconsin5690263570964057261775742854575837757678915778708
.Wyoming564513567725576765582684583642586555585501
\n", "
" ], "text/plain": [ " 2010 2011 2012 2013 2014 2015 2016\n", ".Virginia 8025773 8110035 8192048 8262692 8317372 8367587 8411808\n", ".Washington 6743226 6822520 6895226 6968006 7054196 7160290 7288000\n", ".West Virginia 1854230 1854972 1856560 1853231 1848514 1841053 1831102\n", ".Wisconsin 5690263 5709640 5726177 5742854 5758377 5767891 5778708\n", ".Wyoming 564513 567725 576765 582684 583642 586555 585501" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop2.tail()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:37:04.068511Z", "start_time": "2017-10-30T16:37:04.063956Z" }, "collapsed": true }, "outputs": [], "source": [ "drop_rows = ['Northeast', 'Midwest', 'South', 'West']\n", "pop2.drop(drop_rows, inplace=True)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:37:41.861566Z", "start_time": "2017-10-30T16:37:41.857935Z" } }, "outputs": [], "source": [ "pop2.index = pop2.index.str.strip('.')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:37:42.857641Z", "start_time": "2017-10-30T16:37:42.844396Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2010201120122013201420152016
United States309348193311663358313998379316204908318563456320896618323127513
Alabama4785492479991848159604829479484321448538754863300
Alaska714031722713731089736879736705737709741894
Arizona6408312646716365496346624617671999368175656931071
Arkansas2921995293949329506852958663296691229778532988248
\n", "
" ], "text/plain": [ " 2010 2011 2012 2013 2014 \\\n", "United States 309348193 311663358 313998379 316204908 318563456 \n", "Alabama 4785492 4799918 4815960 4829479 4843214 \n", "Alaska 714031 722713 731089 736879 736705 \n", "Arizona 6408312 6467163 6549634 6624617 6719993 \n", "Arkansas 2921995 2939493 2950685 2958663 2966912 \n", "\n", " 2015 2016 \n", "United States 320896618 323127513 \n", "Alabama 4853875 4863300 \n", "Alaska 737709 741894 \n", "Arizona 6817565 6931071 \n", "Arkansas 2977853 2988248 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop2.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:37:49.158398Z", "start_time": "2017-10-30T16:37:49.152225Z" } }, "outputs": [ { "data": { "text/plain": [ "Int64Index([2010, 2011, 2012, 2013, 2014, 2015, 2016], dtype='int64')" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop2.columns" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:37:54.388262Z", "start_time": "2017-10-30T16:37:54.377205Z" }, "collapsed": true }, "outputs": [], "source": [ "pop2_tidy = pd.melt(pop2.reset_index(), id_vars='index',\n", " value_vars=range(2010, 2017), value_name='Population',\n", " var_name='Year')\n", "pop2_tidy.columns = ['State', 'Year', 'Population']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combine data" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:37:58.078817Z", "start_time": "2017-10-30T16:37:58.073653Z" }, "collapsed": true }, "outputs": [], "source": [ "pop_total = pd.concat([pop1_tidy, pop2_tidy])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The overlapping 2010 values are different, but just barely. I'm going to re-combine the datasets and keep values from the second dataset." ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:38:17.265430Z", "start_time": "2017-10-30T16:38:17.239274Z" }, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateYearPopulation
1Alabama20104785492
521Alabama20104785298
522Alaska2010713985
2Alaska2010714031
523Arizona20106413737
3Arizona20106408312
524Arkansas20102921606
4Arkansas20102921995
525California201037349363
5California201037332685
526Colorado20105049071
6Colorado20105048644
527Connecticut20103577073
7Connecticut20103579899
528Delaware2010899769
8Delaware2010899816
529District of Columbia2010604453
9District of Columbia2010605183
530Florida201018843326
10Florida201018849098
11Georgia20109713521
531Georgia20109712587
532Hawaii20101363621
12Hawaii20101363945
13Idaho20101571010
533Idaho20101571450
14Illinois201012841578
534Illinois201012843166
15Indiana20106490528
535Indiana20106490621
............
558Oregon20103838957
38Oregon20103838048
39Pennsylvania201012712343
559Pennsylvania201012709630
40Rhode Island20101053337
560Rhode Island20101052886
41South Carolina20104635943
561South Carolina20104636312
42South Dakota2010816325
562South Dakota2010816463
43Tennessee20106356671
563Tennessee20106356897
564Texas201025257114
44Texas201025244310
0United States2010309348193
520United States2010309349689
45Utah20102775326
565Utah20102776469
566Vermont2010625960
46Vermont2010625982
47Virginia20108025773
567Virginia20108024617
48Washington20106743226
568Washington20106744496
49West Virginia20101854230
569West Virginia20101853973
50Wisconsin20105690263
570Wisconsin20105691047
571Wyoming2010564460
51Wyoming2010564513
\n", "

104 rows × 3 columns

\n", "
" ], "text/plain": [ " State Year Population\n", "1 Alabama 2010 4785492\n", "521 Alabama 2010 4785298\n", "522 Alaska 2010 713985\n", "2 Alaska 2010 714031\n", "523 Arizona 2010 6413737\n", "3 Arizona 2010 6408312\n", "524 Arkansas 2010 2921606\n", "4 Arkansas 2010 2921995\n", "525 California 2010 37349363\n", "5 California 2010 37332685\n", "526 Colorado 2010 5049071\n", "6 Colorado 2010 5048644\n", "527 Connecticut 2010 3577073\n", "7 Connecticut 2010 3579899\n", "528 Delaware 2010 899769\n", "8 Delaware 2010 899816\n", "529 District of Columbia 2010 604453\n", "9 District of Columbia 2010 605183\n", "530 Florida 2010 18843326\n", "10 Florida 2010 18849098\n", "11 Georgia 2010 9713521\n", "531 Georgia 2010 9712587\n", "532 Hawaii 2010 1363621\n", "12 Hawaii 2010 1363945\n", "13 Idaho 2010 1571010\n", "533 Idaho 2010 1571450\n", "14 Illinois 2010 12841578\n", "534 Illinois 2010 12843166\n", "15 Indiana 2010 6490528\n", "535 Indiana 2010 6490621\n", ".. ... ... ...\n", "558 Oregon 2010 3838957\n", "38 Oregon 2010 3838048\n", "39 Pennsylvania 2010 12712343\n", "559 Pennsylvania 2010 12709630\n", "40 Rhode Island 2010 1053337\n", "560 Rhode Island 2010 1052886\n", "41 South Carolina 2010 4635943\n", "561 South Carolina 2010 4636312\n", "42 South Dakota 2010 816325\n", "562 South Dakota 2010 816463\n", "43 Tennessee 2010 6356671\n", "563 Tennessee 2010 6356897\n", "564 Texas 2010 25257114\n", "44 Texas 2010 25244310\n", "0 United States 2010 309348193\n", "520 United States 2010 309349689\n", "45 Utah 2010 2775326\n", "565 Utah 2010 2776469\n", "566 Vermont 2010 625960\n", "46 Vermont 2010 625982\n", "47 Virginia 2010 8025773\n", "567 Virginia 2010 8024617\n", "48 Washington 2010 6743226\n", "568 Washington 2010 6744496\n", "49 West Virginia 2010 1854230\n", "569 West Virginia 2010 1853973\n", "50 Wisconsin 2010 5690263\n", "570 Wisconsin 2010 5691047\n", "571 Wyoming 2010 564460\n", "51 Wyoming 2010 564513\n", "\n", "[104 rows x 3 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop_total.loc[pop_total['Year']==2010].sort_values('State')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:38:23.689337Z", "start_time": "2017-10-30T16:38:23.682891Z" }, "collapsed": true }, "outputs": [], "source": [ "pop_total = pd.concat([pop1_tidy.loc[pop1_tidy['Year'] < 2010], pop2_tidy])" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:38:26.926979Z", "start_time": "2017-10-30T16:38:26.915337Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateYearPopulation
0United States2000282162411
1Alabama20004452173
2Alaska2000627963
3Arizona20005160586
4Arkansas20002678588
\n", "
" ], "text/plain": [ " State Year Population\n", "0 United States 2000 282162411\n", "1 Alabama 2000 4452173\n", "2 Alaska 2000 627963\n", "3 Arizona 2000 5160586\n", "4 Arkansas 2000 2678588" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop_total.head()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "ExecuteTime": { "end_time": "2017-10-30T16:38:27.256796Z", "start_time": "2017-10-30T16:38:27.242016Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateYearPopulation
359Virginia20168411808
360Washington20167288000
361West Virginia20161831102
362Wisconsin20165778708
363Wyoming2016585501
\n", "
" ], "text/plain": [ " State Year Population\n", "359 Virginia 2016 8411808\n", "360 Washington 2016 7288000\n", "361 West Virginia 2016 1831102\n", "362 Wisconsin 2016 5778708\n", "363 Wyoming 2016 585501" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pop_total.tail()" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "ExecuteTime": { "end_time": "2017-08-03T17:56:17.320409Z", "start_time": "2017-08-03T17:56:17.312010Z" }, "collapsed": true }, "outputs": [], "source": [ "path = os.path.join('Data storage', 'Derived data', 'State population.csv')\n", "pop_total.to_csv(path, index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "py36", "language": "python", "name": "py36" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }