{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "from synthpop.census_helpers import Census\n", "from synthpop import categorizer as cat\n", "import pandas as pd\n", "import numpy as np\n", "import os\n", "pd.set_option('display.max_columns', 500)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## The census api needs a key - you can sign up for one at the link below" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### http://api.census.gov/data/key_signup.html" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "c = Census(os.environ[\"CENSUS\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Here we get aggregate information on households from ACS - note some variables are associated with block groups and others with tracts" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
B11001_001EB11001_002EB19001_001EB19001_002EB19001_003EB19001_004EB19001_005EB19001_006EB19001_007EB19001_008EB19001_009EB19001_010EB19001_011EB19001_012EB19001_013EB19001_014EB19001_015EB19001_016EB19001_017ENAMEblock groupcountystatetractB08201_001EB08201_002EB08201_003EB08201_004EB08201_005EB08201_006EB08202_001EB08202_002EB08202_003EB08202_004EB08202_005E
03002053007010183405130209274443574Block Group 1, Census Tract 306, San Francisco...1075060306003009911524243006811910110
127319427300013001560063514366484Block Group 2, Census Tract 306, San Francisco...20750603060027398313838327362109929
2305240305001054000061211241839176Block Group 3, Census Tract 306, San Francisco...30750603060030510931544343056912110210
\n", "
" ], "text/plain": [ " B11001_001E B11001_002E B19001_001E B19001_002E B19001_003E \\\n", "0 300 205 300 7 0 \n", "1 273 194 273 0 0 \n", "2 305 240 305 0 0 \n", "\n", " B19001_004E B19001_005E B19001_006E B19001_007E B19001_008E \\\n", "0 10 18 34 0 5 \n", "1 0 13 0 0 15 \n", "2 10 5 4 0 0 \n", "\n", " B19001_009E B19001_010E B19001_011E B19001_012E B19001_013E \\\n", "0 13 0 20 9 27 \n", "1 6 0 0 6 35 \n", "2 0 0 6 12 11 \n", "\n", " B19001_014E B19001_015E B19001_016E B19001_017E \\\n", "0 44 4 35 74 \n", "1 14 36 64 84 \n", "2 24 18 39 176 \n", "\n", " NAME block group county state \\\n", "0 Block Group 1, Census Tract 306, San Francisco... 1 075 06 \n", "1 Block Group 2, Census Tract 306, San Francisco... 2 075 06 \n", "2 Block Group 3, Census Tract 306, San Francisco... 3 075 06 \n", "\n", " tract B08201_001E B08201_002E B08201_003E B08201_004E B08201_005E \\\n", "0 030600 300 9 91 152 42 \n", "1 030600 273 9 83 138 38 \n", "2 030600 305 10 93 154 43 \n", "\n", " B08201_006E B08202_001E B08202_002E B08202_003E B08202_004E \\\n", "0 4 300 68 119 101 \n", "1 3 273 62 109 92 \n", "2 4 305 69 121 102 \n", "\n", " B08202_005E \n", "0 10 \n", "1 9 \n", "2 10 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "income_columns = ['B19001_0%02dE'%i for i in range(1, 18)]\n", "vehicle_columns = ['B08201_0%02dE'%i for i in range(1, 7)]\n", "workers_columns = ['B08202_0%02dE'%i for i in range(1, 6)]\n", "families_columns = ['B11001_001E', 'B11001_002E']\n", "block_group_columns = income_columns + families_columns\n", "tract_columns = vehicle_columns + workers_columns\n", "h_acs = c.block_group_and_tract_query(block_group_columns,\n", " tract_columns, \"06\", \"075\", \n", " merge_columns=['tract', 'county', 'state'],\n", " block_group_size_attr=\"B11001_001E\",\n", " tract_size_attr=\"B08201_001E\",\n", " tract=\"030600\")\n", "h_acs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## And here is aggregate 
information on people from ACS " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
B01001_001EB01001_002EB01001_003EB01001_004EB01001_005EB01001_006EB01001_007EB01001_008EB01001_009EB01001_010EB01001_011EB01001_012EB01001_013EB01001_014EB01001_015EB01001_016EB01001_017EB01001_018EB01001_019EB01001_020EB01001_021EB01001_022EB01001_023EB01001_024EB01001_025EB01001_026EB01001_027EB01001_028EB01001_029EB01001_030EB01001_031EB01001_032EB01001_033EB01001_034EB01001_035EB02001_001EB02001_002EB02001_003EB02001_004EB02001_005EB02001_006EB02001_007EB02001_008EB02001_009EB02001_010ENAMEblock groupcountystatetractB01001_036EB01001_037EB01001_038EB01001_039EB01001_040EB01001_041EB01001_042EB01001_043EB01001_044EB01001_045EB01001_046EB01001_047EB01001_048EB01001_049E
0789392564024800026271241318351310201929112700397020163409111021789367902610101511932Block Group 1, Census Tract 306, San Francisco...10750603060016346153429142015302092311
1696342181660500003435526712231700712225113541228346000896964470017902446046Block Group 2, Census Tract 306, San Francisco...207506030600313921182026261361323966
2939518705118306068195252841547068273459164214733191700011139395321203100085085Block Group 3, Census Tract 306, San Francisco...30750603060003646243949100012961832
\n", "
" ], "text/plain": [ " B01001_001E B01001_002E B01001_003E B01001_004E B01001_005E \\\n", "0 789 392 56 40 24 \n", "1 696 342 18 16 6 \n", "2 939 518 70 51 18 \n", "\n", " B01001_006E B01001_007E B01001_008E B01001_009E B01001_010E \\\n", "0 8 0 0 0 26 \n", "1 0 5 0 0 0 \n", "2 30 6 0 6 8 \n", "\n", " B01001_011E B01001_012E B01001_013E B01001_014E B01001_015E \\\n", "0 27 12 4 13 18 \n", "1 0 34 35 52 67 \n", "2 19 5 25 28 41 \n", "\n", " B01001_016E B01001_017E B01001_018E B01001_019E B01001_020E \\\n", "0 35 13 10 20 19 \n", "1 12 23 17 0 0 \n", "2 54 70 6 8 2 \n", "\n", " B01001_021E B01001_022E B01001_023E B01001_024E B01001_025E \\\n", "0 29 11 27 0 0 \n", "1 7 12 22 5 11 \n", "2 7 34 5 9 16 \n", "\n", " B01001_026E B01001_027E B01001_028E B01001_029E B01001_030E \\\n", "0 397 0 20 16 34 \n", "1 354 12 28 34 6 \n", "2 421 47 33 19 17 \n", "\n", " B01001_031E B01001_032E B01001_033E B01001_034E B01001_035E \\\n", "0 0 9 11 10 21 \n", "1 0 0 0 8 9 \n", "2 0 0 0 11 13 \n", "\n", " B02001_001E B02001_002E B02001_003E B02001_004E B02001_005E \\\n", "0 789 367 9 0 261 \n", "1 696 447 0 0 179 \n", "2 939 532 12 0 310 \n", "\n", " B02001_006E B02001_007E B02001_008E B02001_009E B02001_010E \\\n", "0 0 101 51 19 32 \n", "1 0 24 46 0 46 \n", "2 0 0 85 0 85 \n", "\n", " NAME block group county state \\\n", "0 Block Group 1, Census Tract 306, San Francisco... 1 075 06 \n", "1 Block Group 2, Census Tract 306, San Francisco... 2 075 06 \n", "2 Block Group 3, Census Tract 306, San Francisco... 
3 075 06 \n", "\n", " tract B01001_036E B01001_037E B01001_038E B01001_039E B01001_040E \\\n", "0 030600 16 34 6 15 34 \n", "1 030600 31 39 21 18 20 \n", "2 030600 0 36 46 24 39 \n", "\n", " B01001_041E B01001_042E B01001_043E B01001_044E B01001_045E \\\n", "0 29 14 20 15 30 \n", "1 26 26 13 6 13 \n", "2 49 10 0 0 12 \n", "\n", " B01001_046E B01001_047E B01001_048E B01001_049E \n", "0 20 9 23 11 \n", "1 23 9 6 6 \n", "2 9 6 18 32 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "population = ['B01001_001E']\n", "sex = ['B01001_002E', 'B01001_026E']\n", "race = ['B02001_0%02dE'%i for i in range(1,11)]\n", "male_age_columns = ['B01001_0%02dE'%i for i in range(3,26)]\n", "female_age_columns = ['B01001_0%02dE'%i for i in range(27,50)]\n", "all_columns = population + sex + race + male_age_columns + female_age_columns\n", "p_acs = c.block_group_query(all_columns, \"06\", \"075\", tract=\"030600\")\n", "p_acs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get the puma for our test tracts - this actually downloads the mapping file from the census website so it might take a few seconds" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('07506', '02206')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "puma = c.tract_to_puma(\"06\", \"075\", \"030600\")\n", "puma" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "puma10 = puma[0]\n", "puma00 = puma[1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download PUMS for people records for a PUMA from our server (we processed the large files into smaller ones for you)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Juan\\Anaconda3\\envs\\synpop_py3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2910: 
DtypeWarning: Columns (108) have mixed types. Specify dtype option on import or set low_memory=False.\n", " exec(code_obj, self.user_global_ns, self.user_ns)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
serialnoRTSPORDERpuma00puma10STADJINCPWGTPAGEPCITCITWP05CITWP12COWDDRSDEARDEYEDOUTDPHYDRATDRATXDREMENGFERGCLGCMGCRHINS1HINS2HINS3HINS4HINS5HINS6HINS7INTPJWMNPJWRIPJWTRLANXMARMARHDMARHMMARHTMARHWMARHYP05MARHYP12MIGMILMLPAMLPBMLPCDMLPEMLPFGMLPHMLPIMLPJMLPKNWABNWAVNWLANWLKNWREOIPPAPRELPRETPSCHSCHGSCHLSEMPSEXSSIPSSPWAGPWKHPWKLWKWWRKYOEP05YOEP12ANCANC1P05ANC1P12ANC2P05ANC2P12DECADEDISDRIVESPESPESRFOD1PFOD2PHICOVHISPINDPJWAPJWDPLANP05LANP12MIGPUMA00MIGPUMA10MIGSP05MIGSP12MSPNAICSPNATIVITYNOPOCOCCP02OCCP10OCCP12PAOCPERNPPINCPPOBP05POBP12POVPIPPOWPUMA00POWPUMA10POWSP05POWSP12PRIVCOVPUBCOVQTRBIRRAC1PRAC2P05RAC2P12RAC3P05RAC3P12RACAIANRACASNRACBLKRACNHPIRACNUMRACSORRACWHTRCSCIENGPSCIENGRLPSFNSFRSOCP00SOCP10SOCP12VPSWAOBFAGEPFANCPFCITPFCITWPFCOWPFDDRSPFDEARPFDEYEPFDOUTPFDPHYPFDRATPFDRATXPFDREMPFENGPFESRPFFERPFFODPFGCLPFGCMPFGCRPFHINS1PFHINS2PFHINS3CFHINS3PFHINS4CFHINS4PFHINS5CFHINS5PFHINS6PFHINS7PFHISPFINDPFINTPFJWDPFJWMNPFJWRIPFJWTRPFLANPFLANXPFMARHDPFMARHMPFMARHTPFMARHWPFMARHYPFMARPFMIGPFMIGSPFMILPPFMILSPFOCCPFOIPFPAPFPOBPFPOWSPFRACPFRELPFRETPFSCHGPFSCHLPFSCHPFSEMPFSEXPFSSIPFSSPFWAGPFWKHPFWKLPFWKWPFWRKPFYOEPPWGTP1PWGTP2PWGTP3PWGTP4PWGTP5PWGTP6PWGTP7PWGTP8PWGTP9PWGTP10PWGTP11PWGTP12PWGTP13PWGTP14PWGTP15PWGTP16PWGTP17PWGTP18PWGTP19PWGTP20PWGTP21PWGTP22PWGTP23PWGTP24PWGTP25PWGTP26PWGTP27PWGTP28PWGTP29PWGTP30PWGTP31PWGTP32PWGTP33PWGTP34PWGTP35PWGTP36PWGTP37PWGTP38PWGTP39PWGTP40PWGTP41PWGTP42PWGTP43PWGTP44PWGTP45PWGTP46PWGTP47PWGTP48PWGTP49PWGTP50PWGTP51PWGTP52PWGTP53PWGTP54PWGTP55PWGTP56PWGTP57PWGTP58PWGTP59PWGTP60PWGTP61PWGTP62PWGTP63PWGTP64PWGTP65PWGTP66PWGTP67PWGTP68PWGTP69PWGTP70PWGTP71PWGTP72PWGTP73PWGTP74PWGTP75PWGTP76PWGTP77PWGTP78PWGTP79PWGTP80
02012000002680P1-975066102488721641NaNNaN2.02.0222.02.0NaNNaN2.0NaNNaN2.0NaNNaN12222220.010.0NaN10.02.05NaNNaNNaNNaNNaNNaN1.04.0NaNNaNNaNNaNNaNNaNNaNNaNNaN3.05.02.02.03.00.00.000.01.0NaN21.00.020.00.059000.024.01.01.01.0NaNNaN3-9995-9999NaN2NaNNaN1.03301.0NaN117870.084.043.0NaNNaNNaNNaNNaNNaN6.0611M11NaN0N.A.N.A.58604.059000.059000.0-936500.0-9.07500.0-9.06.01241-91-91000010102.02.0NaNNaNN.A.//N.A.//439061NaN10000000000000000000000NaN0NaN0NaN000000000000000000000000010000000000001010.0022182372523383065402073335232262221202023371819764637721367616173619191919203919207737387203277222133201820232361921404357402063539222662021
12012000009189P1-975066102488714524-9.01995.0NaN2.0212.02.0NaNNaN2.03.0NaN2.0NaNNaN12222220.0NaNNaNNaN1.012.02.01.02.0-9.01992.01.04.0NaNNaNNaNNaNNaNNaNNaNNaNNaN3.05.03.03.03.010000.00.000.01.0NaN16.00.010.00.00.0NaN3.0NaNNaN-9.01989.04-9999-99995.01NaNNaN6.0NaNNaN11NaNNaNNaN-9.0708.0NaNNaNNaNNaN1.0NaN2NaN0NaNNaNNaNNaN0.010000.0-920743.0NaNNaNNaNNaN1236-943-9501001000NaNNaNNaNNaNNaNNaNNaNNaN40000010011001010000000NaN0NaN0NaN000101000000111110000101100001000101110100.0015526186101151541517172011211612282614284431151327192516141441231512431542427514124163171414261027171530201928542214112614211016194145131454
22012000009189P2-975066102488718454-9.01998.03.02.0212.02.0NaNNaN2.03.02.02.0NaNNaN12222220.0NaNNaNNaN1.012.02.01.02.0-9.01992.01.04.0NaNNaNNaNNaNNaNNaNNaNNaNNaN3.05.03.03.03.00.00.010.01.0NaN16.00.020.00.00.0NaN2.0NaNNaN-9.01992.04-9999-99996.01NaNNaN6.0NaNNaN118370.0NaNNaN-9.0708.0NaNNaNNaNNaN1.062412NaN0N.A.N.A.46102.00.00.0-920743.0NaNNaNNaNNaN1226-943-9501001000NaNNaNNaNNaNN.A.//N.A.//399021NaN40010110011001111000000NaN0NaN0NaN000111000000111110000111100001000101110100.0018536307201962371517303016282019283418327538191933183418161951851720552372530519216187211920261831182330311937663623182817321521226244152156
32012000009189P3-975066102488714101NaNNaNNaN2.022NaN2.0NaNNaN2.0NaNNaNNaNNaNNaN2221222NaNNaNNaNNaN2.05NaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2NaN1.0NaN7.0NaN1NaNNaNNaNNaNNaNNaNNaNNaNNaN4-9999-9999NaN2NaN4.0NaNNaNNaN11NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN14.01NaNNaNNaNNaNNaNNaN-9643.0NaNNaNNaNNaN2146-943-9501001001NaNNaNNaNNaNNaNNaNNaNNaN10000011101001000000000NaN00.00NaN000100000000000000100000000100000000000000.0015322214131541751112172416231414242517225525141424112713141341541613451452826420133144171313241221141522221623442315152314281512175125131554
42012000009189P4-97506610248871481NaNNaNNaN2.022NaN2.0NaNNaN2.0NaNNaNNaNNaNNaN2221222NaNNaNNaNNaN2.05NaNNaNNaNNaNNaNNaN1.0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2NaN2.05.06.0NaN1NaNNaNNaNNaNNaNNaNNaNNaNNaN4-9999-9999NaN2NaN4.0NaNNaNNaN11NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN14.01NaNNaNNaNNaNNaNNaN-9643.0NaNNaNNaNNaN2116-943-9501001001NaNNaNNaNNaNNaNNaNNaNNaN10000011101001000000011NaN10.01NaN111100000000000000100000000100101000000000.0015422214121651751112172416221514242517224525131425122713141341441713441452826319133154161313241222141621221523432215152314281512175134131543
\n", "
" ], "text/plain": [ " serialno RT SPORDER puma00 puma10 ST ADJINC PWGTP AGEP CIT \\\n", "0 2012000002680 P 1 -9 7506 6 1024887 21 64 1 \n", "1 2012000009189 P 1 -9 7506 6 1024887 14 52 4 \n", "2 2012000009189 P 2 -9 7506 6 1024887 18 45 4 \n", "3 2012000009189 P 3 -9 7506 6 1024887 14 10 1 \n", "4 2012000009189 P 4 -9 7506 6 1024887 14 8 1 \n", "\n", " CITWP05 CITWP12 COW DDRS DEAR DEYE DOUT DPHY DRAT DRATX DREM \\\n", "0 NaN NaN 2.0 2.0 2 2 2.0 2.0 NaN NaN 2.0 \n", "1 -9.0 1995.0 NaN 2.0 2 1 2.0 2.0 NaN NaN 2.0 \n", "2 -9.0 1998.0 3.0 2.0 2 1 2.0 2.0 NaN NaN 2.0 \n", "3 NaN NaN NaN 2.0 2 2 NaN 2.0 NaN NaN 2.0 \n", "4 NaN NaN NaN 2.0 2 2 NaN 2.0 NaN NaN 2.0 \n", "\n", " ENG FER GCL GCM GCR HINS1 HINS2 HINS3 HINS4 HINS5 HINS6 HINS7 \\\n", "0 NaN NaN 2.0 NaN NaN 1 2 2 2 2 2 2 \n", "1 3.0 NaN 2.0 NaN NaN 1 2 2 2 2 2 2 \n", "2 3.0 2.0 2.0 NaN NaN 1 2 2 2 2 2 2 \n", "3 NaN NaN NaN NaN NaN 2 2 2 1 2 2 2 \n", "4 NaN NaN NaN NaN NaN 2 2 2 1 2 2 2 \n", "\n", " INTP JWMNP JWRIP JWTR LANX MAR MARHD MARHM MARHT MARHW MARHYP05 \\\n", "0 0.0 10.0 NaN 10.0 2.0 5 NaN NaN NaN NaN NaN \n", "1 0.0 NaN NaN NaN 1.0 1 2.0 2.0 1.0 2.0 -9.0 \n", "2 0.0 NaN NaN NaN 1.0 1 2.0 2.0 1.0 2.0 -9.0 \n", "3 NaN NaN NaN NaN 2.0 5 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN 2.0 5 NaN NaN NaN NaN NaN \n", "\n", " MARHYP12 MIG MIL MLPA MLPB MLPCD MLPE MLPFG MLPH MLPI MLPJ MLPK \\\n", "0 NaN 1.0 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1 1992.0 1.0 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "2 1992.0 1.0 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "3 NaN 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "4 NaN 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "\n", " NWAB NWAV NWLA NWLK NWRE OIP PAP RELP RETP SCH SCHG SCHL \\\n", "0 3.0 5.0 2.0 2.0 3.0 0.0 0.0 0 0.0 1.0 NaN 21.0 \n", "1 3.0 5.0 3.0 3.0 3.0 10000.0 0.0 0 0.0 1.0 NaN 16.0 \n", "2 3.0 5.0 3.0 3.0 3.0 0.0 0.0 1 0.0 1.0 NaN 16.0 \n", "3 NaN NaN NaN NaN NaN NaN NaN 2 NaN 1.0 NaN 7.0 \n", "4 NaN NaN NaN NaN NaN NaN NaN 2 NaN 2.0 5.0 
6.0 \n", "\n", " SEMP SEX SSIP SSP WAGP WKHP WKL WKW WRK YOEP05 YOEP12 ANC \\\n", "0 0.0 2 0.0 0.0 59000.0 24.0 1.0 1.0 1.0 NaN NaN 3 \n", "1 0.0 1 0.0 0.0 0.0 NaN 3.0 NaN NaN -9.0 1989.0 4 \n", "2 0.0 2 0.0 0.0 0.0 NaN 2.0 NaN NaN -9.0 1992.0 4 \n", "3 NaN 1 NaN NaN NaN NaN NaN NaN NaN NaN NaN 4 \n", "4 NaN 1 NaN NaN NaN NaN NaN NaN NaN NaN NaN 4 \n", "\n", " ANC1P05 ANC1P12 ANC2P05 ANC2P12 DECADE DIS DRIVESP ESP ESR FOD1P \\\n", "0 -9 995 -9 999 NaN 2 NaN NaN 1.0 3301.0 \n", "1 -9 999 -9 999 5.0 1 NaN NaN 6.0 NaN \n", "2 -9 999 -9 999 6.0 1 NaN NaN 6.0 NaN \n", "3 -9 999 -9 999 NaN 2 NaN 4.0 NaN NaN \n", "4 -9 999 -9 999 NaN 2 NaN 4.0 NaN NaN \n", "\n", " FOD2P HICOV HISP INDP JWAP JWDP LANP05 LANP12 MIGPUMA00 \\\n", "0 NaN 1 1 7870.0 84.0 43.0 NaN NaN NaN \n", "1 NaN 1 1 NaN NaN NaN -9.0 708.0 NaN \n", "2 NaN 1 1 8370.0 NaN NaN -9.0 708.0 NaN \n", "3 NaN 1 1 NaN NaN NaN NaN NaN NaN \n", "4 NaN 1 1 NaN NaN NaN NaN NaN NaN \n", "\n", " MIGPUMA10 MIGSP05 MIGSP12 MSP NAICSP NATIVITY NOP OC OCCP02 OCCP10 \\\n", "0 NaN NaN NaN 6.0 611M1 1 NaN 0 N.A. N.A. \n", "1 NaN NaN NaN 1.0 NaN 2 NaN 0 NaN NaN \n", "2 NaN NaN NaN 1.0 6241 2 NaN 0 N.A. N.A. 
\n", "3 NaN NaN NaN NaN NaN 1 4.0 1 NaN NaN \n", "4 NaN NaN NaN NaN NaN 1 4.0 1 NaN NaN \n", "\n", " OCCP12 PAOC PERNP PINCP POBP05 POBP12 POVPIP POWPUMA00 \\\n", "0 5860 4.0 59000.0 59000.0 -9 36 500.0 -9.0 \n", "1 NaN NaN 0.0 10000.0 -9 207 43.0 NaN \n", "2 4610 2.0 0.0 0.0 -9 207 43.0 NaN \n", "3 NaN NaN NaN NaN -9 6 43.0 NaN \n", "4 NaN NaN NaN NaN -9 6 43.0 NaN \n", "\n", " POWPUMA10 POWSP05 POWSP12 PRIVCOV PUBCOV QTRBIR RAC1P RAC2P05 \\\n", "0 7500.0 -9.0 6.0 1 2 4 1 -9 \n", "1 NaN NaN NaN 1 2 3 6 -9 \n", "2 NaN NaN NaN 1 2 2 6 -9 \n", "3 NaN NaN NaN 2 1 4 6 -9 \n", "4 NaN NaN NaN 2 1 1 6 -9 \n", "\n", " RAC2P12 RAC3P05 RAC3P12 RACAIAN RACASN RACBLK RACNHPI RACNUM \\\n", "0 1 -9 1 0 0 0 0 1 \n", "1 43 -9 5 0 1 0 0 1 \n", "2 43 -9 5 0 1 0 0 1 \n", "3 43 -9 5 0 1 0 0 1 \n", "4 43 -9 5 0 1 0 0 1 \n", "\n", " RACSOR RACWHT RC SCIENGP SCIENGRLP SFN SFR SOCP00 SOCP10 SOCP12 \\\n", "0 0 1 0 2.0 2.0 NaN NaN N.A.// N.A.// 439061 \n", "1 0 0 0 NaN NaN NaN NaN NaN NaN NaN \n", "2 0 0 0 NaN NaN NaN NaN N.A.// N.A.// 399021 \n", "3 0 0 1 NaN NaN NaN NaN NaN NaN NaN \n", "4 0 0 1 NaN NaN NaN NaN NaN NaN NaN \n", "\n", " VPS WAOB FAGEP FANCP FCITP FCITWP FCOWP FDDRSP FDEARP FDEYEP \\\n", "0 NaN 1 0 0 0 0 0 0 0 0 \n", "1 NaN 4 0 0 0 0 0 1 0 0 \n", "2 NaN 4 0 0 1 0 1 1 0 0 \n", "3 NaN 1 0 0 0 0 0 1 1 1 \n", "4 NaN 1 0 0 0 0 0 1 1 1 \n", "\n", " FDOUTP FDPHYP FDRATP FDRATXP FDREMP FENGP FESRP FFERP FFODP FGCLP \\\n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 1 1 0 0 1 0 1 0 0 0 \n", "2 1 1 0 0 1 1 1 1 0 0 \n", "3 0 1 0 0 1 0 0 0 0 0 \n", "4 0 1 0 0 1 0 0 0 0 0 \n", "\n", " FGCMP FGCRP FHINS1P FHINS2P FHINS3C FHINS3P FHINS4C FHINS4P \\\n", "0 0 0 0 0 NaN 0 NaN 0 \n", "1 0 0 0 0 NaN 0 NaN 0 \n", "2 0 0 0 0 NaN 0 NaN 0 \n", "3 0 0 0 0 NaN 0 0.0 0 \n", "4 0 0 1 1 NaN 1 0.0 1 \n", "\n", " FHINS5C FHINS5P FHINS6P FHINS7P FHISP FINDP FINTP FJWDP FJWMNP \\\n", "0 NaN 0 0 0 0 0 0 0 0 \n", "1 NaN 0 0 0 1 0 1 0 0 \n", "2 NaN 0 0 0 1 1 1 0 0 \n", "3 NaN 0 0 0 1 0 0 0 0 \n", "4 NaN 1 1 1 1 0 0 0 0 
\n", "\n", " FJWRIP FJWTRP FLANP FLANXP FMARHDP FMARHMP FMARHTP FMARHWP FMARHYP \\\n", "0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 1 1 1 1 1 \n", "2 0 0 0 0 1 1 1 1 1 \n", "3 0 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 0 \n", "\n", " FMARP FMIGP FMIGSP FMILPP FMILSP FOCCP FOIP FPAP FPOBP FPOWSP \\\n", "0 0 0 0 0 0 0 0 0 1 0 \n", "1 0 0 0 0 1 0 1 1 0 0 \n", "2 0 0 0 0 1 1 1 1 0 0 \n", "3 0 1 0 0 0 0 0 0 0 0 \n", "4 0 1 0 0 0 0 0 0 0 0 \n", "\n", " FRACP FRELP FRETP FSCHGP FSCHLP FSCHP FSEMP FSEXP FSSIP FSSP \\\n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 1 0 0 0 1 0 1 1 \n", "2 0 0 1 0 0 0 1 0 1 1 \n", "3 1 0 0 0 0 0 0 0 0 0 \n", "4 1 0 0 1 0 1 0 0 0 0 \n", "\n", " FWAGP FWKHP FWKLP FWKWP FWRKP FYOEP PWGTP1 PWGTP2 PWGTP3 PWGTP4 \\\n", "0 0 1 0 1 0.0 0 22 18 23 7 \n", "1 1 0 1 0 0.0 0 15 5 26 18 \n", "2 1 0 1 0 0.0 0 18 5 36 30 \n", "3 0 0 0 0 0.0 0 15 3 22 21 \n", "4 0 0 0 0 0.0 0 15 4 22 21 \n", "\n", " PWGTP5 PWGTP6 PWGTP7 PWGTP8 PWGTP9 PWGTP10 PWGTP11 PWGTP12 PWGTP13 \\\n", "0 25 23 38 30 6 5 40 20 7 \n", "1 6 10 11 5 15 4 15 17 17 \n", "2 7 20 19 6 23 7 15 17 30 \n", "3 4 13 15 4 17 5 11 12 17 \n", "4 4 12 16 5 17 5 11 12 17 \n", "\n", " PWGTP14 PWGTP15 PWGTP16 PWGTP17 PWGTP18 PWGTP19 PWGTP20 PWGTP21 \\\n", "0 33 35 23 22 6 22 21 20 \n", "1 20 11 21 16 12 28 26 14 \n", "2 30 16 28 20 19 28 34 18 \n", "3 24 16 23 14 14 24 25 17 \n", "4 24 16 22 15 14 24 25 17 \n", "\n", " PWGTP22 PWGTP23 PWGTP24 PWGTP25 PWGTP26 PWGTP27 PWGTP28 PWGTP29 \\\n", "0 20 23 37 18 19 7 6 46 \n", "1 28 4 4 31 15 13 27 19 \n", "2 32 7 5 38 19 19 33 18 \n", "3 22 5 5 25 14 14 24 11 \n", "4 22 4 5 25 13 14 25 12 \n", "\n", " PWGTP30 PWGTP31 PWGTP32 PWGTP33 PWGTP34 PWGTP35 PWGTP36 PWGTP37 \\\n", "0 37 7 21 36 7 6 16 17 \n", "1 25 16 14 14 4 12 3 15 \n", "2 34 18 16 19 5 18 5 17 \n", "3 27 13 14 13 4 15 4 16 \n", "4 27 13 14 13 4 14 4 17 \n", "\n", " PWGTP38 PWGTP39 PWGTP40 PWGTP41 PWGTP42 PWGTP43 PWGTP44 PWGTP45 \\\n", "0 36 19 19 19 19 20 39 19 \n", "1 12 4 3 15 4 24 27 5 \n", "2 20 5 5 23 7 25 30 5 \n", 
"3 13 4 5 14 5 28 26 4 \n", "4 13 4 4 14 5 28 26 3 \n", "\n", " PWGTP46 PWGTP47 PWGTP48 PWGTP49 PWGTP50 PWGTP51 PWGTP52 PWGTP53 \\\n", "0 20 7 7 37 38 7 20 32 \n", "1 14 12 4 16 3 17 14 14 \n", "2 19 21 6 18 7 21 19 20 \n", "3 20 13 3 14 4 17 13 13 \n", "4 19 13 3 15 4 16 13 13 \n", "\n", " PWGTP54 PWGTP55 PWGTP56 PWGTP57 PWGTP58 PWGTP59 PWGTP60 PWGTP61 \\\n", "0 7 7 22 21 33 20 18 20 \n", "1 26 10 27 17 15 30 20 19 \n", "2 26 18 31 18 23 30 31 19 \n", "3 24 12 21 14 15 22 22 16 \n", "4 24 12 22 14 16 21 22 15 \n", "\n", " PWGTP62 PWGTP63 PWGTP64 PWGTP65 PWGTP66 PWGTP67 PWGTP68 PWGTP69 \\\n", "0 23 23 6 19 21 40 43 5 \n", "1 28 5 4 22 14 11 26 14 \n", "2 37 6 6 36 23 18 28 17 \n", "3 23 4 4 23 15 15 23 14 \n", "4 23 4 3 22 15 15 23 14 \n", "\n", " PWGTP70 PWGTP71 PWGTP72 PWGTP73 PWGTP74 PWGTP75 PWGTP76 PWGTP77 \\\n", "0 7 40 20 6 35 39 22 26 \n", "1 21 10 16 19 4 14 5 13 \n", "2 32 15 21 22 6 24 4 15 \n", "3 28 15 12 17 5 12 5 13 \n", "4 28 15 12 17 5 13 4 13 \n", "\n", " PWGTP78 PWGTP79 PWGTP80 \n", "0 6 20 21 \n", "1 14 5 4 \n", "2 21 5 6 \n", "3 15 5 4 \n", "4 15 4 3 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p_pums = c.download_population_pums(\"06\", puma10=puma10, puma00=puma00)\n", "p_pums.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download PUMS for household records for a PUMA" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
serialnoinspRTDIVISIONpuma00puma10REGIONSTADJHSGADJINCWGTPNPTYPEACRAGSBATHBDSPBLDBUSCONPELEPFSFULPGASPHFLMHPMRGIMRGPMRGTMRGXREFRRMSPRNTMRNTPRWATRWATPRSINKSMPSTOVTELTENTOILVACSVALPVEHWATPYBLFESFINCPFPARCGRNTPGRPIPHHLHHTHINCPHUGCLHUPACHUPAOCHUPARCKITLNGIMULTGMVNOCNPFNPPNRNRCOCPIPPARTNERPLMPSFR18R60R65RESMODESMOCPSMXSRNTSVALTAXPWIFWKEXRELWORKSTATFACRPFAGSPFBATHPFBDSPFBLDPFBUSPFCONPFELEPFFSPFFULPFGASPFHFLPFINSPFKITPFMHPFMRGIPFMRGPFMRGTPFMRGXPFMVPFPLMPFREFRPFRMSPFRNTMPFRNTPFRWATPFRWATPRPFSINKPFSMPFSMXHPFSMXSPFSTOVPFTAXPFTELPFTENPFTOILPFVACSPFVALPFVEHPFWATPFYBLPWGTP1WGTP2WGTP3WGTP4WGTP5WGTP6WGTP7WGTP8WGTP9WGTP10WGTP11WGTP12WGTP13WGTP14WGTP15WGTP16WGTP17WGTP18WGTP19WGTP20WGTP21WGTP22WGTP23WGTP24WGTP25WGTP26WGTP27WGTP28WGTP29WGTP30WGTP31WGTP32WGTP33WGTP34WGTP35WGTP36WGTP37WGTP38WGTP39WGTP40WGTP41WGTP42WGTP43WGTP44WGTP45WGTP46WGTP47WGTP48WGTP49WGTP50WGTP51WGTP52WGTP53WGTP54WGTP55WGTP56WGTP57WGTP58WGTP59WGTP60WGTP61WGTP62WGTP63WGTP64WGTP65WGTP66WGTP67WGTP68WGTP69WGTP70WGTP71WGTP72WGTP73WGTP74WGTP75WGTP76WGTP77WGTP78WGTP79WGTP80
02012000002680200.0H9-97506461014531102488721111.0NaN1.02.03.02.00.060.02.02.04.09.0NaN2.0350.02.01.01.05.0NaNNaN1.09.01.0NaN1.01.01.01.0NaN500000.01.0430.01.0NaNNaNNaNNaNNaN1.06.059000.00.04.04.04.01.01.01.06.00.0NaN0.00.00.013.00.01.00.00.01.00.01.0634.03.00.01.032.0NaNNaNNaN0.00.00.00.00.00.00.00.000.01.00.00.00.00.00.00.00.00.000.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.022182372523373065402073335232262221202023371819764636721368517183619181919203819207637387203176222132201820232372020404357402063539222662022
12012000009189660.0H9-9750646101453110248871441NaNNaN1.05.04.0NaN0.050.02.02.050.01.0NaN2.0490.02.01.01.07.0NaNNaN1.09.01.0630.01.01.01.01.0NaN40000.02.02000.01.04.010000.02.0NaNNaN4.01.010000.00.02.02.02.01.02.01.05.02.04.00.00.02.0101.00.01.00.01.00.00.01.01534.02.00.00.023.00.09.09.00.00.00.01.01.00.00.00.001.00.01.00.00.00.00.01.00.00.000.00.01.00.00.00.00.00.01.00.00.00.01.00.00.00.00.00.00.00.00.015526175101251541417172011221612272615274432161327182617131541231512441542428514114173171414261027171531211828632214112514201016193145131454
22012000016466NaNH9-97506461014531102488714311.0NaN1.02.02.02.00.010.02.02.01.01.0NaNNaNNaNNaNNaN1.04.02.02000.01.09.01.0NaN1.01.03.01.0NaNNaN1.01.02.07.030000.02.02010.080.01.03.030000.00.02.02.02.01.01.01.02.02.03.00.00.02.0NaN0.01.00.01.00.00.01.0NaNNaN1.00.0NaN1.013.013.01.00.00.00.00.01.00.00.000.00.00.00.00.00.00.00.00.00.000.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.045251542323141551520171441616252512241741524361517241316141020151545154529164232510155111013175141631201527205193274161124121313162312114414
32012000017340500.0H9-97506461014531102488711411.0NaN1.03.03.02.00.0100.02.01000.030.03.0NaN2.03000.02.01.01.04.0NaNNaN1.09.01.0NaN1.01.01.01.0NaN500000.01.01200.07.05.060000.04.0NaNNaN4.02.060000.00.04.04.04.01.01.01.04.00.04.00.00.00.079.00.01.01.00.01.00.01.03938.03.00.01.065.02.012.011.01.00.00.00.00.01.00.00.000.00.00.00.00.00.00.00.00.00.010.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0101529381411169221011231252821423111113915911124104111041215341831374123139113012171212181031821413108171220101110410312102101634172
42012000025664200.0H9-97506461014531102488785511.0NaN1.03.03.02.00.050.02.02.03.03.0NaN2.0680.02.01.01.06.0NaNNaN1.09.01.0NaN1.01.01.01.0NaN500000.02.0900.04.07.073500.04.0NaNNaN1.03.073500.00.04.04.04.01.01.02.07.00.05.00.00.00.016.00.01.00.00.01.01.02.0989.03.00.01.032.03.013.013.00.00.00.00.00.00.00.00.000.00.00.01.00.00.01.01.01.00.000.00.00.00.00.00.00.00.01.01.01.00.01.00.00.00.00.01.00.00.01.0182514772269612364832893789877379010213591851081383591138424377681638676101741061001113028871181314090100273679761371061049282166858635247923261167229118143858549110887083338185107116102
\n", "
" ], "text/plain": [ " serialno insp RT DIVISION puma00 puma10 REGION ST ADJHSG \\\n", "0 2012000002680 200.0 H 9 -9 7506 4 6 1014531 \n", "1 2012000009189 660.0 H 9 -9 7506 4 6 1014531 \n", "2 2012000016466 NaN H 9 -9 7506 4 6 1014531 \n", "3 2012000017340 500.0 H 9 -9 7506 4 6 1014531 \n", "4 2012000025664 200.0 H 9 -9 7506 4 6 1014531 \n", "\n", " ADJINC WGTP NP TYPE ACR AGS BATH BDSP BLD BUS CONP ELEP FS \\\n", "0 1024887 21 1 1 1.0 NaN 1.0 2.0 3.0 2.0 0.0 60.0 2.0 \n", "1 1024887 14 4 1 NaN NaN 1.0 5.0 4.0 NaN 0.0 50.0 2.0 \n", "2 1024887 14 3 1 1.0 NaN 1.0 2.0 2.0 2.0 0.0 10.0 2.0 \n", "3 1024887 11 4 1 1.0 NaN 1.0 3.0 3.0 2.0 0.0 100.0 2.0 \n", "4 1024887 85 5 1 1.0 NaN 1.0 3.0 3.0 2.0 0.0 50.0 2.0 \n", "\n", " FULP GASP HFL MHP MRGI MRGP MRGT MRGX REFR RMSP RNTM RNTP \\\n", "0 2.0 4.0 9.0 NaN 2.0 350.0 2.0 1.0 1.0 5.0 NaN NaN \n", "1 2.0 50.0 1.0 NaN 2.0 490.0 2.0 1.0 1.0 7.0 NaN NaN \n", "2 2.0 1.0 1.0 NaN NaN NaN NaN NaN 1.0 4.0 2.0 2000.0 \n", "3 1000.0 30.0 3.0 NaN 2.0 3000.0 2.0 1.0 1.0 4.0 NaN NaN \n", "4 2.0 3.0 3.0 NaN 2.0 680.0 2.0 1.0 1.0 6.0 NaN NaN \n", "\n", " RWAT RWATPR SINK SMP STOV TEL TEN TOIL VACS VALP VEH \\\n", "0 1.0 9.0 1.0 NaN 1.0 1.0 1.0 1.0 NaN 500000.0 1.0 \n", "1 1.0 9.0 1.0 630.0 1.0 1.0 1.0 1.0 NaN 40000.0 2.0 \n", "2 1.0 9.0 1.0 NaN 1.0 1.0 3.0 1.0 NaN NaN 1.0 \n", "3 1.0 9.0 1.0 NaN 1.0 1.0 1.0 1.0 NaN 500000.0 1.0 \n", "4 1.0 9.0 1.0 NaN 1.0 1.0 1.0 1.0 NaN 500000.0 2.0 \n", "\n", " WATP YBL FES FINCP FPARC GRNTP GRPIP HHL HHT HINCP HUGCL \\\n", "0 430.0 1.0 NaN NaN NaN NaN NaN 1.0 6.0 59000.0 0.0 \n", "1 2000.0 1.0 4.0 10000.0 2.0 NaN NaN 4.0 1.0 10000.0 0.0 \n", "2 1.0 2.0 7.0 30000.0 2.0 2010.0 80.0 1.0 3.0 30000.0 0.0 \n", "3 1200.0 7.0 5.0 60000.0 4.0 NaN NaN 4.0 2.0 60000.0 0.0 \n", "4 900.0 4.0 7.0 73500.0 4.0 NaN NaN 1.0 3.0 73500.0 0.0 \n", "\n", " HUPAC HUPAOC HUPARC KIT LNGI MULTG MV NOC NPF NPP NR NRC \\\n", "0 4.0 4.0 4.0 1.0 1.0 1.0 6.0 0.0 NaN 0.0 0.0 0.0 \n", "1 2.0 2.0 2.0 1.0 2.0 1.0 5.0 2.0 4.0 0.0 0.0 2.0 
\n", "2 2.0 2.0 2.0 1.0 1.0 1.0 2.0 2.0 3.0 0.0 0.0 2.0 \n", "3 4.0 4.0 4.0 1.0 1.0 1.0 4.0 0.0 4.0 0.0 0.0 0.0 \n", "4 4.0 4.0 4.0 1.0 1.0 2.0 7.0 0.0 5.0 0.0 0.0 0.0 \n", "\n", " OCPIP PARTNER PLM PSF R18 R60 R65 RESMODE SMOCP SMX SRNT SVAL \\\n", "0 13.0 0.0 1.0 0.0 0.0 1.0 0.0 1.0 634.0 3.0 0.0 1.0 \n", "1 101.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1534.0 2.0 0.0 0.0 \n", "2 NaN 0.0 1.0 0.0 1.0 0.0 0.0 1.0 NaN NaN 1.0 0.0 \n", "3 79.0 0.0 1.0 1.0 0.0 1.0 0.0 1.0 3938.0 3.0 0.0 1.0 \n", "4 16.0 0.0 1.0 0.0 0.0 1.0 1.0 2.0 989.0 3.0 0.0 1.0 \n", "\n", " TAXP WIF WKEXREL WORKSTAT FACRP FAGSP FBATHP FBDSP FBLDP FBUSP \\\n", "0 32.0 NaN NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 23.0 0.0 9.0 9.0 0.0 0.0 0.0 1.0 1.0 0.0 \n", "2 NaN 1.0 13.0 13.0 1.0 0.0 0.0 0.0 0.0 1.0 \n", "3 65.0 2.0 12.0 11.0 1.0 0.0 0.0 0.0 0.0 1.0 \n", "4 32.0 3.0 13.0 13.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " FCONP FELEP FFSP FFULP FGASP FHFLP FINSP FKITP FMHP FMRGIP FMRGP \\\n", "0 0.0 0.0 0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n", "2 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 \n", "\n", " FMRGTP FMRGXP FMVP FPLMP FREFRP FRMSP FRNTMP FRNTP FRWATP \\\n", "0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0 0.0 0.0 1.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 1 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 1.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " FRWATPRP FSINKP FSMP FSMXHP FSMXSP FSTOVP FTAXP FTELP FTENP \\\n", "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 1.0 1.0 1.0 0.0 1.0 0.0 0.0 \n", "\n", " FTOILP FVACSP FVALP FVEHP FWATP FYBLP WGTP1 WGTP2 WGTP3 WGTP4 \\\n", "0 0.0 0.0 0.0 0.0 0.0 0.0 22 18 23 7 \n", "1 0.0 0.0 0.0 0.0 0.0 0.0 15 5 26 17 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 4 5 25 15 \n", "3 0.0 0.0 
0.0 0.0 0.0 0.0 10 15 2 9 \n", "4 0.0 0.0 1.0 0.0 0.0 1.0 18 25 147 72 \n", "\n", " WGTP5 WGTP6 WGTP7 WGTP8 WGTP9 WGTP10 WGTP11 WGTP12 WGTP13 WGTP14 \\\n", "0 25 23 37 30 6 5 40 20 7 33 \n", "1 5 10 12 5 15 4 14 17 17 20 \n", "2 4 23 23 14 15 5 15 20 17 14 \n", "3 3 8 14 11 16 9 22 10 11 23 \n", "4 26 96 123 64 83 28 93 78 98 77 \n", "\n", " WGTP15 WGTP16 WGTP17 WGTP18 WGTP19 WGTP20 WGTP21 WGTP22 WGTP23 \\\n", "0 35 23 22 6 22 21 20 20 23 \n", "1 11 22 16 12 27 26 15 27 4 \n", "2 4 16 16 25 25 12 24 17 4 \n", "3 12 5 28 21 4 23 11 11 13 \n", "4 37 90 102 135 91 85 108 138 35 \n", "\n", " WGTP24 WGTP25 WGTP26 WGTP27 WGTP28 WGTP29 WGTP30 WGTP31 WGTP32 \\\n", "0 37 18 19 7 6 46 36 7 21 \n", "1 4 32 16 13 27 18 26 17 13 \n", "2 15 24 3 6 15 17 24 13 16 \n", "3 9 15 9 11 12 4 10 4 11 \n", "4 91 138 42 43 77 68 163 86 76 \n", "\n", " WGTP33 WGTP34 WGTP35 WGTP36 WGTP37 WGTP38 WGTP39 WGTP40 WGTP41 \\\n", "0 36 8 5 17 18 36 19 18 19 \n", "1 15 4 12 3 15 12 4 4 15 \n", "2 14 10 20 15 15 4 5 15 4 \n", "3 10 4 12 15 3 4 18 3 13 \n", "4 101 74 106 100 111 30 28 87 118 \n", "\n", " WGTP42 WGTP43 WGTP44 WGTP45 WGTP46 WGTP47 WGTP48 WGTP49 WGTP50 \\\n", "0 19 20 38 19 20 7 6 37 38 \n", "1 4 24 28 5 14 11 4 17 3 \n", "2 5 29 16 4 23 25 10 15 5 \n", "3 7 4 12 3 13 9 11 30 12 \n", "4 131 40 90 100 27 36 79 76 137 \n", "\n", " WGTP51 WGTP52 WGTP53 WGTP54 WGTP55 WGTP56 WGTP57 WGTP58 WGTP59 \\\n", "0 7 20 31 7 6 22 21 32 20 \n", "1 17 14 14 26 10 27 17 15 31 \n", "2 11 10 13 17 5 14 16 31 20 \n", "3 17 12 12 18 10 3 18 21 4 \n", "4 106 104 92 82 166 85 86 35 24 \n", "\n", " WGTP60 WGTP61 WGTP62 WGTP63 WGTP64 WGTP65 WGTP66 WGTP67 WGTP68 \\\n", "0 18 20 23 23 7 20 20 40 43 \n", "1 21 18 28 6 3 22 14 11 25 \n", "2 15 27 20 5 19 32 7 4 16 \n", "3 13 10 8 17 12 20 10 11 10 \n", "4 79 23 26 116 72 29 118 143 85 \n", "\n", " WGTP69 WGTP70 WGTP71 WGTP72 WGTP73 WGTP74 WGTP75 WGTP76 WGTP77 \\\n", "0 5 7 40 20 6 35 39 22 26 \n", "1 14 20 10 16 19 3 14 5 13 \n", "2 11 24 12 13 13 16 23 12 11 \n", "3 
4 10 3 12 10 2 10 16 3 \n", "4 85 49 110 88 70 83 33 81 85 \n", "\n", " WGTP78 WGTP79 WGTP80 \n", "0 6 20 22 \n", "1 14 5 4 \n", "2 4 4 14 \n", "3 4 17 2 \n", "4 107 116 102 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "h_pums = c.download_household_pums(\"06\", puma10=puma10, puma00=puma00)\n", "h_pums.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Now the job is to categorize ACS and PUMS into the same categories - we start with the household ACS data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cat_namecarschildrenhouseholdsincomeworkers
cat_valuenoneonetwo or morenoyestotalgt100gt35-lt100lt35noneonetwo or more
NAME
Block Group 1, Census Tract 306, San Francisco County, California99119895205300157746968119111
Block Group 2, Census Tract 306, San Francisco County, California98317979194273198621362109101
Block Group 3, Census Tract 306, San Francisco County, California109320165240305257291969121112
\n", "
" ], "text/plain": [ "cat_name cars \\\n", "cat_value none one two or more \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco ... 9 91 198 \n", "Block Group 2, Census Tract 306, San Francisco ... 9 83 179 \n", "Block Group 3, Census Tract 306, San Francisco ... 10 93 201 \n", "\n", "cat_name children households \\\n", "cat_value no yes total \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco ... 95 205 300 \n", "Block Group 2, Census Tract 306, San Francisco ... 79 194 273 \n", "Block Group 3, Census Tract 306, San Francisco ... 65 240 305 \n", "\n", "cat_name income \\\n", "cat_value gt100 gt35-lt100 lt35 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco ... 157 74 69 \n", "Block Group 2, Census Tract 306, San Francisco ... 198 62 13 \n", "Block Group 3, Census Tract 306, San Francisco ... 257 29 19 \n", "\n", "cat_name workers \n", "cat_value none one two or more \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco ... 68 119 111 \n", "Block Group 2, Census Tract 306, San Francisco ... 62 109 101 \n", "Block Group 3, Census Tract 306, San Francisco ... 
69 121 112 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "h_acs_cat = cat.categorize(h_acs, {\n", " (\"households\", \"total\"): \"B11001_001E\",\n", " (\"children\", \"yes\"): \"B11001_002E\",\n", " (\"children\", \"no\"): \"B11001_001E - B11001_002E\",\n", " (\"income\", \"lt35\"): \"B19001_002E + B19001_003E + B19001_004E + \"\n", " \"B19001_005E + B19001_006E + B19001_007E\",\n", " (\"income\", \"gt35-lt100\"): \"B19001_008E + B19001_009E + \"\n", " \"B19001_010E + B19001_011E + B19001_012E\"\n", " \"+ B19001_013E\",\n", " (\"income\", \"gt100\"): \"B19001_014E + B19001_015E + B19001_016E\"\n", " \"+ B19001_017E\",\n", " (\"cars\", \"none\"): \"B08201_002E\",\n", " (\"cars\", \"one\"): \"B08201_003E\",\n", " (\"cars\", \"two or more\"): \"B08201_004E + B08201_005E + B08201_006E\",\n", " (\"workers\", \"none\"): \"B08202_002E\",\n", " (\"workers\", \"one\"): \"B08202_003E\",\n", " (\"workers\", \"two or more\"): \"B08202_004E + B08202_005E\" \n", "}, index_cols=['NAME'])\n", "h_acs_cat" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "assert np.all(cat.sum_accross_category(h_acs_cat) < 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## And the same for ACS population - the output of the categorization is the MARGINALS for each variable category" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cat_nameagepopulationracesex
cat_value19 and under20 to 3535 to 60above 60totalasianblackotherwhitefemalemale
NAME
Block Group 1, Census Tract 306, San Francisco County, California1981322012587892619152367397392
Block Group 2, Census Tract 306, San Francisco County, California12582313176696179070447354342
Block Group 3, Census Tract 306, San Francisco County, California291624121749393101285532421518
\n", "
" ], "text/plain": [ "cat_name age \\\n", "cat_value 19 and under 20 to 35 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco ... 198 132 \n", "Block Group 2, Census Tract 306, San Francisco ... 125 82 \n", "Block Group 3, Census Tract 306, San Francisco ... 291 62 \n", "\n", "cat_name \\\n", "cat_value 35 to 60 above 60 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco ... 201 258 \n", "Block Group 2, Census Tract 306, San Francisco ... 313 176 \n", "Block Group 3, Census Tract 306, San Francisco ... 412 174 \n", "\n", "cat_name population race \\\n", "cat_value total asian black \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco ... 789 261 9 \n", "Block Group 2, Census Tract 306, San Francisco ... 696 179 0 \n", "Block Group 3, Census Tract 306, San Francisco ... 939 310 12 \n", "\n", "cat_name sex \n", "cat_value other white female male \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco ... 152 367 397 392 \n", "Block Group 2, Census Tract 306, San Francisco ... 70 447 354 342 \n", "Block Group 3, Census Tract 306, San Francisco ... 
85 532 421 518 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p_acs_cat = cat.categorize(p_acs, {\n", " (\"population\", \"total\"): \"B01001_001E\",\n", " (\"age\", \"19 and under\"): \"B01001_003E + B01001_004E + B01001_005E + \"\n", " \"B01001_006E + B01001_007E + B01001_027E + \"\n", " \"B01001_028E + B01001_029E + B01001_030E + \"\n", " \"B01001_031E\",\n", " (\"age\", \"20 to 35\"): \"B01001_008E + B01001_009E + B01001_010E + \"\n", " \"B01001_011E + B01001_012E + B01001_032E + \"\n", " \"B01001_033E + B01001_034E + B01001_035E + \"\n", " \"B01001_036E\",\n", " (\"age\", \"35 to 60\"): \"B01001_013E + B01001_014E + B01001_015E + \"\n", " \"B01001_016E + B01001_017E + B01001_037E + \"\n", " \"B01001_038E + B01001_039E + B01001_040E + \"\n", " \"B01001_041E\",\n", " (\"age\", \"above 60\"): \"B01001_018E + B01001_019E + B01001_020E + \"\n", " \"B01001_021E + B01001_022E + B01001_023E + \"\n", " \"B01001_024E + B01001_025E + B01001_042E + \"\n", " \"B01001_043E + B01001_044E + B01001_045E + \"\n", " \"B01001_046E + B01001_047E + B01001_048E + \"\n", " \"B01001_049E\", \n", " (\"race\", \"white\"): \"B02001_002E\",\n", " (\"race\", \"black\"): \"B02001_003E\",\n", " (\"race\", \"asian\"): \"B02001_005E\",\n", " (\"race\", \"other\"): \"B02001_004E + B02001_006E + B02001_007E + \"\n", " \"B02001_008E\",\n", " (\"sex\", \"male\"): \"B01001_002E\",\n", " (\"sex\", \"female\"): \"B01001_026E\"\n", "}, index_cols=['NAME'])\n", "p_acs_cat" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "assert np.all(cat.sum_accross_category(p_acs_cat) < 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## To get the marginals as a series for one geography, do this" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "cat_name cat_value \n", "age 19 and under 198\n", " 20 to 35 132\n", " 35 to 60 201\n", " above 60 258\n", 
"population total 789\n", "race asian 261\n", " black 9\n", " other 152\n", " white 367\n", "sex female 397\n", " male 392\n", "Name: Block Group 1, Census Tract 306, San Francisco County, California, dtype: int32" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# First geography's marginals as a Series indexed by (cat_name, cat_value)\n", "p_acs_cat.iloc[0].T" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Now categorize the PUMS population data into the same categories" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cat_idfrequency
ageracesex
19 and underasianfemale0187
male1215
blackfemale216
male315
otherfemale489
male594
whitefemale6202
male7185
20 to 35asianfemale8238
male9211
blackfemale1021
male1123
otherfemale1259
male1367
whitefemale14198
male15225
35 to 60asianfemale16448
male17378
blackfemale1833
male1939
otherfemale2075
male2178
whitefemale22373
male23453
above 60asianfemale24233
male25182
blackfemale2657
male2743
otherfemale2832
male2925
whitefemale30354
male31294
\n", "
" ], "text/plain": [ " cat_id frequency\n", "age race sex \n", "19 and under asian female 0 187\n", " male 1 215\n", " black female 2 16\n", " male 3 15\n", " other female 4 89\n", " male 5 94\n", " white female 6 202\n", " male 7 185\n", "20 to 35 asian female 8 238\n", " male 9 211\n", " black female 10 21\n", " male 11 23\n", " other female 12 59\n", " male 13 67\n", " white female 14 198\n", " male 15 225\n", "35 to 60 asian female 16 448\n", " male 17 378\n", " black female 18 33\n", " male 19 39\n", " other female 20 75\n", " male 21 78\n", " white female 22 373\n", " male 23 453\n", "above 60 asian female 24 233\n", " male 25 182\n", " black female 26 57\n", " male 27 43\n", " other female 28 32\n", " male 29 25\n", " white female 30 354\n", " male 31 294" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def age_cat(r):\n",
 "    \"\"\"Return the age bucket label for a PUMS person record (reads r.AGEP).\n",
 "\n",
 "    The cut-offs mirror the ACS B01001 age bands summed into the marginals\n",
 "    in p_acs_cat: \"20 to 35\" is built from the bands ending at 30-34 and\n",
 "    \"35 to 60\" from the bands ending at 55-59, so ages 35 and 60 belong to\n",
 "    the next bucket up.\n",
 "    \"\"\"\n",
 "    if r.AGEP <= 19:\n",
 "        return \"19 and under\"\n",
 "    elif r.AGEP <= 34:\n",
 "        return \"20 to 35\"\n",
 "    elif r.AGEP <= 59:\n",
 "        return \"35 to 60\"\n",
 "    return \"above 60\"\n",
 "\n",
 "def race_cat(r):\n",
 "    \"\"\"Return the race bucket label for a PUMS person record (reads r.RAC1P).\n",
 "\n",
 "    Codes 1, 2 and 6 map to \"white\", \"black\" and \"asian\"; every other\n",
 "    value falls through to \"other\".\n",
 "    \"\"\"\n",
 "    if r.RAC1P == 1:\n",
 "        return \"white\"\n",
 "    elif r.RAC1P == 2:\n",
 "        return \"black\"\n",
 "    elif r.RAC1P == 6:\n",
 "        return \"asian\"\n",
 "    return \"other\"\n",
 "\n",
 "def sex_cat(r):\n",
 "    \"\"\"Return \"male\" when r.SEX == 1, otherwise \"female\".\"\"\"\n",
 "    if r.SEX == 1:\n",
 "        return \"male\"\n",
 "    return \"female\"\n",
 "\n",
 "# Cross every (age, race, sex) combination from the ACS marginal columns\n",
 "# with the PUMS person records, labelling each record via the mappers above.\n",
 "_, jd_persons = cat.joint_distribution(\n",
 "    p_pums,\n",
 "    cat.category_combinations(p_acs_cat.columns),\n",
 "    {\"age\": age_cat, \"race\": race_cat, \"sex\": sex_cat}\n",
 ")\n",
 "jd_persons"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Do the same for households - the output of this step is the JOINT DISTRIBUTIONS for the cross product of all possible categories" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cat_idfrequency
carschildrenincomeworkers
nonenogt100none00.0
one13.0
two or more25.0
gt35-lt100none33.0
one46.0
two or more510.0
lt35none6174.0
one79.0
two or more81.0
yesgt100none90.0
one100.0
two or more111.0
gt35-lt100none120.0
one134.0
two or more142.0
lt35none152.0
one165.0
two or more170.0
onenogt100none1810.0
one1919.0
two or more2042.0
gt35-lt100none2127.0
one2240.0
two or more2347.0
lt35none24404.0
one2514.0
two or more262.0
yesgt100none271.0
one2815.0
two or more2924.0
gt35-lt100none300.0
one3124.0
two or more3221.0
lt35none335.0
one3418.0
two or more352.0
two or morenogt100none3613.0
one3746.0
two or more38209.0
gt35-lt100none3930.0
one4063.0
two or more4198.0
lt35none42174.0
one4315.0
two or more4411.0
yesgt100none450.0
one4643.0
two or more47159.0
gt35-lt100none480.0
one4919.0
two or more5055.0
lt35none513.0
one526.0
two or more535.0
\n", "
" ], "text/plain": [ " cat_id frequency\n", "cars children income workers \n", "none no gt100 none 0 0.0\n", " one 1 3.0\n", " two or more 2 5.0\n", " gt35-lt100 none 3 3.0\n", " one 4 6.0\n", " two or more 5 10.0\n", " lt35 none 6 174.0\n", " one 7 9.0\n", " two or more 8 1.0\n", " yes gt100 none 9 0.0\n", " one 10 0.0\n", " two or more 11 1.0\n", " gt35-lt100 none 12 0.0\n", " one 13 4.0\n", " two or more 14 2.0\n", " lt35 none 15 2.0\n", " one 16 5.0\n", " two or more 17 0.0\n", "one no gt100 none 18 10.0\n", " one 19 19.0\n", " two or more 20 42.0\n", " gt35-lt100 none 21 27.0\n", " one 22 40.0\n", " two or more 23 47.0\n", " lt35 none 24 404.0\n", " one 25 14.0\n", " two or more 26 2.0\n", " yes gt100 none 27 1.0\n", " one 28 15.0\n", " two or more 29 24.0\n", " gt35-lt100 none 30 0.0\n", " one 31 24.0\n", " two or more 32 21.0\n", " lt35 none 33 5.0\n", " one 34 18.0\n", " two or more 35 2.0\n", "two or more no gt100 none 36 13.0\n", " one 37 46.0\n", " two or more 38 209.0\n", " gt35-lt100 none 39 30.0\n", " one 40 63.0\n", " two or more 41 98.0\n", " lt35 none 42 174.0\n", " one 43 15.0\n", " two or more 44 11.0\n", " yes gt100 none 45 0.0\n", " one 46 43.0\n", " two or more 47 159.0\n", " gt35-lt100 none 48 0.0\n", " one 49 19.0\n", " two or more 50 55.0\n", " lt35 none 51 3.0\n", " one 52 6.0\n", " two or more 53 5.0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def cars_cat(r):\n",
 "    \"\"\"Return the cars bucket label for a PUMS household record (reads r.VEH).\"\"\"\n",
 "    if r.VEH == 0:\n",
 "        return \"none\"\n",
 "    # Anything that is neither 0 nor 1 ends up in the top bucket.\n",
 "    return \"one\" if r.VEH == 1 else \"two or more\"\n",
 "\n",
 "def children_cat(r):\n",
 "    \"\"\"Return \"yes\" when r.NOC is positive, otherwise \"no\".\"\"\"\n",
 "    return \"yes\" if r.NOC > 0 else \"no\"\n",
 "\n",
 "def income_cat(r):\n",
 "    \"\"\"Return the income bucket label for a household record (reads r.FINCP).\"\"\"\n",
 "    # NOTE(review): any value that is not strictly above 35000 -- presumably\n",
 "    # including a missing FINCP -- lands in \"lt35\"; confirm that is intended.\n",
 "    if r.FINCP > 100000:\n",
 "        return \"gt100\"\n",
 "    if r.FINCP > 35000:\n",
 "        return \"gt35-lt100\"\n",
 "    return \"lt35\"\n",
 "\n",
 "def workers_cat(r):\n",
 "    \"\"\"Return the workers bucket label for a household record (reads r.WIF).\"\"\"\n",
 "    # WIF values 2 and 3 share one label; 1 is \"one\"; everything else is\n",
 "    # \"none\" (NOTE(review): presumably that includes a missing WIF -- verify).\n",
 "    if r.WIF in (2, 3):\n",
 "        return \"two or more\"\n",
 "    return \"one\" if r.WIF == 1 else \"none\"\n",
 "\n",
 "# Cross every household category combination from the ACS marginal columns\n",
 "# with the PUMS household records, labelling each record via the mappers above.\n",
 "_, jd_households = cat.joint_distribution(\n",
 "    h_pums,\n",
 "    cat.category_combinations(h_acs_cat.columns),\n",
 "    {\n",
 "        \"cars\": cars_cat,\n",
 "        \"children\": children_cat,\n",
 "        \"income\": income_cat,\n",
 "        \"workers\": workers_cat\n",
 "    }\n",
 ")\n",
 "jd_households"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## With marginals (aggregate, from ACS) and joint distribution (disaggregate, from PUMS) we're ready for some synthesis" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'TBD'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"TBD\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.14" } }, "nbformat": 4, "nbformat_minor": 1 }