{ "metadata": { "name": "", "signature": "sha256:a592422474c56006b4b8fa608447112a9168f56ca3b760518adbb9af36d3fe4f" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "%load_ext autoreload\n", "%autoreload 2\n", "from synthpop.census_helpers import Census\n", "from synthpop import categorizer as cat\n", "import pandas as pd\n", "import numpy as np\n", "import os\n", "pd.set_option('display.max_columns', 500)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "The Census API needs a key - you can sign up for one at the link below" ] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "http://api.census.gov/data/key_signup.html" ] }, { "cell_type": "code", "collapsed": false, "input": [ "c = Census(os.environ[\"CENSUS\"])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Here we get aggregate information on households from ACS - note some variables are associated with block groups and others with tracts" ] }, { "cell_type": "code", "collapsed": false, "input": [ "income_columns = ['B19001_0%02dE'%i for i in range(1, 18)]\n", "vehicle_columns = ['B08201_0%02dE'%i for i in range(1, 7)]\n", "workers_columns = ['B08202_0%02dE'%i for i in range(1, 6)]\n", "families_columns = ['B11001_001E', 'B11001_002E']\n", "block_group_columns = income_columns + families_columns\n", "tract_columns = vehicle_columns + workers_columns\n", "h_acs = c.block_group_and_tract_query(block_group_columns,\n", " tract_columns, \"06\", \"075\", \n", " merge_columns=['tract', 'county', 'state'],\n", " block_group_size_attr=\"B11001_001E\",\n", " tract_size_attr=\"B08201_001E\",\n", " tract=\"030600\")\n", "h_acs" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
B11001_001EB11001_002EB19001_001EB19001_002EB19001_003EB19001_004EB19001_005EB19001_006EB19001_007EB19001_008EB19001_009EB19001_010EB19001_011EB19001_012EB19001_013EB19001_014EB19001_015EB19001_016EB19001_017ENAMEblock groupcountystatetractB08201_001EB08201_002EB08201_003EB08201_004EB08201_005EB08201_006EB08202_001EB08202_002EB08202_003EB08202_004EB08202_005E
0 294 183 294 0 4 8 28 0 8 0 0 0 27 10 36 33 28 34 78 Block Group 1, Census Tract 306, San Francisco... 1 075 06 030600 294 14 86 125 55 12 294 65 89 118 20
1 226 138 226 0 11 10 9 0 20 0 0 0 9 11 0 25 19 31 81 Block Group 2, Census Tract 306, San Francisco... 2 075 06 030600 226 11 66 96 42 9 226 50 68 91 15
2 287 237 287 3 0 0 8 20 22 0 0 21 7 0 12 22 6 89 77 Block Group 3, Census Tract 306, San Francisco... 3 075 06 030600 287 14 84 122 53 12 287 64 87 115 19
\n", "

3 rows \u00d7 35 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ " B11001_001E B11001_002E B19001_001E B19001_002E B19001_003E \\\n", "0 294 183 294 0 4 \n", "1 226 138 226 0 11 \n", "2 287 237 287 3 0 \n", "\n", " B19001_004E B19001_005E B19001_006E B19001_007E B19001_008E \\\n", "0 8 28 0 8 0 \n", "1 10 9 0 20 0 \n", "2 0 8 20 22 0 \n", "\n", " B19001_009E B19001_010E B19001_011E B19001_012E B19001_013E \\\n", "0 0 0 27 10 36 \n", "1 0 0 9 11 0 \n", "2 0 21 7 0 12 \n", "\n", " B19001_014E B19001_015E B19001_016E B19001_017E \\\n", "0 33 28 34 78 \n", "1 25 19 31 81 \n", "2 22 6 89 77 \n", "\n", " NAME block group county state \\\n", "0 Block Group 1, Census Tract 306, San Francisco... 1 075 06 \n", "1 Block Group 2, Census Tract 306, San Francisco... 2 075 06 \n", "2 Block Group 3, Census Tract 306, San Francisco... 3 075 06 \n", "\n", " tract B08201_001E B08201_002E B08201_003E B08201_004E B08201_005E \\\n", "0 030600 294 14 86 125 55 \n", "1 030600 226 11 66 96 42 \n", "2 030600 287 14 84 122 53 \n", "\n", " B08201_006E B08202_001E B08202_002E B08202_003E B08202_004E \\\n", "0 12 294 65 89 118 \n", "1 9 226 50 68 91 \n", "2 12 287 64 87 115 \n", "\n", " B08202_005E \n", "0 20 \n", "1 15 \n", "2 19 \n", "\n", "[3 rows x 35 columns]" ] } ], "prompt_number": 3 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "And here is aggregate information on people from ACS " ] }, { "cell_type": "code", "collapsed": false, "input": [ "population = ['B01001_001E']\n", "sex = ['B01001_002E', 'B01001_026E']\n", "race = ['B02001_0%02dE'%i for i in range(1,11)]\n", "male_age_columns = ['B01001_0%02dE'%i for i in range(3,26)]\n", "female_age_columns = ['B01001_0%02dE'%i for i in range(27,50)]\n", "all_columns = population + sex + race + male_age_columns + female_age_columns\n", "p_acs = c.block_group_query(all_columns, \"06\", \"075\", tract=\"030600\")\n", "p_acs" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
B01001_001EB01001_002EB01001_003EB01001_004EB01001_005EB01001_006EB01001_007EB01001_008EB01001_009EB01001_010EB01001_011EB01001_012EB01001_013EB01001_014EB01001_015EB01001_016EB01001_017EB01001_018EB01001_019EB01001_020EB01001_021EB01001_022EB01001_023EB01001_024EB01001_025EB01001_026EB01001_027EB01001_028EB01001_029EB01001_030EB01001_031EB01001_032EB01001_033EB01001_034EB01001_035EB02001_001EB02001_002EB02001_003EB02001_004EB02001_005EB02001_006EB02001_007EB02001_008EB02001_009EB02001_010ENAMEblock groupcountystatetractB01001_036EB01001_037EB01001_038EB01001_039EB01001_040EB01001_041EB01001_042EB01001_043EB01001_044EB01001_045EB01001_046EB01001_047EB01001_048EB01001_049E
0 655 321 8 11 8 0 0 0 0 8 0 48 28 21 51 18 38 4 0 0 39 23 12 0 4 334 46 33 23 0 0 0 0 0 0 655 423 11 0 187 0 12 22 0 22 Block Group 1, Census Tract 306, San Francisco... 1 075 06 030600 6 20 65 41 0 14 7 0 23 0 37 4 0 15
1 528 236 7 17 11 0 4 0 0 0 0 0 14 53 39 24 12 19 9 0 17 8 2 0 0 292 48 7 0 0 3 0 0 25 0 528 448 9 0 11 0 0 60 0 60 Block Group 2, Census Tract 306, San Francisco... 2 075 06 030600 9 40 17 41 10 38 18 0 7 0 9 11 9 0
2 858 493 22 44 0 44 0 0 11 0 6 44 57 50 43 38 38 0 10 21 15 20 10 20 0 365 0 0 49 31 13 0 0 3 0 858 623 11 0 218 0 0 6 0 6 Block Group 3, Census Tract 306, San Francisco... 3 075 06 030600 14 0 51 38 66 30 2 8 7 0 20 9 21 3
\n", "

3 rows \u00d7 64 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ " B01001_001E B01001_002E B01001_003E B01001_004E B01001_005E \\\n", "0 655 321 8 11 8 \n", "1 528 236 7 17 11 \n", "2 858 493 22 44 0 \n", "\n", " B01001_006E B01001_007E B01001_008E B01001_009E B01001_010E \\\n", "0 0 0 0 0 8 \n", "1 0 4 0 0 0 \n", "2 44 0 0 11 0 \n", "\n", " B01001_011E B01001_012E B01001_013E B01001_014E B01001_015E \\\n", "0 0 48 28 21 51 \n", "1 0 0 14 53 39 \n", "2 6 44 57 50 43 \n", "\n", " B01001_016E B01001_017E B01001_018E B01001_019E B01001_020E \\\n", "0 18 38 4 0 0 \n", "1 24 12 19 9 0 \n", "2 38 38 0 10 21 \n", "\n", " B01001_021E B01001_022E B01001_023E B01001_024E B01001_025E \\\n", "0 39 23 12 0 4 \n", "1 17 8 2 0 0 \n", "2 15 20 10 20 0 \n", "\n", " B01001_026E B01001_027E B01001_028E B01001_029E B01001_030E \\\n", "0 334 46 33 23 0 \n", "1 292 48 7 0 0 \n", "2 365 0 0 49 31 \n", "\n", " B01001_031E B01001_032E B01001_033E B01001_034E B01001_035E \\\n", "0 0 0 0 0 0 \n", "1 3 0 0 25 0 \n", "2 13 0 0 3 0 \n", "\n", " B02001_001E B02001_002E B02001_003E B02001_004E B02001_005E \\\n", "0 655 423 11 0 187 \n", "1 528 448 9 0 11 \n", "2 858 623 11 0 218 \n", "\n", " B02001_006E B02001_007E B02001_008E B02001_009E B02001_010E \\\n", "0 0 12 22 0 22 \n", "1 0 0 60 0 60 \n", "2 0 0 6 0 6 \n", "\n", " NAME block group county state \\\n", "0 Block Group 1, Census Tract 306, San Francisco... 1 075 06 \n", "1 Block Group 2, Census Tract 306, San Francisco... 2 075 06 \n", "2 Block Group 3, Census Tract 306, San Francisco... 
3 075 06 \n", "\n", " tract B01001_036E B01001_037E B01001_038E B01001_039E B01001_040E \\\n", "0 030600 6 20 65 41 0 \n", "1 030600 9 40 17 41 10 \n", "2 030600 14 0 51 38 66 \n", "\n", " B01001_041E B01001_042E B01001_043E B01001_044E B01001_045E \\\n", "0 14 7 0 23 0 \n", "1 38 18 0 7 0 \n", "2 30 2 8 7 0 \n", "\n", " B01001_046E B01001_047E B01001_048E B01001_049E \n", "0 37 4 0 15 \n", "1 9 11 9 0 \n", "2 20 9 21 3 \n", "\n", "[3 rows x 64 columns]" ] } ], "prompt_number": 4 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Get the puma for our test tracts - this actually downloads the mapping file from the census website so it might take a few seconds" ] }, { "cell_type": "code", "collapsed": false, "input": [ "puma = c.tract_to_pums(\"06\", \"075\", \"030600\")" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Download PUMS for people records for a PUMA from our server (we processed the large files into smaller ones for you)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "p_pums = c.download_population_pums(\"06\", puma)\n", "p_pums.head(5)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0serialnoRTSPORDERPUMA00PUMA10STADJINCPWGTPAGEPCITCITWP05CITWP12COWDDRSDEARDEYEDOUTDPHYDRATDRATXDREMENGFERGCLGCMGCRHINS1HINS2HINS3HINS4HINS5HINS6HINS7INTPJWMNPJWRIPJWTRLANXMARMARHDMARHMMARHTMARHWMARHYP05MARHYP12MIGMILMLPAMLPBMLPCMLPDMLPEMLPFMLPGMLPHMLPIMLPJMLPKNWABNWAVNWLANWLKNWREOIPPAPRELPRETPSCHSCHGSCHLSEMPSEXSSIPSSPWAGPWKHPWKLWKWYOEP05YOEP12ANCANC1P05ANC1P12ANC2P05ANC2P12DECADEDISDRIVESPESPESRHICOVHISPINDPJWAPJWDPLANP05LANP12MIGPUMA00MIGPUMA10MIGSP05MIGSP12MSPNAICSPNATIVITYNOPOCOCCP02OCCP10OCCP12PAOCPERNPPINCPPOBP05POBP12POVPIPPOWPUMA00POWPUMA10POWSP05POWSP12PRIVCOVPUBCOVQTRBIRRAC1PRAC2P05RAC2P12RAC3P05RAC3P12RACAIANRACASNRACBLKRACNHPIRACNUMRACSORRACWHTRCSFNSFRSOCP00SOCP10SOCP12VPSWAOBFAGEPFANCPFCITPFCITWPFCOWPFDDRSPFDEARPFDEYEPFDOUTPFDPHYPFDRATPFDRATXPFDREMPFENGPFESRPFFERPFGCLPFGCMPFGCRPFHINS1PFHINS2PFHINS3CFHINS3PFHINS4CFHINS4PFHINS5CFHINS5PFHINS6PFHINS7PFHISPFINDPFINTPFJWDPFJWMNPFJWRIPFJWTRPFLANPFLANXPFMARHDPFMARHMPFMARHTPFMARHWPFMARHYPFMARPFMIGPFMIGSPFMILPPFMILSPFOCCPFOIPFPAPFPOBPFPOWSPFRACPFRELPFRETPFSCHGPFSCHLPFSCHPFSEMPFSEXPFSSIPFSSPFWAGPFWKHPFWKLPFWKWPFYOEPPWGTP1PWGTP2PWGTP3PWGTP4PWGTP5PWGTP6PWGTP7PWGTP8PWGTP9PWGTP10PWGTP11PWGTP12PWGTP13PWGTP14PWGTP15PWGTP16PWGTP17PWGTP18PWGTP19PWGTP20PWGTP21PWGTP22PWGTP23PWGTP24PWGTP25PWGTP26PWGTP27PWGTP28PWGTP29PWGTP30PWGTP31PWGTP32PWGTP33PWGTP34PWGTP35PWGTP36PWGTP37PWGTP38PWGTP39PWGTP40PWGTP41PWGTP42PWGTP43PWGTP44PWGTP45PWGTP46PWGTP47PWGTP48PWGTP49PWGTP50PWGTP51PWGTP52PWGTP53PWGTP54PWGTP55PWGTP56PWGTP57PWGTP58PWGTP59PWGTP60PWGTP61PWGTP62PWGTP63PWGTP64PWGTP65PWGTP66PWGTP67PWGTP68PWGTP69PWGTP70PWGTP71PWGTP72PWGTP73PWGTP74PWGTP75PWGTP76PWGTP77PWGTP78PWGTP79PWGTP80
0 2165691 2012000002680 P 1-9 07506 06 1010207 20 64 1NaN NaN 2 2 2 2 2 2NaNNaN 2NaNNaN 2NaNNaN 1 2 2 2 2 2 2 0 10NaN 10 2 5NaNNaNNaNNaNNaN NaN 1 5NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN 3 5 2 2 3 0 0 0 0 1NaN 21 0 2 0 0 59000 24 1 1NaN NaN 3-9 995-9 999NaN 2NaNNaN 1 1 1 7870 84 43NaN NaNNaNNaNNaNNaN 6 611M1 1NaN 0 N.A. N.A. 5860 4 59000 59000-9 36 500 -9 7500 -9 6 1 2 4 1-9 1-9 1 0 0 0 0 1 0 1 0NaNNaN N.A.// N.A.// 439061NaN 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0NaN 0NaN 0NaN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 21 19 20 6 22 23 31 29 7 6 39 21 6 29 30 22 20 7 22 21 18 17 26 44 18 19 7 5 35 33 7 16 32 7 5 22 16 34 20 14 17 16 22 33 18 17 6 6 41 41 7 17 29 6 6 21 19 30 19 17 18 24 25 6 24 24 32 45 5 7 40 21 6 33 36 18 24 7 16 16
1 2167411 2012000009189 P 1-9 07506 06 1010207 16 52 4 -9 1995NaN 2 2 1 2 2NaNNaN 2 3NaN 2NaNNaN 1 2 2 2 2 2 2 0NaNNaNNaN 1 1 2 2 1 2 -9 1992 1 5NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN 3 5 3 3 3 10000 0 0 0 1NaN 16 0 1 0 0 0NaN 3NaN -9 1989 4-9 999-9 999 5 1NaNNaN 6 1 1 NaNNaNNaN -9 708NaNNaNNaNNaN 1 NaN 2NaN 0 NaN NaN NaNNaN 0 10000-9 207 43NaN NaNNaNNaN 1 2 3 6-9 43-9 5 0 1 0 0 1 0 0 0NaNNaN NaN NaN NaNNaN 4 0 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 0 0 0 0NaN 0NaN 0NaN 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 1 0 1 1 1 0 1 0 0 19 11 23 20 7 16 11 7 20 4 21 14 16 27 11 20 19 15 18 26 13 33 4 4 50 16 12 40 25 37 19 17 10 3 12 3 11 14 5 5 16 4 28 24 6 19 14 5 17 3 17 15 12 27 13 33 15 19 47 18 22 28 6 5 28 12 12 25 21 15 12 12 15 5 14 7 15 13 7 4
2 2167412 2012000009189 P 2-9 07506 06 1010207 13 45 4 -9 1998 3 2 2 1 2 2NaNNaN 2 3 2 2NaNNaN 1 2 2 2 2 2 2 0NaNNaNNaN 1 1 2 2 1 2 -9 1992 1 5NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN 3 5 3 3 3 0 0 1 0 1NaN 16 0 2 0 0 0NaN 2NaN -9 1992 4-9 999-9 999 6 1NaNNaN 6 1 1 8370NaNNaN -9 708NaNNaNNaNNaN 1 6241 2NaN 0 N.A. N.A. 4610 2 0 0-9 207 43NaN NaNNaNNaN 1 2 2 6-9 43-9 5 0 1 0 0 1 0 0 0NaNNaN N.A.// N.A.// 399021NaN 4 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 0 0 0 0NaN 0NaN 0NaN 0 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 1 1 0 1 0 0 11 6 25 18 4 15 16 5 12 5 14 13 20 25 9 19 13 16 19 23 12 19 4 4 21 12 12 20 16 21 14 14 11 4 15 3 11 17 4 3 13 5 19 21 4 12 14 4 11 3 19 13 17 25 12 16 12 15 19 27 16 24 5 4 24 12 11 19 13 24 11 16 14 3 15 3 15 12 3 4
3 2167413 2012000009189 P 3-9 07506 06 1010207 14 10 1NaN NaNNaN 2 2 2NaN 2NaNNaN 2NaNNaNNaNNaNNaN 2 2 2 1 2 2 2NaNNaNNaNNaN 2 5NaNNaNNaNNaNNaN NaN 1NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN NaNNaN 2NaN 1NaN 7NaN 1NaNNaN NaNNaNNaNNaNNaN NaN 4-9 999-9 999NaN 2NaN 4NaN 1 1 NaNNaNNaNNaN NaNNaNNaNNaNNaNNaN NaN 1 4 1 NaN NaN NaNNaN NaN NaN-9 6 43NaN NaNNaNNaN 2 1 4 6-9 43-9 5 0 1 0 0 1 0 0 1NaNNaN NaN NaN NaNNaN 1 0 0 0 0 0 1 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0NaN 0 0 0NaN 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18 4 24 25 5 12 14 6 14 4 10 12 20 28 16 23 18 18 22 23 18 23 4 3 23 14 15 25 12 19 14 19 13 3 16 4 16 17 4 5 11 5 21 23 6 23 13 4 15 4 19 16 13 26 11 21 13 18 22 23 12 26 5 4 23 14 13 21 15 22 15 13 16 5 12 6 12 12 3 3
4 2167414 2012000009189 P 4-9 07506 06 1010207 15 8 1NaN NaNNaN 2 2 2NaN 2NaNNaN 2NaNNaNNaNNaNNaN 2 2 2 1 2 2 2NaNNaNNaNNaN 2 5NaNNaNNaNNaNNaN NaN 1NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN NaNNaN 2NaN 2 5 6NaN 1NaNNaN NaNNaNNaNNaNNaN NaN 4-9 999-9 999NaN 2NaN 4NaN 1 1 NaNNaNNaNNaN NaNNaNNaNNaNNaNNaN NaN 1 4 1 NaN NaN NaNNaN NaN NaN-9 6 43NaN NaNNaNNaN 2 1 1 6-9 43-9 5 0 1 0 0 1 0 0 1NaNNaN NaN NaN NaNNaN 1 0 0 0 0 0 1 1 1 0 1 0 0 1 0 0 0 0 0 0 1 1NaN 1 0 1NaN 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 18 4 25 24 5 11 13 6 14 4 11 11 20 28 15 23 18 18 22 23 18 23 4 4 23 14 16 25 12 19 15 19 13 4 16 3 16 17 4 5 12 7 21 23 5 23 13 5 16 3 20 16 13 26 12 22 13 17 22 22 13 27 5 3 23 14 13 20 14 22 14 13 17 5 13 6 12 13 4 3
\n", "

5 rows \u00d7 291 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ " Unnamed: 0 serialno RT SPORDER PUMA00 PUMA10 ST ADJINC PWGTP \\\n", "0 2165691 2012000002680 P 1 -9 07506 06 1010207 20 \n", "1 2167411 2012000009189 P 1 -9 07506 06 1010207 16 \n", "2 2167412 2012000009189 P 2 -9 07506 06 1010207 13 \n", "3 2167413 2012000009189 P 3 -9 07506 06 1010207 14 \n", "4 2167414 2012000009189 P 4 -9 07506 06 1010207 15 \n", "\n", " AGEP CIT CITWP05 CITWP12 COW DDRS DEAR DEYE DOUT DPHY DRAT \\\n", "0 64 1 NaN NaN 2 2 2 2 2 2 NaN \n", "1 52 4 -9 1995 NaN 2 2 1 2 2 NaN \n", "2 45 4 -9 1998 3 2 2 1 2 2 NaN \n", "3 10 1 NaN NaN NaN 2 2 2 NaN 2 NaN \n", "4 8 1 NaN NaN NaN 2 2 2 NaN 2 NaN \n", "\n", " DRATX DREM ENG FER GCL GCM GCR HINS1 HINS2 HINS3 HINS4 HINS5 \\\n", "0 NaN 2 NaN NaN 2 NaN NaN 1 2 2 2 2 \n", "1 NaN 2 3 NaN 2 NaN NaN 1 2 2 2 2 \n", "2 NaN 2 3 2 2 NaN NaN 1 2 2 2 2 \n", "3 NaN 2 NaN NaN NaN NaN NaN 2 2 2 1 2 \n", "4 NaN 2 NaN NaN NaN NaN NaN 2 2 2 1 2 \n", "\n", " HINS6 HINS7 INTP JWMNP JWRIP JWTR LANX MAR MARHD MARHM MARHT \\\n", "0 2 2 0 10 NaN 10 2 5 NaN NaN NaN \n", "1 2 2 0 NaN NaN NaN 1 1 2 2 1 \n", "2 2 2 0 NaN NaN NaN 1 1 2 2 1 \n", "3 2 2 NaN NaN NaN NaN 2 5 NaN NaN NaN \n", "4 2 2 NaN NaN NaN NaN 2 5 NaN NaN NaN \n", "\n", " MARHW MARHYP05 MARHYP12 MIG MIL MLPA MLPB MLPC MLPD MLPE MLPF \\\n", "0 NaN NaN NaN 1 5 NaN NaN NaN NaN NaN NaN \n", "1 2 -9 1992 1 5 NaN NaN NaN NaN NaN NaN \n", "2 2 -9 1992 1 5 NaN NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN 1 NaN NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN 1 NaN NaN NaN NaN NaN NaN NaN \n", "\n", " MLPG MLPH MLPI MLPJ MLPK NWAB NWAV NWLA NWLK NWRE OIP PAP \\\n", "0 NaN NaN NaN NaN NaN 3 5 2 2 3 0 0 \n", "1 NaN NaN NaN NaN NaN 3 5 3 3 3 10000 0 \n", "2 NaN NaN NaN NaN NaN 3 5 3 3 3 0 0 \n", "3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "\n", " RELP RETP SCH SCHG SCHL SEMP SEX SSIP SSP WAGP WKHP WKL WKW \\\n", "0 0 0 1 NaN 21 0 2 0 0 59000 24 1 
1 \n", "1 0 0 1 NaN 16 0 1 0 0 0 NaN 3 NaN \n", "2 1 0 1 NaN 16 0 2 0 0 0 NaN 2 NaN \n", "3 2 NaN 1 NaN 7 NaN 1 NaN NaN NaN NaN NaN NaN \n", "4 2 NaN 2 5 6 NaN 1 NaN NaN NaN NaN NaN NaN \n", "\n", " YOEP05 YOEP12 ANC ANC1P05 ANC1P12 ANC2P05 ANC2P12 DECADE DIS \\\n", "0 NaN NaN 3 -9 995 -9 999 NaN 2 \n", "1 -9 1989 4 -9 999 -9 999 5 1 \n", "2 -9 1992 4 -9 999 -9 999 6 1 \n", "3 NaN NaN 4 -9 999 -9 999 NaN 2 \n", "4 NaN NaN 4 -9 999 -9 999 NaN 2 \n", "\n", " DRIVESP ESP ESR HICOV HISP INDP JWAP JWDP LANP05 LANP12 \\\n", "0 NaN NaN 1 1 1 7870 84 43 NaN NaN \n", "1 NaN NaN 6 1 1 NaN NaN NaN -9 708 \n", "2 NaN NaN 6 1 1 8370 NaN NaN -9 708 \n", "3 NaN 4 NaN 1 1 NaN NaN NaN NaN NaN \n", "4 NaN 4 NaN 1 1 NaN NaN NaN NaN NaN \n", "\n", " MIGPUMA00 MIGPUMA10 MIGSP05 MIGSP12 MSP NAICSP NATIVITY NOP OC \\\n", "0 NaN NaN NaN NaN 6 611M1 1 NaN 0 \n", "1 NaN NaN NaN NaN 1 NaN 2 NaN 0 \n", "2 NaN NaN NaN NaN 1 6241 2 NaN 0 \n", "3 NaN NaN NaN NaN NaN NaN 1 4 1 \n", "4 NaN NaN NaN NaN NaN NaN 1 4 1 \n", "\n", " OCCP02 OCCP10 OCCP12 PAOC PERNP PINCP POBP05 POBP12 POVPIP \\\n", "0 N.A. N.A. 5860 4 59000 59000 -9 36 500 \n", "1 NaN NaN NaN NaN 0 10000 -9 207 43 \n", "2 N.A. N.A. 
4610 2 0 0 -9 207 43 \n", "3 NaN NaN NaN NaN NaN NaN -9 6 43 \n", "4 NaN NaN NaN NaN NaN NaN -9 6 43 \n", "\n", " POWPUMA00 POWPUMA10 POWSP05 POWSP12 PRIVCOV PUBCOV QTRBIR RAC1P \\\n", "0 -9 7500 -9 6 1 2 4 1 \n", "1 NaN NaN NaN NaN 1 2 3 6 \n", "2 NaN NaN NaN NaN 1 2 2 6 \n", "3 NaN NaN NaN NaN 2 1 4 6 \n", "4 NaN NaN NaN NaN 2 1 1 6 \n", "\n", " RAC2P05 RAC2P12 RAC3P05 RAC3P12 RACAIAN RACASN RACBLK RACNHPI \\\n", "0 -9 1 -9 1 0 0 0 0 \n", "1 -9 43 -9 5 0 1 0 0 \n", "2 -9 43 -9 5 0 1 0 0 \n", "3 -9 43 -9 5 0 1 0 0 \n", "4 -9 43 -9 5 0 1 0 0 \n", "\n", " RACNUM RACSOR RACWHT RC SFN SFR SOCP00 SOCP10 SOCP12 VPS WAOB \\\n", "0 1 0 1 0 NaN NaN N.A.// N.A.// 439061 NaN 1 \n", "1 1 0 0 0 NaN NaN NaN NaN NaN NaN 4 \n", "2 1 0 0 0 NaN NaN N.A.// N.A.// 399021 NaN 4 \n", "3 1 0 0 1 NaN NaN NaN NaN NaN NaN 1 \n", "4 1 0 0 1 NaN NaN NaN NaN NaN NaN 1 \n", "\n", " FAGEP FANCP FCITP FCITWP FCOWP FDDRSP FDEARP FDEYEP FDOUTP FDPHYP \\\n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 1 0 0 1 1 \n", "2 0 0 1 0 1 1 0 0 1 1 \n", "3 0 0 0 0 0 1 1 1 0 1 \n", "4 0 0 0 0 0 1 1 1 0 1 \n", "\n", " FDRATP FDRATXP FDREMP FENGP FESRP FFERP FGCLP FGCMP FGCRP FHINS1P \\\n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 1 0 1 0 0 0 0 0 \n", "2 0 0 1 1 1 1 0 0 0 0 \n", "3 0 0 1 0 0 0 0 0 0 0 \n", "4 0 0 1 0 0 0 0 0 0 1 \n", "\n", " FHINS2P FHINS3C FHINS3P FHINS4C FHINS4P FHINS5C FHINS5P FHINS6P \\\n", "0 0 NaN 0 NaN 0 NaN 0 0 \n", "1 0 NaN 0 NaN 0 NaN 0 0 \n", "2 0 NaN 0 NaN 0 NaN 0 0 \n", "3 0 NaN 0 0 0 NaN 0 0 \n", "4 1 NaN 1 0 1 NaN 1 1 \n", "\n", " FHINS7P FHISP FINDP FINTP FJWDP FJWMNP FJWRIP FJWTRP FLANP FLANXP \\\n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 1 0 1 0 0 0 0 0 0 \n", "2 0 1 1 1 0 0 0 0 0 0 \n", "3 0 1 0 0 0 0 0 0 0 0 \n", "4 1 1 0 0 0 0 0 0 0 0 \n", "\n", " FMARHDP FMARHMP FMARHTP FMARHWP FMARHYP FMARP FMIGP FMIGSP FMILPP \\\n", "0 0 0 0 0 0 0 0 0 0 \n", "1 1 1 1 1 1 0 0 0 0 \n", "2 1 1 1 1 1 0 0 0 0 \n", "3 0 0 0 0 0 0 1 0 0 \n", "4 0 0 0 0 0 0 1 0 0 \n", "\n", " FMILSP FOCCP FOIP FPAP FPOBP 
FPOWSP FRACP FRELP FRETP FSCHGP \\\n", "0 0 0 0 0 1 0 0 0 0 0 \n", "1 1 0 1 1 0 0 0 0 1 0 \n", "2 1 1 1 1 0 0 0 0 1 0 \n", "3 0 0 0 0 0 0 1 0 0 0 \n", "4 0 0 0 0 0 0 1 0 0 1 \n", "\n", " FSCHLP FSCHP FSEMP FSEXP FSSIP FSSP FWAGP FWKHP FWKLP FWKWP \\\n", "0 0 0 0 0 0 0 0 1 0 1 \n", "1 0 0 1 0 1 1 1 0 1 0 \n", "2 0 0 1 0 1 1 1 0 1 0 \n", "3 0 0 0 0 0 0 0 0 0 0 \n", "4 0 1 0 0 0 0 0 0 0 0 \n", "\n", " FYOEP PWGTP1 PWGTP2 PWGTP3 PWGTP4 PWGTP5 PWGTP6 PWGTP7 PWGTP8 \\\n", "0 0 21 19 20 6 22 23 31 29 \n", "1 0 19 11 23 20 7 16 11 7 \n", "2 0 11 6 25 18 4 15 16 5 \n", "3 0 18 4 24 25 5 12 14 6 \n", "4 0 18 4 25 24 5 11 13 6 \n", "\n", " PWGTP9 PWGTP10 PWGTP11 PWGTP12 PWGTP13 PWGTP14 PWGTP15 PWGTP16 \\\n", "0 7 6 39 21 6 29 30 22 \n", "1 20 4 21 14 16 27 11 20 \n", "2 12 5 14 13 20 25 9 19 \n", "3 14 4 10 12 20 28 16 23 \n", "4 14 4 11 11 20 28 15 23 \n", "\n", " PWGTP17 PWGTP18 PWGTP19 PWGTP20 PWGTP21 PWGTP22 PWGTP23 PWGTP24 \\\n", "0 20 7 22 21 18 17 26 44 \n", "1 19 15 18 26 13 33 4 4 \n", "2 13 16 19 23 12 19 4 4 \n", "3 18 18 22 23 18 23 4 3 \n", "4 18 18 22 23 18 23 4 4 \n", "\n", " PWGTP25 PWGTP26 PWGTP27 PWGTP28 PWGTP29 PWGTP30 PWGTP31 PWGTP32 \\\n", "0 18 19 7 5 35 33 7 16 \n", "1 50 16 12 40 25 37 19 17 \n", "2 21 12 12 20 16 21 14 14 \n", "3 23 14 15 25 12 19 14 19 \n", "4 23 14 16 25 12 19 15 19 \n", "\n", " PWGTP33 PWGTP34 PWGTP35 PWGTP36 PWGTP37 PWGTP38 PWGTP39 PWGTP40 \\\n", "0 32 7 5 22 16 34 20 14 \n", "1 10 3 12 3 11 14 5 5 \n", "2 11 4 15 3 11 17 4 3 \n", "3 13 3 16 4 16 17 4 5 \n", "4 13 4 16 3 16 17 4 5 \n", "\n", " PWGTP41 PWGTP42 PWGTP43 PWGTP44 PWGTP45 PWGTP46 PWGTP47 PWGTP48 \\\n", "0 17 16 22 33 18 17 6 6 \n", "1 16 4 28 24 6 19 14 5 \n", "2 13 5 19 21 4 12 14 4 \n", "3 11 5 21 23 6 23 13 4 \n", "4 12 7 21 23 5 23 13 5 \n", "\n", " PWGTP49 PWGTP50 PWGTP51 PWGTP52 PWGTP53 PWGTP54 PWGTP55 PWGTP56 \\\n", "0 41 41 7 17 29 6 6 21 \n", "1 17 3 17 15 12 27 13 33 \n", "2 11 3 19 13 17 25 12 16 \n", "3 15 4 19 16 13 26 11 21 \n", "4 16 3 20 16 13 26 12 22 
\n", "\n", " PWGTP57 PWGTP58 PWGTP59 PWGTP60 PWGTP61 PWGTP62 PWGTP63 PWGTP64 \\\n", "0 19 30 19 17 18 24 25 6 \n", "1 15 19 47 18 22 28 6 5 \n", "2 12 15 19 27 16 24 5 4 \n", "3 13 18 22 23 12 26 5 4 \n", "4 13 17 22 22 13 27 5 3 \n", "\n", " PWGTP65 PWGTP66 PWGTP67 PWGTP68 PWGTP69 PWGTP70 PWGTP71 PWGTP72 \\\n", "0 24 24 32 45 5 7 40 21 \n", "1 28 12 12 25 21 15 12 12 \n", "2 24 12 11 19 13 24 11 16 \n", "3 23 14 13 21 15 22 15 13 \n", "4 23 14 13 20 14 22 14 13 \n", "\n", " PWGTP73 PWGTP74 PWGTP75 PWGTP76 PWGTP77 PWGTP78 PWGTP79 PWGTP80 \n", "0 6 33 36 18 24 7 16 16 \n", "1 15 5 14 7 15 13 7 4 \n", "2 14 3 15 3 15 12 3 4 \n", "3 16 5 12 6 12 12 3 3 \n", "4 17 5 13 6 12 13 4 3 \n", "\n", "[5 rows x 291 columns]" ] } ], "prompt_number": 6 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Download PUMS for household records for a PUMA" ] }, { "cell_type": "code", "collapsed": false, "input": [ "h_pums = c.download_household_pums(\"06\", puma)\n", "h_pums.head(5)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0serialno
0 940390 2012000002680 H 9-9 07506 4 06 1000000 1010207 21 1 1 1NaN 1 2 3 2 0 60 2 2 4 9 200NaN 2 350 2 1 1 5NaN NaN 1 1 NaN 1 1 1 1NaN 500000 1 430 1NaN NaNNaN NaNNaN 1 6 59000 0 4 4 4 1 1 1 6 0NaN 0 0 0 13 0 1 0 0 1 0 1 634 3 0 1 32NaNNaNNaN 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 21 19 21 7 22 23 31 29 6 5 39 21 6 29 30 22 20 6 22 21 17 17 26 44 18 20 7 5 35 33 8 17 32 7 5 23 16 35 20 15 17 16 22 34 18 17 6 6 41 41 6 17 30 6 6 21 20 30 19 17 18 25 25 6 24 24 32 45 5 7 39 21 6 33 36 18 24 7 17 16
1 941064 2012000009189 H 9-9 07506 4 06 1000000 1010207 15 4 1NaNNaN 1 5 4NaN 0 50 2 2 50 1 660NaN 2 490 2 1 1 7NaN NaN 1 1 630 1 1 1 1NaN 40000 2 2000 1 4 10000 2 NaNNaN 4 1 10000 0 2 2 2 1 2 1 5 2 4 0 0 2 101 0 1 0 1 0 0 1 1534 2 0 0 23 0 9 9 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 19 11 23 20 7 16 11 7 20 5 21 14 15 27 11 20 18 15 18 26 13 33 4 4 50 16 13 40 25 37 19 17 10 4 13 3 11 14 6 4 16 4 28 24 6 19 15 5 18 3 17 15 12 27 13 33 14 19 47 18 22 28 5 6 28 13 12 26 21 15 12 12 15 5 13 7 15 13 7 5
2 941824 2012000016466 H 9-9 07506 4 06 1000000 1010207 15 3 1 1NaN 1 2 2 2 0 10 2 2 1 1 NaNNaNNaN NaNNaNNaN 1 4 2 2000 1 1 NaN 1 1 3 1NaN NaN 1 1 2 7 30000 2 2010 80 1 3 30000 0 2 2 2 1 1 1 2 2 3 0 0 2 NaN 0 1 0 1 0 0 1 NaNNaN 1 0NaN 1 13 13 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 5 32 14 5 21 21 12 16 6 15 19 20 18 5 13 19 23 25 11 23 21 5 22 24 5 8 12 17 35 24 15 17 10 14 13 16 5 5 18 3 6 27 21 5 25 28 14 10 7 13 15 17 21 4 14 14 22 22 15 31 26 5 17 34 7 4 17 20 24 12 12 14 14 26 18 13 4 5 16
3 941918 2012000017340 H 9-9 07506 4 06 1000000 1010207 10 4 1 1NaN 1 3 3 2 0 100 2 1000 30 3 500NaN 2 3000 2 1 1 4NaN NaN 1 1 NaN 1 1 1 1NaN 500000 1 1200 7 5 60000 4 NaNNaN 4 2 60000 0 4 4 4 1 1 1 4 0 4 0 0 0 79 0 1 1 0 1 0 1 3938 3 0 1 65 2 12 11 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 8 3 8 4 6 8 12 17 7 20 11 11 18 14 4 30 23 4 20 10 10 19 9 15 6 12 10 3 12 4 11 8 4 9 18 5 3 18 4 12 10 4 13 3 12 13 10 28 13 20 16 11 17 11 3 18 15 3 13 9 8 14 12 17 10 10 9 5 8 3 13 11 2 9 19 2 3 13 3
4 942803 2012000025664 H 9-9 07506 4 06 1000000 1010207 71 5 1 1NaN 1 3 3 2 0 50 2 2 3 3 200NaN 2 680 2 1 1 6NaN NaN 1 1 NaN 1 1 1 1NaN 500000 2 900 4 7 73500 4 NaNNaN 1 3 73500 0 4 4 4 1 1 2 7 0 5 0 0 0 16 0 1 0 0 1 1 2 989 3 0 1 32 3 13 13 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 1 0 0 1 33 23 78 66 19 99 88 97 68 29 80 72 50 74 36 57 82 101 77 75 69 93 23 131 105 27 25 83 69 93 65 57 92 74 108 89 128 38 17 55 102 103 44 61 109 29 31 69 66 94 73 66 77 62 104 82 78 38 30 87 37 21 85 63 25 66 93 80 55 29 90 74 86 63 19 70 50 100 115 70
\n", "

5 rows \u00d7 204 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ " Unnamed: 0 serialno RT DIVISION PUMA00 PUMA10 REGION ST ADJHSG \\\n", "0 940390 2012000002680 H 9 -9 07506 4 06 1000000 \n", "1 941064 2012000009189 H 9 -9 07506 4 06 1000000 \n", "2 941824 2012000016466 H 9 -9 07506 4 06 1000000 \n", "3 941918 2012000017340 H 9 -9 07506 4 06 1000000 \n", "4 942803 2012000025664 H 9 -9 07506 4 06 1000000 \n", "\n", " ADJINC WGTP NP TYPE ACR AGS BATH BDSP BLD BUS CONP ELEP FS \\\n", "0 1010207 21 1 1 1 NaN 1 2 3 2 0 60 2 \n", "1 1010207 15 4 1 NaN NaN 1 5 4 NaN 0 50 2 \n", "2 1010207 15 3 1 1 NaN 1 2 2 2 0 10 2 \n", "3 1010207 10 4 1 1 NaN 1 3 3 2 0 100 2 \n", "4 1010207 71 5 1 1 NaN 1 3 3 2 0 50 2 \n", "\n", " FULP GASP HFL INSP MHP MRGI MRGP MRGT MRGX REFR RMSP RNTM RNTP \\\n", "0 2 4 9 200 NaN 2 350 2 1 1 5 NaN NaN \n", "1 2 50 1 660 NaN 2 490 2 1 1 7 NaN NaN \n", "2 2 1 1 NaN NaN NaN NaN NaN NaN 1 4 2 2000 \n", "3 1000 30 3 500 NaN 2 3000 2 1 1 4 NaN NaN \n", "4 2 3 3 200 NaN 2 680 2 1 1 6 NaN NaN \n", "\n", " RWAT SINK SMP STOV TEL TEN TOIL VACS VALP VEH WATP YBL FES \\\n", "0 1 1 NaN 1 1 1 1 NaN 500000 1 430 1 NaN \n", "1 1 1 630 1 1 1 1 NaN 40000 2 2000 1 4 \n", "2 1 1 NaN 1 1 3 1 NaN NaN 1 1 2 7 \n", "3 1 1 NaN 1 1 1 1 NaN 500000 1 1200 7 5 \n", "4 1 1 NaN 1 1 1 1 NaN 500000 2 900 4 7 \n", "\n", " FINCP FPARC GRNTP GRPIP HHL HHT HINCP HUGCL HUPAC HUPAOC HUPARC \\\n", "0 NaN NaN NaN NaN 1 6 59000 0 4 4 4 \n", "1 10000 2 NaN NaN 4 1 10000 0 2 2 2 \n", "2 30000 2 2010 80 1 3 30000 0 2 2 2 \n", "3 60000 4 NaN NaN 4 2 60000 0 4 4 4 \n", "4 73500 4 NaN NaN 1 3 73500 0 4 4 4 \n", "\n", " KIT LNGI MULTG MV NOC NPF NPP NR NRC OCPIP PARTNER PLM PSF \\\n", "0 1 1 1 6 0 NaN 0 0 0 13 0 1 0 \n", "1 1 2 1 5 2 4 0 0 2 101 0 1 0 \n", "2 1 1 1 2 2 3 0 0 2 NaN 0 1 0 \n", "3 1 1 1 4 0 4 0 0 0 79 0 1 1 \n", "4 1 1 2 7 0 5 0 0 0 16 0 1 0 \n", "\n", " R18 R60 R65 RESMODE SMOCP SMX SRNT SVAL TAXP WIF WKEXREL \\\n", "0 0 1 0 1 634 3 0 1 32 NaN NaN \n", "1 1 0 0 1 1534 2 0 0 
23 0 9 \n", "2 1 0 0 1 NaN NaN 1 0 NaN 1 13 \n", "3 0 1 0 1 3938 3 0 1 65 2 12 \n", "4 0 1 1 2 989 3 0 1 32 3 13 \n", "\n", " WORKSTAT FACRP FAGSP FBATHP FBDSP FBLDP FBUSP FCONP FELEP FFSP \\\n", "0 NaN 0 0 0 0 0 0 0 0 0 \n", "1 9 0 0 0 1 1 0 0 0 0 \n", "2 13 1 0 0 0 0 1 0 0 0 \n", "3 11 1 0 0 0 0 1 0 0 0 \n", "4 13 0 0 0 0 0 0 0 0 0 \n", "\n", " FFULP FGASP FHFLP FINSP FKITP FMHP FMRGIP FMRGP FMRGTP FMRGXP \\\n", "0 0 1 0 0 0 0 0 0 0 0 \n", "1 1 0 1 0 0 0 0 1 0 0 \n", "2 0 0 0 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 0 0 0 \n", "4 0 0 0 1 0 0 1 1 1 0 \n", "\n", " FMVP FPLMP FREFRP FRMSP FRNTMP FRNTP FRWATP FSINKP FSMP FSMXHP \\\n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 1 0 0 0 0 1 0 \n", "2 0 0 0 0 0 0 0 0 0 0 \n", "3 1 0 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 1 1 \n", "\n", " FSMXSP FSTOVP FTAXP FTELP FTENP FTOILP FVACSP FVALP FVEHP FWATP \\\n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 1 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 0 0 0 \n", "4 1 0 1 0 0 0 0 1 0 0 \n", "\n", " FYBLP WGTP1 WGTP2 WGTP3 WGTP4 WGTP5 WGTP6 WGTP7 WGTP8 WGTP9 \\\n", "0 0 21 19 21 7 22 23 31 29 6 \n", "1 0 19 11 23 20 7 16 11 7 20 \n", "2 0 4 5 32 14 5 21 21 12 16 \n", "3 0 12 8 3 8 4 6 8 12 17 \n", "4 1 33 23 78 66 19 99 88 97 68 \n", "\n", " WGTP10 WGTP11 WGTP12 WGTP13 WGTP14 WGTP15 WGTP16 WGTP17 WGTP18 \\\n", "0 5 39 21 6 29 30 22 20 6 \n", "1 5 21 14 15 27 11 20 18 15 \n", "2 6 15 19 20 18 5 13 19 23 \n", "3 7 20 11 11 18 14 4 30 23 \n", "4 29 80 72 50 74 36 57 82 101 \n", "\n", " WGTP19 WGTP20 WGTP21 WGTP22 WGTP23 WGTP24 WGTP25 WGTP26 WGTP27 \\\n", "0 22 21 17 17 26 44 18 20 7 \n", "1 18 26 13 33 4 4 50 16 13 \n", "2 25 11 23 21 5 22 24 5 8 \n", "3 4 20 10 10 19 9 15 6 12 \n", "4 77 75 69 93 23 131 105 27 25 \n", "\n", " WGTP28 WGTP29 WGTP30 WGTP31 WGTP32 WGTP33 WGTP34 WGTP35 WGTP36 \\\n", "0 5 35 33 8 17 32 7 5 23 \n", "1 40 25 37 19 17 10 4 13 3 \n", "2 12 17 35 24 15 17 10 14 13 \n", "3 10 3 12 4 11 8 4 9 18 \n", "4 83 69 93 65 57 92 74 108 89 \n", "\n", " WGTP37 WGTP38 
WGTP39 WGTP40 WGTP41 WGTP42 WGTP43 WGTP44 WGTP45 \\\n", "0 16 35 20 15 17 16 22 34 18 \n", "1 11 14 6 4 16 4 28 24 6 \n", "2 16 5 5 18 3 6 27 21 5 \n", "3 5 3 18 4 12 10 4 13 3 \n", "4 128 38 17 55 102 103 44 61 109 \n", "\n", " WGTP46 WGTP47 WGTP48 WGTP49 WGTP50 WGTP51 WGTP52 WGTP53 WGTP54 \\\n", "0 17 6 6 41 41 6 17 30 6 \n", "1 19 15 5 18 3 17 15 12 27 \n", "2 25 28 14 10 7 13 15 17 21 \n", "3 12 13 10 28 13 20 16 11 17 \n", "4 29 31 69 66 94 73 66 77 62 \n", "\n", " WGTP55 WGTP56 WGTP57 WGTP58 WGTP59 WGTP60 WGTP61 WGTP62 WGTP63 \\\n", "0 6 21 20 30 19 17 18 25 25 \n", "1 13 33 14 19 47 18 22 28 5 \n", "2 4 14 14 22 22 15 31 26 5 \n", "3 11 3 18 15 3 13 9 8 14 \n", "4 104 82 78 38 30 87 37 21 85 \n", "\n", " WGTP64 WGTP65 WGTP66 WGTP67 WGTP68 WGTP69 WGTP70 WGTP71 WGTP72 \\\n", "0 6 24 24 32 45 5 7 39 21 \n", "1 6 28 13 12 26 21 15 12 12 \n", "2 17 34 7 4 17 20 24 12 12 \n", "3 12 17 10 10 9 5 8 3 13 \n", "4 63 25 66 93 80 55 29 90 74 \n", "\n", " WGTP73 WGTP74 WGTP75 WGTP76 WGTP77 WGTP78 WGTP79 WGTP80 \n", "0 6 33 36 18 24 7 17 16 \n", "1 15 5 13 7 15 13 7 5 \n", "2 14 14 26 18 13 4 5 16 \n", "3 11 2 9 19 2 3 13 3 \n", "4 86 63 19 70 50 100 115 70 \n", "\n", "[5 rows x 204 columns]" ] } ], "prompt_number": 7 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Now the job is to categorize acs and pums into the same categories - we start with the household acs data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "h_acs_cat = cat.categorize(h_acs, {\n", " (\"households\", \"total\"): \"B11001_001E\",\n", " (\"children\", \"yes\"): \"B11001_002E\",\n", " (\"children\", \"no\"): \"B11001_001E - B11001_002E\",\n", " (\"income\", \"lt35\"): \"B19001_002E + B19001_003E + B19001_004E + \"\n", " \"B19001_005E + B19001_006E + B19001_007E\",\n", " (\"income\", \"gt35-lt100\"): \"B19001_008E + B19001_009E + \"\n", " \"B19001_010E + B19001_011E + B19001_012E\"\n", " \"+ B19001_013E\",\n", " (\"income\", \"gt100\"): \"B19001_014E + B19001_015E + 
B19001_016E\"\n", " \"+ B19001_017E\",\n", " (\"cars\", \"none\"): \"B08201_002E\",\n", " (\"cars\", \"one\"): \"B08201_003E\",\n", " (\"cars\", \"two or more\"): \"B08201_004E + B08201_005E + B08201_006E\",\n", " (\"workers\", \"none\"): \"B08202_002E\",\n", " (\"workers\", \"one\"): \"B08202_003E\",\n", " (\"workers\", \"two or more\"): \"B08202_004E + B08202_005E\" \n", "}, index_cols=['NAME'])\n", "h_acs_cat" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cat_namecarschildrenhouseholdsincomeworkers
cat_valuenoneonetwo or morenoyestotalgt100gt35-lt100lt35noneonetwo or more
NAME
Block Group 1, Census Tract 306, San Francisco County, California 14 86 192 111 183 294 173 73 48 65 89 138
Block Group 2, Census Tract 306, San Francisco County, California 11 66 147 88 138 226 156 20 50 50 68 106
Block Group 3, Census Tract 306, San Francisco County, California 14 84 187 50 237 287 194 40 53 64 87 134
\n", "

3 rows \u00d7 12 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ "cat_name cars \\\n", "cat_value none one \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 14 86 \n", "Block Group 2, Census Tract 306, San Francisco County, California 11 66 \n", "Block Group 3, Census Tract 306, San Francisco County, California 14 84 \n", "\n", "cat_name \\\n", "cat_value two or more \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 192 \n", "Block Group 2, Census Tract 306, San Francisco County, California 147 \n", "Block Group 3, Census Tract 306, San Francisco County, California 187 \n", "\n", "cat_name children \\\n", "cat_value no \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 111 \n", "Block Group 2, Census Tract 306, San Francisco County, California 88 \n", "Block Group 3, Census Tract 306, San Francisco County, California 50 \n", "\n", "cat_name \\\n", "cat_value yes \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 183 \n", "Block Group 2, Census Tract 306, San Francisco County, California 138 \n", "Block Group 3, Census Tract 306, San Francisco County, California 237 \n", "\n", "cat_name households \\\n", "cat_value total \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 294 \n", "Block Group 2, Census Tract 306, San Francisco County, California 226 \n", "Block Group 3, Census Tract 306, San Francisco County, California 287 \n", "\n", "cat_name income \\\n", "cat_value gt100 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 173 \n", "Block Group 2, Census Tract 306, San Francisco County, California 156 \n", "Block Group 3, Census Tract 306, San Francisco County, California 194 \n", "\n", "cat_name \\\n", "cat_value gt35-lt100 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 73 \n", "Block Group 2, Census Tract 306, San Francisco 
County, California 20 \n", "Block Group 3, Census Tract 306, San Francisco County, California 40 \n", "\n", "cat_name \\\n", "cat_value lt35 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 48 \n", "Block Group 2, Census Tract 306, San Francisco County, California 50 \n", "Block Group 3, Census Tract 306, San Francisco County, California 53 \n", "\n", "cat_name workers \\\n", "cat_value none \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 65 \n", "Block Group 2, Census Tract 306, San Francisco County, California 50 \n", "Block Group 3, Census Tract 306, San Francisco County, California 64 \n", "\n", "cat_name \\\n", "cat_value one \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 89 \n", "Block Group 2, Census Tract 306, San Francisco County, California 68 \n", "Block Group 3, Census Tract 306, San Francisco County, California 87 \n", "\n", "cat_name \n", "cat_value two or more \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 138 \n", "Block Group 2, Census Tract 306, San Francisco County, California 106 \n", "Block Group 3, Census Tract 306, San Francisco County, California 134 \n", "\n", "[3 rows x 12 columns]" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "assert np.all(cat.sum_accross_category(h_acs_cat) < 2)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "And the same for ACS population - the output of the categorization is the MARGINALS for each variable category" ] }, { "cell_type": "code", "collapsed": false, "input": [ "p_acs_cat = cat.categorize(p_acs, {\n", " (\"population\", \"total\"): \"B01001_001E\",\n", " (\"age\", \"19 and under\"): \"B01001_003E + B01001_004E + B01001_005E + \"\n", " \"B01001_006E + B01001_007E + B01001_027E + \"\n", " \"B01001_028E + B01001_029E + B01001_030E + 
\"\n", " \"B01001_031E\",\n", " (\"age\", \"20 to 35\"): \"B01001_008E + B01001_009E + B01001_010E + \"\n", " \"B01001_011E + B01001_012E + B01001_032E + \"\n", " \"B01001_033E + B01001_034E + B01001_035E + \"\n", " \"B01001_036E\",\n", " (\"age\", \"35 to 60\"): \"B01001_013E + B01001_014E + B01001_015E + \"\n", " \"B01001_016E + B01001_017E + B01001_037E + \"\n", " \"B01001_038E + B01001_039E + B01001_040E + \"\n", " \"B01001_041E\",\n", " (\"age\", \"above 60\"): \"B01001_018E + B01001_019E + B01001_020E + \"\n", " \"B01001_021E + B01001_022E + B01001_023E + \"\n", " \"B01001_024E + B01001_025E + B01001_042E + \"\n", " \"B01001_043E + B01001_044E + B01001_045E + \"\n", " \"B01001_046E + B01001_047E + B01001_048E + \"\n", " \"B01001_049E\", \n", " (\"race\", \"white\"): \"B02001_002E\",\n", " (\"race\", \"black\"): \"B02001_003E\",\n", " (\"race\", \"asian\"): \"B02001_005E\",\n", " (\"race\", \"other\"): \"B02001_004E + B02001_006E + B02001_007E + \"\n", " \"B02001_008E\",\n", " (\"sex\", \"male\"): \"B01001_002E\",\n", " (\"sex\", \"female\"): \"B01001_026E\"\n", "}, index_cols=['NAME'])\n", "p_acs_cat" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cat_nameagepopulationracesex
cat_value19 and under20 to 3535 to 60above 60totalasianblackotherwhitefemalemale
NAME
Block Group 1, Census Tract 306, San Francisco County, California 129 62 296 168 655 187 11 34 423 334 321
Block Group 2, Census Tract 306, San Francisco County, California 97 34 288 109 528 11 9 60 448 292 236
Block Group 3, Census Tract 306, San Francisco County, California 203 78 411 166 858 218 11 6 623 365 493
\n", "

3 rows \u00d7 11 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ "cat_name age \\\n", "cat_value 19 and under \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 129 \n", "Block Group 2, Census Tract 306, San Francisco County, California 97 \n", "Block Group 3, Census Tract 306, San Francisco County, California 203 \n", "\n", "cat_name \\\n", "cat_value 20 to 35 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 62 \n", "Block Group 2, Census Tract 306, San Francisco County, California 34 \n", "Block Group 3, Census Tract 306, San Francisco County, California 78 \n", "\n", "cat_name \\\n", "cat_value 35 to 60 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 296 \n", "Block Group 2, Census Tract 306, San Francisco County, California 288 \n", "Block Group 3, Census Tract 306, San Francisco County, California 411 \n", "\n", "cat_name \\\n", "cat_value above 60 \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 168 \n", "Block Group 2, Census Tract 306, San Francisco County, California 109 \n", "Block Group 3, Census Tract 306, San Francisco County, California 166 \n", "\n", "cat_name population \\\n", "cat_value total \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 655 \n", "Block Group 2, Census Tract 306, San Francisco County, California 528 \n", "Block Group 3, Census Tract 306, San Francisco County, California 858 \n", "\n", "cat_name race \\\n", "cat_value asian \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 187 \n", "Block Group 2, Census Tract 306, San Francisco County, California 11 \n", "Block Group 3, Census Tract 306, San Francisco County, California 218 \n", "\n", "cat_name \\\n", "cat_value black \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 11 \n", "Block Group 2, Census Tract 306, San Francisco County, 
California 9 \n", "Block Group 3, Census Tract 306, San Francisco County, California 11 \n", "\n", "cat_name \\\n", "cat_value other \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 34 \n", "Block Group 2, Census Tract 306, San Francisco County, California 60 \n", "Block Group 3, Census Tract 306, San Francisco County, California 6 \n", "\n", "cat_name \\\n", "cat_value white \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 423 \n", "Block Group 2, Census Tract 306, San Francisco County, California 448 \n", "Block Group 3, Census Tract 306, San Francisco County, California 623 \n", "\n", "cat_name sex \\\n", "cat_value female \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 334 \n", "Block Group 2, Census Tract 306, San Francisco County, California 292 \n", "Block Group 3, Census Tract 306, San Francisco County, California 365 \n", "\n", "cat_name \n", "cat_value male \n", "NAME \n", "Block Group 1, Census Tract 306, San Francisco County, California 321 \n", "Block Group 2, Census Tract 306, San Francisco County, California 236 \n", "Block Group 3, Census Tract 306, San Francisco County, California 493 \n", "\n", "[3 rows x 11 columns]" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "assert np.all(cat.sum_accross_category(p_acs_cat) < 2)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 11 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "To get the marginals as a series for one geography, do this" ] }, { "cell_type": "code", "collapsed": false, "input": [ "p_acs_cat.iloc[0].transpose()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ "cat_name cat_value \n", "age 19 and under 129\n", " 20 to 35 62\n", " 35 to 60 296\n", " above 60 168\n", "population total 655\n", "race asian 187\n", " black 11\n", " other 
34\n", " white 423\n", "sex female 334\n", " male 321\n", "Name: Block Group 1, Census Tract 306, San Francisco County, California, dtype: int64" ] } ], "prompt_number": 12 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Now categorize the PUMS population data into the same categories" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def age_cat(r):\n", " if r.AGEP <= 19: return \"19 and under\"\n", " elif r.AGEP <= 35: return \"20 to 35\"\n", " elif r.AGEP <= 60: return \"35 to 60\"\n", " return \"above 60\"\n", "\n", "def race_cat(r):\n", " if r.RAC1P == 1: return \"white\"\n", " elif r.RAC1P == 2: return \"black\"\n", " elif r.RAC1P == 6: return \"asian\"\n", " return \"other\"\n", "\n", "def sex_cat(r):\n", " if r.SEX == 1: return \"male\"\n", " return \"female\"\n", "\n", "_, jd_persons = cat.joint_distribution(\n", " p_pums,\n", " cat.category_combinations(p_acs_cat.columns),\n", " {\"age\": age_cat, \"race\": race_cat, \"sex\": sex_cat}\n", ")\n", "jd_persons " ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfrequency
ageracesex
19 and underasianfemale 0 37
male 1 44
blackfemale 2 1
male 3 5
otherfemale 4 21
male 5 25
whitefemale 6 42
male 7 35
20 to 35asianfemale 8 42
male 9 47
blackfemale 10 6
male 11 6
otherfemale 12 12
male 13 13
whitefemale 14 44
male 15 43
35 to 60asianfemale 16 96
male 17 81
blackfemale 18 10
male 19 7
otherfemale 20 22
male 21 17
whitefemale 22 68
male 23 88
above 60asianfemale 24 56
male 25 38
blackfemale 26 14
male 27 8
otherfemale 28 7
male 29 5
whitefemale 30 64
male 31 59
\n", "

32 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ " id frequency\n", "age race sex \n", "19 and under asian female 0 37\n", " male 1 44\n", " black female 2 1\n", " male 3 5\n", " other female 4 21\n", " male 5 25\n", " white female 6 42\n", " male 7 35\n", "20 to 35 asian female 8 42\n", " male 9 47\n", " black female 10 6\n", " male 11 6\n", " other female 12 12\n", " male 13 13\n", " white female 14 44\n", " male 15 43\n", "35 to 60 asian female 16 96\n", " male 17 81\n", " black female 18 10\n", " male 19 7\n", " other female 20 22\n", " male 21 17\n", " white female 22 68\n", " male 23 88\n", "above 60 asian female 24 56\n", " male 25 38\n", " black female 26 14\n", " male 27 8\n", " other female 28 7\n", " male 29 5\n", " white female 30 64\n", " male 31 59\n", "\n", "[32 rows x 2 columns]" ] } ], "prompt_number": 13 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Do the same for households - the output of this step is the JOINT DISTRIBUTION over the cross product of all possible categories" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def cars_cat(r):\n", " if r.VEH == 0: return \"none\"\n", " elif r.VEH == 1: return \"one\"\n", " return \"two or more\"\n", "\n", "def children_cat(r):\n", " if r.NOC > 0: return \"yes\"\n", " return \"no\"\n", "\n", "def income_cat(r):\n", " if r.FINCP > 100000: return \"gt100\"\n", " elif r.FINCP > 35000: return \"gt35-lt100\"\n", " return \"lt35\"\n", "\n", "def workers_cat(r):\n", " if r.WIF == 3: return \"two or more\"\n", " elif r.WIF == 2: return \"two or more\"\n", " elif r.WIF == 1: return \"one\"\n", " return \"none\"\n", "\n", "_, jd_households = cat.joint_distribution(\n", " h_pums,\n", " cat.category_combinations(h_acs_cat.columns),\n", " {\"cars\": cars_cat, \"children\": children_cat, \n", " \"income\": income_cat, \"workers\": workers_cat}\n", ")\n", "jd_households" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idfrequency
carsworkerschildrenincome
nonenonenogt100 0 0
gt35-lt100 1 1
lt35 2 37
yesgt100 3 0
gt35-lt100 4 0
lt35 5 0
onenogt100 6 0
gt35-lt100 7 2
lt35 8 2
yesgt100 9 0
gt35-lt100 10 1
lt35 11 0
two or morenogt100 12 2
gt35-lt100 13 5
lt35 14 0
yesgt100 15 0
gt35-lt100 16 1
lt35 17 0
onenonenogt100 18 1
gt35-lt100 19 7
lt35 20 81
yesgt100 21 0
gt35-lt100 22 0
lt35 23 0
onenogt100 24 5
gt35-lt100 25 6
lt35 26 2
yesgt100 27 3
gt35-lt100 28 6
lt35 29 8
two or morenogt100 30 8
gt35-lt100 31 10
lt35 32 1
yesgt100 33 6
gt35-lt100 34 7
lt35 35 1
two or morenonenogt100 36 2
gt35-lt100 37 8
lt35 38 91
yesgt100 39 0
gt35-lt100 40 0
lt35 41 1
onenogt100 42 5
gt35-lt100 43 8
lt35 44 5
yesgt100 45 6
gt35-lt100 46 6
lt35 47 0
two or morenogt100 48 36
gt35-lt100 49 22
lt35 50 5
yesgt100 51 36
gt35-lt100 52 10
lt35 53 0
\n", "

54 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ " id frequency\n", "cars workers children income \n", "none none no gt100 0 0\n", " gt35-lt100 1 1\n", " lt35 2 37\n", " yes gt100 3 0\n", " gt35-lt100 4 0\n", " lt35 5 0\n", " one no gt100 6 0\n", " gt35-lt100 7 2\n", " lt35 8 2\n", " yes gt100 9 0\n", " gt35-lt100 10 1\n", " lt35 11 0\n", " two or more no gt100 12 2\n", " gt35-lt100 13 5\n", " lt35 14 0\n", " yes gt100 15 0\n", " gt35-lt100 16 1\n", " lt35 17 0\n", "one none no gt100 18 1\n", " gt35-lt100 19 7\n", " lt35 20 81\n", " yes gt100 21 0\n", " gt35-lt100 22 0\n", " lt35 23 0\n", " one no gt100 24 5\n", " gt35-lt100 25 6\n", " lt35 26 2\n", " yes gt100 27 3\n", " gt35-lt100 28 6\n", " lt35 29 8\n", " two or more no gt100 30 8\n", " gt35-lt100 31 10\n", " lt35 32 1\n", " yes gt100 33 6\n", " gt35-lt100 34 7\n", " lt35 35 1\n", "two or more none no gt100 36 2\n", " gt35-lt100 37 8\n", " lt35 38 91\n", " yes gt100 39 0\n", " gt35-lt100 40 0\n", " lt35 41 1\n", " one no gt100 42 5\n", " gt35-lt100 43 8\n", " lt35 44 5\n", " yes gt100 45 6\n", " gt35-lt100 46 6\n", " lt35 47 0\n", " two or more no gt100 48 36\n", " gt35-lt100 49 22\n", " lt35 50 5\n", " yes gt100 51 36\n", " gt35-lt100 52 10\n", " lt35 53 0\n", "\n", "[54 rows x 2 columns]" ] } ], "prompt_number": 14 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "With marginals (aggregate, from ACS) and joint distribution (disaggregate, from PUMS) we're ready for some synthesis" ] }, { "cell_type": "code", "collapsed": false, "input": [ "\"TBD\"" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 16, "text": [ "'TBD'" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }