{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample workflow: 2000 block group parts to 2010 tracts\n", "\n", "## Starting from a subset of 2010 Delaware blocks\n", "\n", "For further background information, see:\n", "* **Schroeder, J. P.** 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3):311–335.\n", "\n", "#### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:31.407006Z", "start_time": "2020-06-21T01:22:31.388628Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2020-06-20T21:22:31-04:00\n", "\n", "CPython 3.7.6\n", "IPython 7.15.0\n", "\n", "compiler : Clang 9.0.1 \n", "system : Darwin\n", "release : 19.5.0\n", "machine : x86_64\n", "processor : i386\n", "CPU cores : 8\n", "interpreter: 64bit\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:31.667253Z", "start_time": "2020-06-21T01:22:31.409541Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "watermark 2.0.2\n", "nhgisxwalk 0.0.4\n", "pandas 1.0.4\n", "\n" ] } ], "source": [ "import nhgisxwalk\n", "import inspect\n", "import pandas\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%watermark -w\n", "%watermark -iv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source and target years for the crosswalk" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:31.688421Z", "start_time": "2020-06-21T01:22:31.669855Z" } }, "outputs": [], "source": [ "source_year, target_year = \"2000\", \"2010\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Base block-level crosswalk (source year to target year)" ] }, { 
"cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:31.755998Z", "start_time": "2020-06-21T01:22:31.690527Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GJOIN2000GJOIN2010WEIGHTPAREA
0G10000100401001000G100001004010010001.0000001.000000
1G10000100401001001G100001004010010010.9999810.999988
2G10000100401001001G100001004010010030.0000190.000012
3G10000100401001002G100001004010010021.0000001.000000
4G10000100401001003G100001004010010031.0000001.000000
\n", "
" ], "text/plain": [ " GJOIN2000 GJOIN2010 WEIGHT PAREA\n", "0 G10000100401001000 G10000100401001000 1.000000 1.000000\n", "1 G10000100401001001 G10000100401001001 0.999981 0.999988\n", "2 G10000100401001001 G10000100401001003 0.000019 0.000012\n", "3 G10000100401001002 G10000100401001002 1.000000 1.000000\n", "4 G10000100401001003 G10000100401001003 1.000000 1.000000" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "subset_data_dir = \"../testing_data_subsets\"\n", "base_xwalk_name = \"/nhgis_blk%s_blk%s_gj.csv.zip\" % (source_year, target_year)\n", "base_xwalk_file = subset_data_dir + base_xwalk_name\n", "data_types = nhgisxwalk.str_types([\"GJOIN%s\"%source_year, \"GJOIN%s\"%target_year])\n", "base_xwalk = pandas.read_csv(base_xwalk_file, index_col=0, dtype=data_types)\n", "base_xwalk.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Convenience code shorthand/lookup" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:31.778102Z", "start_time": "2020-06-21T01:22:31.757835Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "def valid_geo_shorthand(shorthand_name=True):\n", " \"\"\"Shorthand lookups for census geographies.\"\"\"\n", " lookup = {\n", " \"blk\": \"block\",\n", " \"bgp\": \"block group part\",\n", " \"bkg\": \"block group\",\n", " \"trt\": \"tract\",\n", " \"cty\": \"county\",\n", " }\n", " if not shorthand_name:\n", " lookup = {v: k for k, v in lookup.items()}\n", " return lookup\n", "\n" ] } ], "source": [ "print(inspect.getsource(nhgisxwalk.valid_geo_shorthand))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:31.799993Z", "start_time": "2020-06-21T01:22:31.779860Z" } }, "outputs": [ { "data": { "text/plain": [ "{'block': 'blk',\n", " 'block group part': 'bgp',\n", " 'block group': 'bkg',\n", " 'tract': 'trt',\n", " 'county': 'cty'}" ] 
}, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nhgisxwalk.valid_geo_shorthand(shorthand_name=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Instantiate an `nhgisxwalk.GeoCrossWalk` object\n", "##### see [nhgisxwalk.GeoCrossWalk](https://github.com/jGaboardi/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:31.822916Z", "start_time": "2020-06-21T01:22:31.801539Z" } }, "outputs": [ { "data": { "text/plain": [ "{'Persons': {'Persons': 'Universe',\n", " 'NP001A': 'Source code',\n", " 'FXS': 'NHGIS code',\n", " 'Total': 'FXS001'},\n", " 'Families': {'Families': 'Universe',\n", " 'NP031A': 'Source code',\n", " 'F2V': 'NHGIS code',\n", " 'Total': 'F2V001'},\n", " 'Households': {'Households': 'Universe',\n", " 'NP010A': 'Source code',\n", " 'FY4': 'NHGIS code',\n", " 'Total': 'FY4001'},\n", " 'Housing Units': {'Housing Units': 'Universe',\n", " 'NH001A': 'Source code',\n", " 'FV5': 'NHGIS code',\n", " 'Total': 'FV5001'}}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nhgisxwalk.desc_code_2000_SF1b" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:31.847713Z", "start_time": "2020-06-21T01:22:31.826537Z" } }, "outputs": [ { "data": { "text/plain": [ "['FXS001', 'F2V001', 'FY4001', 'FV5001']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "input_vars = [\n", " nhgisxwalk.desc_code_2000_SF1b[\"Persons\"][\"Total\"],\n", " nhgisxwalk.desc_code_2000_SF1b[\"Families\"][\"Total\"],\n", " nhgisxwalk.desc_code_2000_SF1b[\"Households\"][\"Total\"],\n", " nhgisxwalk.desc_code_2000_SF1b[\"Housing Units\"][\"Total\"]\n", "]\n", "input_vars" ] }, { "cell_type": "code", "execution_count": 9, "metadata": 
{ "ExecuteTime": { "end_time": "2020-06-21T01:22:31.869571Z", "start_time": "2020-06-21T01:22:31.849456Z" } }, "outputs": [], "source": [ "input_var_tags = [\"pop\", \"fam\", \"hh\", \"hu\"]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:32.182393Z", "start_time": "2020-06-21T01:22:31.871777Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bgp2000gjtrt2010gjtrt2010gewt_popwt_famwt_hhwt_hu
0G10000109044444430042202U1G1000010042202100010422021.01.01.01.0
1G10000109044461265042201R1G1000010042201100010422011.01.01.01.0
2G10000109044461265042201U1G1000010042201100010422011.01.01.01.0
3G10000109044461265042201U2G1000010042201100010422011.01.01.01.0
4G10000109044461480042202R2G1000010042202100010422021.01.01.01.0
........................
1038G10000509355299999051500R4G1000050051500100050515001.01.01.01.0
1039G10000509355299999051500U1G1000050051500100050515001.01.01.01.0
1040G10000509355299999051500U3G1000050051500100050515001.01.01.01.0
1041G10000509355299999051500U4G1000050051500100050515001.01.01.01.0
1042G34003301061010600020400U2G1000030990100100039901000.00.00.00.0
\n", "

1043 rows × 7 columns

\n", "
" ], "text/plain": [ " bgp2000gj trt2010gj trt2010ge wt_pop wt_fam \\\n", "0 G10000109044444430042202U1 G1000010042202 10001042202 1.0 1.0 \n", "1 G10000109044461265042201R1 G1000010042201 10001042201 1.0 1.0 \n", "2 G10000109044461265042201U1 G1000010042201 10001042201 1.0 1.0 \n", "3 G10000109044461265042201U2 G1000010042201 10001042201 1.0 1.0 \n", "4 G10000109044461480042202R2 G1000010042202 10001042202 1.0 1.0 \n", "... ... ... ... ... ... \n", "1038 G10000509355299999051500R4 G1000050051500 10005051500 1.0 1.0 \n", "1039 G10000509355299999051500U1 G1000050051500 10005051500 1.0 1.0 \n", "1040 G10000509355299999051500U3 G1000050051500 10005051500 1.0 1.0 \n", "1041 G10000509355299999051500U4 G1000050051500 10005051500 1.0 1.0 \n", "1042 G34003301061010600020400U2 G1000030990100 10003990100 0.0 0.0 \n", "\n", " wt_hh wt_hu \n", "0 1.0 1.0 \n", "1 1.0 1.0 \n", "2 1.0 1.0 \n", "3 1.0 1.0 \n", "4 1.0 1.0 \n", "... ... ... \n", "1038 1.0 1.0 \n", "1039 1.0 1.0 \n", "1040 1.0 1.0 \n", "1041 1.0 1.0 \n", "1042 0.0 0.0 \n", "\n", "[1043 rows x 7 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "subset_state = \"10\"\n", "bgp2000_to_trt2010 = nhgisxwalk.GeoCrossWalk(\n", " base_xwalk,\n", " source_year=source_year,\n", " target_year=target_year,\n", " source_geo=\"bgp\",\n", " target_geo=\"trt\",\n", " base_source_table=subset_data_dir+\"/2000_block.csv.zip\",\n", " input_var=input_vars,\n", " weight_var=input_var_tags,\n", " stfips=subset_state,\n", " keep_base=True,\n", " add_geoid=True\n", ")\n", "bgp2000_to_trt2010.xwalk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Write crosswalk to a `.csv`" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:32.212868Z", "start_time": "2020-06-21T01:22:32.184589Z" } }, "outputs": [], "source": [ "state_dir = \"../../crosswalks/nhgis_bgp2000_trt2010_state/\"\n", "nhgisxwalk.xwalk_df_to_csv(\n", " 
cls=bgp2000_to_trt2010,\n", " path=state_dir\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read crosswalk from a `.csv`" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2020-06-21T01:22:32.248147Z", "start_time": "2020-06-21T01:22:32.215631Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bgp2000gjtrt2010gjtrt2010gewt_popwt_famwt_hhwt_hu
0G10000109044444430042202U1G1000010042202100010422021.01.01.01.0
1G10000109044461265042201R1G1000010042201100010422011.01.01.01.0
2G10000109044461265042201U1G1000010042201100010422011.01.01.01.0
3G10000109044461265042201U2G1000010042201100010422011.01.01.01.0
4G10000109044461480042202R2G1000010042202100010422021.01.01.01.0
........................
1038G10000509355299999051500R4G1000050051500100050515001.01.01.01.0
1039G10000509355299999051500U1G1000050051500100050515001.01.01.01.0
1040G10000509355299999051500U3G1000050051500100050515001.01.01.01.0
1041G10000509355299999051500U4G1000050051500100050515001.01.01.01.0
1042G34003301061010600020400U2G1000030990100100039901000.00.00.00.0
\n", "

1043 rows × 7 columns

\n", "
" ], "text/plain": [ " bgp2000gj trt2010gj trt2010ge wt_pop wt_fam \\\n", "0 G10000109044444430042202U1 G1000010042202 10001042202 1.0 1.0 \n", "1 G10000109044461265042201R1 G1000010042201 10001042201 1.0 1.0 \n", "2 G10000109044461265042201U1 G1000010042201 10001042201 1.0 1.0 \n", "3 G10000109044461265042201U2 G1000010042201 10001042201 1.0 1.0 \n", "4 G10000109044461480042202R2 G1000010042202 10001042202 1.0 1.0 \n", "... ... ... ... ... ... \n", "1038 G10000509355299999051500R4 G1000050051500 10005051500 1.0 1.0 \n", "1039 G10000509355299999051500U1 G1000050051500 10005051500 1.0 1.0 \n", "1040 G10000509355299999051500U3 G1000050051500 10005051500 1.0 1.0 \n", "1041 G10000509355299999051500U4 G1000050051500 10005051500 1.0 1.0 \n", "1042 G34003301061010600020400U2 G1000030990100 10003990100 0.0 0.0 \n", "\n", " wt_hh wt_hu \n", "0 1.0 1.0 \n", "1 1.0 1.0 \n", "2 1.0 1.0 \n", "3 1.0 1.0 \n", "4 1.0 1.0 \n", "... ... ... \n", "1038 1.0 1.0 \n", "1039 1.0 1.0 \n", "1040 1.0 1.0 \n", "1041 1.0 1.0 \n", "1042 0.0 0.0 \n", "\n", "[1043 rows x 7 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fname = state_dir + bgp2000_to_trt2010.xwalk_name\n", "bgp2000_to_trt2010_df = nhgisxwalk.xwalk_df_from_csv(fname)\n", "bgp2000_to_trt2010_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "-----------------------------------------------" ] } ], "metadata": { "_draft": { "nbviewer_url": "https://gist.github.com/9f47e4ec2cc37bce83acf20abfca69d2" }, "gist": { "data": { "description": "sample-workflow.ipynb", "public": true }, "id": "9f47e4ec2cc37bce83acf20abfca69d2" }, "kernelspec": { "display_name": "Python [conda env:nhgis]", "language": "python", "name": "conda-env-nhgis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, 
"nbformat": 4, "nbformat_minor": 4 }