{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Generate national and state-level crosswalks\n", "## 2000 block group parts to 2010 tracts\n", "\n", "### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)\n", "\n", "**James D. Gaboardi, 06/2020**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:54:51.123976Z", "start_time": "2020-08-19T22:54:51.012145Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2020-08-19T18:54:51-04:00\n", "\n", "CPython 3.8.5\n", "IPython 7.16.1\n", "\n", "compiler : Clang 10.0.1 \n", "system : Darwin\n", "release : 19.6.0\n", "machine : x86_64\n", "processor : i386\n", "CPU cores : 8\n", "interpreter: 64bit\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:54:51.413338Z", "start_time": "2020-08-19T22:54:51.126049Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "watermark 2.0.2\n", "numpy 1.19.1\n", "nhgisxwalk 0.0.9\n", "pandas 1.1.0\n", "\n" ] } ], "source": [ "import nhgisxwalk\n", "import inspect\n", "import numpy\n", "import pandas\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%watermark -w\n", "%watermark -iv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source and target years for the crosswalk" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:54:51.434593Z", "start_time": "2020-08-19T22:54:51.416152Z" } }, "outputs": [], "source": [ "source_year, target_year = \"2000\", \"2010\"\n", "gj_src, gj_trg = \"GJOIN%s\"%source_year, \"GJOIN%s\"%target_year" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:54:51.454021Z", "start_time": "2020-08-19T22:54:51.436294Z" } }, "outputs": [], "source": [ "data_in = 
\"../../crosswalks/\"\n", "data_tab = \"../../tabular_data/\"\n", "block_file = \"%s_block\" % source_year" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source-target building base" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:55:04.783556Z", "start_time": "2020-08-19T22:54:51.455409Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GJOIN2000GJOIN2010WEIGHTPAREA
0G01000100201001000G010001002010020000.0358970.008988
1G01000100201001000G010001002010020010.2533300.263725
2G01000100201001000G010001002010020020.0000000.000385
3G01000100201001000G010001002010020030.0762970.055430
4G01000100201001000G010001002010020040.0324410.007543
\n", "
" ], "text/plain": [ " GJOIN2000 GJOIN2010 WEIGHT PAREA\n", "0 G01000100201001000 G01000100201002000 0.035897 0.008988\n", "1 G01000100201001000 G01000100201002001 0.253330 0.263725\n", "2 G01000100201001000 G01000100201002002 0.000000 0.000385\n", "3 G01000100201001000 G01000100201002003 0.076297 0.055430\n", "4 G01000100201001000 G01000100201002004 0.032441 0.007543" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base_xwalk_name = \"nhgis_blk%s_blk%s_gj\" % (source_year, target_year)\n", "data_types = nhgisxwalk.str_types([gj_src, gj_trg])\n", "from_csv_kws = {\"path\": data_in, \"archived\": True, \"remove_unpacked\": True}\n", "read_csv_kws = {\"dtype\": data_types}\n", "base_xwalk = nhgisxwalk.xwalk_df_from_csv(\n", " base_xwalk_name, **from_csv_kws, **read_csv_kws\n", ")\n", "base_xwalk.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source summary data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:55:04.807297Z", "start_time": "2020-08-19T22:55:04.786312Z" } }, "outputs": [ { "data": { "text/plain": [ "'../../tabular_data/2000_block/2000_block.csv'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base_source_name = \"%s/%s.csv\" % (block_file, block_file)\n", "base_source_file = \"%s%s\" % (data_tab, base_source_name)\n", "base_source_file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Convenience code shorthand/lookup" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:55:04.832198Z", "start_time": "2020-08-19T22:55:04.809480Z" } }, "outputs": [ { "data": { "text/plain": [ "{'block': 'blk',\n", " 'block group part': 'bgp',\n", " 'block group': 'bg',\n", " 'tract': 'tr',\n", " 'county': 'co'}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"nhgisxwalk.valid_geo_shorthand(shorthand_name=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the `nhgisxwalk.GeoCrossWalk` parameters\n", "##### see [nhgisxwalk.GeoCrossWalk](https://github.com/jGaboardi/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:55:04.858273Z", "start_time": "2020-08-19T22:55:04.837565Z" } }, "outputs": [ { "data": { "text/plain": [ "{'Persons': {'Persons': 'Universe',\n", " 'NP001A': 'Source code',\n", " 'FXS': 'NHGIS code',\n", " 'Total': 'FXS001'},\n", " 'Families': {'Families': 'Universe',\n", " 'NP031A': 'Source code',\n", " 'F2V': 'NHGIS code',\n", " 'Total': 'F2V001'},\n", " 'Households': {'Households': 'Universe',\n", " 'NP010A': 'Source code',\n", " 'FY4': 'NHGIS code',\n", " 'Total': 'FY4001'},\n", " 'Housing Units': {'Housing Units': 'Universe',\n", " 'NH001A': 'Source code',\n", " 'FV5': 'NHGIS code',\n", " 'Total': 'FV5001'}}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nhgisxwalk.desc_code_2000_SF1b" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:55:04.881247Z", "start_time": "2020-08-19T22:55:04.861244Z" } }, "outputs": [ { "data": { "text/plain": [ "['FXS001', 'F2V001', 'FY4001', 'FV5001']" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 'Total' NHGIS code of every 2000 SF1b universe, listed in the same\n", "# sequence as the tags in `input_var_tags` (pop, fam, hh, hu).\n", "sf1b_universes = (\"Persons\", \"Families\", \"Households\", \"Housing Units\")\n", "input_vars = [\n", "    nhgisxwalk.desc_code_2000_SF1b[universe][\"Total\"]\n", "    for universe in sf1b_universes\n", "]\n", "input_vars" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:55:04.900327Z", "start_time": 
"2020-08-19T22:55:04.883292Z" } }, "outputs": [], "source": [ "# Short tags supplied to the crosswalk as `weight_var`.\n", "input_var_tags = [\"pop\", \"fam\", \"hh\", \"hu\"]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:55:04.919776Z", "start_time": "2020-08-19T22:55:04.901741Z" } }, "outputs": [], "source": [ "# Settings for the crosswalk build; \"bgp\" and \"tr\" are the shorthands\n", "# for block group part and tract shown in the lookup above.\n", "xwalk_args = dict(\n", "    source_year=source_year,\n", "    target_year=target_year,\n", "    source_geo=\"bgp\",\n", "    target_geo=\"tr\",\n", "    base_source_table=base_source_file,\n", "    input_var=input_vars,\n", "    weight_var=input_var_tags,\n", "    keep_base=False,\n", "    add_geoid=True,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate data product\n", "1. Create a national crosswalk then split by state \n", "2. Write out all products with `README.txt` files" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:57:10.161550Z", "start_time": "2020-08-19T22:55:04.922285Z" } }, "outputs": [], "source": [ "# Long-running step (about two minutes in the recorded run).\n", "nhgisxwalk.generate_data_product(base_xwalk, xwalk_args, data_in)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "-----------------------------------------------" ] } ], "metadata": { "_draft": { "nbviewer_url": "https://gist.github.com/9f47e4ec2cc37bce83acf20abfca69d2" }, "gist": { "data": { "description": "sample-workflow.ipynb", "public": true }, "id": "9f47e4ec2cc37bce83acf20abfca69d2" }, "kernelspec": { "display_name": "Python [conda env:nhgis]", "language": "python", "name": "conda-env-nhgis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }