{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This file is part of the Minnesota Population Center's NHGISXWALK.\n", "# For copyright and licensing information, see the NOTICE and LICENSE files\n", "# in this project's top-level directory, and also on-line at:\n", "# https://github.com/ipums/nhgisxwalk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample workflow: 2000 block group parts to 2010 counties\n", "\n", "## Starting from a subset of 2010 Delaware blocks\n", "\n", "For further background information see:\n", "\n", "* **Schroeder, J. P.** 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3):311–335.\n", "\n", "#### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:53.079308Z", "start_time": "2020-08-19T22:07:52.962003Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2020-08-19T18:07:53-04:00\n", "\n", "CPython 3.8.5\n", "IPython 7.16.1\n", "\n", "compiler : Clang 10.0.1 \n", "system : Darwin\n", "release : 19.6.0\n", "machine : x86_64\n", "processor : i386\n", "CPU cores : 8\n", "interpreter: 64bit\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:53.366088Z", "start_time": "2020-08-19T22:07:53.082279Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "watermark 2.0.2\n", "pandas 1.1.0\n", "nhgisxwalk 0.0.9\n", "\n" ] } ], "source": [ "import nhgisxwalk\n", "import inspect\n", "import pandas\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%watermark -w\n", "%watermark -iv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source and target years for the 
crosswalk" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:53.385820Z", "start_time": "2020-08-19T22:07:53.368554Z" } }, "outputs": [], "source": [ "# Source/target census years and the matching GJOIN<year> labels.\n", "source_year, target_year = \"2000\", \"2010\"\n", "gj_src, gj_trg = (f\"GJOIN{year}\" for year in (source_year, target_year))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source-target building base" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:53.452406Z", "start_time": "2020-08-19T22:07:53.387660Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | GJOIN2000 | \n", "GJOIN2010 | \n", "WEIGHT | \n", "PAREA | \n", "
|---|---|---|---|---|
| 0 | \n", "G10000100401001000 | \n", "G10000100401001000 | \n", "1.000000 | \n", "1.000000 | \n", "
| 1 | \n", "G10000100401001001 | \n", "G10000100401001001 | \n", "0.999981 | \n", "0.999988 | \n", "
| 2 | \n", "G10000100401001001 | \n", "G10000100401001003 | \n", "0.000019 | \n", "0.000012 | \n", "
| 3 | \n", "G10000100401001002 | \n", "G10000100401001002 | \n", "1.000000 | \n", "1.000000 | \n", "
| 4 | \n", "G10000100401001003 | \n", "G10000100401001003 | \n", "1.000000 | \n", "1.000000 | \n", "
| \n", " | bgp2000gj | \n", "co2010gj | \n", "co2010ge | \n", "wt_pop | \n", "wt_fam | \n", "wt_hh | \n", "wt_hu | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "G10000109044444430042202U1 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1 | \n", "G10000109044461265042201R1 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 2 | \n", "G10000109044461265042201U1 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 3 | \n", "G10000109044461265042201U2 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 4 | \n", "G10000109044461480042202R2 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 903 | \n", "G10000509355299999051500R4 | \n", "G1000050 | \n", "10005 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 904 | \n", "G10000509355299999051500U1 | \n", "G1000050 | \n", "10005 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 905 | \n", "G10000509355299999051500U3 | \n", "G1000050 | \n", "10005 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 906 | \n", "G10000509355299999051500U4 | \n", "G1000050 | \n", "10005 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 907 | \n", "G34003301061010600020400U2 | \n", "G1000030 | \n", "10003 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
908 rows × 7 columns
\n", "| \n", " | bgp2000gj | \n", "co2010gj | \n", "co2010ge | \n", "wt_pop | \n", "wt_fam | \n", "wt_hh | \n", "wt_hu | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "G10000109044444430042202U1 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1 | \n", "G10000109044461265042201R1 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 2 | \n", "G10000109044461265042201U1 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 3 | \n", "G10000109044461265042201U2 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 4 | \n", "G10000109044461480042202R2 | \n", "G1000010 | \n", "10001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 903 | \n", "G10000509355299999051500R4 | \n", "G1000050 | \n", "10005 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 904 | \n", "G10000509355299999051500U1 | \n", "G1000050 | \n", "10005 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 905 | \n", "G10000509355299999051500U3 | \n", "G1000050 | \n", "10005 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 906 | \n", "G10000509355299999051500U4 | \n", "G1000050 | \n", "10005 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 907 | \n", "G34003301061010600020400U2 | \n", "G1000030 | \n", "10003 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
908 rows × 7 columns
\n", "