{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This file is part of the Minnesota Population Center's NHGISXWALK.\n", "# For copyright and licensing information, see the NOTICE and LICENSE files\n", "# in this project's top-level directory, and also on-line at:\n", "# https://github.com/ipums/nhgisxwalk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample workflow: 1990 block group parts to 2010 tracts\n", "\n", "## Starting from a subset of 2010 Delaware blocks\n", "\n", "For further background information see:\n", "\n", "* **Schroeder, J. P**. 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3):311–335.\n", "\n", "#### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:22.191149Z", "start_time": "2020-08-19T22:07:22.064267Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2020-08-19T18:07:22-04:00\n", "\n", "CPython 3.8.5\n", "IPython 7.16.1\n", "\n", "compiler : Clang 10.0.1 \n", "system : Darwin\n", "release : 19.6.0\n", "machine : x86_64\n", "processor : i386\n", "CPU cores : 8\n", "interpreter: 64bit\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:22.478158Z", "start_time": "2020-08-19T22:07:22.193759Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "watermark 2.0.2\n", "numpy 1.19.1\n", "pandas 1.1.0\n", "nhgisxwalk 0.0.9\n", "\n" ] } ], "source": [ "import nhgisxwalk\n", "import inspect\n", "import numpy\n", "import pandas\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%watermark -w\n", "%watermark -iv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source and target years for the crosswalk" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:22.498507Z", "start_time": "2020-08-19T22:07:22.480973Z" } }, "outputs": [], "source": [ "source_year, target_year = \"1990\", \"2010\"\n", "gj_src, gj_trg = \"GJOIN%s\"%source_year, \"GJOIN%s\"%target_year" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source-target building base" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:22.571834Z", "start_time": "2020-08-19T22:07:22.500302Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | GJOIN1990 | \n", "GJOIN2010 | \n", "WEIGHT | \n", "PAREA_VIA_BLK00 | \n", "
|---|---|---|---|---|
| 0 | \n", "G10000100401101 | \n", "G10000100401001000 | \n", "1.000000 | \n", "1.000000 | \n", "
| 1 | \n", "G10000100401102 | \n", "G10000100401001001 | \n", "0.921750 | \n", "0.976774 | \n", "
| 2 | \n", "G10000100401102 | \n", "G10000100401001002 | \n", "0.078219 | \n", "0.023215 | \n", "
| 3 | \n", "G10000100401102 | \n", "G10000100401001003 | \n", "0.000031 | \n", "0.000012 | \n", "
| 4 | \n", "G10000100401103 | \n", "G10000100401001003 | \n", "1.000000 | \n", "1.000000 | \n", "
| \n", " | bgp1990gj | \n", "tr2010gj | \n", "tr2010ge | \n", "wt_pop | \n", "wt_fam | \n", "wt_hh | \n", "wt_hu | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "G100001090444072500423009999999999921 | \n", "G1000010043202 | \n", "10001043202 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1 | \n", "G100001090444444300422009999999999926 | \n", "G1000010042202 | \n", "10001042202 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 2 | \n", "G100001090444612650422009999999219011 | \n", "G1000010041200 | \n", "10001041200 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
| 3 | \n", "G100001090444612650422009999999219011 | \n", "G1000010042201 | \n", "10001042201 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 4 | \n", "G100001090444612650422009999999219012 | \n", "G1000010042201 | \n", "10001042201 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1058 | \n", "G100005093552999990515009999999999923 | \n", "G1000050051500 | \n", "10005051500 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1059 | \n", "G100005093552999990515009999999999924 | \n", "G1000050051500 | \n", "10005051500 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1060 | \n", "G100005093552999990516009999999999921 | \n", "G1000050051702 | \n", "10005051702 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1061 | \n", "G340033010610106000204029999999916014 | \n", "G1000030990100 | \n", "10003990100 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
| 1062 | \n", "NaN | \n", "G1000050990000 | \n", "10005990000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1063 rows × 7 columns
\n", "| \n", " | bgp1990gj | \n", "tr2010gj | \n", "tr2010ge | \n", "wt_pop | \n", "wt_fam | \n", "wt_hh | \n", "wt_hu | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "G100001090444072500423009999999999921 | \n", "G1000010043202 | \n", "10001043202 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1 | \n", "G100001090444444300422009999999999926 | \n", "G1000010042202 | \n", "10001042202 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 2 | \n", "G100001090444612650422009999999219011 | \n", "G1000010041200 | \n", "10001041200 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
| 3 | \n", "G100001090444612650422009999999219011 | \n", "G1000010042201 | \n", "10001042201 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 4 | \n", "G100001090444612650422009999999219012 | \n", "G1000010042201 | \n", "10001042201 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1058 | \n", "G100005093552999990515009999999999923 | \n", "G1000050051500 | \n", "10005051500 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1059 | \n", "G100005093552999990515009999999999924 | \n", "G1000050051500 | \n", "10005051500 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1060 | \n", "G100005093552999990516009999999999921 | \n", "G1000050051702 | \n", "10005051702 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1061 | \n", "G340033010610106000204029999999916014 | \n", "G1000030990100 | \n", "10003990100 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
| 1062 | \n", "NaN | \n", "G1000050990000 | \n", "10005990000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1063 rows × 7 columns
\n", "