{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This file is part of the Minnesota Population Center's NHGISXWALK.\n", "# For copyright and licensing information, see the NOTICE and LICENSE files\n", "# in this project's top-level directory, and also on-line at:\n", "# https://github.com/ipums/nhgisxwalk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample workflow: 2000 block group parts to 2010 block groups\n", "\n", "## Starting from a subset of 2010 Delaware blocks\n", "\n", "For further background information see:\n", "\n", "* **Schroeder, J. P.** 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3):311–335.\n", "\n", "#### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:25.359511Z", "start_time": "2020-08-19T22:07:25.243071Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2020-08-19T18:07:25-04:00\n", "\n", "CPython 3.8.5\n", "IPython 7.16.1\n", "\n", "compiler : Clang 10.0.1 \n", "system : Darwin\n", "release : 19.6.0\n", "machine : x86_64\n", "processor : i386\n", "CPU cores : 8\n", "interpreter: 64bit\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:25.648513Z", "start_time": "2020-08-19T22:07:25.361589Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "watermark 2.0.2\n", "pandas 1.1.0\n", "nhgisxwalk 0.0.9\n", "\n" ] } ], "source": [ "import nhgisxwalk\n", "import inspect\n", "import pandas\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%watermark -w\n", "%watermark -iv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source and target years for the 
crosswalk" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:25.671220Z", "start_time": "2020-08-19T22:07:25.651159Z" } }, "outputs": [], "source": [ "source_year, target_year = \"2000\", \"2010\"\n", "gj_src, gj_trg = \"GJOIN%s\"%source_year, \"GJOIN%s\"%target_year" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Source-target building base" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-08-19T22:07:25.740179Z", "start_time": "2020-08-19T22:07:25.673969Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | GJOIN2000 | \n", "GJOIN2010 | \n", "WEIGHT | \n", "PAREA | \n", "
|---|---|---|---|---|
| 0 | \n", "G10000100401001000 | \n", "G10000100401001000 | \n", "1.000000 | \n", "1.000000 | \n", "
| 1 | \n", "G10000100401001001 | \n", "G10000100401001001 | \n", "0.999981 | \n", "0.999988 | \n", "
| 2 | \n", "G10000100401001001 | \n", "G10000100401001003 | \n", "0.000019 | \n", "0.000012 | \n", "
| 3 | \n", "G10000100401001002 | \n", "G10000100401001002 | \n", "1.000000 | \n", "1.000000 | \n", "
| 4 | \n", "G10000100401001003 | \n", "G10000100401001003 | \n", "1.000000 | \n", "1.000000 | \n", "
| \n", " | bgp2000gj | \n", "bg2010gj | \n", "bg2010ge | \n", "wt_pop | \n", "wt_fam | \n", "wt_hh | \n", "wt_hu | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "G10000109044444430042202U1 | \n", "G10000100422021 | \n", "100010422021 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1 | \n", "G10000109044461265042201R1 | \n", "G10000100422011 | \n", "100010422011 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 2 | \n", "G10000109044461265042201U1 | \n", "G10000100422011 | \n", "100010422011 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 3 | \n", "G10000109044461265042201U2 | \n", "G10000100422012 | \n", "100010422012 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 4 | \n", "G10000109044461480042202R2 | \n", "G10000100422022 | \n", "100010422022 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1220 | \n", "G10000509355299999051500R4 | \n", "G10000500515004 | \n", "100050515004 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1221 | \n", "G10000509355299999051500U1 | \n", "G10000500515001 | \n", "100050515001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1222 | \n", "G10000509355299999051500U3 | \n", "G10000500515003 | \n", "100050515003 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1223 | \n", "G10000509355299999051500U4 | \n", "G10000500515004 | \n", "100050515004 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1224 | \n", "G34003301061010600020400U2 | \n", "G10000309901000 | \n", "100039901000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1225 rows × 7 columns
\n", "| \n", " | bgp2000gj | \n", "bg2010gj | \n", "bg2010ge | \n", "wt_pop | \n", "wt_fam | \n", "wt_hh | \n", "wt_hu | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "G10000109044444430042202U1 | \n", "G10000100422021 | \n", "100010422021 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1 | \n", "G10000109044461265042201R1 | \n", "G10000100422011 | \n", "100010422011 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 2 | \n", "G10000109044461265042201U1 | \n", "G10000100422011 | \n", "100010422011 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 3 | \n", "G10000109044461265042201U2 | \n", "G10000100422012 | \n", "100010422012 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 4 | \n", "G10000109044461480042202R2 | \n", "G10000100422022 | \n", "100010422022 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1220 | \n", "G10000509355299999051500R4 | \n", "G10000500515004 | \n", "100050515004 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1221 | \n", "G10000509355299999051500U1 | \n", "G10000500515001 | \n", "100050515001 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1222 | \n", "G10000509355299999051500U3 | \n", "G10000500515003 | \n", "100050515003 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1223 | \n", "G10000509355299999051500U4 | \n", "G10000500515004 | \n", "100050515004 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "
| 1224 | \n", "G34003301061010600020400U2 | \n", "G10000309901000 | \n", "100039901000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1225 rows × 7 columns
\n", "