{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This file is part of the Minnesota Population Center's NHGISXWALK.\n",
    "# For copyright and licensing information, see the NOTICE and LICENSE files\n",
    "# in this project's top-level directory, and also on-line at:\n",
    "# https://github.com/ipums/nhgisxwalk"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Split out base crosswalks into states w/`README.txt` files\n",
    "## `blk{1990/2000}{ge/gj}-blk2010{ge/gj}`\n",
    "\n",
    "### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)\n",
    "\n",
    "**James D. Gaboardi, 06/2020**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-10-01T21:59:32.221802Z",
     "start_time": "2020-10-01T21:59:32.045902Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2020-10-01T17:59:32-04:00\n",
      "\n",
      "CPython 3.8.5\n",
      "IPython 7.18.1\n",
      "\n",
      "compiler : Clang 10.0.1 \n",
      "system : Darwin\n",
      "release : 19.6.0\n",
      "machine : x86_64\n",
      "processor : i386\n",
      "CPU cores : 8\n",
      "interpreter: 64bit\n"
     ]
    }
   ],
   "source": [
    "%load_ext watermark\n",
    "%watermark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-10-01T21:59:32.520691Z",
     "start_time": "2020-10-01T21:59:32.224310Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "watermark 2.0.2\n",
      "nhgisxwalk 0.0.9post1\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import glob\n",
    "import nhgisxwalk\n",
    "import shutil\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%watermark -w\n",
    "%watermark -iv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fetch path for each original crosswalk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-10-01T21:59:32.545544Z",
     "start_time": "2020-10-01T21:59:32.527569Z"
    }
   },
   "outputs": [],
   "source": [
    "# Set this to a local directory\n",
    "cross_dir_in = \"path/to/data/\"\n",
    "# `glob` returns matches in file-system-dependent order; sort them so that\n",
    "# every run processes the archives in the same sequence\n",
    "cross_zip_in = sorted(\n",
    "    f for f in glob.glob(cross_dir_in + \"nhgis_blk*\") if f.endswith(\"zip\")\n",
    ")\n",
    "# Fail loudly instead of silently doing nothing when the path is wrong\n",
    "assert cross_zip_in, f\"no 'nhgis_blk*zip' archives found in {cross_dir_in!r}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-10-01T21:59:32.564446Z",
     "start_time": "2020-10-01T21:59:32.546881Z"
    }
   },
   "outputs": [],
   "source": [
    "# Set this to a local directory\n",
    "cross_dir_out = \"path/to/data/\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Declare target column names for each original crosswalk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-10-01T21:59:32.588836Z",
     "start_time": "2020-10-01T21:59:32.565968Z"
    }
   },
   "outputs": [],
   "source": [
    "# Derive each target column from the archive's file name suffix so that the\n",
    "# file/column pairing never depends on the order in which files were found.\n",
    "# NOTE(review): assumes the NHGIS naming `*_gj.zip` (GISJOIN IDs) and\n",
    "# `*_ge.zip` (GEOID IDs); any other suffix raises a KeyError -- confirm\n",
    "suffix_to_column = {\"gj\": \"GJOIN2010\", \"ge\": \"GEOID10\"}\n",
    "target_columns = [\n",
    "    suffix_to_column[f.rsplit(\".\", 1)[0].rsplit(\"_\", 1)[-1]] for f in cross_zip_in\n",
    "]\n",
    "target_columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Regenerate sorted crosswalks, split crosswalk by state, and write out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-10-01T22:40:24.119449Z",
     "start_time": "2020-10-01T21:59:32.590397Z"
    }
   },
   "outputs": [],
   "source": [
    "dtype = nhgisxwalk.str_types(nhgisxwalk.ID_COLS)\n",
    "# `target_columns` was built from `cross_zip_in`, so the two are aligned element-wise\n",
    "for f, tcol in zip(cross_zip_in, target_columns):\n",
    "    nhgisxwalk.regenerate_blk_blk_xwalk(f, cross_dir_out, tcol, dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "-------------"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:nhgis] *",
   "language": "python",
   "name": "conda-env-nhgis-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}