{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:42.739638Z", "start_time": "2021-07-07T23:49:42.736613Z" } }, "outputs": [], "source": [ "# This file is part of the Minnesota Population Center's NHGISXWALK.\n", "# For copyright and licensing information, see the NOTICE and LICENSE files\n", "# in this project's top-level directory, and also on-line at:\n", "# https://github.com/ipums/nhgisxwalk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Test Subsets for testing: 1990\n", "## blocks, block groups parts, and blocks\n", "\n", "\n", "1. From a national crosswalk: \n", " 1. Create target state-level subsets for NHGIS base crosswalks\n", " 1. Create target state-level subsets for NHGIS base tabular data\n", " 1. Record unit tests values for posterity\n", "\n", "\n", "\n", "**This is currently only intended for use with block-level data as base units.**\n", "\n", "\n", "**James Gaboardi** **(<jgaboardi@gmail.com>), 2020-05**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:42.775311Z", "start_time": "2021-07-07T23:49:42.742265Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Last updated: 2021-07-07T19:49:42.757472-04:00\n", "\n", "Python implementation: CPython\n", "Python version : 3.9.6\n", "IPython version : 7.25.0\n", "\n", "Compiler : Clang 11.1.0 \n", "OS : Darwin\n", "Release : 20.5.0\n", "Machine : x86_64\n", "Processor : i386\n", "CPU cores : 8\n", "Architecture: 64bit\n", "\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.102934Z", "start_time": "2021-07-07T23:49:42.779295Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Watermark: 2.2.0\n", "\n", "json : 2.0.9\n", "numpy : 1.21.0\n", "pandas : 1.3.0\n", "nhgisxwalk: 0.1.1\n", "\n" ] 
} ], "source": [ "import inspect\n", "import nhgisxwalk\n", "import numpy\n", "import pandas\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%watermark -w\n", "%watermark -iv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the state (for subsetting), source & target, and year & geography" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.128747Z", "start_time": "2021-07-07T23:49:43.105050Z" } }, "outputs": [], "source": [ "source_year, target_year = \"1990\", \"2010\"\n", "gj_src, gj_trg = \"GJOIN%s\"%source_year, \"GJOIN%s\"%target_year" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.153433Z", "start_time": "2021-07-07T23:49:43.131013Z" } }, "outputs": [], "source": [ "data_path = \"../testing_data_subsets/\"\n", "block_file = \"%s_block\" % source_year\n", "supp_file = \"%s_blck_grp_598\" % source_year" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the base-level crosswalk file name" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.179773Z", "start_time": "2021-07-07T23:49:43.155331Z" } }, "outputs": [ { "data": { "text/plain": [ "'nhgis_blk1990_blk2010_gj'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base_xwalk_name = \"nhgis_blk%s_blk%s_gj\" % (source_year, target_year)\n", "base_xwalk_name" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the base (source) summary file name" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.202464Z", "start_time": "2021-07-07T23:49:43.181315Z" } }, "outputs": [ { "data": { "text/plain": [ "'../testing_data_subsets/1990_block.csv.zip'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base_source_name = 
\"%s.csv.zip\" % block_file\n", "base_source_file = \"%s%s\" % (data_path, base_source_name)\n", "base_source_file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the supplementary summary file name" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.233569Z", "start_time": "2021-07-07T23:49:43.207738Z" } }, "outputs": [ { "data": { "text/plain": [ "'../testing_data_subsets/1990_blck_grp_598.csv.zip'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "supp_source_name = \"%s.csv.zip\" % supp_file\n", "supp_source_file = \"%s%s\" % (data_path, supp_source_name)\n", "supp_source_file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read in the national the base-level crosswalk" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.320083Z", "start_time": "2021-07-07T23:49:43.235448Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>GJOIN1990</th>\n", " <th>GJOIN2010</th>\n", " <th>WEIGHT</th>\n", " <th>PAREA_VIA_BLK00</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>G10000100401101</td>\n", " <td>G10000100401001000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>G10000100401102</td>\n", " <td>G10000100401001001</td>\n", " <td>0.921750</td>\n", " <td>0.976774</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>G10000100401102</td>\n", " <td>G10000100401001002</td>\n", " <td>0.078219</td>\n", " 
<td>0.023215</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>G10000100401102</td>\n", " <td>G10000100401001003</td>\n", " <td>0.000031</td>\n", " <td>0.000012</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>G10000100401103</td>\n", " <td>G10000100401001003</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " GJOIN1990 GJOIN2010 WEIGHT PAREA_VIA_BLK00\n", "0 G10000100401101 G10000100401001000 1.000000 1.000000\n", "1 G10000100401102 G10000100401001001 0.921750 0.976774\n", "2 G10000100401102 G10000100401001002 0.078219 0.023215\n", "3 G10000100401102 G10000100401001003 0.000031 0.000012\n", "4 G10000100401103 G10000100401001003 1.000000 1.000000" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_types = nhgisxwalk.str_types([gj_src, gj_trg])\n", "from_csv_kws = {\"path\": data_path, \"archived\": True, \"remove_unpacked\": True}\n", "read_csv_kws = {\"dtype\": data_types}\n", "base_xwalk = nhgisxwalk.xwalk_df_from_csv(\n", " base_xwalk_name, **from_csv_kws, **read_csv_kws\n", ")\n", "base_xwalk.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Declare input variables\n", "**not needed for creating a subset per se, but should be done regardless**" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.341375Z", "start_time": "2021-07-07T23:49:43.321761Z" } }, "outputs": [], "source": [ "input_vars = [\n", " nhgisxwalk.desc_code_1990[\"Persons\"][\"Total\"],\n", " nhgisxwalk.desc_code_1990[\"Families\"][\"Total\"],\n", " nhgisxwalk.desc_code_1990[\"Households\"][\"Total\"],\n", " nhgisxwalk.desc_code_1990[\"Housing Units\"][\"Total\"]\n", "]\n", "input_var_tags = [\"pop\", \"fam\", \"hh\", \"hu\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generate the desired crosswalk and subset down to the target state" ] }, { "cell_type": "code", 
"execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.813431Z", "start_time": "2021-07-07T23:49:43.343282Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>bgp1990gj</th>\n", " <th>tr2010gj</th>\n", " <th>tr2010ge</th>\n", " <th>wt_pop</th>\n", " <th>wt_fam</th>\n", " <th>wt_hh</th>\n", " <th>wt_hu</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>G100001090444072500423009999999999921</td>\n", " <td>G1000010043202</td>\n", " <td>10001043202</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>G100001090444444300422009999999999926</td>\n", " <td>G1000010042202</td>\n", " <td>10001042202</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>G100001090444612650422009999999219011</td>\n", " <td>G1000010041200</td>\n", " <td>10001041200</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>G100001090444612650422009999999219011</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>G100001090444612650422009999999219012</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " 
<td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>1058</th>\n", " <td>G100005093552999990515009999999999923</td>\n", " <td>G1000050051500</td>\n", " <td>10005051500</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1059</th>\n", " <td>G100005093552999990515009999999999924</td>\n", " <td>G1000050051500</td>\n", " <td>10005051500</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1060</th>\n", " <td>G100005093552999990516009999999999921</td>\n", " <td>G1000050051702</td>\n", " <td>10005051702</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1061</th>\n", " <td>G340033010610106000204029999999916014</td>\n", " <td>G1000030990100</td>\n", " <td>10003990100</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>1062</th>\n", " <td>NaN</td>\n", " <td>G1000050990000</td>\n", " <td>10005990000</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>1063 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " bgp1990gj tr2010gj tr2010ge \\\n", "0 G100001090444072500423009999999999921 G1000010043202 10001043202 \n", "1 G100001090444444300422009999999999926 G1000010042202 10001042202 \n", "2 G100001090444612650422009999999219011 G1000010041200 10001041200 \n", "3 G100001090444612650422009999999219011 G1000010042201 10001042201 \n", "4 G100001090444612650422009999999219012 G1000010042201 10001042201 \n", "... ... ... ... 
\n", "1058 G100005093552999990515009999999999923 G1000050051500 10005051500 \n", "1059 G100005093552999990515009999999999924 G1000050051500 10005051500 \n", "1060 G100005093552999990516009999999999921 G1000050051702 10005051702 \n", "1061 G340033010610106000204029999999916014 G1000030990100 10003990100 \n", "1062 NaN G1000050990000 10005990000 \n", "\n", " wt_pop wt_fam wt_hh wt_hu \n", "0 1.0 1.0 1.0 1.0 \n", "1 1.0 1.0 1.0 1.0 \n", "2 0.0 0.0 0.0 0.0 \n", "3 1.0 1.0 1.0 1.0 \n", "4 1.0 1.0 1.0 1.0 \n", "... ... ... ... ... \n", "1058 1.0 1.0 1.0 1.0 \n", "1059 1.0 1.0 1.0 1.0 \n", "1060 1.0 1.0 1.0 1.0 \n", "1061 0.0 0.0 0.0 0.0 \n", "1062 0.0 0.0 0.0 0.0 \n", "\n", "[1063 rows x 7 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "state_bgp1990tr2010 = nhgisxwalk.GeoCrossWalk(\n", " base_xwalk,\n", " source_year=source_year,\n", " target_year=target_year,\n", " source_geo=\"bgp\",\n", " target_geo=\"tr\",\n", " base_source_table=base_source_file,\n", " supp_source_table=supp_source_file,\n", " input_var=input_vars,\n", " weight_var=input_var_tags,\n", " keep_base=True,\n", " add_geoid=True\n", ")\n", "state_bgp1990tr2010.xwalk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### unittests" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.848026Z", "start_time": "2021-07-07T23:49:43.814899Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>bgp1990gj</th>\n", " <th>tr2010gj</th>\n", " <th>tr2010ge</th>\n", " <th>wt_pop</th>\n", " <th>wt_fam</th>\n", " 
<th>wt_hh</th>\n", " <th>wt_hu</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>G100001090444072500423009999999999921</td>\n", " <td>G1000010043202</td>\n", " <td>10001043202</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>G100001090444444300422009999999999926</td>\n", " <td>G1000010042202</td>\n", " <td>10001042202</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>G100001090444612650422009999999219011</td>\n", " <td>G1000010041200</td>\n", " <td>10001041200</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>G100001090444612650422009999999219011</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>G100001090444612650422009999999219012</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>1058</th>\n", " <td>G100005093552999990515009999999999923</td>\n", " <td>G1000050051500</td>\n", " <td>10005051500</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1059</th>\n", " <td>G100005093552999990515009999999999924</td>\n", " <td>G1000050051500</td>\n", " <td>10005051500</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1060</th>\n", " <td>G100005093552999990516009999999999921</td>\n", " <td>G1000050051702</td>\n", " <td>10005051702</td>\n", " 
<td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1061</th>\n", " <td>G340033010610106000204029999999916014</td>\n", " <td>G1000030990100</td>\n", " <td>10003990100</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>1062</th>\n", " <td>NaN</td>\n", " <td>G1000050990000</td>\n", " <td>10005990000</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>1063 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " bgp1990gj tr2010gj tr2010ge \\\n", "0 G100001090444072500423009999999999921 G1000010043202 10001043202 \n", "1 G100001090444444300422009999999999926 G1000010042202 10001042202 \n", "2 G100001090444612650422009999999219011 G1000010041200 10001041200 \n", "3 G100001090444612650422009999999219011 G1000010042201 10001042201 \n", "4 G100001090444612650422009999999219012 G1000010042201 10001042201 \n", "... ... ... ... \n", "1058 G100005093552999990515009999999999923 G1000050051500 10005051500 \n", "1059 G100005093552999990515009999999999924 G1000050051500 10005051500 \n", "1060 G100005093552999990516009999999999921 G1000050051702 10005051702 \n", "1061 G340033010610106000204029999999916014 G1000030990100 10003990100 \n", "1062 NaN G1000050990000 10005990000 \n", "\n", " wt_pop wt_fam wt_hh wt_hu \n", "0 1.0 1.0 1.0 1.0 \n", "1 1.0 1.0 1.0 1.0 \n", "2 0.0 0.0 0.0 0.0 \n", "3 1.0 1.0 1.0 1.0 \n", "4 1.0 1.0 1.0 1.0 \n", "... ... ... ... ... 
\n", "1058 1.0 1.0 1.0 1.0 \n", "1059 1.0 1.0 1.0 1.0 \n", "1060 1.0 1.0 1.0 1.0 \n", "1061 0.0 0.0 0.0 0.0 \n", "1062 0.0 0.0 0.0 0.0 \n", "\n", "[1063 rows x 7 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "state_bgp1990tr2010.xwalk.drop_duplicates(subset=[\"bgp1990gj\", \"tr2010gj\"])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.880522Z", "start_time": "2021-07-07T23:49:43.849275Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>bgp1990gj</th>\n", " <th>tr2010gj</th>\n", " <th>tr2010ge</th>\n", " <th>wt_pop</th>\n", " <th>wt_fam</th>\n", " <th>wt_hh</th>\n", " <th>wt_hu</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>13</th>\n", " <td>G100001090444999990421009999999219012</td>\n", " <td>G1000010042100</td>\n", " <td>10001042100</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>G100001090444999990421009999999999921</td>\n", " <td>G1000010042100</td>\n", " <td>10001042100</td>\n", " <td>0.997664</td>\n", " <td>0.997166</td>\n", " <td>0.997148</td>\n", " <td>0.997278</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>G100001090444999990421009999999999921</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>0.002336</td>\n", " <td>0.002834</td>\n", " <td>0.002852</td>\n", " <td>0.002722</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>G100001090444999990421009999999999922</td>\n", " <td>G1000010042100</td>\n", " 
<td>10001042100</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " bgp1990gj tr2010gj tr2010ge \\\n", "13 G100001090444999990421009999999219012 G1000010042100 10001042100 \n", "14 G100001090444999990421009999999999921 G1000010042100 10001042100 \n", "15 G100001090444999990421009999999999921 G1000010042201 10001042201 \n", "16 G100001090444999990421009999999999922 G1000010042100 10001042100 \n", "\n", " wt_pop wt_fam wt_hh wt_hu \n", "13 1.000000 1.000000 1.000000 1.000000 \n", "14 0.997664 0.997166 0.997148 0.997278 \n", "15 0.002336 0.002834 0.002852 0.002722 \n", "16 1.000000 1.000000 1.000000 1.000000 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ix1, ix2 = 13, 17\n", "state_bgp1990tr2010.xwalk.loc[ix1:ix2-1]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.905399Z", "start_time": "2021-07-07T23:49:43.882102Z" } }, "outputs": [ { "data": { "text/plain": [ "array([['G100001090444999990421009999999219012', 'G1000010042100',\n", " '10001042100'],\n", " ['G100001090444999990421009999999999921', 'G1000010042100',\n", " '10001042100'],\n", " ['G100001090444999990421009999999999921', 'G1000010042201',\n", " '10001042201'],\n", " ['G100001090444999990421009999999999922', 'G1000010042100',\n", " '10001042100']], dtype=object)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "id_cols = [\"bgp1990gj\", \"tr2010gj\", \"tr2010ge\"]\n", "obs_str_vals = state_bgp1990tr2010.xwalk[id_cols][ix1:ix2].values\n", "obs_str_vals" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.932269Z", "start_time": "2021-07-07T23:49:43.907185Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[1. , 1. , 1. , 1. 
],\n", " [0.99766436, 0.99716625, 0.99714829, 0.99727768],\n", " [0.00233564, 0.00283375, 0.00285171, 0.00272232],\n", " [1. , 1. , 1. , 1. ]])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wgt_cols = [\"wt_pop\", \"wt_fam\", \"wt_hh\", \"wt_hu\"]\n", "obs_num_vals = state_bgp1990tr2010.xwalk[wgt_cols][ix1:ix2].values\n", "obs_num_vals" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2021-07-07T23:49:43.959830Z", "start_time": "2021-07-07T23:49:43.933753Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>wt_pop</th>\n", " <th>wt_fam</th>\n", " <th>wt_hh</th>\n", " <th>wt_hu</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>13</th>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>0.997664</td>\n", " <td>0.997166</td>\n", " <td>0.997148</td>\n", " <td>0.997278</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>0.002336</td>\n", " <td>0.002834</td>\n", " <td>0.002852</td>\n", " <td>0.002722</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " wt_pop wt_fam wt_hh wt_hu\n", "13 1.000000 1.000000 1.000000 1.000000\n", "14 0.997664 0.997166 0.997148 0.997278\n", "15 0.002336 0.002834 0.002852 0.002722\n", "16 1.000000 1.000000 1.000000 1.000000" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } 
], "source": [ "state_bgp1990tr2010.xwalk[wgt_cols][ix1:ix2]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "-----------------" ] } ], "metadata": { "_draft": { "nbviewer_url": "https://gist.github.com/9f47e4ec2cc37bce83acf20abfca69d2" }, "gist": { "data": { "description": "sample-workflow.ipynb", "public": true }, "id": "9f47e4ec2cc37bce83acf20abfca69d2" }, "kernelspec": { "display_name": "Python [conda env:nhgis]", "language": "python", "name": "conda-env-nhgis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 4 }