{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Test Subsets for testing: 1990\n", "## blocks, block group parts, and blocks\n", "\n", "\n", "1. From a national crosswalk: \n", " 1. Create target state-level subsets for NHGIS base crosswalks\n", " 1. Create target state-level subsets for NHGIS base tabular data\n", " 1. Record unit test values for posterity\n", "\n", "\n", "\n", "**This is currently only intended for use with block-level data as base units.**\n", "\n", "\n", "**James Gaboardi** **(<jgaboardi@gmail.com>), 2020-05**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:56:41.787647Z", "start_time": "2020-06-13T16:56:41.747170Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2020-06-13T12:56:41-04:00\n", "\n", "CPython 3.7.6\n", "IPython 7.15.0\n", "\n", "compiler : Clang 9.0.1 \n", "system : Darwin\n", "release : 19.5.0\n", "machine : x86_64\n", "processor : i386\n", "CPU cores : 8\n", "interpreter: 64bit\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:56:43.060033Z", "start_time": "2020-06-13T16:56:42.252794Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "watermark 2.0.2\n", "pandas 1.0.4\n", "numpy 1.18.5\n", "nhgisxwalk 0.0.2\n", "\n" ] } ], "source": [ "import inspect\n", "import nhgisxwalk\n", "import numpy\n", "import pandas\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%watermark -w\n", "%watermark -iv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the state (for subsetting), source & target, and year & geography" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:57:38.639677Z", "start_time": "2020-06-13T16:57:38.611046Z" } }, "outputs": [], "source": [ "source_year, target_year = \"1990\", \"2010\"\n",
"gj_src, gj_trg = \"GJOIN%s\"%source_year, \"GJOIN%s\"%target_year" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the base-level crosswalk file name" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:57:39.323393Z", "start_time": "2020-06-13T16:57:39.288997Z" } }, "outputs": [ { "data": { "text/plain": [ "'../testing_data_subsets/nhgis_blk1990_blk2010_gj.csv.zip'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base_xwalk_name = \"nhgis_blk%s_blk%s_gj.csv.zip\" % (source_year, target_year)\n", "base_xwalk_file = \"../testing_data_subsets/%s\" % base_xwalk_name\n", "base_xwalk_file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the base (source) summary file name" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:57:40.099907Z", "start_time": "2020-06-13T16:57:40.069879Z" } }, "outputs": [ { "data": { "text/plain": [ "'../testing_data_subsets/1990_block.csv.zip'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base_source_name = \"%s_block.csv.zip\" % source_year\n", "base_source_file = \"../testing_data_subsets/%s\" % base_source_name\n", "base_source_file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the supplementary summary file name" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:57:41.057633Z", "start_time": "2020-06-13T16:57:41.028004Z" } }, "outputs": [ { "data": { "text/plain": [ "'../testing_data_subsets/1990_blck_grp_598_103.csv.zip'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "supp_source_name = \"%s_blck_grp_598_103.csv.zip\" % source_year\n", "supp_source_file = \"../testing_data_subsets/%s\" % supp_source_name\n", "supp_source_file" ] }, { "cell_type": "markdown",
"metadata": {}, "source": [ "### Read in the national base-level crosswalk" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:57:54.723760Z", "start_time": "2020-06-13T16:57:54.632265Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>GJOIN1990</th>\n", " <th>GJOIN2010</th>\n", " <th>WEIGHT</th>\n", " <th>PAREA_VIA_BLK00</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>NaN</td>\n", " <td>G10000100432021078</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>NaN</td>\n", " <td>G10000100432023014</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>NaN</td>\n", " <td>G10000100432023015</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>NaN</td>\n", " <td>G10000109900000011</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>NaN</td>\n", " <td>G10000109900000012</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>38292</th>\n", " <td>G10000500519289</td>\n", " <td>G10000500519002125</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>38293</th>\n", " <td>G34003300204401A</td>\n", " <td>G10000309901000007</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>38294</th>\n", " <td>G34003300204418</td>\n", " <td>G10000309901000007</td>\n", "
<td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>38295</th>\n", " <td>G34003300204419</td>\n", " <td>G10000309901000007</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>38296</th>\n", " <td>G34003300204420</td>\n", " <td>G10000309901000007</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>38297 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " GJOIN1990 GJOIN2010 WEIGHT PAREA_VIA_BLK00\n", "0 NaN G10000100432021078 0.0 0.0\n", "1 NaN G10000100432023014 0.0 0.0\n", "2 NaN G10000100432023015 0.0 0.0\n", "3 NaN G10000109900000011 0.0 0.0\n", "4 NaN G10000109900000012 0.0 0.0\n", "... ... ... ... ...\n", "38292 G10000500519289 G10000500519002125 1.0 1.0\n", "38293 G34003300204401A G10000309901000007 0.0 0.0\n", "38294 G34003300204418 G10000309901000007 0.0 0.0\n", "38295 G34003300204419 G10000309901000007 0.0 0.0\n", "38296 G34003300204420 G10000309901000007 0.0 0.0\n", "\n", "[38297 rows x 4 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_types = nhgisxwalk.str_types([gj_src, gj_trg])\n", "base_xwalk = pandas.read_csv(base_xwalk_file, index_col=0, dtype=data_types)\n", "base_xwalk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Declare input variables\n", "**not needed for creating a subset per se, but should be done regardless**" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:57:56.549339Z", "start_time": "2020-06-13T16:57:56.519717Z" } }, "outputs": [], "source": [ "input_vars = [\n", " nhgisxwalk.desc_code_1990[\"Persons\"][\"Total\"],\n", " nhgisxwalk.desc_code_1990[\"Families\"][\"Total\"],\n", " nhgisxwalk.desc_code_1990[\"Households\"][\"Total\"],\n", " nhgisxwalk.desc_code_1990[\"Housing Units\"][\"Total\"]\n", "]\n", "input_var_tags = [\"pop\", \"fam\", \"hh\", \"hu\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
Generate the desired crosswalk and subset down to the target state" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:57:58.621158Z", "start_time": "2020-06-13T16:57:58.174445Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>bgp1990gj</th>\n", " <th>trt2010gj</th>\n", " <th>trt2010ge</th>\n", " <th>wt_pop</th>\n", " <th>wt_fam</th>\n", " <th>wt_hh</th>\n", " <th>wt_hu</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>G100001090444072500423009999999999921</td>\n", " <td>G1000010043202</td>\n", " <td>10001043202</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>G100001090444444300422009999999999926</td>\n", " <td>G1000010042202</td>\n", " <td>10001042202</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>G100001090444612650422009999999219011</td>\n", " <td>G1000010041200</td>\n", " <td>10001041200</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>G100001090444612650422009999999219011</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>G100001090444612650422009999999219012</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " 
<tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>1058</th>\n", " <td>G100005093552999990515009999999999923</td>\n", " <td>G1000050051500</td>\n", " <td>10005051500</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1059</th>\n", " <td>G100005093552999990515009999999999924</td>\n", " <td>G1000050051500</td>\n", " <td>10005051500</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1060</th>\n", " <td>G100005093552999990516009999999999921</td>\n", " <td>G1000050051702</td>\n", " <td>10005051702</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1061</th>\n", " <td>G340033010610106000204029999999916014</td>\n", " <td>G1000030990100</td>\n", " <td>10003990100</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>1062</th>\n", " <td>NaN</td>\n", " <td>G1000050990000</td>\n", " <td>10005990000</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>1063 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " bgp1990gj trt2010gj trt2010ge \\\n", "0 G100001090444072500423009999999999921 G1000010043202 10001043202 \n", "1 G100001090444444300422009999999999926 G1000010042202 10001042202 \n", "2 G100001090444612650422009999999219011 G1000010041200 10001041200 \n", "3 G100001090444612650422009999999219011 G1000010042201 10001042201 \n", "4 G100001090444612650422009999999219012 G1000010042201 10001042201 \n", "... ... ... ... 
\n", "1058 G100005093552999990515009999999999923 G1000050051500 10005051500 \n", "1059 G100005093552999990515009999999999924 G1000050051500 10005051500 \n", "1060 G100005093552999990516009999999999921 G1000050051702 10005051702 \n", "1061 G340033010610106000204029999999916014 G1000030990100 10003990100 \n", "1062 NaN G1000050990000 10005990000 \n", "\n", " wt_pop wt_fam wt_hh wt_hu \n", "0 1.0 1.0 1.0 1.0 \n", "1 1.0 1.0 1.0 1.0 \n", "2 0.0 0.0 0.0 0.0 \n", "3 1.0 1.0 1.0 1.0 \n", "4 1.0 1.0 1.0 1.0 \n", "... ... ... ... ... \n", "1058 1.0 1.0 1.0 1.0 \n", "1059 1.0 1.0 1.0 1.0 \n", "1060 1.0 1.0 1.0 1.0 \n", "1061 0.0 0.0 0.0 0.0 \n", "1062 0.0 0.0 0.0 0.0 \n", "\n", "[1063 rows x 7 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "state_bgp1990trt2010 = nhgisxwalk.GeoCrossWalk(\n", " base_xwalk,\n", " source_year=source_year,\n", " target_year=target_year,\n", " source_geo=\"bgp\",\n", " target_geo=\"trt\",\n", " base_source_table=base_source_file,\n", " supp_source_table=supp_source_file,\n", " input_var=input_vars,\n", " weight_var=input_var_tags,\n", " keep_base=True,\n", " add_geoid=True\n", ")\n", "#del base_xwalk\n", "state_bgp1990trt2010.xwalk" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### unittests" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T16:59:07.728274Z", "start_time": "2020-06-13T16:59:07.688834Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>bgp1990gj</th>\n", " <th>trt2010gj</th>\n", " <th>trt2010ge</th>\n", " <th>wt_pop</th>\n", " 
<th>wt_fam</th>\n", " <th>wt_hh</th>\n", " <th>wt_hu</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>G100001090444072500423009999999999921</td>\n", " <td>G1000010043202</td>\n", " <td>10001043202</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>G100001090444444300422009999999999926</td>\n", " <td>G1000010042202</td>\n", " <td>10001042202</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>G100001090444612650422009999999219011</td>\n", " <td>G1000010041200</td>\n", " <td>10001041200</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>G100001090444612650422009999999219011</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>G100001090444612650422009999999219012</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>1058</th>\n", " <td>G100005093552999990515009999999999923</td>\n", " <td>G1000050051500</td>\n", " <td>10005051500</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1059</th>\n", " <td>G100005093552999990515009999999999924</td>\n", " <td>G1000050051500</td>\n", " <td>10005051500</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1060</th>\n", " <td>G100005093552999990516009999999999921</td>\n", " <td>G1000050051702</td>\n", " 
<td>10005051702</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>1061</th>\n", " <td>G340033010610106000204029999999916014</td>\n", " <td>G1000030990100</td>\n", " <td>10003990100</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>1062</th>\n", " <td>NaN</td>\n", " <td>G1000050990000</td>\n", " <td>10005990000</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>1063 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " bgp1990gj trt2010gj trt2010ge \\\n", "0 G100001090444072500423009999999999921 G1000010043202 10001043202 \n", "1 G100001090444444300422009999999999926 G1000010042202 10001042202 \n", "2 G100001090444612650422009999999219011 G1000010041200 10001041200 \n", "3 G100001090444612650422009999999219011 G1000010042201 10001042201 \n", "4 G100001090444612650422009999999219012 G1000010042201 10001042201 \n", "... ... ... ... \n", "1058 G100005093552999990515009999999999923 G1000050051500 10005051500 \n", "1059 G100005093552999990515009999999999924 G1000050051500 10005051500 \n", "1060 G100005093552999990516009999999999921 G1000050051702 10005051702 \n", "1061 G340033010610106000204029999999916014 G1000030990100 10003990100 \n", "1062 NaN G1000050990000 10005990000 \n", "\n", " wt_pop wt_fam wt_hh wt_hu \n", "0 1.0 1.0 1.0 1.0 \n", "1 1.0 1.0 1.0 1.0 \n", "2 0.0 0.0 0.0 0.0 \n", "3 1.0 1.0 1.0 1.0 \n", "4 1.0 1.0 1.0 1.0 \n", "... ... ... ... ... 
\n", "1058 1.0 1.0 1.0 1.0 \n", "1059 1.0 1.0 1.0 1.0 \n", "1060 1.0 1.0 1.0 1.0 \n", "1061 0.0 0.0 0.0 0.0 \n", "1062 0.0 0.0 0.0 0.0 \n", "\n", "[1063 rows x 7 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "state_bgp1990trt2010.xwalk.drop_duplicates(subset=[\"bgp1990gj\", \"trt2010gj\"])" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T17:08:07.609368Z", "start_time": "2020-06-13T17:08:07.522244Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>bgp1990gj</th>\n", " <th>trt2010gj</th>\n", " <th>trt2010ge</th>\n", " <th>wt_pop</th>\n", " <th>wt_fam</th>\n", " <th>wt_hh</th>\n", " <th>wt_hu</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>13</th>\n", " <td>G100001090444999990421009999999219012</td>\n", " <td>G1000010042100</td>\n", " <td>10001042100</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>G100001090444999990421009999999999921</td>\n", " <td>G1000010042100</td>\n", " <td>10001042100</td>\n", " <td>0.997664</td>\n", " <td>0.997166</td>\n", " <td>0.997148</td>\n", " <td>0.997278</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>G100001090444999990421009999999999921</td>\n", " <td>G1000010042201</td>\n", " <td>10001042201</td>\n", " <td>0.002336</td>\n", " <td>0.002834</td>\n", " <td>0.002852</td>\n", " <td>0.002722</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>G100001090444999990421009999999999922</td>\n", " <td>G1000010042100</td>\n", 
" <td>10001042100</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " bgp1990gj trt2010gj trt2010ge \\\n", "13 G100001090444999990421009999999219012 G1000010042100 10001042100 \n", "14 G100001090444999990421009999999999921 G1000010042100 10001042100 \n", "15 G100001090444999990421009999999999921 G1000010042201 10001042201 \n", "16 G100001090444999990421009999999999922 G1000010042100 10001042100 \n", "\n", " wt_pop wt_fam wt_hh wt_hu \n", "13 1.000000 1.000000 1.000000 1.000000 \n", "14 0.997664 0.997166 0.997148 0.997278 \n", "15 0.002336 0.002834 0.002852 0.002722 \n", "16 1.000000 1.000000 1.000000 1.000000 " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ix1, ix2 = 13, 17\n", "state_bgp1990trt2010.xwalk.loc[ix1:ix2-1]" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T17:08:08.160866Z", "start_time": "2020-06-13T17:08:08.128714Z" } }, "outputs": [ { "data": { "text/plain": [ "array([['G100001090444999990421009999999219012', 'G1000010042100',\n", " '10001042100'],\n", " ['G100001090444999990421009999999999921', 'G1000010042100',\n", " '10001042100'],\n", " ['G100001090444999990421009999999999921', 'G1000010042201',\n", " '10001042201'],\n", " ['G100001090444999990421009999999999922', 'G1000010042100',\n", " '10001042100']], dtype=object)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "id_cols = [\"bgp1990gj\", \"trt2010gj\", \"trt2010ge\"]\n", "obs_str_vals = state_bgp1990trt2010.xwalk[id_cols][ix1:ix2].values\n", "obs_str_vals" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T17:08:08.599038Z", "start_time": "2020-06-13T17:08:08.572652Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[1. , 1. , 1. , 1. 
],\n", " [0.99766436, 0.99716625, 0.99714829, 0.99727768],\n", " [0.00233564, 0.00283375, 0.00285171, 0.00272232],\n", " [1. , 1. , 1. , 1. ]])" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wgt_cols = [\"wt_pop\", \"wt_fam\", \"wt_hh\", \"wt_hu\"]\n", "obs_num_vals = state_bgp1990trt2010.xwalk[wgt_cols][ix1:ix2].values\n", "obs_num_vals" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T17:16:47.096889Z", "start_time": "2020-06-13T17:16:47.063562Z" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>wt_pop</th>\n", " <th>wt_fam</th>\n", " <th>wt_hh</th>\n", " <th>wt_hu</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>13</th>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>0.997664</td>\n", " <td>0.997166</td>\n", " <td>0.997148</td>\n", " <td>0.997278</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>0.002336</td>\n", " <td>0.002834</td>\n", " <td>0.002852</td>\n", " <td>0.002722</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " wt_pop wt_fam wt_hh wt_hu\n", "13 1.000000 1.000000 1.000000 1.000000\n", "14 0.997664 0.997166 0.997148 0.997278\n", "15 0.002336 0.002834 0.002852 0.002722\n", "16 1.000000 1.000000 1.000000 1.000000" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } 
], "source": [ "state_bgp1990trt2010.xwalk[wgt_cols][ix1:ix2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "-----------------" ] } ], "metadata": { "_draft": { "nbviewer_url": "https://gist.github.com/9f47e4ec2cc37bce83acf20abfca69d2" }, "gist": { "data": { "description": "sample-workflow.ipynb", "public": true }, "id": "9f47e4ec2cc37bce83acf20abfca69d2" }, "kernelspec": { "display_name": "Python [conda env:nhgis]", "language": "python", "name": "conda-env-nhgis-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }