# Sample workflow: 2000 block group parts to 2010 tracts

## Starting from a subset of 2010 Delaware blocks

For further background information see:
* **Schroeder, J. P.** 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3):311–335.

#### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)

In [1]:
%load_ext watermark
%watermark

2020-06-20T21:22:31-04:00

CPython 3.7.6
IPython 7.15.0

compiler   : Clang 9.0.1 
system     : Darwin
release    : 19.5.0
machine    : x86_64
processor  : i386
CPU cores  : 8
interpreter: 64bit


In [2]:
import nhgisxwalk
import inspect
import pandas

%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv

watermark 2.0.2
nhgisxwalk 0.0.4
pandas     1.0.4



### Source and target years for the crosswalk

In [3]:
# Vintages of the source and target geographies, held as strings because they
# are interpolated into file and column names.
source_year = "2000"
target_year = "2010"

### Base block-level crosswalk for building the source-target crosswalk

In [4]:
# Directory holding the subset test data, and the zipped block-to-block
# crosswalk for the chosen source/target years inside it.
subset_data_dir = "../testing_data_subsets"
base_xwalk_name = "/nhgis_blk%s_blk%s_gj.csv.zip" % (source_year, target_year)
base_xwalk_file = "".join([subset_data_dir, base_xwalk_name])

# dtype mapping for the two GJOIN ID columns (one per year), as returned by
# `nhgisxwalk.str_types` -- presumably forces them to be read as strings; confirm.
data_types = nhgisxwalk.str_types(
    ["GJOIN%s" % year for year in (source_year, target_year)]
)

# The first CSV column is the saved row index, so reuse it as the index.
base_xwalk = pandas.read_csv(base_xwalk_file, dtype=data_types, index_col=0)
base_xwalk.head()

Unnamed: 0,GJOIN2000,GJOIN2010,WEIGHT,PAREA
0,G10000100401001000,G10000100401001000,1.0,1.0
1,G10000100401001001,G10000100401001001,0.999981,0.999988
2,G10000100401001001,G10000100401001003,1.9e-05,1.2e-05
3,G10000100401001002,G10000100401001002,1.0,1.0
4,G10000100401001003,G10000100401001003,1.0,1.0


### Convenience code shorthand/lookup

In [5]:
# Show the library's own source for `valid_geo_shorthand` so the available
# geography codes are visible here. `print` is used so the newlines in the
# returned source string are rendered instead of shown as an escaped repr.
print(inspect.getsource(nhgisxwalk.valid_geo_shorthand))

def valid_geo_shorthand(shorthand_name=True):
    """Shorthand lookups for census geographies."""
    lookup = {
        "blk": "block",
        "bgp": "block group part",
        "bkg": "block group",
        "trt": "tract",
        "cty": "county",
    }
    if not shorthand_name:
        lookup = {v: k for k, v in lookup.items()}
    return lookup



In [6]:
# With `shorthand_name=False` the lookup is inverted: keys are the full
# geography names and values are the shorthand codes.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)

{'block': 'blk',
 'block group part': 'bgp',
 'block group': 'bkg',
 'tract': 'trt',
 'county': 'cty'}

### Instantiate an `nhgisxwalk.GeoCrossWalk` object
##### see [nhgisxwalk.GeoCrossWalk](https://github.com/jGaboardi/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details

In [7]:
# Inspect the 2000 SF1b descriptor lookup: each universe entry records its
# source code, its NHGIS code, and (under the "Total" key) the NHGIS code of
# the universe's total-count variable.
nhgisxwalk.desc_code_2000_SF1b

{'Persons': {'Persons': 'Universe',
  'NP001A': 'Source code',
  'FXS': 'NHGIS code',
  'Total': 'FXS001'},
 'Families': {'Families': 'Universe',
  'NP031A': 'Source code',
  'F2V': 'NHGIS code',
  'Total': 'F2V001'},
 'Households': {'Households': 'Universe',
  'NP010A': 'Source code',
  'FY4': 'NHGIS code',
  'Total': 'FY4001'},
 'Housing Units': {'Housing Units': 'Universe',
  'NH001A': 'Source code',
  'FV5': 'NHGIS code',
  'Total': 'FV5001'}}

In [8]:
# Collect the NHGIS code of the "Total" variable for each universe of
# interest, in the order: persons, families, households, housing units.
input_vars = [
    nhgisxwalk.desc_code_2000_SF1b[universe]["Total"]
    for universe in ("Persons", "Families", "Households", "Housing Units")
]
input_vars

['FXS001', 'F2V001', 'FY4001', 'FV5001']

In [9]:
# Short tags for the input variables (persons, families, households, housing
# units). They are passed as `weight_var` when the crosswalk is built and show
# up as the suffixes of the resulting `wt_*` columns.
# NOTE(review): presumably paired with `input_vars` by position -- confirm.
input_var_tags = ["pop", "fam", "hh", "hu"]

In [10]:
# Build the 2000 block group part -> 2010 tract crosswalk from the base
# block-level crosswalk, restricted to a single state.
# State FIPS code passed as `stfips`; "10" is Delaware.
subset_state = "10"
bgp2000_to_trt2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    source_year=source_year,
    target_year=target_year,
    # Shorthand geography codes: "bgp" = block group part, "trt" = tract.
    source_geo="bgp",
    target_geo="trt",
    # Zipped 2000 block-level table from the subset data directory;
    # presumably the source of the `input_var` counts -- confirm.
    base_source_table=subset_data_dir+"/2000_block.csv.zip",
    # NHGIS variable codes and their short tags; the tags appear as the
    # `wt_*` columns of the resulting crosswalk.
    input_var=input_vars,
    weight_var=input_var_tags,
    stfips=subset_state,
    # NOTE(review): presumably retains the base crosswalk on the object -- confirm.
    keep_base=True,
    # NOTE(review): presumably responsible for the extra `trt2010ge` ID column
    # alongside `trt2010gj` -- confirm.
    add_geoid=True
)
# Display the generated crosswalk table.
bgp2000_to_trt2010.xwalk

Unnamed: 0,bgp2000gj,trt2010gj,trt2010ge,wt_pop,wt_fam,wt_hh,wt_hu
0,G10000109044444430042202U1,G1000010042202,10001042202,1.0,1.0,1.0,1.0
1,G10000109044461265042201R1,G1000010042201,10001042201,1.0,1.0,1.0,1.0
2,G10000109044461265042201U1,G1000010042201,10001042201,1.0,1.0,1.0,1.0
3,G10000109044461265042201U2,G1000010042201,10001042201,1.0,1.0,1.0,1.0
4,G10000109044461480042202R2,G1000010042202,10001042202,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...
1038,G10000509355299999051500R4,G1000050051500,10005051500,1.0,1.0,1.0,1.0
1039,G10000509355299999051500U1,G1000050051500,10005051500,1.0,1.0,1.0,1.0
1040,G10000509355299999051500U3,G1000050051500,10005051500,1.0,1.0,1.0,1.0
1041,G10000509355299999051500U4,G1000050051500,10005051500,1.0,1.0,1.0,1.0


### Write crosswalk to a `.csv`

In [11]:
# Output directory for the state-level crosswalk. The trailing slash matters:
# the file name is appended to this string by plain concatenation when the
# crosswalk is read back.
state_dir = "../../crosswalks/nhgis_bgp2000_trt2010_state/"
# Write the crosswalk held by the GeoCrossWalk object under `state_dir`.
# NOTE(review): the output file name is presumably derived from
# `bgp2000_to_trt2010.xwalk_name` -- confirm against `xwalk_df_to_csv`.
nhgisxwalk.xwalk_df_to_csv(
    cls=bgp2000_to_trt2010,
    path=state_dir
)

### Read crosswalk from a `.csv`

In [12]:
# Rebuild the file path from the output directory and the crosswalk's
# `xwalk_name` attribute, then read the crosswalk back into a dataframe.
# NOTE(review): no file extension is appended here -- presumably
# `xwalk_name` or `xwalk_df_from_csv` accounts for it; confirm.
fname = state_dir + bgp2000_to_trt2010.xwalk_name
bgp2000_to_trt2010_df = nhgisxwalk.xwalk_df_from_csv(fname)
# Display the round-tripped table for comparison with the in-memory crosswalk.
bgp2000_to_trt2010_df

Unnamed: 0,bgp2000gj,trt2010gj,trt2010ge,wt_pop,wt_fam,wt_hh,wt_hu
0,G10000109044444430042202U1,G1000010042202,10001042202,1.0,1.0,1.0,1.0
1,G10000109044461265042201R1,G1000010042201,10001042201,1.0,1.0,1.0,1.0
2,G10000109044461265042201U1,G1000010042201,10001042201,1.0,1.0,1.0,1.0
3,G10000109044461265042201U2,G1000010042201,10001042201,1.0,1.0,1.0,1.0
4,G10000109044461480042202R2,G1000010042202,10001042202,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...
1038,G10000509355299999051500R4,G1000050051500,10005051500,1.0,1.0,1.0,1.0
1039,G10000509355299999051500U1,G1000050051500,10005051500,1.0,1.0,1.0,1.0
1040,G10000509355299999051500U3,G1000050051500,10005051500,1.0,1.0,1.0,1.0
1041,G10000509355299999051500U4,G1000050051500,10005051500,1.0,1.0,1.0,1.0


-----------------------------------------------