def cps_1mo(cps_dir, cps_dt, cols):
    '''
    Return the rows of one bd_CPS annual file that fall in the month of cps_dt.

    Reads the columns `cols` from `cps_dir / cps<year>.ft`, where <year> is
    `cps_dt.year`, and keeps only rows whose MONTH equals `cps_dt.month`.
    MONTH and YEAR are appended to the requested columns when they are
    missing; the caller's list is never modified.
    '''
    # List concatenation creates a new list, leaving the caller's `cols` intact.
    wanted = cols
    for required in ('MONTH', 'YEAR'):
        if required not in wanted:
            wanted = wanted + [required]

    # Keep this local name: the query string below refers to it as @cps_month.
    cps_month = cps_dt.month

    annual_file = cps_dir / f'cps{cps_dt.year}.ft'
    annual = pd.read_feather(annual_file, columns=wanted)

    return annual.query('MONTH == @cps_month')
# Columns read from each monthly extract (cps_1mo adds MONTH and YEAR itself).
cols = ['CPSID', 'AGE', 'FEMALE', 'WBHAO', 'NILFREASON',
        'LFS', 'MIS', 'BASICWGT', 'LINENO', 'RETIRED']

# Filter applied to the observation from one year earlier. The saved cell
# contained the truncated expression 'RE', which is not a column and makes
# DataFrame.query fail. The notebook's stated purpose is the flow from
# "not working due to disability" to work, so the filter is on NILFREASON.
# NOTE(review): the label "Disabled/Ill" is assumed from the bd_CPS coding of
# NILFREASON -- confirm against the cleaned files before relying on results.
PRIOR_YEAR_FILTER = 'NILFREASON == "Disabled/Ill"'

# A person is matched across the two months on these identifiers.
MATCH_KEYS = ['CPSID', 'LINENO', 'FEMALE', 'WBHAO']


def emp(x):
    '''Return 1 where the current-month status (LFS_y) is 'Employed', else 0.'''
    return np.where(x.LFS_y == 'Employed', 1, 0)


dates = pd.date_range('1997-01-01', '2021-11-01', freq='MS')

d = {}

for dt in dates:
    # Year-ago observations (suffix _x after the merge), restricted by the filter
    df1 = (cps_1mo(cps_dir, dt - pd.DateOffset(years=1), cols)
           .query(PRIOR_YEAR_FILTER))
    # Current-month observations (suffix _y after the merge)
    df2 = cps_1mo(cps_dir, dt, cols)
    data = (pd.merge(df1, df2, on=MATCH_KEYS)
              .query('25 <= AGE_y <= 54'))
    # Keep matches whose age did not fall and rose by at most two years;
    # anything else is treated as a false match.
    data = (data.loc[(data.AGE_x <= data.AGE_y) &
                     (data.AGE_y - 2 <= data.AGE_x)]).assign(EMP=emp)

    # np.average raises an unhelpful "Weights sum to zero" on an empty match;
    # fail with a message that points at the likely cause instead.
    if data.empty:
        raise ValueError(f'No matched observations for {dt:%Y-%m}; '
                         f'check PRIOR_YEAR_FILTER ({PRIOR_YEAR_FILTER!r})')

    # Weighted share employed in the current month, in percent
    d[dt] = np.average(data.EMP, weights=data.BASICWGT_y) * 100

# 12-month trailing mean; the first 11 months have no full window and are dropped
df = pd.Series(d).rolling(12).mean().dropna().rename('Share')

# Plot cell: 12-month average share moving from disability to employment
ax = df.plot(color='blue', legend=None, title='Flow, Disability to Work')
ax.set_ylabel('Percent, 12-month moving average');