{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Problem set 3: Loading and structuring data from Denmark Statistics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[](https://mybinder.org/v2/gh/NumEconCopenhagen/exercises-2020/master?urlpath=lab/tree/PS3/problem_set_3.ipynb)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"plt.style.use('seaborn-whitegrid')\n",
"import pandas as pd\n",
"import ipywidgets as widgets"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tasks"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a pandas DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Modify the code below such that *income* and *consumption* are variables in the *dt* DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id\n",
"0 0\n",
"1 1\n",
"2 2\n",
"3 3\n",
"4 4"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.random.seed(1999)\n",
" \n",
"N = 100\n",
"mydata = {}\n",
"mydata['id'] = range(N)\n",
"income = np.exp(np.random.normal(size=N))\n",
"consumption = np.sqrt(income)\n",
"\n",
"dt = pd.DataFrame(mydata)\n",
"dt.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" income | \n",
" consumption | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0.727981 | \n",
" 0.853218 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1.997831 | \n",
" 1.413447 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 0.276823 | \n",
" 0.526140 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 1.481931 | \n",
" 1.217346 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 1.235904 | \n",
" 1.111712 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id income consumption\n",
"0 0 0.727981 0.853218\n",
"1 1 1.997831 1.413447\n",
"2 2 0.276823 0.526140\n",
"3 3 1.481931 1.217346\n",
"4 4 1.235904 1.111712"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.random.seed(1999)\n",
"\n",
"N = 100\n",
"mydata = {}\n",
"mydata['id'] = range(N)\n",
"mydata['income'] = np.exp(np.random.normal(size=N))\n",
"mydata['consumption'] = np.sqrt(mydata['income'])\n",
"\n",
"dt_true = pd.DataFrame(mydata)\n",
"dt_true.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create new variable"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Add a new variable *ratio* which is the ratio of consumption to income."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id\n",
"0 0\n",
"1 1\n",
"2 2\n",
"3 3\n",
"4 4"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# write your code here\n",
"dt.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" income | \n",
" consumption | \n",
" ratio | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0.727981 | \n",
" 0.853218 | \n",
" 1.172033 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1.997831 | \n",
" 1.413447 | \n",
" 0.707490 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 0.276823 | \n",
" 0.526140 | \n",
" 1.900636 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 1.481931 | \n",
" 1.217346 | \n",
" 0.821459 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 1.235904 | \n",
" 1.111712 | \n",
" 0.899513 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id income consumption ratio\n",
"0 0 0.727981 0.853218 1.172033\n",
"1 1 1.997831 1.413447 0.707490\n",
"2 2 0.276823 0.526140 1.900636\n",
"3 3 1.481931 1.217346 0.821459\n",
"4 4 1.235904 1.111712 0.899513"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_true['ratio'] = dt_true['consumption']/dt_true['income']\n",
"dt_true.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summary statistics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Produce summary statistics using `.describe()`."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# write your code here"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" income | \n",
" consumption | \n",
" ratio | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 100.000000 | \n",
" 100.000000 | \n",
" 100.000000 | \n",
" 100.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 49.500000 | \n",
" 1.415547 | \n",
" 1.087844 | \n",
" 1.117517 | \n",
"
\n",
" \n",
" std | \n",
" 29.011492 | \n",
" 1.322203 | \n",
" 0.484238 | \n",
" 0.525452 | \n",
"
\n",
" \n",
" min | \n",
" 0.000000 | \n",
" 0.108402 | \n",
" 0.329245 | \n",
" 0.351134 | \n",
"
\n",
" \n",
" 25% | \n",
" 24.750000 | \n",
" 0.529323 | \n",
" 0.727545 | \n",
" 0.752310 | \n",
"
\n",
" \n",
" 50% | \n",
" 49.500000 | \n",
" 0.981178 | \n",
" 0.990533 | \n",
" 1.009580 | \n",
"
\n",
" \n",
" 75% | \n",
" 74.250000 | \n",
" 1.768211 | \n",
" 1.329572 | \n",
" 1.374491 | \n",
"
\n",
" \n",
" max | \n",
" 99.000000 | \n",
" 8.110612 | \n",
" 2.847914 | \n",
" 3.037255 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id income consumption ratio\n",
"count 100.000000 100.000000 100.000000 100.000000\n",
"mean 49.500000 1.415547 1.087844 1.117517\n",
"std 29.011492 1.322203 0.484238 0.525452\n",
"min 0.000000 0.108402 0.329245 0.351134\n",
"25% 24.750000 0.529323 0.727545 0.752310\n",
"50% 49.500000 0.981178 0.990533 1.009580\n",
"75% 74.250000 1.768211 1.329572 1.374491\n",
"max 99.000000 8.110612 2.847914 3.037255"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_true.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Indexing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Select everybody with an income above 1."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id\n",
"0 0\n",
"1 1\n",
"2 2\n",
"3 3\n",
"4 4"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# write your code here\n",
"dt.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" income | \n",
" consumption | \n",
" ratio | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 1 | \n",
" 1.997831 | \n",
" 1.413447 | \n",
" 0.707490 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 1.481931 | \n",
" 1.217346 | \n",
" 0.821459 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 1.235904 | \n",
" 1.111712 | \n",
" 0.899513 | \n",
"
\n",
" \n",
" 6 | \n",
" 6 | \n",
" 2.574032 | \n",
" 1.604379 | \n",
" 0.623294 | \n",
"
\n",
" \n",
" 7 | \n",
" 7 | \n",
" 2.475478 | \n",
" 1.573365 | \n",
" 0.635580 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id income consumption ratio\n",
"1 1 1.997831 1.413447 0.707490\n",
"3 3 1.481931 1.217346 0.821459\n",
"4 4 1.235904 1.111712 0.899513\n",
"6 6 2.574032 1.604379 0.623294\n",
"7 7 2.475478 1.573365 0.635580"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"I = dt_true['income'] > 1\n",
"dt_true.loc[I, :].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Select everybody with an income *above* 1 and a ratio *above* 0.7."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# write your code here"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" income | \n",
" consumption | \n",
" ratio | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 1 | \n",
" 1.997831 | \n",
" 1.413447 | \n",
" 0.707490 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 1.481931 | \n",
" 1.217346 | \n",
" 0.821459 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 1.235904 | \n",
" 1.111712 | \n",
" 0.899513 | \n",
"
\n",
" \n",
" 11 | \n",
" 11 | \n",
" 2.031708 | \n",
" 1.425380 | \n",
" 0.701567 | \n",
"
\n",
" \n",
" 18 | \n",
" 18 | \n",
" 1.280235 | \n",
" 1.131475 | \n",
" 0.883802 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id income consumption ratio\n",
"1 1 1.997831 1.413447 0.707490\n",
"3 3 1.481931 1.217346 0.821459\n",
"4 4 1.235904 1.111712 0.899513\n",
"11 11 2.031708 1.425380 0.701567\n",
"18 18 1.280235 1.131475 0.883802"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"I = (dt_true['income'] > 1) & (dt_true['ratio'] > 0.7)\n",
"dt_true.loc[I].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set consumption equal to 0.5 if income is less than 0.5."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# write your code here\n",
"# dt['consumption'].mean() # <- compare with answer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/plain": [
"1.075479712048503"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"I = (dt_true['income'] < 0.5)\n",
"dt_true.loc[I,['consumption']] = 0.5\n",
"dt_true['consumption'].mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set consumption equal to income if income is less than 0.5."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# write your code here\n",
"# dt['consumption'].mean() # <- compare with answer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/plain": [
"1.0337728690050052"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"I = (dt_true['income'] < 0.5)\n",
"dt_true.loc[I,['consumption']] = dt_true.loc[I,['income']].values\n",
"dt_true['consumption'].mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dropping"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Drop the *ratio* variable and all rows with an income above 1.5. After this, also drop the first 5 rows."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"before: 100 observations, 1 variables\n",
"after: 100 observations, 1 variables\n"
]
}
],
"source": [
"print(f'before: {dt.shape[0]} observations, {dt.shape[1]} variables')\n",
"# write your code here\n",
"print(f'after: {dt.shape[0]} observations, {dt.shape[1]} variables')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"before: 100 observations, 4 variables\n",
"after: 65 observations, 3 variables\n",
"after (alt): 65 observations, 3 variables\n"
]
}
],
"source": [
"dt_alt = dt_true.copy()\n",
"\n",
"print(f'before: {dt_true.shape[0]} observations, {dt_true.shape[1]} variables')\n",
"dt_true.drop('ratio',axis=1,inplace=True)\n",
"I = dt_true['income'] > 1.5\n",
"dt_true.drop(dt_true[I].index,inplace=True)\n",
"dt_true.drop(dt_true.loc[:5].index,inplace=True)\n",
"print(f'after: {dt_true.shape[0]} observations, {dt_true.shape[1]} variables')\n",
"\n",
"# alternative: keep where I is false\n",
"del dt_alt['ratio']\n",
"I = dt_alt['income'] > 1.5\n",
"dt_alt = dt_alt[~I]\n",
"dt_alt = dt_alt.iloc[5:,:]\n",
"print(f'after (alt): {dt_alt.shape[0]} observations, {dt_alt.shape[1]} variables')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Renaming"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rename *consumption* to *cons* and *income* to *inc*."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id\n",
"0 0\n",
"1 1\n",
"2 2\n",
"3 3\n",
"4 4"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# write your code\n",
"dt.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" inc | \n",
" con | \n",
"
\n",
" \n",
" \n",
" \n",
" 8 | \n",
" 8 | \n",
" 0.582074 | \n",
" 0.762938 | \n",
"
\n",
" \n",
" 10 | \n",
" 10 | \n",
" 0.932044 | \n",
" 0.965424 | \n",
"
\n",
" \n",
" 12 | \n",
" 12 | \n",
" 0.356952 | \n",
" 0.356952 | \n",
"
\n",
" \n",
" 13 | \n",
" 13 | \n",
" 0.379825 | \n",
" 0.379825 | \n",
"
\n",
" \n",
" 16 | \n",
" 16 | \n",
" 0.700896 | \n",
" 0.837195 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id inc con\n",
"8 8 0.582074 0.762938\n",
"10 10 0.932044 0.965424\n",
"12 12 0.356952 0.356952\n",
"13 13 0.379825 0.379825\n",
"16 16 0.700896 0.837195"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_true.rename(columns={'income':'inc','consumption':'con'},inplace=True)\n",
"dt_true.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Functions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Correct the wrong lines such that `assets_1 = assets_2 = assets_3 = assets_4`."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"failed\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id\n",
"0 0\n",
"1 1\n",
"2 2\n",
"3 3\n",
"4 4"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def assets_row_by_row(x,R,Y):\n",
" return 0 # wrong line\n",
" \n",
"def assets_all_at_once(income,consumption,R,Y):\n",
" return 0 # wrong line\n",
"\n",
"def assets_adj(assets,R,Y):\n",
" # missing lines\n",
" pass\n",
"\n",
"R = 1.2 # return rate\n",
"Y = 1 # income\n",
"try:\n",
" dt['assets_1'] = R*(dt['inc']-dt['con'])+Y\n",
" dt['assets_2'] = dt.apply(assets_row_by_row,axis=1,args=(R,Y))\n",
" dt['assets_3'] = assets_all_at_once(dt['inc'].values(),dt['con'].values(),R,Y)\n",
" dt['assets_4'] = dt['inc']-dt['con']\n",
" assets_adj(dt['assets_4'],R,Y)\n",
"except:\n",
" print('failed')\n",
"dt.head() "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" inc | \n",
" con | \n",
" assets_1 | \n",
" assets_2 | \n",
" assets_3 | \n",
" assets_4 | \n",
"
\n",
" \n",
" \n",
" \n",
" 8 | \n",
" 8 | \n",
" 0.582074 | \n",
" 0.762938 | \n",
" 0.782963 | \n",
" 0.782963 | \n",
" 0.782963 | \n",
" 0.782963 | \n",
"
\n",
" \n",
" 10 | \n",
" 10 | \n",
" 0.932044 | \n",
" 0.965424 | \n",
" 0.959943 | \n",
" 0.959943 | \n",
" 0.959943 | \n",
" 0.959943 | \n",
"
\n",
" \n",
" 12 | \n",
" 12 | \n",
" 0.356952 | \n",
" 0.356952 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 13 | \n",
" 13 | \n",
" 0.379825 | \n",
" 0.379825 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 16 | \n",
" 16 | \n",
" 0.700896 | \n",
" 0.837195 | \n",
" 0.836441 | \n",
" 0.836441 | \n",
" 0.836441 | \n",
" 0.836441 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id inc con assets_1 assets_2 assets_3 assets_4\n",
"8 8 0.582074 0.762938 0.782963 0.782963 0.782963 0.782963\n",
"10 10 0.932044 0.965424 0.959943 0.959943 0.959943 0.959943\n",
"12 12 0.356952 0.356952 1.000000 1.000000 1.000000 1.000000\n",
"13 13 0.379825 0.379825 1.000000 1.000000 1.000000 1.000000\n",
"16 16 0.700896 0.837195 0.836441 0.836441 0.836441 0.836441"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def assets_row_by_row(x,R,Y):\n",
" return R*(x['inc']-x['con'])+Y\n",
" \n",
"def assets_all_at_once(income,consumption,R,Y):\n",
" return R*(income-consumption)+Y\n",
"\n",
"def assets_adj(assets,R,Y):\n",
" assets *= R\n",
" assets += Y\n",
"\n",
"R = 1.2 # return rate\n",
"Y = 1 # income\n",
"dt_true['assets_1'] = R*(dt_true['inc']-dt_true['con'])+Y\n",
"dt_true['assets_2'] = dt_true.apply(assets_row_by_row,axis=1,args=(R,Y))\n",
"dt_true['assets_3'] = assets_all_at_once(dt_true['inc'].values,dt_true['con'].values,R,Y)\n",
"dt_true['assets_4'] = dt_true['inc']-dt_true['con']\n",
"assets_adj(dt_true['assets_4'],R,Y)\n",
"\n",
"dt_true.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Problem"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the data set in *data/NAH1_pivoted.xlsx* and clean and structure it such that the `plot_timeseries(dataframe)` below can be run and produce an interactive figure. "
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"def _plot_timeseries(dataframe, variable, years):\n",
" \n",
" fig = plt.figure(dpi=100)\n",
" ax = fig.add_subplot(1,1,1)\n",
" \n",
" dataframe.loc[:,['year']] = pd.to_numeric(dataframe['year'])\n",
" I = (dataframe['year'] >= years[0]) & (dataframe['year'] <= years[1])\n",
" \n",
" x = dataframe.loc[I,'year']\n",
" y = dataframe.loc[I,variable]\n",
" ax.plot(x,y)\n",
" \n",
" ax.set_xticks(list(range(years[0], years[1] + 1, 5))) \n",
" \n",
"def plot_timeseries(dataframe):\n",
" \n",
" widgets.interact(_plot_timeseries, \n",
" dataframe = widgets.fixed(dataframe),\n",
" variable = widgets.Dropdown(\n",
" description='variable', \n",
" options=['Y','C','G','I','X','M'], \n",
" value='Y'),\n",
" years=widgets.IntRangeSlider(\n",
" description=\"years\",\n",
" min=1966,\n",
" max=2018,\n",
" value=[1980, 2018],\n",
" continuous_update=False,\n",
" ) \n",
"); "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Hint 1:** You can base your renaming on this dictionary:"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"rename_dict = {}\n",
"rename_dict['P.1 Output'] = 'Y'\n",
"rename_dict['P.3 Final consumption expenditure'] = 'C'\n",
"rename_dict['P.3 Government consumption expenditure'] = 'G'\n",
"rename_dict['P.5 Gross capital formation'] = 'I'\n",
"rename_dict['P.6 Export of goods and services'] = 'X'\n",
"rename_dict['P.7 Import of goods and services'] = 'M'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Hint 2:** You code should have the following structure:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# a. load data set\n",
"# nah1 = ?\n",
"\n",
"# b. rename variables\n",
"\n",
"# c. remove rows where Y is nan\n",
"\n",
"# d. correct year column data\n",
"# hint, nah1.loc[J,['year']] = nah1.loc[I,['year']].values\n",
"\n",
"# e. only keep rows with '2010-prices, chained values'\n",
"\n",
"# f. only keep renamed variables\n",
"\n",
"# g. interactive plot\n",
"# plot_timeseries(nan)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Answer:**"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" year | \n",
" Y | \n",
" C | \n",
" G | \n",
" I | \n",
" X | \n",
" M | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 1966 | \n",
" 1089347.0 | \n",
" 549491.0 | \n",
" 171307.0 | \n",
" 127631.0 | \n",
" 123594.0 | \n",
" 112171.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 1967 | \n",
" 1124579.0 | \n",
" 588094.0 | \n",
" 184983.0 | \n",
" 133749.0 | \n",
" 128182.0 | \n",
" 120028.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 1968 | \n",
" 1162778.0 | \n",
" 612327.0 | \n",
" 195664.0 | \n",
" 142242.0 | \n",
" 140746.0 | \n",
" 127670.0 | \n",
"
\n",
" \n",
" 7 | \n",
" 1969 | \n",
" 1239121.0 | \n",
" 649337.0 | \n",
" 209375.0 | \n",
" 164447.0 | \n",
" 149184.0 | \n",
" 144021.0 | \n",
"
\n",
" \n",
" 9 | \n",
" 1970 | \n",
" 1282852.0 | \n",
" 671805.0 | \n",
" 222350.0 | \n",
" 168129.0 | \n",
" 154880.0 | \n",
" 157151.0 | \n",
"
\n",
" \n",
" 11 | \n",
" 1971 | \n",
" 1315198.0 | \n",
" 687221.0 | \n",
" 235965.0 | \n",
" 167979.0 | \n",
" 164685.0 | \n",
" 159437.0 | \n",
"
\n",
" \n",
" 13 | \n",
" 1972 | \n",
" 1375211.0 | \n",
" 696409.0 | \n",
" 246412.0 | \n",
" 179858.0 | \n",
" 173351.0 | \n",
" 161273.0 | \n",
"
\n",
" \n",
" 15 | \n",
" 1973 | \n",
" 1433881.0 | \n",
" 736943.0 | \n",
" 250919.0 | \n",
" 195814.0 | \n",
" 187836.0 | \n",
" 189976.0 | \n",
"
\n",
" \n",
" 17 | \n",
" 1974 | \n",
" 1420321.0 | \n",
" 728730.0 | \n",
" 255025.0 | \n",
" 182647.0 | \n",
" 194250.0 | \n",
" 186142.0 | \n",
"
\n",
" \n",
" 19 | \n",
" 1975 | \n",
" 1384905.0 | \n",
" 742587.0 | \n",
" 252814.0 | \n",
" 152977.0 | \n",
" 192843.0 | \n",
" 176903.0 | \n",
"
\n",
" \n",
" 21 | \n",
" 1976 | \n",
" 1481439.0 | \n",
" 791533.0 | \n",
" 263746.0 | \n",
" 185880.0 | \n",
" 199459.0 | \n",
" 206589.0 | \n",
"
\n",
" \n",
" 23 | \n",
" 1977 | \n",
" 1511387.0 | \n",
" 806350.0 | \n",
" 268008.0 | \n",
" 183381.0 | \n",
" 206612.0 | \n",
" 208364.0 | \n",
"
\n",
" \n",
" 25 | \n",
" 1978 | \n",
" 1553317.0 | \n",
" 831830.0 | \n",
" 285396.0 | \n",
" 180665.0 | \n",
" 209496.0 | \n",
" 210385.0 | \n",
"
\n",
" \n",
" 27 | \n",
" 1979 | \n",
" 1620148.0 | \n",
" 851321.0 | \n",
" 300440.0 | \n",
" 188518.0 | \n",
" 232309.0 | \n",
" 224407.0 | \n",
"
\n",
" \n",
" 29 | \n",
" 1980 | \n",
" 1615002.0 | \n",
" 846984.0 | \n",
" 310237.0 | \n",
" 160224.0 | \n",
" 245273.0 | \n",
" 211920.0 | \n",
"
\n",
" \n",
" 31 | \n",
" 1981 | \n",
" 1594442.0 | \n",
" 847062.0 | \n",
" 318344.0 | \n",
" 133904.0 | \n",
" 266517.0 | \n",
" 213097.0 | \n",
"
\n",
" \n",
" 33 | \n",
" 1982 | \n",
" 1627760.0 | \n",
" 864265.0 | \n",
" 328757.0 | \n",
" 149436.0 | \n",
" 274952.0 | \n",
" 219781.0 | \n",
"
\n",
" \n",
" 35 | \n",
" 1983 | \n",
" 1654323.0 | \n",
" 874955.0 | \n",
" 328561.0 | \n",
" 154643.0 | \n",
" 287589.0 | \n",
" 224112.0 | \n",
"
\n",
" \n",
" 37 | \n",
" 1984 | \n",
" 1740838.0 | \n",
" 890327.0 | \n",
" 323091.0 | \n",
" 183220.0 | \n",
" 297074.0 | \n",
" 236128.0 | \n",
"
\n",
" \n",
" 39 | \n",
" 1985 | \n",
" 1825992.0 | \n",
" 923762.0 | \n",
" 330263.0 | \n",
" 203955.0 | \n",
" 315007.0 | \n",
" 259576.0 | \n",
"
\n",
" \n",
" 41 | \n",
" 1986 | \n",
" 1938199.0 | \n",
" 970914.0 | \n",
" 332830.0 | \n",
" 236282.0 | \n",
" 319226.0 | \n",
" 281585.0 | \n",
"
\n",
" \n",
" 43 | \n",
" 1987 | \n",
" 1960401.0 | \n",
" 959976.0 | \n",
" 334893.0 | \n",
" 227970.0 | \n",
" 334732.0 | \n",
" 278267.0 | \n",
"
\n",
" \n",
" 45 | \n",
" 1988 | \n",
" 1983427.0 | \n",
" 947774.0 | \n",
" 334632.0 | \n",
" 220781.0 | \n",
" 365333.0 | \n",
" 290022.0 | \n",
"
\n",
" \n",
" 47 | \n",
" 1989 | \n",
" 1994879.0 | \n",
" 947038.0 | \n",
" 331650.0 | \n",
" 228800.0 | \n",
" 382350.0 | \n",
" 305677.0 | \n",
"
\n",
" \n",
" 49 | \n",
" 1990 | \n",
" 2032441.0 | \n",
" 947408.0 | \n",
" 326540.0 | \n",
" 227572.0 | \n",
" 407305.0 | \n",
" 312939.0 | \n",
"
\n",
" \n",
" 51 | \n",
" 1991 | \n",
" 2049177.0 | \n",
" 962895.0 | \n",
" 331961.0 | \n",
" 218680.0 | \n",
" 432431.0 | \n",
" 325555.0 | \n",
"
\n",
" \n",
" 53 | \n",
" 1992 | \n",
" 2080460.0 | \n",
" 981584.0 | \n",
" 334136.0 | \n",
" 222853.0 | \n",
" 433602.0 | \n",
" 325129.0 | \n",
"
\n",
" \n",
" 55 | \n",
" 1993 | \n",
" 2108104.0 | \n",
" 989510.0 | \n",
" 348366.0 | \n",
" 205585.0 | \n",
" 438932.0 | \n",
" 320528.0 | \n",
"
\n",
" \n",
" 57 | \n",
" 1994 | \n",
" 2210638.0 | \n",
" 1040029.0 | \n",
" 355506.0 | \n",
" 233340.0 | \n",
" 475081.0 | \n",
" 363124.0 | \n",
"
\n",
" \n",
" 59 | \n",
" 1995 | \n",
" 2285325.0 | \n",
" 1059095.0 | \n",
" 363169.0 | \n",
" 267590.0 | \n",
" 488777.0 | \n",
" 388553.0 | \n",
"
\n",
" \n",
" 61 | \n",
" 1996 | \n",
" 2352072.0 | \n",
" 1087124.0 | \n",
" 374242.0 | \n",
" 270410.0 | \n",
" 511506.0 | \n",
" 400680.0 | \n",
"
\n",
" \n",
" 63 | \n",
" 1997 | \n",
" 2425112.0 | \n",
" 1110554.0 | \n",
" 376151.0 | \n",
" 309371.0 | \n",
" 534460.0 | \n",
" 437574.0 | \n",
"
\n",
" \n",
" 65 | \n",
" 1998 | \n",
" 2495172.0 | \n",
" 1139300.0 | \n",
" 387763.0 | \n",
" 327558.0 | \n",
" 556380.0 | \n",
" 470721.0 | \n",
"
\n",
" \n",
" 67 | \n",
" 1999 | \n",
" 2598080.0 | \n",
" 1150485.0 | \n",
" 400446.0 | \n",
" 311579.0 | \n",
" 619082.0 | \n",
" 482738.0 | \n",
"
\n",
" \n",
" 69 | \n",
" 2000 | \n",
" 2727229.0 | \n",
" 1164693.0 | \n",
" 412188.0 | \n",
" 348171.0 | \n",
" 696948.0 | \n",
" 548713.0 | \n",
"
\n",
" \n",
" 71 | \n",
" 2001 | \n",
" 2792144.0 | \n",
" 1173920.0 | \n",
" 420034.0 | \n",
" 343040.0 | \n",
" 720315.0 | \n",
" 561855.0 | \n",
"
\n",
" \n",
" 73 | \n",
" 2002 | \n",
" 2820514.0 | \n",
" 1193647.0 | \n",
" 428984.0 | \n",
" 338918.0 | \n",
" 751704.0 | \n",
" 597680.0 | \n",
"
\n",
" \n",
" 75 | \n",
" 2003 | \n",
" 2826381.0 | \n",
" 1205061.0 | \n",
" 430025.0 | \n",
" 336613.0 | \n",
" 742669.0 | \n",
" 591557.0 | \n",
"
\n",
" \n",
" 77 | \n",
" 2004 | \n",
" 2893370.0 | \n",
" 1248028.0 | \n",
" 436581.0 | \n",
" 359370.0 | \n",
" 765061.0 | \n",
" 633775.0 | \n",
"
\n",
" \n",
" 79 | \n",
" 2005 | \n",
" 3042633.0 | \n",
" 1283510.0 | \n",
" 441847.0 | \n",
" 379250.0 | \n",
" 824193.0 | \n",
" 705349.0 | \n",
"
\n",
" \n",
" 81 | \n",
" 2006 | \n",
" 3212095.0 | \n",
" 1319567.0 | \n",
" 453065.0 | \n",
" 427665.0 | \n",
" 909395.0 | \n",
" 803852.0 | \n",
"
\n",
" \n",
" 83 | \n",
" 2007 | \n",
" 3304041.0 | \n",
" 1340375.0 | \n",
" 458504.0 | \n",
" 439093.0 | \n",
" 942620.0 | \n",
" 850781.0 | \n",
"
\n",
" \n",
" 85 | \n",
" 2008 | \n",
" 3333338.0 | \n",
" 1359497.0 | \n",
" 473421.0 | \n",
" 418030.0 | \n",
" 979118.0 | \n",
" 891365.0 | \n",
"
\n",
" \n",
" 87 | \n",
" 2009 | \n",
" 3114274.0 | \n",
" 1343243.0 | \n",
" 487752.0 | \n",
" 329277.0 | \n",
" 888801.0 | \n",
" 784904.0 | \n",
"
\n",
" \n",
" 89 | \n",
" 2010 | \n",
" 3120573.0 | \n",
" 1357797.0 | \n",
" 495575.0 | \n",
" 327344.0 | \n",
" 914933.0 | \n",
" 789148.0 | \n",
"
\n",
" \n",
" 91 | \n",
" 2011 | \n",
" 3198587.0 | \n",
" 1357064.0 | \n",
" 492464.0 | \n",
" 345198.0 | \n",
" 980761.0 | \n",
" 847889.0 | \n",
"
\n",
" \n",
" 93 | \n",
" 2012 | \n",
" 3225817.0 | \n",
" 1365205.0 | \n",
" 496195.0 | \n",
" 353120.0 | \n",
" 992159.0 | \n",
" 870858.0 | \n",
"
\n",
" \n",
" 95 | \n",
" 2013 | \n",
" 3219252.0 | \n",
" 1367181.0 | \n",
" 495701.0 | \n",
" 365093.0 | \n",
" 1008147.0 | \n",
" 883665.0 | \n",
"
\n",
" \n",
" 97 | \n",
" 2014 | \n",
" 3276865.0 | \n",
" 1384585.0 | \n",
" 505299.0 | \n",
" 380755.0 | \n",
" 1039691.0 | \n",
" 918099.0 | \n",
"
\n",
" \n",
" 99 | \n",
" 2015 | \n",
" 3368200.0 | \n",
" 1413235.0 | \n",
" 513801.0 | \n",
" 400705.0 | \n",
" 1076884.0 | \n",
" 959972.0 | \n",
"
\n",
" \n",
" 101 | \n",
" 2016 | \n",
" 3454769.0 | \n",
" 1433261.0 | \n",
" 514584.0 | \n",
" 425666.0 | \n",
" 1118549.0 | \n",
" 1000599.0 | \n",
"
\n",
" \n",
" 103 | \n",
" 2017 | \n",
" 3533677.0 | \n",
" 1456463.0 | \n",
" 518315.0 | \n",
" 442594.0 | \n",
" 1158517.0 | \n",
" 1036219.0 | \n",
"
\n",
" \n",
" 105 | \n",
" 2018 | \n",
" 3570475.0 | \n",
" 1481042.0 | \n",
" 521123.0 | \n",
" 465359.0 | \n",
" 1164099.0 | \n",
" 1065876.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" year Y C G I X M\n",
"1 1966 1089347.0 549491.0 171307.0 127631.0 123594.0 112171.0\n",
"3 1967 1124579.0 588094.0 184983.0 133749.0 128182.0 120028.0\n",
"5 1968 1162778.0 612327.0 195664.0 142242.0 140746.0 127670.0\n",
"7 1969 1239121.0 649337.0 209375.0 164447.0 149184.0 144021.0\n",
"9 1970 1282852.0 671805.0 222350.0 168129.0 154880.0 157151.0\n",
"11 1971 1315198.0 687221.0 235965.0 167979.0 164685.0 159437.0\n",
"13 1972 1375211.0 696409.0 246412.0 179858.0 173351.0 161273.0\n",
"15 1973 1433881.0 736943.0 250919.0 195814.0 187836.0 189976.0\n",
"17 1974 1420321.0 728730.0 255025.0 182647.0 194250.0 186142.0\n",
"19 1975 1384905.0 742587.0 252814.0 152977.0 192843.0 176903.0\n",
"21 1976 1481439.0 791533.0 263746.0 185880.0 199459.0 206589.0\n",
"23 1977 1511387.0 806350.0 268008.0 183381.0 206612.0 208364.0\n",
"25 1978 1553317.0 831830.0 285396.0 180665.0 209496.0 210385.0\n",
"27 1979 1620148.0 851321.0 300440.0 188518.0 232309.0 224407.0\n",
"29 1980 1615002.0 846984.0 310237.0 160224.0 245273.0 211920.0\n",
"31 1981 1594442.0 847062.0 318344.0 133904.0 266517.0 213097.0\n",
"33 1982 1627760.0 864265.0 328757.0 149436.0 274952.0 219781.0\n",
"35 1983 1654323.0 874955.0 328561.0 154643.0 287589.0 224112.0\n",
"37 1984 1740838.0 890327.0 323091.0 183220.0 297074.0 236128.0\n",
"39 1985 1825992.0 923762.0 330263.0 203955.0 315007.0 259576.0\n",
"41 1986 1938199.0 970914.0 332830.0 236282.0 319226.0 281585.0\n",
"43 1987 1960401.0 959976.0 334893.0 227970.0 334732.0 278267.0\n",
"45 1988 1983427.0 947774.0 334632.0 220781.0 365333.0 290022.0\n",
"47 1989 1994879.0 947038.0 331650.0 228800.0 382350.0 305677.0\n",
"49 1990 2032441.0 947408.0 326540.0 227572.0 407305.0 312939.0\n",
"51 1991 2049177.0 962895.0 331961.0 218680.0 432431.0 325555.0\n",
"53 1992 2080460.0 981584.0 334136.0 222853.0 433602.0 325129.0\n",
"55 1993 2108104.0 989510.0 348366.0 205585.0 438932.0 320528.0\n",
"57 1994 2210638.0 1040029.0 355506.0 233340.0 475081.0 363124.0\n",
"59 1995 2285325.0 1059095.0 363169.0 267590.0 488777.0 388553.0\n",
"61 1996 2352072.0 1087124.0 374242.0 270410.0 511506.0 400680.0\n",
"63 1997 2425112.0 1110554.0 376151.0 309371.0 534460.0 437574.0\n",
"65 1998 2495172.0 1139300.0 387763.0 327558.0 556380.0 470721.0\n",
"67 1999 2598080.0 1150485.0 400446.0 311579.0 619082.0 482738.0\n",
"69 2000 2727229.0 1164693.0 412188.0 348171.0 696948.0 548713.0\n",
"71 2001 2792144.0 1173920.0 420034.0 343040.0 720315.0 561855.0\n",
"73 2002 2820514.0 1193647.0 428984.0 338918.0 751704.0 597680.0\n",
"75 2003 2826381.0 1205061.0 430025.0 336613.0 742669.0 591557.0\n",
"77 2004 2893370.0 1248028.0 436581.0 359370.0 765061.0 633775.0\n",
"79 2005 3042633.0 1283510.0 441847.0 379250.0 824193.0 705349.0\n",
"81 2006 3212095.0 1319567.0 453065.0 427665.0 909395.0 803852.0\n",
"83 2007 3304041.0 1340375.0 458504.0 439093.0 942620.0 850781.0\n",
"85 2008 3333338.0 1359497.0 473421.0 418030.0 979118.0 891365.0\n",
"87 2009 3114274.0 1343243.0 487752.0 329277.0 888801.0 784904.0\n",
"89 2010 3120573.0 1357797.0 495575.0 327344.0 914933.0 789148.0\n",
"91 2011 3198587.0 1357064.0 492464.0 345198.0 980761.0 847889.0\n",
"93 2012 3225817.0 1365205.0 496195.0 353120.0 992159.0 870858.0\n",
"95 2013 3219252.0 1367181.0 495701.0 365093.0 1008147.0 883665.0\n",
"97 2014 3276865.0 1384585.0 505299.0 380755.0 1039691.0 918099.0\n",
"99 2015 3368200.0 1413235.0 513801.0 400705.0 1076884.0 959972.0\n",
"101 2016 3454769.0 1433261.0 514584.0 425666.0 1118549.0 1000599.0\n",
"103 2017 3533677.0 1456463.0 518315.0 442594.0 1158517.0 1036219.0\n",
"105 2018 3570475.0 1481042.0 521123.0 465359.0 1164099.0 1065876.0"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# a. load data set\n",
"nah1 = pd.read_excel('data/NAH1_pivoted.xlsx',skiprows=2)\n",
"\n",
"# b. rename variables\n",
"rename_dict['Unnamed: 0'] = 'year'\n",
"nah1.rename(columns=rename_dict,inplace=True)\n",
"\n",
"# c. remove rows where Y is nan\n",
"I = nah1['Y'].notna()\n",
"nah1 = nah1[I]\n",
"\n",
"# d. correct year column data\n",
"I = nah1['year'].notna()\n",
"J = nah1['year'].isna()\n",
"nah1.loc[J,['year']] = nah1.loc[I,['year']].values\n",
"\n",
"# e. only keep rows with '2010-prices, chained values'\n",
"I = nah1['Unnamed: 1'] == '2010-prices, chained values'\n",
"nah1 = nah1[I]\n",
"\n",
"# f. only keep renamed variables\n",
"nah1 = nah1.loc[:,['year','Y','C','G','I','X','M']]\n",
"nah1"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0d49e12a68764ea9bef99c7dd07bc38a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(Dropdown(description='variable', options=('Y', 'C', 'G', 'I', 'X', 'M'), value='Y'), Int…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot_timeseries(nah1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Extra problems"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Extend interactive plot"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Extend the interactive plot with a choice of *real* vs *nominal*."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## New data set"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load data from an Excel or CSV file you have downloaded from e.g. [Statistikbanken.dk](https://www.statistikbanken.dk/). Clean, structure and present the data as you see fit."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
},
"toc-autonumbering": true
},
"nbformat": 4,
"nbformat_minor": 4
}