{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Problem set 3: Loading and structuring data from Denmark Statistics" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[](https://mybinder.org/v2/gh/NumEconCopenhagen/exercises-2020/master?urlpath=lab/tree/PS3/problem_set_3.ipynb)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "plt.style.use('seaborn-whitegrid')\n", "import pandas as pd\n", "import ipywidgets as widgets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Tasks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create a pandas DataFrame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Modify the code below such that *income* and *consumption* are variables in the *dt* DataFrame." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id
00
11
22
33
44
\n", "
" ], "text/plain": [ " id\n", "0 0\n", "1 1\n", "2 2\n", "3 3\n", "4 4" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.random.seed(1999)\n", " \n", "N = 100\n", "mydata = {}\n", "mydata['id'] = range(N)\n", "income = np.exp(np.random.normal(size=N))\n", "consumption = np.sqrt(income)\n", "\n", "dt = pd.DataFrame(mydata)\n", "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idincomeconsumption
000.7279810.853218
111.9978311.413447
220.2768230.526140
331.4819311.217346
441.2359041.111712
\n", "
" ], "text/plain": [ " id income consumption\n", "0 0 0.727981 0.853218\n", "1 1 1.997831 1.413447\n", "2 2 0.276823 0.526140\n", "3 3 1.481931 1.217346\n", "4 4 1.235904 1.111712" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.random.seed(1999)\n", "\n", "N = 100\n", "mydata = {}\n", "mydata['id'] = range(N)\n", "mydata['income'] = np.exp(np.random.normal(size=N))\n", "mydata['consumption'] = np.sqrt(mydata['income'])\n", "\n", "dt_true = pd.DataFrame(mydata)\n", "dt_true.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create new variable" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Add a new variable *ratio* which is the ratio of consumption to income." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id
00
11
22
33
44
\n", "
" ], "text/plain": [ " id\n", "0 0\n", "1 1\n", "2 2\n", "3 3\n", "4 4" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# write your code here\n", "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idincomeconsumptionratio
000.7279810.8532181.172033
111.9978311.4134470.707490
220.2768230.5261401.900636
331.4819311.2173460.821459
441.2359041.1117120.899513
\n", "
" ], "text/plain": [ " id income consumption ratio\n", "0 0 0.727981 0.853218 1.172033\n", "1 1 1.997831 1.413447 0.707490\n", "2 2 0.276823 0.526140 1.900636\n", "3 3 1.481931 1.217346 0.821459\n", "4 4 1.235904 1.111712 0.899513" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt_true['ratio'] = dt_true['consumption']/dt_true['income']\n", "dt_true.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Summary statistics" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Produce summary statistics using `.describe()`." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# write your code here" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idincomeconsumptionratio
count100.000000100.000000100.000000100.000000
mean49.5000001.4155471.0878441.117517
std29.0114921.3222030.4842380.525452
min0.0000000.1084020.3292450.351134
25%24.7500000.5293230.7275450.752310
50%49.5000000.9811780.9905331.009580
75%74.2500001.7682111.3295721.374491
max99.0000008.1106122.8479143.037255
\n", "
" ], "text/plain": [ " id income consumption ratio\n", "count 100.000000 100.000000 100.000000 100.000000\n", "mean 49.500000 1.415547 1.087844 1.117517\n", "std 29.011492 1.322203 0.484238 0.525452\n", "min 0.000000 0.108402 0.329245 0.351134\n", "25% 24.750000 0.529323 0.727545 0.752310\n", "50% 49.500000 0.981178 0.990533 1.009580\n", "75% 74.250000 1.768211 1.329572 1.374491\n", "max 99.000000 8.110612 2.847914 3.037255" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt_true.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Indexing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select everybody with an income above 1." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id
00
11
22
33
44
\n", "
" ], "text/plain": [ " id\n", "0 0\n", "1 1\n", "2 2\n", "3 3\n", "4 4" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# write your code here\n", "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idincomeconsumptionratio
111.9978311.4134470.707490
331.4819311.2173460.821459
441.2359041.1117120.899513
662.5740321.6043790.623294
772.4754781.5733650.635580
\n", "
" ], "text/plain": [ " id income consumption ratio\n", "1 1 1.997831 1.413447 0.707490\n", "3 3 1.481931 1.217346 0.821459\n", "4 4 1.235904 1.111712 0.899513\n", "6 6 2.574032 1.604379 0.623294\n", "7 7 2.475478 1.573365 0.635580" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "I = dt_true['income'] > 1\n", "dt_true.loc[I, :].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select everybody with an income *above* 1 and a ratio *above* 0.7." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# write your code here" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idincomeconsumptionratio
111.9978311.4134470.707490
331.4819311.2173460.821459
441.2359041.1117120.899513
11112.0317081.4253800.701567
18181.2802351.1314750.883802
\n", "
" ], "text/plain": [ " id income consumption ratio\n", "1 1 1.997831 1.413447 0.707490\n", "3 3 1.481931 1.217346 0.821459\n", "4 4 1.235904 1.111712 0.899513\n", "11 11 2.031708 1.425380 0.701567\n", "18 18 1.280235 1.131475 0.883802" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "I = (dt_true['income'] > 1) & (dt_true['ratio'] > 0.7)\n", "dt_true.loc[I].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set consumption equal to 0.5 if income is less than 0.5." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# write your code here\n", "# dt['consumption'].mean() # <- compare with answer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/plain": [ "1.075479712048503" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "I = (dt_true['income'] < 0.5)\n", "dt_true.loc[I,['consumption']] = 0.5\n", "dt_true['consumption'].mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set consumption equal to income if income is less than 0.5." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# write your code here\n", "# dt['consumption'].mean() # <- compare with answer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/plain": [ "1.0337728690050052" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "I = (dt_true['income'] < 0.5)\n", "dt_true.loc[I,['consumption']] = dt_true.loc[I,['income']].values\n", "dt_true['consumption'].mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dropping" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Drop the *ratio* variable and all rows with an income above 1.5. After this, also drop the first 5 rows." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "before: 100 observations, 1 variables\n", "after: 100 observations, 1 variables\n" ] } ], "source": [ "print(f'before: {dt.shape[0]} observations, {dt.shape[1]} variables')\n", "# write your code here\n", "print(f'after: {dt.shape[0]} observations, {dt.shape[1]} variables')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "before: 100 observations, 4 variables\n", "after: 65 observations, 3 variables\n", "after (alt): 65 observations, 3 variables\n" ] } ], "source": [ "dt_alt = dt_true.copy()\n", "\n", "print(f'before: {dt_true.shape[0]} observations, {dt_true.shape[1]} variables')\n", "dt_true.drop('ratio',axis=1,inplace=True)\n", "I = dt_true['income'] > 1.5\n", "dt_true.drop(dt_true[I].index,inplace=True)\n", "dt_true.drop(dt_true.loc[:5].index,inplace=True)\n", "print(f'after: {dt_true.shape[0]} observations, {dt_true.shape[1]} variables')\n", "\n", "# alternative: keep where I is false\n", "del dt_alt['ratio']\n", "I = dt_alt['income'] > 1.5\n", "dt_alt = dt_alt[~I]\n", "dt_alt = dt_alt.iloc[5:,:]\n", "print(f'after (alt): {dt_alt.shape[0]} observations, {dt_alt.shape[1]} variables')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Renaming" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Rename *consumption* to *cons* and *income* to *inc*." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id
00
11
22
33
44
\n", "
" ], "text/plain": [ " id\n", "0 0\n", "1 1\n", "2 2\n", "3 3\n", "4 4" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# write your code\n", "dt.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idinccon
880.5820740.762938
10100.9320440.965424
12120.3569520.356952
13130.3798250.379825
16160.7008960.837195
\n", "
" ], "text/plain": [ " id inc con\n", "8 8 0.582074 0.762938\n", "10 10 0.932044 0.965424\n", "12 12 0.356952 0.356952\n", "13 13 0.379825 0.379825\n", "16 16 0.700896 0.837195" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt_true.rename(columns={'income':'inc','consumption':'con'},inplace=True)\n", "dt_true.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Functions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Correct the wrong lines such that `assets_1 = assets_2 = assets_3 = assets_4`." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "failed\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id
00
11
22
33
44
\n", "
" ], "text/plain": [ " id\n", "0 0\n", "1 1\n", "2 2\n", "3 3\n", "4 4" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def assets_row_by_row(x,R,Y):\n", " return 0 # wrong line\n", " \n", "def assets_all_at_once(income,consumption,R,Y):\n", " return 0 # wrong line\n", "\n", "def assets_adj(assets,R,Y):\n", " # missing lines\n", " pass\n", "\n", "R = 1.2 # return rate\n", "Y = 1 # income\n", "try:\n", " dt['assets_1'] = R*(dt['inc']-dt['con'])+Y\n", " dt['assets_2'] = dt.apply(assets_row_by_row,axis=1,args=(R,Y))\n", " dt['assets_3'] = assets_all_at_once(dt['inc'].values(),dt['con'].values(),R,Y)\n", " dt['assets_4'] = dt['inc']-dt['con']\n", " assets_adj(dt['assets_4'],R,Y)\n", "except:\n", " print('failed')\n", "dt.head() " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idincconassets_1assets_2assets_3assets_4
880.5820740.7629380.7829630.7829630.7829630.782963
10100.9320440.9654240.9599430.9599430.9599430.959943
12120.3569520.3569521.0000001.0000001.0000001.000000
13130.3798250.3798251.0000001.0000001.0000001.000000
16160.7008960.8371950.8364410.8364410.8364410.836441
\n", "
" ], "text/plain": [ " id inc con assets_1 assets_2 assets_3 assets_4\n", "8 8 0.582074 0.762938 0.782963 0.782963 0.782963 0.782963\n", "10 10 0.932044 0.965424 0.959943 0.959943 0.959943 0.959943\n", "12 12 0.356952 0.356952 1.000000 1.000000 1.000000 1.000000\n", "13 13 0.379825 0.379825 1.000000 1.000000 1.000000 1.000000\n", "16 16 0.700896 0.837195 0.836441 0.836441 0.836441 0.836441" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def assets_row_by_row(x,R,Y):\n", " return R*(x['inc']-x['con'])+Y\n", " \n", "def assets_all_at_once(income,consumption,R,Y):\n", " return R*(income-consumption)+Y\n", "\n", "def assets_adj(assets,R,Y):\n", " assets *= R\n", " assets += Y\n", "\n", "R = 1.2 # return rate\n", "Y = 1 # income\n", "dt_true['assets_1'] = R*(dt_true['inc']-dt_true['con'])+Y\n", "dt_true['assets_2'] = dt_true.apply(assets_row_by_row,axis=1,args=(R,Y))\n", "dt_true['assets_3'] = assets_all_at_once(dt_true['inc'].values,dt_true['con'].values,R,Y)\n", "dt_true['assets_4'] = dt_true['inc']-dt_true['con']\n", "assets_adj(dt_true['assets_4'],R,Y)\n", "\n", "dt_true.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Problem" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load the data set in *data/NAH1_pivoted.xlsx* and clean and structure it such that the `plot_timeseries(dataframe)` below can be run and produce an interactive figure. " ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "def _plot_timeseries(dataframe, variable, years):\n", " \n", " fig = plt.figure(dpi=100)\n", " ax = fig.add_subplot(1,1,1)\n", " \n", " dataframe.loc[:,['year']] = pd.to_numeric(dataframe['year'])\n", " I = (dataframe['year'] >= years[0]) & (dataframe['year'] <= years[1])\n", " \n", " x = dataframe.loc[I,'year']\n", " y = dataframe.loc[I,variable]\n", " ax.plot(x,y)\n", " \n", " ax.set_xticks(list(range(years[0], years[1] + 1, 5))) \n", " \n", "def plot_timeseries(dataframe):\n", " \n", " widgets.interact(_plot_timeseries, \n", " dataframe = widgets.fixed(dataframe),\n", " variable = widgets.Dropdown(\n", " description='variable', \n", " options=['Y','C','G','I','X','M'], \n", " value='Y'),\n", " years=widgets.IntRangeSlider(\n", " description=\"years\",\n", " min=1966,\n", " max=2018,\n", " value=[1980, 2018],\n", " continuous_update=False,\n", " ) \n", "); " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Hint 1:** You can base your renaming on this dictionary:" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "rename_dict = {}\n", "rename_dict['P.1 Output'] = 'Y'\n", "rename_dict['P.3 Final consumption expenditure'] = 'C'\n", "rename_dict['P.3 Government consumption expenditure'] = 'G'\n", "rename_dict['P.5 Gross capital formation'] = 'I'\n", "rename_dict['P.6 Export of goods and services'] = 'X'\n", "rename_dict['P.7 Import of goods and services'] = 'M'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Hint 2:** You code should have the following structure:" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# a. load data set\n", "# nah1 = ?\n", "\n", "# b. rename variables\n", "\n", "# c. remove rows where Y is nan\n", "\n", "# d. correct year column data\n", "# hint, nah1.loc[J,['year']] = nah1.loc[I,['year']].values\n", "\n", "# e. only keep rows with '2010-prices, chained values'\n", "\n", "# f. only keep renamed variables\n", "\n", "# g. interactive plot\n", "# plot_timeseries(nan)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Answer:**" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearYCGIXM
119661089347.0549491.0171307.0127631.0123594.0112171.0
319671124579.0588094.0184983.0133749.0128182.0120028.0
519681162778.0612327.0195664.0142242.0140746.0127670.0
719691239121.0649337.0209375.0164447.0149184.0144021.0
919701282852.0671805.0222350.0168129.0154880.0157151.0
1119711315198.0687221.0235965.0167979.0164685.0159437.0
1319721375211.0696409.0246412.0179858.0173351.0161273.0
1519731433881.0736943.0250919.0195814.0187836.0189976.0
1719741420321.0728730.0255025.0182647.0194250.0186142.0
1919751384905.0742587.0252814.0152977.0192843.0176903.0
2119761481439.0791533.0263746.0185880.0199459.0206589.0
2319771511387.0806350.0268008.0183381.0206612.0208364.0
2519781553317.0831830.0285396.0180665.0209496.0210385.0
2719791620148.0851321.0300440.0188518.0232309.0224407.0
2919801615002.0846984.0310237.0160224.0245273.0211920.0
3119811594442.0847062.0318344.0133904.0266517.0213097.0
3319821627760.0864265.0328757.0149436.0274952.0219781.0
3519831654323.0874955.0328561.0154643.0287589.0224112.0
3719841740838.0890327.0323091.0183220.0297074.0236128.0
3919851825992.0923762.0330263.0203955.0315007.0259576.0
4119861938199.0970914.0332830.0236282.0319226.0281585.0
4319871960401.0959976.0334893.0227970.0334732.0278267.0
4519881983427.0947774.0334632.0220781.0365333.0290022.0
4719891994879.0947038.0331650.0228800.0382350.0305677.0
4919902032441.0947408.0326540.0227572.0407305.0312939.0
5119912049177.0962895.0331961.0218680.0432431.0325555.0
5319922080460.0981584.0334136.0222853.0433602.0325129.0
5519932108104.0989510.0348366.0205585.0438932.0320528.0
5719942210638.01040029.0355506.0233340.0475081.0363124.0
5919952285325.01059095.0363169.0267590.0488777.0388553.0
6119962352072.01087124.0374242.0270410.0511506.0400680.0
6319972425112.01110554.0376151.0309371.0534460.0437574.0
6519982495172.01139300.0387763.0327558.0556380.0470721.0
6719992598080.01150485.0400446.0311579.0619082.0482738.0
6920002727229.01164693.0412188.0348171.0696948.0548713.0
7120012792144.01173920.0420034.0343040.0720315.0561855.0
7320022820514.01193647.0428984.0338918.0751704.0597680.0
7520032826381.01205061.0430025.0336613.0742669.0591557.0
7720042893370.01248028.0436581.0359370.0765061.0633775.0
7920053042633.01283510.0441847.0379250.0824193.0705349.0
8120063212095.01319567.0453065.0427665.0909395.0803852.0
8320073304041.01340375.0458504.0439093.0942620.0850781.0
8520083333338.01359497.0473421.0418030.0979118.0891365.0
8720093114274.01343243.0487752.0329277.0888801.0784904.0
8920103120573.01357797.0495575.0327344.0914933.0789148.0
9120113198587.01357064.0492464.0345198.0980761.0847889.0
9320123225817.01365205.0496195.0353120.0992159.0870858.0
9520133219252.01367181.0495701.0365093.01008147.0883665.0
9720143276865.01384585.0505299.0380755.01039691.0918099.0
9920153368200.01413235.0513801.0400705.01076884.0959972.0
10120163454769.01433261.0514584.0425666.01118549.01000599.0
10320173533677.01456463.0518315.0442594.01158517.01036219.0
10520183570475.01481042.0521123.0465359.01164099.01065876.0
\n", "
" ], "text/plain": [ " year Y C G I X M\n", "1 1966 1089347.0 549491.0 171307.0 127631.0 123594.0 112171.0\n", "3 1967 1124579.0 588094.0 184983.0 133749.0 128182.0 120028.0\n", "5 1968 1162778.0 612327.0 195664.0 142242.0 140746.0 127670.0\n", "7 1969 1239121.0 649337.0 209375.0 164447.0 149184.0 144021.0\n", "9 1970 1282852.0 671805.0 222350.0 168129.0 154880.0 157151.0\n", "11 1971 1315198.0 687221.0 235965.0 167979.0 164685.0 159437.0\n", "13 1972 1375211.0 696409.0 246412.0 179858.0 173351.0 161273.0\n", "15 1973 1433881.0 736943.0 250919.0 195814.0 187836.0 189976.0\n", "17 1974 1420321.0 728730.0 255025.0 182647.0 194250.0 186142.0\n", "19 1975 1384905.0 742587.0 252814.0 152977.0 192843.0 176903.0\n", "21 1976 1481439.0 791533.0 263746.0 185880.0 199459.0 206589.0\n", "23 1977 1511387.0 806350.0 268008.0 183381.0 206612.0 208364.0\n", "25 1978 1553317.0 831830.0 285396.0 180665.0 209496.0 210385.0\n", "27 1979 1620148.0 851321.0 300440.0 188518.0 232309.0 224407.0\n", "29 1980 1615002.0 846984.0 310237.0 160224.0 245273.0 211920.0\n", "31 1981 1594442.0 847062.0 318344.0 133904.0 266517.0 213097.0\n", "33 1982 1627760.0 864265.0 328757.0 149436.0 274952.0 219781.0\n", "35 1983 1654323.0 874955.0 328561.0 154643.0 287589.0 224112.0\n", "37 1984 1740838.0 890327.0 323091.0 183220.0 297074.0 236128.0\n", "39 1985 1825992.0 923762.0 330263.0 203955.0 315007.0 259576.0\n", "41 1986 1938199.0 970914.0 332830.0 236282.0 319226.0 281585.0\n", "43 1987 1960401.0 959976.0 334893.0 227970.0 334732.0 278267.0\n", "45 1988 1983427.0 947774.0 334632.0 220781.0 365333.0 290022.0\n", "47 1989 1994879.0 947038.0 331650.0 228800.0 382350.0 305677.0\n", "49 1990 2032441.0 947408.0 326540.0 227572.0 407305.0 312939.0\n", "51 1991 2049177.0 962895.0 331961.0 218680.0 432431.0 325555.0\n", "53 1992 2080460.0 981584.0 334136.0 222853.0 433602.0 325129.0\n", "55 1993 2108104.0 989510.0 348366.0 205585.0 438932.0 320528.0\n", "57 1994 2210638.0 1040029.0 355506.0 233340.0 475081.0 363124.0\n", "59 1995 2285325.0 1059095.0 363169.0 267590.0 488777.0 388553.0\n", "61 1996 2352072.0 1087124.0 374242.0 270410.0 511506.0 400680.0\n", "63 1997 2425112.0 1110554.0 376151.0 309371.0 534460.0 437574.0\n", "65 1998 2495172.0 1139300.0 387763.0 327558.0 556380.0 470721.0\n", "67 1999 2598080.0 1150485.0 400446.0 311579.0 619082.0 482738.0\n", "69 2000 2727229.0 1164693.0 412188.0 348171.0 696948.0 548713.0\n", "71 2001 2792144.0 1173920.0 420034.0 343040.0 720315.0 561855.0\n", "73 2002 2820514.0 1193647.0 428984.0 338918.0 751704.0 597680.0\n", "75 2003 2826381.0 1205061.0 430025.0 336613.0 742669.0 591557.0\n", "77 2004 2893370.0 1248028.0 436581.0 359370.0 765061.0 633775.0\n", "79 2005 3042633.0 1283510.0 441847.0 379250.0 824193.0 705349.0\n", "81 2006 3212095.0 1319567.0 453065.0 427665.0 909395.0 803852.0\n", "83 2007 3304041.0 1340375.0 458504.0 439093.0 942620.0 850781.0\n", "85 2008 3333338.0 1359497.0 473421.0 418030.0 979118.0 891365.0\n", "87 2009 3114274.0 1343243.0 487752.0 329277.0 888801.0 784904.0\n", "89 2010 3120573.0 1357797.0 495575.0 327344.0 914933.0 789148.0\n", "91 2011 3198587.0 1357064.0 492464.0 345198.0 980761.0 847889.0\n", "93 2012 3225817.0 1365205.0 496195.0 353120.0 992159.0 870858.0\n", "95 2013 3219252.0 1367181.0 495701.0 365093.0 1008147.0 883665.0\n", "97 2014 3276865.0 1384585.0 505299.0 380755.0 1039691.0 918099.0\n", "99 2015 3368200.0 1413235.0 513801.0 400705.0 1076884.0 959972.0\n", "101 2016 3454769.0 1433261.0 514584.0 425666.0 1118549.0 1000599.0\n", "103 2017 3533677.0 1456463.0 518315.0 442594.0 1158517.0 1036219.0\n", "105 2018 3570475.0 1481042.0 521123.0 465359.0 1164099.0 1065876.0" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# a. load data set\n", "nah1 = pd.read_excel('data/NAH1_pivoted.xlsx',skiprows=2)\n", "\n", "# b. rename variables\n", "rename_dict['Unnamed: 0'] = 'year'\n", "nah1.rename(columns=rename_dict,inplace=True)\n", "\n", "# c. remove rows where Y is nan\n", "I = nah1['Y'].notna()\n", "nah1 = nah1[I]\n", "\n", "# d. correct year column data\n", "I = nah1['year'].notna()\n", "J = nah1['year'].isna()\n", "nah1.loc[J,['year']] = nah1.loc[I,['year']].values\n", "\n", "# e. only keep rows with '2010-prices, chained values'\n", "I = nah1['Unnamed: 1'] == '2010-prices, chained values'\n", "nah1 = nah1[I]\n", "\n", "# f. only keep renamed variables\n", "nah1 = nah1.loc[:,['year','Y','C','G','I','X','M']]\n", "nah1" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0d49e12a68764ea9bef99c7dd07bc38a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(Dropdown(description='variable', options=('Y', 'C', 'G', 'I', 'X', 'M'), value='Y'), Int…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_timeseries(nah1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Extra problems" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Extend interactive plot" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Extend the interactive plot with a choice of *real* vs *nominal*." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## New data set" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load data from an Excel or CSV file you have downloaded from e.g. [Statistikbanken.dk](https://www.statistikbanken.dk/). Clean, structure and present the data as you see fit." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" }, "toc-autonumbering": true }, "nbformat": 4, "nbformat_minor": 4 }