{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compile EPA emissions data\n",
    "Convert the data from hourly to monthly and export all years as a single file.\n",
    "\n",
    "## Instructions\n",
    "Choose the file format (`csv` or `feather`) that was used when saving hourly EPA data. Feather is much faster for read/write.\n",
    "\n",
    "The combined result is written as a single CSV file to `Data storage/Derived data`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T15:16:06.692057Z",
     "start_time": "2017-10-31T15:16:05.865129Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from os.path import join\n",
    "from joblib import Parallel, delayed\n",
    "import sys\n",
    "\n",
    "# All input and output files live under 'Data storage', one level above\n",
    "# the directory the notebook is run from\n",
    "cwd = os.getcwd()\n",
    "data_path = join(cwd, '..', 'Data storage')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_format = 'csv'\n",
    "# file_format = 'feather'\n",
    "\n",
    "# Fail early on a typo instead of building input file names that do not exist\n",
    "if file_format not in ('csv', 'feather'):\n",
    "    raise ValueError(\n",
    "        'Unsupported file_format: {!r} (expected csv or feather)'.format(file_format))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T15:16:06.746937Z",
     "start_time": "2017-10-31T15:16:06.729066Z"
    }
   },
   "outputs": [],
   "source": [
    "%load_ext watermark\n",
    "%watermark -iv -v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T15:16:07.915270Z",
     "start_time": "2017-10-31T15:16:07.856952Z"
    }
   },
   "outputs": [],
   "source": [
    "# Load the \"autoreload\" extension\n",
    "%load_ext autoreload\n",
    "\n",
    "# always reload modules marked with \"%aimport\"\n",
    "%autoreload 1\n",
    "\n",
    "# add the 'src' directory as one where we can import modules\n",
    "src_dir = join(os.getcwd(), os.pardir, 'src')\n",
    "sys.path.append(src_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T15:16:09.586532Z",
     "start_time": "2017-10-31T15:16:09.573896Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%aimport Data.data_extraction\n",
    "from Data.data_extraction import import_group_epa, unit_conversion\n",
    "\n",
    "%aimport Analysis.index\n",
    "from Analysis.index import add_datetime, add_quarter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Change years if necessary\n",
    "Both `start_year` and `end_year` are inclusive. One hourly file per year, named `EPA emissions <year>.<file_format>`, must exist in `Data storage/EPA emissions`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-03-15T14:42:00.093000",
     "start_time": "2017-03-15T13:50:59.107000"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "start_year = 2001\n",
    "end_year = 2017\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    base_path = join(data_path, 'EPA emissions')\n",
    "    paths = [join(base_path, 'EPA emissions {}.{}'.format(year, file_format))\n",
    "             for year in range(start_year, end_year + 1)]\n",
    "\n",
    "    # Check the inputs before starting worker processes so that a single\n",
    "    # error lists every absent file, rather than a traceback from a worker\n",
    "    missing = [path for path in paths if not os.path.exists(path)]\n",
    "    if missing:\n",
    "        raise FileNotFoundError(\n",
    "            'Missing hourly EPA files: ' + ', '.join(missing))\n",
    "\n",
    "    # One call per yearly file; n_jobs=-1 lets joblib use all available CPUs\n",
    "    df_list = Parallel(n_jobs=-1)(delayed(import_group_epa)(path)\n",
    "                                  for path in paths)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-03-15T14:56:54.117000",
     "start_time": "2017-03-15T14:56:53.062000"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Stack the per-year results into one frame\n",
    "df = pd.concat(df_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-03-15T14:57:13.961000",
     "start_time": "2017-03-15T14:57:12.029000"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "out_dir = os.path.join(data_path, 'Derived data')\n",
    "# to_csv does not create missing directories, so make sure the target exists\n",
    "os.makedirs(out_dir, exist_ok=True)\n",
    "\n",
    "# NOTE(review): the date in the file name is hard-coded; confirm whether other\n",
    "# code expects this exact name before changing it\n",
    "path = os.path.join(out_dir, 'Monthly EPA emissions 2018-03-06.csv')\n",
    "df.to_csv(path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "psci",
   "language": "python",
   "name": "psci"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}