{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create files with index and generation data at state level\n",
    "\n",
    "## Instructions\n",
    "Make sure the `file_date` parameter below matches the date suffix of the input files in `Data storage/Derived data` (for example `Facility gen fuels and CO2 2018-03-06.csv`). The same value is appended to the names of the output files.\n",
    "\n",
    "The entire notebook can be run at once using *Run All Cells*."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:06:15.759171-04:00",
     "start_time": "2017-10-31T14:06:13.605966Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import os\n",
    "from os.path import join\n",
    "import glob\n",
    "import numpy as np\n",
    "from joblib import Parallel, delayed\n",
    "import sys\n",
    "import json\n",
    "# Every data file this notebook reads or writes is located under\n",
    "# `data_path`: the 'Data storage' folder next to the working directory.\n",
    "cwd = os.getcwd()\n",
    "data_path = join(cwd, '..', 'Data storage')\n",
    "# Short alias for pd.IndexSlice\n",
    "idx = pd.IndexSlice"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Date suffix shared by file names: it selects which 'Derived data' input\n",
    "# CSVs are read and is appended to the names of the output CSVs.\n",
    "file_date = '2018-03-06'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:06:17.058917-04:00",
     "start_time": "2017-10-31T14:06:16.913563Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the \"autoreload\" extension so edited project modules can be\n",
    "# re-imported without restarting the kernel\n",
    "%load_ext autoreload\n",
    "\n",
    "# Mode 1: always reload modules marked with \"%aimport\" (Analysis.index and\n",
    "# util.utils are registered further down)\n",
    "%autoreload 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:06:34.425151-04:00",
     "start_time": "2017-10-31T14:06:34.420759Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Add the 'src' directory as one where we can import modules.\n",
    "# The membership check keeps the cell idempotent: re-running it does not\n",
    "# append a duplicate entry to sys.path.\n",
    "src_dir = join(os.getcwd(), os.pardir, 'src')\n",
    "if src_dir not in sys.path:\n",
    "    sys.path.append(src_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:06:35.256059-04:00",
     "start_time": "2017-10-31T14:06:35.243753Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Register each project module for autoreload once, then import the\n",
    "# helpers from it.\n",
    "%aimport Analysis.index\n",
    "from Analysis.index import (\n",
    "    facility_emission_gen, group_facility_data,\n",
    "    facility_co2, adjust_epa_emissions, group_fuel_cats,\n",
    "    extra_emissions_gen, add_datetime, add_quarter,\n",
    ")\n",
    "\n",
    "%aimport util.utils\n",
    "from util.utils import rename_cols, add_facility_location"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:06:36.259038-04:00",
     "start_time": "2017-10-31T14:06:36.250072Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Two-letter abbreviations of the 50 US states, in the order they are\n",
    "# processed (DC and territories are not in the list).\n",
    "states = (\n",
    "    'AL AK AZ AR CA CO CT DE '\n",
    "    'FL GA HI ID IL IN IA KS '\n",
    "    'KY LA ME MD MA MI MN MS '\n",
    "    'MO MT NE NV NH NJ NM NY '\n",
    "    'NC ND OH OK OR PA RI SC '\n",
    "    'SD TN TX UT VT VA WA WV WI WY'\n",
    ").split()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Emission factors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:06:40.779038-04:00",
     "start_time": "2017-10-31T14:06:40.763721Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Emission factors table; the first CSV column is used as the index.\n",
    "# `ef` is later handed to extra_emissions_gen inside the state loop.\n",
    "path = join(data_path, 'Final emission factors.csv')\n",
    "ef = pd.read_csv(path, index_col=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "EIA facility data and EPA monthly emissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:06:52.574504-04:00",
     "start_time": "2017-10-31T14:06:43.922128Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# EIA facility-level file for the chosen file_date\n",
    "facility_path = join(data_path, 'Derived data',\n",
    "                     'Facility gen fuels and CO2 {}.csv'.format(file_date))\n",
    "facility_df = pd.read_csv(facility_path)\n",
    "# State code = last two characters of the `geography` column\n",
    "facility_df['state'] = facility_df.geography.str[-2:]\n",
    "# Called for its side effect (the return value is not used); presumably it\n",
    "# renames columns in place -- confirm in util.utils.rename_cols\n",
    "rename_cols(facility_df)\n",
    "\n",
    "# Monthly EPA emissions file for the same file_date\n",
    "epa_path = join(data_path, 'Derived data',\n",
    "                'Monthly EPA emissions {}.csv'.format(file_date))\n",
    "epa_df = pd.read_csv(epa_path)\n",
    "rename_cols(epa_df)\n",
    "facility_locations = pd.read_csv(join(data_path, 'Facility labels',\n",
    "                                      'Facility locations.csv'))\n",
    "# Add state labels to the EPA facilities\n",
    "epa_df = add_facility_location(epa_df, facility_locations, labels=['state'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "JSON files with fuel categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:07:01.062456-04:00",
     "start_time": "2017-10-31T14:07:01.052859Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Both fuel category definitions live in the same folder\n",
    "fuel_cat_folder = join(data_path, 'Fuel categories')\n",
    "state_cats_path = join(fuel_cat_folder, 'State_facility.json')\n",
    "custom_cats_path = join(fuel_cat_folder, 'Custom_results.json')\n",
    "\n",
    "# Categories passed to facility_emission_gen as `state_fuel_cat`\n",
    "with open(state_cats_path) as f:\n",
    "    state_fuel_cat = json.load(f)\n",
    "\n",
    "# Categories passed as `custom_fuel_cat` to facility_emission_gen and to\n",
    "# group_fuel_cats\n",
    "with open(custom_cats_path) as f:\n",
    "    custom_fuel_cat = json.load(f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "EIA total monthly gen and fuel consumption"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T14:09:50.616947-04:00",
     "start_time": "2017-10-31T14:09:49.962168Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# EIA state-level totals for the chosen file_date; the `datetime` column\n",
    "# is parsed as dates while reading\n",
    "path = join(\n",
    "    data_path,\n",
    "    'Derived data',\n",
    "    'EIA state-level gen fuel CO2 {}.csv'.format(file_date),\n",
    ")\n",
    "eia_totals = pd.read_csv(path, parse_dates=['datetime'])\n",
    "rename_cols(eia_totals)\n",
    "# State code = last two characters of the `geography` column\n",
    "eia_totals['state'] = eia_totals['geography'].str[-2:]\n",
    "\n",
    "# Drop the fuel types that are duplicated with other categories\n",
    "eia_totals = eia_totals[~eia_totals['type'].isin(['SPV', 'AOR', 'TSN'])]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Calculate state-level monthly CO₂ intensity and generation by fuel category"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-10-31T00:34:57.361630-04:00",
     "start_time": "2017-10-31T00:34:26.908560Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# For every state: compute monthly total CO2, total generation and the CO2\n",
    "# intensity index, plus generation grouped by fuel category. The per-state\n",
    "# frames are then concatenated and written to two CSV files.\n",
    "index_list = []\n",
    "gen_list = []\n",
    "for state in states:\n",
    "    # Work on per-state copies of each input table\n",
    "    eia_fac_state = facility_df.loc[facility_df.state == state].copy()\n",
    "    eia_totals_state = eia_totals.loc[eia_totals.state == state].copy()\n",
    "    epa_state = epa_df.loc[epa_df.state == state].copy()\n",
    "\n",
    "    co2, gen_fuels_state = facility_emission_gen(eia_facility=eia_fac_state,\n",
    "                                                 epa=epa_state,\n",
    "                                                 state_fuel_cat=state_fuel_cat,\n",
    "                                                 custom_fuel_cat=custom_fuel_cat,\n",
    "                                                 export_state_cats=True,\n",
    "                                                 print_status=False)\n",
    "\n",
    "    # extra_gen is returned by the helper but not used in this notebook\n",
    "    extra_co2, extra_gen = extra_emissions_gen(gen_fuels_state,\n",
    "                                               eia_totals_state, ef)\n",
    "\n",
    "    # Combine facility and extra co2, name the series.\n",
    "    # NOTE(review): the `+` aligns on (year, month); a month present in only\n",
    "    # one of the two series becomes NaN -- confirm that is intended.\n",
    "    co2_monthly = co2.groupby(['year', 'month']).sum()\n",
    "    total_co2 = (co2_monthly.loc[:, 'final co2 (kg)']\n",
    "                 + extra_co2.loc[:, 'elec fuel co2 (kg)']\n",
    "                            .groupby(['year', 'month']).sum())\n",
    "    total_co2.name = 'final co2 (kg)'\n",
    "\n",
    "    # Total gen, and the co2 intensity\n",
    "    total_gen = (eia_totals_state\n",
    "                 .groupby(['year', 'month'])['generation (mwh)'].sum())\n",
    "\n",
    "    state_index = pd.concat([total_co2, total_gen], axis=1)\n",
    "    # kg/MWh is numerically equal to g/kWh, so no unit conversion is needed\n",
    "    state_index['index (g/kwh)'] = (state_index['final co2 (kg)']\n",
    "                                    / state_index['generation (mwh)'])\n",
    "    state_index['state'] = state\n",
    "    state_index.set_index('state', append=True, inplace=True)\n",
    "\n",
    "    # Generation by fuel category\n",
    "    gen_category = group_fuel_cats(eia_totals_state, custom_fuel_cat,\n",
    "                                   fuel_col='type', new_col='fuel category')\n",
    "\n",
    "    keep_cols = ['fuel category', 'generation (mwh)', 'total fuel (mmbtu)',\n",
    "                 'elec fuel (mmbtu)', 'all fuel co2 (kg)',\n",
    "                 'elec fuel co2 (kg)', 'year', 'month']\n",
    "    # Build the labelled frame in a single chain. Assigning a column to the\n",
    "    # `gen_category[keep_cols]` selection is chained assignment and raises\n",
    "    # SettingWithCopyWarning; assign() returns a new frame instead.\n",
    "    gen_category = (gen_category[keep_cols]\n",
    "                    .assign(state=state)\n",
    "                    .set_index(['year', 'month', 'state']))\n",
    "\n",
    "    # Add each df to the list\n",
    "    index_list.append(state_index)\n",
    "    gen_list.append(gen_category)\n",
    "\n",
    "\n",
    "# Combine lists of dataframes\n",
    "state_index_all = pd.concat(index_list)\n",
    "add_quarter(state_index_all)\n",
    "\n",
    "gen_category_all = pd.concat(gen_list)\n",
    "add_quarter(gen_category_all)\n",
    "\n",
    "# output state results to file\n",
    "index_fn = 'Monthly index states {}.csv'.format(file_date)\n",
    "gen_fn = 'Monthly generation states {}.csv'.format(file_date)\n",
    "\n",
    "# Create the output folder if it is missing so the writes cannot fail on a\n",
    "# nonexistent directory after every state has already been processed\n",
    "output_dir = join(data_path, 'final state data')\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "state_index_all.to_csv(join(output_dir, index_fn))\n",
    "gen_category_all.to_csv(join(output_dir, gen_fn))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "psci",
   "language": "python",
   "name": "psci"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}