{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Extract facility generation and fuel use data\n", "This notebook creates dataframes with monthly facility generation and fuel use data, merges them, and exports the results. The code assumes that you have already downloaded an `ELEC.txt` file from [EIA's bulk download website](https://www.eia.gov/opendata/bulkfiles.php).\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2017-10-31T14:38:05.812704Z", "start_time": "2017-10-31T14:38:05.763742Z" } }, "outputs": [], "source": [ "import json\n", "import pandas as pd\n", "import os\n", "from os.path import join\n", "import numpy as np\n", "from joblib import Parallel, delayed\n", "import sys\n", "\n", "cwd = os.getcwd()\n", "data_path = join(cwd, '..', 'Data storage')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2017-10-31T14:38:07.508795Z", "start_time": "2017-10-31T14:38:07.496093Z" }, "collapsed": true }, "outputs": [], "source": [ "%load_ext watermark" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-10-31T14:38:16.336599Z", "start_time": "2017-10-31T14:38:16.329167Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "json 2.0.9\n", "pandas 0.22.0\n", "numpy 1.14.2\n", "CPython 3.6.4\n", "IPython 6.2.1\n" ] } ], "source": [ "%watermark -iv -v" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2017-10-31T10:09:52.455410-04:00", "start_time": "2017-10-31T10:09:52.336403Z" }, "collapsed": true }, "outputs": [], "source": [ "# Load the \"autoreload\" extension\n", "%load_ext autoreload\n", "\n", "# always reload modules marked with \"%aimport\"\n", "%autoreload 1" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2017-10-31T10:10:22.675560-04:00", "start_time": "2017-10-31T10:10:22.669508Z" } }, "outputs": [], "source": [ "# add the 'src' directory as one where we can import modules\n", "src_dir = join(os.getcwd(), os.pardir, 'src')\n", "sys.path.append(src_dir)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2017-10-31T10:15:17.557917-04:00", "start_time": "2017-10-31T10:15:17.543746Z" } }, "outputs": [], "source": [ "%aimport Data.data_extraction\n", "from Data.data_extraction import facility_line_to_df\n", "\n", "%aimport Analysis.index\n", "from Analysis.index import add_datetime, add_quarter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read `ELEC.txt` file\n", "Download the most current file from [EIA's bulk download site](https://www.eia.gov/opendata/bulkfiles.php). Save it to `\\Data storage\\Raw data`. I've tried to do this via the requests library, but the data file often gets corrupted." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:04:13.615000", "start_time": "2017-03-15T15:04:10.736000" }, "collapsed": true }, "outputs": [], "source": [ "path = join(data_path, 'Raw EIA bulk', '2018-03-06 ELEC.txt')\n", "with open(path, 'r') as f:\n", " raw_txt = f.readlines()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Filter lines to only include facility generation \n", "- Include `ELEC.PLANT` in the `series_id`\n", "- Include \"All\" as the only allowable prime mover\n", " - Some facilities have incorrect data at the individual prime mover level\n", "- Do not include \"All\" as a fuel code\n", "- Only monthly frequency" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:04:26.646000", "start_time": "2017-03-15T15:04:24.898000" } }, "outputs": [], "source": [ "gen_rows = [row for row in raw_txt if 'ELEC.PLANT.GEN' in row \n", " and 'series_id' in row \n", " and 'ALL.M' in row \n", " and 'ALL-' not in row]\n", "total_fuel_rows = [row for row in raw_txt if 'ELEC.PLANT.CONS_TOT_BTU' in row \n", " and 'series_id' in row \n", " and 'ALL.M' in row \n", " and 'ALL-' not in row]\n", "eg_fuel_rows = [row for row in raw_txt if 'ELEC.PLANT.CONS_EG_BTU' in row \n", " and 'series_id' in row \n", " and 'ALL.M' in row \n", " and 'ALL-' not in row]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combine generation into one large dataframe" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:06:10.407000", "start_time": "2017-03-15T15:04:33.616000" } }, "outputs": [], "source": [ "if __name__ == '__main__':\n", " exception_list = []\n", " facility_gen = pd.concat(Parallel(n_jobs=-1)(delayed(facility_line_to_df)(json.loads(row)) for row in gen_rows))\n", " facility_gen.reset_index(drop=True, inplace=True)\n", " facility_gen.rename({'value':'generation (MWh)'}, axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:15:04.886000", "start_time": "2017-03-15T15:15:02.043000" }, "collapsed": true }, "outputs": [], "source": [ "facility_gen.loc[:,'lat'] = facility_gen.loc[:,'lat'].astype(float)\n", "facility_gen.loc[:,'lon'] = facility_gen.loc[:,'lon'].astype(float)\n", "facility_gen.loc[:, 'plant id'] = facility_gen.loc[:, 'plant id'].astype(int)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:15:05.250000", "start_time": "2017-03-15T15:15:04.888000" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ffuelgeographylast_updatedlatlonmonthplant idprime moverseries_idunitsgeneration (MWh)year
1645711MSCUSA-FL2016-07-07T17:18:42-04:0029.733056-81.6327785136ALLELEC.PLANT.GEN.136-SC-ALL.Mmegawatthours111269.02003
1645712MSCUSA-FL2016-07-07T17:18:42-04:0029.733056-81.6327784136ALLELEC.PLANT.GEN.136-SC-ALL.Mmegawatthours0.02003
1645713MSCUSA-FL2016-07-07T17:18:42-04:0029.733056-81.6327783136ALLELEC.PLANT.GEN.136-SC-ALL.Mmegawatthours131711.02003
1645714MSCUSA-FL2016-07-07T17:18:42-04:0029.733056-81.6327782136ALLELEC.PLANT.GEN.136-SC-ALL.Mmegawatthours127918.02003
1645715MSCUSA-FL2016-07-07T17:18:42-04:0029.733056-81.6327781136ALLELEC.PLANT.GEN.136-SC-ALL.Mmegawatthours112533.02003
\n", "
" ], "text/plain": [ " f fuel geography last_updated lat lon \\\n", "1645711 M SC USA-FL 2016-07-07T17:18:42-04:00 29.733056 -81.632778 \n", "1645712 M SC USA-FL 2016-07-07T17:18:42-04:00 29.733056 -81.632778 \n", "1645713 M SC USA-FL 2016-07-07T17:18:42-04:00 29.733056 -81.632778 \n", "1645714 M SC USA-FL 2016-07-07T17:18:42-04:00 29.733056 -81.632778 \n", "1645715 M SC USA-FL 2016-07-07T17:18:42-04:00 29.733056 -81.632778 \n", "\n", " month plant id prime mover series_id \\\n", "1645711 5 136 ALL ELEC.PLANT.GEN.136-SC-ALL.M \n", "1645712 4 136 ALL ELEC.PLANT.GEN.136-SC-ALL.M \n", "1645713 3 136 ALL ELEC.PLANT.GEN.136-SC-ALL.M \n", "1645714 2 136 ALL ELEC.PLANT.GEN.136-SC-ALL.M \n", "1645715 1 136 ALL ELEC.PLANT.GEN.136-SC-ALL.M \n", "\n", " units generation (MWh) year \n", "1645711 megawatthours 111269.0 2003 \n", "1645712 megawatthours 0.0 2003 \n", "1645713 megawatthours 131711.0 2003 \n", "1645714 megawatthours 127918.0 2003 \n", "1645715 megawatthours 112533.0 2003 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#drop\n", "facility_gen.tail()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combine total fuel use into one large dataframe" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:16:57.292000", "start_time": "2017-03-15T15:15:20.912000" } }, "outputs": [], "source": [ "if __name__ == '__main__':\n", " exception_list = []\n", " facility_all_fuel = pd.concat(Parallel(n_jobs=-1)(delayed(facility_line_to_df)(json.loads(row)) for row in total_fuel_rows))\n", " facility_all_fuel.reset_index(drop=True, inplace=True)\n", " facility_all_fuel.rename({'value':'total fuel (mmbtu)'}, axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:16:59.988000", "start_time": "2017-03-15T15:16:57.295000" }, "collapsed": true }, "outputs": [], "source": [ "facility_all_fuel.loc[:,'lat'] = facility_all_fuel.loc[:,'lat'].astype(float)\n", "facility_all_fuel.loc[:,'lon'] = facility_all_fuel.loc[:,'lon'].astype(float)\n", "facility_all_fuel.loc[:,'plant id'] = facility_all_fuel.loc[:,'plant id'].astype(int)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combine total fuel use for electricity into one large dataframe" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:18:48.929000", "start_time": "2017-03-15T15:17:12.787000" } }, "outputs": [], "source": [ "if __name__ == '__main__':\n", " exception_list = []\n", " facility_eg_fuel = pd.concat(Parallel(n_jobs=-1)(delayed(facility_line_to_df)(json.loads(row)) for row in eg_fuel_rows))\n", " facility_eg_fuel.reset_index(drop=True, inplace=True)\n", " facility_eg_fuel.rename({'value':'elec fuel (mmbtu)'}, axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:18:51.673000", "start_time": "2017-03-15T15:18:48.931000" }, "collapsed": true }, "outputs": [], "source": [ "facility_eg_fuel.loc[:,'lat'] = facility_eg_fuel.loc[:,'lat'].astype(float)\n", "facility_eg_fuel.loc[:,'lon'] = facility_eg_fuel.loc[:,'lon'].astype(float)\n", "facility_eg_fuel.loc[:,'plant id'] = facility_eg_fuel.loc[:,'plant id'].astype(int)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Merge dataframes\n", "Need to be careful here because there are fuel/prime mover combinations that have generation but no fuel use (e.g. the steam cycle of a combined cycle system - but only in some cases). " ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:19:20.279000", "start_time": "2017-03-15T15:19:16.446000" } }, "outputs": [], "source": [ "keep_cols = ['fuel', 'generation (MWh)', 'month', 'plant id', 'prime mover', 'year',\n", " 'geography', 'lat', 'lon', 'last_updated']\n", "merge_cols = ['fuel', 'month', 'plant id', 'year']\n", "gen_total_fuel = facility_all_fuel.merge(facility_gen.loc[:,keep_cols], \n", " how='outer', on=merge_cols)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Fill in missing values from the first merge" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:19:21.066000", "start_time": "2017-03-15T15:19:21.059000" } }, "outputs": [], "source": [ "def fill_missing(df):\n", " cols = [col[:-2] for col in df.columns if '_x' in col]\n", " \n", " # Create new column from the _x version, fill missing values from the _y version\n", " for col in cols:\n", " df[col] = df.loc[:, col + '_x']\n", " df.loc[df[col].isnull(), col] = df.loc[df[col].isnull(), col + '_y']\n", " \n", " df.drop([col+'_x', col+'_y'], axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:19:25.832000", "start_time": "2017-03-15T15:19:21.068000" }, "collapsed": true }, "outputs": [], "source": [ "fill_missing(gen_total_fuel)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:19:29.338000", "start_time": "2017-03-15T15:19:26.259000" } }, "outputs": [], "source": [ "keep_cols = ['fuel', 'elec fuel (mmbtu)', 'month', 'plant id', 'prime mover', 'year',\n", " 'geography', 'lat', 'lon', 'last_updated']\n", "all_facility_data = gen_total_fuel.merge(facility_eg_fuel.loc[:,keep_cols], \n", " how='outer', on=merge_cols)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### FIll in missing values from second merge and drop units/series_id columns" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:19:34.385000", "start_time": "2017-03-15T15:19:29.341000" }, "collapsed": true }, "outputs": [], "source": [ "fill_missing(all_facility_data)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:19:35.264000", "start_time": "2017-03-15T15:19:34.490000" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ffuelmonthplant idtotal fuel (mmbtu)yeargeneration (MWh)elec fuel (mmbtu)geographylast_updatedlatlonprime mover
0MDFO1210011116.02017114.5871116.0USA-IN2018-02-28T02:03:13-05:0039.9242-87.4244ALL
1MDFO1110012772.02017265.4282772.0USA-IN2018-02-28T02:03:13-05:0039.9242-87.4244ALL
2MDFO1010014819.02017460.6954819.0USA-IN2018-02-28T02:03:13-05:0039.9242-87.4244ALL
3MDFO910012720.02017300.7052720.0USA-IN2018-02-28T02:03:13-05:0039.9242-87.4244ALL
4MDFO810016273.02017609.3586273.0USA-IN2018-02-28T02:03:13-05:0039.9242-87.4244ALL
\n", "
" ], "text/plain": [ " f fuel month plant id total fuel (mmbtu) year generation (MWh) \\\n", "0 M DFO 12 1001 1116.0 2017 114.587 \n", "1 M DFO 11 1001 2772.0 2017 265.428 \n", "2 M DFO 10 1001 4819.0 2017 460.695 \n", "3 M DFO 9 1001 2720.0 2017 300.705 \n", "4 M DFO 8 1001 6273.0 2017 609.358 \n", "\n", " elec fuel (mmbtu) geography last_updated lat lon \\\n", "0 1116.0 USA-IN 2018-02-28T02:03:13-05:00 39.9242 -87.4244 \n", "1 2772.0 USA-IN 2018-02-28T02:03:13-05:00 39.9242 -87.4244 \n", "2 4819.0 USA-IN 2018-02-28T02:03:13-05:00 39.9242 -87.4244 \n", "3 2720.0 USA-IN 2018-02-28T02:03:13-05:00 39.9242 -87.4244 \n", "4 6273.0 USA-IN 2018-02-28T02:03:13-05:00 39.9242 -87.4244 \n", "\n", " prime mover \n", "0 ALL \n", "1 ALL \n", "2 ALL \n", "3 ALL \n", "4 ALL " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_facility_data.drop(['units', 'series_id'], axis=1, inplace=True)\n", "all_facility_data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add datetime and quarter columns" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "add_quarter(all_facility_data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load emission factors\n", "These are mostly [EIA emission factors](https://www.eia.gov/tools/faqs/faq.cfm?id=76&t=11)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:19:46.028000", "start_time": "2017-03-15T15:19:45.839000" }, "collapsed": true }, "outputs": [], "source": [ "path = join(data_path, 'Final emission factors.csv')\n", "ef = pd.read_csv(path, index_col=0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Apply factors to facility generation" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:19:46.088000", "start_time": "2017-03-15T15:19:46.031000" } }, "outputs": [ { "data": { "text/plain": [ "({'AB': 0.0,\n", " 'BFG': 274.32,\n", " 'BIT': 93.3,\n", " 'BLQ': 0.0,\n", " 'DFO': 73.16,\n", " 'GEO': 7.71,\n", " 'JF': 70.9,\n", " 'KER': 72.3,\n", " 'LFG': 0.0,\n", " 'LIG': 97.7,\n", " 'MSB': 0.0,\n", " 'MSN': 90.7,\n", " 'MSW': 41.69,\n", " 'NG': 53.07,\n", " 'NUC': 0.0,\n", " 'OBG': 0.0,\n", " 'OBL': 0.0,\n", " 'OBS': 0.0,\n", " 'OG': 59.0,\n", " 'OTH': 0.0,\n", " 'PC': 102.1,\n", " 'PG': 63.07,\n", " 'PUR': 0.0,\n", " 'RC': 93.3,\n", " 'RFO': 78.79,\n", " 'SC': 93.3,\n", " 'SGC': 93.3,\n", " 'SGP': 73.16,\n", " 'SLW': 0.0,\n", " 'SUB': 97.2,\n", " 'SUN': 0.0,\n", " 'TDF': 85.97,\n", " 'WAT': 0.0,\n", " 'WC': 93.3,\n", " 'WDL': 0.0,\n", " 'WDS': 0.0,\n", " 'WH': 0.0,\n", " 'WND': 0.0,\n", " 'WO': 95.25},\n", " {'AB': 118.17,\n", " 'BFG': 274.32,\n", " 'BIT': 93.3,\n", " 'BLQ': 94.4,\n", " 'DFO': 73.16,\n", " 'GEO': 7.71,\n", " 'JF': 70.9,\n", " 'KER': 72.3,\n", " 'LFG': 52.17,\n", " 'LIG': 97.7,\n", " 'MSB': 90.7,\n", " 'MSN': 90.7,\n", " 'MSW': 41.69,\n", " 'NG': 53.07,\n", " 'NUC': 0.0,\n", " 'OBG': 52.17,\n", " 'OBL': 83.98,\n", " 'OBS': 105.51,\n", " 'OG': 59.0,\n", " 'OTH': 0.0,\n", " 'PC': 102.1,\n", " 'PG': 63.07,\n", " 'PUR': 0.0,\n", " 'RC': 93.3,\n", " 'RFO': 78.79,\n", " 'SC': 93.3,\n", " 'SGC': 93.3,\n", " 'SGP': 73.16,\n", " 'SLW': 83.98,\n", " 'SUB': 97.2,\n", " 'SUN': 0.0,\n", " 'TDF': 85.97,\n", " 'WAT': 0.0,\n", " 'WC': 93.3,\n", " 'WDL': 83.98,\n", " 'WDS': 93.8,\n", " 'WH': 0.0,\n", " 'WND': 0.0,\n", " 'WO': 95.25})" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fossil_factors = dict(zip(ef.index, ef['Fossil Factor']))\n", "total_factors = dict(zip(ef.index, ef['Total Factor']))\n", "fossil_factors, total_factors" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Apply emission factors\n", "Fuel emission factor is kg/mmbtu" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:21:38.803000", "start_time": "2017-03-15T15:19:46.091000" }, "collapsed": true }, "outputs": [], "source": [ "# Start with 0 emissions in all rows\n", "# For fuels where we have an emission factor, replace the 0 with the calculated value\n", "all_facility_data['all fuel fossil CO2 (kg)'] = 0\n", "all_facility_data['elec fuel fossil CO2 (kg)'] = 0\n", "all_facility_data['all fuel total CO2 (kg)'] = 0\n", "all_facility_data['elec fuel total CO2 (kg)'] = 0\n", "for fuel in total_factors.keys():\n", " # All fuel CO2 emissions\n", " all_facility_data.loc[all_facility_data['fuel']==fuel,'all fuel fossil CO2 (kg)'] = \\\n", " all_facility_data.loc[all_facility_data['fuel']==fuel,'total fuel (mmbtu)'] * fossil_factors[fuel] \n", " all_facility_data.loc[all_facility_data['fuel']==fuel,'all fuel total CO2 (kg)'] = \\\n", " all_facility_data.loc[all_facility_data['fuel']==fuel,'total fuel (mmbtu)'] * total_factors[fuel]\n", " \n", " # Electric fuel CO2 emissions\n", " all_facility_data.loc[all_facility_data['fuel']==fuel,'elec fuel fossil CO2 (kg)'] = \\\n", " all_facility_data.loc[all_facility_data['fuel']==fuel,'elec fuel (mmbtu)'] * fossil_factors[fuel]\n", " all_facility_data.loc[all_facility_data['fuel']==fuel,'elec fuel total CO2 (kg)'] = \\\n", " all_facility_data.loc[all_facility_data['fuel']==fuel,'elec fuel (mmbtu)'] * total_factors[fuel]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set nan and negative emissions to 0\n", "When no fuel was used for electricity production, or when negative fuel is somehow reported by EIA, set the emissions to 0. This is implemented by filtering out all values that are greater than or equal to 0." ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:21:38.950000", "start_time": "2017-03-15T15:21:38.806000" }, "scrolled": true }, "outputs": [], "source": [ "# Fossil CO2\n", "all_facility_data.loc[~(all_facility_data['all fuel fossil CO2 (kg)']>=0),\n", " 'all fuel fossil CO2 (kg)'] = 0\n", "all_facility_data.loc[~(all_facility_data['elec fuel fossil CO2 (kg)']>=0),\n", " 'elec fuel fossil CO2 (kg)'] = 0\n", "# Total CO2\n", "all_facility_data.loc[~(all_facility_data['all fuel total CO2 (kg)']>=0),\n", " 'all fuel total CO2 (kg)'] = 0\n", "all_facility_data.loc[~(all_facility_data['elec fuel total CO2 (kg)']>=0),\n", " 'elec fuel total CO2 (kg)'] = 0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Export" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "ExecuteTime": { "end_time": "2017-03-15T15:22:11.227000", "start_time": "2017-03-15T15:21:38.953000" }, "collapsed": true }, "outputs": [], "source": [ "path = join(data_path, 'Derived data',\n", " 'Facility gen fuels and CO2 2018-03-06.csv')\n", "all_facility_data.to_csv(path, index=False)" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "psci", "language": "python", "name": "psci" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" }, "notify_time": "30", "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }