{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Download and get info from all EIA-923 Excel files\n", "This setup downloads all the zip files, extracts the contents, and identifies the correct header row in the correct file. I'm only getting 2 columns of data (plant id and NERC region), but it can be modified for other data." ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T13:01:49.691359", "start_time": "2017-07-25T13:01:49.670179" }, "collapsed": true }, "outputs": [], "source": [ "%matplotlib inline\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import os\n", "import glob\n", "import numpy as np\n", "import requests\n", "from bs4 import BeautifulSoup\n", "from urllib import urlretrieve\n", "import zipfile\n", "import fnmatch" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T12:43:53.330982", "start_time": "2017-07-25T12:43:53.154315" } }, "outputs": [], "source": [ "url = 'https://www.eia.gov/electricity/data/eia923'\n", "r = requests.get(url)\n", "soup = BeautifulSoup(r.text, 'lxml')\n", "\n", "table = soup.find_all('table', attrs={'class': 'simpletable'})[0]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T12:44:48.294638", "start_time": "2017-07-25T12:44:48.285337" } }, "outputs": [], "source": [ "fns = []\n", "links = []\n", "for row in table.find_all('td', attrs={'align': 'center'}):\n", " href = row.a.get('href')\n", " fns.append(href.split('/')[-1])\n", " links.append(url + '/' + href)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T12:44:52.227670", "start_time": "2017-07-25T12:44:52.220242" } }, "outputs": [ { "data": { "text/plain": [ "['f923_2017.zip',\n", " 'f923_2016.zip',\n", " 'f923_2015.zip',\n", " 'f923_2014.zip',\n", " 'f923_2013.zip',\n", " 'f923_2012.zip',\n", " 'f923_2011.zip',\n", " 'f923_2010.zip',\n", " 'f923_2009.zip',\n", " 'f923_2008.zip',\n", " 'f906920_2007.zip',\n", " 'f906920_2006.zip',\n", " 'f906920_2005.zip',\n", " 'f906920_2004.zip',\n", " 'f906920_2003.zip',\n", " 'f906920_2002.zip',\n", " 'f906920_2001.zip']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fns" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T12:45:47.916239", "start_time": "2017-07-25T12:45:47.910853" }, "collapsed": true }, "outputs": [], "source": [ "path = os.path.join('Data storage', '923 raw data')\n", "os.mkdir(path)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T12:47:25.213563", "start_time": "2017-07-25T12:46:44.812113" } }, "outputs": [ { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2017.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2016.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2015.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2014.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2013.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2012.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2011.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2010.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2009.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f923_2008.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f906920_2007.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f906920_2006.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f906920_2005.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f906920_2004.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f906920_2003.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f906920_2002.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "('Data storage/923 raw data/f906920_2001.zip',\n", " )" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "base_path = os.path.join('Data storage', '923 raw data')\n", "for fn, link in zip(fns, links):\n", " path = os.path.join(base_path, fn)\n", " urlretrieve(link, filename=path)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T12:52:55.489784", "start_time": "2017-07-25T12:52:47.905383" } }, "outputs": [], "source": [ "base_path = os.path.join('Data storage', '923 raw data')\n", "for fn in fns:\n", " zip_path = os.path.join(base_path, fn)\n", " target_folder = os.path.join(base_path, fn.split('.')[0])\n", " \n", " with zipfile.ZipFile(zip_path,\"r\") as zip_ref:\n", " zip_ref.extractall(target_folder)" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T13:30:05.700314", "start_time": "2017-07-25T13:30:05.690251" }, "collapsed": true }, "outputs": [], "source": [ "matches = []\n", "for root, dirnames, filenames in os.walk(base_path):\n", " for filename in fnmatch.filter(filenames, '*2_3*'):\n", " matches.append(os.path.join(root, filename))\n", " for filename in fnmatch.filter(filenames, 'eia923*'):\n", " matches.append(os.path.join(root, filename))\n", " for filename in fnmatch.filter(filenames, '*906920*.xls'):\n", " matches.append(os.path.join(root, filename))" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T13:30:06.450419", "start_time": "2017-07-25T13:30:06.444493" } }, "outputs": [ { "data": { "text/plain": [ "['Data storage/923 raw data/f906920_2001/f906920y2001.xls',\n", " 'Data storage/923 raw data/f906920_2002/f906920y2002.xls',\n", " 'Data storage/923 raw data/f906920_2003/f906920_2003.xls',\n", " 'Data storage/923 raw data/f906920_2004/f906920_2004.xls',\n", " 'Data storage/923 raw data/f906920_2005/f906920_2005.xls',\n", " 'Data storage/923 raw data/f906920_2006/f906920_2006.xls',\n", " 'Data storage/923 raw data/f906920_2007/f906920_2007.xls',\n", " 'Data storage/923 raw data/f923_2008/eia923December2008.xls',\n", " 'Data storage/923 raw data/f923_2009/EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.XLS',\n", " 'Data storage/923 raw data/f923_2010/EIA923 SCHEDULES 2_3_4_5 Final 2010.xls',\n", " 'Data storage/923 raw data/f923_2011/EIA923_Schedules_2_3_4_5_2011_Final_Revision.xlsx',\n", " 'Data storage/923 raw data/f923_2012/EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx',\n", " 'Data storage/923 raw data/f923_2013/EIA923_Schedules_2_3_4_5_2013_Final_Revision.xlsx',\n", " 'Data storage/923 raw data/f923_2014/EIA923_Schedules_2_3_4_5_M_12_2014_Final_Revision.xlsx',\n", " 'Data storage/923 raw data/f923_2015/EIA923_Schedules_2_3_4_5_M_12_2015_Final_Revision.xlsx',\n", " 'Data storage/923 raw data/f923_2016/EIA923_Schedules_2_3_4_5_M_12_2016.xlsx',\n", " 'Data storage/923 raw data/f923_2017/EIA923_Schedules_2_3_4_5_M_04_2017_22JUN2017.xlsx']" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matches" ] }, { "cell_type": "code", "execution_count": 84, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T13:48:02.406611", "start_time": "2017-07-25T13:48:02.396934" }, "collapsed": true }, "outputs": [], "source": [ "def clip_at_header(df, year):\n", " \"\"\"Find the appropriate header row, only keep Plant Id and NERC Region columns,\n", " and add a column with the year\"\"\"\n", " header = df.loc[df.iloc[:, 8].str.contains('NERC').replace(np.nan, False)].index[0]\n", "# print header\n", " # Drop rows above header\n", " df = df.loc[header + 1:, :]\n", " # Only keep columns 0 (plant id) and 8 (NERC Region)\n", " df = df.iloc[:, [0, 8]]\n", " df.columns = ['Plant Id', 'NERC Region']\n", " df.reset_index(inplace=True, drop=True)\n", " df.dropna(inplace=True)\n", " df['Plant Id'] = pd.to_numeric(df['Plant Id'])\n", " df['Year'] = year\n", " return df" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T13:51:58.371663", "start_time": "2017-07-25T13:48:03.796782" } }, "outputs": [], "source": [ "df_list = []\n", "for fn in matches:\n", " year = int(fn.split('/')[-2].split('_')[-1])\n", " df = pd.read_excel(fn)\n", " df_list.append(clip_at_header(df, year))" ] }, { "cell_type": "code", "execution_count": 186, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T15:05:54.409913", "start_time": "2017-07-25T15:05:54.343971" } }, "outputs": [], "source": [ "nerc_assignment = pd.concat(df_list)\n", "nerc_assignment.reset_index(inplace=True, drop=True)\n", "nerc_assignment.drop_duplicates(inplace=True)" ] }, { "cell_type": "code", "execution_count": 187, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T15:05:54.624545", "start_time": "2017-07-25T15:05:54.526792" }, "collapsed": true }, "outputs": [], "source": [ "nerc_assignment['Year'] = pd.to_numeric(nerc_assignment['Year'])" ] }, { "cell_type": "code", "execution_count": 188, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T15:05:54.729596", "start_time": "2017-07-25T15:05:54.725783" }, "collapsed": true }, "outputs": [], "source": [ "nerc_region = nerc_assignment['NERC Region']\n", "nerc_year = nerc_assignment['Year']" ] }, { "cell_type": "code", "execution_count": 189, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T15:05:55.445076", "start_time": "2017-07-25T15:05:55.247989" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(u'SERC', [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'ECAR', [2001, 2002, 2003, 2004, 2005])\n", "(u'MAPP', [2001, 2002, 2003, 2004])\n", "(u'NPCC', [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'SPP', [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'ERCOT', [2001, 2002, 2003, 2004, 2005, 2006])\n", "(u'FRCC', [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'MAIN', [2001, 2002, 2003, 2004, 2005])\n", "(u'WECC', [2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'ASCC', [2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'MAAC', [2002, 2003, 2004, 2005])\n", "(u'HICC', [2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'WSCC', [2002])\n", "(u'MRO', [2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'RFC', [2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n", "(u'TRE', [2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])\n" ] } ], "source": [ "for region in nerc_assignment['NERC Region'].unique():\n", " years = nerc_assignment.loc[nerc_region == region, 'Year'].unique()\n", " print (region, list(years))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Export original data" ] }, { "cell_type": "code", "execution_count": 190, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T15:05:57.974593", "start_time": "2017-07-25T15:05:57.808782" }, "collapsed": true }, "outputs": [], "source": [ "path = os.path.join('Data storage', 'Plant NERC regions.csv')\n", "nerc_assignment.to_csv(path, index=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Assign NERC region to pre-2005/6 facilities based on where they ended up\n", "\n", "Somehow I'm having trouble doing this" ] }, { "cell_type": "code", "execution_count": 183, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T15:02:35.399066", "start_time": "2017-07-25T15:02:35.369816" } }, "outputs": [], "source": [ "region_dict = dict(nerc_assignment.loc[nerc_assignment['Year'] == 2006,\n", " ['Plant Id', 'NERC Region']].values)" ] }, { "cell_type": "code", "execution_count": 184, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T15:02:54.535617", "start_time": "2017-07-25T15:02:54.445285" } }, "outputs": [], "source": [ "regions = ['ECAR', 'MAPP', 'MAIN', 'MAAC']\n", "years = range(2001, 2006)\n", "nerc_assignment.loc[(nerc_region.isin(regions)) &\n", " (nerc_assignment['Year'].isin(years)), \n", " 'Corrected Region'] = nerc_assignment.loc[(nerc_region.isin(regions)) &\n", " (nerc_assignment['Year'].isin(years)),\n", " 'Plant Id'].map(region_dict)" ] }, { "cell_type": "code", "execution_count": 185, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T15:03:02.524416", "start_time": "2017-07-25T15:03:02.505295" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Plant IdNERC RegionYearCorrected Region
02SERC2001NaN
13SERC2001NaN
44SERC2001NaN
55SERC2001NaN
77SERC2001NaN
\n", "
" ], "text/plain": [ " Plant Id NERC Region Year Corrected Region\n", "0 2 SERC 2001 NaN\n", "1 3 SERC 2001 NaN\n", "4 4 SERC 2001 NaN\n", "5 5 SERC 2001 NaN\n", "7 7 SERC 2001 NaN" ] }, "execution_count": 185, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nerc_assignment.head()" ] }, { "cell_type": "code", "execution_count": 162, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:39:42.401642", "start_time": "2017-07-25T14:39:42.393256" } }, "outputs": [ { "data": { "text/plain": [ "u'SERC'" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nerc_assignment.loc[(nerc_assignment['Year'] == 2006) &\n", " (nerc_assignment['Plant Id'] == 3), 'NERC Region'].values[0]" ] }, { "cell_type": "code", "execution_count": 167, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:55:46.266858", "start_time": "2017-07-25T14:55:46.248848" } }, "outputs": [], "source": [ "nerc_assignment.loc[(nerc_assignment['Plant Id'] == 3) & \n", " (nerc_assignment['Year'].isin(years)), 'Corrected Region'] = 'SERC'" ] }, { "cell_type": "code", "execution_count": 168, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:55:50.683479", "start_time": "2017-07-25T14:55:50.671001" } }, "outputs": [ { "data": { "text/plain": [ "1 SERC\n", "1583 SERC\n", "9921 SERC\n", "19264 SERC\n", "28525 SERC\n", "Name: Corrected Region, dtype: object" ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nerc_assignment.loc[(nerc_assignment['Plant Id'] == 3) & \n", " (nerc_assignment['Year'].isin(years)), 'Corrected Region']" ] }, { "cell_type": "code", "execution_count": 170, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:56:43.773144", "start_time": "2017-07-25T14:56:43.756239" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Plant IdNERC RegionYearCorrected Region
15822SERC2002NaN
15833SERC2002SERC
15864SERC2002NaN
15877SERC2002NaN
15908SERC2002NaN
\n", "
" ], "text/plain": [ " Plant Id NERC Region Year Corrected Region\n", "1582 2 SERC 2002 NaN\n", "1583 3 SERC 2002 SERC\n", "1586 4 SERC 2002 NaN\n", "1587 7 SERC 2002 NaN\n", "1590 8 SERC 2002 NaN" ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nerc_assignment.loc[nerc_assignment['Year'] == 2002].head()" ] }, { "cell_type": "code", "execution_count": 115, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:12:01.586280", "start_time": "2017-07-25T14:12:01.575236" } }, "outputs": [], "source": [ "nerc_assignment.index = pd.MultiIndex.from_arrays([nerc_assignment['Year'],\n", " nerc_assignment['Plant Id']])" ] }, { "cell_type": "code", "execution_count": 165, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:40:44.883190", "start_time": "2017-07-25T14:40:44.868800" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Plant IdNERC RegionYearCorrected Region
02SERC2001NaN
13SERC2001NaN
44SERC2001NaN
55SERC2001NaN
77SERC2001NaN
\n", "
" ], "text/plain": [ " Plant Id NERC Region Year Corrected Region\n", "0 2 SERC 2001 NaN\n", "1 3 SERC 2001 NaN\n", "4 4 SERC 2001 NaN\n", "5 5 SERC 2001 NaN\n", "7 7 SERC 2001 NaN" ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nerc_assignment.head()" ] }, { "cell_type": "code", "execution_count": 136, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:20:19.518964", "start_time": "2017-07-25T14:20:19.514845" } }, "outputs": [], "source": [ "idx = pd.IndexSlice" ] }, { "cell_type": "code", "execution_count": 137, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:20:37.360503", "start_time": "2017-07-25T14:20:37.349082" } }, "outputs": [], "source": [ "regions_2006 = nerc_assignment.loc[idx[2006, :], 'NERC Region'].copy()" ] }, { "cell_type": "code", "execution_count": 145, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:23:57.176921", "start_time": "2017-07-25T14:23:57.163976" } }, "outputs": [], "source": [ "regions_2006 = nerc_assignment.xs(2006, level='Year')['NERC Region']" ] }, { "cell_type": "code", "execution_count": 146, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:23:57.671884", "start_time": "2017-07-25T14:23:57.658774" } }, "outputs": [ { "data": { "text/plain": [ "Plant Id\n", "2 SERC\n", "3 SERC\n", "4 SERC\n", "7 SERC\n", "8 SERC\n", "9 WECC\n", "10 SERC\n", "11 SERC\n", "12 SERC\n", "13 SERC\n", "14 SERC\n", "15 SERC\n", "16 SERC\n", "17 SERC\n", "18 SERC\n", "19 SERC\n", "20 SERC\n", "21 SERC\n", "26 SERC\n", "30 MRO\n", "34 WECC\n", "38 SERC\n", "46 SERC\n", "47 SERC\n", "48 SERC\n", "49 SERC\n", "50 SERC\n", "51 SPP\n", "53 SERC\n", "54 SERC\n", " ... \n", "56544 MRO\n", "56545 SERC\n", "56554 WECC\n", "56557 SPP\n", "56558 SPP\n", "56559 SPP\n", "56561 SPP\n", "56562 SPP\n", "56570 WECC\n", "56571 RFC\n", "56577 MRO\n", "56578 MRO\n", "56579 MRO\n", "56580 MRO\n", "56581 MRO\n", "56582 MRO\n", "56583 MRO\n", "56584 MRO\n", "56585 MRO\n", "56586 MRO\n", "56587 MRO\n", "56588 MRO\n", "56589 MRO\n", "56590 MRO\n", "56591 WECC\n", "56595 MRO\n", "56597 SERC\n", "56598 SERC\n", "56599 SERC\n", "56600 SERC\n", "Name: NERC Region, dtype: object" ] }, "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regions_2006" ] }, { "cell_type": "code", "execution_count": 151, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:24:59.631740", "start_time": "2017-07-25T14:24:59.599141" } }, "outputs": [], "source": [ "for year in range(2001, 2006):\n", " nerc_assignment.xs(year, level='Year')['Corrected NERC'] = regions_2006" ] }, { "cell_type": "code", "execution_count": 152, "metadata": { "ExecuteTime": { "end_time": "2017-07-25T14:25:00.820518", "start_time": "2017-07-25T14:25:00.762046" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Plant IdNERC RegionYearCorrected NERC
YearPlant Id
200122SERC2001NaN
33SERC2001NaN
44SERC2001NaN
55SERC2001NaN
77SERC2001NaN
88SERC2001NaN
1010SERC2001NaN
1111SERC2001NaN
1212SERC2001NaN
1313SERC2001NaN
1414SERC2001NaN
1515SERC2001NaN
1616SERC2001NaN
1717SERC2001NaN
1818SERC2001NaN
1919SERC2001NaN
2020SERC2001NaN
2121SERC2001NaN
2626SERC2001NaN
3838SERC2001NaN
4646SERC2001NaN
4747SERC2001NaN
4848SERC2001NaN
4949SERC2001NaN
5050SERC2001NaN
5353SERC2001NaN
5454ECAR2001NaN
5555SERC2001NaN
5656SERC2001NaN
6060MAPP2001NaN
..................
20176014560145WECC2017NaN
6014960149SERC2017NaN
6015260152WECC2017NaN
6018560185WECC2017NaN
6018660186WECC2017NaN
6021060210TRE2017NaN
6021860218SPP2017NaN
6025660256SPP2017NaN
6025860258WECC2017NaN
6026360263WECC2017NaN
6027860278NPCC2017NaN
6030460304WECC2017NaN
6030760307WECC2017NaN
6030860308WECC2017NaN
6032960329RFC2017NaN
6034960349WECC2017NaN
6035260352WECC2017NaN
6035460354MRO2017NaN
6036660366TRE2017NaN
6037260372TRE2017NaN
6038360383SERC2017NaN
6039060390RFC2017NaN
6041460414SPP2017NaN
6043260432WECC2017NaN
6043660436TRE2017NaN
6044160441WECC2017NaN
6046760467WECC2017NaN
6047060470RFC2017NaN
6050260502TRE2017NaN
6050660506TRE2017NaN
\n", "

85768 rows × 4 columns

\n", "
" ], "text/plain": [ " Plant Id NERC Region Year Corrected NERC\n", "Year Plant Id \n", "2001 2 2 SERC 2001 NaN\n", " 3 3 SERC 2001 NaN\n", " 4 4 SERC 2001 NaN\n", " 5 5 SERC 2001 NaN\n", " 7 7 SERC 2001 NaN\n", " 8 8 SERC 2001 NaN\n", " 10 10 SERC 2001 NaN\n", " 11 11 SERC 2001 NaN\n", " 12 12 SERC 2001 NaN\n", " 13 13 SERC 2001 NaN\n", " 14 14 SERC 2001 NaN\n", " 15 15 SERC 2001 NaN\n", " 16 16 SERC 2001 NaN\n", " 17 17 SERC 2001 NaN\n", " 18 18 SERC 2001 NaN\n", " 19 19 SERC 2001 NaN\n", " 20 20 SERC 2001 NaN\n", " 21 21 SERC 2001 NaN\n", " 26 26 SERC 2001 NaN\n", " 38 38 SERC 2001 NaN\n", " 46 46 SERC 2001 NaN\n", " 47 47 SERC 2001 NaN\n", " 48 48 SERC 2001 NaN\n", " 49 49 SERC 2001 NaN\n", " 50 50 SERC 2001 NaN\n", " 53 53 SERC 2001 NaN\n", " 54 54 ECAR 2001 NaN\n", " 55 55 SERC 2001 NaN\n", " 56 56 SERC 2001 NaN\n", " 60 60 MAPP 2001 NaN\n", "... ... ... ... ...\n", "2017 60145 60145 WECC 2017 NaN\n", " 60149 60149 SERC 2017 NaN\n", " 60152 60152 WECC 2017 NaN\n", " 60185 60185 WECC 2017 NaN\n", " 60186 60186 WECC 2017 NaN\n", " 60210 60210 TRE 2017 NaN\n", " 60218 60218 SPP 2017 NaN\n", " 60256 60256 SPP 2017 NaN\n", " 60258 60258 WECC 2017 NaN\n", " 60263 60263 WECC 2017 NaN\n", " 60278 60278 NPCC 2017 NaN\n", " 60304 60304 WECC 2017 NaN\n", " 60307 60307 WECC 2017 NaN\n", " 60308 60308 WECC 2017 NaN\n", " 60329 60329 RFC 2017 NaN\n", " 60349 60349 WECC 2017 NaN\n", " 60352 60352 WECC 2017 NaN\n", " 60354 60354 MRO 2017 NaN\n", " 60366 60366 TRE 2017 NaN\n", " 60372 60372 TRE 2017 NaN\n", " 60383 60383 SERC 2017 NaN\n", " 60390 60390 RFC 2017 NaN\n", " 60414 60414 SPP 2017 NaN\n", " 60432 60432 WECC 2017 NaN\n", " 60436 60436 TRE 2017 NaN\n", " 60441 60441 WECC 2017 NaN\n", " 60467 60467 WECC 2017 NaN\n", " 60470 60470 RFC 2017 NaN\n", " 60502 60502 TRE 2017 NaN\n", " 60506 60506 TRE 2017 NaN\n", "\n", "[85768 rows x 4 columns]" ] }, "execution_count": 152, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nerc_assignment" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 2 }