{ "metadata": { "name": "", "signature": "sha256:48984802b30a95b6bf01c11b6b871a2468a65dd8f3c37cf9714186c1134d8d0c" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Load monthly, historical precipitation data from the California Department\n", "of Water Resources.\n", "The tables are linked from http://cdec.water.ca.gov/cgi-progs/prevprecip/PRECIPOUT,\n", "e.g. http://cdec.water.ca.gov/cgi-progs/reports/PRECIPOUT.2011." ] }, { "cell_type": "code", "collapsed": false, "input": [ "import lxml.html\n", "import numpy as np\n", "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "# constants\n", "REGION = 'region'\n", "SUBREGION = 'subregion'\n", "DATAROW = 'datarow'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get some example rows for testing functions later." ] }, { "cell_type": "code", "collapsed": false, "input": [ "h = lxml.html.parse('http://cdec.water.ca.gov/cgi-progs/reports/PRECIPOUT.2011')\n", "table = h.xpath('//*[@id=\"main_content\"]/div/div[1]/table')[0]\n", "rows = table.getchildren()\n", "region_row = rows[0]\n", "subregion_row = rows[2]\n", "station_row = rows[3]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_table(url):\n", "    \"\"\"Return the first HTML table matched on the page at `url`.\n", "\n", "    `url` is passed straight to `lxml.html.parse`.\n", "    Raises IndexError, naming the URL, if no matching table is found.\n", "    \"\"\"\n", "    xpath = '//*[@id=\"main_content\"]/div/div[1]/table'\n", "    tables = lxml.html.parse(url).xpath(xpath)\n", "    if not tables:\n", "        # Keep IndexError (what the bare [0] raised) but say which page failed.\n", "        raise IndexError('no table matching %r at %s' % (xpath, url))\n", "    return tables[0]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "def row_type(row):\n", " \"\"\"Categorize a row.\"\"\"\n", " cells = row.iterchildren()\n", " \n", " if next(cells).tag == 'th':\n", " # first cell is a
\n", " | region | \n", "subregion | \n", "station | \n", "abbreviation | \n", "elevation | \n", "year | \n", "month | \n", "precip | \n", "avg precip | \n", "pct of avg | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Oct | \n", "8.40 | \n", "7.53 | \n", "112 | \n", "
1 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Nov | \n", "12.42 | \n", "14.14 | \n", "88 | \n", "
2 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Dec | \n", "25.00 | \n", "16.37 | \n", "153 | \n", "
3 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Jan | \n", "4.91 | \n", "16.45 | \n", "30 | \n", "
4 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Feb | \n", "9.53 | \n", "11.95 | \n", "80 | \n", "
5 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Mar | \n", "26.47 | \n", "11.08 | \n", "239 | \n", "
6 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Apr | \n", "10.25 | \n", "6.47 | \n", "158 | \n", "
7 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "May | \n", "4.67 | \n", "4.43 | \n", "105 | \n", "
8 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Jun | \n", "1.62 | \n", "0.83 | \n", "195 | \n", "
9 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Jul | \n", "0.39 | \n", "0.56 | \n", "70 | \n", "
10 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Aug | \n", "0.00 | \n", "0.68 | \n", "0 | \n", "
11 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Sep | \n", "0.34 | \n", "1.84 | \n", "18 | \n", "
12 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Oct-Sep | \n", "104.00 | \n", "92.33 | \n", "113 | \n", "
13 | \n", "NORTH COAST | \n", "SMITH RIVER | \n", "Gasquet Ranger Station | \n", "GAS | \n", "384 | \n", "2011 | \n", "Wat-Yr | \n", "NaN | \n", "92.33 | \n", "113 | \n", "
14 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Oct | \n", "4.34 | \n", "1.40 | \n", "310 | \n", "
15 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Nov | \n", "1.66 | \n", "2.75 | \n", "60 | \n", "
16 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Dec | \n", "5.65 | \n", "3.67 | \n", "154 | \n", "
17 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Jan | \n", "0.91 | \n", "3.68 | \n", "25 | \n", "
18 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Feb | \n", "1.06 | \n", "2.58 | \n", "41 | \n", "
19 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Mar | \n", "5.42 | \n", "2.14 | \n", "253 | \n", "
20 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Apr | \n", "0.96 | \n", "1.38 | \n", "70 | \n", "
21 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "May | \n", "1.54 | \n", "0.83 | \n", "186 | \n", "
22 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Jun | \n", "1.42 | \n", "0.67 | \n", "212 | \n", "
23 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Jul | \n", "0.42 | \n", "0.18 | \n", "233 | \n", "
24 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Aug | \n", "0.23 | \n", "0.29 | \n", "79 | \n", "
25 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Sep | \n", "0.09 | \n", "0.48 | \n", "19 | \n", "
26 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Oct-Sep | \n", "23.70 | \n", "20.05 | \n", "118 | \n", "
27 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Callahan | \n", "CAL | \n", "3185 | \n", "2011 | \n", "Wat-Yr | \n", "NaN | \n", "20.05 | \n", "118 | \n", "
28 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Fort Jones RS | \n", "FJN | \n", "2725 | \n", "2011 | \n", "Oct | \n", "3.25 | \n", "1.50 | \n", "217 | \n", "
29 | \n", "NORTH COAST | \n", "KLAMATH RIVER | \n", "Fort Jones RS | \n", "FJN | \n", "2725 | \n", "2011 | \n", "Nov | \n", "2.64 | \n", "2.90 | \n", "91 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9650 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Blythe | \n", "BLY | \n", "390 | \n", "2014 | \n", "Jul | \n", "0.00 | \n", "0.19 | \n", "0 | \n", "
9651 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Blythe | \n", "BLY | \n", "390 | \n", "2014 | \n", "Aug | \n", "0.49 | \n", "0.68 | \n", "72 | \n", "
9652 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Blythe | \n", "BLY | \n", "390 | \n", "2014 | \n", "Oct-Aug | \n", "1.38 | \n", "3.23 | \n", "43 | \n", "
9653 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Blythe | \n", "BLY | \n", "390 | \n", "2014 | \n", "Wat-Yr | \n", "NaN | \n", "3.52 | \n", "39 | \n", "
9654 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Oct | \n", "0.03 | \n", "0.24 | \n", "12 | \n", "
9655 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Nov | \n", "NaN | \n", "0.24 | \n", "NaN | \n", "
9656 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Dec | \n", "NaN | \n", "0.38 | \n", "NaN | \n", "
9657 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Jan | \n", "0.00 | \n", "0.33 | \n", "0 | \n", "
9658 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Feb | \n", "0.00 | \n", "0.27 | \n", "0 | \n", "
9659 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Mar | \n", "NaN | \n", "0.27 | \n", "NaN | \n", "
9660 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Apr | \n", "NaN | \n", "0.10 | \n", "NaN | \n", "
9661 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "May | \n", "NaN | \n", "0.01 | \n", "NaN | \n", "
9662 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Jun | \n", "0.00 | \n", "0.01 | \n", "0 | \n", "
9663 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Jul | \n", "0.12 | \n", "0.24 | \n", "50 | \n", "
9664 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Aug | \n", "NaN | \n", "0.32 | \n", "NaN | \n", "
9665 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Oct-Aug | \n", "NaN | \n", "2.41 | \n", "NaN | \n", "
9666 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Niland | \n", "NLD | \n", "-60 | \n", "2014 | \n", "Wat-Yr | \n", "NaN | \n", "2.63 | \n", "NaN | \n", "
9667 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Oct | \n", "0.01 | \n", "0.23 | \n", "4 | \n", "
9668 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Nov | \n", "0.95 | \n", "0.18 | \n", "528 | \n", "
9669 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Dec | \n", "0.00 | \n", "0.43 | \n", "0 | \n", "
9670 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Jan | \n", "0.00 | \n", "0.38 | \n", "0 | \n", "
9671 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Feb | \n", "0.00 | \n", "0.36 | \n", "0 | \n", "
9672 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Mar | \n", "0.01 | \n", "0.19 | \n", "5 | \n", "
9673 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Apr | \n", "0.00 | \n", "0.13 | \n", "0 | \n", "
9674 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "May | \n", "0.00 | \n", "0.01 | \n", "0 | \n", "
9675 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Jun | \n", "0.00 | \n", "0.00 | \n", "NaN | \n", "
9676 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Jul | \n", "0.00 | \n", "0.10 | \n", "0 | \n", "
9677 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Aug | \n", "NaN | \n", "0.36 | \n", "NaN | \n", "
9678 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Oct-Aug | \n", "NaN | \n", "2.37 | \n", "NaN | \n", "
9679 | \n", "COLORADO RIVER | \n", "COLORADO DESERT | \n", "Imperial Valley | \n", "IMP | \n", "-64 | \n", "2014 | \n", "Wat-Yr | \n", "NaN | \n", "2.73 | \n", "NaN | \n", "
9680 rows \u00d7 10 columns
\n", "