{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#Url = \n", "\n", "https://waterdata.usgs.gov/la/nwis/water_use?format=rdb&rdb_compression=value&wu_area=County&wu_year=2000%2C2005%2C2010&wu_county=ALL&wu_category=ALL&wu_county_nms=--ALL%2BCounties--&wu_category_nms=--ALL%2BCategories--" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import sys, os, urllib\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#Set the URL path and values\n", "state = 'la' #Louisiana\n", "path = 'https://waterdata.usgs.gov/{}/nwis/water_use?'.format(state)\n", "values = {'format':'rdb',\n", " 'rdb_compression':'value',\n", " 'wu_area':'County',\n", " 'wu_year':'2010',#2005,2010',\n", " 'wu_county':'ALL',\n", " 'wu_county_nms':'--ALL+Counties--',\n", " 'wu_category_nms':'--ALL+Categories--'\n", " }" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#Retrieve the data as a response object\n", "url = path + urllib.urlencode(values)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#Pull data in using the URL and remove the 2nd row of headers\n", "dfRaw = pd.read_table(url,comment='#',header=[0,1],na_values='-')\n", "dfRaw.columns = dfRaw.columns.droplevel(level=1)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#Read locally, for debugging, and drop the 2nd row of headers\n", "#dfRaw = pd.read_table('../../Data/Proprietary/LA.txt',comment='#',header=[0,1],na_values='-')\n", "#dfRaw.columns = dfRaw.columns.droplevel(level=1)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(17664, 7)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Tidy the data\n", "rowHeadings = ['county_cd', 'county_nm', 'state_cd', 'state_name', 'year']\n", "dfTidy = pd.melt(dfRaw,id_vars=rowHeadings,value_name='MGal',var_name='Group')\n", "dfTidy.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(15040, 7)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Remove rows that don't have volume data\n", "dfTidy = dfTidy[dfTidy['Group'].str.contains('Mgal')]\n", "dfTidy.shape" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "50620.130000000005" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Change the type of the MGal column to float\n", "dfTidy['MGal'] = dfTidy.MGal.astype(np.float)\n", "dfTidy['MGal'].sum()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "#Fill in Source and Destination columns\n", "dfTidy.loc[dfTidy['Group'].str.startswith('Public Supply'), 'Destination'] = 'PS'" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "#Summarize \n", "dfState = dfTidy.groupby(['Group'])['MGal'].sum()\n", "dfState.to_csv('{}.csv'.format(state))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 2 }