{ "metadata": { "name": "California_breathing" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "from pandas import DataFrame\n", "#This is the 'Active Asthma Prevalence, All Ages, 2007' from\n", "#http://www.californiabreathing.org/asthma-data/county-comparisons/active-asthma-prevalence\n", "asth = open('asthma_prev_2007.csv')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "asth_county = pd.read_csv(asth)\n", "asth_county" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\ufeff'COUNTY (OR GROUP)''Percent with Active Asthma''95% Confidence Interval'
0 'Merced' '14.4' '(9.3 - 19.6)'
1 'Tehama/Glenn/Colusa' '13.7' '(9.5 - 17.9)'
2 'Yuba' '12.7' '(8.9 - 16.6)'
3 'Kern' '11.9' '(8.6 - 15.2)'
4 'Fresno' '11.3' '(8.4 - 14.3)'
5 'Sacramento' '11.2' '(8.9 - 13.5)'
6 'Solano' '10.9' '(6.7 - 15.1)'
7 'San Joaquin' '10.8' '(7.5 - 14.2)'
8 'Santa Cruz' '10.6' '(6.6 - 14.5)'
9 'Kings' '10.1' '(7.4 - 12.9)'
10 'Sutter' '10.0' '(7.2 - 12.8)'
11 'Alameda' '9.8' '(7.5 - 12.2)'
12 'Lake' '9.8' '(6.9 - 12.7)'
13 'Napa' '9.8' '(6.8 - 12.7)'
14 'Butte' '9.7' '(7.2 - 12.2)'
15 'Shasta' '9.7' '(5.7 - 13.7)'
16 'Contra Costa' '9.6' '(7.2 - 11.9)'
17 'Imperial' '9.5' '(6.9 - 12.2)'
18 'Madera' '9.5' '(6.9 - 12.1)'
19 'Nevada' '9.4' '(6.6 - 12.1)'
20 'Del Norte/Siskiyou/Lassen/Trinity/Modoc/Pluma... '9.2' '(5.8 - 12.6)'
21 'San Mateo' '9.2' '(5.9 - 12.6)'
22 'Placer' '9.0' '(6.2 - 11.7)'
23 'San Bernardino' '9.0' '(7.5 - 10.5)'
24 'Stanislaus' '9.0' '(5.6 - 12.4)'
25 'Tulare' '8.8' '(6.2 - 11.3)'
26 'Yolo' '8.8' '(5.0 - 12.7)'
27 'Tuolumne/Calaveras/Amador/Inyo/Mariposa/Mono/... '8.7' '(5.8 - 11.6)'
28 'El Dorado' '8.3' '(5.9 - 10.6)'
29 'CALIFORNIA OVERALL' '8.1' '(7.7 - 8.4)'
30 'Ventura' '8.1' '(5.4 - 10.8)'
31 'Santa Barbara' '8.0' '(5.1 - 10.9)'
32 'Sonoma' '7.8' '(5.2 - 10.3)'
33 'San Luis Obispo' '7.5' '(4.4 - 10.6)'
34 'San Diego' '7.3' '(6.3 - 8.3)'
35 'Los Angeles' '7.2' '(6.5 - 7.8)'
36 'Humboldt' '7.0' '(4.9 - 9.2)'
37 'Mendocino' '6.9' '(4.6 - 9.3)'
38 'Monterey' '6.8' '(4.3 - 9.4)'
39 'Orange' '6.8' '(5.7 - 8.0)'
40 'Santa Clara' '6.8' '(5.3 - 8.4)'
41 'Marin' '6.5' '(3.4 - 9.6)'
42 'San Francisco' '6.0' '(3.4 - 8.6)'
43 'Riverside' '5.2' '(4.2 - 6.2)'
\n", "
" ], "output_type": "pyout", "prompt_number": 4, "text": [ " \ufeff'COUNTY (OR GROUP)' \\\n", "0 'Merced' \n", "1 'Tehama/Glenn/Colusa' \n", "2 'Yuba' \n", "3 'Kern' \n", "4 'Fresno' \n", "5 'Sacramento' \n", "6 'Solano' \n", "7 'San Joaquin' \n", "8 'Santa Cruz' \n", "9 'Kings' \n", "10 'Sutter' \n", "11 'Alameda' \n", "12 'Lake' \n", "13 'Napa' \n", "14 'Butte' \n", "15 'Shasta' \n", "16 'Contra Costa' \n", "17 'Imperial' \n", "18 'Madera' \n", "19 'Nevada' \n", "20 'Del Norte/Siskiyou/Lassen/Trinity/Modoc/Pluma... \n", "21 'San Mateo' \n", "22 'Placer' \n", "23 'San Bernardino' \n", "24 'Stanislaus' \n", "25 'Tulare' \n", "26 'Yolo' \n", "27 'Tuolumne/Calaveras/Amador/Inyo/Mariposa/Mono/... \n", "28 'El Dorado' \n", "29 'CALIFORNIA OVERALL' \n", "30 'Ventura' \n", "31 'Santa Barbara' \n", "32 'Sonoma' \n", "33 'San Luis Obispo' \n", "34 'San Diego' \n", "35 'Los Angeles' \n", "36 'Humboldt' \n", "37 'Mendocino' \n", "38 'Monterey' \n", "39 'Orange' \n", "40 'Santa Clara' \n", "41 'Marin' \n", "42 'San Francisco' \n", "43 'Riverside' \n", "\n", " 'Percent with Active Asthma' '95% Confidence Interval' \n", "0 '14.4' '(9.3 - 19.6)' \n", "1 '13.7' '(9.5 - 17.9)' \n", "2 '12.7' '(8.9 - 16.6)' \n", "3 '11.9' '(8.6 - 15.2)' \n", "4 '11.3' '(8.4 - 14.3)' \n", "5 '11.2' '(8.9 - 13.5)' \n", "6 '10.9' '(6.7 - 15.1)' \n", "7 '10.8' '(7.5 - 14.2)' \n", "8 '10.6' '(6.6 - 14.5)' \n", "9 '10.1' '(7.4 - 12.9)' \n", "10 '10.0' '(7.2 - 12.8)' \n", "11 '9.8' '(7.5 - 12.2)' \n", "12 '9.8' '(6.9 - 12.7)' \n", "13 '9.8' '(6.8 - 12.7)' \n", "14 '9.7' '(7.2 - 12.2)' \n", "15 '9.7' '(5.7 - 13.7)' \n", "16 '9.6' '(7.2 - 11.9)' \n", "17 '9.5' '(6.9 - 12.2)' \n", "18 '9.5' '(6.9 - 12.1)' \n", "19 '9.4' '(6.6 - 12.1)' \n", "20 '9.2' '(5.8 - 12.6)' \n", "21 '9.2' '(5.9 - 12.6)' \n", "22 '9.0' '(6.2 - 11.7)' \n", "23 '9.0' '(7.5 - 10.5)' \n", "24 '9.0' '(5.6 - 12.4)' \n", "25 '8.8' '(6.2 - 11.3)' \n", "26 '8.8' '(5.0 - 12.7)' \n", "27 '8.7' '(5.8 - 11.6)' \n", "28 '8.3' '(5.9 - 10.6)' \n", "29 '8.1' '(7.7 - 8.4)' \n", "30 '8.1' '(5.4 - 10.8)' \n", "31 '8.0' '(5.1 - 10.9)' \n", "32 '7.8' '(5.2 - 10.3)' \n", "33 '7.5' '(4.4 - 10.6)' \n", "34 '7.3' '(6.3 - 8.3)' \n", "35 '7.2' '(6.5 - 7.8)' \n", "36 '7.0' '(4.9 - 9.2)' \n", "37 '6.9' '(4.6 - 9.3)' \n", "38 '6.8' '(4.3 - 9.4)' \n", "39 '6.8' '(5.7 - 8.0)' \n", "40 '6.8' '(5.3 - 8.4)' \n", "41 '6.5' '(3.4 - 9.6)' \n", "42 '6.0' '(3.4 - 8.6)' \n", "43 '5.2' '(4.2 - 6.2)' " ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "#Clean up the columnames\n", "asth_county\n", "asth_county['percent_active'] = asth_county['\\'Percent with Active Asthma\\''].apply(lambda x: float(x[1:-1]))\n", "asth_county['county_name'] = asth_county[asth_county.columns[0]].apply(lambda x: str(x[1:-1]))\n", "asth_county['95_confidence'] = asth_county['\\'95% Confidence Interval\\''].apply(lambda x: str(x[1:-1]))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "#Delete old columns with messy names\n", "del asth_county[asth_county.columns[0]]\n", "del asth_county[asth_county.columns[0]]\n", "del asth_county[asth_county.columns[0]]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Some rows represent multiple counties, we got to undo this grouping" ] }, { "cell_type": "code", "collapsed": false, "input": [ "\n", "new_rows = []\n", "def unpack_group(row):\n", " group = row['county_name']\n", " if '/' in group:\n", " counties = group.split('/')\n", " for county in counties:\n", " new_rows.append({'county_name': county, 'percent_active': row['percent_active'], '95_confidence': row['95_confidence']})\n", " " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "#Apply said function\n", "asth_county.apply(unpack_group, axis = 1)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 8, "text": [ "0 None\n", "1 None\n", "2 None\n", "3 None\n", "4 None\n", "5 None\n", "6 None\n", "7 None\n", "8 None\n", "9 None\n", "10 None\n", "11 None\n", "12 None\n", "13 None\n", "14 None\n", "15 None\n", "16 None\n", "17 None\n", "18 None\n", "19 None\n", "20 None\n", "21 None\n", "22 None\n", "23 None\n", "24 None\n", "25 None\n", "26 None\n", "27 None\n", "28 None\n", "29 None\n", "30 None\n", "31 None\n", "32 None\n", "33 None\n", "34 None\n", "35 None\n", "36 None\n", "37 None\n", "38 None\n", "39 None\n", "40 None\n", "41 None\n", "42 None\n", "43 None" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "new_df = DataFrame(new_rows)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "#Join old dataframe and new rows\n", "new_asth = pd.concat([new_df, asth_county], ignore_index=True)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "#Remove rows that are groups\n", "isnt_group = lambda x: '/' not in x\n", "\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "new_asth[new_asth['county_name'].apply(isnt_group)]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
95_confidencecounty_namepercent_active
0 (9.5 - 17.9) Tehama 13.7
1 (9.5 - 17.9) Glenn 13.7
2 (9.5 - 17.9) Colusa 13.7
3 (5.8 - 12.6) Del Norte 9.2
4 (5.8 - 12.6) Siskiyou 9.2
5 (5.8 - 12.6) Lassen 9.2
6 (5.8 - 12.6) Trinity 9.2
7 (5.8 - 12.6) Modoc 9.2
8 (5.8 - 12.6) Plumas 9.2
9 (5.8 - 12.6) Sierra 9.2
10 (5.8 - 11.6) Tuolumne 8.7
11 (5.8 - 11.6) Calaveras 8.7
12 (5.8 - 11.6) Amador 8.7
13 (5.8 - 11.6) Inyo 8.7
14 (5.8 - 11.6) Mariposa 8.7
15 (5.8 - 11.6) Mono 8.7
16 (5.8 - 11.6) Alpine 8.7
17 (9.3 - 19.6) Merced 14.4
19 (8.9 - 16.6) Yuba 12.7
20 (8.6 - 15.2) Kern 11.9
21 (8.4 - 14.3) Fresno 11.3
22 (8.9 - 13.5) Sacramento 11.2
23 (6.7 - 15.1) Solano 10.9
24 (7.5 - 14.2) San Joaquin 10.8
25 (6.6 - 14.5) Santa Cruz 10.6
26 (7.4 - 12.9) Kings 10.1
27 (7.2 - 12.8) Sutter 10.0
28 (7.5 - 12.2) Alameda 9.8
29 (6.9 - 12.7) Lake 9.8
30 (6.8 - 12.7) Napa 9.8
31 (7.2 - 12.2) Butte 9.7
32 (5.7 - 13.7) Shasta 9.7
33 (7.2 - 11.9) Contra Costa 9.6
34 (6.9 - 12.2) Imperial 9.5
35 (6.9 - 12.1) Madera 9.5
36 (6.6 - 12.1) Nevada 9.4
38 (5.9 - 12.6) San Mateo 9.2
39 (6.2 - 11.7) Placer 9.0
40 (7.5 - 10.5) San Bernardino 9.0
41 (5.6 - 12.4) Stanislaus 9.0
42 (6.2 - 11.3) Tulare 8.8
43 (5.0 - 12.7) Yolo 8.8
45 (5.9 - 10.6) El Dorado 8.3
46 (7.7 - 8.4) CALIFORNIA OVERALL 8.1
47 (5.4 - 10.8) Ventura 8.1
48 (5.1 - 10.9) Santa Barbara 8.0
49 (5.2 - 10.3) Sonoma 7.8
50 (4.4 - 10.6) San Luis Obispo 7.5
51 (6.3 - 8.3) San Diego 7.3
52 (6.5 - 7.8) Los Angeles 7.2
53 (4.9 - 9.2) Humboldt 7.0
54 (4.6 - 9.3) Mendocino 6.9
55 (4.3 - 9.4) Monterey 6.8
56 (5.7 - 8.0) Orange 6.8
57 (5.3 - 8.4) Santa Clara 6.8
58 (3.4 - 9.6) Marin 6.5
59 (3.4 - 8.6) San Francisco 6.0
60 (4.2 - 6.2) Riverside 5.2
\n", "
" ], "output_type": "pyout", "prompt_number": 12, "text": [ " 95_confidence county_name percent_active\n", "0 (9.5 - 17.9) Tehama 13.7\n", "1 (9.5 - 17.9) Glenn 13.7\n", "2 (9.5 - 17.9) Colusa 13.7\n", "3 (5.8 - 12.6) Del Norte 9.2\n", "4 (5.8 - 12.6) Siskiyou 9.2\n", "5 (5.8 - 12.6) Lassen 9.2\n", "6 (5.8 - 12.6) Trinity 9.2\n", "7 (5.8 - 12.6) Modoc 9.2\n", "8 (5.8 - 12.6) Plumas 9.2\n", "9 (5.8 - 12.6) Sierra 9.2\n", "10 (5.8 - 11.6) Tuolumne 8.7\n", "11 (5.8 - 11.6) Calaveras 8.7\n", "12 (5.8 - 11.6) Amador 8.7\n", "13 (5.8 - 11.6) Inyo 8.7\n", "14 (5.8 - 11.6) Mariposa 8.7\n", "15 (5.8 - 11.6) Mono 8.7\n", "16 (5.8 - 11.6) Alpine 8.7\n", "17 (9.3 - 19.6) Merced 14.4\n", "19 (8.9 - 16.6) Yuba 12.7\n", "20 (8.6 - 15.2) Kern 11.9\n", "21 (8.4 - 14.3) Fresno 11.3\n", "22 (8.9 - 13.5) Sacramento 11.2\n", "23 (6.7 - 15.1) Solano 10.9\n", "24 (7.5 - 14.2) San Joaquin 10.8\n", "25 (6.6 - 14.5) Santa Cruz 10.6\n", "26 (7.4 - 12.9) Kings 10.1\n", "27 (7.2 - 12.8) Sutter 10.0\n", "28 (7.5 - 12.2) Alameda 9.8\n", "29 (6.9 - 12.7) Lake 9.8\n", "30 (6.8 - 12.7) Napa 9.8\n", "31 (7.2 - 12.2) Butte 9.7\n", "32 (5.7 - 13.7) Shasta 9.7\n", "33 (7.2 - 11.9) Contra Costa 9.6\n", "34 (6.9 - 12.2) Imperial 9.5\n", "35 (6.9 - 12.1) Madera 9.5\n", "36 (6.6 - 12.1) Nevada 9.4\n", "38 (5.9 - 12.6) San Mateo 9.2\n", "39 (6.2 - 11.7) Placer 9.0\n", "40 (7.5 - 10.5) San Bernardino 9.0\n", "41 (5.6 - 12.4) Stanislaus 9.0\n", "42 (6.2 - 11.3) Tulare 8.8\n", "43 (5.0 - 12.7) Yolo 8.8\n", "45 (5.9 - 10.6) El Dorado 8.3\n", "46 (7.7 - 8.4) CALIFORNIA OVERALL 8.1\n", "47 (5.4 - 10.8) Ventura 8.1\n", "48 (5.1 - 10.9) Santa Barbara 8.0\n", "49 (5.2 - 10.3) Sonoma 7.8\n", "50 (4.4 - 10.6) San Luis Obispo 7.5\n", "51 (6.3 - 8.3) San Diego 7.3\n", "52 (6.5 - 7.8) Los Angeles 7.2\n", "53 (4.9 - 9.2) Humboldt 7.0\n", "54 (4.6 - 9.3) Mendocino 6.9\n", "55 (4.3 - 9.4) Monterey 6.8\n", "56 (5.7 - 8.0) Orange 6.8\n", "57 (5.3 - 8.4) Santa Clara 6.8\n", "58 (3.4 - 9.6) Marin 6.5\n", "59 (3.4 - 8.6) San Francisco 6.0\n", "60 (4.2 - 6.2) Riverside 5.2" ] } ], "prompt_number": 12 } ], "metadata": {} } ] }