{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 2017-08-18 Building the Data Download Function" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Function (Including Packages)\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# define a function to download the data if not already downloaded, and then read it in\n", "\n", "import pandas_datareader\n", "from pandas import DataFrame, Series\n", "import pandas as pd\n", "import os\n", "from urllib.request import urlretrieve\n", "\n", "def getdata_read_or_download(filename, source_URL, force_download = False):\n", " '''Use pandas to read in data from a specified local file in the current\n", " working directory, or download data from a specified source URL if the \n", " local file does not exist in the current working directory. Download\n", " can be forced if the local file is corrupt or simply needs to be updated.\n", " \n", " Parameters:\n", " ===========\n", " \n", " filename : string\n", " location of already-dowloaded data in current working directory\n", " source_URL : string\n", " location of data on internet\n", " force_download: boolean (optional)\n", " if True, force redownload of data\n", " \n", " Returns:\n", " ========\n", " \n", " datafame : pandas dataframe\n", " the data file for the analysis \n", " '''\n", " \n", " if ((force_download == True) or not os.path.exists(filename)):\n", " urlretrieve(source_URL,filename)\n", " dataframe = pd.read_csv(filename)\n", " return dataframe" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/delong/Dropbox/jupyter notebook files (.ipynb)/2017-08-05-delong-jupyter\r\n" ] } ], "source": [ "!cd getdata_read_or_download\n", "! pwd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tests:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Test: read in the 2014 data from the web\n", "\n", "ccuds_pandp_data = getdata_read_or_download(filename = \"pandp.csv\", \n", " source_URL = \"http://delong.typepad.com/2017-08-15-distance-to-frontier-2014-3.csv\")\n", "\n", "# ccuds_pandp_data = pd.read_csv(\n", "# 'http://delong.typepad.com/2017-08-15-distance-to-frontier-2014-3.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 189 entries, 0 to 188\n", "Data columns (total 4 columns):\n", "code 189 non-null object\n", "country 189 non-null object\n", "distance_to_frontier_2014 189 non-null float64\n", "national_income_per_capita_2014 189 non-null int64\n", "dtypes: float64(1), int64(1), object(2)\n", "memory usage: 6.0+ KB\n" ] } ], "source": [ "# Test: check to see if the data is in a comprehensible format...\n", "\n", "ccuds_pandp_data.info()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
codecountrydistance_to_frontier_2014national_income_per_capita_2014
0TCDChad32.062141
1CAFCentral African Republic32.75578
2ERIEritrea32.811140
3SSDSouth Sudan34.072574
4LBYLibya35.4314887
5ZARCongo, Dem. Rep.37.80768
6VENVenezuela, RB38.8116666
7COGCongo, Rep.40.245905
8AFGAfghanistan40.781877
9HTIHaiti42.821670
10NERNiger42.89923
11AGOAngola43.037546
12MMRMyanmar43.143727
13SENSenegal44.072206
14GNBGuinea-Bissau44.141363
15BENBenin44.191779
16TMPTimor-Leste44.712173
17ZWEZimbabwe44.871773
18TJKTajikistan44.882533
19GINGuinea44.941179
20NIGENigeria45.015607
21BFABurkina Faso45.411606
22TGOTogo45.521387
23BANGBangladesh45.572991
24MRTMauritania46.233718
25MDGMadagascar46.411371
26SYRSyrian Arab Republic46.415105
27SURSuriname46.4616649
28ETHEthiopia47.561432
29GNQEquatorial Guinea47.7630783
...............
159BELBelgium74.2340885
160PRTPortugal74.5426055
161AREUnited Arab Emirates75.2660578
162CHESwitzerland75.5755776
163LTULithuania75.8825708
164THAThailand75.9913986
165ESTEstonia76.2925865
166NLDNetherlands77.1045281
167MUSMauritius77.1817731
168AUTAustria77.8443906
169LVALatvia78.2422460
170JAPJapan78.3935635
171TWNTaiwan, China79.2441376
172DEUGermany79.5543444
173CANCanada80.5242817
174MYSMalaysia81.2023579
175AUSAustralia81.6943219
176GEOGeorgia82.097233
177USUnited States82.1452118
178FINFinland82.1838569
179SWESweden82.5144029
180ISLIceland83.0441237
181IRLIreland83.0746633
182NORNorway83.5664020
183KORKorea, Rep.83.9233629
184GBRUnited Kingdom85.6137614
185DNKDenmark85.7142777
186HKGHong Kong SAR, China88.6752552
187NZLNew Zealand89.3233538
188SGPSingapore91.2478958
\n", "

189 rows × 4 columns

\n", "
" ], "text/plain": [ " code country distance_to_frontier_2014 \\\n", "0 TCD Chad 32.06 \n", "1 CAF Central African Republic 32.75 \n", "2 ERI Eritrea 32.81 \n", "3 SSD South Sudan 34.07 \n", "4 LBY Libya 35.43 \n", "5 ZAR Congo, Dem. Rep. 37.80 \n", "6 VEN Venezuela, RB 38.81 \n", "7 COG Congo, Rep. 40.24 \n", "8 AFG Afghanistan 40.78 \n", "9 HTI Haiti 42.82 \n", "10 NER Niger 42.89 \n", "11 AGO Angola 43.03 \n", "12 MMR Myanmar 43.14 \n", "13 SEN Senegal 44.07 \n", "14 GNB Guinea-Bissau 44.14 \n", "15 BEN Benin 44.19 \n", "16 TMP Timor-Leste 44.71 \n", "17 ZWE Zimbabwe 44.87 \n", "18 TJK Tajikistan 44.88 \n", "19 GIN Guinea 44.94 \n", "20 NIGE Nigeria 45.01 \n", "21 BFA Burkina Faso 45.41 \n", "22 TGO Togo 45.52 \n", "23 BANG Bangladesh 45.57 \n", "24 MRT Mauritania 46.23 \n", "25 MDG Madagascar 46.41 \n", "26 SYR Syrian Arab Republic 46.41 \n", "27 SUR Suriname 46.46 \n", "28 ETH Ethiopia 47.56 \n", "29 GNQ Equatorial Guinea 47.76 \n", ".. ... ... ... \n", "159 BEL Belgium 74.23 \n", "160 PRT Portugal 74.54 \n", "161 ARE United Arab Emirates 75.26 \n", "162 CHE Switzerland 75.57 \n", "163 LTU Lithuania 75.88 \n", "164 THA Thailand 75.99 \n", "165 EST Estonia 76.29 \n", "166 NLD Netherlands 77.10 \n", "167 MUS Mauritius 77.18 \n", "168 AUT Austria 77.84 \n", "169 LVA Latvia 78.24 \n", "170 JAP Japan 78.39 \n", "171 TWN Taiwan, China 79.24 \n", "172 DEU Germany 79.55 \n", "173 CAN Canada 80.52 \n", "174 MYS Malaysia 81.20 \n", "175 AUS Australia 81.69 \n", "176 GEO Georgia 82.09 \n", "177 US United States 82.14 \n", "178 FIN Finland 82.18 \n", "179 SWE Sweden 82.51 \n", "180 ISL Iceland 83.04 \n", "181 IRL Ireland 83.07 \n", "182 NOR Norway 83.56 \n", "183 KOR Korea, Rep. 83.92 \n", "184 GBR United Kingdom 85.61 \n", "185 DNK Denmark 85.71 \n", "186 HKG Hong Kong SAR, China 88.67 \n", "187 NZL New Zealand 89.32 \n", "188 SGP Singapore 91.24 \n", "\n", " national_income_per_capita_2014 \n", "0 2141 \n", "1 578 \n", "2 1140 \n", "3 2574 \n", "4 14887 \n", "5 768 \n", "6 16666 \n", "7 5905 \n", "8 1877 \n", "9 1670 \n", "10 923 \n", "11 7546 \n", "12 3727 \n", "13 2206 \n", "14 1363 \n", "15 1779 \n", "16 2173 \n", "17 1773 \n", "18 2533 \n", "19 1179 \n", "20 5607 \n", "21 1606 \n", "22 1387 \n", "23 2991 \n", "24 3718 \n", "25 1371 \n", "26 5105 \n", "27 16649 \n", "28 1432 \n", "29 30783 \n", ".. ... \n", "159 40885 \n", "160 26055 \n", "161 60578 \n", "162 55776 \n", "163 25708 \n", "164 13986 \n", "165 25865 \n", "166 45281 \n", "167 17731 \n", "168 43906 \n", "169 22460 \n", "170 35635 \n", "171 41376 \n", "172 43444 \n", "173 42817 \n", "174 23579 \n", "175 43219 \n", "176 7233 \n", "177 52118 \n", "178 38569 \n", "179 44029 \n", "180 41237 \n", "181 46633 \n", "182 64020 \n", "183 33629 \n", "184 37614 \n", "185 42777 \n", "186 52552 \n", "187 33538 \n", "188 78958 \n", "\n", "[189 rows x 4 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Test: check to see if the data looks like the right data...\n", "\n", "ccuds_pandp_data" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }