{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Example data analysis notebook\n", "\n", "This notebook downloads and analyses some surface air temperature anomaly data from [Berkeley Earth](http://berkeleyearth.org/)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Import the required libraries." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import requests" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use the [requests](http://docs.python-requests.org/) library to download the data file for Australia." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# URL to the data\n", "url = 'http://berkeleyearth.lbl.gov/auto/Regional/TAVG/Text/australia-TAVG-Trend.txt'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Open & Clean the Data\n", "\n", "To begin there are some data cleaning steps that you need to implement here. \n", "Often when you are building a workflow you build it out in its entirety first \n", "to get the pieces working. And then you turn those pieces into functions to \n", "modularize and scale your workflow." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Year | \n", "Month | \n", "Anomaly | \n", "Unc. | \n", "Anomaly.1 | \n", "Unc..1 | \n", "Anomaly.2 | \n", "Unc..2 | \n", "Anomaly.3 | \n", "Unc..3 | \n", "Anomaly.4 | \n", "Unc. | \n", "Day | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 1876-02-01 | \n", "1876 | \n", "2 | \n", "-0.371 | \n", "1.421 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 1876-03-01 | \n", "1876 | \n", "3 | \n", "0.124 | \n", "1.455 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 1876-04-01 | \n", "1876 | \n", "4 | \n", "-0.697 | \n", "1.023 | \n", "-0.557 | \n", "0.487 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 1876-05-01 | \n", "1876 | \n", "5 | \n", "-0.142 | \n", "1.078 | \n", "-0.509 | \n", "0.470 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 1876-06-01 | \n", "1876 | \n", "6 | \n", "-0.941 | \n", "0.932 | \n", "-0.379 | \n", "0.464 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 2020-02-01 | \n", "2020 | \n", "2 | \n", "1.022 | \n", "0.146 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 2020-03-01 | \n", "2020 | \n", "3 | \n", "0.434 | \n", "0.195 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 2020-04-01 | \n", "2020 | \n", "4 | \n", "1.526 | \n", "0.168 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 2020-05-01 | \n", "2020 | \n", "5 | \n", "-0.620 | \n", "0.094 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 2020-06-01 | \n", "2020 | \n", "6 | \n", "0.505 | \n", "0.365 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
1733 rows × 13 columns
\n", "| \n", " | Year | \n", "Month | \n", "Anomaly | \n", "Unc. | \n", "Anomaly.1 | \n", "Unc..1 | \n", "Anomaly.2 | \n", "Unc..2 | \n", "Anomaly.3 | \n", "Unc..3 | \n", "Anomaly.4 | \n", "Unc. | \n", "Day | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 1876-02-01 | \n", "1876 | \n", "2 | \n", "-0.371 | \n", "1.421 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 1876-03-01 | \n", "1876 | \n", "3 | \n", "0.124 | \n", "1.455 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 1876-04-01 | \n", "1876 | \n", "4 | \n", "-0.697 | \n", "1.023 | \n", "-0.557 | \n", "0.487 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 1876-05-01 | \n", "1876 | \n", "5 | \n", "-0.142 | \n", "1.078 | \n", "-0.509 | \n", "0.470 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "
| 1876-06-01 | \n", "1876 | \n", "6 | \n", "-0.941 | \n", "0.932 | \n", "-0.379 | \n", "0.464 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1 | \n", "