{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Time Series Features with tsfresh Tutorial" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook explains how to create time series features with `tsfresh`.\n", "\n", "This notebook will use the [Beijing Multi-Site Air-Quality Data](https://archive.ics.uci.edu/ml/datasets/Beijing+Multi-Site+Air-Quality+Data) downloaded from the **UCI Machine Learning Repository**." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Packages" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The documentation for each package used in this tutorial is linked below:\n", "* [pandas](https://pandas.pydata.org/docs/)\n", "* [tsfresh](https://tsfresh.readthedocs.io/en/latest/)\n", "* [urllib](https://docs.python.org/3/library/urllib.html)\n", "* [io](https://docs.python.org/3/library/io.html)\n", "* [zipfile](https://docs.python.org/3/library/zipfile.html)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import tsfresh\n", "from urllib.request import urlopen\n", "from io import BytesIO\n", "from zipfile import ZipFile" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create initial dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The zip file is downloaded from the **UCI Machine Learning Repository** using `urllib` and unzipped with `zipfile`. The archive contains one CSV file for each reporting station. Each of these CSV files is read and appended to the pandas DataFrame." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | year | \n", "month | \n", "day | \n", "hour | \n", "PM2.5 | \n", "PM10 | \n", "SO2 | \n", "NO2 | \n", "CO | \n", "O3 | \n", "TEMP | \n", "PRES | \n", "DEWP | \n", "RAIN | \n", "wd | \n", "WSPM | \n", "station | \n", "timestamp | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "4.0 | \n", "4.0 | \n", "4.0 | \n", "7.0 | \n", "300.0 | \n", "77.0 | \n", "-0.7 | \n", "1023.0 | \n", "-18.8 | \n", "0.0 | \n", "NNW | \n", "4.4 | \n", "Aotizhongxin | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "3.0 | \n", "6.0 | \n", "13.0 | \n", "7.0 | \n", "300.0 | \n", "85.0 | \n", "-2.3 | \n", "1020.8 | \n", "-19.7 | \n", "0.0 | \n", "E | \n", "0.5 | \n", "Changping | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "4.0 | \n", "4.0 | \n", "3.0 | \n", "NaN | \n", "200.0 | \n", "82.0 | \n", "-2.3 | \n", "1020.8 | \n", "-19.7 | \n", "0.0 | \n", "E | \n", "0.5 | \n", "Dingling | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "9.0 | \n", "9.0 | \n", "3.0 | \n", "17.0 | \n", "300.0 | \n", "89.0 | \n", "-0.5 | \n", "1024.5 | \n", "-21.4 | \n", "0.0 | \n", "NNW | \n", "5.7 | \n", "Dongsi | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "4.0 | \n", "4.0 | \n", "14.0 | \n", "20.0 | \n", "300.0 | \n", "69.0 | \n", "-0.7 | \n", "1023.0 | \n", "-18.8 | \n", "0.0 | \n", "NNW | \n", "4.4 | \n", "Guanyuan | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "6.0 | \n", "18.0 | \n", "5.0 | \n", "NaN | \n", "800.0 | \n", "88.0 | \n", "0.1 | \n", "1021.1 | \n", "-18.6 | \n", "0.0 | \n", "NW | \n", "4.4 | \n", "Gucheng | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "7.0 | \n", "7.0 | \n", "3.0 | \n", "2.0 | \n", "100.0 | \n", "91.0 | \n", "-2.3 | \n", "1020.3 | \n", "-20.7 | \n", "0.0 | \n", "WNW | \n", "3.1 | \n", "Huairou | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "5.0 | \n", "14.0 | \n", "4.0 | \n", "12.0 | \n", "200.0 | \n", "85.0 | \n", "-0.5 | \n", "1024.5 | \n", "-21.4 | \n", "0.0 | \n", "NNW | \n", "5.7 | \n", "Nongzhanguan | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "3.0 | \n", "6.0 | \n", "3.0 | \n", "8.0 | \n", "300.0 | \n", "44.0 | \n", "-0.9 | \n", "1025.8 | \n", "-20.5 | \n", "0.0 | \n", "NW | \n", "9.3 | \n", "Shunyi | \n", "2013-03-01 | \n", "
| 0 | \n", "2013 | \n", "3 | \n", "1 | \n", "0 | \n", "6.0 | \n", "6.0 | \n", "4.0 | \n", "8.0 | \n", "300.0 | \n", "81.0 | \n", "-0.5 | \n", "1024.5 | \n", "-21.4 | \n", "0.0 | \n", "NNW | \n", "5.7 | \n", "Tiantan | \n", "2013-03-01 | \n", "
| \n", " | station | \n", "timestamp | \n", "PM2.5__sum_values | \n", "PM2.5__median | \n", "PM2.5__mean | \n", "PM2.5__length | \n", "PM2.5__standard_deviation | \n", "PM2.5__variance | \n", "PM2.5__root_mean_square | \n", "PM2.5__maximum | \n", "... | \n", "RAIN__minimum | \n", "WSPM__sum_values | \n", "WSPM__median | \n", "WSPM__mean | \n", "WSPM__length | \n", "WSPM__standard_deviation | \n", "WSPM__variance | \n", "WSPM__root_mean_square | \n", "WSPM__maximum | \n", "WSPM__minimum | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Aotizhongxin | \n", "2014-03-02 00:00:00 | \n", "2053.0 | \n", "67.0 | \n", "82.12 | \n", "25.0 | \n", "67.658153 | \n", "4577.6256 | \n", "106.401692 | \n", "210.0 | \n", "... | \n", "0.0 | \n", "51.1 | \n", "1.8 | \n", "2.044 | \n", "25.0 | \n", "0.964606 | \n", "0.930464 | \n", "2.260177 | \n", "4.3 | \n", "0.1 | \n", "
| 1 | \n", "Aotizhongxin | \n", "2014-03-02 01:00:00 | \n", "1976.0 | \n", "67.0 | \n", "79.04 | \n", "25.0 | \n", "64.108957 | \n", "4109.9584 | \n", "101.770723 | \n", "210.0 | \n", "... | \n", "0.0 | \n", "52.3 | \n", "2.0 | \n", "2.092 | \n", "25.0 | \n", "0.966197 | \n", "0.933536 | \n", "2.304344 | \n", "4.3 | \n", "0.1 | \n", "
| 2 | \n", "Aotizhongxin | \n", "2014-03-02 02:00:00 | \n", "1902.0 | \n", "67.0 | \n", "76.08 | \n", "25.0 | \n", "59.539513 | \n", "3544.9536 | \n", "96.608074 | \n", "177.0 | \n", "... | \n", "0.0 | \n", "52.6 | \n", "2.1 | \n", "2.104 | \n", "25.0 | \n", "0.964357 | \n", "0.929984 | \n", "2.314476 | \n", "4.3 | \n", "0.1 | \n", "
| 3 | \n", "Aotizhongxin | \n", "2014-03-02 03:00:00 | \n", "1852.0 | \n", "67.0 | \n", "74.08 | \n", "25.0 | \n", "56.897044 | \n", "3237.2736 | \n", "93.408351 | \n", "176.0 | \n", "... | \n", "0.0 | \n", "53.5 | \n", "2.2 | \n", "2.140 | \n", "25.0 | \n", "0.950368 | \n", "0.903200 | \n", "2.341538 | \n", "4.3 | \n", "0.1 | \n", "
| 4 | \n", "Aotizhongxin | \n", "2014-03-02 04:00:00 | \n", "1790.0 | \n", "67.0 | \n", "71.60 | \n", "25.0 | \n", "53.659668 | \n", "2879.3600 | \n", "89.475807 | \n", "175.0 | \n", "... | \n", "0.0 | \n", "54.3 | \n", "2.2 | \n", "2.172 | \n", "25.0 | \n", "0.934888 | \n", "0.874016 | \n", "2.364656 | \n", "4.3 | \n", "0.1 | \n", "
5 rows × 101 columns
\n", "