{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Stumpy Tutorial Dataset Backups\n",
    "\n",
    "This notebook copies the download process used in the active tutorials of the Stumpy docs and then exports each dataset as a CSV to a local directory.\n",
    "\n",
    "The CSVs are subsequently uploaded to the Stumpy community on [Zenodo](https://zenodo.org/communities/stumpy/?page=1&size=20)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "import urllib\n",
    "import ssl\n",
    "import io\n",
    "import os\n",
    "from zipfile import ZipFile\n",
    "from urllib.request import urlopen\n",
    "\n",
    "from scipy.io import loadmat\n",
    "\n",
    "# Ignore SSL certificate verification for simplicity. NOTE: downloads made with\n",
    "# this context are NOT authenticated, so only use it for these public datasets.\n",
    "# The protocol is passed explicitly because a bare `ssl.SSLContext()` call is\n",
    "# deprecated since Python 3.10.\n",
    "context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)\n",
    "context.check_hostname = False  # has to be disabled before verify_mode can be CERT_NONE\n",
    "context.verify_mode = ssl.CERT_NONE"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Steamgen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "colnames = ['drum pressure',\n",
    "            'excess oxygen',\n",
    "            'water level',\n",
    "            'steam flow'\n",
    "            ]\n",
    "\n",
    "\n",
    "url = 'https://www.cs.ucr.edu/~eamonn/iSAX/steamgen.dat'\n",
    "raw_bytes = urllib.request.urlopen(url, context=context).read()\n",
    "data = io.BytesIO(raw_bytes)\n",
    "# The file is parsed as whitespace-delimited with no header row; column names are assigned afterwards.\n",
    "steam_df = pd.read_csv(data, header=None, sep=\"\\\\s+\")\n",
    "steam_df.columns = colnames\n",
    "\n",
    "steam_df.to_csv('STUMPY_Basics_steamgen.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Taxi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ref - https://github.com/stanford-futuredata/ASAP\n",
    "taxi_df = pd.read_csv(\"https://raw.githubusercontent.com/stanford-futuredata/ASAP/master/Taxi.csv\", sep=',')\n",
    "\n",
    "taxi_df.to_csv('STUMPY_Basics_Taxi.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Kohls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'https://sites.google.com/site/timeserieschain/home/Kohls_data.mat?attredirects=0&revision=1'\n",
    "raw_bytes = urllib.request.urlopen(url, context=context).read()\n",
    "data = io.BytesIO(raw_bytes)\n",
    "mat = loadmat(data)\n",
    "# The array is read from the 'VarName1' entry of the .mat file; its dtype is kept as-is.\n",
    "mdata = mat['VarName1']\n",
    "mdtype = mdata.dtype\n",
    "\n",
    "df = pd.DataFrame(mdata, dtype=mdtype, columns=['volume'])\n",
    "\n",
    "df.to_csv('Time_Series_Chains_Kohls_data.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TiltABP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'https://sites.google.com/site/timeserieschain/home/TiltABP_210_25000.txt'\n",
    "raw_bytes = urllib.request.urlopen(url, context=context).read()\n",
    "data = io.BytesIO(raw_bytes)\n",
    "df = pd.read_csv(data, header=None)\n",
    "# Turn the positional row index into an explicit 'time' column and name the value column 'abp'.\n",
    "df = df.reset_index().rename({'index': 'time', 0: 'abp'}, axis='columns')\n",
    "\n",
    "df.to_csv('Semantic_Segmentation_TiltABP.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Robot Dog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "T_url = 'https://www.cs.unm.edu/~mueen/robot_dog.txt'\n",
    "T_raw_bytes = urllib.request.urlopen(T_url, context=context).read()\n",
    "T_data = io.BytesIO(T_raw_bytes)\n",
    "# The backslash is escaped ('\\\\s+') so the separator is not an invalid string escape sequence.\n",
    "T_df = pd.read_csv(T_data, header=None, sep='\\\\s+', names=['Acceleration'])\n",
    "\n",
    "T_df.to_csv('Fast_Pattern_Searching_robot_dog.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Carpet query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "Q_url = 'https://www.cs.unm.edu/~mueen/carpet_query.txt'\n",
    "Q_raw_bytes = urllib.request.urlopen(Q_url, context=context).read()\n",
    "Q_data = io.BytesIO(Q_raw_bytes)\n",
    "# The backslash is escaped ('\\\\s+') so the separator is not an invalid string escape sequence.\n",
    "Q_df = pd.read_csv(Q_data, header=None, sep='\\\\s+', names=['Acceleration'])\n",
    "\n",
    "Q_df.to_csv('carpet_query.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gun Point Training Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the member straight out of the in-memory archive instead of extracting it to the\n",
    "# working directory, so no temporary file is created, overwritten or left behind on error.\n",
    "with ZipFile(io.BytesIO(urlopen(\"http://alumni.cs.ucr.edu/~lexiangy/Shapelet/gun.zip\").read())) as fzip:\n",
    "    # training set\n",
    "    train = io.BytesIO(fzip.read(\"gun_train\"))\n",
    "train_df = pd.read_csv(train, sep=\"\\\\s+\", header=None)\n",
    "\n",
    "train_df.to_csv(\"gun_point_train_data.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gun Point Test Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the member straight out of the in-memory archive instead of extracting it to the\n",
    "# working directory, so no temporary file is created, overwritten or left behind on error.\n",
    "with ZipFile(io.BytesIO(urlopen(\"http://alumni.cs.ucr.edu/~lexiangy/Shapelet/gun.zip\").read())) as fzip:\n",
    "    # test set\n",
    "    test = io.BytesIO(fzip.read(\"gun_test\"))\n",
    "test_df = pd.read_csv(test, sep=\"\\\\s+\", header=None)\n",
    "\n",
    "test_df.to_csv(\"gun_point_test_data.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vanilla Ice, Queen, and David Bowie Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the .mat member directly from the in-memory archive so that no\n",
    "# 'figure9_10/data.mat' file is extracted into the working directory.\n",
    "with ZipFile(io.BytesIO(urlopen(\"https://www.dropbox.com/s/ybzkw5v6h46bv22/figure9_10.zip?dl=1&sa=D&sntz=1&usg=AFQjCNEDp3G8OKGC-Zj5yucpSSCz7WRpRg\").read())) as fzip:\n",
    "    mat = loadmat(io.BytesIO(fzip.read(\"figure9_10/data.mat\")))\n",
    "\n",
    "# Only the first row of each stored array is exported.\n",
    "queen_df = pd.DataFrame(mat['mfcc_queen'][0], columns=['under_pressure'])\n",
    "vanilla_ice_df = pd.DataFrame(mat['mfcc_vanilla_ice'][0], columns=['ice_ice_baby'])\n",
    "\n",
    "queen_df.to_csv(\"queen.csv\", index=False)\n",
    "vanilla_ice_df.to_csv(\"vanilla_ice.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mitochondrial DNA (mtDNA) Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "T_url = 'https://sites.google.com/site/consensusmotifs/dna.zip?attredirects=0&d=1'\n",
    "T_raw_bytes = urllib.request.urlopen(T_url, context=context).read()\n",
    "T_data = io.BytesIO(T_raw_bytes)\n",
    "animals = ['python', 'hippo', 'red_flying_fox', 'alpaca']\n",
    "\n",
    "# One .mat file per animal is read from the archive and written to '<animal>.csv'.\n",
    "with ZipFile(T_data) as T_zipfile:\n",
    "    for animal in animals:\n",
    "        with T_zipfile.open(f'dna/data/{animal}.mat') as f:\n",
    "            # Flatten the 'ts' array to 1-D and store it as floats.\n",
    "            data = loadmat(f)['ts'].flatten().astype(float)\n",
    "        df = pd.DataFrame(data)\n",
    "        df.to_csv(f\"{animal}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multi-dimensional Toy Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"https://github.com/mcyeh/mstamp/blob/master/Python/toy_data.mat?raw=true\"\n",
    "raw_bytes = urllib.request.urlopen(url, context=context).read()\n",
    "data = io.BytesIO(raw_bytes)\n",
    "mat = loadmat(data)\n",
    "mdata = mat['data']\n",
    "mdtype = mdata.dtype\n",
    "\n",
    "# The stored columns are labelled T3, T2, T1 (in that order) and then reordered to T1, T2, T3 for export.\n",
    "df = pd.DataFrame(mdata, dtype=mdtype, columns=['T3', 'T2', 'T1'])\n",
    "df = df[['T1', 'T2', 'T3']]\n",
    "df.to_csv(\"toy.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}