{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Stumpy Tutorial Dataset Backups\n",
    "\n",
    "This notebook reproduces the dataset download steps used in the active tutorials of the Stumpy docs and then exports each dataset as a CSV file to a local directory.\n",
    "\n",
    "The CSVs are subsequently uploaded to the Stumpy community on [Zenodo](https://zenodo.org/communities/stumpy/?page=1&size=20)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "import urllib\n",
    "import ssl\n",
    "import io\n",
    "import os\n",
    "from zipfile import ZipFile\n",
    "from urllib.request import urlopen\n",
    "\n",
    "from scipy.io import loadmat\n",
    "\n",
    "context = ssl.SSLContext()  # Ignore SSL certificate verification for simplicity"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Steamgen "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Labels assigned to the columns of the headerless steamgen.dat file\n",
    "steamgen_columns = ['drum pressure', 'excess oxygen', 'water level', 'steam flow']\n",
    "\n",
    "steamgen_url = 'https://www.cs.ucr.edu/~eamonn/iSAX/steamgen.dat'\n",
    "steamgen_buffer = io.BytesIO(urllib.request.urlopen(steamgen_url, context=context).read())\n",
    "\n",
    "# Fields are separated by runs of whitespace; read without a header, then label the columns\n",
    "steam_df = pd.read_csv(steamgen_buffer, header=None, sep=\"\\\\s+\")\n",
    "steam_df.columns = steamgen_columns\n",
    "\n",
    "steam_df.to_csv('STUMPY_Basics_steamgen.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Taxi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source repository: https://github.com/stanford-futuredata/ASAP\n",
    "taxi_url = \"https://raw.githubusercontent.com/stanford-futuredata/ASAP/master/Taxi.csv\"\n",
    "\n",
    "# pandas downloads and parses the comma-separated file directly from the URL\n",
    "taxi_df = pd.read_csv(taxi_url, sep=',')\n",
    "\n",
    "taxi_df.to_csv('STUMPY_Basics_Taxi.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Kohls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "kohls_url = 'https://sites.google.com/site/timeserieschain/home/Kohls_data.mat?attredirects=0&revision=1'\n",
    "kohls_buffer = io.BytesIO(urllib.request.urlopen(kohls_url, context=context).read())\n",
    "\n",
    "# The values are read from the 'VarName1' entry of the MATLAB file\n",
    "kohls_values = loadmat(kohls_buffer)['VarName1']\n",
    "\n",
    "# Build a single-column frame that keeps the array's own dtype\n",
    "df = pd.DataFrame(kohls_values, dtype=kohls_values.dtype, columns=['volume'])\n",
    "\n",
    "df.to_csv('Time_Series_Chains_Kohls_data.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TiltABP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "abp_url = 'https://sites.google.com/site/timeserieschain/home/TiltABP_210_25000.txt'\n",
    "abp_buffer = io.BytesIO(urllib.request.urlopen(abp_url, context=context).read())\n",
    "\n",
    "# Read the headerless file, turn the row index into a 'time' column and\n",
    "# label the data column (0) as 'abp'\n",
    "df = (\n",
    "    pd.read_csv(abp_buffer, header=None)\n",
    "    .reset_index()\n",
    "    .rename({'index': 'time', 0: 'abp'}, axis='columns')\n",
    ")\n",
    "\n",
    "df.to_csv('Semantic_Segmentation_TiltABP.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Robot Dog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the robot dog series and keep it in memory for parsing\n",
    "T_url = 'https://www.cs.unm.edu/~mueen/robot_dog.txt'\n",
    "T_raw_bytes = urllib.request.urlopen(T_url, context=context).read()\n",
    "T_data = io.BytesIO(T_raw_bytes)\n",
    "\n",
    "# The backslash in the separator is escaped so pandas receives the regex for one or more\n",
    "# whitespace characters; an unescaped backslash-s is an invalid string escape that Python warns about.\n",
    "T_df = pd.read_csv(T_data, header=None, sep='\\\\s+', names=['Acceleration'])\n",
    "\n",
    "T_df.to_csv('Fast_Pattern_Searching_robot_dog.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Carpet query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the carpet query series and keep it in memory for parsing\n",
    "Q_url = 'https://www.cs.unm.edu/~mueen/carpet_query.txt'\n",
    "Q_raw_bytes = urllib.request.urlopen(Q_url, context=context).read()\n",
    "Q_data = io.BytesIO(Q_raw_bytes)\n",
    "\n",
    "# The backslash in the separator is escaped so pandas receives the regex for one or more\n",
    "# whitespace characters; an unescaped backslash-s is an invalid string escape that Python warns about.\n",
    "Q_df = pd.read_csv(Q_data, header=None, sep='\\\\s+', names=['Acceleration'])\n",
    "\n",
    "Q_df.to_csv('carpet_query.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gun Point Training Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "gun_zip_bytes = urlopen(\"http://alumni.cs.ucr.edu/~lexiangy/Shapelet/gun.zip\").read()\n",
    "\n",
    "# training set\n",
    "# The archive member is read into memory instead of being extracted, so no\n",
    "# 'gun_train' file is written to (or deleted from) the working directory, and the\n",
    "# archive is closed as soon as the member has been read.\n",
    "with ZipFile(io.BytesIO(gun_zip_bytes)) as fzip:\n",
    "    train = io.BytesIO(fzip.read(\"gun_train\"))\n",
    "\n",
    "train_df = pd.read_csv(train, sep=\"\\\\s+\", header=None)\n",
    "\n",
    "train_df.to_csv(\"gun_point_train_data.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gun Point Test Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "gun_zip_bytes = urlopen(\"http://alumni.cs.ucr.edu/~lexiangy/Shapelet/gun.zip\").read()\n",
    "\n",
    "# test set\n",
    "# The archive member is read into memory instead of being extracted, so no\n",
    "# 'gun_test' file is written to (or deleted from) the working directory, and the\n",
    "# archive is closed as soon as the member has been read.\n",
    "with ZipFile(io.BytesIO(gun_zip_bytes)) as fzip:\n",
    "    test = io.BytesIO(fzip.read(\"gun_test\"))\n",
    "\n",
    "test_df = pd.read_csv(test, sep=\"\\\\s+\", header=None)\n",
    "\n",
    "test_df.to_csv(\"gun_point_test_data.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vanilla Ice, Queen, and David Bowie Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "figure_zip_url = \"https://www.dropbox.com/s/ybzkw5v6h46bv22/figure9_10.zip?dl=1&sa=D&sntz=1&usg=AFQjCNEDp3G8OKGC-Zj5yucpSSCz7WRpRg\"\n",
    "\n",
    "# Read the .mat member into memory instead of extracting it, so no\n",
    "# 'figure9_10/data.mat' file is left behind in the working directory, and the\n",
    "# archive is closed as soon as the member has been read.\n",
    "with ZipFile(io.BytesIO(urlopen(figure_zip_url).read())) as fzip:\n",
    "    mat = io.BytesIO(fzip.read(\"figure9_10/data.mat\"))\n",
    "\n",
    "data = loadmat(mat)\n",
    "\n",
    "# Only the first row of each MFCC array is exported, as a single named column\n",
    "queen_df = pd.DataFrame(data['mfcc_queen'][0], columns=['under_pressure'])\n",
    "vanilla_ice_df = pd.DataFrame(data['mfcc_vanilla_ice'][0], columns=['ice_ice_baby'])\n",
    "\n",
    "queen_df.to_csv(\"queen.csv\", index=False)\n",
    "vanilla_ice_df.to_csv(\"vanilla_ice.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mitochondrial DNA (mtDNA) Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "dna_url = 'https://sites.google.com/site/consensusmotifs/dna.zip?attredirects=0&d=1'\n",
    "dna_archive = ZipFile(io.BytesIO(urllib.request.urlopen(dna_url, context=context).read()))\n",
    "\n",
    "# One .mat file per animal is read from the archive and exported as its own CSV\n",
    "for animal in ('python', 'hippo', 'red_flying_fox', 'alpaca'):\n",
    "    with dna_archive.open(f'dna/data/{animal}.mat') as mat_file:\n",
    "        # Flatten the 'ts' array to one dimension and cast it to float\n",
    "        data = loadmat(mat_file)['ts'].flatten().astype(float)\n",
    "    df = pd.DataFrame(data)\n",
    "    df.to_csv(f\"{animal}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multi-dimensional Toy Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "toy_url = \"https://github.com/mcyeh/mstamp/blob/master/Python/toy_data.mat?raw=true\"\n",
    "toy_buffer = io.BytesIO(urllib.request.urlopen(toy_url, context=context).read())\n",
    "\n",
    "# The values are read from the 'data' entry of the MATLAB file\n",
    "toy_values = loadmat(toy_buffer)['data']\n",
    "\n",
    "# The array's columns are labelled T3, T2, T1 (in that order) and then\n",
    "# reordered to T1, T2, T3 for the exported CSV\n",
    "toy_labelled = pd.DataFrame(toy_values, dtype=toy_values.dtype, columns=['T3', 'T2', 'T1'])\n",
    "df = toy_labelled[['T1', 'T2', 'T3']]\n",
    "df.to_csv(\"toy.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}