{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:44.751616Z",
     "iopub.status.busy": "2020-10-02T09:48:44.750767Z",
     "iopub.status.idle": "2020-10-02T09:48:45.484638Z",
     "shell.execute_reply": "2020-10-02T09:48:45.485707Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from glob import glob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.492547Z",
     "iopub.status.busy": "2020-10-02T09:48:45.491430Z",
     "iopub.status.idle": "2020-10-02T09:48:45.500122Z",
     "shell.execute_reply": "2020-10-02T09:48:45.501290Z"
    }
   },
   "outputs": [],
   "source": [
    "df_activity = pd.read_csv(\"activities.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.512575Z",
     "iopub.status.busy": "2020-10-02T09:48:45.511616Z",
     "iopub.status.idle": "2020-10-02T09:48:45.515239Z",
     "shell.execute_reply": "2020-10-02T09:48:45.514320Z"
    }
   },
   "outputs": [],
   "source": [
    "def segmentation(x_data,y,overlap_rate,time_window):\n",
    "    \n",
    "    seg_data = []\n",
    "    overlap = int((1 - overlap_rate)*time_window)\n",
    "    y_segmented_list = []\n",
    "    \n",
    "    for i in range(0,x_data.shape[0],overlap):\n",
    "        seg_data.append(x_data[i:i+time_window])\n",
    "        y_segmented_list.append(y)\n",
    "\n",
    "    return seg_data,y_segmented_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.522060Z",
     "iopub.status.busy": "2020-10-02T09:48:45.521126Z",
     "iopub.status.idle": "2020-10-02T09:48:45.524173Z",
     "shell.execute_reply": "2020-10-02T09:48:45.525126Z"
    }
   },
   "outputs": [],
   "source": [
    "def handle_missing_values(df):\n",
    "    df['x']=df['x'].replace(0, np.nan)\n",
    "    df['y']=df['y'].replace(0, np.nan)\n",
    "    df['z']=df['z'].replace(0, np.nan)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.532846Z",
     "iopub.status.busy": "2020-10-02T09:48:45.531939Z",
     "iopub.status.idle": "2020-10-02T09:48:45.534235Z",
     "shell.execute_reply": "2020-10-02T09:48:45.535175Z"
    }
   },
   "outputs": [],
   "source": [
    "def load_data(csv_file):\n",
    "\n",
    "    y_list = []\n",
    "    x_data_list = []\n",
    "\n",
    "    csv_df = pd.read_csv(csv_file)\n",
    "    csv_df = handle_missing_values(csv_df)\n",
    "    csv_df.dropna(inplace=True)\n",
    "    x_data = csv_df.values\n",
    "    act_id = get_act_id(int(os.path.splitext(os.path.basename(csv_files[0]))[0].replace(\"segment\",\"\")))\n",
    "    \n",
    "    return x_data,act_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.546599Z",
     "iopub.status.busy": "2020-10-02T09:48:45.545143Z",
     "iopub.status.idle": "2020-10-02T09:48:45.548479Z",
     "shell.execute_reply": "2020-10-02T09:48:45.549433Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_act_id(seg_id):\n",
    "    seg = df_activity[df_activity[\"segment_id\"]==seg_id]\n",
    "    activity_id = seg[\"activity_id\"].values\n",
    "    return int(activity_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.562002Z",
     "iopub.status.busy": "2020-10-02T09:48:45.561039Z",
     "iopub.status.idle": "2020-10-02T09:48:45.563915Z",
     "shell.execute_reply": "2020-10-02T09:48:45.565023Z"
    }
   },
   "outputs": [],
   "source": [
    "# get features (std,avg,max,min)\n",
    "def get_features(x_data):\n",
    "    features = []\n",
    "    for i in range(x_data.shape[1]):\n",
    "        # std\n",
    "        features.append(x_data.T[i].std(ddof=0))\n",
    "        # avg\n",
    "        features.append(np.average(x_data.T[i]))\n",
    "        # max\n",
    "        features.append(np.max(x_data.T[i]))\n",
    "        # min\n",
    "        features.append(np.min(x_data.T[i]))\n",
    "    return features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.570569Z",
     "iopub.status.busy": "2020-10-02T09:48:45.569631Z",
     "iopub.status.idle": "2020-10-02T09:48:45.574361Z",
     "shell.execute_reply": "2020-10-02T09:48:45.573391Z"
    }
   },
   "outputs": [],
   "source": [
    "csv_files = glob(\"train/*\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.580544Z",
     "iopub.status.busy": "2020-10-02T09:48:45.579478Z",
     "iopub.status.idle": "2020-10-02T09:48:45.593627Z",
     "shell.execute_reply": "2020-10-02T09:48:45.594741Z"
    }
   },
   "outputs": [],
   "source": [
    "x_data,y = load_data(csv_files[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# overroll window feature extraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.600937Z",
     "iopub.status.busy": "2020-10-02T09:48:45.599984Z",
     "iopub.status.idle": "2020-10-02T09:48:45.604642Z",
     "shell.execute_reply": "2020-10-02T09:48:45.607995Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1.6395426934016482, -0.8511891891891893, 3.8689999999999998, -3.486, 2.1418133169244293, -8.786306306306306, 1.455, -10.495999999999999, 2.591852250101581, 1.1703063063063066, 10.454, 0.001]\n"
     ]
    }
   ],
   "source": [
    "x_feature = get_features(x_data)\n",
    "print(x_feature)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# feature extraction after segmentetion "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.614554Z",
     "iopub.status.busy": "2020-10-02T09:48:45.613642Z",
     "iopub.status.idle": "2020-10-02T09:48:45.616030Z",
     "shell.execute_reply": "2020-10-02T09:48:45.616951Z"
    }
   },
   "outputs": [],
   "source": [
    "seg_data_list,y_seg_list = segmentation(x_data,y,overlap_rate=0.5,time_window=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.631287Z",
     "iopub.status.busy": "2020-10-02T09:48:45.630308Z",
     "iopub.status.idle": "2020-10-02T09:48:45.632579Z",
     "shell.execute_reply": "2020-10-02T09:48:45.633468Z"
    }
   },
   "outputs": [],
   "source": [
    "x_feature_list = []\n",
    "for seg_data in seg_data_list:\n",
    "    x_feature_list.append(get_features(seg_data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2020-10-02T09:48:45.639244Z",
     "iopub.status.busy": "2020-10-02T09:48:45.638347Z",
     "iopub.status.idle": "2020-10-02T09:48:45.642535Z",
     "shell.execute_reply": "2020-10-02T09:48:45.643455Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "([1.0754768291320833, -0.7467, 1.608, -2.145, 4.320982667171902, -5.6117, 1.034, -9.652999999999999, 3.7309114489625728, 5.1486, 9.747, 0.067],)\n",
      "([0.7604673563013734, 0.13799999999999996, 1.608, -0.8420000000000001, 4.423569866069711, -3.8268, 1.455, -9.73, 3.9595239057240206, 6.916200000000001, 10.454, 0.012],)\n",
      "([1.110200653035297, -0.6511, 0.7659999999999999, -1.992, 3.7452361153871196, -7.3012, 1.455, -9.883, 4.557675872635087, 3.0268, 10.454, 0.012],)\n",
      "([0.6160402908901332, -1.6085999999999998, 0.191, -1.992, 0.17869306086135478, -9.492299999999998, -9.308, -9.883, 0.0305785872793365, 0.053500000000000006, 0.106, 0.016],)\n",
      "([0.2286225929342942, -1.6659, -1.264, -1.915, 0.15927385221686574, -9.5728, -9.385, -9.921, 0.028662867965365924, 0.028800000000000003, 0.098, 0.001],)\n",
      "([0.6184760706769503, -1.2675, -0.38299999999999995, -2.1830000000000003, 0.4105244937881298, -9.362200000000001, -8.389, -9.921, 1.1968831229489367, 0.8036999999999999, 3.03, 0.001],)\n",
      "([0.7281239523597615, -0.46709999999999996, 0.42100000000000004, -2.1830000000000003, 0.30090930527320026, -9.1207, -8.389, -9.462, 1.0875120964844482, 1.0791999999999997, 3.03, 0.02],)\n",
      "([0.9730192238594262, -0.5132999999999999, 0.42100000000000004, -2.068, 0.22940708358723372, -9.285299999999998, -8.772, -9.615, 0.41008712488933374, 0.37650000000000006, 1.449, 0.004],)\n",
      "([0.9195672949817213, -1.3483, 0.306, -2.26, 0.1831383083901345, -9.4846, -9.155, -9.652999999999999, 0.17269800230460106, 0.10699999999999998, 0.451, 0.004],)\n",
      "([1.855044053385256, -0.19159999999999994, 3.0260000000000002, -2.26, 0.18241556951093857, -9.5614, -9.155, -9.807, 0.457729669127969, 0.26449999999999996, 1.178, 0.001],)\n",
      "([1.215110052628979, 1.3826, 3.0260000000000002, -0.536, 0.23438619839913796, -9.5001, -9.079, -9.807, 0.4431890003147641, 0.2981, 1.178, 0.001],)\n",
      "([2.1158356859642953, -0.4865000000000002, 2.26, -3.2560000000000002, 0.2534835103118149, -9.3659, -8.964, -9.73, 0.055635330501399925, 0.0741, 0.17800000000000002, 0.013999999999999999],)\n",
      "([0.6118592975513243, -2.363, -1.455, -3.2560000000000002, 0.19962938160501312, -9.3351, -8.964, -9.538, 0.07103442827249334, 0.1091, 0.23, 0.013999999999999999],)\n",
      "([0.4525318662812598, -1.9761, -1.5319999999999998, -2.719, 0.07357880129493838, -9.3886, -9.27, -9.538, 0.05116453850080152, 0.1387, 0.23, 0.076],)\n",
      "([0.8450581281781746, -1.8805999999999998, -0.804, -3.3710000000000004, 0.2724400851563514, -9.339000000000002, -8.887, -9.73, 0.08172325250502453, 0.13909999999999997, 0.284, 0.040999999999999995],)\n",
      "([1.9111887635709877, -0.6551000000000001, 1.685, -3.3710000000000004, 0.34069393889530813, -9.369800000000001, -8.887, -9.73, 0.12430000000000001, 0.12589999999999998, 0.35100000000000003, 0.02],)\n",
      "([1.9292759289432915, -0.8467000000000002, 1.685, -3.486, 0.4580648971488649, -9.128499999999999, -8.504, -9.692, 0.15378897879887232, 0.21350000000000002, 0.366, 0.02],)\n",
      "([2.3188074283993485, -1.0419, 3.2560000000000002, -3.486, 0.44529118563025727, -9.1094, -8.504, -9.807, 0.1412453185064907, 0.21459999999999996, 0.366, 0.002],)\n",
      "([2.1493577552375966, -0.7888, 3.2560000000000002, -2.528, 0.454636162222056, -9.5154, -8.772, -10.495999999999999, 0.07902689415635666, 0.06849999999999999, 0.287, 0.002],)\n",
      "([1.2888015401915067, -1.5472999999999997, 0.8420000000000001, -2.949, 0.4826668001841433, -9.6954, -8.772, -10.495999999999999, 0.027690431560378394, 0.0348, 0.08199999999999999, 0.001],)\n",
      "([2.227787826522086, -0.018999999999999885, 3.8689999999999998, -2.949, 1.4894286857718297, -8.749300000000002, -5.669, -10.228, 3.1023649946452143, 2.5238000000000005, 7.519, 0.001],)\n",
      "([2.0306569270941743, 0.8303333333333335, 3.8689999999999998, -1.494, 1.4123691286471665, -7.897666666666666, -5.669, -9.577, 2.5589320037859546, 4.597, 7.519, 0.001],)\n",
      "([0.0, -0.536, -0.536, -0.536, 0.0, -8.696, -8.696, -8.696, 0.0, 2.479, 2.479, 2.479],)\n"
     ]
    }
   ],
   "source": [
    "for x_feature in zip(x_feature_list):\n",
    "    print(x_feature)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}