{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:44.751616Z", "iopub.status.busy": "2020-10-02T09:48:44.750767Z", "iopub.status.idle": "2020-10-02T09:48:45.484638Z", "shell.execute_reply": "2020-10-02T09:48:45.485707Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "from glob import glob" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.492547Z", "iopub.status.busy": "2020-10-02T09:48:45.491430Z", "iopub.status.idle": "2020-10-02T09:48:45.500122Z", "shell.execute_reply": "2020-10-02T09:48:45.501290Z" } }, "outputs": [], "source": [ "df_activity = pd.read_csv(\"activities.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.512575Z", "iopub.status.busy": "2020-10-02T09:48:45.511616Z", "iopub.status.idle": "2020-10-02T09:48:45.515239Z", "shell.execute_reply": "2020-10-02T09:48:45.514320Z" } }, "outputs": [], "source": [ "def segmentation(x_data,y,overlap_rate,time_window):\n", " \n", " seg_data = []\n", " overlap = int((1 - overlap_rate)*time_window)\n", " y_segmented_list = []\n", " \n", " for i in range(0,x_data.shape[0],overlap):\n", " seg_data.append(x_data[i:i+time_window])\n", " y_segmented_list.append(y)\n", "\n", " return seg_data,y_segmented_list" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.522060Z", "iopub.status.busy": "2020-10-02T09:48:45.521126Z", "iopub.status.idle": "2020-10-02T09:48:45.524173Z", "shell.execute_reply": "2020-10-02T09:48:45.525126Z" } }, "outputs": [], "source": [ "def handle_missing_values(df):\n", " df['x']=df['x'].replace(0, np.nan)\n", " df['y']=df['y'].replace(0, np.nan)\n", " df['z']=df['z'].replace(0, np.nan)\n", " return df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.532846Z", "iopub.status.busy": "2020-10-02T09:48:45.531939Z", "iopub.status.idle": "2020-10-02T09:48:45.534235Z", "shell.execute_reply": "2020-10-02T09:48:45.535175Z" } }, "outputs": [], "source": [ "def load_data(csv_file):\n", "\n", " y_list = []\n", " x_data_list = []\n", "\n", " csv_df = pd.read_csv(csv_file)\n", " csv_df = handle_missing_values(csv_df)\n", " csv_df.dropna(inplace=True)\n", " x_data = csv_df.values\n", " act_id = get_act_id(int(os.path.splitext(os.path.basename(csv_files[0]))[0].replace(\"segment\",\"\")))\n", " \n", " return x_data,act_id" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.546599Z", "iopub.status.busy": "2020-10-02T09:48:45.545143Z", "iopub.status.idle": "2020-10-02T09:48:45.548479Z", "shell.execute_reply": "2020-10-02T09:48:45.549433Z" } }, "outputs": [], "source": [ "def get_act_id(seg_id):\n", " seg = df_activity[df_activity[\"segment_id\"]==seg_id]\n", " activity_id = seg[\"activity_id\"].values\n", " return int(activity_id)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.562002Z", "iopub.status.busy": "2020-10-02T09:48:45.561039Z", "iopub.status.idle": "2020-10-02T09:48:45.563915Z", "shell.execute_reply": "2020-10-02T09:48:45.565023Z" } }, "outputs": [], "source": [ "# get features (std,avg,max,min)\n", "def get_features(x_data):\n", " features = []\n", " for i in range(x_data.shape[1]):\n", " # std\n", " features.append(x_data.T[i].std(ddof=0))\n", " # avg\n", " features.append(np.average(x_data.T[i]))\n", " # max\n", " features.append(np.max(x_data.T[i]))\n", " # min\n", " features.append(np.min(x_data.T[i]))\n", " return features" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.570569Z", "iopub.status.busy": "2020-10-02T09:48:45.569631Z", "iopub.status.idle": "2020-10-02T09:48:45.574361Z", "shell.execute_reply": "2020-10-02T09:48:45.573391Z" } }, "outputs": [], "source": [ "csv_files = glob(\"train/*\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.580544Z", "iopub.status.busy": "2020-10-02T09:48:45.579478Z", "iopub.status.idle": "2020-10-02T09:48:45.593627Z", "shell.execute_reply": "2020-10-02T09:48:45.594741Z" } }, "outputs": [], "source": [ "x_data,y = load_data(csv_files[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# overroll window feature extraction" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.600937Z", "iopub.status.busy": "2020-10-02T09:48:45.599984Z", "iopub.status.idle": "2020-10-02T09:48:45.604642Z", "shell.execute_reply": "2020-10-02T09:48:45.607995Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1.6395426934016482, -0.8511891891891893, 3.8689999999999998, -3.486, 2.1418133169244293, -8.786306306306306, 1.455, -10.495999999999999, 2.591852250101581, 1.1703063063063066, 10.454, 0.001]\n" ] } ], "source": [ "x_feature = get_features(x_data)\n", "print(x_feature)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# feature extraction after segmentetion " ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.614554Z", "iopub.status.busy": "2020-10-02T09:48:45.613642Z", "iopub.status.idle": "2020-10-02T09:48:45.616030Z", "shell.execute_reply": "2020-10-02T09:48:45.616951Z" } }, "outputs": [], "source": [ "seg_data_list,y_seg_list = segmentation(x_data,y,overlap_rate=0.5,time_window=10)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.631287Z", "iopub.status.busy": "2020-10-02T09:48:45.630308Z", "iopub.status.idle": "2020-10-02T09:48:45.632579Z", "shell.execute_reply": "2020-10-02T09:48:45.633468Z" } }, "outputs": [], "source": [ "x_feature_list = []\n", "for seg_data in seg_data_list:\n", " x_feature_list.append(get_features(seg_data))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2020-10-02T09:48:45.639244Z", "iopub.status.busy": "2020-10-02T09:48:45.638347Z", "iopub.status.idle": "2020-10-02T09:48:45.642535Z", "shell.execute_reply": "2020-10-02T09:48:45.643455Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "([1.0754768291320833, -0.7467, 1.608, -2.145, 4.320982667171902, -5.6117, 1.034, -9.652999999999999, 3.7309114489625728, 5.1486, 9.747, 0.067],)\n", "([0.7604673563013734, 0.13799999999999996, 1.608, -0.8420000000000001, 4.423569866069711, -3.8268, 1.455, -9.73, 3.9595239057240206, 6.916200000000001, 10.454, 0.012],)\n", "([1.110200653035297, -0.6511, 0.7659999999999999, -1.992, 3.7452361153871196, -7.3012, 1.455, -9.883, 4.557675872635087, 3.0268, 10.454, 0.012],)\n", "([0.6160402908901332, -1.6085999999999998, 0.191, -1.992, 0.17869306086135478, -9.492299999999998, -9.308, -9.883, 0.0305785872793365, 0.053500000000000006, 0.106, 0.016],)\n", "([0.2286225929342942, -1.6659, -1.264, -1.915, 0.15927385221686574, -9.5728, -9.385, -9.921, 0.028662867965365924, 0.028800000000000003, 0.098, 0.001],)\n", "([0.6184760706769503, -1.2675, -0.38299999999999995, -2.1830000000000003, 0.4105244937881298, -9.362200000000001, -8.389, -9.921, 1.1968831229489367, 0.8036999999999999, 3.03, 0.001],)\n", "([0.7281239523597615, -0.46709999999999996, 0.42100000000000004, -2.1830000000000003, 0.30090930527320026, -9.1207, -8.389, -9.462, 1.0875120964844482, 1.0791999999999997, 3.03, 0.02],)\n", "([0.9730192238594262, -0.5132999999999999, 0.42100000000000004, -2.068, 0.22940708358723372, -9.285299999999998, -8.772, -9.615, 0.41008712488933374, 0.37650000000000006, 1.449, 0.004],)\n", "([0.9195672949817213, -1.3483, 0.306, -2.26, 0.1831383083901345, -9.4846, -9.155, -9.652999999999999, 0.17269800230460106, 0.10699999999999998, 0.451, 0.004],)\n", "([1.855044053385256, -0.19159999999999994, 3.0260000000000002, -2.26, 0.18241556951093857, -9.5614, -9.155, -9.807, 0.457729669127969, 0.26449999999999996, 1.178, 0.001],)\n", "([1.215110052628979, 1.3826, 3.0260000000000002, -0.536, 0.23438619839913796, -9.5001, -9.079, -9.807, 0.4431890003147641, 0.2981, 1.178, 0.001],)\n", "([2.1158356859642953, -0.4865000000000002, 2.26, -3.2560000000000002, 0.2534835103118149, -9.3659, -8.964, -9.73, 0.055635330501399925, 0.0741, 0.17800000000000002, 0.013999999999999999],)\n", "([0.6118592975513243, -2.363, -1.455, -3.2560000000000002, 0.19962938160501312, -9.3351, -8.964, -9.538, 0.07103442827249334, 0.1091, 0.23, 0.013999999999999999],)\n", "([0.4525318662812598, -1.9761, -1.5319999999999998, -2.719, 0.07357880129493838, -9.3886, -9.27, -9.538, 0.05116453850080152, 0.1387, 0.23, 0.076],)\n", "([0.8450581281781746, -1.8805999999999998, -0.804, -3.3710000000000004, 0.2724400851563514, -9.339000000000002, -8.887, -9.73, 0.08172325250502453, 0.13909999999999997, 0.284, 0.040999999999999995],)\n", "([1.9111887635709877, -0.6551000000000001, 1.685, -3.3710000000000004, 0.34069393889530813, -9.369800000000001, -8.887, -9.73, 0.12430000000000001, 0.12589999999999998, 0.35100000000000003, 0.02],)\n", "([1.9292759289432915, -0.8467000000000002, 1.685, -3.486, 0.4580648971488649, -9.128499999999999, -8.504, -9.692, 0.15378897879887232, 0.21350000000000002, 0.366, 0.02],)\n", "([2.3188074283993485, -1.0419, 3.2560000000000002, -3.486, 0.44529118563025727, -9.1094, -8.504, -9.807, 0.1412453185064907, 0.21459999999999996, 0.366, 0.002],)\n", "([2.1493577552375966, -0.7888, 3.2560000000000002, -2.528, 0.454636162222056, -9.5154, -8.772, -10.495999999999999, 0.07902689415635666, 0.06849999999999999, 0.287, 0.002],)\n", "([1.2888015401915067, -1.5472999999999997, 0.8420000000000001, -2.949, 0.4826668001841433, -9.6954, -8.772, -10.495999999999999, 0.027690431560378394, 0.0348, 0.08199999999999999, 0.001],)\n", "([2.227787826522086, -0.018999999999999885, 3.8689999999999998, -2.949, 1.4894286857718297, -8.749300000000002, -5.669, -10.228, 3.1023649946452143, 2.5238000000000005, 7.519, 0.001],)\n", "([2.0306569270941743, 0.8303333333333335, 3.8689999999999998, -1.494, 1.4123691286471665, -7.897666666666666, -5.669, -9.577, 2.5589320037859546, 4.597, 7.519, 0.001],)\n", "([0.0, -0.536, -0.536, -0.536, 0.0, -8.696, -8.696, -8.696, 0.0, 2.479, 2.479, 2.479],)\n" ] } ], "source": [ "for x_feature in zip(x_feature_list):\n", " print(x_feature)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2" } }, "nbformat": 4, "nbformat_minor": 2 }