{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Import necessary dependencies and settings"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import datetime\n",
"import numpy as np\n",
"import pandas as pd\n",
"from dateutil.parser import parse\n",
"import pytz"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load and process sample temporal data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-07-13 15:45:05.755000-07:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 2012-01-20 22:30:00.254000+05:30 | \n",
"
\n",
" \n",
" 3 | \n",
" 2016-12-25 00:30:00.000000+10:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time\n",
"0 2015-03-08 10:30:00.360000+00:00\n",
"1 2017-07-13 15:45:05.755000-07:00\n",
"2 2012-01-20 22:30:00.254000+05:30\n",
"3 2016-12-25 00:30:00.000000+10:00"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sample timestamps as raw strings; each one carries its own UTC offset.\n",
"time_stamps = [\n",
"    '2015-03-08 10:30:00.360000+00:00',\n",
"    '2017-07-13 15:45:05.755000-07:00',\n",
"    '2012-01-20 22:30:00.254000+05:30',\n",
"    '2016-12-25 00:30:00.000000+10:00',\n",
"]\n",
"df = pd.DataFrame({'Time': time_stamps})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([Timestamp('2015-03-08 10:30:00.360000+0000', tz='UTC'),\n",
" Timestamp('2017-07-13 15:45:05.755000-0700', tz='pytz.FixedOffset(-420)'),\n",
" Timestamp('2012-01-20 22:30:00.254000+0530', tz='pytz.FixedOffset(330)'),\n",
" Timestamp('2016-12-25 00:30:00+1000', tz='pytz.FixedOffset(600)')], dtype=object)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Turn every raw string in the Time column into a pandas Timestamp and keep\n",
"# the resulting objects both as an array and as a new TS_obj column.\n",
"ts_objs = np.array(list(map(pd.Timestamp, np.array(df['Time']))))\n",
"df['TS_obj'] = ts_objs\n",
"ts_objs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Date based features"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" Year | \n",
" Month | \n",
" Day | \n",
" Quarter | \n",
" DayOfWeek | \n",
" DayName | \n",
" DayOfYear | \n",
" WeekOfYear | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
" 2015 | \n",
" 3 | \n",
" 8 | \n",
" 1 | \n",
" 6 | \n",
" Sunday | \n",
" 67 | \n",
" 10 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-07-13 15:45:05.755000-07:00 | \n",
" 2017 | \n",
" 7 | \n",
" 13 | \n",
" 3 | \n",
" 3 | \n",
" Thursday | \n",
" 194 | \n",
" 28 | \n",
"
\n",
" \n",
" 2 | \n",
" 2012-01-20 22:30:00.254000+05:30 | \n",
" 2012 | \n",
" 1 | \n",
" 20 | \n",
" 1 | \n",
" 4 | \n",
" Friday | \n",
" 20 | \n",
" 3 | \n",
"
\n",
" \n",
" 3 | \n",
" 2016-12-25 00:30:00.000000+10:00 | \n",
" 2016 | \n",
" 12 | \n",
" 25 | \n",
" 4 | \n",
" 6 | \n",
" Saturday | \n",
" 360 | \n",
" 51 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time Year Month Day Quarter DayOfWeek \\\n",
"0 2015-03-08 10:30:00.360000+00:00 2015 3 8 1 6 \n",
"1 2017-07-13 15:45:05.755000-07:00 2017 7 13 3 3 \n",
"2 2012-01-20 22:30:00.254000+05:30 2012 1 20 1 4 \n",
"3 2016-12-25 00:30:00.000000+10:00 2016 12 25 4 6 \n",
"\n",
" DayName DayOfYear WeekOfYear \n",
"0 Sunday 67 10 \n",
"1 Thursday 194 28 \n",
"2 Friday 20 3 \n",
"3 Saturday 360 51 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Calendar features, read from each Timestamp's own wall-clock date.\n",
"df['Year'] = df['TS_obj'].apply(lambda d: d.year)\n",
"df['Month'] = df['TS_obj'].apply(lambda d: d.month)\n",
"df['Day'] = df['TS_obj'].apply(lambda d: d.day)\n",
"df['DayOfWeek'] = df['TS_obj'].apply(lambda d: d.dayofweek)  # Monday=0 ... Sunday=6\n",
"# day_name() replaces the removed Timestamp.weekday_name attribute.\n",
"df['DayName'] = df['TS_obj'].apply(lambda d: d.day_name())\n",
"df['DayOfYear'] = df['TS_obj'].apply(lambda d: d.dayofyear)\n",
"# isocalendar() -> (ISO year, ISO week, ISO weekday); index 1 is the ISO week\n",
"# number, which is what the removed Timestamp.weekofyear attribute returned.\n",
"df['WeekOfYear'] = df['TS_obj'].apply(lambda d: d.isocalendar()[1])\n",
"df['Quarter'] = df['TS_obj'].apply(lambda d: d.quarter)\n",
"\n",
"df[['Time', 'Year', 'Month', 'Day', 'Quarter',\n",
"    'DayOfWeek', 'DayName', 'DayOfYear', 'WeekOfYear']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Time based features"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" Hour | \n",
" Minute | \n",
" Second | \n",
" MUsecond | \n",
" UTC_offset | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
" 10 | \n",
" 30 | \n",
" 0 | \n",
" 360000 | \n",
" 00:00:00 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-07-13 15:45:05.755000-07:00 | \n",
" 15 | \n",
" 45 | \n",
" 5 | \n",
" 755000 | \n",
" -1 days +17:00:00 | \n",
"
\n",
" \n",
" 2 | \n",
" 2012-01-20 22:30:00.254000+05:30 | \n",
" 22 | \n",
" 30 | \n",
" 0 | \n",
" 254000 | \n",
" 05:30:00 | \n",
"
\n",
" \n",
" 3 | \n",
" 2016-12-25 00:30:00.000000+10:00 | \n",
" 0 | \n",
" 30 | \n",
" 0 | \n",
" 0 | \n",
" 10:00:00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time Hour Minute Second MUsecond \\\n",
"0 2015-03-08 10:30:00.360000+00:00 10 30 0 360000 \n",
"1 2017-07-13 15:45:05.755000-07:00 15 45 5 755000 \n",
"2 2012-01-20 22:30:00.254000+05:30 22 30 0 254000 \n",
"3 2016-12-25 00:30:00.000000+10:00 0 30 0 0 \n",
"\n",
" UTC_offset \n",
"0 00:00:00 \n",
"1 -1 days +17:00:00 \n",
"2 05:30:00 \n",
"3 10:00:00 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Clock components: each new column is the same-purpose attribute read off\n",
"# every Timestamp in TS_obj.\n",
"clock_fields = [('Hour', 'hour'), ('Minute', 'minute'),\n",
"                ('Second', 'second'), ('MUsecond', 'microsecond')]\n",
"for col_name, attr_name in clock_fields:\n",
"    df[col_name] = df['TS_obj'].apply(lambda ts, field=attr_name: getattr(ts, field))\n",
"\n",
"# Offset from UTC carried by each timestamp, as a timedelta.\n",
"df['UTC_offset'] = df['TS_obj'].apply(lambda ts: ts.utcoffset())\n",
"\n",
"df[['Time', 'Hour', 'Minute', 'Second', 'MUsecond', 'UTC_offset']]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" Hour | \n",
" TimeOfDayBin | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
" 10 | \n",
" Morning | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-07-13 15:45:05.755000-07:00 | \n",
" 15 | \n",
" Afternoon | \n",
"
\n",
" \n",
" 2 | \n",
" 2012-01-20 22:30:00.254000+05:30 | \n",
" 22 | \n",
" Night | \n",
"
\n",
" \n",
" 3 | \n",
" 2016-12-25 00:30:00.000000+10:00 | \n",
" 0 | \n",
" Late Night | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time Hour TimeOfDayBin\n",
"0 2015-03-08 10:30:00.360000+00:00 10 Morning\n",
"1 2017-07-13 15:45:05.755000-07:00 15 Afternoon\n",
"2 2012-01-20 22:30:00.254000+05:30 22 Night\n",
"3 2016-12-25 00:30:00.000000+10:00 0 Late Night"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bucket the hour of day into named periods. With pd.cut's default\n",
"# right-closed intervals the edges below give\n",
"# (-1, 5], (5, 11], (11, 16], (16, 21], (21, 23], i.e. hours 0-23 are all covered.\n",
"bin_names = ['Late Night', 'Morning', 'Afternoon', 'Evening', 'Night']\n",
"hour_bins = [-1, 5, 11, 16, 21, 23]\n",
"df['TimeOfDayBin'] = pd.cut(x=df['Hour'], bins=hour_bins, labels=bin_names)\n",
"df[['Time', 'Hour', 'TimeOfDayBin']]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" UTC_offset | \n",
" TZ_info | \n",
" TimeZones | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
" 00:00:00 | \n",
" UTC | \n",
" [WET, UTC, UCT, GMT] | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-07-13 15:45:05.755000-07:00 | \n",
" -1 days +17:00:00 | \n",
" pytz.FixedOffset(-420) | \n",
" [MST, GMT+7, PDT] | \n",
"
\n",
" \n",
" 2 | \n",
" 2012-01-20 22:30:00.254000+05:30 | \n",
" 05:30:00 | \n",
" pytz.FixedOffset(330) | \n",
" [IST] | \n",
"
\n",
" \n",
" 3 | \n",
" 2016-12-25 00:30:00.000000+10:00 | \n",
" 10:00:00 | \n",
" pytz.FixedOffset(600) | \n",
" [VLAT, ChST, AEST, PGT, DDUT, GMT-10, CHUT] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time UTC_offset TZ_info \\\n",
"0 2015-03-08 10:30:00.360000+00:00 00:00:00 UTC \n",
"1 2017-07-13 15:45:05.755000-07:00 -1 days +17:00:00 pytz.FixedOffset(-420) \n",
"2 2012-01-20 22:30:00.254000+05:30 05:30:00 pytz.FixedOffset(330) \n",
"3 2016-12-25 00:30:00.000000+10:00 10:00:00 pytz.FixedOffset(600) \n",
"\n",
" TimeZones \n",
"0 [WET, UTC, UCT, GMT] \n",
"1 [MST, GMT+7, PDT] \n",
"2 [IST] \n",
"3 [VLAT, ChST, AEST, PGT, DDUT, GMT-10, CHUT] "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['TZ_info'] = df['TS_obj'].apply(lambda d: d.tzinfo)\n",
"\n",
"# Build the pytz zone objects once, not once per row.\n",
"all_zones = [pytz.timezone(zone_name) for zone_name in pytz.all_timezones_set]\n",
"\n",
"\n",
"def matching_tz_names(ts):\n",
"    '''Return the zone abbreviations whose UTC offset matches that of `ts`.\n",
"\n",
"    `ts` is converted into every pytz zone; a zone qualifies when its UTC\n",
"    offset at that instant equals the offset carried by `ts`. The result is\n",
"    de-duplicated and sorted so the output is identical from run to run.\n",
"    '''\n",
"    names = set()\n",
"    for zone in all_zones:\n",
"        local = ts.astimezone(zone)  # convert once, reuse for both lookups\n",
"        if local.utcoffset() == ts.utcoffset():\n",
"            names.add(local.tzname())\n",
"    return sorted(names)\n",
"\n",
"\n",
"df['TimeZones'] = df['TS_obj'].apply(matching_tz_names)\n",
"\n",
"df[['Time', 'UTC_offset', 'TZ_info', 'TimeZones']]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" TimeUTC | \n",
" Epoch | \n",
" GregOrdinal | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
" 1.425811e+09 | \n",
" 735665 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-07-13 15:45:05.755000-07:00 | \n",
" 2017-07-13 22:45:05.755000+00:00 | \n",
" 1.499986e+09 | \n",
" 736523 | \n",
"
\n",
" \n",
" 2 | \n",
" 2012-01-20 22:30:00.254000+05:30 | \n",
" 2012-01-20 17:00:00.254000+00:00 | \n",
" 1.327079e+09 | \n",
" 734522 | \n",
"
\n",
" \n",
" 3 | \n",
" 2016-12-25 00:30:00.000000+10:00 | \n",
" 2016-12-24 14:30:00+00:00 | \n",
" 1.482590e+09 | \n",
" 736322 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time TimeUTC \\\n",
"0 2015-03-08 10:30:00.360000+00:00 2015-03-08 10:30:00.360000+00:00 \n",
"1 2017-07-13 15:45:05.755000-07:00 2017-07-13 22:45:05.755000+00:00 \n",
"2 2012-01-20 22:30:00.254000+05:30 2012-01-20 17:00:00.254000+00:00 \n",
"3 2016-12-25 00:30:00.000000+10:00 2016-12-24 14:30:00+00:00 \n",
"\n",
" Epoch GregOrdinal \n",
"0 1.425811e+09 735665 \n",
"1 1.499986e+09 736523 \n",
"2 1.327079e+09 734522 \n",
"3 1.482590e+09 736322 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Put every timestamp on a common UTC clock, then derive two numeric\n",
"# encodings from the UTC value: the POSIX timestamp returned by timestamp()\n",
"# and the day ordinal returned by toordinal().\n",
"df['TimeUTC'] = df['TS_obj'].apply(lambda ts: ts.tz_convert(pytz.utc))\n",
"df['Epoch'] = df['TimeUTC'].apply(lambda utc_ts: utc_ts.timestamp())\n",
"df['GregOrdinal'] = df['TimeUTC'].apply(lambda utc_ts: utc_ts.toordinal())\n",
"\n",
"df[['Time', 'TimeUTC', 'Epoch', 'GregOrdinal']]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" TimeUTC | \n",
" DaysElapsedEpoch | \n",
" DaysElapsedOrdinal | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
" 2015-03-08 10:30:00.360000+00:00 | \n",
" 860.207396 | \n",
" 860 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-07-13 15:45:05.755000-07:00 | \n",
" 2017-07-13 22:45:05.755000+00:00 | \n",
" 1.696917 | \n",
" 2 | \n",
"
\n",
" \n",
" 2 | \n",
" 2012-01-20 22:30:00.254000+05:30 | \n",
" 2012-01-20 17:00:00.254000+00:00 | \n",
" 2002.936564 | \n",
" 2003 | \n",
"
\n",
" \n",
" 3 | \n",
" 2016-12-25 00:30:00.000000+10:00 | \n",
" 2016-12-24 14:30:00+00:00 | \n",
" 203.040734 | \n",
" 203 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time TimeUTC \\\n",
"0 2015-03-08 10:30:00.360000+00:00 2015-03-08 10:30:00.360000+00:00 \n",
"1 2017-07-13 15:45:05.755000-07:00 2017-07-13 22:45:05.755000+00:00 \n",
"2 2012-01-20 22:30:00.254000+05:30 2012-01-20 17:00:00.254000+00:00 \n",
"3 2016-12-25 00:30:00.000000+10:00 2016-12-24 14:30:00+00:00 \n",
"\n",
" DaysElapsedEpoch DaysElapsedOrdinal \n",
"0 860.207396 860 \n",
"1 1.696917 2 \n",
"2 2002.936564 2003 \n",
"3 203.040734 203 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reference instant: the moment this cell runs, in UTC (results therefore\n",
"# change on every execution).\n",
"curr_ts = datetime.datetime.now(tz=pytz.utc)\n",
"\n",
"# Days elapsed from each timestamp up to now:\n",
"#   - DaysElapsedEpoch: fractional days, from the difference in epoch seconds\n",
"#   - DaysElapsedOrdinal: whole calendar days, from the difference in ordinals\n",
"df['DaysElapsedEpoch'] = (curr_ts.timestamp() - df['Epoch']) / (24 * 3600)\n",
"df['DaysElapsedOrdinal'] = curr_ts.toordinal() - df['GregOrdinal']\n",
"\n",
"df[['Time', 'TimeUTC', 'DaysElapsedEpoch', 'DaysElapsedOrdinal']]"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}