{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Import necessary dependencies and settings" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import datetime\n", "import numpy as np\n", "import pandas as pd\n", "from dateutil.parser import parse\n", "import pytz" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Load and process sample temporal data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Time
02015-03-08 10:30:00.360000+00:00
12017-07-13 15:45:05.755000-07:00
22012-01-20 22:30:00.254000+05:30
32016-12-25 00:30:00.000000+10:00
\n", "
" ], "text/plain": [ " Time\n", "0 2015-03-08 10:30:00.360000+00:00\n", "1 2017-07-13 15:45:05.755000-07:00\n", "2 2012-01-20 22:30:00.254000+05:30\n", "3 2016-12-25 00:30:00.000000+10:00" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "time_stamps = ['2015-03-08 10:30:00.360000+00:00', '2017-07-13 15:45:05.755000-07:00',\n", " '2012-01-20 22:30:00.254000+05:30', '2016-12-25 00:30:00.000000+10:00']\n", "df = pd.DataFrame(time_stamps, columns=['Time'])\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([Timestamp('2015-03-08 10:30:00.360000+0000', tz='UTC'),\n", " Timestamp('2017-07-13 15:45:05.755000-0700', tz='pytz.FixedOffset(-420)'),\n", " Timestamp('2012-01-20 22:30:00.254000+0530', tz='pytz.FixedOffset(330)'),\n", " Timestamp('2016-12-25 00:30:00+1000', tz='pytz.FixedOffset(600)')], dtype=object)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ts_objs = np.array([pd.Timestamp(item) for item in np.array(df.Time)])\n", "df['TS_obj'] = ts_objs\n", "ts_objs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Date based features" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeYearMonthDayQuarterDayOfWeekDayNameDayOfYearWeekOfYear
02015-03-08 10:30:00.360000+00:0020153816Sunday6710
12017-07-13 15:45:05.755000-07:00201771333Thursday19428
22012-01-20 22:30:00.254000+05:30201212014Friday203
32016-12-25 00:30:00.000000+10:002016122546Saturday36051
\n", "
" ], "text/plain": [ " Time Year Month Day Quarter DayOfWeek \\\n", "0 2015-03-08 10:30:00.360000+00:00 2015 3 8 1 6 \n", "1 2017-07-13 15:45:05.755000-07:00 2017 7 13 3 3 \n", "2 2012-01-20 22:30:00.254000+05:30 2012 1 20 1 4 \n", "3 2016-12-25 00:30:00.000000+10:00 2016 12 25 4 6 \n", "\n", " DayName DayOfYear WeekOfYear \n", "0 Sunday 67 10 \n", "1 Thursday 194 28 \n", "2 Friday 20 3 \n", "3 Saturday 360 51 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Year'] = df['TS_obj'].apply(lambda d: d.year)\n", "df['Month'] = df['TS_obj'].apply(lambda d: d.month)\n", "df['Day'] = df['TS_obj'].apply(lambda d: d.day)\n", "df['DayOfWeek'] = df['TS_obj'].apply(lambda d: d.dayofweek)\n", "df['DayName'] = df['TS_obj'].apply(lambda d: d.weekday_name)\n", "df['DayOfYear'] = df['TS_obj'].apply(lambda d: d.dayofyear)\n", "df['WeekOfYear'] = df['TS_obj'].apply(lambda d: d.weekofyear)\n", "df['Quarter'] = df['TS_obj'].apply(lambda d: d.quarter)\n", "\n", "df[['Time', 'Year', 'Month', 'Day', 'Quarter', \n", " 'DayOfWeek', 'DayName', 'DayOfYear', 'WeekOfYear']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Time based features" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeHourMinuteSecondMUsecondUTC_offset
02015-03-08 10:30:00.360000+00:001030036000000:00:00
12017-07-13 15:45:05.755000-07:0015455755000-1 days +17:00:00
22012-01-20 22:30:00.254000+05:302230025400005:30:00
32016-12-25 00:30:00.000000+10:000300010:00:00
\n", "
" ], "text/plain": [ " Time Hour Minute Second MUsecond \\\n", "0 2015-03-08 10:30:00.360000+00:00 10 30 0 360000 \n", "1 2017-07-13 15:45:05.755000-07:00 15 45 5 755000 \n", "2 2012-01-20 22:30:00.254000+05:30 22 30 0 254000 \n", "3 2016-12-25 00:30:00.000000+10:00 0 30 0 0 \n", "\n", " UTC_offset \n", "0 00:00:00 \n", "1 -1 days +17:00:00 \n", "2 05:30:00 \n", "3 10:00:00 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Hour'] = df['TS_obj'].apply(lambda d: d.hour)\n", "df['Minute'] = df['TS_obj'].apply(lambda d: d.minute)\n", "df['Second'] = df['TS_obj'].apply(lambda d: d.second)\n", "df['MUsecond'] = df['TS_obj'].apply(lambda d: d.microsecond)\n", "df['UTC_offset'] = df['TS_obj'].apply(lambda d: d.utcoffset())\n", "\n", "df[['Time', 'Hour', 'Minute', 'Second', 'MUsecond', 'UTC_offset']]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeHourTimeOfDayBin
02015-03-08 10:30:00.360000+00:0010Morning
12017-07-13 15:45:05.755000-07:0015Afternoon
22012-01-20 22:30:00.254000+05:3022Night
32016-12-25 00:30:00.000000+10:000Late Night
\n", "
" ], "text/plain": [ " Time Hour TimeOfDayBin\n", "0 2015-03-08 10:30:00.360000+00:00 10 Morning\n", "1 2017-07-13 15:45:05.755000-07:00 15 Afternoon\n", "2 2012-01-20 22:30:00.254000+05:30 22 Night\n", "3 2016-12-25 00:30:00.000000+10:00 0 Late Night" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hour_bins = [-1, 5, 11, 16, 21, 23]\n", "bin_names = ['Late Night', 'Morning', 'Afternoon', 'Evening', 'Night']\n", "df['TimeOfDayBin'] = pd.cut(df['Hour'], \n", " bins=hour_bins, labels=bin_names)\n", "df[['Time', 'Hour', 'TimeOfDayBin']]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeUTC_offsetTZ_infoTimeZones
02015-03-08 10:30:00.360000+00:0000:00:00UTC[WET, UTC, UCT, GMT]
12017-07-13 15:45:05.755000-07:00-1 days +17:00:00pytz.FixedOffset(-420)[MST, GMT+7, PDT]
22012-01-20 22:30:00.254000+05:3005:30:00pytz.FixedOffset(330)[IST]
32016-12-25 00:30:00.000000+10:0010:00:00pytz.FixedOffset(600)[VLAT, ChST, AEST, PGT, DDUT, GMT-10, CHUT]
\n", "
" ], "text/plain": [ " Time UTC_offset TZ_info \\\n", "0 2015-03-08 10:30:00.360000+00:00 00:00:00 UTC \n", "1 2017-07-13 15:45:05.755000-07:00 -1 days +17:00:00 pytz.FixedOffset(-420) \n", "2 2012-01-20 22:30:00.254000+05:30 05:30:00 pytz.FixedOffset(330) \n", "3 2016-12-25 00:30:00.000000+10:00 10:00:00 pytz.FixedOffset(600) \n", "\n", " TimeZones \n", "0 [WET, UTC, UCT, GMT] \n", "1 [MST, GMT+7, PDT] \n", "2 [IST] \n", "3 [VLAT, ChST, AEST, PGT, DDUT, GMT-10, CHUT] " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['TZ_info'] = df['TS_obj'].apply(lambda d: d.tzinfo)\n", "df['TimeZones'] = df['TS_obj'].apply(lambda d: list({d.astimezone(tz).tzname() \n", " for tz in map(pytz.timezone, \n", " pytz.all_timezones_set)\n", " if d.astimezone(tz).utcoffset() == d.utcoffset()}))\n", "\n", "df[['Time', 'UTC_offset', 'TZ_info', 'TimeZones']]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeTimeUTCEpochGregOrdinal
02015-03-08 10:30:00.360000+00:002015-03-08 10:30:00.360000+00:001.425811e+09735665
12017-07-13 15:45:05.755000-07:002017-07-13 22:45:05.755000+00:001.499986e+09736523
22012-01-20 22:30:00.254000+05:302012-01-20 17:00:00.254000+00:001.327079e+09734522
32016-12-25 00:30:00.000000+10:002016-12-24 14:30:00+00:001.482590e+09736322
\n", "
" ], "text/plain": [ " Time TimeUTC \\\n", "0 2015-03-08 10:30:00.360000+00:00 2015-03-08 10:30:00.360000+00:00 \n", "1 2017-07-13 15:45:05.755000-07:00 2017-07-13 22:45:05.755000+00:00 \n", "2 2012-01-20 22:30:00.254000+05:30 2012-01-20 17:00:00.254000+00:00 \n", "3 2016-12-25 00:30:00.000000+10:00 2016-12-24 14:30:00+00:00 \n", "\n", " Epoch GregOrdinal \n", "0 1.425811e+09 735665 \n", "1 1.499986e+09 736523 \n", "2 1.327079e+09 734522 \n", "3 1.482590e+09 736322 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['TimeUTC'] = df['TS_obj'].apply(lambda d: d.tz_convert(pytz.utc))\n", "df['Epoch'] = df['TimeUTC'].apply(lambda d: d.timestamp())\n", "df['GregOrdinal'] = df['TimeUTC'].apply(lambda d: d.toordinal())\n", "\n", "df[['Time', 'TimeUTC', 'Epoch', 'GregOrdinal']]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimeTimeUTCDaysElapsedEpochDaysElapsedOrdinal
02015-03-08 10:30:00.360000+00:002015-03-08 10:30:00.360000+00:00860.207396860
12017-07-13 15:45:05.755000-07:002017-07-13 22:45:05.755000+00:001.6969172
22012-01-20 22:30:00.254000+05:302012-01-20 17:00:00.254000+00:002002.9365642003
32016-12-25 00:30:00.000000+10:002016-12-24 14:30:00+00:00203.040734203
\n", "
" ], "text/plain": [ " Time TimeUTC \\\n", "0 2015-03-08 10:30:00.360000+00:00 2015-03-08 10:30:00.360000+00:00 \n", "1 2017-07-13 15:45:05.755000-07:00 2017-07-13 22:45:05.755000+00:00 \n", "2 2012-01-20 22:30:00.254000+05:30 2012-01-20 17:00:00.254000+00:00 \n", "3 2016-12-25 00:30:00.000000+10:00 2016-12-24 14:30:00+00:00 \n", "\n", " DaysElapsedEpoch DaysElapsedOrdinal \n", "0 860.207396 860 \n", "1 1.696917 2 \n", "2 2002.936564 2003 \n", "3 203.040734 203 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "curr_ts = datetime.datetime.now(pytz.utc)\n", "# compute days elapsed since today\n", "df['DaysElapsedEpoch'] = (curr_ts.timestamp() - df['Epoch']) / (3600*24)\n", "df['DaysElapsedOrdinal'] = (curr_ts.toordinal() - df['GregOrdinal']) \n", "\n", "df[['Time', 'TimeUTC', 'DaysElapsedEpoch', 'DaysElapsedOrdinal']]" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }