{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2017-03-08T21:36:41.287006", "start_time": "2017-03-08T21:36:39.767559" }, "collapsed": true }, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pylab as plt\n", "import seaborn as sns\n", "from mlxtend.association import apriori\n", "from mlxtend.preprocessing import OnehotTransactions\n", "\n", "# Pandas Printing Options\n", "pd.set_option('display.float_format', lambda x: '%.7f' % x)\n", "pd.set_option('display.max_rows', 500)\n", "pd.set_option('display.max_columns', 500)\n", "pd.set_option('display.width', 1000)\n", "\n", "sns.set_style('white')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# LOAD" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2017-03-08T21:36:41.534836", "start_time": "2017-03-08T21:36:41.288567" }, "collapsed": false }, "outputs": [], "source": [ "injuries = pd.read_csv(\"../data/injuries_2013_present.csv\", index_col=['DATE'], parse_dates=['DATE'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# FILTER CONTRIBUTING FACTORS" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-03-08T21:36:41.545166", "start_time": "2017-03-08T21:36:41.536348" }, "collapsed": false }, "outputs": [], "source": [ "contributing_factors = injuries[['CONTRIBUTING FACTOR VEHICLE 1',\n", " 'CONTRIBUTING FACTOR VEHICLE 2',\n", " 'CONTRIBUTING FACTOR VEHICLE 3',\n", " 'CONTRIBUTING FACTOR VEHICLE 4',\n", " 'CONTRIBUTING FACTOR VEHICLE 5']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# HELPER FUNCTIONS" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2017-03-08T21:36:41.555919", "start_time": "2017-03-08T21:36:41.546682" }, "collapsed": true }, "outputs": [], "source": [ "def filter_set(row):\n", " filtered = [element for element in row if str(element) != 'nan']\n", " filtered = [element for element in filtered if str(element) != 'Unspecified']\n", " return filtered\n", "\n", "def my_generator(contributing_factors):\n", " for i in contributing_factors.as_matrix():\n", " if filter_set(i):\n", " yield filter_set(i)\n", " \n", "def get_factors(dataset):\n", " oht = OnehotTransactions()\n", " oht_ary = oht.fit(dataset).transform(dataset)\n", " return pd.DataFrame(oht_ary, columns=oht.columns_)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# GET FACTORS" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2017-03-08T21:36:42.091238", "start_time": "2017-03-08T21:36:41.557361" }, "collapsed": true }, "outputs": [], "source": [ "factors_obj = my_generator(contributing_factors)\n", "contributing_factors_to_injuries = [fact for fact in factors_obj]\n", "df = get_factors(contributing_factors_to_injuries)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# RUN APRIORI" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2017-03-08T21:36:42.097442", "start_time": "2017-03-08T21:36:42.092787" }, "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(19641, 47)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2017-03-08T21:36:42.426404", "start_time": "2017-03-08T21:36:42.099277" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | support | \n", "itemsets | \n", "length | \n", "count | \n", "
---|---|---|---|---|
0 | \n", "0.0004073 | \n", "[Accelerator Defective] | \n", "1 | \n", "8.0000000 | \n", "
1 | \n", "0.0134922 | \n", "[Aggressive Driving/Road Rage] | \n", "1 | \n", "265.0000000 | \n", "
2 | \n", "0.0125757 | \n", "[Alcohol Involvement] | \n", "1 | \n", "247.0000000 | \n", "
3 | \n", "0.0707703 | \n", "[Backing Unsafely] | \n", "1 | \n", "1390.0000000 | \n", "
4 | \n", "0.0019347 | \n", "[Brakes Defective] | \n", "1 | \n", "38.0000000 | \n", "