{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Load Necessary Dependencies" ] },
 { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "import csv\n",
  "\n",
  "import matplotlib.pyplot as plt\n",
  "# numpy is imported explicitly: the bar-chart cell below calls np.arange.\n",
  "import numpy as np\n",
  "import Orange\n",
  "import pandas as pd\n",
  "from Orange.data import Domain, DiscreteVariable, ContinuousVariable\n",
  "from orangecontrib.associate.fpgrowth import *\n",
  "\n",
  "%matplotlib inline" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Construct and Load the Groceries Dataset" ] },
 { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# Read the file once; each CSV row is one transaction (a list of item names).\n",
  "with open(\"grocery_dataset.txt\", newline=\"\") as f:\n",
  "    transactions = list(csv.reader(f, delimiter=\",\"))\n",
  "\n",
  "# Universe of distinct items seen in any transaction.\n",
  "grocery_items = set()\n",
  "for line in transactions:\n",
  "    grocery_items.update(line)\n",
  "\n",
  "# Fix the column order explicitly: iterating a set of strings can differ between\n",
  "# kernel sessions, which would make the DataFrame layout irreproducible.\n",
  "item_columns = sorted(grocery_items)\n",
  "\n",
  "# One-hot encode: one dict per transaction, 1 for purchased items, 0 otherwise.\n",
  "output_list = []\n",
  "for line in transactions:\n",
  "    row_val = dict.fromkeys(item_columns, 0)\n",
  "    row_val.update(dict.fromkeys(line, 1))\n",
  "    output_list.append(row_val)\n",
  "grocery_df = pd.DataFrame(output_list, columns=item_columns)" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Instant food productsUHT-milkabrasive cleanerartif. sweetenerbaby cosmeticsbaby foodbagsbaking powderbathroom cleanerbeef...turkeyvinegarwaffleswhipped/sour creamwhiskywhite breadwhite winewhole milkyogurtzwieback
00000000000...0000000000
10000000000...0000000010
20000000000...0000000100
30000000000...0000000010
40000000000...0000000100
\n", "

5 rows × 169 columns

\n", "
" ], "text/plain": [ " Instant food products UHT-milk abrasive cleaner artif. sweetener \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "3 0 0 0 0 \n", "4 0 0 0 0 \n", "\n", " baby cosmetics baby food bags baking powder bathroom cleaner beef \\\n", "0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 \n", "\n", " ... turkey vinegar waffles whipped/sour cream whisky \\\n", "0 ... 0 0 0 0 0 \n", "1 ... 0 0 0 0 0 \n", "2 ... 0 0 0 0 0 \n", "3 ... 0 0 0 0 0 \n", "4 ... 0 0 0 0 0 \n", "\n", " white bread white wine whole milk yogurt zwieback \n", "0 0 0 0 0 0 \n", "1 0 0 0 1 0 \n", "2 0 0 1 0 0 \n", "3 0 0 0 1 0 \n", "4 0 0 1 0 0 \n", "\n", "[5 rows x 169 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grocery_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# View top sold items" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "43367\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
item_nameitem_count
0whole milk2513
1other vegetables1903
2rolls/buns1809
3soda1715
4yogurt1372
\n", "
" ], "text/plain": [ " item_name item_count\n", "0 whole milk 2513\n", "1 other vegetables 1903\n", "2 rolls/buns 1809\n", "3 soda 1715\n", "4 yogurt 1372" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_item_count = sum(grocery_df.sum())\n", "print(total_item_count)\n", "item_summary_df = grocery_df.sum().sort_values(ascending = False).reset_index().head(n=20)\n", "item_summary_df.rename(columns={item_summary_df.columns[0]:'item_name',item_summary_df.columns[1]:'item_count'}, inplace=True)\n", "item_summary_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Visualize top sold items" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY8AAAFkCAYAAADYGbemAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXeYZFW1t98fCBKHIEjOSREFJUsQVCQYMCKggqIgyicY\nrgp4FdCLiFfhEhQBJQmIKCKIoAKS8wxpGIIgQUGSARhRib/vj72LPl1TXbVPhQ7T632eerpqV62z\nd1VXnXX2irJNEARBENRhjrFeQBAEQTDxCOURBEEQ1CaURxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REE\nQRDUJpRHELRB0qWSPjHgOQ6UdGq+v7ykf0qas0/H/oGkr+b7W0h6sB/HzcfbTNJd/TpeMLEI5RGM\nGpLul/TWfP+jkq4c6zWNN2z/yfYCtl9o97rSz8/2nra/0Y+1SbKkVSvHvsL2Gv04djDxCOURBLMp\n/dq9BEErQnkEo46kVwM/ADbOJpon8vjLJX1H0p8kPZpNLvPm57aQ9KCkL0l6TNLDkt4taTtJf5D0\nd0n7t5lzO0m3S5op6SFJ/5XHF5F0nqTHJf0j31+2zXF2k3RHfu1vJa2QxyXp8Ly2pyRNl7TWCMdY\nSdJleS0XAotVnlsxX+G/LD/+qKR782vvk/ShNp/fSZKOkXS+pKeBLfPY/zTNv7+kv+ad4Icq48NM\ndNXdjaTL8/Atec4PNpvBJL06H+MJSTMkvavy3EmSvifp1/m9XCdplZE+52D8E8ojGHVs3wHsCVyT\nTTQL56e+BawOrAOsCiwDfK0iuiQwT2X8eODDwLrAZsBXJa00wrQ/Aj5pe0FgLeD3eXwO4ERgBWB5\n4N/A0a0OIGl7YH/gvcDiwBXAT/LTbwM2z+tfCNgB+NsIazkdmEZSGt8Adh1hvvmBI4Ft87rfCNzc\n5vMD2Bk4GFgQaGXWWjLPu0ye9zhJHU1PtjfPd9fOc/60aa1zAb8Cfge8EvgMcFrTsXcEDgIWAe7J\n6wwmKKE8gnGBJAF7AJ+z/XfbM4Fvkk44DZ4DDrb9HHAG6SR4hO2ZtmcAtwNrjzDFc8CakqbY/oft\nGwFs/832Wbb/lec8GHjTCMfYEzjE9
h22n8/rWyfvPp4jnbBfBSi/5uEW73N5YH3gq7afsX056aQ7\nEi8Ca0ma1/bD+X224xzbV9l+0fZ/RnhNY+7LgF+TFF2vbAQsAHzL9rO2fw+cB+xUec3Ztq/Pn91p\npIuEYIISyiMYLywOzAdMy2aPJ4Df5PEGf6s4kv+d/z5aef7fpBNYK94HbAc8kE1GGwNImk/SsZIe\nkPQUcDmw8Aj+ghWAIyrr+zsgYJl8sjwa+B7wmKTjJE1pcYylgX/Yfroy9kCrBefXfJCktB7OJp9X\njfD+Gvy5w/Ot5l66g0wJSwN/tv1i07GXqTx+pHL/X4z8vwomAKE8grGiuZzzX0kn/9fYXjjfFrLd\nlxOM7Rtsb08yqfwSODM/9QVgDWBD21NIpidISqGZP5NMXwtXbvPavjrPcaTtdYE1SearL7Y4xsPA\nItkk1WD5Nuv+re2tgKWAO0mmOpj186PDeINWc/8l33+apMAbLNnhWFX+AiwnqXpOWR54qMYxgglE\nKI9grHgUWFbS3AD5ivV44HBJrwSQtIykrXudSNLc2dG8UDZ5PUUyB0EyNf0beELSosABbQ71A2A/\nSa/Jx11I0gfy/fUlbZht/08D/6nM8RK2HwCmAgfldW0KvHOEdS8haft8sn8G+GflmMM+v5o05t4M\neAfwszx+M/DevBtbFfh4k9yjwMojHPM60m7iS5LmkrRFfl9ndLG+YAIQyiMYK34PzAAekfTXPPZl\nkiP12mxCuoi0K+gHHwHuz8fdE2hEGf0fMC9p53MtyVTWEttnA4cCZ+Tj3AZsm5+eQlJ+/yCZa/4G\n/O8Ih9oZ2JBk9joAOGWE180BfJ50Vf93ki/mU/m5Vp9fCY/kNf6F5HfY0/ad+bnDgWdJSuLk/HyV\nA4GTs9lumJ/E9rMkZbEt6bP8PrBL5djBbIaiGVQQBEFQl9h5BEEQBLUJ5REEQRDUJpRHEARBUJtQ\nHkEQBEFtXjbWCxgUiy22mFdcccWxXkYQBMGEYtq0aX+1vXin1822ymPFFVdk6tSpY72MIAiCCYWk\nlhUPmgmzVRAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtRmY\n8pC0nKRLJN0uaYakffL4gZIeknRzvm1XkdlP0j2S7qr2cZC0rqTp+bkjc8vSIAiCYIwYZJLg88AX\nbN8oaUFSe9EL83OH2/5O9cWS1iT1q34NqaXlRZJWz21HjwF2JzWcOR/YBrhggGsPgiAI2jAw5WH7\nYVLLTWzPlHQHw/sZN7M9cIbtZ4D7JN0DbCDpfmCK7WsBJJ0CvJsBKo/DL/xDrdd/bqvVB7SSIAiC\n8cmo+DwkrQi8nrRzAPiMpFslnSBpkTy2DKlHdIMH89gy+X7zeKt59pA0VdLUxx9/vI/vIAiCIKgy\ncOUhaQHgLOCztp8imaBWBtYh7Uy+26+5bB9nez3b6y2+eMe6XkEQBEGXDFR5SJqLpDhOs/0LANuP\n2n7B9oukns8b5Jc/BCxXEV82jz2U7zePB0EQBGPEIKOtBPwIuMP2YZXxpSovew9wW75/LrCjpJdL\nWglYDbg++06ekrRRPuYuwDmDWncQBEHQmUFGW20CfASYLunmPLY/sJOkdQAD9wOfBLA9Q9KZwO2k\nSK29cqQVwKeBk4B5SY7yiLQKgiAYQwYZbXUl0Cof4/w2MgcDB7cYnwqs1b/VBUEQBL0QGeZBEARB\nbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtQnlEQRBENQmlEcQBEFQm1AeQRAE\nQW1CeQRBEAS1CeURBEEQ1CaURxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQ\nBEFtQnkEQRAEtQnlEQRBENQmlEcQBEFQm1AeQRAEQW1CeQRBEAS1CeURBEEQ1CaURxAEQVCbUB5B\nE
ARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtQnlEQRBENQmlEcQBEFQm4Ep\nD0nLSbpE0u2SZkjaJ48vKulCSXfnv4tUZPaTdI+kuyRtXRlfV9L0/NyRkjSodQdBEASdGeTO43ng\nC7bXBDYC9pK0JrAvcLHt1YCL82PyczsCrwG2Ab4vac58rGOA3YHV8m2bAa47CIIg6MDAlIfth23f\nmO/PBO4AlgG2B07OLzsZeHe+vz1whu1nbN8H3ANsIGkpYIrta20bOKUiEwRBEIwBo+LzkLQi8Hrg\nOmAJ2w/npx4Blsj3lwH+XBF7MI8tk+83j7eaZw9JUyVNffzxx/u2/iAIgmA4A1cekhYAzgI+a/up\n6nN5J+F+zWX7ONvr2V5v8cUX79dhgyAIgiYGqjwkzUVSHKfZ/kUefjSbosh/H8vjDwHLVcSXzWMP\n5fvN40EQBMEYMchoKwE/Au6wfVjlqXOBXfP9XYFzKuM7Snq5pJVIjvHrs4nrKUkb5WPuUpEJgiAI\nxoCXDfDYmwAfAaZLujmP7Q98CzhT0seBB4AdAGzPkHQmcDspUmsv2y9kuU8DJwHzAhfkWxAEQTBG\nDEx52L4SGCkf4y0jyBwMHNxifCqwVv9WFwRBEPRCZJgHQRAEtQnlEQRBENRmkD6PScnhF/6htszn\ntlp9ACsJgiAYHLHzCIIgCGoTyiMIgiCoTSiPIAiCoDahPIIgCILahPIIgiAIahPKIwiCIKhNKI8g\nCIKgNpHnMc6omycSOSJBEIwFsfMIgiAIahPKIwiCIKhNKI8gCIKgNqE8giAIgtqE8giCIAhqE8oj\nCIIgqE2E6s5GRJhvEASjRew8giAIgtqE8giCIAhqE8ojCIIgqE0ojyAIgqA2oTyCIAiC2oTyCIIg\nCGoTyiMIgiCoTUflIWmfkrEgCIJg8lCy89i1xdhH+7yOIAiCYAIxYoa5pJ2AnYGVJJ1beWpB4O+D\nXlgQBEEwfmlXnuRq4GFgMeC7lfGZwK2DXFQQBEEwvhlRedh+AHgA2Hj0lhMEQRBMBEoc5u+VdLek\nJyU9JWmmpKdGY3FBEATB+KSkqu63gXfavmPQiwmCIAgmBiXRVo92ozgknSDpMUm3VcYOlPSQpJvz\nbbvKc/tJukfSXZK2royvK2l6fu5ISaq7liAIgqC/lOw8pkr6KfBL4JnGoO1fdJA7CTgaOKVp/HDb\n36kOSFoT2BF4DbA0cJGk1W2/ABwD7A5cB5wPbANcULDuIAiCYECUKI8pwL+At1XGDLRVHrYvl7Ri\n4Tq2B86w/Qxwn6R7gA0k3Q9MsX0tgKRTgHcTyiMIgmBM6ag8bH+sz3N+RtIuwFTgC7b/ASwDXFt5\nzYN57Ll8v3m8JZL2APYAWH755fu87CAIgqBBR+Uh6UTSTmMYtnfrYr5jgG/k432DlD/SzXFaYvs4\n4DiA9dZbb5Y1B0EQBP2hxGx1XuX+PMB7gL90M5ntRxv3JR1fOfZDwHKVly6bxx7K95vHgyAIgjGk\nxGx1VvWxpJ8AV3YzmaSlbD+cH74HaERinQucLukwksN8NeB62y/k3JKNSA7zXYCjupk7CIIg6B8l\nO49mVgNe2elFWclsASwm6UHgAGALSeuQzFb3A58EsD1D0pnA7cDzwF450grg06TIrXlJjvJwlgdB\nEIwxJT6PmaSTvfLfR4Avd5KzvVOL4R+1ef3BwMEtxqcCa3WaLwiCIBg9SsxWC47GQoIgCIKJQ5HZ\nStK7gM3zw0ttn9fu9UEQBMHsTUlhxG8B+5D8EbcD+0j65qAXFgRBEIxfSnYe2wHr2H4RQNLJwE3A\n/oNcWBAEQTB+KSmMCLBw5f5Cg1hIEARBMHEo2XkcAtwk6RJSxNXmwL4DXVUw6hx+4R9qy3xuq9UH\nsJIgCCYCJdFWP5F0KbB+Hvqy7UcGuqogCIJgXFPiMH8P8C/b59o
+F/iPpHcPfmlBEATBeKXE53GA\n7ScbD2w/QcoWD4IgCCYpJcqj1Wu6KWsSBEEQzCaUKI+pkg6TtEq+HQZMG/TCgiAIgvFLifL4DPAs\n8FPgDOA/wF6DXFQQBEEwvimJtnqaCM0NgiAIKpQmCQZBEATBS4TyCIIgCGoTyiMIgiCoTUkzqJVI\nTvMVq6+3/a7BLSsIgiAYz5Tka/yS1AHwV8CLg11OEARBMBEoUR7/sX3kwFcSBEEQTBhKlMcRkg4A\nfgc80xi0fePAVhUEQRCMa0qUx2uBjwBvZshs5fw4CIIgmISUKI8PACvbfnbQiwmCIAgmBiWhurcx\nvJNgEARBMMkp2XksDNwp6QaG+zwiVDcIgmCSUqI8ondHEARBMIySwoiXSVoBWM32RZLmA+Yc/NKC\nIAiC8UpJG9rdgZ8Dx+ahZUiJg0EQBMEkpcRhvhewCfAUgO27gVcOclFBEATB+KbE5/GM7WclASDp\nZaQ8jyB4icMv/EOt139uq9UHtJIgCEaDEuVxmaT9gXklbQV8mlTnKgj6QiieIJh4lJit9gUeB6YD\nnwTOt/2Vga4qCIIgGNeU7Dw+Y/sI4PjGgKR98lgQBEEwCSnZeezaYuyjnYQknSDpMUm3VcYWlXSh\npLvz30Uqz+0n6R5Jd0naujK+rqTp+bkj1XC+BEEQBGPGiDsPSTsBOwMrSTq38tSCwN8Ljn0ScDRw\nSmVsX+Bi29+StG9+/GVJawI7Aq8BlgYukrS67ReAY4DdgeuA84FtgAvK3l4wGQifSRCMPu3MVlcD\nDwOLAd+tjM8Ebu10YNuXS1qxaXh7YIt8/2TgUuDLefwM288A90m6B9hA0v3AFNvXAkg6BXg3oTyC\nIAjGlBGVh+0HgAeAjfs43xK2H873HwGWyPeXAa6tvO7BPPZcvt883hJJewB7ACy//PJ9WnIQBEHQ\nTDuz1Uxa53MIsO0pvUxs25L6mi9i+zjgOID11lsvclGCIAgGRLudx4IDmO9RSUvZfljSUsBjefwh\nYLnK65bNYw/l+83jQRAEwRhSEqrbT84lRW99K/89pzJ+uqTDSA7z1YDrbb8g6SlJG5Ec5rsAR43y\nmoPZmHC2B0F3DEx5SPoJyTm+mKQHSaXdvwWcKenjJH/KDgC2Z0g6E7gdeB7YK0daQcpoPwmYl+Qo\nD2d5EATBGDMw5WF7pxGeessIrz8YOLjF+FRgrT4uLQiCIOiRkiTBIAiCIBjGaPs8gmC2oa6/BMJn\nEsw+xM4jCIIgqE0ojyAIgqA2oTyCIAiC2oTPIwjGiMgxCSYysfMIgiAIahPKIwiCIKhNKI8gCIKg\nNqE8giAIgtqEwzwIJiDhbA/GmlAeQTDJiMz4oB+E8giCoBax6wkgfB5BEARBF8TOIwiCUSN2LbMP\noTyCIJgQhK9mfBFmqyAIgqA2oTyCIAiC2oTZKgiCSUH4W/pLKI8gCIIOhL9lVsJsFQRBENQmlEcQ\nBEFQm1AeQRAEQW1CeQRBEAS1CYd5EATBgJkdI71CeQRBEIxjxqviCbNVEARBUJtQHkEQBEFtQnkE\nQRAEtQnlEQRBENQmlEcQBEFQm1AeQRAEQW3GRHlIul/SdEk3S5qaxxaVdKGku/PfRSqv30/SPZLu\nkrT1WKw5CIIgGGIsdx5b2l7H9nr58b7AxbZXAy7Oj5G0JrAj8BpgG+D7kuYciwUHQRAEifFkttoe\nODnfPxl4d2X8DNvP2L4PuAfYYAzWFwRBEGTGSnkYuEjSNEl75LElbD+c7z8CLJHvLwP8uSL7YB6b\nBUl7SJoqaerjjz8+iHUHQRAEjF15kk1tPyTplcCFku6sPmnbklz3oLaPA44DWG+99WrLB0EQBGWM\nyc7D9kP572PA2SQz1KOSlgLIfx/LL38IWK4ivmweC4IgCMaIUVcekuaXtGDjPvA24DbgXGDX/LJd\ngXPy/XOBHSW9XNJKwGrA9aO
76iAIgqDKWJitlgDOltSY/3Tbv5F0A3CmpI8DDwA7ANieIelM4Hbg\neWAv2y+MwbqDIAiCzKgrD9v3Amu3GP8b8JYRZA4GDh7w0oIgCIJCxlOobhAEQTBBCOURBEEQ1CaU\nRxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtQnlEQRBENQm\nlEcQBEFQm1AeQRAEQW1CeQRBEAS1CeURBEEQ1CaURxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDU\nJpRHEARBUJtQHkEQBEFtQnkEQRAEtQnlEQRBENQmlEcQBEFQm1AeQRAEQW1CeQRBEAS1CeURBEEQ\n1CaURxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtZkwykPS\nNpLuknSPpH3Hej1BEASTmQmhPCTNCXwP2BZYE9hJ0ppju6ogCILJy4RQHsAGwD2277X9LHAGsP0Y\nrykIgmDSIttjvYaOSHo/sI3tT+THHwE2tP3/ml63B7BHfrgGcFefl7IY8NcxkJ2sc8e6J8/cse7R\nn3skVrC9eKcXvWwAE48Zto8DjhvU8SVNtb3eaMtO1rlj3ZNn7lj36M/dKxPFbPUQsFzl8bJ5LAiC\nIBgDJoryuAFYTdJKkuYGdgTOHeM1BUEQTFomhNnK9vOS/h/wW2BO4ATbM8ZgKb2YxHo1p03GuWPd\nk2fuWPfoz90TE8JhHgRBEIwvJorZKgiCIBhHhPIIgiAIahPKow2S1m0x9o4ujjOHpCn9WVUwEpI+\nUDIWJJRYrvMr+z7vnJK+0+MxVioZCwZHKI/2HC9prcYDSTsBXy0RlHS6pCmS5gduA26X9MVuFyJp\nEUmv61a+5lxdnYTzSeGSHubdJH9eSPqwpMMkrVDjEPsVjrWae35Jc+T7q0t6l6S5aszdOM58dWWy\n3Cu6lPtxyVgrnBye53czby/YfgHYtMfDnNVi7OelwpJWkPTWfH9eSQvWkH1d/n68t3ErkFm03a1w\n3r58R/vFhIi2GkPeD/xc0s7AZsAuwNsKZde0/ZSkDwEXAPsC04D/LZ1c0qXAu0j/p2nAY5Kusv35\nAtnVgENItcDmaYzbXrlg6v2AnxWMDcP2C5JelLSQ7ScL5mnmGGBtSWsDXwB+CJwCvKmdkKRtge2A\nZSQdWXlqCvB84dyXA5tJWgT4HSk8/IPAh0qEJb0xr3cBYPn8Hj5p+9OF818r6WbgROACl0eyvKZp\nHXMCs+yY23CjpPVt31BDpjrfKsCDtp+RtAXwOuAU2090EL1J0rmk79TTjUHbv+gw36tI73mhppP2\nFCrf8w7H2J1UiWJRYBVS3tgPgLcUyJ5Aeo8zgBcbywbarpv0+zUgYHngH/n+wsCfgJJdU0/f0X4T\nyqMNtu+VtCPwS9I/+G22/10oPle+Kng3cLTt5yTVDW1bKCugT5B+kAdIurVQ9kTgAOBwYEvgY3TY\nafbpJPxPYLqkCxl+Uti7QPZ525a0Pekz+5GkjxfI/QWYSlK00yrjM4HPFa5btv+V5/u+7W/nk3kp\nhwNbk/OPbN8iafMa8qsDbwV2A46UdCZwku0/tFystB+wPzCvpKcaw8Cz1Avh3BD4kKQHSP8vpeW7\ndJd7FrCepFXzvOcAp5O+R+2YB/gb8ObKWMlJeA3gHaST7jsr4zOB3QvXvBepXt51ALbvlvTKQtmN\nbNcuymp7JQBJxwNn2z4/P96WdI4oodfvaF8J5dECSdNJX+QGi5LyS66TROEP61jgfuAW4PJsfnmq\nrcSsvEzSUsAOwFdqys5r+2JJsv0AcKCkacDX2sj04yT8CzqfAEZiZj4pfoR0hTUH0HFbbvsW4BZJ\np9kuVXLNSNLGpKu4hsKas84BbP9ZUnXohRqyBi4ELpS0JXAq8GlJtwD72r6m6fWHAIdIOsR2k
Wlu\nBLbuQRbgxZyH9R7gKNtHSbqpk5Dtj3Uzme1zgHMkbdz8mdTgGdvPNv5Xkl7G8N97O66RtKbt27uc\neyPbLyk52xdI+nahbM/f0X4SyqM1tZ3izdg+EqhevT+QTwp1+DopMfJK2zdIWhm4u1D2mXzyvVsp\nwfIhkkml3ZpvkXQbsLXtk2uutXGMkyXNCyxvu25hyg8COwO72X5E0vIUmPkknWl7B5IpZJaTQKGy\n/yzJNHe27Rn5s67jv/lzNl057zj3Ae4oFc4+jw+TFOejwGdIu5h1SKadlZpe/yrbdwI/k/SG5uPZ\nvrFkXtsPSNoUWM32iZIWp8P3pInnlHyBuzK0E+io8CWtTjJTLmF7LSV/3rts/08HuS/Z/jawc563\n+f2U7HAvk9TYtW0FfBr4VYEcJDPqNZIeAZ6h/k7tL5L+m3RxAEkR/KVQttfvaF+JJMEWdHJg2f57\nwTGWAL4JLG17W6X+Ixvb/lGfltlp/vVJJ6+FgW8ACwHftn1tgewVwFty+fu6874T+A4wt+2VJK0D\nfN32uwrlVyCdyC5Scj7PaXtmB5mlbD+sEZzreec1UCQtBhxBMj2JZJPex/bfCuX/APwYONH2g03P\nfdn2oU1jx9neQ60DFGz7zS3GW817ALAesIbt1SUtDfzM9iaF8msCewLX2P6JUsTTDs3rbSF3GfBF\n4Fjbr89jt9leq4PcO23/StKurZ4vuejJF1UfJ/kvRbpA+2GJn0nSPcDngekM+TyKv2P53HIA0DBp\nXg4cVHJOGW+E8miBpPsYcm414xKns6QLSH6Hr9heO2+Nb7L92hrrWJxkx12Ryi7R9m6lx+gGSacA\nryZd+Vb9FocVyE4j2bEvrXNSyK97yZFpexUlp/8PbHd0ZPaKpF8xq+niSZIZ71jb/xng3HOSFPsX\nBjVHm7lvBl4P3Fj5f91aciWd132K7doOW0k32F5f0k2VeW+2vU7dY3Ux9/zAf3LUV+N9vNz2vwpk\nr7G98aDXOMLcY/YdbUWYrVrQcG71yGK2z8w2/EZ9rmIbeOYc4ArgIgrt5yN8wV6icAfwx3ybAygO\nYcw8Z/vJJtv/iyO9uIleHJlImsnQe5+bZD552nZJjs29wOLAT/LjD5J8PasDx5PMSe3mPrLF8JPA\n1GynHxGnKLU3Fqyx1by7jHDMUwoP8WwOUnA+3vylc+d1ryBp7i52qX9VitRqzPt+4OFS4bzjamWi\nLNlxXUzaIf4zP56XtFMs+R/cJOl0kpnrmcq8RX6+fEH4JVLEWDUKsmTdPX1H+00ojxY07MmtbMlQ\nbE9+OtuxGz+OjUgnkzrMZ/vLNWUayVfvBZZkyLa6E8mW3hHbB9Wcs8oMpdDmOfPOYW/g6kLZXhyZ\n2H5J0SkdZHtgo0LxN9pev/L4V5Wr45IinPMAr2IonPl9wH2k0OMtbX+2g/zN6iJ0FaiueR5SuOmN\nJNt8CWdKOhZYOO/8diOdiEq5F7gqr73OLnUvUnTWqyQ9RPqsPlxj3v+q3J+H9HmXBkvMY7uhOLD9\nT5Xn58xLUhrVkP2SKLEGpwE/JflV9yT5ih4vlO31O9pXQnm05vMk88l3WzxnhocXtjvGucAqkq4i\nXTG8v+Y6zpO0nXNYXwm2LwOQ9F0PbxTzK0lTS47R41XdZ0iRYc+QQjZ/S/K5lNCLI7N5rQZ+mW36\n+xaILCBpedt/AsjO+objuOSq+nXAJhVTyDGkXeOmJPt4J7oKXbX9mepjSQuT2jQXYfs7+bN+ihQG\n+zXbF5bK0+Uu1fa9wFvzTmeOTn6tFvLTmoauknR9ofjTkt7QuAhUqiRRFILvLqPEKrzCKQR9n/xb\nvUxSaY5Nr9/RvhLKowW298h/60ZHVY9xo6Q3kX6QAu6y/VzNw+wD7C/pGeA5hiI7Ssww80taOf9I\nG6UbSk0SvVzVvd32V6iEFitlp7dNMMzsS3JkTgc+Scp+/
mHhvGh40tgcJEdwqR34C8CVkv5I+pxX\nIoXKzg+URJ4tQvohN3aX85N8Ny/k/18nfmj7quqApCKndRNPU5ZwVuUPpO/VRZLmk7Rg6cm8sUuV\nNF+Jz6BBrwElTUEtc5ASIxcqnP6zpCi1v5D+10uSTEAl885D+o42m51K/ZCNc8DDkt5OirQqyjCn\n9+9oXwnrQ0vlAAAgAElEQVSHeRuyI+3tzOqwHnFLrg6lCkpto70iaRuSWeBe0hdtBWAP27/r8njX\n296g4HU32n5Dp7FBIOnEysPnSXk2x9t+rFD+5STTEyRlX+yAVErc+m/gUtLnvTnp5PgT4EDbbUvT\ndPu5Nfm45iBVFDjTdsluq+cgBaW8gx8BC9guzqzvNaCkKajleZLZ6+u2ryyUn4t0YQc1Luwk/Qy4\nkxRS/nVSqO0dtvcplH8HaUe6HHAUKQH3INtFze16+Y72m1AebZB0PunKtTksb0SfQNMJrBnXiZTS\nCBnKti8vlK9+0e60XXIFPNJV3ZG21xhBpJqdvgPJpttgCqlUy4iKR7MmZQ6jRuTP3rYP7/TaNsdY\ni1nLuZT6DlBK6Gy8zxtsd4zfzyffN5KuhqtrnwK8x/baHeSrpVueBx5wU6hvB/mb85qvq0Q9Ta9x\nEr+OZI49t050nXqItlIKtd24eadWIPdm278f6QKv5MKusV7liLSshK6wXepb65oR1v0kML30Aqmf\nhNmqPcuWnLiq9MEmWqV6tToP6UfeCIVtS/5Sf5KhePJLJR1beIVVrcPTuKrrVCakl+z0fiRlvqCU\nNNaV8si+kS1IyuN8YFvgSsodz5AuNB4m/a9WlbRqgaKfm2TuehnDfQZP0cFHlhXmgb2YV+kxSAG6\nzqzvOqDE9ouSjiaFGNfhTcDvGV7W5KXDUub0bvx+nsgXG48AdSICu0qOzHwc2Jj0HkT6vk4DVpL0\nddtFBTH7hu24jXADDiXVs+pG9hWkDPMb8z/4CJKzrJf1LAecVfjaH5LsoG/OtxNJdvVBf2Zz9Si/\nJEkBvRNYsqbs4cDRpCKWb2jcCmWnk3ZZt+THSwAX1pj7E/kY/yBl/f4b+H0N+RUq9+cAphTKXUyq\ngdbt5/1tUo2sO4GtgLOBg2vI/5y0c7qRFBr9X8AZBXJvAK4iKYyrSH6X19WY9zskX5z69d2t8X9e\nhKSI7gUeA/asIX8Z6SLwpsrYbYWyvyUpncbjJfLYoqXH6OctzFZtUKrXcyrpx1zLYa1UGPByhpch\n2ML2W3tYj4AZLijMJukWN5k8Wo2NINv19lg9VPNVKgD5NYaurN5EsmOf0Ek2y3edbd3w6SglOW5J\n2jHdYftVHUQb8tNJYbPX2l5HqfrrN213LNed5U8nhW6+QKqWOgU4wnbb8iySziFdgXdTiLKnbOss\nX82snyPLF2XW511OVwElSjk985N2xv+h3m+zZX03218vnb9bejTX3V797VfPB9XjjRZhtmrPYaRt\n4vTSH1OFpWxXQ1T/R1JRREcDSUcx3Bm6DukKr4QXJK1i+4/5WCtTXqivsT1unIy3oHx7XLuab4Uv\nAq9vnHiyWeNqoK3yUAp7PAL4qgsdpi2YqhTmejzpvf4TqFN47z+2/yMJSS93yhMa0UfUgm5L+PdS\niBInE9DJpMRMk07idXJr/koXJcFz1NKnSaHMBq6Q9AN3cABL2sTJ17F4p9e24enK/XlIZtOiOmS9\nRonRW3LkpZLOYyhy8f15bH6gUwn8/jPaW52JdCPtHOboUvYwYEfSiXMOkiP5OzWPsWvl9iFSHkGp\n7JtJZeQvzbf7gS0LZbveHgPT8t/pzWMF815NqonVeDw3cHWB3M357419+r+vSA0TSpY5m1RH7MD8\nvTkHOL+G/AyS2ednwJvy2C1tXn9x/ntoj+/17cCf83fksvyd2baG/MqkXJzHSSacc4CVC+TOJEVp\nbZlvx5NqanWSa3y/+
vK/zsd6OamcTslrL8i/5YZ582XV73rh53UR8C9SsdIrqZgsO8iKZKo7PN/e\nzyib7aq32Hm0516SZr+A4aUI2oXqNkpkiBRB0zBbzUG6mv2vEURnwalC7dykiCkDdarUvgJYi3Qi\nfDdpJ1Ga4b6c7Wo2+mN57O+SOpkWalfzldRobnUPqez9OaT3uz1Q0r/kDkl3A0treL+TWhVPs7mu\ncSV8ZeHckCZ5T757YDafLQT8plSe+iX8l1IqafIuSWfA8DpsLqyqS0qE3dL2PQD5qvjXpJNkCacD\n3wMa739HUnjyhh3k1vJw8+slkkrKnD8n6ThgWbUoCeNCc10T85EaQpXQddmh/LtYz3ZXyZG2rZTo\n+6SHCocuQDKxjjqhPNpzX77NnW8dcaVERq9I2o50UnkpKUjSJ22X/LC/avtnSr3TtyQ5GI+h848a\netse70P6Me5NyizfkrRzakfjM2tkKzdoWxOqge2dJC1J2h0VVe9tRtL3gVUZqhv0SUlvtb1XofxL\nHfVI/6sVSZ9DUeav65fw/xqpJfKypF3usMNRVgUBYGZDcWTupd7JaD4PN2OeqrJ2yzdK2si5yrOk\nDUnRep14B8m/sjXDo/qK0fDQ8DlJ1R9K/R29Rol9iZSH83RHgSY0awfEZSjsgDgIwmE+QJTaRa7G\ncMdxUY5Glr8TeEfzVaELnLiVePRDSNvq00udatkR17gKhxQNc5YLvixVP8tEIn/Wr268x3yVOMP2\nqwvlbyZltK9ICvU9B3iN7U4d9arHeDuzZi63PalJ+qqH+9ZK52o48rciJZCeSTohfgD4kwvb50o6\nlBRhdkaW/yApGul/8/r/3vT6xom7kaT3p/x4BVIuUlGXPklrOzUBq42Gl+5/HnjUhU3ElOrdHUXa\n1d9GLjtku2iXKulbwF9JuVDVAIeSNg895eT0m9h5DIgcObQP6crwZlKBvmsovyKE3q4KH1IqeLcV\ncKhSwmCR4zpvj68kXTUbuL5EcWROkLQsKWLoCuBy2yW1nVBvFUd75R5Sb+lGX4bl8lgpXXXUayDp\nB6SdypakMOv3Ax1rNXWjODLVXIdHGeoT/ziFvcAzO+S/n2wa35H03WmOsus5pwdS47K6MpKm2H6K\nWX9DU5SqCj/lXJtsBPk5SJ9NL2WHGkEz1R1tq8+pFT3n5PST2HkMiF5CN/txVZjtoduQdh13K2U/\nv9YF5Ukk7UC6cryU9APZDPii7Z93ks3yc5Pe+xakk8oCtjvW75H0O9IV2X9RqTjq+pWFi9FQeY+F\n8pqvz483JCnNLQqPcx3wf6SaXu+0fZ8K+5hk+UbGcuPvAsAFtjer/66CVkg6z/Y7NHK/ngVI5Wz2\nb3OMUQ+Jrcz9bZLZeBdSAdJPA7c71ZIb/fWE8hgMGornvhnY0PYzkmbYfk2BbN9KnHSDUt/srZxz\nOvKO4CKX5YhsSlI2m5Gij24mlW/4SVvBJDvN9rqqNCNqfI49vJ1Oc76p3fPOVYoLjtNVR72K/HW2\nN5R0Lclk+DeS2WzVEvmxRD2WdRkvKGXs39bOVCnpOyQLwi9q7Mar8rOEKJNqiXUMO1aPOTn9JpRH\nG9RDKQFJZ5NyHD5LMlX9g5R93dEGrtQP47cubGHab5rtqPlLe0uJbVXS8yRH5iGkUNXiUtGSrrW9\nkaTfkpzHfwF+bnuVDnL9aIA1pkj6KsmW/mZS9BKkE8NXO8i95KiXtAWpNPwptkcl7l8jlHWxXbf9\nQN15X0EKi96Eoei4rw/6N6MekhOz/Jkks1kjCnNnYGHbHxjAcgdKKI82qMs+yy2O8yaSWeSCEvuo\npC+ToknmIpWfuIB6foeeyNvjtRnesezWEvORUqLdJqSaWuuTCkpe0+kkmGVbVRw90Hbbnh792j30\nQsUU0jx3iS0bSfMCnyLt2BpXpMd0uiLth6O+F7J5dm1SuY21lZLoTrW91YDn7XsFh9F
ATVniI41N\nBMJh3p75bF+v4UXfSqMyfmz7IzCsQdOPKWgVmU0dh0pakBSWuBvwA0l3kHIHfuvheRj95kHS1rxh\nbz/O9tklgrafkHQvSQEsS6p7NFfhvP+w/SQp9HFLSBnFBXMOXDkUUG28NQ/JP1XapwFSHbKZDIXr\n7kwqyrjDiBKJrhz1GsqtaYkL+tVn/u0Ugvq8Ulj4Y6T//UjzVlsFt5q36AqePlRw6Ib8Of8+f08b\nF0tb2P5l4SG6DVEed4TyaE8vpQSG+TayPXXdOpM7JRCdnW8Nu/q2pJPK1nWOVZNXkvI0biSVBvlt\nqWBWHHeSr5yBj9UwXR1FKpjXaWykubuuq9UrLcwl/6dUJ6tlHaUW9JI0txMpuKARQVWirBu5NWuQ\ndoiNfhLvpCDKq0Ktsi7OeVCSvkH6Lf2YZPr5ELBUjXl/J2lHUjAJpOi0Ot/TTYHVbJ+YfXoL2L6v\nQPSA6oVUvlg6AGirPJpClK+WNCxEuXTd4wqPUWr7RLjRupTAih1k9iNdQT5PyhCemW9/Aw6pOf8m\nwPz5/odJyWArjNJ7F0lBnUEKWf0msEqBXO1yLqTs9y+QymR8vnI7kDYlOloc50pSwtStpB/lgSQ7\neIns9CxXvV1BKgPRsRoylSq+pF3InjXXfiqwUeXxhiTfRSe5NUm7lZ3y45WAL9eY93JgwcrjBUnh\n1d18Z1aksKxLq8+m5uc1k2QSfS7/1l6s/Nae6iB7AKmkyh/y46WBqwrnvbXVd6dAboV2t8K5LyT5\nRxqPFyFZIWr/r/pxi51HG9xFn2XbhwCHSDrE9n49LuEYYG2l7mxfIMX/n8JQTP7AsG1Jj5D6FTxP\n+qL+XNKFtr/URu7FkZ5rQ9c9LZqY1/bFkmT7AVKpkNKr/wtIhSNPz493JOVdPAKcROseEFWq/e4b\nXQw7mZyqrMvQFSmknJO7GlesHqHEiu3bSbvExuP7SK0ESlmC4Vnwz+axYtRdWZenlYpANpILd2J4\nwcK2uLdKDu8hVSK+MR/rL9lEXMJUSYcxFNSwFwWZ7vn72CuLuRIIYfsfkop7ifSbUB4tGMke3PB9\nuMwe/BVJHwZWsv0NScuR7LR1TALP55P49sDRtn+k1O50oEjahxRL/leSwvqi7edy1NXdpES+vuHk\ns7hM0km2H1DNftgVatfVqvBWD2/5Ol25DWz+P7bFvTVkgpSTU5teHfWki5Hrc3QgpDpoxf2w1X1Z\nl51JpdyPIK3/qjzWab5XOVUsbmnKdFlNr2fz76phjp6/QKbBZ0hlYX5KWveFDE/4GyQvSlre9p/g\npUz5MYt4CuXRmn7Up/oeaSv9ZlKNp3/msTo5CzOVCrB9GNg8nxhLnc+9sCjw3uarJSfHaF8yhEdg\naaUilAsAxf2wK3RTV6vBnJI2aCh3SeuT6h5BeZBE7fIildd1e2Xak6Pe9sH5M28ER3zMdnFmPOn7\nXS3rcjKpQnCnee8nFb6sy+dJ9Z2+2+K50ppeZypVX1hYqV7UbiSfTUecalIV9YcfAF8BrsxRoI3k\n3T3GaC0RqjsoKlet1aYvRc2YKsdYknQ1doPtKyQtT4rsGHcJWP2I3lGX/bD7QVYWJ5AUl0gms0+Q\nToRvt31mG/ERy4vYHvhOscVaptkuDs7owXmMUgHNvRrKL18NH227rZlPvbVj7RlJW1FJtrN94WjM\n2ytKzbca/dKvdeqnMjZrCeUxMko1mo4iOa4hOVD3sf1ggex1pDDVG7ISWRz4nceotMGgyREnMEL0\nju2Oph8NZVl3pXBz7P8HGnZhpcKUZ9gujkyTtBCAcyhmDbkxKS/SZL6Zg7QT+VSNz+yALLOG7dUl\nLU3qq9ExRDrLX8ZQWRfy/ankSrMeIUFTPeZQqYdM7bEk+4cOJUU0ioIkwz6Z6vpOmK3acyLJgdrI\n/vxwHitJgDqSFGL7SkkHk65E/7tk0jax8LWyWUc
T2wcBSLqc1Dd8Zn58IKk/RAl/VupRYUlzkcxQ\nRR3eMl07FJUKR76PFDH0sop/q7RUd+Ok9a98Av479UJPu6XZUX8f9Rz1vTiPoTwUuZmuc6gyp5Ai\nq47Kj3cmhf2OmKnd6+8qh9vvbfvwGuts5tuk2md1vtf9MNX1nVAe7VncdrXO1EmSPlsiaPu0HOnz\nFtKX892lX5geI0nGml6id/YkOVCXITm7f0c9Z2QvDsVzSFfL06g0/qrBr3K+w/+STsSm0I7eLdkH\n9gPbP+3hML04jyHtMhqJgquTGpeVVFLoJYcKusiL6fV3ZfsFpZyaXpTHozUVB7Ybfo1tm3dWeQc2\nJoTyaM/fcqRNI5JkJ1K+RkckLUrKtv1JZWyugh9VQ3ZEXFD7fwzpOnrHXfbDrtCLQ3FZ211FPGXu\nBF6wfZZSMucb6JA41iv5hP1FUuRPt3TtPM5cDmyWTYS/I5Xi/yCd/497AccBr5L0EGnH1NG0WaGn\nTO1sAnopvLhGkMBVko5m1n4cpaajqZJ+SvpuVLuTlvShv5pZE2ZbjY0K4fNoQ75yPYqUxGbSP2rv\nxpVtB9n7SWUa/kE6kS1Myhl4FNjd9oix4Rq5ZDSk7fXAM6Z7If8wG7b+y0t/mGrRVpS0G5hqu6ir\nYLcORaXWpke5sPdIC/mGr2NTUqTXd4Cv2S7p3Ng16qG5UOUYXTuPK4EhnyHl2Xy7pp+qdjvWLHcH\nQ82kIOfFkExfdpvWw5K+RjJvNU7Y7yb5eUoKnl7SYtgu7Dmj1hWz7TaVsnPgzDKkRNKdGTovTCHt\nPDs2hxsEoTwGhKTjSRVhf5sfv41kUz8ROGLQJ5WxpNvonXwCfxVD7W/fR7oifQVwr+2WJsN+OBSz\nyWPVPF+jlWzbk1CTfNedG3shX2g0M2oXGEp1tD5NMuV83PYMFXS3a/YxNcZLfUwa3g1wFtwm9FnS\nXcDaDROQUlHKm22vUTJ3L0iap65TX9KuwEdJgQ03MKQ8ZgInFe5a+k4ojzbkE9/uzPoF79hPo9UP\nqHJ1erPtdQrX8C5ShVqAS22fV7r+saCX6B2lXhabOHdzU+qUdgXJvDDdI1QelXSc7T16uSoc6WTU\n7iTUJH8eyU+zFcmM8G9SlFlxaHY3tDoZlZygKs5jMdwvVLfE+Oak5l1X2T5U0srAZ23v3UHuNwz5\nmF7q3me7lVO4r+TvyXsqUXkLk/pzlHxPWgYI1FB695CsD1fk25WlkX2S3mf7rJLXjgbh82jPOaR/\n8EVUvuCFPKxUWv2M/PiDwKM5YqOohEc2SawPnJaH9pH0RrfpdDYO6CV6ZxFSnkXjxzQ/sGh2VI7o\nxG44FN1FlrdGbk1alx1IWeLfcSqWtxQpFHXQdGUH72NQxkOuhOM6lfRpqzgyvfqYaiPpKJKifBKY\noRTabZLCL638UC2hMg+prW6xA9z2qkr5WpsBbwe+J+mJwovJZZUqF88k+aXeAOzrgu6ggyCUR3vm\nc/ctUHcmFWD7JcPLL8xJeSjldsA6zvWilLJ3bwLGs/LoJXrn28DNki4lXQFvDnwzH+OiTsLqLvb/\ndNIJYBqz+plMWW9pnMqp/KLy+GHqRQ/VomIHn1fS6xluB5+vxnFeah3QbqwN3fasv1rSa7v1MXVJ\nw6E+jVypOnNp6QGad0ZKnQXrVPNdlpQ3thmpD8oMUj2wEnazfYSkrUmm3I+QwpPHRHmE2aoNkv4H\nuNr2+T0cY36nkgbdyN5Kyij/e368KMl0VWSHHwsk/RewGulq7hBS9M7pto9qKzgkvxSwQX54g+2/\n1Jh7tunS1okmO3g1yqiWHbzh8K48fhmpcmxxcyJ10bO+Vx/TeCFHmd3gwnbBkl4kKdpvlgaBVGQb\nZu8jSOeBs0fDrzbiekJ5zEqTPXh+0pf7OWrYg5WS3X5I+iHVrtMkSaQri28AlzB0Jb6ve4vrHzjd\nRu/k9/whYGX
bX8/b+yVdWExSPXZp0/DqsFe4vMHPmNGtHVypZtr+wLyklgOQ/l/PAsfbLqrfpC57\n1vfqY+oF9dD3RUN9OSBZERYnlf0/unDutUnfsc1JEWJ3A5fZ/lGB7Imk3eZKpF3LnCQlUqtPUL8I\n5TEg1Ic6TfmL+jaGiileb/uRvi92nCDpGHIxSduvzld1v7NdVExS0qmkukrV2P+9bO9SINtcHfaD\nwB/duTrsmCDpw7ZPlfQFWlfVLeoEqB5bB6i3nvVzkhJIq8EoHcPge0XSlSST8uGk8jkfI4ULd8yW\nb1J6z5OS/upkxqNUuqahdD8MYLtt9FiWmwNYhxR5+IRSH/dlbJeUwO874fNog1Lb2MtJV1K1u33Z\n/rOGl1+o63S/keRYPLfjK8eYPkXvbOhcTBJeKi8yd41ldNUTI9NVddgxpOFLKi05PxIbNA9Iutj2\nWwrlF2OoZ/3e2SzTsWe9Ul7IAaTIo0YAiYHRMFt13ffFqWXA2lTymCjrXwKApKnAy0lBDVcAm9fY\nbZm0W3oH8HXSdyAyzMcpJ5C+JEcplVK4ieQQPKJAttc6TZC6yX1I0gOkKI9xaxfuU/TOc/lqtHEC\nX5zCyLRML9E795CUTeOHvFweG5fYPjb/Pagb+RxcMD+wWN7hVR3uy9RYR7c96/chhXMXVWzoM133\nfVHqdbM7Q8ERp+VQ8SKfHqnEyOO1V5z4PkNtHr5O8m+dRb02D/3DY9TCcKLcSHbFjUjtZR8A7iyU\nW4wUYvsoqUzJqRS0M206RtctK8fw8/pxydgIsh8iVeN9EDiYlDH8gZrzrw38v3xbu4bcZSTb/6X5\n9nQeO5dkehzzz3aEdZ/MrK1JTyiQ24chZ/V9ldstwP+rMf+9wPkk/8mmwNyFcpcALxujz2x9krJY\nlpS0exZp11sieyu5NXR+PD8tWtO2kV+I1E56ar59F1ioUPbG/Pemylhx695+32Ln0QZJF5O+HNeQ\ntpjr236sVNx2L3Wa8Cg4DwfAa6oPcvROkUPPPRSTzHM1XxWeWuOqsNvqsGPN6zxrJeGO0TdOu+cj\nJH2m8PMZiVXdXevhe4FLJf2a4TWeinw1PbKi7RtIDdo+BiDpA8B1BbJiuPn5BWhZRmgkTgBuYyhc\n/yMkBfbeAtled+Z9JZRHe24lnfjWIiUWPSHpGtv/LpC9Sqm+1U+Bs6o/8NmRavSOpKcaw+ToncJj\nHEnqv/G9ji9uzcdJV5BP5+MdSlL8HU+Oti/LuRMbkH6cN3hiBCfMIWkR2/+Al8K56/yuj5W0N5Uq\nBqQeGx0LeGZWzYEOdZs6/Snf5s630WQ/hkrgtBtrxYnAdUqFP0XqhtgxUqrCKrbfV3l8kKSbC2W7\nbvMwCCLaqgClDOmPksowLGn75YVyGwA7kgqv3U46MZ7aXmpi00v0Ts5d+CCp4N3ZpM+rTqXU6aTd\nYaNm0TwkJdC2zlJ+7SdIu4/fk04KbyKFYJ5Q+42MIpJ2ISntxonvA8DBtn9cKP9Dko+iUfn4I6Tq\nwJ8olO+pqdNoImlbUuLtDgyvRDwFWNP2LMEDIxynUZEXUjBNcdteSdcAX7R9ZX68CakqwcaF8q9i\naGd+cZ2deb8J5dGG7EzbjLT7uJ9cj8b272seZzGSnfNDtufs9PqJTKtInZrRO42r5/eRFO/ytlcr\nlPs8qWd5tRz8Sbb/r0D2LuCNzg7cHAZ5tUehWF6vKJWAb9Rl+r3ttn0tmmRnqYDbaqyN/A2219fw\n7o8da7dlk8uXmLXn+8AaG+UoqXVIzuaqmXImcElj99bhGKsAD9p+RtKWwGuBU0otC3kNp5B8H5Cq\nbu/qwnDbHNywHMPDm6OT4DhkHtJJf5rrx3JPIdV52hFYhXRCK7qymYj0K3onsyqpuu4K1KsbdJhS\naZPGVeHHalwV/o3h9a1mUti7ZazJyqJYYTTxgqRVbP8RQKmwYZ2Q8m6bOp1Gu
vp/B6kJ2K5At1FI\nRdi+BbhF0umkc9/ytu+qeZizgPUkrQr8gBRQcTppR9OWHOG1hu218/kBp7pqRUj6BskC8keGwuHH\nrJNg7DwGhFKp7F8CZ9q+ZqzXM2iys/qzwNJAtaTIU6SM5Y4ZuJK+TVK4fyQVlPxljSu6OYEZ7rK3\ngaRTSFeR55B+kNuTfF63wqg5ckcdSW8h2fHvJSn8FUhKt1WF4lbyK5OaOr2RdBV9H2mH3TbYQ9I0\n2+sql9zIYze4MCG0FyS9k9RvZW7bK0lah2SibNlvvUm20b/kS6QOikepRokQSVNtr9fluu8CXusa\niZiDJHYeg2NlTyLN3KfonT8CG7uwgVPT/C9IukuVNrRdzP3HyuNG3aGJ3BK4I07JcquR/EwAd9mu\n04b3IZLyuQRYlHSxsCvJNNSOhkP+YUlvJ11wtK2H1UcOJFkBLgWwfbOklQpln1NqRbsLKTsdyvJa\nGlykVP+tm+Zdt5FKwJRGfA6U2HkEfUUpI3xPuo/e6WXuy0nl4K9n+A+z4xVl5RgLZJl/9n2B4xB1\nV4m4Kv8b4AlSNYTivhyS3pHnWo4UDTcFOMijUE1B0rW2N2ry07y0A+oguybp+32N7Z9kpbOD7UML\n5+66eZek9UgXNbcxPLy5+PvdT0J5BH2l1+idHud+U6tx25cVyK5FKm/duPr9K7CL7fFcoqRn1GMl\n4vEaWdUOST8CLgb2JQVm7A3MZXvPNjLHARcAF7lmy9x+IWkGcCwwnUp+R8n3eyB4lLIRJ9ONlJX+\nubFexxi991kyXluNDWjuQ0vGRpC9Gtiy8ngLUrTVmH+mA/7Mbi8ZayN/HMkOX3ferjLj+/Se5yNV\nMLiBlOV9MDBPB5kNSeauK0iK58vUqGBQOc48wOdJiaxnkfyEbeeuyN4w1t+X6i12HgNC0vUujBuf\nnZB0I6mkSDV65+eu9IxoI9tTYyI19abIY6XmiJ5CVicq6qEScX59V305WjmZ6ziex5Icxv02YFtS\nIccbgd/YPrNAtuudnqTDSJ/xuQw3W0Wo7mzGVZKOZlbH2Jj8o0eRLwKXKBXLeyl6p1C2ubTJnBSU\nNpH0KZLdfmWlBloNFiR1cCzhXklfJZmuIJXKvrdQdsKhob4UczFUidik/1edCtLbdrmEXjPju0bS\nr5i1jP2TpF3Ise7g73HKBfpJviFpXcqLcq7l4f1lLskKuISGYt2ouhwiVHf2QlKrUEd7gElQ4wVJ\nL6dG9I5mbUzUyBF5FjjOHTLWJS1EMnscQrJjN5jpsiiWRvLVQVQyh4EDXZA4NhHRCM2YGnjAddV6\nzYzvce4jSE2cqr1bniKdiKe02unmBNQRcXn/lJ52euOJUB5BX+klekc9NiYKJha9ZMb3OO8s+SSV\nTKO9Co0AAAhISURBVPkZtl/TQuaAfHcNUlXeRlTYO0lN2j7cYc7qTm8NUl2vl3Z6Luh2mS+SDmAo\nkvEyUn7Kk51kB0EojwEhaQngm8DStrfNP5SNXdBuciLTh+idd1EJ87V9Xv9X2XLe1Um1y1ZkeOmH\n2X6nONmQdAewtXM+kFK74986da9s63fJ4eBvd464Uqp792vbm48kk1/X805P0lmkMN1qJOPatksq\n8vad8HkMjpNIyVNfyY//QPJ/zNbKgx5supIOISVvnZaH9pH0Rtv793uRLfgZqdzED6nf8TGYWHwB\nuFLSH0km0pWAT0uan6ET80gsQTKnNng2j7WlT2bAXiry9p1QHoNjMdtnZns+tp+XNBlOSjdK2qjJ\npltaGfftwDrO/SGUWsHeRLKND5rnbR8zCvMEY4zt83NWfaOUzV0Vs2qnIpqnANcrlWSHVHyzk8Lp\nF/+WtKmHV+QtaQ8xEEJ5DI6nc0hfo2DcRqSIjtmSPkbvLAw0nNwLtXthP8hRPgC/kvRpUgHLahhk\nkcM9mDhImo+Ua7GC7d0lrSZpjRITqe2DJ
V3AUA/zOsU3e+VTwMnZ9yHS7+SjozT3LITPY0Ao1fw/\nitRI6jZSdMf7XVh6eaLRJ5vuTsC3SHWSRPJ97Gv7p20FeyCXizC07AZnF5SNCCYWkn4KTCNVEFgr\nK5Or3aGMfEV+U2A12ycqlZZfwHarsiMDQV1U5B3IOkJ5DA6lFqxrkE5Md3kU6jtNdCQtRYpmgRTF\nMhG6+QUTCOXKtk21rYoSQnPU1Xqk0uqrS1oa+JntTQa43r6ECfebMFsNlg0Yit55gyRsnzK2Sxr3\nrM9QtJWBX43GpEo9rH9je6ak/wbeAHxjFE0SwejxrKR5GTIpr0LFVNmB95CS9W4EsP2XHHE1SBrH\nbxkmPOC5RySUx4CQ9GNSE6ibGYreMcnhFrRA0rdIP45GtNXekjYepWirr9r+WTZJvBX4X1L01Yaj\nMHcwuhwI/AZYTtJpwCaU+w6etW1JDcUz/0BWWMH2QXmuy4E3VMKEDwR+Pej5RyKUx+BYj9QXOeyC\n5WzH2EVbNRT820lZ7b+W9D+jMG8wytj+naRppDIfAvZxeQ+ZMyUdCywsaXdgN+D4AS21ma7ChAdF\nKI/BcRuwJGUtOYMhRjXaqsJD+aSwFXBoLrEyxyjOH4wSubbV6cC5tp/u9Poqtr8jaStSOZPVga/Z\nvnAAy2xFqzDhk0Zp7lkIh3mfqRRdWxBYh2STHPPGLROBsYi2qsw9H6m43XTbd2fH/Wtt/27Qcwej\ni1Lflw+Sdpk3kFoen1dSQifLL0nyZ5pUJn3UgjpyFGcjTPjysfTJhfLoMxqhIVEDj1XjlnGOJAHL\nAs8T0VbBKJCrNr8Z2B3YxvaUAplPAF8Dfk+6wHkTqb7UCYNc63gklMeAkHSo7S93GguGkDTd9mvH\neh3B7E+OtnonaQfyBtLO4zMFcncBb8xl2Ru9Pa62vUZ7ydmPsOkOjq1ajHXb+2CycKOk9Tu/LAi6\nJxfvvIO06ziaVDOqo+LI/I1U+LPBzDw26YidR59RpTER8MfKUwsCV3Uq3TyZkXQnqSvdA6QGWkVd\n6YKgBElL57yMrUm9yGvXmpN0CvBa4BySz2N74NZ8G7OEvbEgoq36z+nABfTQmGgSs/VYLyCYrflh\nrmV2KfCMpCttP1/zGH9k+EXhOfnvoBMFxx2x8xggktZmKDLiCtu3jOV6gmCyk5uVbUEyIW9Casr0\nG1J1gT/VOM4U0q54ZscXz6aE8hgQkvYG9gB+kYfeQ0o+O2rsVhUEQRVJK5EUyTbAkrY36PD69Uh9\neho7jSeB3WxPG+hCxyGhPAaEpFtJnQOfzo/nB64J+30QjC0jREJ+G/hv28+OINZ43a2knuNX5Meb\nAt+fjL/riLYaHGJ4R7oXaF32OwiC0aVVJOQ2nRRH5oWG4gDIjZnq+k1mC8JhPjhOBK5rKiUwu7eg\nDYJxSzUSMu8gGiwIXF14mMtyGZufkKKtPghcmjO/sX1jH5c8rgmz1QDJX6hN88Mrorx3EIwduQPf\nIvQQCSnpkjZP2/abe1jihCKURxAEkwJJU2w/VWk9PIwIpa9HKI8gCCYFks6z/Y4RWg+3bTks6cO2\nTx2pq99kSg5sED6PIAgmBbbfkf+u1IV4o+nTpEsGHInYeQRBMKmQtHmrcduXj/ZaJjKhPIIgmFTk\nnjsN5iH15phW4uyWtDiphPuKVCw3tnfr8zLHPWG2CoJgUmH7ndXHkpYD/q9Q/BzgCuAihudxTTpi\n5xEEwaQmNyKbYXvNgtfebHudUVjWuCd2HkEQTCokHUWKtoJUZWMdoDS57zxJ29k+fyCLm0DEziMI\ngkmFpF0rD58H7rd9VQeZmQyF984PPAM8x1DPmY4tbGc3YucRBMGkIfctf5vtD9WRsx0huk1EYcQg\nCCYNuXvgCpLm7kZe0o8l7S7pVX1e2oQjzFZBEEwqcivZVwPnktodA2VZ4pK2JDV42wxYBbgJuNz2\nEYNZ7
fgllEcQBJMKSQe0Grd9UKH8nMD6wJbAnsC/bU+6nUj4PIIgmBRI+rHtjwBPdLtTkHQxyWF+\nDSnfY33bj/VxmROG8HkEQTBZWFfS0sBukhaRtGj1VniMW4FngbWA1wFrSZp3UAsez4TZKgiCSYGk\nvYFPASsDD/3/9u4QB4EYCMPoX4XiNFwIS4Lcu3AbDJo7cJNBNDgEQ9Zs9j3fZtyXtEmbxqu6X/Y6\nJjknWTL/Pj+sOOomiAewK2OMW1Vd/lx7zbwsPyV5ZR5dParqvt6E2yAeAD8aYyyZwXhW1S7/Lv8Q\nDwDaXJgD0CYeALSJBwBt4gFA2xvlkg7zl/yPqQAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "objects = (list(item_summary_df['item_name'].head(n=20)))\n", "y_pos = np.arange(len(objects))\n", "performance = list(item_summary_df['item_count'].head(n=20))\n", " \n", "plt.bar(y_pos, performance, align='center', alpha=0.5)\n", "plt.xticks(y_pos, objects, rotation='vertical')\n", "plt.ylabel('Item count')\n", "plt.title('Item sales distribution')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyze items contributing to top sales" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
item_nameitem_countitem_perctotal_perc
0whole milk25130.0579470.057947
1other vegetables19030.0438810.101829
2rolls/buns18090.0417140.143542
3soda17150.0395460.183089
4yogurt13720.0316370.214725
5bottled water10870.0250650.239791
6root vegetables10720.0247190.264510
7tropical fruit10320.0237970.288307
8shopping bags9690.0223440.310651
9sausage9240.0213070.331957
\n", "
" ], "text/plain": [ " item_name item_count item_perc total_perc\n", "0 whole milk 2513 0.057947 0.057947\n", "1 other vegetables 1903 0.043881 0.101829\n", "2 rolls/buns 1809 0.041714 0.143542\n", "3 soda 1715 0.039546 0.183089\n", "4 yogurt 1372 0.031637 0.214725\n", "5 bottled water 1087 0.025065 0.239791\n", "6 root vegetables 1072 0.024719 0.264510\n", "7 tropical fruit 1032 0.023797 0.288307\n", "8 shopping bags 969 0.022344 0.310651\n", "9 sausage 924 0.021307 0.331957" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "item_summary_df['item_perc'] = item_summary_df['item_count']/total_item_count\n", "item_summary_df['total_perc'] = item_summary_df.item_perc.cumsum()\n", "item_summary_df.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyze items contributing to top 50% of sales" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(19, 4)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "item_summary_df[item_summary_df.total_perc <= 0.5].shape" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
item_nameitem_countitem_perctotal_perc
0whole milk25130.0579470.057947
1other vegetables19030.0438810.101829
2rolls/buns18090.0417140.143542
3soda17150.0395460.183089
4yogurt13720.0316370.214725
5bottled water10870.0250650.239791
6root vegetables10720.0247190.264510
7tropical fruit10320.0237970.288307
8shopping bags9690.0223440.310651
9sausage9240.0213070.331957
10pastry8750.0201770.352134
11citrus fruit8140.0187700.370904
12bottled beer7920.0182630.389167
13newspapers7850.0181010.407268
14canned beer7640.0176170.424885
15pip fruit7440.0171560.442041
16fruit/vegetable juice7110.0163950.458436
17whipped/sour cream7050.0162570.474693
18brown bread6380.0147120.489404
\n", "
" ], "text/plain": [ " item_name item_count item_perc total_perc\n", "0 whole milk 2513 0.057947 0.057947\n", "1 other vegetables 1903 0.043881 0.101829\n", "2 rolls/buns 1809 0.041714 0.143542\n", "3 soda 1715 0.039546 0.183089\n", "4 yogurt 1372 0.031637 0.214725\n", "5 bottled water 1087 0.025065 0.239791\n", "6 root vegetables 1072 0.024719 0.264510\n", "7 tropical fruit 1032 0.023797 0.288307\n", "8 shopping bags 969 0.022344 0.310651\n", "9 sausage 924 0.021307 0.331957\n", "10 pastry 875 0.020177 0.352134\n", "11 citrus fruit 814 0.018770 0.370904\n", "12 bottled beer 792 0.018263 0.389167\n", "13 newspapers 785 0.018101 0.407268\n", "14 canned beer 764 0.017617 0.424885\n", "15 pip fruit 744 0.017156 0.442041\n", "16 fruit/vegetable juice 711 0.016395 0.458436\n", "17 whipped/sour cream 705 0.016257 0.474693\n", "18 brown bread 638 0.014712 0.489404" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "item_summary_df[item_summary_df.total_perc <= 0.5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Construct Orange Table " ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "input_assoc_rules = grocery_df\n", "domain_grocery = Domain([DiscreteVariable.make(name=item,values=['0', '1']) for item in input_assoc_rules.columns])\n", "data_gro_1 = Orange.data.Table.from_numpy(domain=domain_grocery, X=input_assoc_rules.values,Y= None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Prune Dataset for frequently purchased items" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def prune_dataset(input_df, length_trans = 2, total_sales_perc = 0.5, start_item = None, end_item = None):\n",
"    \"\"\"Keep the best-selling items and the transactions containing enough of them.\n",
"\n",
"    Items are ranked by the number of transactions they appear in. With no\n",
"    start_item/end_item, the top-ranked items whose cumulative share of sales\n",
"    stays below total_sales_perc are selected; otherwise the rank slice\n",
"    [start_item:end_item] is selected. Rows holding fewer than length_trans of\n",
"    the selected items are dropped.\n",
"\n",
"    Returns (pruned_df, item_summary): the filtered one-hot frame restricted to\n",
"    the selected items, and the per-item counts of those items.\n",
"    Raises ValueError when the rank slice is empty. input_df is not modified.\n",
"    \"\"\"\n",
"    # Work on a separate frame: writing the 'total_items' helper column into\n",
"    # input_df would leak it into the caller's data and into later cells.\n",
"    work_df = input_df[[col for col in input_df.columns if col != 'total_items']]\n",
"    item_count = work_df.sum().sort_values(ascending = False).reset_index()\n",
"    item_count.columns = ['item_name', 'item_count']\n",
"    if not start_item and not end_item:\n",
"        # Select by cumulative share of all item sales.\n",
"        item_count['item_perc'] = item_count['item_count']/item_count['item_count'].sum()\n",
"        item_count['total_perc'] = item_count.item_perc.cumsum()\n",
"        item_summary = item_count[item_count.total_perc < total_sales_perc]\n",
"    else:\n",
"        # Select by rank; a missing bound means 'from the top' / 'to the end'.\n",
"        first = start_item or 0\n",
"        last = len(item_count) if end_item is None else end_item\n",
"        if last <= first:\n",
"            raise ValueError('end_item must be greater than start_item')\n",
"        item_summary = item_count[first:last]\n",
"    selected_items = list(item_summary.item_name)\n",
"    # Number of selected items present in each transaction.\n",
"    enough_items = work_df[selected_items].sum(axis = 1) >= length_trans\n",
"    return work_df.loc[enough_items, selected_items], item_summary"
] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(4585, 13)\n", "['whole milk', 'other vegetables', 'rolls/buns', 'soda', 'yogurt', 'bottled water', 'root vegetables', 'tropical fruit', 'shopping bags', 'sausage', 'pastry', 'citrus fruit', 'bottled beer']\n" ] } ], "source": [ "output_df, item_counts = prune_dataset(input_df=grocery_df, length_trans=2,total_sales_perc=0.4)\n", "print(output_df.shape)\n", "print(list(output_df.columns))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Association Rule Mining with FP Growth" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "input_assoc_rules = output_df\n", "domain_grocery = Domain([DiscreteVariable.make(name=item,values=['0', '1']) for item in input_assoc_rules.columns])\n", "data_gro_1 = Orange.data.Table.from_numpy(domain=domain_grocery, 
X=input_assoc_rules.values,Y= None)\n", "data_gro_1_en, mapping = OneHot.encode(data_gro_1, include_class=False)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "num of required transactions = 45\n" ] } ], "source": [ "min_support = 0.01\n", "print(\"num of required transactions = \", int(input_assoc_rules.shape[0]*min_support))\n", "num_trans = input_assoc_rules.shape[0]*min_support\n", "itemsets = dict(frequent_itemsets(data_gro_1_en, min_support=min_support))" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "166886" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(itemsets)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Raw rules data frame of 16628 rules generated\n" ] } ], "source": [
"confidence = 0.3\n",
"rules_df = pd.DataFrame()\n",
"# Reset the result so rules left over from an earlier run are never reused by mistake.\n",
"pruned_rules_df = pd.DataFrame()\n",
"\n",
"# Rule generation is skipped when there are too many itemsets to enumerate.\n",
"if len(itemsets) < 1000000:\n",
"    # Keep only rules with a single-item consequent.\n",
"    rules = [(P, Q, supp, conf)\n",
"             for P, Q, supp, conf in association_rules(itemsets, confidence)\n",
"             if len(Q) == 1]\n",
"\n",
"    # Map every encoded item id to a readable 'name=value' label.\n",
"    names = {item: '{}={}'.format(var.name, val)\n",
"             for item, var, val in OneHot.decode(mapping, data_gro_1, mapping)}\n",
"\n",
"    # Only 'item=1' labels (item was bought) may appear in a reported rule.\n",
"    # A set keeps the membership tests in the loop below constant-time.\n",
"    eligible_ante = {label for label in names.values() if label.endswith(\"1\")}\n",
"\n",
"    N = input_assoc_rules.shape[0]\n",
"\n",
"    rule_stats = list(rules_stats(rules, itemsets, N))\n",
"\n",
"    rule_list_df = []\n",
"    for rule_stat in rule_stats:\n",
"        ante, cons = rule_stat[0], rule_stat[1]\n",
"        named_cons = names[next(iter(cons))]\n",
"        if named_cons not in eligible_ante:\n",
"            continue\n",
"        # Drop 'item=0' entries and strip the trailing '=1' from the rest.\n",
"        rule_lhs = [names[i][:-2] for i in ante if names[i] in eligible_ante]\n",
"        # Report only rules with at least two purchased items on the left-hand side.\n",
"        if len(rule_lhs) > 1:\n",
"            rule_list_df.append({'support' : rule_stat[2],\n",
"                                 'confidence' : rule_stat[3],\n",
"                                 'coverage' : rule_stat[4],\n",
"                                 'strength' : rule_stat[5],\n",
"                                 'lift' : rule_stat[6],\n",
"                                 'leverage' : rule_stat[7],\n",
"                                 'antecedent': ', '.join(rule_lhs),\n",
"                                 'consequent': named_cons[:-2]})\n",
"    rules_df = pd.DataFrame(rule_list_df)\n",
"    print(\"Raw rules data frame of {} rules generated\".format(rules_df.shape[0]))\n",
"    if not rules_df.empty:\n",
"        pruned_rules_df = rules_df.groupby(['antecedent','consequent']).max().reset_index()\n",
"    else:\n",
"        print(\"Unable to generate any rule\")"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sorting rules in our Grocery Dataset" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
consequentantecedentsupportconfidencelift
4root vegetablesyogurt, whole milk, tropical fruit2280.4636362.230611
5sausageshopping bags, rolls/buns590.3931622.201037
8tropical fruityogurt, root vegetables, whole milk920.4299072.156588
1citrus fruitwhole milk, other vegetables, tropical fruit660.3333332.125637
10yogurtwhole milk, tropical fruit1990.4842111.891061
2other vegetablesyogurt, whole milk, tropical fruit2280.6438361.826724
6shopping bagssausage, soda500.3048781.782992
0bottled wateryogurt, soda590.3333331.707635
9whole milkyogurt, tropical fruit2280.7540981.703222
3rolls/bunsyogurt, whole milk, tropical fruit970.5222221.679095
7sodayogurt, sausage950.3906251.398139
\n", "
" ], "text/plain": [ " consequent antecedent support \\\n", "4 root vegetables yogurt, whole milk, tropical fruit 228 \n", "5 sausage shopping bags, rolls/buns 59 \n", "8 tropical fruit yogurt, root vegetables, whole milk 92 \n", "1 citrus fruit whole milk, other vegetables, tropical fruit 66 \n", "10 yogurt whole milk, tropical fruit 199 \n", "2 other vegetables yogurt, whole milk, tropical fruit 228 \n", "6 shopping bags sausage, soda 50 \n", "0 bottled water yogurt, soda 59 \n", "9 whole milk yogurt, tropical fruit 228 \n", "3 rolls/buns yogurt, whole milk, tropical fruit 97 \n", "7 soda yogurt, sausage 95 \n", "\n", " confidence lift \n", "4 0.463636 2.230611 \n", "5 0.393162 2.201037 \n", "8 0.429907 2.156588 \n", "1 0.333333 2.125637 \n", "10 0.484211 1.891061 \n", "2 0.643836 1.826724 \n", "6 0.304878 1.782992 \n", "0 0.333333 1.707635 \n", "9 0.754098 1.703222 \n", "3 0.522222 1.679095 \n", "7 0.390625 1.398139 " ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Keep the single best rule (highest lift, then support, then confidence) for each\n",
"# consequent. Sorting first and then dropping duplicates keeps whole rows together,\n",
"# so every displayed antecedent/support/confidence/lift belongs to one real rule;\n",
"# groupby('consequent').max() would instead take each column's maximum independently.\n",
"(pruned_rules_df[['antecedent','consequent',\n",
"                  'support','confidence','lift']]\n",
"     .sort_values(['lift', 'support','confidence'],\n",
"                  ascending=False)\n",
"     .drop_duplicates(subset='consequent')\n",
"     .reset_index(drop=True))"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Association rule mining on our Online Retail dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load and Filter Dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "cs_mba = pd.read_excel(io=r'Online Retail.xlsx')\n", "cs_mba_uk = cs_mba[cs_mba.Country == 'United Kingdom']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
InvoiceNoStockCodeDescriptionQuantityInvoiceDateUnitPriceCustomerIDCountry
053636585123AWHITE HANGING HEART T-LIGHT HOLDER62010-12-01 08:26:002.5517850.0United Kingdom
153636571053WHITE METAL LANTERN62010-12-01 08:26:003.3917850.0United Kingdom
253636584406BCREAM CUPID HEARTS COAT HANGER82010-12-01 08:26:002.7517850.0United Kingdom
353636584029GKNITTED UNION FLAG HOT WATER BOTTLE62010-12-01 08:26:003.3917850.0United Kingdom
453636584029ERED WOOLLY HOTTIE WHITE HEART.62010-12-01 08:26:003.3917850.0United Kingdom
\n", "
" ], "text/plain": [ " InvoiceNo StockCode Description Quantity \\\n", "0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 \n", "1 536365 71053 WHITE METAL LANTERN 6 \n", "2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 \n", "3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 \n", "4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 \n", "\n", " InvoiceDate UnitPrice CustomerID Country \n", "0 2010-12-01 08:26:00 2.55 17850.0 United Kingdom \n", "1 2010-12-01 08:26:00 3.39 17850.0 United Kingdom \n", "2 2010-12-01 08:26:00 2.75 17850.0 United Kingdom \n", "3 2010-12-01 08:26:00 3.39 17850.0 United Kingdom \n", "4 2010-12-01 08:26:00 3.39 17850.0 United Kingdom " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cs_mba_uk.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Remove returned items, as we are only interested in buying patterns." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [
"# Cancelled invoices carry a 'C' in the invoice number; numeric invoice numbers\n",
"# yield NaN from .str.contains, which na=False treats as 'not cancelled'.\n",
"cs_mba_uk = cs_mba_uk[~cs_mba_uk.InvoiceNo.str.contains(\"C\", na=False)]\n",
"# Keep non-negative quantities only. The comparison is written out explicitly\n",
"# because '~' binds tighter than '<', so '~Quantity<0' means '(~Quantity) < 0'.\n",
"cs_mba_uk = cs_mba_uk[cs_mba_uk.Quantity >= 0]"
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(486286, 8)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cs_mba_uk.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(18786,)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cs_mba_uk.InvoiceNo.value_counts().shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Build Transaction Dataset" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "items = list(cs_mba_uk.Description.unique())\n", "grouped = cs_mba_uk.groupby('InvoiceNo')\n", "transaction_level_df_uk = grouped.aggregate(lambda x: 
tuple(x)).reset_index()[['InvoiceNo','Description']]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "transaction_dict = {item:0 for item in items}\n", "output_dict = dict()\n", "temp = dict()\n", "for rec in transaction_level_df_uk.to_dict('records'):\n", " invoice_num = rec['InvoiceNo']\n", " items_list = rec['Description']\n", " transaction_dict = {item:0 for item in items}\n", " transaction_dict.update({item:1 for item in items if item in items_list})\n", " temp.update({invoice_num:transaction_dict})\n", "\n", "new = [v for k,v in temp.items()]\n", "tranasction_df = pd.DataFrame(new)\n", "del(tranasction_df[tranasction_df.columns[0]])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(18786, 4058)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tranasction_df.shape" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
METAL SIGN HIS DINNER IS SERVEDJAM JAR WITH GREEN LIDSANDWICH BATH SPONGEKIDS RAIN MAC BLUEEMPIRE TISSUE BOXS/2 BEACH HUT TREASURE CHESTSEMBROIDERED RIBBON REEL DAISYGOLD/AMBER DROP EARRINGS W LEAFANTIQUE GLASS PEDESTAL BOWLCUPCAKE LACE PAPER SET 6...RETROSPOT CANDLE MEDIUMLARGE WHITE/PINK ROSE ART FLOWERDOORMAT UNION JACK GUNS AND ROSESGLASS BEAD HOOP NECKLACE BLACKMEDIUM PARLOUR FRAMERED 3 PIECE RETROSPOT CUTLERY SETFRENCH WC SIGN BLUE METALDOORMAT MERRY CHRISTMAS REDSMALL POPCORN HOLDERPINK MURANO TWIST NECKLACE
00000000000...0000000000
10000000000...0000000000
20100000000...0000000000
30000000000...0000000000
40000000000...0000000000
\n", "

5 rows × 4058 columns

\n", "
" ], "text/plain": [ " METAL SIGN HIS DINNER IS SERVED JAM JAR WITH GREEN LID \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 1 \n", "3 0 0 \n", "4 0 0 \n", "\n", " SANDWICH BATH SPONGE KIDS RAIN MAC BLUE EMPIRE TISSUE BOX \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " S/2 BEACH HUT TREASURE CHESTS EMBROIDERED RIBBON REEL DAISY \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " GOLD/AMBER DROP EARRINGS W LEAF ANTIQUE GLASS PEDESTAL BOWL \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " CUPCAKE LACE PAPER SET 6 ... \\\n", "0 0 ... \n", "1 0 ... \n", "2 0 ... \n", "3 0 ... \n", "4 0 ... \n", "\n", " RETROSPOT CANDLE MEDIUM LARGE WHITE/PINK ROSE ART FLOWER \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " DOORMAT UNION JACK GUNS AND ROSES GLASS BEAD HOOP NECKLACE BLACK \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " MEDIUM PARLOUR FRAME RED 3 PIECE RETROSPOT CUTLERY SET \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " FRENCH WC SIGN BLUE METAL DOORMAT MERRY CHRISTMAS RED \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " SMALL POPCORN HOLDER PINK MURANO TWIST NECKLACE \n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", "[5 rows x 4058 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tranasction_df.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(4961, 15)\n" ] } ], "source": [ "output_df_uk_n, item_counts_n = prune_dataset(input_df=tranasction_df, length_trans=2, start_item=0, end_item=15)\n", "print(output_df_uk_n.shape)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WHITE HANGING HEART T-LIGHT HOLDERJUMBO BAG RED RETROSPOTREGENCY CAKESTAND 3 TIERPARTY BUNTINGLUNCH BAG RED RETROSPOTASSORTED COLOUR BIRD ORNAMENTSET OF 3 CAKE TINS PANTRY DESIGNNATURAL SLATE HEART CHALKBOARDLUNCH BAG BLACK SKULL.HEART OF WICKER SMALLJUMBO BAG PINK POLKADOTJUMBO SHOPPER VINTAGE RED PAISLEYJUMBO STORAGE BAG SUKIPACK OF 72 RETROSPOT CAKE CASESPAPER CHAIN KIT 50'S CHRISTMAS
3101010100000000
5000001000110000
8000101100000000
16011100000111110
18001100000000000
\n", "
" ], "text/plain": [ " WHITE HANGING HEART T-LIGHT HOLDER JUMBO BAG RED RETROSPOT \\\n", "3 1 0 \n", "5 0 0 \n", "8 0 0 \n", "16 0 1 \n", "18 0 0 \n", "\n", " REGENCY CAKESTAND 3 TIER PARTY BUNTING LUNCH BAG RED RETROSPOT \\\n", "3 1 0 1 \n", "5 0 0 0 \n", "8 0 1 0 \n", "16 1 1 0 \n", "18 1 1 0 \n", "\n", " ASSORTED COLOUR BIRD ORNAMENT SET OF 3 CAKE TINS PANTRY DESIGN \\\n", "3 0 1 \n", "5 1 0 \n", "8 1 1 \n", "16 0 0 \n", "18 0 0 \n", "\n", " NATURAL SLATE HEART CHALKBOARD LUNCH BAG BLACK SKULL. \\\n", "3 0 0 \n", "5 0 0 \n", "8 0 0 \n", "16 0 0 \n", "18 0 0 \n", "\n", " HEART OF WICKER SMALL JUMBO BAG PINK POLKADOT \\\n", "3 0 0 \n", "5 1 1 \n", "8 0 0 \n", "16 1 1 \n", "18 0 0 \n", "\n", " JUMBO SHOPPER VINTAGE RED PAISLEY JUMBO STORAGE BAG SUKI \\\n", "3 0 0 \n", "5 0 0 \n", "8 0 0 \n", "16 1 1 \n", "18 0 0 \n", "\n", " PACK OF 72 RETROSPOT CAKE CASES PAPER CHAIN KIT 50'S CHRISTMAS \n", "3 0 0 \n", "5 0 0 \n", "8 0 0 \n", "16 1 0 \n", "18 0 0 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output_df_uk_n.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Association Rule Mining with FP Growth" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "input_assoc_rules = output_df_uk_n\n", "domain_transac = Domain([DiscreteVariable.make(name=item,values=['0', '1']) for item in input_assoc_rules.columns])\n", "data_tran_uk = Orange.data.Table.from_numpy(domain=domain_transac, X=input_assoc_rules.values,Y= None)\n", "data_tran_uk_en, mapping = OneHot.encode(data_tran_uk, include_class=True)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "num of required transactions = 49\n" ] } ], "source": [ "support = 0.01\n", "print(\"num of required transactions = \", int(input_assoc_rules.shape[0]*support))\n", "num_trans = input_assoc_rules.shape[0]*support\n", "itemsets = 
dict(frequent_itemsets(data_tran_uk_en, support))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "645632" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(itemsets)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Raw rules data frame of 117464 rules generated\n" ] } ], "source": [
"confidence = 0.3\n",
"rules_df = pd.DataFrame()\n",
"# Reset the result so rules left over from an earlier run are never reused by mistake.\n",
"pruned_rules_df = pd.DataFrame()\n",
"\n",
"# Rule generation is skipped when there are too many itemsets to enumerate.\n",
"if len(itemsets) < 1000000:\n",
"    # Keep only rules with a single-item consequent.\n",
"    rules = [(P, Q, supp, conf)\n",
"             for P, Q, supp, conf in association_rules(itemsets, confidence)\n",
"             if len(Q) == 1]\n",
"\n",
"    # Map every encoded item id to a readable 'name=value' label.\n",
"    names = {item: '{}={}'.format(var.name, val)\n",
"             for item, var, val in OneHot.decode(mapping, data_tran_uk, mapping)}\n",
"\n",
"    # Only 'item=1' labels (item was bought) may appear in a reported rule.\n",
"    # A set keeps the membership tests in the loop below constant-time.\n",
"    eligible_ante = {label for label in names.values() if label.endswith(\"1\")}\n",
"\n",
"    N = input_assoc_rules.shape[0]\n",
"\n",
"    rule_stats = list(rules_stats(rules, itemsets, N))\n",
"\n",
"    rule_list_df = []\n",
"    for rule_stat in rule_stats:\n",
"        ante, cons = rule_stat[0], rule_stat[1]\n",
"        named_cons = names[next(iter(cons))]\n",
"        if named_cons not in eligible_ante:\n",
"            continue\n",
"        # Drop 'item=0' entries and strip the trailing '=1' from the rest.\n",
"        rule_lhs = [names[i][:-2] for i in ante if names[i] in eligible_ante]\n",
"        # Report only rules with at least two purchased items on the left-hand side.\n",
"        if len(rule_lhs) > 1:\n",
"            rule_list_df.append({'support' : rule_stat[2],\n",
"                                 'confidence' : rule_stat[3],\n",
"                                 'coverage' : rule_stat[4],\n",
"                                 'strength' : rule_stat[5],\n",
"                                 'lift' : rule_stat[6],\n",
"                                 'leverage' : rule_stat[7],\n",
"                                 'antecedent': ', '.join(rule_lhs),\n",
"                                 'consequent': named_cons[:-2]})\n",
"    rules_df = pd.DataFrame(rule_list_df)\n",
"    print(\"Raw rules data frame of {} rules generated\".format(rules_df.shape[0]))\n",
"    if not rules_df.empty:\n",
"        pruned_rules_df = rules_df.groupby(['antecedent','consequent']).max().reset_index()\n",
"    else:\n",
"        print(\"Unable to generate any rule\")"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sort and display rules" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
consequentantecedentsupportconfidencelift
8PACK OF 72 RETROSPOT CAKE CASESWHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER, NATURAL SLATE HEART CHALKBOARD1450.9710145.394404
9PAPER CHAIN KIT 50'S CHRISTMASWHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER, NATURAL SLATE HEART CHALKBOARD940.5977014.341428
3JUMBO SHOPPER VINTAGE RED PAISLEYWHITE HANGING HEART T-LIGHT HOLDER, PAPER CHAIN KIT 50'S CHRISTMAS3840.8793104.218819
5LUNCH BAG BLACK SKULL.WHITE HANGING HEART T-LIGHT HOLDER, PACK OF 72 RETROSPOT CAKE CASES, LUNCH BAG RED RETROSPOT2270.8524594.078157
4JUMBO STORAGE BAG SUKIWHITE HANGING HEART T-LIGHT HOLDER, SET OF 3 CAKE TINS PANTRY DESIGN , JUMBO BAG PINK POLKADOT4050.8524594.016191
\n", "
" ], "text/plain": [ " consequent \\\n", "8 PACK OF 72 RETROSPOT CAKE CASES \n", "9 PAPER CHAIN KIT 50'S CHRISTMAS \n", "3 JUMBO SHOPPER VINTAGE RED PAISLEY \n", "5 LUNCH BAG BLACK SKULL. \n", "4 JUMBO STORAGE BAG SUKI \n", "\n", " antecedent \\\n", "8 WHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER, NATURAL SLATE HEART CHALKBOARD \n", "9 WHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER, NATURAL SLATE HEART CHALKBOARD \n", "3 WHITE HANGING HEART T-LIGHT HOLDER, PAPER CHAIN KIT 50'S CHRISTMAS \n", "5 WHITE HANGING HEART T-LIGHT HOLDER, PACK OF 72 RETROSPOT CAKE CASES, LUNCH BAG RED RETROSPOT \n", "4 WHITE HANGING HEART T-LIGHT HOLDER, SET OF 3 CAKE TINS PANTRY DESIGN , JUMBO BAG PINK POLKADOT \n", "\n", " support confidence lift \n", "8 145 0.971014 5.394404 \n", "9 94 0.597701 4.341428 \n", "3 384 0.879310 4.218819 \n", "5 227 0.852459 4.078157 \n", "4 405 0.852459 4.016191 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [
"dw = pd.options.display.max_colwidth\n",
"pd.options.display.max_colwidth = 100\n",
"# Keep the single best rule (highest lift, then support, then confidence) for each\n",
"# consequent. Sorting first and then dropping duplicates keeps whole rows together,\n",
"# so every displayed antecedent/support/confidence/lift belongs to one real rule;\n",
"# groupby('consequent').max() would instead take each column's maximum independently.\n",
"(pruned_rules_df[['antecedent','consequent',\n",
"                  'support','confidence','lift']]\n",
"     .sort_values(['lift', 'support','confidence'],\n",
"                  ascending=False)\n",
"     .drop_duplicates(subset='consequent')\n",
"     .reset_index(drop=True)).head(5)"
] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "pd.options.display.max_colwidth = dw" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }