{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load Necessary Dependencies"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import Orange\n",
"import pandas as pd\n",
"from Orange.data import Domain, DiscreteVariable, ContinuousVariable\n",
"from orangecontrib.associate.fpgrowth import *\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Construct and Load the Groceries Dataset"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Parse the transaction file once; each CSV row is one basket of item names.\n",
"with open(\"grocery_dataset.txt\") as f:\n",
"    transactions = list(csv.reader(f, delimiter=\",\"))\n",
"\n",
"# Distinct item names seen across all baskets.\n",
"grocery_items = set()\n",
"for basket in transactions:\n",
"    grocery_items.update(basket)\n",
"\n",
"# Sort the names so the column order is reproducible instead of following\n",
"# set iteration order, which can differ between kernel sessions.\n",
"item_columns = sorted(grocery_items)\n",
"\n",
"# One-hot encode: one dict per basket, 1 if the item is in the basket, else 0.\n",
"output_list = []\n",
"for basket in transactions:\n",
"    basket_items = set(basket)  # set membership instead of scanning the list per item\n",
"    output_list.append({item: int(item in basket_items) for item in item_columns})\n",
"grocery_df = pd.DataFrame(output_list, columns=item_columns)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Instant food products | \n",
" UHT-milk | \n",
" abrasive cleaner | \n",
" artif. sweetener | \n",
" baby cosmetics | \n",
" baby food | \n",
" bags | \n",
" baking powder | \n",
" bathroom cleaner | \n",
" beef | \n",
" ... | \n",
" turkey | \n",
" vinegar | \n",
" waffles | \n",
" whipped/sour cream | \n",
" whisky | \n",
" white bread | \n",
" white wine | \n",
" whole milk | \n",
" yogurt | \n",
" zwieback | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 169 columns
\n",
"
"
],
"text/plain": [
" Instant food products UHT-milk abrasive cleaner artif. sweetener \\\n",
"0 0 0 0 0 \n",
"1 0 0 0 0 \n",
"2 0 0 0 0 \n",
"3 0 0 0 0 \n",
"4 0 0 0 0 \n",
"\n",
" baby cosmetics baby food bags baking powder bathroom cleaner beef \\\n",
"0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 0 \n",
"\n",
" ... turkey vinegar waffles whipped/sour cream whisky \\\n",
"0 ... 0 0 0 0 0 \n",
"1 ... 0 0 0 0 0 \n",
"2 ... 0 0 0 0 0 \n",
"3 ... 0 0 0 0 0 \n",
"4 ... 0 0 0 0 0 \n",
"\n",
" white bread white wine whole milk yogurt zwieback \n",
"0 0 0 0 0 0 \n",
"1 0 0 0 1 0 \n",
"2 0 0 1 0 0 \n",
"3 0 0 0 1 0 \n",
"4 0 0 1 0 0 \n",
"\n",
"[5 rows x 169 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grocery_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# View top sold items"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"43367\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" item_name | \n",
" item_count | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" whole milk | \n",
" 2513 | \n",
"
\n",
" \n",
" 1 | \n",
" other vegetables | \n",
" 1903 | \n",
"
\n",
" \n",
" 2 | \n",
" rolls/buns | \n",
" 1809 | \n",
"
\n",
" \n",
" 3 | \n",
" soda | \n",
" 1715 | \n",
"
\n",
" \n",
" 4 | \n",
" yogurt | \n",
" 1372 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" item_name item_count\n",
"0 whole milk 2513\n",
"1 other vegetables 1903\n",
"2 rolls/buns 1809\n",
"3 soda 1715\n",
"4 yogurt 1372"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-item purchase counts, reused for both the grand total and the ranking.\n",
"item_counts = grocery_df.sum()\n",
"total_item_count = item_counts.sum()\n",
"print(total_item_count)\n",
"\n",
"# Rank items by count, keep the 20 best sellers and label the two columns.\n",
"item_summary_df = item_counts.sort_values(ascending=False).reset_index().head(n=20)\n",
"item_summary_df.columns = ['item_name', 'item_count']\n",
"item_summary_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Visualize top sold items"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY8AAAFkCAYAAADYGbemAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXeYZFW1t98fCBKHIEjOSREFJUsQVCQYMCKggqIgyicY\nrgp4FdCLiFfhEhQBJQmIKCKIoAKS8wxpGIIgQUGSARhRib/vj72LPl1TXbVPhQ7T632eerpqV62z\nd1VXnXX2irJNEARBENRhjrFeQBAEQTDxCOURBEEQ1CaURxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REE\nQRDUJpRHELRB0qWSPjHgOQ6UdGq+v7ykf0qas0/H/oGkr+b7W0h6sB/HzcfbTNJd/TpeMLEI5RGM\nGpLul/TWfP+jkq4c6zWNN2z/yfYCtl9o97rSz8/2nra/0Y+1SbKkVSvHvsL2Gv04djDxCOURBLMp\n/dq9BEErQnkEo46kVwM/ADbOJpon8vjLJX1H0p8kPZpNLvPm57aQ9KCkL0l6TNLDkt4taTtJf5D0\nd0n7t5lzO0m3S5op6SFJ/5XHF5F0nqTHJf0j31+2zXF2k3RHfu1vJa2QxyXp8Ly2pyRNl7TWCMdY\nSdJleS0XAotVnlsxX+G/LD/+qKR782vvk/ShNp/fSZKOkXS+pKeBLfPY/zTNv7+kv+ad4Icq48NM\ndNXdjaTL8/Atec4PNpvBJL06H+MJSTMkvavy3EmSvifp1/m9XCdplZE+52D8E8ojGHVs3wHsCVyT\nTTQL56e+BawOrAOsCiwDfK0iuiQwT2X8eODDwLrAZsBXJa00wrQ/Aj5pe0FgLeD3eXwO4ERgBWB5\n4N/A0a0OIGl7YH/gvcDiwBXAT/LTbwM2z+tfCNgB+NsIazkdmEZSGt8Adh1hvvmBI4Ft87rfCNzc\n5vMD2Bk4GFgQaGXWWjLPu0ye9zhJHU1PtjfPd9fOc/60aa1zAb8Cfge8EvgMcFrTsXcEDgIWAe7J\n6wwmKKE8gnGBJAF7AJ+z/XfbM4Fvkk44DZ4DDrb9HHAG6SR4hO2ZtmcAtwNrjzDFc8CakqbY/oft\nGwFs/832Wbb/lec8GHjTCMfYEzjE9h22n8/rWyfvPp4jnbBfBSi/5uEW73N5YH3gq7afsX056aQ7\nEi8Ca0ma1/bD+X224xzbV9l+0fZ/RnhNY+7LgF+TFF2vbAQsAHzL9rO2fw+cB+xUec3Ztq/Pn91p\npIuEYIISyiMYLywOzAdMy2aPJ4Df5PEGf6s4kv+d/z5aef7fpBNYK94HbAc8kE1GGwNImk/SsZIe\nkPQUcDmw8Aj+ghWAIyrr+zsgYJl8sjwa+B7wmKTjJE1pcYylgX/Yfroy9kCrBefXfJCktB7OJp9X\njfD+Gvy5w/Ot5l66g0wJSwN/tv1i07GXqTx+pHL/X4z8vwomAKE8grGiuZzzX0kn/9fYXjjfFrLd\nlxOM7Rtsb08yqfwSODM/9QVgDWBD21NIpidISqGZP5NMXwtXbvPavjrPcaTtdYE1SearL7Y4xsPA\nItkk1WD5Nuv+re2tgKWAO0mmOpj186PDeINWc/8l33+apMAbLNnhWFX+AiwnqXpOWR54qMYxgglE\nKI9grHgUWFbS3AD5ivV44HBJrwSQtIykrXudSNLc2dG8UDZ5PUUyB0EyNf0beELSosABbQ71A2A/\nSa/Jx11I0gfy/fUlbZht/08D/6nM8RK2HwCmAgfldW0KvHOEdS8haft8sn8G+GflmMM+v5o05t4M\neAfwszx+M/DevBtbFfh4k9yjwMojHPM60m7iS5LmkrRFfl9ndLG+YAIQyiMYK34PzAAekfTXPPZl\nkiP12mxCuoi0K+gHHwHuz8fdE2hEGf0fMC9p53MtyVTWEttnA4cCZ+Tj3AZsm5+eQlJ+/yCZa/4G\n/O8Ih9oZ2JBk9joAOGWE180BfJ50Vf93ki/m
U/m5Vp9fCY/kNf6F5HfY0/ad+bnDgWdJSuLk/HyV\nA4GTs9lumJ/E9rMkZbEt6bP8PrBL5djBbIaiGVQQBEFQl9h5BEEQBLUJ5REEQRDUJpRHEARBUJtQ\nHkEQBEFtXjbWCxgUiy22mFdcccWxXkYQBMGEYtq0aX+1vXin1822ymPFFVdk6tSpY72MIAiCCYWk\nlhUPmgmzVRAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtRmY\n8pC0nKRLJN0uaYakffL4gZIeknRzvm1XkdlP0j2S7qr2cZC0rqTp+bkjc8vSIAiCYIwYZJLg88AX\nbN8oaUFSe9EL83OH2/5O9cWS1iT1q34NqaXlRZJWz21HjwF2JzWcOR/YBrhggGsPgiAI2jAw5WH7\nYVLLTWzPlHQHw/sZN7M9cIbtZ4D7JN0DbCDpfmCK7WsBJJ0CvJsBKo/DL/xDrdd/bqvVB7SSIAiC\n8cmo+DwkrQi8nrRzAPiMpFslnSBpkTy2DKlHdIMH89gy+X7zeKt59pA0VdLUxx9/vI/vIAiCIKgy\ncOUhaQHgLOCztp8imaBWBtYh7Uy+26+5bB9nez3b6y2+eMe6XkEQBEGXDFR5SJqLpDhOs/0LANuP\n2n7B9oukns8b5Jc/BCxXEV82jz2U7zePB0EQBGPEIKOtBPwIuMP2YZXxpSovew9wW75/LrCjpJdL\nWglYDbg++06ekrRRPuYuwDmDWncQBEHQmUFGW20CfASYLunmPLY/sJOkdQAD9wOfBLA9Q9KZwO2k\nSK29cqQVwKeBk4B5SY7yiLQKgiAYQwYZbXUl0Cof4/w2MgcDB7cYnwqs1b/VBUEQBL0QGeZBEARB\nbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtQnlEQRBENQmlEcQBEFQm1AeQRAE\nQW1CeQRBEAS1CeURBEEQ1CaURxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQ\nBEFtQnkEQRAEtQnlEQRBENQmlEcQBEFQm1AeQRAEQW1CeQRBEAS1CeURBEEQ1CaURxAEQVCbUB5B\nEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtQnlEQRBENQmlEcQBEFQm4Ep\nD0nLSbpE0u2SZkjaJ48vKulCSXfnv4tUZPaTdI+kuyRtXRlfV9L0/NyRkjSodQdBEASdGeTO43ng\nC7bXBDYC9pK0JrAvcLHt1YCL82PyczsCrwG2Ab4vac58rGOA3YHV8m2bAa47CIIg6MDAlIfth23f\nmO/PBO4AlgG2B07OLzsZeHe+vz1whu1nbN8H3ANsIGkpYIrta20bOKUiEwRBEIwBo+LzkLQi8Hrg\nOmAJ2w/npx4Blsj3lwH+XBF7MI8tk+83j7eaZw9JUyVNffzxx/u2/iAIgmA4A1cekhYAzgI+a/up\n6nN5J+F+zWX7ONvr2V5v8cUX79dhgyAIgiYGqjwkzUVSHKfZ/kUefjSbosh/H8vjDwHLVcSXzWMP\n5fvN40EQBMEYMchoKwE/Au6wfVjlqXOBXfP9XYFzKuM7Snq5pJVIjvHrs4nrKUkb5WPuUpEJgiAI\nxoCXDfDYmwAfAaZLujmP7Q98CzhT0seBB4AdAGzPkHQmcDspUmsv2y9kuU8DJwHzAhfkWxAEQTBG\nDEx52L4SGCkf4y0jyBwMHNxifCqwVv9WFwRBEPRCZJgHQRAEtQnlEQRBENRmkD6PScnhF/6htszn\ntlp9ACsJgiAYHLHzCIIgCGoTyiMIgiCoTSiPIAiCoDahPIIgCILahPIIgiAIahPKIwiCIKhNKI8g\nCIKgNpHnMc6omycSOSJBEIwFsfMIgiAIahPKIwiCIKhNKI8gCIKgNqE8giAIgtqE8giCIAhqE8oj\nCIIgqE2E
6s5GRJhvEASjRew8giAIgtqE8giCIAhqE8ojCIIgqE0ojyAIgqA2oTyCIAiC2oTyCIIg\nCGoTyiMIgiCoTUflIWmfkrEgCIJg8lCy89i1xdhH+7yOIAiCYAIxYoa5pJ2AnYGVJJ1beWpB4O+D\nXlgQBEEwfmlXnuRq4GFgMeC7lfGZwK2DXFQQBEEwvhlRedh+AHgA2Hj0lhMEQRBMBEoc5u+VdLek\nJyU9JWmmpKdGY3FBEATB+KSkqu63gXfavmPQiwmCIAgmBiXRVo92ozgknSDpMUm3VcYOlPSQpJvz\nbbvKc/tJukfSXZK2royvK2l6fu5ISaq7liAIgqC/lOw8pkr6KfBL4JnGoO1fdJA7CTgaOKVp/HDb\n36kOSFoT2BF4DbA0cJGk1W2/ABwD7A5cB5wPbANcULDuIAiCYECUKI8pwL+At1XGDLRVHrYvl7Ri\n4Tq2B86w/Qxwn6R7gA0k3Q9MsX0tgKRTgHcTyiMIgmBM6ag8bH+sz3N+RtIuwFTgC7b/ASwDXFt5\nzYN57Ll8v3m8JZL2APYAWH755fu87CAIgqBBR+Uh6UTSTmMYtnfrYr5jgG/k432DlD/SzXFaYvs4\n4DiA9dZbb5Y1B0EQBP2hxGx1XuX+PMB7gL90M5ntRxv3JR1fOfZDwHKVly6bxx7K95vHgyAIgjGk\nxGx1VvWxpJ8AV3YzmaSlbD+cH74HaERinQucLukwksN8NeB62y/k3JKNSA7zXYCjupk7CIIg6B8l\nO49mVgNe2elFWclsASwm6UHgAGALSeuQzFb3A58EsD1D0pnA7cDzwF450grg06TIrXlJjvJwlgdB\nEIwxJT6PmaSTvfLfR4Avd5KzvVOL4R+1ef3BwMEtxqcCa3WaLwiCIBg9SsxWC47GQoIgCIKJQ5HZ\nStK7gM3zw0ttn9fu9UEQBMHsTUlhxG8B+5D8EbcD+0j65qAXFgRBEIxfSnYe2wHr2H4RQNLJwE3A\n/oNcWBAEQTB+KSmMCLBw5f5Cg1hIEARBMHEo2XkcAtwk6RJSxNXmwL4DXVUw6hx+4R9qy3xuq9UH\nsJIgCCYCJdFWP5F0KbB+Hvqy7UcGuqogCIJgXFPiMH8P8C/b59o+F/iPpHcPfmlBEATBeKXE53GA\n7ScbD2w/QcoWD4IgCCYpJcqj1Wu6KWsSBEEQzCaUKI+pkg6TtEq+HQZMG/TCgiAIgvFLifL4DPAs\n8FPgDOA/wF6DXFQQBEEwvimJtnqaCM0NgiAIKpQmCQZBEATBS4TyCIIgCGoTyiMIgiCoTUkzqJVI\nTvMVq6+3/a7BLSsIgiAYz5Tka/yS1AHwV8CLg11OEARBMBEoUR7/sX3kwFcSBEEQTBhKlMcRkg4A\nfgc80xi0fePAVhUEQRCMa0qUx2uBjwBvZshs5fw4CIIgmISUKI8PACvbfnbQiwmCIAgmBiWhurcx\nvJNgEARBMMkp2XksDNwp6QaG+zwiVDcIgmCSUqI8ondHEARBMIySwoiXSVoBWM32RZLmA+Yc/NKC\nIAiC8UpJG9rdgZ8Dx+ahZUiJg0EQBMEkpcRhvhewCfAUgO27gVcOclFBEATB+KbE5/GM7WclASDp\nZaQ8jyB4icMv/EOt139uq9UHtJIgCEaDEuVxmaT9gXklbQV8mlTnKgj6QiieIJh4lJit9gUeB6YD\nnwTOt/2Vga4qCIIgGNeU7Dw+Y/sI4PjGgKR98lgQBEEwCSnZeezaYuyjnYQknSDpMUm3VcYWlXSh\npLvz30Uqz+0n6R5Jd0naujK+rqTp+bkj1XC+BEEQBGPGiDsPSTsBOwMrSTq38tSCwN8Ljn0ScDRw\nSmVsX+Bi29+StG9+/GVJawI7Aq8BlgYukrS67ReAY4DdgeuA84FtgAvK3l4wGQifSRCMPu3MVlcD\nDwOLAd+tjM8Ebu10YNuXS1qxaXh7YIt8/2TgUuDLefwM288A90m6B9hA0v
3AFNvXAkg6BXg3oTyC\nIAjGlBGVh+0HgAeAjfs43xK2H873HwGWyPeXAa6tvO7BPPZcvt883hJJewB7ACy//PJ9WnIQBEHQ\nTDuz1Uxa53MIsO0pvUxs25L6mi9i+zjgOID11lsvclGCIAgGRLudx4IDmO9RSUvZfljSUsBjefwh\nYLnK65bNYw/l+83jQRAEwRhSEqrbT84lRW99K/89pzJ+uqTDSA7z1YDrbb8g6SlJG5Ec5rsAR43y\nmoPZmHC2B0F3DEx5SPoJyTm+mKQHSaXdvwWcKenjJH/KDgC2Z0g6E7gdeB7YK0daQcpoPwmYl+Qo\nD2d5EATBGDMw5WF7pxGeessIrz8YOLjF+FRgrT4uLQiCIOiRkiTBIAiCIBjGaPs8gmC2oa6/BMJn\nEsw+xM4jCIIgqE0ojyAIgqA2oTyCIAiC2oTPIwjGiMgxCSYysfMIgiAIahPKIwiCIKhNKI8gCIKg\nNqE8giAIgtqEwzwIJiDhbA/GmlAeQTDJiMz4oB+E8giCoBax6wkgfB5BEARBF8TOIwiCUSN2LbMP\noTyCIJgQhK9mfBFmqyAIgqA2oTyCIAiC2oTZKgiCSUH4W/pLKI8gCIIOhL9lVsJsFQRBENQmlEcQ\nBEFQm1AeQRAEQW1CeQRBEAS1CYd5EATBgJkdI71CeQRBEIxjxqviCbNVEARBUJtQHkEQBEFtQnkE\nQRAEtQnlEQRBENQmlEcQBEFQm1AeQRAEQW3GRHlIul/SdEk3S5qaxxaVdKGku/PfRSqv30/SPZLu\nkrT1WKw5CIIgGGIsdx5b2l7H9nr58b7AxbZXAy7Oj5G0JrAj8BpgG+D7kuYciwUHQRAEifFkttoe\nODnfPxl4d2X8DNvP2L4PuAfYYAzWFwRBEGTGSnkYuEjSNEl75LElbD+c7z8CLJHvLwP8uSL7YB6b\nBUl7SJoqaerjjz8+iHUHQRAEjF15kk1tPyTplcCFku6sPmnbklz3oLaPA44DWG+99WrLB0EQBGWM\nyc7D9kP572PA2SQz1KOSlgLIfx/LL38IWK4ivmweC4IgCMaIUVcekuaXtGDjPvA24DbgXGDX/LJd\ngXPy/XOBHSW9XNJKwGrA9aO76iAIgqDKWJitlgDOltSY/3Tbv5F0A3CmpI8DDwA7ANieIelM4Hbg\neWAv2y+MwbqDIAiCzKgrD9v3Amu3GP8b8JYRZA4GDh7w0oIgCIJCxlOobhAEQTBBCOURBEEQ1CaU\nRxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtQnlEQRBENQm\nlEcQBEFQm1AeQRAEQW1CeQRBEAS1CeURBEEQ1CaURxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDU\nJpRHEARBUJtQHkEQBEFtQnkEQRAEtQnlEQRBENQmlEcQBEFQm1AeQRAEQW1CeQRBEAS1CeURBEEQ\n1CaURxAEQVCbUB5BEARBbUJ5BEEQBLUJ5REEQRDUJpRHEARBUJtQHkEQBEFtQnkEQRAEtZkwykPS\nNpLuknSPpH3Hej1BEASTmQmhPCTNCXwP2BZYE9hJ0ppju6ogCILJy4RQHsAGwD2277X9LHAGsP0Y\nrykIgmDSIttjvYaOSHo/sI3tT+THHwE2tP3/ml63B7BHfrgGcFefl7IY8NcxkJ2sc8e6J8/cse7R\nn3skVrC9eKcXvWwAE48Zto8DjhvU8SVNtb3eaMtO1rlj3ZNn7lj36M/dKxPFbPUQsFzl8bJ5LAiC\nIBgDJoryuAFYTdJKkuYGdgTOHeM1BUEQTFomhNnK9vOS/h/wW2BO4ATbM8ZgKb2YxHo1p03GuWPd\nk2fuWPfoz90TE8JhHgRBEIwvJorZKgiCIBhHhPIIgiAIahPKow2S1m0x9o4ujjOHpCn9WVUwEpI+\nUDIWJJRYrvMr+z7vnJK+0+MxVioZCw
ZHKI/2HC9prcYDSTsBXy0RlHS6pCmS5gduA26X9MVuFyJp\nEUmv61a+5lxdnYTzSeGSHubdJH9eSPqwpMMkrVDjEPsVjrWae35Jc+T7q0t6l6S5aszdOM58dWWy\n3Cu6lPtxyVgrnBye53czby/YfgHYtMfDnNVi7OelwpJWkPTWfH9eSQvWkH1d/n68t3ErkFm03a1w\n3r58R/vFhIi2GkPeD/xc0s7AZsAuwNsKZde0/ZSkDwEXAPsC04D/LZ1c0qXAu0j/p2nAY5Kusv35\nAtnVgENItcDmaYzbXrlg6v2AnxWMDcP2C5JelLSQ7ScL5mnmGGBtSWsDXwB+CJwCvKmdkKRtge2A\nZSQdWXlqCvB84dyXA5tJWgT4HSk8/IPAh0qEJb0xr3cBYPn8Hj5p+9OF818r6WbgROACl0eyvKZp\nHXMCs+yY23CjpPVt31BDpjrfKsCDtp+RtAXwOuAU2090EL1J0rmk79TTjUHbv+gw36tI73mhppP2\nFCrf8w7H2J1UiWJRYBVS3tgPgLcUyJ5Aeo8zgBcbywbarpv0+zUgYHngH/n+wsCfgJJdU0/f0X4T\nyqMNtu+VtCPwS9I/+G22/10oPle+Kng3cLTt5yTVDW1bKCugT5B+kAdIurVQ9kTgAOBwYEvgY3TY\nafbpJPxPYLqkCxl+Uti7QPZ525a0Pekz+5GkjxfI/QWYSlK00yrjM4HPFa5btv+V5/u+7W/nk3kp\nhwNbk/OPbN8iafMa8qsDbwV2A46UdCZwku0/tFystB+wPzCvpKcaw8Cz1Avh3BD4kKQHSP8vpeW7\ndJd7FrCepFXzvOcAp5O+R+2YB/gb8ObKWMlJeA3gHaST7jsr4zOB3QvXvBepXt51ALbvlvTKQtmN\nbNcuymp7JQBJxwNn2z4/P96WdI4oodfvaF8J5dECSdNJX+QGi5LyS66TROEP61jgfuAW4PJsfnmq\nrcSsvEzSUsAOwFdqys5r+2JJsv0AcKCkacDX2sj04yT8CzqfAEZiZj4pfoR0hTUH0HFbbvsW4BZJ\np9kuVXLNSNLGpKu4hsKas84BbP9ZUnXohRqyBi4ELpS0JXAq8GlJtwD72r6m6fWHAIdIOsR2kWlu\nBLbuQRbgxZyH9R7gKNtHSbqpk5Dtj3Uzme1zgHMkbdz8mdTgGdvPNv5Xkl7G8N97O66RtKbt27uc\neyPbLyk52xdI+nahbM/f0X4SyqM1tZ3izdg+EqhevT+QTwp1+DopMfJK2zdIWhm4u1D2mXzyvVsp\nwfIhkkml3ZpvkXQbsLXtk2uutXGMkyXNCyxvu25hyg8COwO72X5E0vIUmPkknWl7B5IpZJaTQKGy\n/yzJNHe27Rn5s67jv/lzNl057zj3Ae4oFc4+jw+TFOejwGdIu5h1SKadlZpe/yrbdwI/k/SG5uPZ\nvrFkXtsPSNoUWM32iZIWp8P3pInnlHyBuzK0E+io8CWtTjJTLmF7LSV/3rts/08HuS/Z/jawc563\n+f2U7HAvk9TYtW0FfBr4VYEcJDPqNZIeAZ6h/k7tL5L+m3RxAEkR/KVQttfvaF+JJMEWdHJg2f57\nwTGWAL4JLG17W6X+Ixvb/lGfltlp/vVJJ6+FgW8ACwHftn1tgewVwFty+fu6874T+A4wt+2VJK0D\nfN32uwrlVyCdyC5Scj7PaXtmB5mlbD+sEZzreec1UCQtBhxBMj2JZJPex/bfCuX/APwYONH2g03P\nfdn2oU1jx9neQ60DFGz7zS3GW817ALAesIbt1SUtDfzM9iaF8msCewLX2P6JUsTTDs3rbSF3GfBF\n4Fjbr89jt9leq4PcO23/StKurZ4vuejJF1UfJ/kvRbpA+2GJn0nSPcDngekM+TyKv2P53HIA0DBp\nXg4cVHJOGW+E8miBpPsYcm414xKns6QLSH6Hr9heO2+Nb7L92hrrWJxkx12Ryi7R9m6lx+gGSacA\nry
Zd+Vb9FocVyE4j2bEvrXNSyK97yZFpexUlp/8PbHd0ZPaKpF8xq+niSZIZ71jb/xng3HOSFPsX\nBjVHm7lvBl4P3Fj5f91aciWd132K7doOW0k32F5f0k2VeW+2vU7dY3Ux9/zAf3LUV+N9vNz2vwpk\nr7G98aDXOMLcY/YdbUWYrVrQcG71yGK2z8w2/EZ9rmIbeOYc4ArgIgrt5yN8wV6icAfwx3ybAygO\nYcw8Z/vJJtv/iyO9uIleHJlImsnQe5+bZD552nZJjs29wOLAT/LjD5J8PasDx5PMSe3mPrLF8JPA\n1GynHxGnKLU3Fqyx1by7jHDMUwoP8WwOUnA+3vylc+d1ryBp7i52qX9VitRqzPt+4OFS4bzjamWi\nLNlxXUzaIf4zP56XtFMs+R/cJOl0kpnrmcq8RX6+fEH4JVLEWDUKsmTdPX1H+00ojxY07MmtbMlQ\nbE9+OtuxGz+OjUgnkzrMZ/vLNWUayVfvBZZkyLa6E8mW3hHbB9Wcs8oMpdDmOfPOYW/g6kLZXhyZ\n2H5J0SkdZHtgo0LxN9pev/L4V5Wr45IinPMAr2IonPl9wH2k0OMtbX+2g/zN6iJ0FaiueR5SuOmN\nJNt8CWdKOhZYOO/8diOdiEq5F7gqr73OLnUvUnTWqyQ9RPqsPlxj3v+q3J+H9HmXBkvMY7uhOLD9\nT5Xn58xLUhrVkP2SKLEGpwE/JflV9yT5ih4vlO31O9pXQnm05vMk88l3WzxnhocXtjvGucAqkq4i\nXTG8v+Y6zpO0nXNYXwm2LwOQ9F0PbxTzK0lTS47R41XdZ0iRYc+QQjZ/S/K5lNCLI7N5rQZ+mW36\n+xaILCBpedt/AsjO+objuOSq+nXAJhVTyDGkXeOmJPt4J7oKXbX9mepjSQuT2jQXYfs7+bN+ihQG\n+zXbF5bK0+Uu1fa9wFvzTmeOTn6tFvLTmoauknR9ofjTkt7QuAhUqiRRFILvLqPEKrzCKQR9n/xb\nvUxSaY5Nr9/RvhLKowW298h/60ZHVY9xo6Q3kX6QAu6y/VzNw+wD7C/pGeA5hiI7Ssww80taOf9I\nG6UbSk0SvVzVvd32V6iEFitlp7dNMMzsS3JkTgc+Scp+/mHhvGh40tgcJEdwqR34C8CVkv5I+pxX\nIoXKzg+URJ4tQvohN3aX85N8Ny/k/18nfmj7quqApCKndRNPU5ZwVuUPpO/VRZLmk7Rg6cm8sUuV\nNF+Jz6BBrwElTUEtc5ASIxcqnP6zpCi1v5D+10uSTEAl885D+o42m51K/ZCNc8DDkt5OirQqyjCn\n9+9oXwnrQ0vlAAAgAElEQVSHeRuyI+3tzOqwHnFLrg6lCkpto70iaRuSWeBe0hdtBWAP27/r8njX\n296g4HU32n5Dp7FBIOnEysPnSXk2x9t+rFD+5STTEyRlX+yAVErc+m/gUtLnvTnp5PgT4EDbbUvT\ndPu5Nfm45iBVFDjTdsluq+cgBaW8gx8BC9guzqzvNaCkKajleZLZ6+u2ryyUn4t0YQc1Luwk/Qy4\nkxRS/nVSqO0dtvcplH8HaUe6HHAUKQH3INtFze16+Y72m1AebZB0PunKtTksb0SfQNMJrBnXiZTS\nCBnKti8vlK9+0e60XXIFPNJV3ZG21xhBpJqdvgPJpttgCqlUy4iKR7MmZQ6jRuTP3rYP7/TaNsdY\ni1nLuZT6DlBK6Gy8zxtsd4zfzyffN5KuhqtrnwK8x/baHeSrpVueBx5wU6hvB/mb85qvq0Q9Ta9x\nEr+OZI49t050nXqItlIKtd24eadWIPdm278f6QKv5MKusV7liLSshK6wXepb65oR1v0kML30Aqmf\nhNmqPcuWnLiq9MEmWqV6tToP6UfeCIVtS/5Sf5KhePJLJR1beIVVrcPTuKrrVCakl+z0fiRlvqCU\nNNaV8si+kS1IyuN8YFvgSsodz5AuNB4m/a9WlbRqgaKfm2TuehnD
fQZP0cFHlhXmgb2YV+kxSAG6\nzqzvOqDE9ouSjiaFGNfhTcDvGV7W5KXDUub0bvx+nsgXG48AdSICu0qOzHwc2Jj0HkT6vk4DVpL0\nddtFBTH7hu24jXADDiXVs+pG9hWkDPMb8z/4CJKzrJf1LAecVfjaH5LsoG/OtxNJdvVBf2Zz9Si/\nJEkBvRNYsqbs4cDRpCKWb2jcCmWnk3ZZt+THSwAX1pj7E/kY/yBl/f4b+H0N+RUq9+cAphTKXUyq\ngdbt5/1tUo2sO4GtgLOBg2vI/5y0c7qRFBr9X8AZBXJvAK4iKYyrSH6X19WY9zskX5z69d2t8X9e\nhKSI7gUeA/asIX8Z6SLwpsrYbYWyvyUpncbjJfLYoqXH6OctzFZtUKrXcyrpx1zLYa1UGPByhpch\n2ML2W3tYj4AZLijMJukWN5k8Wo2NINv19lg9VPNVKgD5NYaurN5EsmOf0Ek2y3edbd3w6SglOW5J\n2jHdYftVHUQb8tNJYbPX2l5HqfrrN213LNed5U8nhW6+QKqWOgU4wnbb8iySziFdgXdTiLKnbOss\nX82snyPLF2XW511OVwElSjk985N2xv+h3m+zZX03218vnb9bejTX3V797VfPB9XjjRZhtmrPYaRt\n4vTSH1OFpWxXQ1T/R1JRREcDSUcx3Bm6DukKr4QXJK1i+4/5WCtTXqivsT1unIy3oHx7XLuab4Uv\nAq9vnHiyWeNqoK3yUAp7PAL4qgsdpi2YqhTmejzpvf4TqFN47z+2/yMJSS93yhMa0UfUgm5L+PdS\niBInE9DJpMRMk07idXJr/koXJcFz1NKnSaHMBq6Q9AN3cABL2sTJ17F4p9e24enK/XlIZtOiOmS9\nRonRW3LkpZLOYyhy8f15bH6gUwn8/jPaW52JdCPtHOboUvYwYEfSiXMOkiP5OzWPsWvl9iFSHkGp\n7JtJZeQvzbf7gS0LZbveHgPT8t/pzWMF815NqonVeDw3cHWB3M357419+r+vSA0TSpY5m1RH7MD8\nvTkHOL+G/AyS2ednwJvy2C1tXn9x/ntoj+/17cCf83fksvyd2baG/MqkXJzHSSacc4CVC+TOJEVp\nbZlvx5NqanWSa3y/+vK/zsd6OamcTslrL8i/5YZ582XV73rh53UR8C9SsdIrqZgsO8iKZKo7PN/e\nzyib7aq32Hm0516SZr+A4aUI2oXqNkpkiBRB0zBbzUG6mv2vEURnwalC7dykiCkDdarUvgJYi3Qi\nfDdpJ1Ga4b6c7Wo2+mN57O+SOpkWalfzldRobnUPqez9OaT3uz1Q0r/kDkl3A0treL+TWhVPs7mu\ncSV8ZeHckCZ5T757YDafLQT8plSe+iX8l1IqafIuSWfA8DpsLqyqS0qE3dL2PQD5qvjXpJNkCacD\n3wMa739HUnjyhh3k1vJw8+slkkrKnD8n6ThgWbUoCeNCc10T85EaQpXQddmh/LtYz3ZXyZG2rZTo\n+6SHCocuQDKxjjqhPNpzX77NnW8dcaVERq9I2o50UnkpKUjSJ22X/LC/avtnSr3TtyQ5GI+h848a\netse70P6Me5NyizfkrRzakfjM2tkKzdoWxOqge2dJC1J2h0VVe9tRtL3gVUZqhv0SUlvtb1XofxL\nHfVI/6sVSZ9DUeav65fw/xqpJfKypF3usMNRVgUBYGZDcWTupd7JaD4PN2OeqrJ2yzdK2si5yrOk\nDUnRep14B8m/sjXDo/qK0fDQ8DlJ1R9K/R29Rol9iZSH83RHgSY0awfEZSjsgDgIwmE+QJTaRa7G\ncMdxUY5Glr8TeEfzVaELnLiVePRDSNvq00udatkR17gKhxQNc5YLvixVP8tEIn/Wr268x3yVOMP2\nqwvlbyZltK9ICvU9B3iN7U4d9arHeDuzZi63PalJ+qqH+9ZK52o48rciJZCeSTohfgD4kwvb50o6\nlBRhdkaW/yApGul/8/r/3vT6
xom7kaT3p/x4BVIuUlGXPklrOzUBq42Gl+5/HnjUhU3ElOrdHUXa\n1d9GLjtku2iXKulbwF9JuVDVAIeSNg895eT0m9h5DIgcObQP6crwZlKBvmsovyKE3q4KH1IqeLcV\ncKhSwmCR4zpvj68kXTUbuL5EcWROkLQsKWLoCuBy2yW1nVBvFUd75R5Sb+lGX4bl8lgpXXXUayDp\nB6SdypakMOv3Ax1rNXWjODLVXIdHGeoT/ziFvcAzO+S/n2wa35H03WmOsus5pwdS47K6MpKm2H6K\nWX9DU5SqCj/lXJtsBPk5SJ9NL2WHGkEz1R1tq8+pFT3n5PST2HkMiF5CN/txVZjtoduQdh13K2U/\nv9YF5Ukk7UC6cryU9APZDPii7Z93ks3yc5Pe+xakk8oCtjvW75H0O9IV2X9RqTjq+pWFi9FQeY+F\n8pqvz483JCnNLQqPcx3wf6SaXu+0fZ8K+5hk+UbGcuPvAsAFtjer/66CVkg6z/Y7NHK/ngVI5Wz2\nb3OMUQ+Jrcz9bZLZeBdSAdJPA7c71ZIb/fWE8hgMGornvhnY0PYzkmbYfk2BbN9KnHSDUt/srZxz\nOvKO4CKX5YhsSlI2m5Gij24mlW/4SVvBJDvN9rqqNCNqfI49vJ1Oc76p3fPOVYoLjtNVR72K/HW2\nN5R0Lclk+DeS2WzVEvmxRD2WdRkvKGXs39bOVCnpOyQLwi9q7Mar8rOEKJNqiXUMO1aPOTn9JpRH\nG9RDKQFJZ5NyHD5LMlX9g5R93dEGrtQP47cubGHab5rtqPlLe0uJbVXS8yRH5iGkUNXiUtGSrrW9\nkaTfkpzHfwF+bnuVDnL9aIA1pkj6KsmW/mZS9BKkE8NXO8i95KiXtAWpNPwptkcl7l8jlHWxXbf9\nQN15X0EKi96Eoei4rw/6N6MekhOz/Jkks1kjCnNnYGHbHxjAcgdKKI82qMs+yy2O8yaSWeSCEvuo\npC+ToknmIpWfuIB6foeeyNvjtRnesezWEvORUqLdJqSaWuuTCkpe0+kkmGVbVRw90Hbbnh792j30\nQsUU0jx3iS0bSfMCnyLt2BpXpMd0uiLth6O+F7J5dm1SuY21lZLoTrW91YDn7XsFh9FATVniI41N\nBMJh3p75bF+v4UXfSqMyfmz7IzCsQdOPKWgVmU0dh0pakBSWuBvwA0l3kHIHfuvheRj95kHS1rxh\nbz/O9tklgrafkHQvSQEsS6p7NFfhvP+w/SQp9HFLSBnFBXMOXDkUUG28NQ/JP1XapwFSHbKZDIXr\n7kwqyrjDiBKJrhz1GsqtaYkL+tVn/u0Ugvq8Ulj4Y6T//UjzVlsFt5q36AqePlRw6Ib8Of8+f08b\nF0tb2P5l4SG6DVEed4TyaE8vpQSG+TayPXXdOpM7JRCdnW8Nu/q2pJPK1nWOVZNXkvI0biSVBvlt\nqWBWHHeSr5yBj9UwXR1FKpjXaWykubuuq9UrLcwl/6dUJ6tlHaUW9JI0txMpuKARQVWirBu5NWuQ\ndoiNfhLvpCDKq0Ktsi7OeVCSvkH6Lf2YZPr5ELBUjXl/J2lHUjAJpOi0Ot/TTYHVbJ+YfXoL2L6v\nQPSA6oVUvlg6AGirPJpClK+WNCxEuXTd4wqPUWr7RLjRupTAih1k9iNdQT5PyhCemW9/Aw6pOf8m\nwPz5/odJyWArjNJ7F0lBnUEKWf0msEqBXO1yLqTs9y+QymR8vnI7kDYlOloc50pSwtStpB/lgSQ7\neIns9CxXvV1BKgPRsRoylSq+pF3InjXXfiqwUeXxhiTfRSe5NUm7lZ3y45WAL9eY93JgwcrjBUnh\n1d18Z1aksKxLq8+m5uc1k2QSfS7/1l6s/Nae6iB7AKmkyh/y46WBqwrnvbXVd6dAboV2t8K5LyT5\nRxqPFyFZIWr/r/pxi51HG9xFn2XbhwCHSDrE9n49LuEYYG2l7mxfIMX/n8JQTP7AsG1Jj5D6FT
xP\n+qL+XNKFtr/URu7FkZ5rQ9c9LZqY1/bFkmT7AVKpkNKr/wtIhSNPz493JOVdPAKcROseEFWq/e4b\nXQw7mZyqrMvQFSmknJO7GlesHqHEiu3bSbvExuP7SK0ESlmC4Vnwz+axYtRdWZenlYpANpILd2J4\nwcK2uLdKDu8hVSK+MR/rL9lEXMJUSYcxFNSwFwWZ7vn72CuLuRIIYfsfkop7ifSbUB4tGMke3PB9\nuMwe/BVJHwZWsv0NScuR7LR1TALP55P49sDRtn+k1O50oEjahxRL/leSwvqi7edy1NXdpES+vuHk\ns7hM0km2H1DNftgVatfVqvBWD2/5Ol25DWz+P7bFvTVkgpSTU5teHfWki5Hrc3QgpDpoxf2w1X1Z\nl51JpdyPIK3/qjzWab5XOVUsbmnKdFlNr2fz76phjp6/QKbBZ0hlYX5KWveFDE/4GyQvSlre9p/g\npUz5MYt4CuXRmn7Up/oeaSv9ZlKNp3/msTo5CzOVCrB9GNg8nxhLnc+9sCjw3uarJSfHaF8yhEdg\naaUilAsAxf2wK3RTV6vBnJI2aCh3SeuT6h5BeZBE7fIildd1e2Xak6Pe9sH5M28ER3zMdnFmPOn7\nXS3rcjKpQnCnee8nFb6sy+dJ9Z2+2+K50ppeZypVX1hYqV7UbiSfTUecalIV9YcfAF8BrsxRoI3k\n3T3GaC0RqjsoKlet1aYvRc2YKsdYknQ1doPtKyQtT4rsGHcJWP2I3lGX/bD7QVYWJ5AUl0gms0+Q\nToRvt31mG/ERy4vYHvhOscVaptkuDs7owXmMUgHNvRrKL18NH227rZlPvbVj7RlJW1FJtrN94WjM\n2ytKzbca/dKvdeqnMjZrCeUxMko1mo4iOa4hOVD3sf1ggex1pDDVG7ISWRz4nceotMGgyREnMEL0\nju2Oph8NZVl3pXBz7P8HGnZhpcKUZ9gujkyTtBCAcyhmDbkxKS/SZL6Zg7QT+VSNz+yALLOG7dUl\nLU3qq9ExRDrLX8ZQWRfy/ankSrMeIUFTPeZQqYdM7bEk+4cOJUU0ioIkwz6Z6vpOmK3acyLJgdrI\n/vxwHitJgDqSFGL7SkkHk65E/7tk0jax8LWyWUcT2wcBSLqc1Dd8Zn58IKk/RAl/VupRYUlzkcxQ\nRR3eMl07FJUKR76PFDH0sop/q7RUd+Ok9a98Av479UJPu6XZUX8f9Rz1vTiPoTwUuZmuc6gyp5Ai\nq47Kj3cmhf2OmKnd6+8qh9vvbfvwGuts5tuk2md1vtf9MNX1nVAe7VncdrXO1EmSPlsiaPu0HOnz\nFtKX892lX5geI0nGml6id/YkOVCXITm7f0c9Z2QvDsVzSFfL06g0/qrBr3K+w/+STsSm0I7eLdkH\n9gPbP+3hML04jyHtMhqJgquTGpeVVFLoJYcKusiL6fV3ZfsFpZyaXpTHozUVB7Ybfo1tm3dWeQc2\nJoTyaM/fcqRNI5JkJ1K+RkckLUrKtv1JZWyugh9VQ3ZEXFD7fwzpOnrHXfbDrtCLQ3FZ211FPGXu\nBF6wfZZSMucb6JA41iv5hP1FUuRPt3TtPM5cDmyWTYS/I5Xi/yCd/497AccBr5L0EGnH1NG0WaGn\nTO1sAnopvLhGkMBVko5m1n4cpaajqZJ+SvpuVLuTlvShv5pZE2ZbjY0K4fNoQ75yPYqUxGbSP2rv\nxpVtB9n7SWUa/kE6kS1Myhl4FNjd9oix4Rq5ZDSk7fXAM6Z7If8wG7b+y0t/mGrRVpS0G5hqu6ir\nYLcORaXWpke5sPdIC/mGr2NTUqTXd4Cv2S7p3Ng16qG5UOUYXTuPK4EhnyHl2Xy7pp+qdjvWLHcH\nQ82kIOfFkExfdpvWw5K+RjJvNU7Y7yb5eUoKnl7SYtgu7Dmj1hWz7TaVsnPgzDKkRNKdGTovTCHt\nPDs2hxsEoTwGhKTjSRVhf5sfv41kUz8ROGLQJ5WxpNvonX
wCfxVD7W/fR7oifQVwr+2WJsN+OBSz\nyWPVPF+jlWzbk1CTfNedG3shX2g0M2oXGEp1tD5NMuV83PYMFXS3a/YxNcZLfUwa3g1wFtwm9FnS\nXcDaDROQUlHKm22vUTJ3L0iap65TX9KuwEdJgQ03MKQ8ZgInFe5a+k4ojzbkE9/uzPoF79hPo9UP\nqHJ1erPtdQrX8C5ShVqAS22fV7r+saCX6B2lXhabOHdzU+qUdgXJvDDdI1QelXSc7T16uSoc6WTU\n7iTUJH8eyU+zFcmM8G9SlFlxaHY3tDoZlZygKs5jMdwvVLfE+Oak5l1X2T5U0srAZ23v3UHuNwz5\nmF7q3me7lVO4r+TvyXsqUXkLk/pzlHxPWgYI1FB695CsD1fk25WlkX2S3mf7rJLXjgbh82jPOaR/\n8EVUvuCFPKxUWv2M/PiDwKM5YqOohEc2SawPnJaH9pH0RrfpdDYO6CV6ZxFSnkXjxzQ/sGh2VI7o\nxG44FN1FlrdGbk1alx1IWeLfcSqWtxQpFHXQdGUH72NQxkOuhOM6lfRpqzgyvfqYaiPpKJKifBKY\noRTabZLCL638UC2hMg+prW6xA9z2qkr5WpsBbwe+J+mJwovJZZUqF88k+aXeAOzrgu6ggyCUR3vm\nc/ctUHcmFWD7JcPLL8xJeSjldsA6zvWilLJ3bwLGs/LoJXrn28DNki4lXQFvDnwzH+OiTsLqLvb/\ndNIJYBqz+plMWW9pnMqp/KLy+GHqRQ/VomIHn1fS6xluB5+vxnFeah3QbqwN3fasv1rSa7v1MXVJ\nw6E+jVypOnNp6QGad0ZKnQXrVPNdlpQ3thmpD8oMUj2wEnazfYSkrUmm3I+QwpPHRHmE2aoNkv4H\nuNr2+T0cY36nkgbdyN5Kyij/e368KMl0VWSHHwsk/RewGulq7hBS9M7pto9qKzgkvxSwQX54g+2/\n1Jh7tunS1okmO3g1yqiWHbzh8K48fhmpcmxxcyJ10bO+Vx/TeCFHmd3gwnbBkl4kKdpvlgaBVGQb\nZu8jSOeBs0fDrzbiekJ5zEqTPXh+0pf7OWrYg5WS3X5I+iHVrtMkSaQri28AlzB0Jb6ve4vrHzjd\nRu/k9/whYGXbX8/b+yVdWExSPXZp0/DqsFe4vMHPmNGtHVypZtr+wLyklgOQ/l/PAsfbLqrfpC57\n1vfqY+oF9dD3RUN9OSBZERYnlf0/unDutUnfsc1JEWJ3A5fZ/lGB7Imk3eZKpF3LnCQlUqtPUL8I\n5TEg1Ic6TfmL+jaGiileb/uRvi92nCDpGHIxSduvzld1v7NdVExS0qmkukrV2P+9bO9SINtcHfaD\nwB/duTrsmCDpw7ZPlfQFWlfVLeoEqB5bB6i3nvVzkhJIq8EoHcPge0XSlSST8uGk8jkfI4ULd8yW\nb1J6z5OS/upkxqNUuqahdD8MYLtt9FiWmwNYhxR5+IRSH/dlbJeUwO874fNog1Lb2MtJV1K1u33Z\n/rOGl1+o63S/keRYPLfjK8eYPkXvbOhcTBJeKi8yd41ldNUTI9NVddgxpOFLKi05PxIbNA9Iutj2\nWwrlF2OoZ/3e2SzTsWe9Ul7IAaTIo0YAiYHRMFt13ffFqWXA2lTymCjrXwKApKnAy0lBDVcAm9fY\nbZm0W3oH8HXSdyAyzMcpJ5C+JEcplVK4ieQQPKJAttc6TZC6yX1I0gOkKI9xaxfuU/TOc/lqtHEC\nX5zCyLRML9E795CUTeOHvFweG5fYPjb/Pagb+RxcMD+wWN7hVR3uy9RYR7c96/chhXMXVWzoM133\nfVHqdbM7Q8ERp+VQ8SKfHqnEyOO1V5z4PkNtHr5O8m+dRb02D/3DY9TCcKLcSHbFjUjtZR8A7iyU\nW4wUYvsoqUzJqRS0M206RtctK8fw8/pxydgIsh8iVeN9EDiYlDH8gZrzrw38v3xbu4bcZSTb/6X5\n9nQeO5dkehzzz3aEdZ
/MrK1JTyiQ24chZ/V9ldstwP+rMf+9wPkk/8mmwNyFcpcALxujz2x9krJY\nlpS0exZp11sieyu5NXR+PD8tWtO2kV+I1E56ar59F1ioUPbG/Pemylhx695+32Ln0QZJF5O+HNeQ\ntpjr236sVNx2L3Wa8Cg4DwfAa6oPcvROkUPPPRSTzHM1XxWeWuOqsNvqsGPN6zxrJeGO0TdOu+cj\nJH2m8PMZiVXdXevhe4FLJf2a4TWeinw1PbKi7RtIDdo+BiDpA8B1BbJiuPn5BWhZRmgkTgBuYyhc\n/yMkBfbeAtled+Z9JZRHe24lnfjWIiUWPSHpGtv/LpC9Sqm+1U+Bs6o/8NmRavSOpKcaw+ToncJj\nHEnqv/G9ji9uzcdJV5BP5+MdSlL8HU+Oti/LuRMbkH6cN3hiBCfMIWkR2/+Al8K56/yuj5W0N5Uq\nBqQeGx0LeGZWzYEOdZs6/Snf5s630WQ/hkrgtBtrxYnAdUqFP0XqhtgxUqrCKrbfV3l8kKSbC2W7\nbvMwCCLaqgClDOmPksowLGn75YVyGwA7kgqv3U46MZ7aXmpi00v0Ts5d+CCp4N3ZpM+rTqXU6aTd\nYaNm0TwkJdC2zlJ+7SdIu4/fk04KbyKFYJ5Q+42MIpJ2ISntxonvA8DBtn9cKP9Dko+iUfn4I6Tq\nwJ8olO+pqdNoImlbUuLtDgyvRDwFWNP2LMEDIxynUZEXUjBNcdteSdcAX7R9ZX68CakqwcaF8q9i\naGd+cZ2deb8J5dGG7EzbjLT7uJ9cj8b272seZzGSnfNDtufs9PqJTKtInZrRO42r5/eRFO/ytlcr\nlPs8qWd5tRz8Sbb/r0D2LuCNzg7cHAZ5tUehWF6vKJWAb9Rl+r3ttn0tmmRnqYDbaqyN/A2219fw\n7o8da7dlk8uXmLXn+8AaG+UoqXVIzuaqmXImcElj99bhGKsAD9p+RtKWwGuBU0otC3kNp5B8H5Cq\nbu/qwnDbHNywHMPDm6OT4DhkHtJJf5rrx3JPIdV52hFYhXRCK7qymYj0K3onsyqpuu4K1KsbdJhS\naZPGVeHHalwV/o3h9a1mUti7ZazJyqJYYTTxgqRVbP8RQKmwYZ2Q8m6bOp1Guvp/B6kJ2K5At1FI\nRdi+BbhF0umkc9/ytu+qeZizgPUkrQr8gBRQcTppR9OWHOG1hu218/kBp7pqRUj6BskC8keGwuHH\nrJNg7DwGhFKp7F8CZ9q+ZqzXM2iys/qzwNJAtaTIU6SM5Y4ZuJK+TVK4fyQVlPxljSu6OYEZ7rK3\ngaRTSFeR55B+kNuTfF63wqg5ckcdSW8h2fHvJSn8FUhKt1WF4lbyK5OaOr2RdBV9H2mH3TbYQ9I0\n2+sql9zIYze4MCG0FyS9k9RvZW7bK0lah2SibNlvvUm20b/kS6QOikepRokQSVNtr9fluu8CXusa\niZiDJHYeg2NlTyLN3KfonT8CG7uwgVPT/C9IukuVNrRdzP3HyuNG3aGJ3BK4I07JcquR/EwAd9mu\n04b3IZLyuQRYlHSxsCvJNNSOhkP+YUlvJ11wtK2H1UcOJFkBLgWwfbOklQpln1NqRbsLKTsdyvJa\nGlykVP+tm+Zdt5FKwJRGfA6U2HkEfUUpI3xPuo/e6WXuy0nl4K9n+A+z4xVl5RgLZJl/9n2B4xB1\nV4m4Kv8b4AlSNYTivhyS3pHnWo4UDTcFOMijUE1B0rW2N2ry07y0A+oguybp+32N7Z9kpbOD7UML\n5+66eZek9UgXNbcxPLy5+PvdT0J5BH2l1+idHud+U6tx25cVyK5FKm/duPr9K7CL7fFcoqRn1GMl\n4vEaWdUOST8CLgb2JQVm7A3MZXvPNjLHARcAF7lmy9x+IWkGcCwwnUp+R8n3eyB4lLIRJ9ONlJX+\nubFexxi991kyXluNDWjuQ0vGRpC9Gtiy8ngLUrTVmH+mA/7Mbi8ZayN/HMkOX3ferjLj
+/Se5yNV\nMLiBlOV9MDBPB5kNSeauK0iK58vUqGBQOc48wOdJiaxnkfyEbeeuyN4w1t+X6i12HgNC0vUujBuf\nnZB0I6mkSDV65+eu9IxoI9tTYyI19abIY6XmiJ5CVicq6qEScX59V305WjmZ6ziex5Icxv02YFtS\nIccbgd/YPrNAtuudnqTDSJ/xuQw3W0Wo7mzGVZKOZlbH2Jj8o0eRLwKXKBXLeyl6p1C2ubTJnBSU\nNpH0KZLdfmWlBloNFiR1cCzhXklfJZmuIJXKvrdQdsKhob4UczFUidik/1edCtLbdrmEXjPju0bS\nr5i1jP2TpF3Ise7g73HKBfpJviFpXcqLcq7l4f1lLskKuISGYt2ouhwiVHf2QlKrUEd7gElQ4wVJ\nL6dG9I5mbUzUyBF5FjjOHTLWJS1EMnscQrJjN5jpsiiWRvLVQVQyh4EDXZA4NhHRCM2YGnjAddV6\nzYzvce4jSE2cqr1bniKdiKe02unmBNQRcXn/lJ52euOJUB5BX+klekc9NiYKJha9ZMb3OO8s+SSV\nTKO9Co0AAAhISURBVPkZtl/TQuaAfHcNUlXeRlTYO0lN2j7cYc7qTm8NUl2vl3Z6Luh2mS+SDmAo\nkvEyUn7Kk51kB0EojwEhaQngm8DStrfNP5SNXdBuciLTh+idd1EJ87V9Xv9X2XLe1Um1y1ZkeOmH\n2X6nONmQdAewtXM+kFK74986da9s63fJ4eBvd464Uqp792vbm48kk1/X805P0lmkMN1qJOPatksq\n8vad8HkMjpNIyVNfyY//QPJ/zNbKgx5supIOISVvnZaH9pH0Rtv793uRLfgZqdzED6nf8TGYWHwB\nuFLSH0km0pWAT0uan6ET80gsQTKnNng2j7WlT2bAXiry9p1QHoNjMdtnZns+tp+XNBlOSjdK2qjJ\npltaGfftwDrO/SGUWsHeRLKND5rnbR8zCvMEY4zt83NWfaOUzV0Vs2qnIpqnANcrlWSHVHyzk8Lp\nF/+WtKmHV+QtaQ8xEEJ5DI6nc0hfo2DcRqSIjtmSPkbvLAw0nNwLtXthP8hRPgC/kvRpUgHLahhk\nkcM9mDhImo+Ua7GC7d0lrSZpjRITqe2DJV3AUA/zOsU3e+VTwMnZ9yHS7+SjozT3LITPY0Ao1fw/\nitRI6jZSdMf7XVh6eaLRJ5vuTsC3SHWSRPJ97Gv7p20FeyCXizC07AZnF5SNCCYWkn4KTCNVEFgr\nK5Or3aGMfEV+U2A12ycqlZZfwHarsiMDQV1U5B3IOkJ5DA6lFqxrkE5Md3kU6jtNdCQtRYpmgRTF\nMhG6+QUTCOXKtk21rYoSQnPU1Xqk0uqrS1oa+JntTQa43r6ECfebMFsNlg0Yit55gyRsnzK2Sxr3\nrM9QtJWBX43GpEo9rH9je6ak/wbeAHxjFE0SwejxrKR5GTIpr0LFVNmB95CS9W4EsP2XHHE1SBrH\nbxkmPOC5RySUx4CQ9GNSE6ibGYreMcnhFrRA0rdIP45GtNXekjYepWirr9r+WTZJvBX4X1L01Yaj\nMHcwuhwI/AZYTtJpwCaU+w6etW1JDcUz/0BWWMH2QXmuy4E3VMKEDwR+Pej5RyKUx+BYj9QXOeyC\n5WzH2EVbNRT820lZ7b+W9D+jMG8wytj+naRppDIfAvZxeQ+ZMyUdCywsaXdgN+D4AS21ma7ChAdF\nKI/BcRuwJGUtOYMhRjXaqsJD+aSwFXBoLrEyxyjOH4wSubbV6cC5tp/u9Poqtr8jaStSOZPVga/Z\nvnAAy2xFqzDhk0Zp7lkIh3mfqRRdWxBYh2STHPPGLROBsYi2qsw9H6m43XTbd2fH/Wtt/27Qcwej\ni1Lflw+Sdpk3kFoen1dSQifLL0nyZ5pUJn3UgjpyFGcjTPjysfTJhfLoMxqhIVEDj1XjlnGOJAHL\nAs8T0VbBKJCrNr8Z2B3YxvaUAplPAF8Dfk+6wHkT
qb7UCYNc63gklMeAkHSo7S93GguGkDTd9mvH\neh3B7E+OtnonaQfyBtLO4zMFcncBb8xl2Ru9Pa62vUZ7ydmPsOkOjq1ajHXb+2CycKOk9Tu/LAi6\nJxfvvIO06ziaVDOqo+LI/I1U+LPBzDw26YidR59RpTER8MfKUwsCV3Uq3TyZkXQnqSvdA6QGWkVd\n6YKgBElL57yMrUm9yGvXmpN0CvBa4BySz2N74NZ8G7OEvbEgoq36z+nABfTQmGgSs/VYLyCYrflh\nrmV2KfCMpCttP1/zGH9k+EXhOfnvoBMFxx2x8xggktZmKDLiCtu3jOV6gmCyk5uVbUEyIW9Casr0\nG1J1gT/VOM4U0q54ZscXz6aE8hgQkvYG9gB+kYfeQ0o+O2rsVhUEQRVJK5EUyTbAkrY36PD69Uh9\neho7jSeB3WxPG+hCxyGhPAaEpFtJnQOfzo/nB64J+30QjC0jREJ+G/hv28+OINZ43a2knuNX5Meb\nAt+fjL/riLYaHGJ4R7oXaF32OwiC0aVVJOQ2nRRH5oWG4gDIjZnq+k1mC8JhPjhOBK5rKiUwu7eg\nDYJxSzUSMu8gGiwIXF14mMtyGZufkKKtPghcmjO/sX1jH5c8rgmz1QDJX6hN88Mrorx3EIwduQPf\nIvQQCSnpkjZP2/abe1jihCKURxAEkwJJU2w/VWk9PIwIpa9HKI8gCCYFks6z/Y4RWg+3bTks6cO2\nTx2pq99kSg5sED6PIAgmBbbfkf+u1IV4o+nTpEsGHInYeQRBMKmQtHmrcduXj/ZaJjKhPIIgmFTk\nnjsN5iH15phW4uyWtDiphPuKVCw3tnfr8zLHPWG2CoJgUmH7ndXHkpYD/q9Q/BzgCuAihudxTTpi\n5xEEwaQmNyKbYXvNgtfebHudUVjWuCd2HkEQTCokHUWKtoJUZWMdoDS57zxJ29k+fyCLm0DEziMI\ngkmFpF0rD58H7rd9VQeZmQyF984PPAM8x1DPmY4tbGc3YucRBMGkIfctf5vtD9WRsx0huk1EYcQg\nCCYNuXvgCpLm7kZe0o8l7S7pVX1e2oQjzFZBEEwqcivZVwPnktodA2VZ4pK2JDV42wxYBbgJuNz2\nEYNZ7fgllEcQBJMKSQe0Grd9UKH8nMD6wJbAnsC/bU+6nUj4PIIgmBRI+rHtjwBPdLtTkHQxyWF+\nDSnfY33bj/VxmROG8HkEQTBZWFfS0sBukhaRtGj1VniMW4FngbWA1wFrSZp3UAsez4TZKgiCSYGk\nvYFPASsDD/3/9u4QB4EYCMPoX4XiNFwIS4Lcu3AbDJo7cJNBNDgEQ9Zs9j3fZtyXtEmbxqu6X/Y6\nJjknWTL/Pj+sOOomiAewK2OMW1Vd/lx7zbwsPyV5ZR5dParqvt6E2yAeAD8aYyyZwXhW1S7/Lv8Q\nDwDaXJgD0CYeALSJBwBt4gFA2xvlkg7zl/yPqQAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the best-selling items: one bar per item, labelled on the x axis.\n",
"top_items = item_summary_df.head(n=20)  # slice once, reuse for labels and heights\n",
"objects = list(top_items['item_name'])\n",
"performance = list(top_items['item_count'])\n",
"y_pos = np.arange(len(objects))  # bar positions 0..n-1 (needs numpy imported as np)\n",
"\n",
"plt.bar(y_pos, performance, align='center', alpha=0.5)\n",
"plt.xticks(y_pos, objects, rotation='vertical')\n",
"plt.ylabel('Item count')\n",
"# Trailing semicolon keeps the Text object's repr out of the cell output.\n",
"plt.title('Item sales distribution');"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Analyze items contributing to top sales"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" item_name | \n",
" item_count | \n",
" item_perc | \n",
" total_perc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" whole milk | \n",
" 2513 | \n",
" 0.057947 | \n",
" 0.057947 | \n",
"
\n",
" \n",
" 1 | \n",
" other vegetables | \n",
" 1903 | \n",
" 0.043881 | \n",
" 0.101829 | \n",
"
\n",
" \n",
" 2 | \n",
" rolls/buns | \n",
" 1809 | \n",
" 0.041714 | \n",
" 0.143542 | \n",
"
\n",
" \n",
" 3 | \n",
" soda | \n",
" 1715 | \n",
" 0.039546 | \n",
" 0.183089 | \n",
"
\n",
" \n",
" 4 | \n",
" yogurt | \n",
" 1372 | \n",
" 0.031637 | \n",
" 0.214725 | \n",
"
\n",
" \n",
" 5 | \n",
" bottled water | \n",
" 1087 | \n",
" 0.025065 | \n",
" 0.239791 | \n",
"
\n",
" \n",
" 6 | \n",
" root vegetables | \n",
" 1072 | \n",
" 0.024719 | \n",
" 0.264510 | \n",
"
\n",
" \n",
" 7 | \n",
" tropical fruit | \n",
" 1032 | \n",
" 0.023797 | \n",
" 0.288307 | \n",
"
\n",
" \n",
" 8 | \n",
" shopping bags | \n",
" 969 | \n",
" 0.022344 | \n",
" 0.310651 | \n",
"
\n",
" \n",
" 9 | \n",
" sausage | \n",
" 924 | \n",
" 0.021307 | \n",
" 0.331957 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" item_name item_count item_perc total_perc\n",
"0 whole milk 2513 0.057947 0.057947\n",
"1 other vegetables 1903 0.043881 0.101829\n",
"2 rolls/buns 1809 0.041714 0.143542\n",
"3 soda 1715 0.039546 0.183089\n",
"4 yogurt 1372 0.031637 0.214725\n",
"5 bottled water 1087 0.025065 0.239791\n",
"6 root vegetables 1072 0.024719 0.264510\n",
"7 tropical fruit 1032 0.023797 0.288307\n",
"8 shopping bags 969 0.022344 0.310651\n",
"9 sausage 924 0.021307 0.331957"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of all purchases per item, and the running total of that share down the table.\n",
"item_summary_df['item_perc'] = item_summary_df['item_count'].div(total_item_count)\n",
"item_summary_df['total_perc'] = item_summary_df['item_perc'].cumsum()\n",
"item_summary_df.head(n=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Analyze items contributing to top 50% of sales"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(19, 4)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the items whose cumulative share (total_perc) is at most 0.5.\n",
"item_summary_df.loc[item_summary_df['total_perc'] <= 0.5].shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" item_name | \n",
" item_count | \n",
" item_perc | \n",
" total_perc | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" whole milk | \n",
" 2513 | \n",
" 0.057947 | \n",
" 0.057947 | \n",
"
\n",
" \n",
" 1 | \n",
" other vegetables | \n",
" 1903 | \n",
" 0.043881 | \n",
" 0.101829 | \n",
"
\n",
" \n",
" 2 | \n",
" rolls/buns | \n",
" 1809 | \n",
" 0.041714 | \n",
" 0.143542 | \n",
"
\n",
" \n",
" 3 | \n",
" soda | \n",
" 1715 | \n",
" 0.039546 | \n",
" 0.183089 | \n",
"
\n",
" \n",
" 4 | \n",
" yogurt | \n",
" 1372 | \n",
" 0.031637 | \n",
" 0.214725 | \n",
"
\n",
" \n",
" 5 | \n",
" bottled water | \n",
" 1087 | \n",
" 0.025065 | \n",
" 0.239791 | \n",
"
\n",
" \n",
" 6 | \n",
" root vegetables | \n",
" 1072 | \n",
" 0.024719 | \n",
" 0.264510 | \n",
"
\n",
" \n",
" 7 | \n",
" tropical fruit | \n",
" 1032 | \n",
" 0.023797 | \n",
" 0.288307 | \n",
"
\n",
" \n",
" 8 | \n",
" shopping bags | \n",
" 969 | \n",
" 0.022344 | \n",
" 0.310651 | \n",
"
\n",
" \n",
" 9 | \n",
" sausage | \n",
" 924 | \n",
" 0.021307 | \n",
" 0.331957 | \n",
"
\n",
" \n",
" 10 | \n",
" pastry | \n",
" 875 | \n",
" 0.020177 | \n",
" 0.352134 | \n",
"
\n",
" \n",
" 11 | \n",
" citrus fruit | \n",
" 814 | \n",
" 0.018770 | \n",
" 0.370904 | \n",
"
\n",
" \n",
" 12 | \n",
" bottled beer | \n",
" 792 | \n",
" 0.018263 | \n",
" 0.389167 | \n",
"
\n",
" \n",
" 13 | \n",
" newspapers | \n",
" 785 | \n",
" 0.018101 | \n",
" 0.407268 | \n",
"
\n",
" \n",
" 14 | \n",
" canned beer | \n",
" 764 | \n",
" 0.017617 | \n",
" 0.424885 | \n",
"
\n",
" \n",
" 15 | \n",
" pip fruit | \n",
" 744 | \n",
" 0.017156 | \n",
" 0.442041 | \n",
"
\n",
" \n",
" 16 | \n",
" fruit/vegetable juice | \n",
" 711 | \n",
" 0.016395 | \n",
" 0.458436 | \n",
"
\n",
" \n",
" 17 | \n",
" whipped/sour cream | \n",
" 705 | \n",
" 0.016257 | \n",
" 0.474693 | \n",
"
\n",
" \n",
" 18 | \n",
" brown bread | \n",
" 638 | \n",
" 0.014712 | \n",
" 0.489404 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" item_name item_count item_perc total_perc\n",
"0 whole milk 2513 0.057947 0.057947\n",
"1 other vegetables 1903 0.043881 0.101829\n",
"2 rolls/buns 1809 0.041714 0.143542\n",
"3 soda 1715 0.039546 0.183089\n",
"4 yogurt 1372 0.031637 0.214725\n",
"5 bottled water 1087 0.025065 0.239791\n",
"6 root vegetables 1072 0.024719 0.264510\n",
"7 tropical fruit 1032 0.023797 0.288307\n",
"8 shopping bags 969 0.022344 0.310651\n",
"9 sausage 924 0.021307 0.331957\n",
"10 pastry 875 0.020177 0.352134\n",
"11 citrus fruit 814 0.018770 0.370904\n",
"12 bottled beer 792 0.018263 0.389167\n",
"13 newspapers 785 0.018101 0.407268\n",
"14 canned beer 764 0.017617 0.424885\n",
"15 pip fruit 744 0.017156 0.442041\n",
"16 fruit/vegetable juice 711 0.016395 0.458436\n",
"17 whipped/sour cream 705 0.016257 0.474693\n",
"18 brown bread 638 0.014712 0.489404"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Items whose cumulative share (total_perc) is at most 0.5.\n",
"item_summary_df.loc[item_summary_df['total_perc'] <= 0.5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Construct Orange Table "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Build an Orange Table with one discrete ('0'/'1') variable per column of grocery_df.\n",
"input_assoc_rules = grocery_df\n",
"domain_grocery = Domain([DiscreteVariable.make(name=item, values=['0', '1'])\n",
"                         for item in input_assoc_rules.columns])\n",
"# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0; .values returns the same array.\n",
"data_gro_1 = Orange.data.Table.from_numpy(domain=domain_grocery, X=input_assoc_rules.values, Y=None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Prune Dataset for frequently purchased items"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def prune_dataset(input_df, length_trans = 2, total_sales_perc = 0.5, start_item = None, end_item = None):\n",
"    \"\"\"Select frequently purchased items and the transactions that contain enough of them.\n",
"\n",
"    Items are ranked by their column sum in ``input_df`` (descending). The caller's\n",
"    frame is never modified.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    input_df : pandas.DataFrame\n",
"        One row per transaction, one numeric column per item. A leftover\n",
"        'total_items' column, if present, is ignored.\n",
"    length_trans : int\n",
"        Minimum row sum over the selected item columns for a transaction to be kept.\n",
"    total_sales_perc : float\n",
"        Used when no item range is requested (start_item and end_item both None or 0):\n",
"        ranked items are selected while their cumulative share of the grand total\n",
"        stays strictly below this value.\n",
"    start_item, end_item : int or None\n",
"        Positional slice ``[start_item:end_item]`` into the ranked items. When only one\n",
"        bound is given, the other defaults to the first / last ranked item.\n",
"\n",
"    Returns\n",
"    -------\n",
"    (pandas.DataFrame, pandas.DataFrame)\n",
"        The kept transactions restricted to the selected item columns, and the summary\n",
"        rows of the selected items ('item_name', 'item_count', plus 'item_perc' and\n",
"        'total_perc' when selecting by share).\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If an item range is requested and end_item is not greater than start_item.\n",
"    \"\"\"\n",
"    # Only copy when a stale helper column actually has to be removed.\n",
"    if 'total_items' in input_df.columns:\n",
"        item_df = input_df.drop('total_items', axis=1)\n",
"    else:\n",
"        item_df = input_df\n",
"\n",
"    item_count = item_df.sum().sort_values(ascending=False).reset_index()\n",
"    item_count.columns = ['item_name', 'item_count']\n",
"\n",
"    if not start_item and not end_item:\n",
"        # No item range requested: select by cumulative share of the grand total.\n",
"        total_items = item_count['item_count'].sum()\n",
"        item_count['item_perc'] = item_count['item_count'] / total_items\n",
"        item_count['total_perc'] = item_count['item_perc'].cumsum()\n",
"        selected_counts = item_count[item_count['total_perc'] < total_sales_perc]\n",
"    else:\n",
"        first = 0 if start_item is None else start_item\n",
"        last = len(item_count) if end_item is None else end_item\n",
"        if last <= first:\n",
"            raise ValueError('end_item ({}) must be greater than start_item ({})'.format(end_item, start_item))\n",
"        selected_counts = item_count[first:last]\n",
"\n",
"    selected_items = list(selected_counts['item_name'])\n",
"    # Keep transactions holding at least length_trans of the selected items.\n",
"    enough_items = item_df[selected_items].sum(axis=1) >= length_trans\n",
"    return item_df.loc[enough_items, selected_items], selected_counts"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4585, 13)\n",
"['whole milk', 'other vegetables', 'rolls/buns', 'soda', 'yogurt', 'bottled water', 'root vegetables', 'tropical fruit', 'shopping bags', 'sausage', 'pastry', 'citrus fruit', 'bottled beer']\n"
]
}
],
"source": [
"# Prune grocery_df: 0.4 cumulative-share cut-off, at least 2 selected items per transaction.\n",
"output_df, item_counts = prune_dataset(input_df=grocery_df,\n",
"                                       length_trans=2,\n",
"                                       total_sales_perc=0.4)\n",
"print(output_df.shape)\n",
"print(output_df.columns.tolist())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Association Rule Mining with FP Growth"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# Wrap the pruned grocery frame in an Orange Table and one-hot encode it for FP-growth.\n",
"input_assoc_rules = output_df\n",
"domain_grocery = Domain([DiscreteVariable.make(name=item, values=['0', '1'])\n",
"                         for item in input_assoc_rules.columns])\n",
"# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0; .values returns the same array.\n",
"data_gro_1 = Orange.data.Table.from_numpy(domain=domain_grocery, X=input_assoc_rules.values, Y=None)\n",
"data_gro_1_en, mapping = OneHot.encode(data_gro_1, include_class=False)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"num of required transactions = 45\n"
]
}
],
"source": [
"# Mine frequent itemsets; min_support is a fraction of the number of transactions.\n",
"min_support = 0.01\n",
"num_trans = input_assoc_rules.shape[0]*min_support\n",
"print(\"num of required transactions = \", int(num_trans))\n",
"itemsets = dict(frequent_itemsets(data_gro_1_en, min_support=min_support))"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"166886"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of frequent itemsets collected in the itemsets dict.\n",
"len(itemsets)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Raw rules data frame of 16628 rules generated\n"
]
}
],
"source": [
"confidence = 0.3\n",
"rules_df = pd.DataFrame()\n",
"# Reset as well, so a skipped or empty run cannot leave rules from an earlier run behind.\n",
"pruned_rules_df = pd.DataFrame()\n",
"\n",
"# Rule generation is only attempted below one million itemsets.\n",
"if len(itemsets) < 1000000:\n",
"    # Keep only rules with a single-item consequent.\n",
"    rules = [(P, Q, supp, conf)\n",
"             for P, Q, supp, conf in association_rules(itemsets, confidence)\n",
"             if len(Q) == 1]\n",
"\n",
"    # Readable 'column=value' label for every encoded item id.\n",
"    names = {item: '{}={}'.format(var.name, val)\n",
"             for item, var, val in OneHot.decode(mapping, data_gro_1, mapping)}\n",
"\n",
"    # Only labels ending in \"1\" (the '...=1' items) are reported in rules.\n",
"    eligible_ante = {v for k, v in names.items() if v.endswith(\"1\")}\n",
"\n",
"    N = input_assoc_rules.shape[0]\n",
"\n",
"    rule_stats = list(rules_stats(rules, itemsets, N))\n",
"\n",
"    rule_list_df = []\n",
"    for stat in rule_stats:\n",
"        ante, cons = stat[0], stat[1]\n",
"        named_cons = names[next(iter(cons))]\n",
"        if named_cons not in eligible_ante:\n",
"            continue\n",
"        # [:-2] strips the trailing '=1' from each label.\n",
"        rule_lhs = [names[i][:-2] for i in ante if names[i] in eligible_ante]\n",
"        if len(rule_lhs) > 1:\n",
"            rule_list_df.append({'support': stat[2],\n",
"                                 'confidence': stat[3],\n",
"                                 'coverage': stat[4],\n",
"                                 'strength': stat[5],\n",
"                                 'lift': stat[6],\n",
"                                 'leverage': stat[7],\n",
"                                 'antecedent': ', '.join(rule_lhs),\n",
"                                 'consequent': named_cons[:-2]})\n",
"    rules_df = pd.DataFrame(rule_list_df)\n",
"    print(\"Raw rules data frame of {} rules generated\".format(rules_df.shape[0]))\n",
"    if not rules_df.empty:\n",
"        # NOTE(review): max() is taken per column, so the statistics of one\n",
"        # (antecedent, consequent) pair may come from different raw rules.\n",
"        pruned_rules_df = rules_df.groupby(['antecedent','consequent']).max().reset_index()\n",
"    else:\n",
"        print(\"Unable to generate any rule\")\n",
"else:\n",
"    print(\"Skipped rule generation: {} itemsets is not below the 1000000 limit\".format(len(itemsets)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Sorting rules in our Grocery Dataset"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" consequent | \n",
" antecedent | \n",
" support | \n",
" confidence | \n",
" lift | \n",
"
\n",
" \n",
" \n",
" \n",
" 4 | \n",
" root vegetables | \n",
" yogurt, whole milk, tropical fruit | \n",
" 228 | \n",
" 0.463636 | \n",
" 2.230611 | \n",
"
\n",
" \n",
" 5 | \n",
" sausage | \n",
" shopping bags, rolls/buns | \n",
" 59 | \n",
" 0.393162 | \n",
" 2.201037 | \n",
"
\n",
" \n",
" 8 | \n",
" tropical fruit | \n",
" yogurt, root vegetables, whole milk | \n",
" 92 | \n",
" 0.429907 | \n",
" 2.156588 | \n",
"
\n",
" \n",
" 1 | \n",
" citrus fruit | \n",
" whole milk, other vegetables, tropical fruit | \n",
" 66 | \n",
" 0.333333 | \n",
" 2.125637 | \n",
"
\n",
" \n",
" 10 | \n",
" yogurt | \n",
" whole milk, tropical fruit | \n",
" 199 | \n",
" 0.484211 | \n",
" 1.891061 | \n",
"
\n",
" \n",
" 2 | \n",
" other vegetables | \n",
" yogurt, whole milk, tropical fruit | \n",
" 228 | \n",
" 0.643836 | \n",
" 1.826724 | \n",
"
\n",
" \n",
" 6 | \n",
" shopping bags | \n",
" sausage, soda | \n",
" 50 | \n",
" 0.304878 | \n",
" 1.782992 | \n",
"
\n",
" \n",
" 0 | \n",
" bottled water | \n",
" yogurt, soda | \n",
" 59 | \n",
" 0.333333 | \n",
" 1.707635 | \n",
"
\n",
" \n",
" 9 | \n",
" whole milk | \n",
" yogurt, tropical fruit | \n",
" 228 | \n",
" 0.754098 | \n",
" 1.703222 | \n",
"
\n",
" \n",
" 3 | \n",
" rolls/buns | \n",
" yogurt, whole milk, tropical fruit | \n",
" 97 | \n",
" 0.522222 | \n",
" 1.679095 | \n",
"
\n",
" \n",
" 7 | \n",
" soda | \n",
" yogurt, sausage | \n",
" 95 | \n",
" 0.390625 | \n",
" 1.398139 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" consequent antecedent support \\\n",
"4 root vegetables yogurt, whole milk, tropical fruit 228 \n",
"5 sausage shopping bags, rolls/buns 59 \n",
"8 tropical fruit yogurt, root vegetables, whole milk 92 \n",
"1 citrus fruit whole milk, other vegetables, tropical fruit 66 \n",
"10 yogurt whole milk, tropical fruit 199 \n",
"2 other vegetables yogurt, whole milk, tropical fruit 228 \n",
"6 shopping bags sausage, soda 50 \n",
"0 bottled water yogurt, soda 59 \n",
"9 whole milk yogurt, tropical fruit 228 \n",
"3 rolls/buns yogurt, whole milk, tropical fruit 97 \n",
"7 soda yogurt, sausage 95 \n",
"\n",
" confidence lift \n",
"4 0.463636 2.230611 \n",
"5 0.393162 2.201037 \n",
"8 0.429907 2.156588 \n",
"1 0.333333 2.125637 \n",
"10 0.484211 1.891061 \n",
"2 0.643836 1.826724 \n",
"6 0.304878 1.782992 \n",
"0 0.333333 1.707635 \n",
"9 0.754098 1.703222 \n",
"3 0.522222 1.679095 \n",
"7 0.390625 1.398139 "
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# For each consequent keep the single rule with the highest lift, so the antecedent,\n",
"# support, confidence and lift on a row all belong to the same rule. A column-wise\n",
"# groupby('consequent').max() would combine values taken from different rules.\n",
"rule_cols = ['consequent', 'antecedent', 'support', 'confidence', 'lift']\n",
"best_rule_idx = pruned_rules_df.groupby('consequent')['lift'].idxmax()\n",
"(pruned_rules_df.loc[best_rule_idx, rule_cols]\n",
"    .reset_index(drop=True)\n",
"    .sort_values(['lift', 'support', 'confidence'], ascending=False))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Association rule mining on our Online Retail dataset"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load and Filter Dataset"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the workbook and keep only the rows whose Country is 'United Kingdom'.\n",
"cs_mba = pd.read_excel(io=r'Online Retail.xlsx')\n",
"uk_rows = cs_mba['Country'] == 'United Kingdom'\n",
"cs_mba_uk = cs_mba[uk_rows]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" InvoiceNo | \n",
" StockCode | \n",
" Description | \n",
" Quantity | \n",
" InvoiceDate | \n",
" UnitPrice | \n",
" CustomerID | \n",
" Country | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 536365 | \n",
" 85123A | \n",
" WHITE HANGING HEART T-LIGHT HOLDER | \n",
" 6 | \n",
" 2010-12-01 08:26:00 | \n",
" 2.55 | \n",
" 17850.0 | \n",
" United Kingdom | \n",
"
\n",
" \n",
" 1 | \n",
" 536365 | \n",
" 71053 | \n",
" WHITE METAL LANTERN | \n",
" 6 | \n",
" 2010-12-01 08:26:00 | \n",
" 3.39 | \n",
" 17850.0 | \n",
" United Kingdom | \n",
"
\n",
" \n",
" 2 | \n",
" 536365 | \n",
" 84406B | \n",
" CREAM CUPID HEARTS COAT HANGER | \n",
" 8 | \n",
" 2010-12-01 08:26:00 | \n",
" 2.75 | \n",
" 17850.0 | \n",
" United Kingdom | \n",
"
\n",
" \n",
" 3 | \n",
" 536365 | \n",
" 84029G | \n",
" KNITTED UNION FLAG HOT WATER BOTTLE | \n",
" 6 | \n",
" 2010-12-01 08:26:00 | \n",
" 3.39 | \n",
" 17850.0 | \n",
" United Kingdom | \n",
"
\n",
" \n",
" 4 | \n",
" 536365 | \n",
" 84029E | \n",
" RED WOOLLY HOTTIE WHITE HEART. | \n",
" 6 | \n",
" 2010-12-01 08:26:00 | \n",
" 3.39 | \n",
" 17850.0 | \n",
" United Kingdom | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" InvoiceNo StockCode Description Quantity \\\n",
"0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 \n",
"1 536365 71053 WHITE METAL LANTERN 6 \n",
"2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 \n",
"3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 \n",
"4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 \n",
"\n",
" InvoiceDate UnitPrice CustomerID Country \n",
"0 2010-12-01 08:26:00 2.55 17850.0 United Kingdom \n",
"1 2010-12-01 08:26:00 3.39 17850.0 United Kingdom \n",
"2 2010-12-01 08:26:00 2.75 17850.0 United Kingdom \n",
"3 2010-12-01 08:26:00 3.39 17850.0 United Kingdom \n",
"4 2010-12-01 08:26:00 3.39 17850.0 United Kingdom "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five UK rows.\n",
"cs_mba_uk.head(n=5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Remove returned items, since we are only interested in buying patterns."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Drop invoices whose number contains 'C'. astype(str) lets the text test run on\n",
"# non-string invoice numbers too, instead of relying on NaN == True being False.\n",
"is_return = cs_mba_uk.InvoiceNo.astype(str).str.contains('C')\n",
"cs_mba_uk = cs_mba_uk[~is_return]\n",
"# Keep non-negative quantities. The previous `~cs_mba_uk.Quantity<0` parsed as\n",
"# `(~Quantity) < 0`, which only gives this result for integer quantities.\n",
"cs_mba_uk = cs_mba_uk[cs_mba_uk.Quantity >= 0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(486286, 8)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the filtered UK frame.\n",
"cs_mba_uk.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(18786,)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One value_counts entry per distinct InvoiceNo, so the shape gives the number of invoices.\n",
"cs_mba_uk['InvoiceNo'].value_counts().shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build Transaction Dataset"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Distinct descriptions, and one row per invoice holding a tuple of its descriptions.\n",
"items = list(cs_mba_uk['Description'].unique())\n",
"grouped = cs_mba_uk.groupby('InvoiceNo')\n",
"invoice_tuples = grouped.aggregate(lambda x: tuple(x)).reset_index()\n",
"transaction_level_df_uk = invoice_tuples[['InvoiceNo','Description']]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Build a 0/1 matrix: one row per invoice, one column per item description.\n",
"# Missing descriptions (NaN) are excluded explicitly rather than by deleting whichever\n",
"# column happens to come first in the finished frame.\n",
"# NOTE(review): assumes the column the old `del tranasction_df[columns[0]]` removed was the\n",
"# NaN description - confirm against the data.\n",
"valid_items = [item for item in items if pd.notnull(item)]\n",
"\n",
"transaction_rows = []\n",
"for rec in transaction_level_df_uk.to_dict('records'):\n",
"    # A set gives constant-time membership tests instead of scanning the invoice's tuple.\n",
"    purchased = set(rec['Description'])\n",
"    transaction_rows.append({item: int(item in purchased) for item in valid_items})\n",
"\n",
"tranasction_df = pd.DataFrame(transaction_rows, columns=valid_items)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(18786, 4058)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the invoice-by-item matrix.\n",
"tranasction_df.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" METAL SIGN HIS DINNER IS SERVED | \n",
" JAM JAR WITH GREEN LID | \n",
" SANDWICH BATH SPONGE | \n",
" KIDS RAIN MAC BLUE | \n",
" EMPIRE TISSUE BOX | \n",
" S/2 BEACH HUT TREASURE CHESTS | \n",
" EMBROIDERED RIBBON REEL DAISY | \n",
" GOLD/AMBER DROP EARRINGS W LEAF | \n",
" ANTIQUE GLASS PEDESTAL BOWL | \n",
" CUPCAKE LACE PAPER SET 6 | \n",
" ... | \n",
" RETROSPOT CANDLE MEDIUM | \n",
" LARGE WHITE/PINK ROSE ART FLOWER | \n",
" DOORMAT UNION JACK GUNS AND ROSES | \n",
" GLASS BEAD HOOP NECKLACE BLACK | \n",
" MEDIUM PARLOUR FRAME | \n",
" RED 3 PIECE RETROSPOT CUTLERY SET | \n",
" FRENCH WC SIGN BLUE METAL | \n",
" DOORMAT MERRY CHRISTMAS RED | \n",
" SMALL POPCORN HOLDER | \n",
" PINK MURANO TWIST NECKLACE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" ... | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 4058 columns
\n",
"
"
],
"text/plain": [
" METAL SIGN HIS DINNER IS SERVED JAM JAR WITH GREEN LID \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 1 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" SANDWICH BATH SPONGE KIDS RAIN MAC BLUE EMPIRE TISSUE BOX \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"\n",
" S/2 BEACH HUT TREASURE CHESTS EMBROIDERED RIBBON REEL DAISY \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" GOLD/AMBER DROP EARRINGS W LEAF ANTIQUE GLASS PEDESTAL BOWL \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" CUPCAKE LACE PAPER SET 6 ... \\\n",
"0 0 ... \n",
"1 0 ... \n",
"2 0 ... \n",
"3 0 ... \n",
"4 0 ... \n",
"\n",
" RETROSPOT CANDLE MEDIUM LARGE WHITE/PINK ROSE ART FLOWER \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" DOORMAT UNION JACK GUNS AND ROSES GLASS BEAD HOOP NECKLACE BLACK \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" MEDIUM PARLOUR FRAME RED 3 PIECE RETROSPOT CUTLERY SET \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" FRENCH WC SIGN BLUE METAL DOORMAT MERRY CHRISTMAS RED \\\n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
" SMALL POPCORN HOLDER PINK MURANO TWIST NECKLACE \n",
"0 0 0 \n",
"1 0 0 \n",
"2 0 0 \n",
"3 0 0 \n",
"4 0 0 \n",
"\n",
"[5 rows x 4058 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the invoice-by-item matrix.\n",
"tranasction_df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4961, 15)\n"
]
}
],
"source": [
"# Keep ranked items 0-14 and the transactions containing at least 2 of them.\n",
"output_df_uk_n, item_counts_n = prune_dataset(input_df=tranasction_df,\n",
"                                              length_trans=2,\n",
"                                              start_item=0,\n",
"                                              end_item=15)\n",
"print(output_df_uk_n.shape)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" WHITE HANGING HEART T-LIGHT HOLDER | \n",
" JUMBO BAG RED RETROSPOT | \n",
" REGENCY CAKESTAND 3 TIER | \n",
" PARTY BUNTING | \n",
" LUNCH BAG RED RETROSPOT | \n",
" ASSORTED COLOUR BIRD ORNAMENT | \n",
" SET OF 3 CAKE TINS PANTRY DESIGN | \n",
" NATURAL SLATE HEART CHALKBOARD | \n",
" LUNCH BAG BLACK SKULL. | \n",
" HEART OF WICKER SMALL | \n",
" JUMBO BAG PINK POLKADOT | \n",
" JUMBO SHOPPER VINTAGE RED PAISLEY | \n",
" JUMBO STORAGE BAG SUKI | \n",
" PACK OF 72 RETROSPOT CAKE CASES | \n",
" PAPER CHAIN KIT 50'S CHRISTMAS | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 5 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 8 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 16 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 18 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" WHITE HANGING HEART T-LIGHT HOLDER JUMBO BAG RED RETROSPOT \\\n",
"3 1 0 \n",
"5 0 0 \n",
"8 0 0 \n",
"16 0 1 \n",
"18 0 0 \n",
"\n",
" REGENCY CAKESTAND 3 TIER PARTY BUNTING LUNCH BAG RED RETROSPOT \\\n",
"3 1 0 1 \n",
"5 0 0 0 \n",
"8 0 1 0 \n",
"16 1 1 0 \n",
"18 1 1 0 \n",
"\n",
" ASSORTED COLOUR BIRD ORNAMENT SET OF 3 CAKE TINS PANTRY DESIGN \\\n",
"3 0 1 \n",
"5 1 0 \n",
"8 1 1 \n",
"16 0 0 \n",
"18 0 0 \n",
"\n",
" NATURAL SLATE HEART CHALKBOARD LUNCH BAG BLACK SKULL. \\\n",
"3 0 0 \n",
"5 0 0 \n",
"8 0 0 \n",
"16 0 0 \n",
"18 0 0 \n",
"\n",
" HEART OF WICKER SMALL JUMBO BAG PINK POLKADOT \\\n",
"3 0 0 \n",
"5 1 1 \n",
"8 0 0 \n",
"16 1 1 \n",
"18 0 0 \n",
"\n",
" JUMBO SHOPPER VINTAGE RED PAISLEY JUMBO STORAGE BAG SUKI \\\n",
"3 0 0 \n",
"5 0 0 \n",
"8 0 0 \n",
"16 1 1 \n",
"18 0 0 \n",
"\n",
" PACK OF 72 RETROSPOT CAKE CASES PAPER CHAIN KIT 50'S CHRISTMAS \n",
"3 0 0 \n",
"5 0 0 \n",
"8 0 0 \n",
"16 1 0 \n",
"18 0 0 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five pruned UK transactions.\n",
"output_df_uk_n.head(n=5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Association Rule Mining with FP Growth"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Wrap the pruned UK frame in an Orange Table and one-hot encode it for FP-growth.\n",
"input_assoc_rules = output_df_uk_n\n",
"domain_transac = Domain([DiscreteVariable.make(name=item, values=['0', '1'])\n",
"                         for item in input_assoc_rules.columns])\n",
"# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0; .values returns the same array.\n",
"data_tran_uk = Orange.data.Table.from_numpy(domain=domain_transac, X=input_assoc_rules.values, Y=None)\n",
"data_tran_uk_en, mapping = OneHot.encode(data_tran_uk, include_class=True)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"num of required transactions = 49\n"
]
}
],
"source": [
"# Mine frequent itemsets; support is a fraction of the number of transactions.\n",
"support = 0.01\n",
"num_trans = input_assoc_rules.shape[0]*support\n",
"print(\"num of required transactions = \", int(num_trans))\n",
"itemsets = dict(frequent_itemsets(data_tran_uk_en, support))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"645632"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of frequent itemsets collected in the itemsets dict.\n",
"len(itemsets)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Raw rules data frame of 117464 rules generated\n"
]
}
],
"source": [
"confidence = 0.3\n",
"rules_df = pd.DataFrame()\n",
"# Reset as well, so a skipped or empty run cannot leave rules from an earlier run behind.\n",
"pruned_rules_df = pd.DataFrame()\n",
"\n",
"# Rule generation is only attempted below one million itemsets.\n",
"if len(itemsets) < 1000000:\n",
"    # Keep only rules with a single-item consequent.\n",
"    rules = [(P, Q, supp, conf)\n",
"             for P, Q, supp, conf in association_rules(itemsets, confidence)\n",
"             if len(Q) == 1]\n",
"\n",
"    # Readable 'column=value' label for every encoded item id.\n",
"    names = {item: '{}={}'.format(var.name, val)\n",
"             for item, var, val in OneHot.decode(mapping, data_tran_uk, mapping)}\n",
"\n",
"    # Only labels ending in \"1\" (the '...=1' items) are reported in rules.\n",
"    eligible_ante = {v for k, v in names.items() if v.endswith(\"1\")}\n",
"\n",
"    N = input_assoc_rules.shape[0]\n",
"\n",
"    rule_stats = list(rules_stats(rules, itemsets, N))\n",
"\n",
"    rule_list_df = []\n",
"    for stat in rule_stats:\n",
"        ante, cons = stat[0], stat[1]\n",
"        named_cons = names[next(iter(cons))]\n",
"        if named_cons not in eligible_ante:\n",
"            continue\n",
"        # [:-2] strips the trailing '=1' from each label.\n",
"        rule_lhs = [names[i][:-2] for i in ante if names[i] in eligible_ante]\n",
"        if len(rule_lhs) > 1:\n",
"            rule_list_df.append({'support': stat[2],\n",
"                                 'confidence': stat[3],\n",
"                                 'coverage': stat[4],\n",
"                                 'strength': stat[5],\n",
"                                 'lift': stat[6],\n",
"                                 'leverage': stat[7],\n",
"                                 'antecedent': ', '.join(rule_lhs),\n",
"                                 'consequent': named_cons[:-2]})\n",
"    rules_df = pd.DataFrame(rule_list_df)\n",
"    print(\"Raw rules data frame of {} rules generated\".format(rules_df.shape[0]))\n",
"    if not rules_df.empty:\n",
"        # NOTE(review): max() is taken per column, so the statistics of one\n",
"        # (antecedent, consequent) pair may come from different raw rules.\n",
"        pruned_rules_df = rules_df.groupby(['antecedent','consequent']).max().reset_index()\n",
"    else:\n",
"        print(\"Unable to generate any rule\")\n",
"else:\n",
"    print(\"Skipped rule generation: {} itemsets is not below the 1000000 limit\".format(len(itemsets)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sort and display rules"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" consequent | \n",
" antecedent | \n",
" support | \n",
" confidence | \n",
" lift | \n",
"
\n",
" \n",
" \n",
" \n",
" 8 | \n",
" PACK OF 72 RETROSPOT CAKE CASES | \n",
" WHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER, NATURAL SLATE HEART CHALKBOARD | \n",
" 145 | \n",
" 0.971014 | \n",
" 5.394404 | \n",
"
\n",
" \n",
" 9 | \n",
" PAPER CHAIN KIT 50'S CHRISTMAS | \n",
" WHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER, NATURAL SLATE HEART CHALKBOARD | \n",
" 94 | \n",
" 0.597701 | \n",
" 4.341428 | \n",
"
\n",
" \n",
" 3 | \n",
" JUMBO SHOPPER VINTAGE RED PAISLEY | \n",
" WHITE HANGING HEART T-LIGHT HOLDER, PAPER CHAIN KIT 50'S CHRISTMAS | \n",
" 384 | \n",
" 0.879310 | \n",
" 4.218819 | \n",
"
\n",
" \n",
" 5 | \n",
" LUNCH BAG BLACK SKULL. | \n",
" WHITE HANGING HEART T-LIGHT HOLDER, PACK OF 72 RETROSPOT CAKE CASES, LUNCH BAG RED RETROSPOT | \n",
" 227 | \n",
" 0.852459 | \n",
" 4.078157 | \n",
"
\n",
" \n",
" 4 | \n",
" JUMBO STORAGE BAG SUKI | \n",
" WHITE HANGING HEART T-LIGHT HOLDER, SET OF 3 CAKE TINS PANTRY DESIGN , JUMBO BAG PINK POLKADOT | \n",
" 405 | \n",
" 0.852459 | \n",
" 4.016191 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" consequent \\\n",
"8 PACK OF 72 RETROSPOT CAKE CASES \n",
"9 PAPER CHAIN KIT 50'S CHRISTMAS \n",
"3 JUMBO SHOPPER VINTAGE RED PAISLEY \n",
"5 LUNCH BAG BLACK SKULL. \n",
"4 JUMBO STORAGE BAG SUKI \n",
"\n",
" antecedent \\\n",
"8 WHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER, NATURAL SLATE HEART CHALKBOARD \n",
"9 WHITE HANGING HEART T-LIGHT HOLDER, REGENCY CAKESTAND 3 TIER, NATURAL SLATE HEART CHALKBOARD \n",
"3 WHITE HANGING HEART T-LIGHT HOLDER, PAPER CHAIN KIT 50'S CHRISTMAS \n",
"5 WHITE HANGING HEART T-LIGHT HOLDER, PACK OF 72 RETROSPOT CAKE CASES, LUNCH BAG RED RETROSPOT \n",
"4 WHITE HANGING HEART T-LIGHT HOLDER, SET OF 3 CAKE TINS PANTRY DESIGN , JUMBO BAG PINK POLKADOT \n",
"\n",
" support confidence lift \n",
"8 145 0.971014 5.394404 \n",
"9 94 0.597701 4.341428 \n",
"3 384 0.879310 4.218819 \n",
"5 227 0.852459 4.078157 \n",
"4 405 0.852459 4.016191 "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Widen text columns so long antecedents are not truncated; dw keeps the old setting.\n",
"dw = pd.options.display.max_colwidth\n",
"pd.options.display.max_colwidth = 100\n",
"# For each consequent keep the single rule with the highest lift, so the antecedent,\n",
"# support, confidence and lift on a row all belong to the same rule. A column-wise\n",
"# groupby('consequent').max() would combine values taken from different rules.\n",
"rule_cols = ['consequent', 'antecedent', 'support', 'confidence', 'lift']\n",
"best_rule_idx = pruned_rules_df.groupby('consequent')['lift'].idxmax()\n",
"(pruned_rules_df.loc[best_rule_idx, rule_cols]\n",
"    .reset_index(drop=True)\n",
"    .sort_values(['lift', 'support', 'confidence'], ascending=False)\n",
"    .head(5))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Put the column-width display option back to the value saved in dw.\n",
"pd.set_option('display.max_colwidth', dw)"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}