{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "pyfim_explore.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyPjTZcx9ih2K4LRs8XDTWCp",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"
"
]
},
{
"cell_type": "code",
"metadata": {
"id": "LbMrmbQestAa"
},
"source": [
"# Install the notebook's dependencies into the running kernel's environment\n",
"# (pyfim is pinned; `%pip` targets the kernel, unlike `!pip`).\n",
"%pip install -q pandas pyfim==6.28"
],
"execution_count": 35,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "20GxxD5bs5n8"
},
"source": [
"import pandas as pd\n",
"from fim import arules"
],
"execution_count": 36,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "SyWBp3jNQncL"
},
"source": [
"# Print the arules docstring into the cell output. Plain `help()` is used\n",
"# instead of IPython's `??` so the cell also works outside an interactive\n",
"# pager and the text is kept with the executed notebook.\n",
"help(arules)"
],
"execution_count": 37,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "V-47_PEsx3fh"
},
"source": [
"# Parameter reference: https://borgelt.net/pyfim.html\n",
"\n",
"# --- inputs handed to arules ---\n",
"\n",
"# Minimum support of an association rule (pyfim default: 10).\n",
"# NOTE(review): presumably a percentage of transactions - confirm in the pyfim docs.\n",
"supp = 2\n",
"\n",
"# Minimum confidence of an association rule, in percent (pyfim default: 80).\n",
"conf = 50\n",
"\n",
"# One letter per value reported for each rule; the letters used here are\n",
"# a (absolute support), s (relative support) and C (confidence in percent).\n",
"report = 'asC'"
],
"execution_count": 38,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "bzDDyFEmxG6X"
},
"source": [
"# Toy market-basket data: one inner list of item names per transaction.\n",
"dataset = [\n",
"    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],\n",
"    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],\n",
"    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],\n",
"    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],\n",
"    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],\n",
"    ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],\n",
"    ['Milk', 'Unicorn', 'Eggs', 'Kidney Beans', 'Yogurt'],\n",
"    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Yogurt', 'Eggs'],\n",
"    ['Corn', 'Yogurt', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],\n",
"    ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],\n",
"]"
],
"execution_count": 39,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "dUju9t9fs5ir",
"outputId": "34e7570a-fdf0-45d4-e584-644049dd4c85",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"# Map each pyfim `report` code to a readable column name\n",
"# (codes as listed at https://borgelt.net/pyfim.html).\n",
"# Every code appears exactly once: a repeated key in a dict literal would\n",
"# silently replace the earlier entry.\n",
"report_colnames = {\n",
"    'a': 'support_itemset_absolute',\n",
"    's': 'support_itemset_relative',\n",
"    'S': 'support_itemset_relative_pct',\n",
"    'b': 'support_bodyset_absolute',\n",
"    'x': 'support_bodyset_relative',\n",
"    'X': 'support_bodyset_relative_pct',\n",
"    'h': 'support_headitem_absolute',\n",
"    'y': 'support_headitem_relative',\n",
"    'Y': 'support_headitem_relative_pct',\n",
"    'c': 'confidence',\n",
"    'C': 'confidence_pct',\n",
"    'l': 'lift',\n",
"    'L': 'lift_pct',\n",
"    'e': 'evaluation',\n",
"    'E': 'evaluation_pct',\n",
"    'Q': 'support_emptyset',\n",
"}\n",
"\n",
"# mine the association rules with the parameters set above\n",
"result = arules(dataset, supp=supp, conf=conf, report=report)\n",
"\n",
"# Build the rules table: the first two fields of each result row are named\n",
"# consequent and antecedent, followed by one column per requested report\n",
"# code (a code missing from the mapping keeps its raw letter as the name).\n",
"colnames = ['consequent', 'antecedent'] + [report_colnames.get(k, k) for k in report]\n",
"df_rules = (\n",
"    pd.DataFrame(result, columns=colnames)\n",
"    .sort_values('support_itemset_absolute', ascending=False)\n",
")\n",
"print(df_rules.shape)"
],
"execution_count": 40,
"outputs": [
{
"output_type": "stream",
"text": [
"(484, 5)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "I7B2HifTy80m",
"outputId": "c1bbdc1d-d897-4870-c0bd-abb94b216a62",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 359
}
},
"source": [
"# look at some higher support rules\n",
"df_rules.head(10)"
],
"execution_count": 41,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" consequent | \n",
" antecedent | \n",
" support_itemset_absolute | \n",
" support_itemset_relative | \n",
" confidence_pct | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Eggs | \n",
" () | \n",
" 9 | \n",
" 0.9 | \n",
" 90.000000 | \n",
"
\n",
" \n",
" | 3 | \n",
" Yogurt | \n",
" () | \n",
" 8 | \n",
" 0.8 | \n",
" 80.000000 | \n",
"
\n",
" \n",
" | 11 | \n",
" Kidney Beans | \n",
" () | \n",
" 8 | \n",
" 0.8 | \n",
" 80.000000 | \n",
"
\n",
" \n",
" | 2 | \n",
" Yogurt | \n",
" (Eggs,) | \n",
" 7 | \n",
" 0.7 | \n",
" 77.777778 | \n",
"
\n",
" \n",
" | 4 | \n",
" Eggs | \n",
" (Kidney Beans,) | \n",
" 7 | \n",
" 0.7 | \n",
" 87.500000 | \n",
"
\n",
" \n",
" | 5 | \n",
" Kidney Beans | \n",
" (Eggs,) | \n",
" 7 | \n",
" 0.7 | \n",
" 77.777778 | \n",
"
\n",
" \n",
" | 1 | \n",
" Eggs | \n",
" (Yogurt,) | \n",
" 7 | \n",
" 0.7 | \n",
" 87.500000 | \n",
"
\n",
" \n",
" | 10 | \n",
" Kidney Beans | \n",
" (Yogurt,) | \n",
" 6 | \n",
" 0.6 | \n",
" 75.000000 | \n",
"
\n",
" \n",
" | 29 | \n",
" Corn | \n",
" () | \n",
" 6 | \n",
" 0.6 | \n",
" 60.000000 | \n",
"
\n",
" \n",
" | 58 | \n",
" Milk | \n",
" () | \n",
" 6 | \n",
" 0.6 | \n",
" 60.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" consequent antecedent ... support_itemset_relative confidence_pct\n",
"0 Eggs () ... 0.9 90.000000\n",
"3 Yogurt () ... 0.8 80.000000\n",
"11 Kidney Beans () ... 0.8 80.000000\n",
"2 Yogurt (Eggs,) ... 0.7 77.777778\n",
"4 Eggs (Kidney Beans,) ... 0.7 87.500000\n",
"5 Kidney Beans (Eggs,) ... 0.7 77.777778\n",
"1 Eggs (Yogurt,) ... 0.7 87.500000\n",
"10 Kidney Beans (Yogurt,) ... 0.6 75.000000\n",
"29 Corn () ... 0.6 60.000000\n",
"58 Milk () ... 0.6 60.000000\n",
"\n",
"[10 rows x 5 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 41
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "CJY1h5f8y8jm",
"outputId": "22df6fe0-1772-4a1d-f8f4-458252ab1494",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 359
}
},
"source": [
"# look at some lower support rules\n",
"df_rules.tail(10)"
],
"execution_count": 42,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" consequent | \n",
" antecedent | \n",
" support_itemset_absolute | \n",
" support_itemset_relative | \n",
" confidence_pct | \n",
"
\n",
" \n",
" \n",
" \n",
" | 294 | \n",
" Yogurt | \n",
" (Ice cream, Kidney Beans) | \n",
" 1 | \n",
" 0.1 | \n",
" 50.0 | \n",
"
\n",
" \n",
" | 295 | \n",
" Kidney Beans | \n",
" (Ice cream, Yogurt) | \n",
" 1 | \n",
" 0.1 | \n",
" 100.0 | \n",
"
\n",
" \n",
" | 112 | \n",
" Yogurt | \n",
" (Onion, Milk, Kidney Beans, Eggs) | \n",
" 1 | \n",
" 0.1 | \n",
" 100.0 | \n",
"
\n",
" \n",
" | 111 | \n",
" Eggs | \n",
" (Onion, Milk, Kidney Beans, Yogurt) | \n",
" 1 | \n",
" 0.1 | \n",
" 100.0 | \n",
"
\n",
" \n",
" | 110 | \n",
" Kidney Beans | \n",
" (Onion, Milk, Eggs) | \n",
" 1 | \n",
" 0.1 | \n",
" 100.0 | \n",
"
\n",
" \n",
" | 299 | \n",
" Eggs | \n",
" (Ice cream, Corn, Yogurt) | \n",
" 1 | \n",
" 0.1 | \n",
" 100.0 | \n",
"
\n",
" \n",
" | 300 | \n",
" Yogurt | \n",
" (Ice cream, Corn, Eggs) | \n",
" 1 | \n",
" 0.1 | \n",
" 50.0 | \n",
"
\n",
" \n",
" | 301 | \n",
" Corn | \n",
" (Ice cream, Yogurt, Eggs) | \n",
" 1 | \n",
" 0.1 | \n",
" 100.0 | \n",
"
\n",
" \n",
" | 302 | \n",
" Yogurt | \n",
" (Ice cream, Corn) | \n",
" 1 | \n",
" 0.1 | \n",
" 50.0 | \n",
"
\n",
" \n",
" | 483 | \n",
" Milk | \n",
" (Apple,) | \n",
" 1 | \n",
" 0.1 | \n",
" 100.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" consequent ... confidence_pct\n",
"294 Yogurt ... 50.0\n",
"295 Kidney Beans ... 100.0\n",
"112 Yogurt ... 100.0\n",
"111 Eggs ... 100.0\n",
"110 Kidney Beans ... 100.0\n",
"299 Eggs ... 100.0\n",
"300 Yogurt ... 50.0\n",
"301 Corn ... 100.0\n",
"302 Yogurt ... 50.0\n",
"483 Milk ... 100.0\n",
"\n",
"[10 rows x 5 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 42
}
]
}
]
}