{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "pyfim_explore.ipynb", "provenance": [], "collapsed_sections": [], "authorship_tag": "ABX9TyPjTZcx9ih2K4LRs8XDTWCp", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [
 { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] },
 { "cell_type": "code", "metadata": { "id": "LbMrmbQestAa" }, "source": [
   "# uncomment to install the dependencies on a fresh runtime\n",
   "#!pip install pandas pyfim==6.28" ], "execution_count": 35, "outputs": [] },
 { "cell_type": "code", "metadata": { "id": "20GxxD5bs5n8" }, "source": [
   "import pandas as pd\n",
   "from fim import arules" ], "execution_count": 36, "outputs": [] },
 { "cell_type": "code", "metadata": { "id": "SyWBp3jNQncL" }, "source": [
   "# display docs (IPython introspection syntax, not plain Python)\n",
   "??arules" ], "execution_count": 37, "outputs": [] },
 { "cell_type": "code", "metadata": { "id": "V-47_PEsx3fh" }, "source": [
   "# for more details visit here: https://borgelt.net/pyfim.html\n",
   "\n",
   "# inputs\n",
   "supp = 2  # minimum support of an assoc. rule (positive value = percent of transactions; default: 10)\n",
   "conf = 50  # minimum confidence of an assoc. rule in percent (default: 80%)\n",
   "report = 'asC'  # one value per letter is reported for each rule; letters are the keys of report_colnames" ], "execution_count": 38, "outputs": [] },
 { "cell_type": "code", "metadata": { "id": "bzDDyFEmxG6X" }, "source": [
   "# toy transactions: one list of items per basket\n",
   "dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],\n",
   "           ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],\n",
   "           ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],\n",
   "           ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],\n",
   "           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],\n",
   "           ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],\n",
   "           ['Milk', 'Unicorn', 'Eggs', 'Kidney Beans', 'Yogurt'],\n",
   "           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Yogurt', 'Eggs'],\n",
   "           ['Corn', 'Yogurt', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],\n",
   "           ['Milk', 'Unicorn', 'Corn', 'Yogurt', 'Eggs'],\n",
   "          ]" ], "execution_count": 39, "outputs": [] },
 { "cell_type": "code", "metadata": { "id": "dUju9t9fs5ir", "outputId": "34e7570a-fdf0-45d4-e584-644049dd4c85", "colab": { "base_uri": "https://localhost:8080/", "height": 34 } }, "source": [
   "# map each pyFIM report code to a nicer looking column name\n",
   "# (every key must be unique: a repeated key silently overrides the earlier entry)\n",
   "report_colnames = {\n",
   "    'a': 'support_itemset_absolute',\n",
   "    's': 'support_itemset_relative',\n",
   "    'S': 'support_itemset_relative_pct',\n",
   "    'b': 'support_bodyset_absolute',\n",
   "    'x': 'support_bodyset_relative',\n",
   "    'X': 'support_bodyset_relative_pct',\n",
   "    'h': 'support_headitem_absolute',\n",
   "    'y': 'support_headitem_relative',\n",
   "    'Y': 'support_headitem_relative_pct',\n",
   "    'c': 'confidence',\n",
   "    'C': 'confidence_pct',\n",
   "    'l': 'lift',\n",
   "    'L': 'lift_pct',\n",
   "    'e': 'evaluation',\n",
   "    'E': 'evaluation_pct',\n",
   "    'Q': 'support_emptyset',  # support of the empty set, i.e. total number of transactions\n",
   "    }\n",
   "\n",
   "# mine association rules\n",
   "result = arules(dataset, supp=supp, conf=conf, report=report)\n",
   "\n",
   "# make df of results: one row per rule, then one column per requested report code\n",
   "# (codes missing from report_colnames fall back to the raw letter)\n",
   "colnames = ['consequent', 'antecedent'] + [report_colnames.get(k, k) for k in report]\n",
   "df_rules = pd.DataFrame(result, columns=colnames)\n",
   "# highest-support rules first; this column only exists when 'a' is part of `report`\n",
   "df_rules = df_rules.sort_values('support_itemset_absolute', ascending=False)\n",
   "print(df_rules.shape)" ], "execution_count": 40, "outputs": [ { "output_type": "stream", "text": [ "(484, 5)\n" ], "name": "stdout" } ] },
 { "cell_type": "code", "metadata": { "id": "I7B2HifTy80m", "outputId": "c1bbdc1d-d897-4870-c0bd-abb94b216a62", "colab": { "base_uri": "https://localhost:8080/", "height": 359 } }, "source": [
   "# look at some higher support rules\n",
   "df_rules.head(10)" ], "execution_count": 41, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
consequentantecedentsupport_itemset_absolutesupport_itemset_relativeconfidence_pct
0Eggs()90.990.000000
3Yogurt()80.880.000000
11Kidney Beans()80.880.000000
2Yogurt(Eggs,)70.777.777778
4Eggs(Kidney Beans,)70.787.500000
5Kidney Beans(Eggs,)70.777.777778
1Eggs(Yogurt,)70.787.500000
10Kidney Beans(Yogurt,)60.675.000000
29Corn()60.660.000000
58Milk()60.660.000000
\n", "
" ], "text/plain": [ " consequent antecedent ... support_itemset_relative confidence_pct\n", "0 Eggs () ... 0.9 90.000000\n", "3 Yogurt () ... 0.8 80.000000\n", "11 Kidney Beans () ... 0.8 80.000000\n", "2 Yogurt (Eggs,) ... 0.7 77.777778\n", "4 Eggs (Kidney Beans,) ... 0.7 87.500000\n", "5 Kidney Beans (Eggs,) ... 0.7 77.777778\n", "1 Eggs (Yogurt,) ... 0.7 87.500000\n", "10 Kidney Beans (Yogurt,) ... 0.6 75.000000\n", "29 Corn () ... 0.6 60.000000\n", "58 Milk () ... 0.6 60.000000\n", "\n", "[10 rows x 5 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 41 } ] }, { "cell_type": "code", "metadata": { "id": "CJY1h5f8y8jm", "outputId": "22df6fe0-1772-4a1d-f8f4-458252ab1494", "colab": { "base_uri": "https://localhost:8080/", "height": 359 } }, "source": [ "# look at some lower support rules (df_rules is sorted by absolute itemset support, descending)\n", "df_rules.tail(10)" ], "execution_count": 42, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
consequentantecedentsupport_itemset_absolutesupport_itemset_relativeconfidence_pct
294Yogurt(Ice cream, Kidney Beans)10.150.0
295Kidney Beans(Ice cream, Yogurt)10.1100.0
112Yogurt(Onion, Milk, Kidney Beans, Eggs)10.1100.0
111Eggs(Onion, Milk, Kidney Beans, Yogurt)10.1100.0
110Kidney Beans(Onion, Milk, Eggs)10.1100.0
299Eggs(Ice cream, Corn, Yogurt)10.1100.0
300Yogurt(Ice cream, Corn, Eggs)10.150.0
301Corn(Ice cream, Yogurt, Eggs)10.1100.0
302Yogurt(Ice cream, Corn)10.150.0
483Milk(Apple,)10.1100.0
\n", "
" ], "text/plain": [ " consequent ... confidence_pct\n", "294 Yogurt ... 50.0\n", "295 Kidney Beans ... 100.0\n", "112 Yogurt ... 100.0\n", "111 Eggs ... 100.0\n", "110 Kidney Beans ... 100.0\n", "299 Eggs ... 100.0\n", "300 Yogurt ... 50.0\n", "301 Corn ... 100.0\n", "302 Yogurt ... 50.0\n", "483 Milk ... 100.0\n", "\n", "[10 rows x 5 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 42 } ] } ] }