{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Fetch scikit-learn's bundled breast cancer dataset; later cells read its\n", "# .data, .feature_names and .target attributes.\n", "from sklearn.datasets import load_breast_cancer\n", "\n", "bc = load_breast_cancer()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", "

5 rows × 30 columns

\n", "
" ], "text/plain": [ " mean radius mean texture mean perimeter mean area mean smoothness \\\n", "0 17.99 10.38 122.80 1001.0 0.11840 \n", "1 20.57 17.77 132.90 1326.0 0.08474 \n", "2 19.69 21.25 130.00 1203.0 0.10960 \n", "3 11.42 20.38 77.58 386.1 0.14250 \n", "4 20.29 14.34 135.10 1297.0 0.10030 \n", "\n", " mean compactness mean concavity mean concave points mean symmetry \\\n", "0 0.27760 0.3001 0.14710 0.2419 \n", "1 0.07864 0.0869 0.07017 0.1812 \n", "2 0.15990 0.1974 0.12790 0.2069 \n", "3 0.28390 0.2414 0.10520 0.2597 \n", "4 0.13280 0.1980 0.10430 0.1809 \n", "\n", " mean fractal dimension ... worst radius worst texture worst perimeter \\\n", "0 0.07871 ... 25.38 17.33 184.60 \n", "1 0.05667 ... 24.99 23.41 158.80 \n", "2 0.05999 ... 23.57 25.53 152.50 \n", "3 0.09744 ... 14.91 26.50 98.87 \n", "4 0.05883 ... 22.54 16.67 152.20 \n", "\n", " worst area worst smoothness worst compactness worst concavity \\\n", "0 2019.0 0.1622 0.6656 0.7119 \n", "1 1956.0 0.1238 0.1866 0.2416 \n", "2 1709.0 0.1444 0.4245 0.4504 \n", "3 567.7 0.2098 0.8663 0.6869 \n", "4 1575.0 0.1374 0.2050 0.4000 \n", "\n", " worst concave points worst symmetry worst fractal dimension \n", "0 0.2654 0.4601 0.11890 \n", "1 0.1860 0.2750 0.08902 \n", "2 0.2430 0.3613 0.08758 \n", "3 0.2575 0.6638 0.17300 \n", "4 0.1625 0.2364 0.07678 \n", "\n", "[5 rows x 30 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "# One column per feature, labelled with the dataset's own feature names.\n", "bc_df = pd.DataFrame(data=bc.data, columns=bc.feature_names)\n", "bc_df.head(n=5)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(455, 30)\n", "(455,)\n", "(114, 30)\n", "(114,)\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "# Hold out 20% of the rows for testing; random_state pins the shuffle so the\n", "# split is the same on every run.\n", "X_train, X_test, Y_train, Y_test = train_test_split(\n", "    bc_df, bc.target, test_size=0.2, random_state=31\n", ")\n", 
"# Show the shape of each piece: train features, train labels, test features, test labels.\n", "for subset in (X_train, Y_train, X_test, Y_test):\n", "    print(subset.shape)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
operation<=>
value10.25411.32811.94212.60413.27014.14215.05817.02619.32410.25411.32811.94212.60413.27014.14215.05817.02619.324
468000000001111111110
179000011111111100000
114111111111000000000
35000000011111111100
88000111111111000000
507011111111100000000
213000000001111111110
134000000001111111110
\n", "
" ], "text/plain": [ "operation <= \\\n", "value 10.254 11.328 11.942 12.604 13.270 14.142 15.058 17.026 19.324 \n", "468 0 0 0 0 0 0 0 0 1 \n", "179 0 0 0 0 1 1 1 1 1 \n", "114 1 1 1 1 1 1 1 1 1 \n", "35 0 0 0 0 0 0 0 1 1 \n", "88 0 0 0 1 1 1 1 1 1 \n", "507 0 1 1 1 1 1 1 1 1 \n", "213 0 0 0 0 0 0 0 0 1 \n", "134 0 0 0 0 0 0 0 0 1 \n", "\n", "operation > \n", "value 10.254 11.328 11.942 12.604 13.270 14.142 15.058 17.026 19.324 \n", "468 1 1 1 1 1 1 1 1 0 \n", "179 1 1 1 1 0 0 0 0 0 \n", "114 0 0 0 0 0 0 0 0 0 \n", "35 1 1 1 1 1 1 1 0 0 \n", "88 1 1 1 0 0 0 0 0 0 \n", "507 1 0 0 0 0 0 0 0 0 \n", "213 1 1 1 1 1 1 1 1 0 \n", "134 1 1 1 1 1 1 1 1 0 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from aix360.algorithms.rbm import FeatureBinarizer\n", "\n", "# Fit the binarizer on the training split only, then reuse it on the test split.\n", "fb = FeatureBinarizer(negations=True)\n", "X_train_fb = fb.fit_transform(X_train)\n", "X_test_fb = fb.transform(X_test)\n", "# Peek at the binarized columns produced for one feature (first 8 rows).\n", "X_train_fb['mean radius'].head(8)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from aix360.algorithms.rbm import BooleanRuleCG, BRCGExplainer" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# BRCGExplainer wraps the BooleanRuleCG model, so fitting goes through the wrapper.\n", "boolean_model = BooleanRuleCG(silent=True)\n", "explainer = BRCGExplainer(boolean_model)\n", "explainer.fit(X_train_fb, Y_train)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "Y_pred = explainer.predict(X_test_fb)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy = 0.9298245614035088\n", "Precision = 0.9538461538461539\n", "Recall = 0.9253731343283582\n", "F1 = 0.9393939393939394\n" ] } ], "source": [ "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n", 
"\n", "# Each scorer is called with its defaults; for precision/recall/F1 that means\n", "# label 1 is scored as the positive class.\n", "metric_table = (\n", "    ('Accuracy', accuracy_score),\n", "    ('Precision', precision_score),\n", "    ('Recall', recall_score),\n", "    ('F1', f1_score),\n", ")\n", "for label, scorer in metric_table:\n", "    print(f'{label} = {scorer(Y_test, Y_pred)}')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predict Y=1 if ANY of the following rules are satisfied, otherwise Y=0:\n", "\n", " - compactness error > 0.01 AND worst concavity <= 0.22 AND worst symmetry <= 0.28\n", " - mean texture <= 15.46 AND mean concavity <= 0.15 AND area error <= 54.16\n", " - fractal dimension error > 0.00 AND worst area <= 680.60 AND worst concave points <= 0.18\n", " - mean concave points <= 0.05 AND perimeter error <= 3.80 AND worst area <= 930.88 AND worst smoothness <= 0.16\n" ] } ], "source": [ "explanation = explainer.explain()\n", "# The 'isCNF' flag decides which of the two header sentences is printed.\n", "if explanation['isCNF']:\n", "    header = 'Predict Y=0 if ANY of the following rules are satisfied, otherwise Y=1:'\n", "else:\n", "    header = 'Predict Y=1 if ANY of the following rules are satisfied, otherwise Y=0:'\n", "# Header, a blank line, then one bullet per learned rule.\n", "bullets = [f' - {rule}' for rule in explanation['rules']]\n", "print(header, '', *bullets, sep='\\n')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }