{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Will remove later..." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances\n", "from sklearn.metrics import roc_auc_score, f1_score, matthews_corrcoef, balanced_accuracy_score, auc\n", "import pandas as pd\n", "import numpy as np\n", "from imblearn.over_sampling import SMOTE\n", "import matplotlib.pyplot as plt\n", "\n", "from my_util import *\n", "from lime.lime.lime_tabular import LimeTabularExplainer\n", "\n", "\n", "import sys, os, pickle, time\n", "\n", "sys.path.append(os.path.abspath('../'))\n", "from pyexplainer.pyexplainer_pyexplainer import *\n", "\n", "# from datetime import datetime\n", "\n", "from IPython.display import display\n", "\n", "from multiprocessing import Pool\n", "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "code_folding": [] }, "outputs": [], "source": [ "data_path = './dataset/'\n", "result_dir = './eval_result/'\n", "dump_dataframe_dir = './dump_df/'\n", "pyExp_dir = './explainer_obj_20_4_2021/'\n", "other_object_dir = './other_object/'\n", "proj_name = 'qt' # ['openstack','qt']\n", "\n", "\n", "\n", "if not os.path.exists(result_dir):\n", " os.makedirs(result_dir)\n", " \n", "if not os.path.exists(dump_dataframe_dir):\n", " os.makedirs(dump_dataframe_dir)\n", " \n", "if not os.path.exists(pyExp_dir):\n", " os.makedirs(pyExp_dir)\n", " \n", "if not os.path.exists(other_object_dir):\n", " os.makedirs(other_object_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Prepare data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "x_train, x_test, y_train, y_test = prepare_data(proj_name, mode = 'all')\n", "\n", "if proj_name == 'openstack':\n", " x_train_original, x_test_original = prepare_data_all_metrics(proj_name, mode='all')\n", " " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
laldndnsentnrevrtimehcmtselfndevageappasexprsexpasawrrsawr
commit_id
00556b20067c3a4adf6ff33a17d2a4232fdce6ee11110.0000002.02.1042011.0026.00.9391672.0118.01164.00.0085540.083747
c74e4a74ba97d32df7406fb684527d415dd8a6ba477110.9278381.07.0525695.0036.00.9130215.0293.02672.00.0211330.192136
5af870b2459afc9cc934d9e79e80e2e49ff7504922110.0000001.01.4887501.009.0103.4318062.01359.01380.00.0977500.099260
5a83f73ac92a73c76ab7e26e60deba905dc3f64a1410111.0000000.00.0000000.010.00.0000000.00.00.00.0000000.000000
d9d0a944495208635688ca402fe04860e81490f6122110.0000001.00.0076502.002.00.8982061.01700.0574.00.1222510.041325
...................................................
04edeafade9058bde6d6dd58e6b1a89bace2fd50304110.0000002.00.8792941.008.00.2070141.0275.0151.00.0135070.007439
ff3dcc49c4a1912189091e35e87cb61af2f62d4710110.0000006.010.0212504.008.0174.0285302.0436.0655.00.0213850.032102
13171e7e63bc5199d783e33decf7f402019d05cc30110.0000002.00.3092134.001.03.9957061.0469.0173.00.0229990.008514
fa24ef3d721a7b94d0c5abbc6c9558e74bdb0f3d130130320.7888753.00.5297452.0029.00.1768982.01985.03771.00.0971770.184567
33bd1e08d0043e9b1340898039562bdf595879b51712110.0000001.00.6132991.003.012.9773151.0434.0225.00.0212840.011058
\n", "

8277 rows × 16 columns

\n", "
" ], "text/plain": [ " la ld nd ns ent nrev \\\n", "commit_id \n", "00556b20067c3a4adf6ff33a17d2a4232fdce6ee 1 1 1 1 0.000000 2.0 \n", "c74e4a74ba97d32df7406fb684527d415dd8a6ba 47 7 1 1 0.927838 1.0 \n", "5af870b2459afc9cc934d9e79e80e2e49ff75049 2 2 1 1 0.000000 1.0 \n", "5a83f73ac92a73c76ab7e26e60deba905dc3f64a 14 10 1 1 1.000000 0.0 \n", "d9d0a944495208635688ca402fe04860e81490f6 12 2 1 1 0.000000 1.0 \n", "... ... ... .. .. ... ... \n", "04edeafade9058bde6d6dd58e6b1a89bace2fd50 30 4 1 1 0.000000 2.0 \n", "ff3dcc49c4a1912189091e35e87cb61af2f62d47 1 0 1 1 0.000000 6.0 \n", "13171e7e63bc5199d783e33decf7f402019d05cc 3 0 1 1 0.000000 2.0 \n", "fa24ef3d721a7b94d0c5abbc6c9558e74bdb0f3d 130 130 3 2 0.788875 3.0 \n", "33bd1e08d0043e9b1340898039562bdf595879b5 17 12 1 1 0.000000 1.0 \n", "\n", " rtime hcmt self ndev \\\n", "commit_id \n", "00556b20067c3a4adf6ff33a17d2a4232fdce6ee 2.104201 1.0 0 26.0 \n", "c74e4a74ba97d32df7406fb684527d415dd8a6ba 7.052569 5.0 0 36.0 \n", "5af870b2459afc9cc934d9e79e80e2e49ff75049 1.488750 1.0 0 9.0 \n", "5a83f73ac92a73c76ab7e26e60deba905dc3f64a 0.000000 0.0 1 0.0 \n", "d9d0a944495208635688ca402fe04860e81490f6 0.007650 2.0 0 2.0 \n", "... ... ... ... ... \n", "04edeafade9058bde6d6dd58e6b1a89bace2fd50 0.879294 1.0 0 8.0 \n", "ff3dcc49c4a1912189091e35e87cb61af2f62d47 10.021250 4.0 0 8.0 \n", "13171e7e63bc5199d783e33decf7f402019d05cc 0.309213 4.0 0 1.0 \n", "fa24ef3d721a7b94d0c5abbc6c9558e74bdb0f3d 0.529745 2.0 0 29.0 \n", "33bd1e08d0043e9b1340898039562bdf595879b5 0.613299 1.0 0 3.0 \n", "\n", " age app asexp rsexp \\\n", "commit_id \n", "00556b20067c3a4adf6ff33a17d2a4232fdce6ee 0.939167 2.0 118.0 1164.0 \n", "c74e4a74ba97d32df7406fb684527d415dd8a6ba 0.913021 5.0 293.0 2672.0 \n", "5af870b2459afc9cc934d9e79e80e2e49ff75049 103.431806 2.0 1359.0 1380.0 \n", "5a83f73ac92a73c76ab7e26e60deba905dc3f64a 0.000000 0.0 0.0 0.0 \n", "d9d0a944495208635688ca402fe04860e81490f6 0.898206 1.0 1700.0 574.0 \n", "... ... ... ... ... \n", "04edeafade9058bde6d6dd58e6b1a89bace2fd50 0.207014 1.0 275.0 151.0 \n", "ff3dcc49c4a1912189091e35e87cb61af2f62d47 174.028530 2.0 436.0 655.0 \n", "13171e7e63bc5199d783e33decf7f402019d05cc 3.995706 1.0 469.0 173.0 \n", "fa24ef3d721a7b94d0c5abbc6c9558e74bdb0f3d 0.176898 2.0 1985.0 3771.0 \n", "33bd1e08d0043e9b1340898039562bdf595879b5 12.977315 1.0 434.0 225.0 \n", "\n", " asawr rsawr \n", "commit_id \n", "00556b20067c3a4adf6ff33a17d2a4232fdce6ee 0.008554 0.083747 \n", "c74e4a74ba97d32df7406fb684527d415dd8a6ba 0.021133 0.192136 \n", "5af870b2459afc9cc934d9e79e80e2e49ff75049 0.097750 0.099260 \n", "5a83f73ac92a73c76ab7e26e60deba905dc3f64a 0.000000 0.000000 \n", "d9d0a944495208635688ca402fe04860e81490f6 0.122251 0.041325 \n", "... ... ... \n", "04edeafade9058bde6d6dd58e6b1a89bace2fd50 0.013507 0.007439 \n", "ff3dcc49c4a1912189091e35e87cb61af2f62d47 0.021385 0.032102 \n", "13171e7e63bc5199d783e33decf7f402019d05cc 0.022999 0.008514 \n", "fa24ef3d721a7b94d0c5abbc6c9558e74bdb0f3d 0.097177 0.184567 \n", "33bd1e08d0043e9b1340898039562bdf595879b5 0.021284 0.011058 \n", "\n", "[8277 rows x 16 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(x_test)\n", "# display(x_test_original)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# # print(y_train)\n", "# for col in x_test.columns:\n", "# print(col,len(x_test[col].unique()))\n", "\n", "col = list(x_test.columns)\n", "# print(col)\n", "# print(col.index('self'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# find the best k_neighbor of SMOTE for max AUC" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# def find_best_k_neighbor_of_SMOTE(x_train,y_train, global_model_name = 'RF'):\n", "# global_model_name = global_model_name.upper()\n", "# if global_model_name not in ['RF','LR']:\n", "# print('wrong global model name. the global model name must be RF or LR')\n", "# return\n", " \n", "# if global_model_name == 'RF':\n", "# global_model = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=24)\n", "# elif global_model_name == 'LR':\n", "# global_model = LogisticRegression(random_state=0, n_jobs=24)\n", " \n", "# for k in range(1,51): \n", "# smt = SMOTE(k_neighbors=k, random_state=42, n_jobs=24)\n", "# new_x_train, new_y_train = smt.fit_resample(x_train, y_train)\n", "# global_model.fit(new_x_train, new_y_train)\n", "# # pred = global_model.predict(x_test)\n", "# prob = global_model.predict_proba(x_test)[:,1]\n", "# AUC = roc_auc_score(y_test, prob)\n", " \n", "# print('k_neighbor: {}, AUC: {}'.format(k, AUC))\n", "# # pickle.dump(global_model, open(proj_name+'_'+global_model_name+'_global_model.pkl','wb'))\n", "# # print('train {} finished'.format(global_model_name))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# find_best_k_neighbor_of_SMOTE(x_train,y_train, global_model_name = 'RF')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# find_best_k_neighbor_of_SMOTE(x_train,y_train, global_model_name = 'LR')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train global model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def train_global_model(x_train,y_train, global_model_name = 'RF'):\n", " global_model_name = global_model_name.upper()\n", " if global_model_name not in ['RF','LR']:\n", " print('wrong global model name. the global model name must be RF or LR')\n", " return\n", " \n", " smt = SMOTE(k_neighbors=5, random_state=42, n_jobs=24)\n", " new_x_train, new_y_train = smt.fit_resample(x_train, y_train)\n", " \n", " if global_model_name == 'RF':\n", " global_model = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=24)\n", " elif global_model_name == 'LR':\n", " global_model = LogisticRegression(random_state=0, n_jobs=24)\n", " \n", " global_model.fit(new_x_train, new_y_train)\n", " pickle.dump(global_model, open(proj_name+'_'+global_model_name+'_global_model.pkl','wb'))\n", " print('train {} finished'.format(global_model_name))\n", " \n", "train_black_box = False\n", "\n", "if train_black_box:\n", " train_global_model(x_train, y_train,'RF')\n", " train_global_model(x_train, y_train,'LR')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Obtain correctly predicted defective commits" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# add this function to util file (done later...)\n", "\n", "def get_prediction_result_df(proj_name, global_model_name):\n", " global_model_name = global_model_name.upper()\n", " if global_model_name not in ['RF','LR']:\n", " print('wrong global model name. the global model name must be RF or LR')\n", " return\n", " \n", " prediction_df_dir = dump_dataframe_dir+proj_name+'_'+global_model_name+'_prediction_result.csv'\n", " correctly_predict_df_dir = dump_dataframe_dir+proj_name+'_'+global_model_name+'_correctly_predict_as_defective.csv'\n", " \n", " if not os.path.exists(prediction_df_dir) or not os.path.exists(correctly_predict_df_dir):\n", " global_model = pickle.load(open(proj_name+'_'+global_model_name+'_global_model.pkl','rb'))\n", "\n", " pred = global_model.predict(x_test)\n", " defective_prob = global_model.predict_proba(x_test)[:,1]\n", "\n", " prediction_df = x_test.copy()\n", " prediction_df['pred'] = pred\n", " prediction_df['defective_prob'] = defective_prob\n", " prediction_df['defect'] = y_test\n", "\n", " # print('AUC is',roc_auc_score(y_test, defective_prob))\n", " correctly_predict_df = prediction_df[(prediction_df['pred']==1) & (prediction_df['defect']==1)]\n", "\n", " print('total correct prediction: {}'.format(str(len(correctly_predict_df))))\n", "\n", " prediction_df.to_csv(prediction_df_dir)\n", " correctly_predict_df.to_csv(correctly_predict_df_dir)\n", " \n", " else:\n", " prediction_df = pd.read_csv(prediction_df_dir)\n", " correctly_predict_df = pd.read_csv(correctly_predict_df_dir)\n", " \n", " prediction_df = prediction_df.set_index('commit_id')\n", " correctly_predict_df = correctly_predict_df.set_index('commit_id')\n", " print('total correct prediction: {}'.format(str(len(correctly_predict_df))))\n", " \n", " return prediction_df, correctly_predict_df" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total correct prediction: 79\n", "total correct prediction: 255\n" ] } ], "source": [ "rf_prediction_df, rf_correctly_predict_df = get_prediction_result_df(proj_name, 'rf')\n", "lr_prediction_df, lr_correctly_predict_df = get_prediction_result_df(proj_name, 'lr')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
landnsentnrevrtimeselfndevageapprrexpasawrrsawrpreddefective_probdefect
commit_id
5ce74eb5469b7f88f4448ccbc1afaa802a7cfdef17210.9366677.017.869722038.00.0457061.01306.00.1131070.247377False0.14False
878ac164a391e761c72c5fdcd12f0caf48c7d35949930.7181165.09.9791090116.013.0354374.0373.00.5978530.296449False0.16False
bbab55c4da531e4695a6e3e577aaa4975f0fce7916210.9886996.081.1071180123.00.3024077.02810.00.0053340.389047False0.18False
a2ba455e2d2bc41f4a80a08d5434b741ed715ef45191310.86177224.016.0038770194.02.3483283.02086.00.1223110.389511True0.61False
9382ee659212285a203550cf60476dd146d27a2989210.9962763.0105.23778909.021.8419163.0632.00.2359550.820225False0.20False
...................................................
209c09a76f5e90aaa2899804686e6a513703d8872110.0000002.020.87252310.00.0000000.00.00.0000000.000000False0.03False
1fdc4afa157887d45e2f326d7373a5b1e8ee7aeb1803910.84675824.092.1303010411.00.5018262.0720.00.0002680.094218False0.39False
aaf5762be5d37cac022dc321b6400b9743a253034210.4138172.018.489861049.026.8729754.02299.00.0045500.352067False0.08False
75c7f6a17a5bb78074518877bf73f0071b7758eb0110.0000001.054.90884304.065.4346878.01361.00.0011390.840547False0.01False
3824051b1e5618388a17c88867a3037397bc96b731210.8582317.075.9873840205.00.0445252.0713.00.0111050.094461False0.20False
\n", "

3963 rows × 16 columns

\n", "
" ], "text/plain": [ " la nd ns ent nrev \\\n", "commit_id \n", "5ce74eb5469b7f88f4448ccbc1afaa802a7cfdef 17 2 1 0.936667 7.0 \n", "878ac164a391e761c72c5fdcd12f0caf48c7d359 49 9 3 0.718116 5.0 \n", "bbab55c4da531e4695a6e3e577aaa4975f0fce79 16 2 1 0.988699 6.0 \n", "a2ba455e2d2bc41f4a80a08d5434b741ed715ef4 519 13 1 0.861772 24.0 \n", "9382ee659212285a203550cf60476dd146d27a29 89 2 1 0.996276 3.0 \n", "... ... .. .. ... ... \n", "209c09a76f5e90aaa2899804686e6a513703d887 2 1 1 0.000000 2.0 \n", "1fdc4afa157887d45e2f326d7373a5b1e8ee7aeb 180 39 1 0.846758 24.0 \n", "aaf5762be5d37cac022dc321b6400b9743a25303 4 2 1 0.413817 2.0 \n", "75c7f6a17a5bb78074518877bf73f0071b7758eb 0 1 1 0.000000 1.0 \n", "3824051b1e5618388a17c88867a3037397bc96b7 31 2 1 0.858231 7.0 \n", "\n", " rtime self ndev age \\\n", "commit_id \n", "5ce74eb5469b7f88f4448ccbc1afaa802a7cfdef 17.869722 0 38.0 0.045706 \n", "878ac164a391e761c72c5fdcd12f0caf48c7d359 9.979109 0 116.0 13.035437 \n", "bbab55c4da531e4695a6e3e577aaa4975f0fce79 81.107118 0 123.0 0.302407 \n", "a2ba455e2d2bc41f4a80a08d5434b741ed715ef4 16.003877 0 194.0 2.348328 \n", "9382ee659212285a203550cf60476dd146d27a29 105.237789 0 9.0 21.841916 \n", "... ... ... ... ... \n", "209c09a76f5e90aaa2899804686e6a513703d887 20.872523 1 0.0 0.000000 \n", "1fdc4afa157887d45e2f326d7373a5b1e8ee7aeb 92.130301 0 411.0 0.501826 \n", "aaf5762be5d37cac022dc321b6400b9743a25303 18.489861 0 49.0 26.872975 \n", "75c7f6a17a5bb78074518877bf73f0071b7758eb 54.908843 0 4.0 65.434687 \n", "3824051b1e5618388a17c88867a3037397bc96b7 75.987384 0 205.0 0.044525 \n", "\n", " app rrexp asawr rsawr \\\n", "commit_id \n", "5ce74eb5469b7f88f4448ccbc1afaa802a7cfdef 1.0 1306.0 0.113107 0.247377 \n", "878ac164a391e761c72c5fdcd12f0caf48c7d359 4.0 373.0 0.597853 0.296449 \n", "bbab55c4da531e4695a6e3e577aaa4975f0fce79 7.0 2810.0 0.005334 0.389047 \n", "a2ba455e2d2bc41f4a80a08d5434b741ed715ef4 3.0 2086.0 0.122311 0.389511 \n", "9382ee659212285a203550cf60476dd146d27a29 3.0 632.0 0.235955 0.820225 \n", "... ... ... ... ... \n", "209c09a76f5e90aaa2899804686e6a513703d887 0.0 0.0 0.000000 0.000000 \n", "1fdc4afa157887d45e2f326d7373a5b1e8ee7aeb 2.0 720.0 0.000268 0.094218 \n", "aaf5762be5d37cac022dc321b6400b9743a25303 4.0 2299.0 0.004550 0.352067 \n", "75c7f6a17a5bb78074518877bf73f0071b7758eb 8.0 1361.0 0.001139 0.840547 \n", "3824051b1e5618388a17c88867a3037397bc96b7 2.0 713.0 0.011105 0.094461 \n", "\n", " pred defective_prob defect \n", "commit_id \n", "5ce74eb5469b7f88f4448ccbc1afaa802a7cfdef False 0.14 False \n", "878ac164a391e761c72c5fdcd12f0caf48c7d359 False 0.16 False \n", "bbab55c4da531e4695a6e3e577aaa4975f0fce79 False 0.18 False \n", "a2ba455e2d2bc41f4a80a08d5434b741ed715ef4 True 0.61 False \n", "9382ee659212285a203550cf60476dd146d27a29 False 0.20 False \n", "... ... ... ... \n", "209c09a76f5e90aaa2899804686e6a513703d887 False 0.03 False \n", "1fdc4afa157887d45e2f326d7373a5b1e8ee7aeb False 0.39 False \n", "aaf5762be5d37cac022dc321b6400b9743a25303 False 0.08 False \n", "75c7f6a17a5bb78074518877bf73f0071b7758eb False 0.01 False \n", "3824051b1e5618388a17c88867a3037397bc96b7 False 0.20 False \n", "\n", "[3963 rows x 16 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(rf_prediction_df)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
landnsentnrevrtimeselfndevageapprrexpasawrrsawrpreddefective_probdefect
commit_id
1e6973aee7137653c62dcef970b1e2527b50517d258210.8262782.018.04956001.05.2427554.0465.00.0042620.082046True0.57True
5d0ccceb20780fdd3adf519d3f8e6b80b1844407190710.83947025.015.1816200164.00.8331372.01931.00.1223400.361879True0.58True
de31210c05f464c4a79255de68b1a515d9b84ed3121210.7585232.00.357431011.04.9835532.0812.00.0574160.873206True0.61True
957533f685caf9ffc0d9cad569598455d59ade34121510.8636297.019.05442104.07.9353475.01364.00.1979170.911458True0.60True
96677735f6d8f1f5b6380127956921f719aab799525210.70702229.020.11737307.04.1448781.098.00.0023810.104762True0.56True
...................................................
9bc29208bda6071a34bcc0da36a396eb8bab4f3022210.8865417.074.999502020.05.0038318.01214.00.0339980.864009True0.70True
1a40831f1711bf46c878dd247679bb05d19ffb5a99210.86148713.036.234155012.019.0555502.0562.00.1660960.512842True0.60True
605749ca12af969ac122008b4fa14904df68caf7627910.6638835.08.9604630181.00.3796264.01467.00.0051150.216853True0.51True
85239cc81440d9e5a4aee3c0961c96a4197ad939166520.7816936.01.36785902.01.5275563.01315.00.1360220.947368True0.54True
f0f52a0085a1f5bbd23e6cfa8f0ea935e8fd56c846210.8524054.073.000278023.05.1827785.01174.00.0068210.847885True0.51True
\n", "

198 rows × 16 columns

\n", "
" ], "text/plain": [ " la nd ns ent nrev \\\n", "commit_id \n", "1e6973aee7137653c62dcef970b1e2527b50517d 258 2 1 0.826278 2.0 \n", "5d0ccceb20780fdd3adf519d3f8e6b80b1844407 190 7 1 0.839470 25.0 \n", "de31210c05f464c4a79255de68b1a515d9b84ed3 121 2 1 0.758523 2.0 \n", "957533f685caf9ffc0d9cad569598455d59ade34 121 5 1 0.863629 7.0 \n", "96677735f6d8f1f5b6380127956921f719aab799 525 2 1 0.707022 29.0 \n", "... ... .. .. ... ... \n", "9bc29208bda6071a34bcc0da36a396eb8bab4f30 22 2 1 0.886541 7.0 \n", "1a40831f1711bf46c878dd247679bb05d19ffb5a 99 2 1 0.861487 13.0 \n", "605749ca12af969ac122008b4fa14904df68caf7 627 9 1 0.663883 5.0 \n", "85239cc81440d9e5a4aee3c0961c96a4197ad939 166 5 2 0.781693 6.0 \n", "f0f52a0085a1f5bbd23e6cfa8f0ea935e8fd56c8 46 2 1 0.852405 4.0 \n", "\n", " rtime self ndev age \\\n", "commit_id \n", "1e6973aee7137653c62dcef970b1e2527b50517d 18.049560 0 1.0 5.242755 \n", "5d0ccceb20780fdd3adf519d3f8e6b80b1844407 15.181620 0 164.0 0.833137 \n", "de31210c05f464c4a79255de68b1a515d9b84ed3 0.357431 0 11.0 4.983553 \n", "957533f685caf9ffc0d9cad569598455d59ade34 19.054421 0 4.0 7.935347 \n", "96677735f6d8f1f5b6380127956921f719aab799 20.117373 0 7.0 4.144878 \n", "... ... ... ... ... \n", "9bc29208bda6071a34bcc0da36a396eb8bab4f30 74.999502 0 20.0 5.003831 \n", "1a40831f1711bf46c878dd247679bb05d19ffb5a 36.234155 0 12.0 19.055550 \n", "605749ca12af969ac122008b4fa14904df68caf7 8.960463 0 181.0 0.379626 \n", "85239cc81440d9e5a4aee3c0961c96a4197ad939 1.367859 0 2.0 1.527556 \n", "f0f52a0085a1f5bbd23e6cfa8f0ea935e8fd56c8 73.000278 0 23.0 5.182778 \n", "\n", " app rrexp asawr rsawr \\\n", "commit_id \n", "1e6973aee7137653c62dcef970b1e2527b50517d 4.0 465.0 0.004262 0.082046 \n", "5d0ccceb20780fdd3adf519d3f8e6b80b1844407 2.0 1931.0 0.122340 0.361879 \n", "de31210c05f464c4a79255de68b1a515d9b84ed3 2.0 812.0 0.057416 0.873206 \n", "957533f685caf9ffc0d9cad569598455d59ade34 5.0 1364.0 0.197917 0.911458 \n", "96677735f6d8f1f5b6380127956921f719aab799 1.0 98.0 0.002381 0.104762 \n", "... ... ... ... ... \n", "9bc29208bda6071a34bcc0da36a396eb8bab4f30 8.0 1214.0 0.033998 0.864009 \n", "1a40831f1711bf46c878dd247679bb05d19ffb5a 2.0 562.0 0.166096 0.512842 \n", "605749ca12af969ac122008b4fa14904df68caf7 4.0 1467.0 0.005115 0.216853 \n", "85239cc81440d9e5a4aee3c0961c96a4197ad939 3.0 1315.0 0.136022 0.947368 \n", "f0f52a0085a1f5bbd23e6cfa8f0ea935e8fd56c8 5.0 1174.0 0.006821 0.847885 \n", "\n", " pred defective_prob defect \n", "commit_id \n", "1e6973aee7137653c62dcef970b1e2527b50517d True 0.57 True \n", "5d0ccceb20780fdd3adf519d3f8e6b80b1844407 True 0.58 True \n", "de31210c05f464c4a79255de68b1a515d9b84ed3 True 0.61 True \n", "957533f685caf9ffc0d9cad569598455d59ade34 True 0.60 True \n", "96677735f6d8f1f5b6380127956921f719aab799 True 0.56 True \n", "... ... ... ... \n", "9bc29208bda6071a34bcc0da36a396eb8bab4f30 True 0.70 True \n", "1a40831f1711bf46c878dd247679bb05d19ffb5a True 0.60 True \n", "605749ca12af969ac122008b4fa14904df68caf7 True 0.51 True \n", "85239cc81440d9e5a4aee3c0961c96a4197ad939 True 0.54 True \n", "f0f52a0085a1f5bbd23e6cfa8f0ea935e8fd56c8 True 0.51 True \n", "\n", "[198 rows x 16 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(rf_correctly_predict_df)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# rf_prediction_df_exclude_correct_prediction = rf_prediction_df.drop(index=rf_correctly_predict_df.index)\n", "# lr_prediction_df_exclude_correct_prediction = lr_prediction_df.drop(index=lr_correctly_predict_df.index)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# all_indices = set(x_test.index)\n", "# correct_indices = set(rf_correctly_predict_df.index)\n", "# remain_indices = all_indices-correct_indices\n", "\n", "# print(len(all_indices))\n", "# print(len(correct_indices), len(remain_indices))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# display(x_test.loc[rf_correctly_predict_df.index])\n", "# display(y_test.loc[rf_correctly_predict_df.index])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Training local model\n", "\n", "Note: this step includes instance generation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 12, "metadata": { "code_folding": [] }, "outputs": [], "source": [ "def create_every_explainer(global_model_name, df_indices):\n", " \n", " global_model_name = global_model_name.upper()\n", " if global_model_name not in ['RF','LR']:\n", " print('wrong global model name. the global model name must be RF or LR')\n", " return\n", " \n", " global_model = pickle.load(open(proj_name+'_'+global_model_name+'_global_model.pkl','rb'))\n", "\n", " indep = x_test.columns\n", " dep = 'defect'\n", " class_label = ['clean', 'defect']\n", " \n", " # for our apporach\n", " pyExp = PyExplainer(x_train, y_train, indep, dep, global_model, class_label)\n", "\n", " # for baseline\n", " # note: 6 is index of 'self' feature\n", " lime_explainer = LimeTabularExplainer(x_train.values, categorical_features=[6],\n", " feature_names=indep, class_names=class_label, \n", " random_state=0)\n", "\n", " \n", "\n", " feature_df = x_test.loc[df_indices]\n", " test_label = y_test.loc[df_indices]\n", "\n", " save_dir = os.path.join(pyExp_dir,proj_name,global_model_name)\n", " \n", " if not os.path.exists(save_dir):\n", " os.makedirs(save_dir)\n", " \n", " for i in range(0,len(feature_df)):\n", " X_explain = feature_df.iloc[[i]]\n", " y_explain = test_label.iloc[[i]]\n", "\n", " row_index = str(X_explain.index[0])\n", "\n", " pyExp_obj = pyExp.explain(X_explain,\n", " y_explain,\n", " search_function = 'CrossoverInterpolation')\n", "# synt_pred = pyExp_obj['synthetic_predictions']\n", " pyExp_obj['commit_id'] = row_index\n", "\n", " # because I don't want to change key name in another evaluation file\n", " pyExp_obj['local_model'] = pyExp_obj['local_rulefit_model']\n", " del pyExp_obj['local_rulefit_model']\n", "# print('{}: found {} defect from total {}'.format(row_index, str(np.sum(synt_pred)), \n", "# str(len(synt_pred))))\n", "# pickle.dump(pyExp_obj, open(pyExp_dir+proj_name+'_'+explainer+'_'+search_function.lower()+'_'+row_index+'_20_rules.pkl','wb'))\n", " \n", " X_explain = feature_df.iloc[i] # to prevent error in LIME\n", " exp, synt_inst, synt_inst_for_local_model, selected_feature_indices, local_model = lime_explainer.explain_instance(X_explain, \n", " global_model.predict_proba, \n", " num_samples=5000)\n", "\n", " lime_obj = {}\n", " lime_obj['rule'] = exp\n", " lime_obj['synthetic_instance_for_global_model'] = synt_inst\n", " lime_obj['synthetic_instance_for_lobal_model'] = synt_inst_for_local_model\n", " lime_obj['local_model'] = local_model\n", " lime_obj['selected_feature_indeces'] = selected_feature_indices\n", " lime_obj['commit_id'] = row_index\n", "# pickle.dump(lime_obj, open(pyExp_dir+proj_name+'_lime_'+row_index+'.pkl','wb'))\n", "\n", " all_explainer = {'pyExplainer':pyExp_obj, 'LIME': lime_obj}\n", " \n", "\n", " \n", " pickle.dump(all_explainer, open(save_dir+'/all_explainer_'+row_index+'.pkl','wb'))\n", " \n", " print('finished',row_index)\n", "# break\n", "\n", "\n", "# end = time.time()\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "all_indices = set(x_test.index)\n", "rf_correct_indices = set(rf_correctly_predict_df.index)\n", "rf_remain_indices = all_indices-rf_correct_indices\n", "\n", "lr_correct_indices = set(lr_correctly_predict_df.index)\n", "lr_remain_indices = all_indices-lr_correct_indices" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "create_every_explainer('RF',rf_correct_indices)\n", "create_every_explainer('LR',lr_correct_indices)\n", "# create_every_explainer('RF',rf_remain_indices)\n", "# create_every_explainer('LR',lr_remain_indices)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# print(len(correct_indices))\n", "# print(len(remain_indices))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'dir1/dir2file.f'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.path.join('dir1','dir2')+'/file.f'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['pyExplainer', 'LIME'])\n", "dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'indep', 'dep', 'top_k_positive_rules', 'top_k_negative_rules', 'commit_id', 'local_model'])\n" ] } ], "source": [ "obj = pickle.load(open(pyExp_dir+proj_name+'_'+'LR'+'_all_explainer_'+'99379d6ec00b7bdfe5a625877d8e680f9240efcc'+'.pkl','rb'))\n", "print(obj.keys())\n", "print(obj['pyExplainer'].keys())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['la', 'nd', 'ns', 'ent', 'nrev', 'rtime', 'self', 'ndev', 'age', 'app',\n", " 'rrexp', 'asawr', 'rsawr'],\n", " dtype='object')\n" ] } ], "source": [ "print(correctly_predict_df.columns[:-3])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# global_model = pickle.load(open(proj_name+'_global_model.pkl','rb'))\n", "\n", "# load_prediction_from_file = True\n", "# class_label = ['clean', 'defect']\n", "\n", "# if load_prediction_from_file:\n", "# correctly_predict_df = pd.read_csv(dump_dataframe_dir+proj_name+'_correctly_predict_as_defective.csv')\n", "# correctly_predict_df = correctly_predict_df.set_index('commit_id')\n", " \n", "# dep = 'defect'\n", "# indep = correctly_predict_df.columns[:-3] # exclude the last 3 columns\n", "\n", "# # print(correctly_predict_df.columns)\n", "# # print(len(correctly_predict_df.columns))\n", "# # print(indep)\n", "# # display(correctly_predict_df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "feature_df = correctly_predict_df.loc[:, indep]\n", "test_label = correctly_predict_df.loc[:, dep]\n", "problem_index = [] # store index that cannot build pyExplainer" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "prepare data done...\n", "--------------------------------------------------------------------------------\n", "finished bdba16035dd03310259203f7baf576fc6fcb530b\n", "--------------------------------------------------------------------------------\n", "finished b63c6f9f839188ade489d77747164b0fec8ab116\n", "--------------------------------------------------------------------------------\n", "finished 9d98070f7b2e06ac8cc30d12523798df4418eed0\n", "--------------------------------------------------------------------------------\n", "finished 55bfb412f1732e97da76f1b34e581d2429df38e6\n", "--------------------------------------------------------------------------------\n", "finished da42fe01d99eaeb0cda5387efefeb9e284c6e20b\n", "--------------------------------------------------------------------------------\n", "finished 69191a1858cd3df651240a5ad1c384a470baa356\n", "--------------------------------------------------------------------------------\n", "finished 69a4e6cde8769992d06ec3126257cf2adea0e525\n", "--------------------------------------------------------------------------------\n", "--------------------------------------------------------------------------------\n", "finished 96a8a1643b90219b15aa3aa83f075044e7b85c40\n", "finished 36be3145c76d185ecabac20c4df145b7f46f67b1\n", "--------------------------------------------------------------------------------\n", "finished 4227b128a42a1c5785ac13245de511fbcd358e37\n", "--------------------------------------------------------------------------------\n", "finished 8db51da1e25f1c148fbe8d8f27d144a39d1850f3\n", "--------------------------------------------------------------------------------\n", "finished 85b8ef62da3ae13923ebf449da3dcb3da224adb1\n", "--------------------------------------------------------------------------------\n", "finished 983d815a80039d1f4e0d735c3041882833ecc488\n", "--------------------------------------------------------------------------------\n", "finished 5ce74eb5469b7f88f4448ccbc1afaa802a7cfdef\n", "--------------------------------------------------------------------------------\n", "finished dec1e329c2ebba9fd0f9e96d1ae85aa084ccba6f\n", "--------------------------------------------------------------------------------\n", "finished e43c5c1ea6441ad7d36e25bd283a5a580bf5cb09\n", "--------------------------------------------------------------------------------\n", "finished 9842dcb34cb79b942391fdab4a00dd790b5e299e\n", "--------------------------------------------------------------------------------\n", "finished 35da9612e0ca8b5d7bf238f4059f1ba2e576cb01\n", "--------------------------------------------------------------------------------\n", "finished f66de80e38bcfbc29228ea19ffc96fbf34aad0ba\n", "--------------------------------------------------------------------------------\n", "finished 4f4b47803f3a7f820a6f68866fe7a36a37aff00e\n", "--------------------------------------------------------------------------------\n", "finished 2672a1e24ed0ad0c189110eb04ddcba78cd48be5\n", "--------------------------------------------------------------------------------\n", "finished 18d35d0949868840aba7b6720a76c3d6c5ec9608\n", "--------------------------------------------------------------------------------\n", "finished 506a8f58cf4b8cecf90b647c7deba47da2a4dfec\n", "--------------------------------------------------------------------------------\n", "finished d039736aea3def9015c8c4b1277a3115a63a211a\n", "--------------------------------------------------------------------------------\n", "finished 8d985b826b459335ab89fef69c132470af066daf\n", "--------------------------------------------------------------------------------\n", "finished e91c6ffbea7cdae3865b1f78f76c83cf1c5dc00e\n", "--------------------------------------------------------------------------------\n", "finished fe7198a6ecf45edd8b5ca2030586c746f204fe3f\n", "--------------------------------------------------------------------------------\n", "finished e541ca285d5a0f484c886fb28514b84580c3dc02\n", "--------------------------------------------------------------------------------\n", "finished 0278a38153f9649aab1cc641bfabd8d5738d2d8c\n", "--------------------------------------------------------------------------------\n", "finished e52b5e8e98dd640c69d009a3d5546a479e394d81\n", "--------------------------------------------------------------------------------\n", "finished d4b024ad7d64a854072517eef47b059c93bdfdd3\n", "--------------------------------------------------------------------------------\n", "finished 209c0e0a0d92199ce44f8c787c2582aa41fb27d7\n", "--------------------------------------------------------------------------------\n", "finished 285a3a88a1ede50014d4f4a124994a2a0e85705b\n", "--------------------------------------------------------------------------------\n", "finished e19cf5c0475fcc69cda1a883f29181247c4c8f54\n", "--------------------------------------------------------------------------------\n", "finished 878ac164a391e761c72c5fdcd12f0caf48c7d359\n", "--------------------------------------------------------------------------------\n", "finished e120c7301942e3c4f352ae250a1b4f6bf89aabde\n", "--------------------------------------------------------------------------------\n", "finished 844a228c3c42dac802a82510eb17fcfaccb0e1f8\n", "--------------------------------------------------------------------------------\n", "finished f0b77aa053a2d5456d72d178d4fac8e2684cc111\n", "--------------------------------------------------------------------------------\n", "finished 402a44850ba1783368fb349fafecdeedd9da7fd2\n", "--------------------------------------------------------------------------------\n", "finished 15ef6089e5dbc54f8d6cdbfa9281ff523d60548a\n", "--------------------------------------------------------------------------------\n", "finished 72f85b5e616e3aabcab862b944c1a3a2c2844431\n", "--------------------------------------------------------------------------------\n", "finished ab55af8ed5afd0765a23a85d608d4b6d35bdd166\n", "--------------------------------------------------------------------------------\n", "finished f2dfdd4ab37965a6559f3c033cdf877e0fe172be\n", "--------------------------------------------------------------------------------\n", "finished 928a119ac0f8d750f616f3eff4fadc82b23fc7c6\n", "--------------------------------------------------------------------------------\n", "finished ba4ed39616c3c6fabf24ca390c7037877c672fba\n", "--------------------------------------------------------------------------------\n", "finished 12c8ea8569a64e0688544db18da1946e4a3ffd82\n", "--------------------------------------------------------------------------------\n", "finished 51fc2bd41e236404a8db8e4beefd0ec265fb1a5a\n", "--------------------------------------------------------------------------------\n", "--------------------------------------------------------------------------------\n", "finished 01a44568cc60bb5a6dd7b55d69b20bba57d1b94b\n", "finished 0a87ff988cd21586daeb6ed886e44aedaa49c320\n", "--------------------------------------------------------------------------------\n", "finished de31210c05f464c4a79255de68b1a515d9b84ed3\n", "--------------------------------------------------------------------------------\n", "finished dc5c2cb7f2ddfb8b7311373be423a51cdf700f9f\n", "--------------------------------------------------------------------------------\n", "finished d91bc54749fa4ffdda203db5ad88dc4f20c72eb8\n", "--------------------------------------------------------------------------------\n", "finished d7796858f1e23e5284b12f48d79ddfc63929e7fb\n", "--------------------------------------------------------------------------------\n", "finished bdaf3a26bb924a0debc7e753cdf2135dcc20d833\n", "--------------------------------------------------------------------------------\n", "finished 7aea4d8b21013d5ef6b14111b42c4bd101789ed1\n", "--------------------------------------------------------------------------------\n", "finished 2dbea4a9f524337576ffe5610708f90e08be8853\n", "--------------------------------------------------------------------------------\n", "finished f9cf84273bd4d89cdbe267b2a5ed9c9ed20492e9\n" ] } ], "source": [ "create_every_explainer(x_test, y_test)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "'''\n", " search function: lime or crossoverinterpolation\n", "'''\n", "def create_pyExplainer_obj(search_function, feature_df, test_label, explainer='rulefit'):\n", " \n", " if search_function not in ['lime','CrossoverInterpolation']:\n", " print('the search function must be \"lime\" or \"CrossoverInterpolation\"')\n", " return\n", " \n", " problem_index = []\n", " time_spent = []\n", " \n", " for i in range(0,len(feature_df)):\n", " X_explain = feature_df.iloc[[i]]\n", " y_explain = test_label.iloc[[i]]\n", "\n", " row_index = str(X_explain.index[0])\n", "\n", " start = time.time()\n", " try:\n", " if search_function=='CrossoverInterpolation':\n", " # the returned object is dictionary\n", " pyExp_obj = pyExp.explain(X_explain,\n", " y_explain,\n", " search_function = 'CrossoverInterpolation', \n", " top_k = 15,\n", " max_rules=2000, \n", " max_iter = None, \n", " cv=5,\n", " debug = False)\n", " # synt_pred = pyExp_obj['synthetic_predictions']\n", " pyExp_obj['commit_id'] = row_index\n", " \n", " # because I don't want to change key name in another evaluation file\n", " pyExp_obj['local_model'] = pyExp_obj['local_rulefit_model']\n", " del pyExp_obj['local_rulefit_model']\n", " # print('{}: found {} defect from total {}'.format(row_index, str(np.sum(synt_pred)), \n", " # str(len(synt_pred))))\n", " pickle.dump(pyExp_obj, open(pyExp_dir+proj_name+'_'+explainer+'_'+search_function.lower()+'_'+row_index+'_20_rules.pkl','wb'))\n", " \n", " else:\n", " X_explain = feature_df.iloc[i] # to prevent error in LIME\n", " exp, synt_inst, synt_inst_for_local_model, selected_feature_indices, local_model = lime_explainer.explain_instance(X_explain, \n", " global_model.predict_proba, \n", " num_samples=5000)\n", "\n", " lime_obj = {}\n", " lime_obj['rule'] = exp\n", " lime_obj['synthetic_instance_for_global_model'] = synt_inst\n", " lime_obj['synthetic_instance_for_lobal_model'] = synt_inst_for_local_model\n", " lime_obj['local_model'] = local_model\n", " lime_obj['selected_feature_indeces'] = selected_feature_indices\n", " lime_obj['commit_id'] = row_index\n", " pickle.dump(lime_obj, open(pyExp_dir+proj_name+'_lime_'+row_index+'.pkl','wb'))\n", " \n", " print('finished',row_index)\n", "# print(row_index)\n", "# print('just one rulefit is enough')\n", "# break\n", " \n", " except Exception as e:\n", " problem_index.append(row_index)\n", " print('-'*100)\n", " print(e)\n", "# print('found total {} problematic commit'.format(str(len(problem_index))))\n", " print('-'*100)\n", " \n", "# break\n", "\n", " end = time.time()\n", "\n", " time_spent.append(str(end-start))\n", "# print(row_index)\n", "# break\n", " \n", " print('from total {} commits, there are {} problematic commits'.format(len(feature_df),len(problem_index)))\n", " return time_spent, problem_index" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--------------------------------------------------------------------------------\n", "finished f5dbc876378ae58a7bdfe1e9664fc81caca18dfb\n", "--------------------------------------------------------------------------------\n", "finished 602bd9873786ccadcb67da3036329f3122555cf8\n", "--------------------------------------------------------------------------------\n", "finished 9d75626b1073113d77988bcb52e99215d5af4787\n", "--------------------------------------------------------------------------------\n", "finished 3168c94577cc604fd033ce4e741da5c411f74666\n", "--------------------------------------------------------------------------------\n", "finished 23bdca417bde716c79168ab372083fd885607123\n", "--------------------------------------------------------------------------------\n", "finished 438211ec627073817fcaf6d3a07b76f2aa5d90e0\n", "--------------------------------------------------------------------------------\n", "finished d8090022f66cc6cff6af5ed2ae702212fd172ff7\n", "--------------------------------------------------------------------------------\n", "finished 06eaeb0ac8d861cf3e60b11a8fbbabdeef1d6759\n", "--------------------------------------------------------------------------------\n", "finished 4dbf574b7acb7ae8f852219700afa95f8d568f0e\n", "--------------------------------------------------------------------------------\n", "finished 2eb28f51ce4150fa03b2ddde8c39b502ae57d18a\n", "--------------------------------------------------------------------------------\n", "finished 61b56a89a1cf8a388ff925492700e5eef019c3aa\n", "--------------------------------------------------------------------------------\n", "finished 07860794da5863610f38295c9d517fc457c5de95\n", "--------------------------------------------------------------------------------\n", "finished b508dc98a5085df6ea44b154177fbb6d8b0e8434\n", "--------------------------------------------------------------------------------\n", "finished b449791c3565940e701c68b3f705014804af0c2b\n", "--------------------------------------------------------------------------------\n", "finished 3abecf2ee9fc724af571f8c7da4302f7bee9eadb\n", "--------------------------------------------------------------------------------\n", "finished d8d903826daa8db49e437b76fb80693ab8feb01e\n", "--------------------------------------------------------------------------------\n", "finished 5b456c8da4462f9e11fa4da78a9e6ea86423a1e8\n", "--------------------------------------------------------------------------------\n", "finished 37ca2224eca671200a2710f57f970d2993e62aa5\n", "--------------------------------------------------------------------------------\n", "finished fd619946be51784dc709363324897be6af144c52\n", "--------------------------------------------------------------------------------\n", "finished 0ed9f7496656fa0ea52d703c7fddff26c2192857\n", "--------------------------------------------------------------------------------\n", "finished a25b1fdf7d642d9f252fbfa1789efaa32e03b994\n", "--------------------------------------------------------------------------------\n", "finished 2a11286469bf28b348821f015d72f8cb06f54b95\n", "--------------------------------------------------------------------------------\n", "finished 42cfb5fe4daa586f382bde6936b0ee33b5298f4d\n", "--------------------------------------------------------------------------------\n", "finished 9c82c105a1886473ca144b802ce9f5bec01e35e8\n", "--------------------------------------------------------------------------------\n", "finished aed9a8d49b7470de6809c3bf747b14c7150d7ae6\n", "--------------------------------------------------------------------------------\n", "finished 19b7afca523f221494bd165680f1aa9ddf3a0e31\n", "--------------------------------------------------------------------------------\n", "finished 52fc0a95a109d2e9fa279eeb0284a8178563080b\n", "--------------------------------------------------------------------------------\n", "finished d6d119ac6e606a30993bfea1ac3309f74d15bedc\n", "--------------------------------------------------------------------------------\n", "finished f89f099c55576992b39a8021aace64ff32747624\n", "--------------------------------------------------------------------------------\n", "finished 0eade30f37980c38b14d5cfa475837d15b69a8c5\n", "--------------------------------------------------------------------------------\n", "finished 87f475cbdb89fcbfbce68c3b676a240bb255a6d9\n", "--------------------------------------------------------------------------------\n", "finished e07e1931ac3d2e270d7696f631aba39ea54ba3c4\n", "--------------------------------------------------------------------------------\n", "finished 8d441936d2e8ab8c39a66831a658c192b80ca597\n", "--------------------------------------------------------------------------------\n", "finished 9329f786da8e167130fa36b91ff288bfdb046ce1\n", "--------------------------------------------------------------------------------\n", "finished a0f8be4021caa9bb5055923f0eea3bee0e345235\n", "--------------------------------------------------------------------------------\n", "finished 78b49cf8361b1462cc94a061916a15f0b98e27e3\n", "--------------------------------------------------------------------------------\n", "finished 81dea57593b5b28990bb7f012aae1387c8d2de33\n", "--------------------------------------------------------------------------------\n", "finished f2233c725078d49f6b185e642325dacb47b33240\n", "--------------------------------------------------------------------------------\n", "finished fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76\n", "--------------------------------------------------------------------------------\n", "finished 661e624121004dd73467300e467aacae6d8d2f66\n", "--------------------------------------------------------------------------------\n", "finished a967a9bdcc2c75a0270c2be48d845ded5332e4f0\n", "--------------------------------------------------------------------------------\n", "finished 954b92e1207bfe5ab5a117e8393c191cdf0044d2\n", "--------------------------------------------------------------------------------\n", "finished b7a585ebd57f85c89ee20eda5b1a06819b3e1af0\n", "--------------------------------------------------------------------------------\n", "finished 906d5c5c40183468f9521277c6244a6c46730de6\n", "--------------------------------------------------------------------------------\n", "finished 8480dcbc7180cf1bcafcd03a6ad7087c83582a97\n", "--------------------------------------------------------------------------------\n", "finished ec61d7b9e82e6d653c02df14f34acdba8fc060db\n", "--------------------------------------------------------------------------------\n", "finished 71338b4b2f01268759f7ac6b3eff5abb17420a7c\n", "--------------------------------------------------------------------------------\n", "finished 412eb94de4cae754130ae855236420ebd5c42482\n", "--------------------------------------------------------------------------------\n", "finished 99667804e1c64cace1246d4ce342a218fc1fb1ba\n", "--------------------------------------------------------------------------------\n", "finished 3afffa47feabc80e1bc20ffd2143a722a1c360a2\n", "--------------------------------------------------------------------------------\n", "finished ef6544ee27aeab20a64b4df4bd50401cefa405ef\n", "--------------------------------------------------------------------------------\n", "finished 4e15aa6d7c4f9a03f4ae57b3ba04ade3400cccf1\n", "--------------------------------------------------------------------------------\n", "finished fdd3876111605e84a1f7c31206e20cd99e13f1fb\n", "--------------------------------------------------------------------------------\n", "finished 7d35f5c6b7b8b187388523674b4a95585f369d78\n", "--------------------------------------------------------------------------------\n", "finished 459c9a2a8840995436e610459216957bc7ebd914\n", "--------------------------------------------------------------------------------\n", "finished ba9ba084124403bd8930e29d8afcea9d64b6c0b6\n", "--------------------------------------------------------------------------------\n", "finished eacd58d4e78e7238ba5fcca90ba960aaf3ebd263\n", "--------------------------------------------------------------------------------\n", "finished 097b0a531642f79d8b240f89ca1eacca2ee59a5e\n", "--------------------------------------------------------------------------------\n", "finished a32bed7047469273cc8513cad7b2923d60f8d590\n", "--------------------------------------------------------------------------------\n", "finished 328f2f9c35f3cc5e7049a060a608c3f72876484a\n", "--------------------------------------------------------------------------------\n", "finished db51ffc0d1fbce85fd542403812fab8783c53e3c\n", "--------------------------------------------------------------------------------\n", "finished 1de244ea65f1b40c488fe92b29170c1b1d447233\n", "--------------------------------------------------------------------------------\n", "finished eeaba26596d447c531dfac9d6e6bf5cfe4537813\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "--------------------------------------------------------------------------------\n", "finished 72420889aa54342da4472683e67e2c70b76a170f\n", "--------------------------------------------------------------------------------\n", "finished 9424383e6d0ea1dd02dcf1070259e21550da692a\n", "--------------------------------------------------------------------------------\n", "finished 6953f3a501dfee5064a12558a2e2515b69b35189\n", "--------------------------------------------------------------------------------\n", "finished d437457becef4409682caee15e1050889a977f91\n", "--------------------------------------------------------------------------------\n", "finished e4588b70dde3fe5ba8f77a0b749ec1b071346767\n", "--------------------------------------------------------------------------------\n", "finished a2e92f6dee12320a58a4bcc95c14649b27804427\n", "--------------------------------------------------------------------------------\n", "finished ccb088a6beaff01419ec9325dbd19040f279b6ea\n", "--------------------------------------------------------------------------------\n", "finished 173d16efb54ccc152f19afb9b1c2a87915fb414b\n", "--------------------------------------------------------------------------------\n", "finished 7dbb49bdec6e74f9d1eb096d15bdeea0881c3704\n", "--------------------------------------------------------------------------------\n", "finished ff11af4fbc2948a3a3bc635549c7ac349d249abc\n", "--------------------------------------------------------------------------------\n", "finished 2b67b50af18bb2a17ff10d5a37abfd85fc7e9d01\n", "--------------------------------------------------------------------------------\n", "finished 5f0023a7469e01bada70fbe677b76bb535eaa953\n", "--------------------------------------------------------------------------------\n", "finished 7c737e0edcb93585856c65890ef34e5c5a28ee6b\n", "--------------------------------------------------------------------------------\n", "finished 9bb634a6176c639bd6b52d58151e9927c30919d0\n", "--------------------------------------------------------------------------------\n", "finished 1e8de50674f5b33a50c45224b7e07b3f974f6ab0\n", "--------------------------------------------------------------------------------\n", "finished 979a0406f0013560efbdcc486b32ba93ce8c946f\n", "from total 79 commits, there are 0 problematic commits\n" ] } ], "source": [ "time_spent_rand, problem_index_rand = create_pyExplainer_obj('lime', feature_df, test_label)\n", "pickle.dump(time_spent_rand, open(other_object_dir+proj_name+'_train_time_lime_randompertubation.pkl','wb'))\n", "pickle.dump(problem_index_rand, open(other_object_dir+proj_name+'_problem_index_lime_randompertubation.pkl','wb'))\n", "\n", "# time_spent_rand, problem_index_rand = create_pyExplainer_obj('lime', feature_df, test_label,'rulefit')\n", "# pickle.dump(time_spent_rand, open(other_object_dir+proj_name+'_train_time_lime.pkl','wb'))\n", "# pickle.dump(problem_index_rand, open(other_object_dir+proj_name+'_problem_index_lime.pkl','wb'))" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "finished 1e6973aee7137653c62dcef970b1e2527b50517d\n", "finished 5d0ccceb20780fdd3adf519d3f8e6b80b1844407\n", "finished de31210c05f464c4a79255de68b1a515d9b84ed3\n", "finished 957533f685caf9ffc0d9cad569598455d59ade34\n", "finished 96677735f6d8f1f5b6380127956921f719aab799\n", "finished 10593c2eaf4eff4edb13b70f023acbbf743f129f\n", "finished 8db51da1e25f1c148fbe8d8f27d144a39d1850f3\n", "finished ba4ed39616c3c6fabf24ca390c7037877c672fba\n", "finished 2c62e344e26ccba08d5c613a7c4a40a0f076901e\n", "finished 207a7825fbcc69c4c3d4bfefd8e9c33978162ed9\n", "finished 03a41f863b160384593ef8df130f369a0c22d393\n", "finished f0ce71c23c786baf7c828d1fd147d70342593a10\n", "finished b199471154cb4be2d7d9c89939bb31e90f3adff4\n", "finished 608c67439c8fcfa1a2fddd1087ca1fe998a8f9fd\n", "finished 1e90eb66ae999201a0b3d2e7409734c3e2f452d7\n", "finished e8440d1ee8da2cbf4304bbbc0bf43ce78d7a6d1f\n", "finished a9230a38977d6acb804ee8bb6f58c19957dce013\n", "finished e3e4f4d9277c22654ce0dd9a1a0f44a67661e695\n", "finished b6133c35dd587f6b01e8ec12757347b2767713a6\n", "finished 262bdf68b45058fedffda614336d7e75a5b36d4a\n", "finished 9928edb42da42af3463d4a989a0797be46198908\n", "finished d2a5c0f982e8e4f9fbdce17b90925b8d8df56c75\n", "finished 5c2339440a499fb8b86e38893f245b9f02395016\n", "finished 0fb40ceb78c0b35e677b3f2305a6d2477bda0b43\n", "finished 106bf560e291b9b8a0690680170db575086e04b3\n", "finished a10fdea3f580bc1c9ef6ae29f4c2ff404d84464e\n", "finished 8e575be75c80ea71a6ad8fb73e6ace1ed708938f\n", "finished bdf6e2b5f853b0608dfb81b0d0489b469ead5fd2\n", "finished 811a23275da1d35b6ea7dce81ea3d6ac219f42b9\n", "finished 7523489e856e637dc6af23638568f4700980360f\n", "finished 17c4fd71038b0603bf4f0035e46fc0ff1bc517f7\n", "finished d72848c9afba40d21235a4e95cf8e69549290dca\n", "finished a1fe496e1113737d0b133a64078bc45c485dd3b2\n", "finished dc48ac1a9c02c236157347f715190a2b1107ec70\n", "finished fe7198a6ecf45edd8b5ca2030586c746f204fe3f\n", "finished d91bc54749fa4ffdda203db5ad88dc4f20c72eb8\n", "finished 1a2a692b79cefd1fc7e743e8e616292107afacdf\n", "finished 14dc194af688a18b254aea761beb586214c56224\n", "finished 0ebddd87d159072daa0ff36d0ed8af8e4b60fd66\n", "finished d79861560bafce63ff807b10cb435fd59e7becaa\n", "finished 8db14390d418561f7c372902a4a383dc9047603c\n", "finished 1b66a47b5f4cbcdf1a9a1ec38532474588a3fee2\n", "finished 4cdccd69a45aec19d547c10f29f61359b69ad6c1\n", "finished 34a208d1f3829173815beca81d07b53633a12989\n", "finished a2dca1ffc3e6ba81862c0eff625b620f3f175690\n", "finished d632b66dc8b701ca777af4335b6505b4c4cd7828\n", "finished fbbf79f6cadbb1d2b7ce615e9fdb9a0e0f114729\n", "finished f614417f11c4b936e517f0da449038f6a2ca35d4\n", "finished c70350bfa58ced11e7b346f9ad3ba85b0617e8f8\n", "finished 60ab0c8c564af989882f0ea2609019ea7206e2b3\n", "finished 16a9853fe3924ccaf987b2a1628f27bb9c03b950\n", "finished f66de80e38bcfbc29228ea19ffc96fbf34aad0ba\n", "finished 01a44568cc60bb5a6dd7b55d69b20bba57d1b94b\n", "finished acf0209b28e21eed60158967fab77468eb195e7c\n", "finished 010bd1f392e67a6fcd276593b8c79acfe41d1cc7\n", "finished cb0df591a9508e863ad5d5d71190eca349dc551f\n", "finished ca85d237e236f46881dc2c57a589a33e4605917d\n", "finished d4b024ad7d64a854072517eef47b059c93bdfdd3\n", "finished e68617c141c8a16a26bc4ade641aa4a5a809619d\n", "finished 607acb33be18f0e508329f3c8514061322d294a9\n", "finished c586d635387e9baa3c0857afb56d05137fcddd7c\n", "finished be23b19905a9e500fa4b14c43b9492f4b43e9828\n", "finished 4757ccfc8eb10bee756deb36c72d7a7ca37bdbff\n", "finished eb87f1fae8f13c7ab09c9fec56bbfa1fdfdf17fc\n", "finished f0a9b31d90532dd278c12572767c3874648dcaec\n", "finished 1a5c23a2810d4c887d1239eeab09dd9904892c8e\n", "finished 04ec96cdc95396ba2f35493a632b603d182ab6ae\n", "finished 6c4b89a2ad51f92707acc01908b63bc507ad8fe7\n", "finished 512055ec63fcb081cda01b9ef40c15a91f78d7ac\n", "finished a4b5502904b72f3503d70104c1d1296cf6f7d30e\n", "finished 18c3ac4a8935b9997dbb55181d5dbb5232ef2c27\n", "finished ae6b7642e8d32ef5fa75cdcfe55be23c052fd547\n", "finished bd5c3f5a0e3449a4a384090ae4dba75c4cdcf4da\n", "finished 8f3a54f0474e263aff886d671624faa1dc8d223c\n", "finished f4b78c7f17e29448ed54b136eeb4ac700b324120\n", "finished 6925bd7e00d5c39d20450bcf41e848435b6d9830\n", "finished 6d5fd4836475d0f3787b669cab5ef2d5266f474f\n", "finished 724493d21fdfcbb4c095b54975c0c1d612f0a856\n", "finished 604ec1c5786256a8e9390bbe0414df799370862b\n", "finished 1d9a0a620d78ab54f7a3da61b803a97cdbdd01f2\n", "finished c6d82083295e9b1b42f22d3a2d25a1ab7d341a13\n", "finished a52259ea2dda742d528723d58f20b3b225f1eac5\n", "finished 9411a24ba7b5a1380ec8a2aa13d4aa92988d2ff9\n", "finished 23bec4f7cb6e84a8d717c0b5ccc222f51c67f072\n", "finished 7b64653931628328bd5d70b0cdea8952e6c709f8\n", "finished 5c959bd66fa73f96c02a1c7632576578affbf146\n", "finished 30fa37e7776831d6f8022f52d3d92f62189fb702\n", "finished 432a47cbc03414ca7ee6565c9dbcc91d7bf675fb\n", "finished 0493c803ae9612f87ed028e1a39e880aead5bdcb\n", "finished 874e272a56d314a6fb1bd98e7ecd7f986f3b3898\n", "finished d4b7d5fd427d3c46bb61a45edd2623305e23b90b\n", "finished cc70e3a28df3c8492769b305d248bb4d9bf32830\n", "finished 51ecc8016ae5d79397f6c584634030187eedd342\n", "finished a25b2ac5f440f7ace4678b21ada6ebf5ce5dff3c\n", "finished 97a529ad8eaee80e196eb362c4e45901a96ae23c\n", "finished 1d62871fdd980ada24258d9694b50c5125280801\n", "finished 1763c80711993c55f4f13afe56f449b1dd6d3d3a\n", "finished 1957339df302e2da75e0dbe78b5d566194ab2c08\n", "finished aa198b92ac03f14e2da240064904b3a991b92834\n", "finished 9567c2b6a06aa1e8205f9f30beca63d77500dd1d\n", "finished a522d5ce0678bfcffed1972d830627278d778dec\n", "finished 14463cc3d37e1123a855259718e1c8d816baf61d\n", "finished 0a84a7fb24a4605f0da863407512612651890003\n", "finished 80a0a40e970202fa23b5a72f993c54c2264878f1\n", "finished d143540ad1b69ec93c2b7bfadd1f654c4d8c7a34\n", "finished b3c9cc504903eccbc68c441a81b0a727a83117fa\n", "finished a0cbbf8e1afe6e9d6bbe29f977b2726146615447\n", "finished 965542bfac90194bd032e5e6aeb6a507dcb11088\n", "finished 9e5a07c8e2f5d7fb3980f538846b53f0d7c62411\n", "finished f63d2f67ed1e7b8246b36bd08517a55a702a48a9\n", "finished f1b9ac5a542a3125d757094fccda80c80c6dd420\n", "finished b9ff8cd7ad8c402787324d2baca9b32f61eafb4a\n", "finished e8ab7ffb7274190105a3289cc1a3ed1d9ab5feb8\n", "finished b0306a93645ea6475a2bc045d8fc8bc4bd6f00a5\n", "finished b6a8aea4d1fe8be6073af57fad2ab6863d8f359c\n", "finished bff120a4775a1b1f3846a27c38d8eff4a678fd24\n", "finished 713e538237c7fea0b93ade343b1d9368bdbf2698\n", "finished 3fd1beb85c0888251781fe56b067b08c4957b94b\n", "finished 486e10d3740301b417e2a972ec7c797f562e75be\n", "finished 2f49ed4b5dbb5c954fc7a9b42ee7b170c38c775c\n", "finished 65aa92b0348b7ab8413f359b00825610cdf66607\n", "finished 17e556acf5c33583dcb6ed34bfd5a5fd9c148c9e\n", "finished 0eb7e35fbfe5de5f9f00ff1a0cce1d6a61f3a902\n", "finished d122b2e05a9e13d5caea3f8f6578bb473fbb9c5e\n", "finished 1679acd53d1f0c330edf583afe8b347a7304499c\n", "finished 5ad1af72c62dd18ecf38f604094be8fd2c7a5ef8\n", "finished 015555acb75ee4d9298915951d2bfaf0d19d2b02\n", "finished 048faa57037d30c46205dd3dedec45fea62ef2b1\n", "finished 10004672ad1476c55deaad53684a50358da6f656\n", "finished 46922068ac167f492dd303efb359d0c649d69118\n", "finished 11b5487eff312d4d914fbc2f861e18b031421dbe\n", "finished dd9536ac6e6df0e1ae6754e580b99cfbfd05eb77\n", "finished b3e05bdb51e2f238db655e75ff1384bd8a111fe0\n", "finished 5a8e67be491a2b4897c01aeccd584381f74bc294\n", "finished 254e37ab3c995f6514084d38f2f797da9cf5e5a9\n", "finished 848ef0043f60795db680afe8f67b633459eaf52c\n", "finished 10c5c93925abe3d34c4430e0ed852d8358fb2353\n", "finished 3046c4ae22b10f9e4fa83a47bfe089554d4a4681\n", "finished 25bb8fa99ec23ad5bc97a5c28b0426492bc843c6\n", "finished 65ee22f7e7e62f8a1fab5a4318f62733a9a11ebf\n", "finished 2e323ba7b154c6bdb76a4c018e99004dabcb8420\n", "finished 5f9831768461c34533ff93f32b31e095b57d82d1\n", "finished 4dbb55cbb0020702a3ea2ec89ddf61319d00f858\n", "finished a9661fba21d7e5dd3fb5d29f77266993e3adc460\n", "finished befa0b9184eb0c6248d06efa5b02be8217f1722e\n", "finished 73d4a7c1b4681accd993c1109a9eff837ff612f4\n", "finished 2702baed390d094b0eac07d0ae167ed236868d00\n", "finished ecdc5728ca849fd19d01c2a69cf07bf0989e8567\n", "finished a1b08606f2a73339b31ab49d172ae8793d76a55b\n", "finished f9b5dba7b0612222b20a1d63f494b85c63c0e47e\n", "finished 92f281e7a04f27c90cfccc11261880213718ae8b\n", "finished e1c33e2ec7f6a59495ee274bdf95e740bedc1e0f\n", "finished 13f9c5c372d052d630484afa44f6555fc4117326\n", "finished f87e7d964c19cc2be33226df66f0c823af993d49\n", "finished a9a59ccbacafd6eb94f57861cfc28f5a24f474db\n", "finished 3553e400046e50a30e2c5a7dba0c6ea48d9c816f\n", "finished c25c60f6a9ab1ccf12f72f76d400e7c9c0d090b3\n", "finished 1d6235cc3fb3c7806c2c4671f1c5b03022136008\n", "finished 609f01a1ddca64ec191cb15a4f6fb93d219c3336\n", "finished 683323f3593a9123a6e87d6b0f50bb0679f13e56\n", "finished dbacb52f420364cb85500a9c132207670afef4b5\n", "finished 791256cb671630ff70c941272df89717a7216eeb\n", "finished 2a4e50caeaa271937a23ab7c052c7e9c47c1933f\n", "finished 1724c5cee71d3b32466dd9d6cabd22a3339546b4\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "finished fc27c5e3df4b047f2da31e6217e36936b331093f\n", "finished 7255e056092f034daaeb4246a812900645d46911\n", "finished f7fcb809f097cb84e07e2d0093cc9af92c82d29d\n", "finished 4711ff3adff2b4c9a54f5fb4448ace6558f82f6b\n", "finished 9271e300a55f8aa16ff867fff71adadb383849b8\n", "finished 55b54ac9e6b9b83f837e1bf6eaed2be2cce5aab9\n", "finished f8ab9d4366c87724cba2e49e71dc72e2427d5a68\n", "finished bcd9f363ff8a10ff30795c54f471a8c6d76450fe\n", "finished 41e3a94ae18a9219ff4ab10389f3a7799c9b6493\n", "finished b7b0c7dbcd3e6754bc09b2fd75d888c41ae4aadb\n", "finished 901b303f1e836421c16f914d3d60b6c9caabefde\n", "finished 5a7808d676a844c5c1327bd59ec184e26e0546b1\n", "finished 2bfc7171c23d0595aa7f8680271778bc58cb28ba\n", "finished 132f13e5b095bdeaa0db72b151a3bf912ccfca36\n", "finished 4c1b1a893e82f045f5c6c7c9d7b019ddb01cb425\n", "finished e6c0f9c1685e4ffbd72ae75808361252652b7fc2\n", "finished c823016d31b36c10cb6faafcce68a1b98f4dc345\n", "finished e6a3206523b8e1a4edd8867a8e7e36a58631d90a\n", "finished a234ecda87f803b05637f3d74ba53815f20f472f\n", "finished 77821190873a415e2fef445474d997103c8d5651\n", "finished 749b5b98859685d72ebfc684f8066ea138f8b76b\n", "finished 0c33d1443447e76481fdcc19af1b000d60219d10\n", "finished 6bcd9402533e1115efa0e011d2b84feca2b48b19\n", "finished e132bc49c458cb37696d4933ffdf9c478a8bef2b\n", "finished b85e907e727240bacaac56374653b3e38bc33396\n", "finished 2390857d7ae625dcd18a72b2980f2d862b776623\n", "finished 237746a6d63c96726e7966770a602b62205ee333\n", "finished 4d53dc4c74e6bd47d63c5aa0749a5d65066c2baf\n", "finished 827cc5170546e88e5c4ab721f69b8501371e3948\n", "finished 9bc29208bda6071a34bcc0da36a396eb8bab4f30\n", "finished 1a40831f1711bf46c878dd247679bb05d19ffb5a\n", "finished 605749ca12af969ac122008b4fa14904df68caf7\n", "finished 85239cc81440d9e5a4aee3c0961c96a4197ad939\n", "finished f0f52a0085a1f5bbd23e6cfa8f0ea935e8fd56c8\n", "from total 198 commits, there are 0 problematic commits\n" ] } ], "source": [ "# time_spent_ci, problem_index_ci = create_pyExplainer_obj('crossoverinterpolation', feature_df, test_label)\n", "# pickle.dump(time_spent_ci, open(other_object_dir+proj_name+'_train_time_LRR_crossoverinterpolation.pkl','wb'))\n", "# pickle.dump(problem_index_ci, open(other_object_dir+proj_name+'_problem_index_LRR_crossoverinterpolation.pkl','wb'))\n", "\n", "time_spent_ci, problem_index_ci = create_pyExplainer_obj('CrossoverInterpolation', feature_df, test_label,'rulefit')\n", "pickle.dump(time_spent_ci, open(other_object_dir+proj_name+'_train_time_rulefit_crossoverinterpolation.pkl','wb'))\n", "pickle.dump(problem_index_ci, open(other_object_dir+proj_name+'_problem_index_rulefit_crossoverinterpolation.pkl','wb'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Just for testing" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "explainer = 'rulefit'\n", "pyExp_random = pickle.load(open(pyExp_dir+proj_name+'_'+explainer+'_randompertubation_3126.pkl','rb'))\n", "pyExp_crossover = pickle.load(open(pyExp_dir+proj_name+'_'+explainer+'_crossoverinterpolation_3126.pkl','rb'))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'indep', 'dep', 'local_model', 'top_k_positive_rules', 'top_k_negative_rules'])\n" ] } ], "source": [ "print(pyExp_random.keys())" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# euclid_dist = euclidean_distances(sample_instance.values.reshape(1,-1), synthetic_instances.values)[0]\n", "\n", "dist_rand = euclidean_distances(pyExp_random['X_explain'].values.reshape(1,-1), pyExp_random['synthetic_data'].values)[0]\n", "# plt.boxplot(dist)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "dist_cross = euclidean_distances(pyExp_crossover['X_explain'].values.reshape(1,-1), pyExp_crossover['synthetic_data'].values)[0]\n", "# plt.boxplot(dist)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'whiskers': [,\n", " ,\n", " ,\n", " ],\n", " 'caps': [,\n", " ,\n", " ,\n", " ],\n", " 'boxes': [,\n", " ],\n", " 'medians': [,\n", " ],\n", " 'fliers': [,\n", " ],\n", " 'means': []}" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAVIElEQVR4nO3dfWxd9X3H8c8ncew8AQuJGyV2SFiVVgmBUdViFe2qZtUWClOSCnUyQi1aIlIq4lFtEhTyB2xSJOgaKmFtREFYBal1xlRIkSBraBVRRaJNnYLSPMDmlECeBCYJYGxsx853f/g4uw4mD76+9zj3935JV/fe7z3nnq8l6+Pj3/mdcxwRAgCkYULeDQAAyofQB4CEEPoAkBBCHwASQugDQEKq8m7gfGbNmhULFizIuw0AuKTs2rXrvYioPbs+7kN/wYIFamtry7sNALik2H5rpDrDOwCQEEIfABJC6ANAQgh9AEgIoQ8ACSH0AeSqtbVVS5Ys0cSJE7VkyRK1trbm3VJFG/dTNgFUrtbWVq1bt05PPvmkvvKVr2jHjh1avXq1JOm2227LubvK5PF+aeWGhoZgnj5QmZYsWaLm5mYtXbr0TG379u1qamrSnj17cuzs0md7V0Q0fKJO6APIy8SJE9XT06NJkyadqZ06dUqTJ0/WwMBAjp1d+j4t9BnTB5CbRYsWaceOHcNqO3bs0KJFi3LqqPIR+gBys27dOq1evVrbt2/XqVOntH37dq1evVrr1q3Lu7WKxYFcALkZOljb1NSk/fv3a9GiRVq/fj0HcUuIMX0AqECM6QMACH0ASAmhDyBXnJFbXhzIBZAbzsgtPw7kAsjNkiVLtHLlSm3ZsuXM7J2h95yRW5xPO5DLnj6A3Ozbt09dXV1qaWk5s6e/atUqvfXWiHf6wxhgTB9Abqqrq9XU1KSlS5dq0qRJWrp0qZqamlRdXZ13axWL4R0AuZkwYYJmzZqladOm6a233tL8+fPV1dWl9957T6dPn867vUsa8/QBjDt1dXXq6+uTJNmWJPX19amuri7PtioaoQ8gV1OnTlVLS4t6enrU0tKiqVOn5t1SRSP0AeTm6NGjeuSRR9TU1KTJkyerqalJjzzyiI4ePZp3axWL0AeQm0WLFunZZ59Ve3u7Tp8+rfb2dj377LNcWrmEzhv6tltsv2t7T0HtIdtHbL+WPW4u+Ox+2+2237C9rKD+Rdt/zD57zEMDeACSVVdXpy1btmjVqlV6//33tWrVKm3ZsoUx/RI67+wd21+V9JGkpyNiSVZ7SNJHEfGjs5ZdLKlV0g2S5kr6laTPRcSA7Z2S7pH0W0kvSnosIraer0Fm7wCVa/LkyWpoaFBbW5t6e3tVU1Nz5n1PT0/e7V3SRj17JyJ+I+nEBW5nhaTNEdEbEW9Kapd0g+05ki6PiFdi8K/M05JWXnD3ACpSb2+vXn/9dc2ZM0cTJkzQnDlz9Prrr6u3tzfv1ipWMWP6a23vzoZ/ZmS1OkmHCpY5nNXqstdn10dke43tNtttHR0dRbQIYLzr6+sbNntnaAonSmO0of+4pM9Kul7SMUkbsvpI4/RxjvqIImJTRDRERENtbe0oWwRwKeju7tarr76qU6dO6dVXX1V3d3feLVW0UYV+RLwTEQMRcVrSExocw5cG9+DnFSxaL+loVq8foQ4gcbfccoseeOABTZs2TQ888IBuueWWvFuqaKMK/WyMfsg3JQ3N7HleUqPtGttXS1ooaWdEHJPUaftL2ayd70j6RRF9A6gA9fX12rlzp7Zu3aq+vj5t3bpVO3fuVH19/flXxqic9yqbtlslfU3SLNuHJT0o6Wu2r9fgEM1BSd+VpIjYa/sZSfsk9Uu6OyIGsq/6nqSfSJoiaWv2AJCwH/7wh7rnnnvOXFlz/vz5GhgY0KOPPpp3axWLC64ByFVTU5OeeOKJM1M277zzTjU3N+fd1iWPC64BGHdaW1v1wgsvDBveeeGFF7hlYgmxpw8gN9w5q3S4cxaAcWffvn3q7u7+xD1yDx48mHdrFYvQB5Cb6upq3XjjjWpqajqzp3/jjTdylc0SYkwfQG76+vq0efNmrVq1Sp2dnVq1apU2b97MWbklROgDyE11dbUaGxvV0tKiyy67TC0tLWpsbOQeuSVE6APITV9fn7Zt26auri5FhLq6urRt2zb29EuIMX0Auamrq9Px48f1/vvvKyJ05MgRVVVVcT39EmJPH0Buuru71dfXp4cfflhdXV16+OGH1dfXx0XXSojQB5CbEydO6N577x02pn/vvffqxIkLvYUHLhahDyBXS5cu1Z49ezQwMKA9e/Zo6dKlebdU0RjTB5Cb+vp6rVixQv39/Tp16pQmTZqkqqoqrrJZQuzpA8jN4sWL9fHHH2v69OmSpOnTp+vjjz/W4sWLc+6schH6AHLz8ssv6/bbb9fcuXM1YcIEzZ07V7fffrtefvnlvFurWFxwDUBubKurq0tTp049U+vu7ta0adM03rNpvOPSygDGnZqaGm3cuHFYbePGjaqpqcmpo8rHgVwAubnzzjt13333SZLuuusubdy4Uffdd5/uuuuunDurXAzvAMjVVVddpUOHDp15P2/ePL399ts5dlQZGN4BMO4sW7ZsWOBL0qFDh7Rs2bKcOqp8hD6A3Gzbtk2StHz5cnV0dGj58uXD6hh7hD6AXF133XU6cOCAZs+erQMHDui6667Lu6WKdt7Qt91i+13bewpq/2b7ddu7bT9n+8+y+gLbH9t+LXtsLFjni7b/aLvd9mO2XZKfCMAl5cCBA2publZPT4+am5t14MCBvFuqaBeyp/8TSTedVXtJ0pKIuE7S/0i6v+CzAxFxffYoPAT/uKQ1khZmj7O/E0CCurq6dOutt6q6ulq33nqrurq68m6pop039CPiN5JOnFXbFhH92dvfSjrnhTJsz5F0eUS8EoPThZ6WtHJUHQOoOCdPnhz2jNIZizH9VZK2Fry/2vartl+2/VdZrU7S4YJlDme1EdleY7vNdltHR8cYtAhgPKqpqdEVV1wxrHbFFVdwclYJFRX6ttdJ6pf006x0TNJVEfEFSf8k6We2L5c00vj9p54gEBGbIqIhIhpqa2uLaRHAONbb26vOzk7Nnj1bkjR79mx1dnaqt7c3584q16jPyLV9h6S/k/T1bMhGEdErqTd7vcv2AUmf0+CefeEQUL2ko6PdNoDKUFVVpZqaGk2ZMkUTJkzQlClTNGXKFEK/hEa1p2/7Jkn3SVoeEd0F9VrbE7PXf67BA7Z/iohjkjptfymbtfMdSb8ounsAl7T+/n5VVQ3uew5dHaCqqkr9/f3nWg1FOO+evu1WSV+TNMv2YUkPanC2To2kl7KZl7/NZup8VdK/2u6XNCDprogYOgj8PQ3OBJqiwWMAhccBACSqu7tbH3zwgSTp4MGDmjRpUs4dVTauvQMgNxMmTFBEaMaMGTp58uSZZ9s6ffp03u1d0rj2DoBxZ2ins7Ozc9jzeN8ZvZQR+gByNXXqVNXX18u26uvrh91QBWOP6+kDyFVfX5+OHDmiiDjzjNIh9AHkqnCmzqlTp3LsJA0M7wBAQgh9AEgIoQ8gdxMnThz2jNIh9AHkbujgLQdxS4/QB5C7oROxOCGr9Ah9AEgIoQ8gd9OnTx/2jNIh9AHk7qOPPhr2jNIh9AEgIYQ+ACSE0AeAhBD6AHI1c+ZMZTdjkm3NnDkz544qG6EPIFfHjx8fFvrHjx/PuaPKRugDQEIIfQC544zc8iH0ASAhhD4AJOS8oW+7xfa7tvcU1K60/ZLt/82eZxR8dr/tdttv2F5WUP+i7T9mnz3moSM3AICyuZA9/Z9Iuums2g8k/ToiFkr6dfZethdLapR0TbbOf9geukD245LWSFqYPc7+TgBAiZ039CPiN5JOnFVeIemp7PVTklYW1DdHRG9EvCmpXdINtudIujwiXonBC2Y/XbAOAKBMRjumPzsijklS9vyZrF4n6VDBcoezWl32+uz6iGyvsd1mu62jo2OULQIAzjbWB3JHGqePc9RHFBGbIqIhIhpqa2vHrDkASN1oQ/+dbMhG2fO7Wf2wpHkFy9VLOprV60eoAwDKaLSh/7ykO7LXd0j6RUG90XaN7as1eMB2ZzYE1Gn7S9msne8UrAMAKJOq8y1gu1XS1yTNsn1Y0oOSHpb0jO3Vkt6W9C1Jioi9tp+RtE9Sv6S7I2Ig+6rvaXAm0BRJW7MHAKCMPN7vPt/Q0BBtbW15twGgBM51us54z6bxzvauiGg4u84ZuQCQEEIfABJC6ANAQgh9AEjIeWfvAMBYutBrLRYux0HdsUPoAyirwgBn9k75MbwDIDfXXnvtRdVRPEIfQG527979iYC/9tprtXv37pw6qnwM7wDI1VDA22ZIpwzY0weAhBD6AJAQQh8AEkLoA0BCCH0ASAihDwAJIfQBICGEPgAkhNAHgIQQ+gCQEEIfABJC6ANAQkYd+rY/b/u1gseHtr9v+yHbRwrqNxesc7/tdttv2F42Nj8CAOBCjfoqmxHxhqTrJcn2RElHJD0n6R8k/TgiflS4vO3FkholXSNprqRf2f5cRAyMtgcAwMUZq+Gdr0s6EBFvnWOZFZI2R0RvRLwpqV3SDWO0fQDABRir0G+U1Frwfq3t3bZbbM/IanWSDhUsczirfYLtNbbbbLd1dHSMUYsAgKJD33a1pOWS/isrPS7psxoc+jkmacPQoiOsPuIdEyJiU0Q0RERDbW1tsS0CADJjsaf/DUl/iIh3JCki3omIgYg4LekJ/f8QzmFJ8wrWq5d0dAy2DwC4QGMR+repYGjH9pyCz74paU/2+nlJjbZrbF8taaGknWOwfQDABSrqHrm2p0r6G0nfLSj/0Pb1Ghy6OTj0WUTstf2MpH2S+iXdzcwdACivokI/IrolzTyr9u1zLL9e0vpitgkAGD3OyAWAhBD6AJAQQh8AEkLoA0BCCH0ASAihDwAJIfQBICGEPgAkhNAHgIQQ+gCQEEIfABJC6ANAQgh9AEgIoQ8ACSH0ASAhhD4AJITQT8DMmTNl+8xj5syZ518JQEUi9CvczJkzdeLEiWG1EydOEPxAogj9CjcU+Bs2bFBXV5c2bNgwrA4gLY6IvHs4p4aGhmhra8u7jUuK7VGtN95/F1DZbPM7OIZs74qIhrPrRd0YHTl76IoRy/Hg5WP6fXrog9F9H4Bxp6jQt31QUqekAUn9EdFg+0pJ/ylpgaSDkv4+Ik5my98vaXW2/D9GxC+L2X7yLiCMh/b6r7nmGr344ou6+eabtXfvXkns2aN0rrzySp08efKi17uY/1JnzJjBMOUojMWY/tKIuL7g34gfSPp1RCyU9OvsvWwvltQo6RpJN0n6D9sTx2D7OIe1a9dKkvbu3av58+efCfyhOlAKJ0+eVESU9DGaPyoozYHcFZKeyl4/JWllQX1zRPRGxJuS2iXdUILto0Bzc7PWrl2rmpoaSVJNTY3Wrl2r5ubmnDsDkIdiQz8kbbO9y/aarDY7Io5JUvb8maxeJ+lQwbqHs9on2F5ju812W0dHR5Etorm5WT09PYoI9fT0EPhAwoo9kPvliDhq+zOSXrL9+jmWHWmwbsRB5YjYJGmTNDh7p8geAQCZovb0I+Jo9vyupOc0OFzzju05kpQ9v5stfljSvILV6yUdLWb7AICLM+rQtz3N9mVDryX9raQ9kp6XdEe22B2SfpG9fl5So+0a21dLWihp52i3DwC4eMUM78yW9Fw2xapK0s8i4r9t/17SM7ZXS3pb0rckKSL22n5G0j5J/ZLujoiBoroHAFyUUYd+RPxJ0l+MUD8u6eufss56SetHu00AQHG49g4AJITQB4CEEPoAkBBCHwASQugDQEIIfQBICKEPAAkh9AEgIYQ+ACSE0AeAhBD6AJAQbowOYMzFg5dLD11R+m3gohH6AMac/+VDRZT2/ke2FQ+VdBMVieEdAEgIoQ8ACSH0ASAhhD4AJITQB4CEEPoAkBBCHwASQugDQEJGHfq259nebnu/7b2278nqD9k+Yvu17HFzwTr32263/YbtZWPxAwAALlwxZ+T2S/rniPiD7csk7bL9UvbZjyPiR4UL214sqVHSNZLmSvqV7c9FxEARPQAYp2yX9PtnzJhR0u+vVKMO/Yg4JulY9rrT9n5JdedYZYWkzRHRK+lN2+2SbpD0ymh7ADA+jeYSDLZLfukGjNGYvu0Fkr4g6XdZaa3t3bZbbA/9Oa6TdKhgtcP6lD8SttfYbrPd1tHRMRYtAgA0BqFve7qkn0v6fkR8KOlxSZ+VdL0G/xPYMLToCKuP+Gc9IjZFRENENNTW1hbbIgAgU1To256kwcD/aUQ8K0kR8U5EDETEaUlPaHAIRxrcs59XsHq9pKPFbB8AcHGKmb1jSU9K2h8RjxbU5xQs9k1Je7LXz0tqtF1j+2pJCyXtHO32AQAXr5jZO1+W9G1Jf7T9WlZ7QNJttq/X4NDNQUnflaSI2Gv7GUn7NDjz525m7gBAeRUze2eHRh6nf/Ec66yXtH602wQAFIczcgEgIYQ+ACSE0AeAhBD6AJAQQh8AEkLoA0BCCH0ASAihDwAJIfQBICGEPgAkhNAHgIQQ+gCQEEIfABJC6ANAQgh9AEgIoQ8ACSH0ASAhhD4AJITQB4CEEPoAkJCyh77tm2y/Ybvd9g/KvX0ASFlZQ9/2REn/LukbkhZLus324nL2AAApK/ee/g2S2iPiTxHRJ2mzpBVl7gFAjmyP+DjfZxgbVWXeXp2kQwXvD0v6yzL3ACBHEZF3C0kr957+SH+yP/EbYHuN7TbbbR0dHWVoCwDSUO7QPyxpXsH7eklHz14oIjZFRENENNTW1patOQCodOUO/d9LWmj7atvVkholPV/mHgAgWWUd04+IfttrJf1S0kRJLRGxt5w9AEDKyn0gVxHxoqQXy71dAABn5AJAUgh9AEgIoQ8ACfF4P1HCdoekt/Luo0LMkvRe3k0An4Lfz7E1PyI+Med93Ic+xo7ttohoyLsPYCT8fpYHwzsAkBBCHwASQuinZVPeDQDnwO9nGTCmDwAJYU8fABJC6ANAQgj9BNhusf2u7T159wIUsj3P9nbb+23vtX1P3j1VOsb0E2D7q5I+kvR0RCzJux9giO05kuZExB9sXyZpl6SVEbEv59YqFnv6CYiI30g6kXcfwNki4lhE/CF73SlpvwZvq4oSIfQBjAu2F0j6gqTf5dxKRSP0AeTO9nRJP5f0/Yj4MO9+KhmhDyBXtidpMPB/GhHP5t1PpSP0AeTGtiU9KWl/RDyadz8pIPQTYLtV0iuSPm/7sO3VefcEZL4s6duS/tr2a9nj5rybqmRM2QSAhLCnDwAJIfQBICGEPgAkhNAHgIQQ+gCQEEIfABJC6ANAQv4PXoQXXpe2e9UAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "data = [dist_rand, dist_cross]\n", "plt.boxplot(data)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# display(feature_df)\n", "# for c in feature_df:\n", "# print(c)\n", "\n", "# for k in range(0,1):\n", "# print(k)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# test feature binarizer\n", "# from pyexplainer.features import *\n", "# fb = FeatureBinarizer(negations=True)\n", "# fb.fit(x_train)\n", "# display(fb.transform(feature_df))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# search_function='randompertubation'\n", "# i = 3\n", "\n", "# X_explain = feature_df.iloc[[i]]\n", "# y_explain = test_label.iloc[[i]]\n", "\n", "# row_index = str(X_explain.index[0])\n", "\n", "# start = time.time()\n", "\n", "# pyExp_obj = pyExp.explain(X_explain,\n", "# y_explain,\n", "# search_function = search_function, \n", "# top_k = 1000,\n", "# max_rules=2000, \n", "# max_iter =None, \n", "# cv=5,\n", "# explainer='rulefit',\n", "# debug = False)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'indep', 'dep', 'local_model', 'top_k_positive_rules', 'top_k_negative_rules'])\n" ] } ], "source": [ "print(pyExp_obj.keys())" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# display(pyExp_obj['synthetic_data_fb'])" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[False]\n" ] } ], "source": [ "# local_model = pyExp_obj['local_model']\n", "# print(local_model.predict(X_explain.values))\n", "# print('------------------Explanation from local model-------------------------')\n", "# print(local_model.explain())" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# display(pyExp_obj['X_explain'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# print(local_model.predict(pyExp_obj['X_explain_fb']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### sample code for RQ1" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "# synthetic_instances = pyExp_obj['synthetic_data']\n", "# sample_instance = pyExp_obj['X_explain']" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# def agg_list(val):\n", "# return np.mean(val), np.median(val), np.max(val)\n", "\n", "# cos_sim = cosine_similarity(sample_instance.values.reshape(1,-1), synthetic_instances.values)[0]\n", "# euclid_dist = euclidean_distances(sample_instance.values.reshape(1,-1), synthetic_instances.values)[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### sample code for RQ2" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[False] [0.06562197]\n" ] } ], "source": [ "# '''get prediction from global model then compare with result obtained from model (but how to compare??)'''\n", "\n", "# local_model = pyExp_obj['local_model']\n", "# '''In case pyExp uses logistic rule regression'''\n", "# fb_sample_instance = pyExp.feature_binarizer.transform(sample_instance)\n", "# # print(pyExp.feature_binarizer.transform(sample_instance))\n", "# local_pred = local_model.predict(fb_sample_instance)\n", "# local_pred_prob = local_model.predict_proba(fb_sample_instance)\n", "\n", "# '''In case pyExp uses RuleFit'''\n", "# # local_pred = local_model.predict(sample_instance)\n", "# # local_pred_prob = local_model.predict_proba(sample_instance)\n", "\n", "# print(local_pred, local_pred_prob)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------Prediction from local model-------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " rule coefficient\n", "0 (intercept) 0.926494\n", "1 nrev <= 1.00 -11.265911\n", "2 nd <= 1.00 -3.340535\n", "----------------------------------------------------------------------------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " rule coefficient\n", "0 (intercept) -4.822969\n", "1 nrev <= 1.00 -21.790036\n", "2 ent <= 0.90 11.04842\n", "3 asawr <= 0.16 2.480303\n", "4 nuc <= 3.00 2.480303\n", "----------------------------------------------------------------------------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " rule coefficient\n", "0 (intercept) -9.127939\n", "1 nrev <= 2.00 12.185013\n", "2 nrev <= 1.00 -7.495421\n", "3 ent <= 0.63 -7.495421\n", "----------------------------------------------------------------------------------------------------\n", "----------------------------------------------------------------------------------------------------\n", "there is only 1 class in the generated instances\n", "----------------------------------------------------------------------------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n", "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " rule coefficient\n", "0 (intercept) 0.721183\n", "1 nrev <= 1.00 -8.482928\n", "2 asawr <= 0.05 -7.399822\n", "----------------------------------------------------------------------------------------------------\n", " rule coefficient\n", "0 (intercept) -3.748032\n", "----------------------------------------------------------------------------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n", "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " rule coefficient\n", "0 (intercept) -9.149672\n", "1 nrev <= 2.00 14.773528\n", "2 nrev <= 1.00 -9.91744\n", "3 ent <= 0.00 -8.247625\n", "----------------------------------------------------------------------------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " rule coefficient\n", "0 (intercept) -10.434791\n", "1 nrev <= 1.00 -15.862358\n", "2 asawr <= 0.09 15.725896\n", "----------------------------------------------------------------------------------------------------\n", " rule coefficient\n", "0 (intercept) -2.933625\n", "----------------------------------------------------------------------------------------------------\n", " rule coefficient\n", "0 (intercept) 1.111113\n", "1 ld <= 0.00 -11.000013\n", "2 nrev <= 1.00 -2.505401\n", "3 asawr <= 0.00 -0.249322\n", "4 asawr <= 0.00 -0.249322\n", "----------------------------------------------------------------------------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n", "/home/oathaha/.conda/envs/env_oat/lib/python3.9/site-packages/sklearn/linear_model/_sag.py:328: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n", " warnings.warn(\"The max_iter was reached which means \"\n" ] } ], "source": [ "# test rulefit\n", "search_function='randompertubation'\n", "print('------------------Prediction from local model-------------------------')\n", "for i in [3,5,7,20,50,100,83,25,163,127]:\n", " X_explain = feature_df.iloc[[i]]\n", " y_explain = test_label.iloc[[i]]\n", "\n", " row_index = str(X_explain.index[0])\n", "\n", " start = time.time()\n", "\n", " try:\n", " pyExp_obj = pyExp.explain(X_explain,\n", " y_explain,\n", " search_function = search_function, \n", " top_k = 1000,\n", " max_rules=2000, \n", " max_iter =None, \n", " cv=5,\n", " explainer='LRR',\n", " debug = False)\n", " end = time.time()\n", "# print('time spent to train LRR:',str(end-start),'secs')\n", " \n", " local_model = pyExp_obj['local_model']\n", " print(local_model.explain())\n", " \n", "# print(global_model.predict_proba(X_explain)[:,1], local_model.predict_proba(pyExp.scaler.transform(X_explain.values))[:,1])\n", "# print(local_model.explain())\n", " print('-'*100)\n", " except:\n", " print('-'*100)\n", " print('there is only 1 class in the generated instances')\n", " print('-'*100)\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "------------------Prediction from local model-------------------------\n", "[0.84] [0.96695341]\n", "----------------------------------------------------------------------------------------------------\n", "[0.73] [0.49372837]\n", "----------------------------------------------------------------------------------------------------\n", "[0.83] [0.99548978]\n", "----------------------------------------------------------------------------------------------------\n", "[0.67] [0.93530316]\n", "----------------------------------------------------------------------------------------------------\n", "[0.6] [0.94639136]\n", "----------------------------------------------------------------------------------------------------\n", "[0.66] [0.94685406]\n", "----------------------------------------------------------------------------------------------------\n", "[0.62] [0.77372941]\n", "----------------------------------------------------------------------------------------------------\n", "[0.75] [0.91593334]\n", "----------------------------------------------------------------------------------------------------\n", "[0.7] [0.66951567]\n", "----------------------------------------------------------------------------------------------------\n", "[0.58] [0.82852018]\n", "----------------------------------------------------------------------------------------------------\n" ] } ], "source": [ "# test rulefit\n", "search_function='crossoverinterpolation'\n", "print('------------------Prediction from local model-------------------------')\n", "for i in [3,5,7,20,50,100,83,25,163,127]:\n", " X_explain = feature_df.iloc[[i]]\n", " y_explain = test_label.iloc[[i]]\n", "\n", " row_index = str(X_explain.index[0])\n", "\n", " start = time.time()\n", "\n", " start = time.time()\n", " try:\n", " pyExp_obj = pyExp.explain(X_explain,\n", " y_explain,\n", " search_function = search_function, \n", " top_k = 1000,\n", " max_rules=2000, \n", " max_iter =None, \n", " cv=5,\n", " explainer='rulefit',\n", " debug = False)\n", " end = time.time()\n", "# print('time spent to train LRR:',str(end-start),'secs')\n", " \n", " local_model = pyExp_obj['local_model']\n", " \n", " print(global_model.predict_proba(X_explain)[:,1], local_model.predict_proba(X_explain.values)[:,1])\n", "# print(local_model.explain())\n", " print('-'*100)\n", " except:\n", " print('-'*100)\n", " print('there is only 1 class in the generated instances')\n", " print('-'*100)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.simplefilter(\"ignore\")" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "time spent to train LRR: 1.4275202751159668 secs\n", "----------------------------------------------------------------------------------------------------\n", "time spent to train LRR: 1.738269329071045 secs\n", "----------------------------------------------------------------------------------------------------\n", "time spent to train LRR: 1.0873517990112305 secs\n", "----------------------------------------------------------------------------------------------------\n", "time spent to train LRR: 0.9002327919006348 secs\n", "----------------------------------------------------------------------------------------------------\n", "time spent to train LRR: 1.3496229648590088 secs\n", "----------------------------------------------------------------------------------------------------\n" ] }, { "ename": "IndexError", "evalue": "positional indexers are out-of-bounds", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_get_list_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1473\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1474\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_take_with_is_copy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1475\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_take_with_is_copy\u001b[0;34m(self, indices, axis)\u001b[0m\n\u001b[1;32m 3599\u001b[0m \"\"\"\n\u001b[0;32m-> 3600\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3601\u001b[0m \u001b[0;31m# Maybe set copy if we didn't actually change the index.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mtake\u001b[0;34m(self, indices, axis, is_copy, **kwargs)\u001b[0m\n\u001b[1;32m 3585\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3586\u001b[0;31m new_data = self._mgr.take(\n\u001b[0m\u001b[1;32m 3587\u001b[0m \u001b[0mindices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_block_manager_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverify\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mtake\u001b[0;34m(self, indexer, axis, verify, convert)\u001b[0m\n\u001b[1;32m 1466\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1467\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmaybe_convert_indices\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1468\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexers.py\u001b[0m in \u001b[0;36mmaybe_convert_indices\u001b[0;34m(indices, n)\u001b[0m\n\u001b[1;32m 264\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 265\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"indices are out-of-bounds\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 266\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mindices\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mIndexError\u001b[0m: indices are out-of-bounds", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m83\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m25\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m163\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m127\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mX_explain\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeature_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0my_explain\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtest_label\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0mmaybe_callable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 895\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmaybe_callable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 896\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 897\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1490\u001b[0m \u001b[0;31m# a list of integers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1491\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_list_like_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1492\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_list_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1493\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1494\u001b[0m \u001b[0;31m# a single integer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_get_list_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1475\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1476\u001b[0m \u001b[0;31m# re-raise with different error message\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1477\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"positional indexers are out-of-bounds\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1478\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1479\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mIndexError\u001b[0m: positional indexers are out-of-bounds" ] } ], "source": [ "# sample LRR when used with random perturbation\n", "\n", "search_function='randompertubation'\n", "\n", "for i in [3,5,7,20,50,100,83,25,163,127]:\n", " X_explain = feature_df.iloc[[i]]\n", " y_explain = test_label.iloc[[i]]\n", "\n", " row_index = str(X_explain.index[0])\n", "\n", " start = time.time()\n", "\n", " start = time.time()\n", " try:\n", " pyExp_obj = pyExp.explain(X_explain,\n", " y_explain,\n", " search_function = search_function, \n", " top_k = 1000,\n", " max_rules=2000, \n", " max_iter =None, \n", " cv=5,\n", " explainer='LRR',\n", " debug = False)\n", " end = time.time()\n", " print('time spent to train LRR:',str(end-start),'secs')\n", "\n", "# local_model = pyExp_obj['local_model']\n", "# print('------------------Explanation from local model-------------------------')\n", "# print(local_model.explain())\n", " print('-'*100)\n", " except:\n", " print('-'*100)\n", " print('there is only 1 class in the generated instances')\n", " print('-'*100)\n", " " ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "time spent to train LRR: 28.87514853477478 secs\n", "----------------------------------------------------------------------------------------------------\n", "time spent to train LRR: 26.213839769363403 secs\n", "----------------------------------------------------------------------------------------------------\n", "time spent to train LRR: 25.021041870117188 secs\n", "----------------------------------------------------------------------------------------------------\n", "time spent to train LRR: 29.421844244003296 secs\n", "----------------------------------------------------------------------------------------------------\n", "time spent to train LRR: 26.97343945503235 secs\n", "----------------------------------------------------------------------------------------------------\n" ] }, { "ename": "IndexError", "evalue": "positional indexers are out-of-bounds", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_get_list_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1473\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1474\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_take_with_is_copy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1475\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_take_with_is_copy\u001b[0;34m(self, indices, axis)\u001b[0m\n\u001b[1;32m 3599\u001b[0m \"\"\"\n\u001b[0;32m-> 3600\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3601\u001b[0m \u001b[0;31m# Maybe set copy if we didn't actually change the index.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mtake\u001b[0;34m(self, indices, axis, is_copy, **kwargs)\u001b[0m\n\u001b[1;32m 3585\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3586\u001b[0;31m new_data = self._mgr.take(\n\u001b[0m\u001b[1;32m 3587\u001b[0m \u001b[0mindices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_block_manager_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverify\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mtake\u001b[0;34m(self, indexer, axis, verify, convert)\u001b[0m\n\u001b[1;32m 1466\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1467\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmaybe_convert_indices\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1468\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexers.py\u001b[0m in \u001b[0;36mmaybe_convert_indices\u001b[0;34m(indices, n)\u001b[0m\n\u001b[1;32m 264\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 265\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"indices are out-of-bounds\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 266\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mindices\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mIndexError\u001b[0m: indices are out-of-bounds", "\nThe above exception was the direct cause of the following exception:\n", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m83\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m25\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m163\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m127\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mX_explain\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeature_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0my_explain\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtest_label\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0mmaybe_callable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 895\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmaybe_callable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 896\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 897\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1490\u001b[0m \u001b[0;31m# a list of integers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1491\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_list_like_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1492\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_list_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1493\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1494\u001b[0m \u001b[0;31m# a single integer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.conda/envs/env_oat/lib/python3.9/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_get_list_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1475\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1476\u001b[0m \u001b[0;31m# re-raise with different error message\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1477\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"positional indexers are out-of-bounds\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1478\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1479\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mIndexError\u001b[0m: positional indexers are out-of-bounds" ] } ], "source": [ "# sample LRR when used with crossover interpolation\n", "\n", "search_function='crossoverinterpolation'\n", "\n", "for i in [3,5,7,20,50,100,83,25,163,127]:\n", " X_explain = feature_df.iloc[[i]]\n", " y_explain = test_label.iloc[[i]]\n", "\n", " row_index = str(X_explain.index[0])\n", "\n", " start = time.time()\n", "\n", " start = time.time()\n", " pyExp_obj = pyExp.explain(X_explain,\n", " y_explain,\n", " search_function = search_function, \n", " top_k = 1000,\n", " max_rules=2000, \n", " max_iter =None, \n", " cv=5,\n", " explainer='LRR',\n", " debug = False)\n", " end = time.time()\n", " print('time spent to train LRR:',str(end-start),'secs')\n", " \n", "# local_model = pyExp_obj['local_model']\n", "# print('------------------Explanation from local model-------------------------')\n", "# print(local_model.explain())\n", " print('-'*100)\n", " \n", " " ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'X_explain_fb', 'indep', 'dep', 'local_model'])\n" ] } ], "source": [ "print(pyExp_obj.keys())" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# display(pyExp_obj['synthetic_data'].columns)\n", "# print(feature_df.index)\n", "local_model = pyExp_obj['local_model']" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " rule coefficient\n", "0 (intercept) 1.21719\n", "1 la <= 44.00 -2.10262\n", "2 la <= 76.00 -0.561556\n" ] } ], "source": [ "print(local_model.explain( maxCoeffs=None))\n", "# print(local_model.z)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MultiIndex([( 'la', '<=', 1.0),\n", " ( 'la', '<=', 2.0),\n", " ( 'la', '<=', 5.0),\n", " ( 'la', '<=', 9.0),\n", " ( 'la', '<=', 17.0),\n", " ( 'la', '<=', 27.0),\n", " ( 'la', '<=', 44.0),\n", " ( 'la', '<=', 76.0),\n", " ( 'la', '<=', 163.0),\n", " ( 'ld', '<=', 0.0),\n", " ...\n", " ('asawr', '<=', 0.433364602876798),\n", " ('rsawr', '<=', 0.18055330452007923),\n", " ('rsawr', '<=', 0.2564102564102564),\n", " ('rsawr', '<=', 0.3196254791765793),\n", " ('rsawr', '<=', 0.3754889178617992),\n", " ('rsawr', '<=', 0.4287529047714299),\n", " ('rsawr', '<=', 0.4816326530612245),\n", " ('rsawr', '<=', 0.5758975125536251),\n", " ('rsawr', '<=', 0.7078384798099763),\n", " ('rsawr', '<=', 0.8487118531623176)],\n", " names=['feature', 'operation', 'value'], length=165)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
featurelald...asawrrsawr
operation<=<=...<=<=
value1.0000002.0000005.0000009.00000017.00000027.00000044.00000076.000000163.0000000.000000...0.4333650.1805530.2564100.3196250.3754890.4287530.4816330.5758980.7078380.848712
00000000000...1000000011
10000000001...1000000011
20000000000...1000000111
30000000000...1000000011
40000011110...1000000011
..................................................................
21161111111110...1000001111
21170000000010...1111111111
21180000001111...1000000011
21191111111110...1000000001
21200000000000...1111111111
\n", "

2121 rows × 165 columns

\n", "
" ], "text/plain": [ "feature la \\\n", "operation <= \n", "value 1.000000 2.000000 5.000000 9.000000 17.000000 27.000000 \n", "0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 \n", "4 0 0 0 0 0 1 \n", "... ... ... ... ... ... ... \n", "2116 1 1 1 1 1 1 \n", "2117 0 0 0 0 0 0 \n", "2118 0 0 0 0 0 0 \n", "2119 1 1 1 1 1 1 \n", "2120 0 0 0 0 0 0 \n", "\n", "feature ld ... asawr \\\n", "operation <= ... <= \n", "value 44.000000 76.000000 163.000000 0.000000 ... 0.433365 \n", "0 0 0 0 0 ... 1 \n", "1 0 0 0 1 ... 1 \n", "2 0 0 0 0 ... 1 \n", "3 0 0 0 0 ... 1 \n", "4 1 1 1 0 ... 1 \n", "... ... ... ... ... ... ... \n", "2116 1 1 1 0 ... 1 \n", "2117 0 0 1 0 ... 1 \n", "2118 1 1 1 1 ... 1 \n", "2119 1 1 1 0 ... 1 \n", "2120 0 0 0 0 ... 1 \n", "\n", "feature rsawr \\\n", "operation <= \n", "value 0.180553 0.256410 0.319625 0.375489 0.428753 0.481633 \n", "0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... \n", "2116 0 0 0 0 0 1 \n", "2117 1 1 1 1 1 1 \n", "2118 0 0 0 0 0 0 \n", "2119 0 0 0 0 0 0 \n", "2120 1 1 1 1 1 1 \n", "\n", "feature \n", "operation \n", "value 0.575898 0.707838 0.848712 \n", "0 0 1 1 \n", "1 0 1 1 \n", "2 1 1 1 \n", "3 0 1 1 \n", "4 0 1 1 \n", "... ... ... ... \n", "2116 1 1 1 \n", "2117 1 1 1 \n", "2118 0 1 1 \n", "2119 0 0 1 \n", "2120 1 1 1 \n", "\n", "[2121 rows x 165 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "generated_instance = pyExp_obj['synthetic_data']\n", "print(generated_instance.columns)\n", "display(generated_instance)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "feature la\n", "operation <=\n", "value 44.0\n", "0 0\n", "1 0\n", "2 0\n", "3 0\n", "4 1\n", "... ...\n", "2116 1\n", "2117 0\n", "2118 1\n", "2119 1\n", "2120 0\n", "\n", "[2121 rows x 1 columns]\n" ] } ], "source": [ "print(generated_instance.loc[:, (generated_instance.columns.get_level_values(0)=='la') & \n", " (generated_instance.columns.get_level_values(1)=='<=') &\n", " (generated_instance.columns.get_level_values(2)==44.0) ])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# # search_function = 'crossoverinterpolation' # 'randompertubation' or 'crossoverinterpolation'\n", "# search_functions = ['randompertubation', 'crossoverinterpolation']\n", "\n", "# for i in range(0,len(feature_df)):\n", "# X_explain = feature_df.iloc[[i]]\n", "# y_explain = test_label.iloc[[i]]\n", " \n", "# row_index = str(X_explain.index[0])\n", " \n", "# try:\n", "# pyExp_obj = pyExp.explain(X_explain,\n", "# y_explain,\n", "# search_function = search_function, \n", "# top_k = 1000, \n", "# max_rules=2000, \n", "# max_iter =None, \n", "# cv=5,\n", "# debug = False)\n", "# pickle.dump(pyExp_obj, open(pyExp_dir+search_function+'_'+row_index+'.pkl','wb'))\n", " \n", "# synt_pred = pyExp_obj['synthetic_predictions']\n", "# print('{}: found {} defect from total {}'.format(row_index, str(np.sum(synt_pred)), \n", "# str(len(synt_pred))))\n", "# # print('finished', row_index)\n", "# except:\n", "# problem_index.append(row_index)\n", "# # print(row_index)\n", "# break" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# explain_index = 13\n", "# X_explain = feature_df.iloc[[explain_index]]\n", "# X_explain" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "code_folding": [] }, "outputs": [], "source": [ "# y_explain = test_label.iloc[[explain_index]]\n", "# y_explain" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "code_folding": [] }, "outputs": [], "source": [ "# search_function = 'crossoverinterpolation' # 'randompertubation' or 'crossoverinterpolation''\n", "# start = time.time()\n", "# create_pyExp_rule_obj = pyExp.explain(X_explain,\n", "# y_explain,\n", "# search_function = search_function, \n", "# top_k = 1000, \n", "# max_rules=2000, \n", "# max_iter =None, \n", "# cv=5,\n", "# debug = False)\n", "\n", "# end = time.time()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# print('time spent {}'.format(str(end-start)))\n", "# pickle.dump(create_pyExp_rule_obj, open(pyExp_dir+search_function+'_'+str(explain_index)+'.pkl','wb'))" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# display(create_pyExp_rule_obj['synthetic_data'])" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# # print(create_pyExp_rule_obj['synthetic_predictions'])\n", "# # print(np.sum(create_pyExp_rule_obj['synthetic_predictions']))\n", "# display(create_pyExp_rule_obj.keys())\n", "# print(create_pyExp_rule_obj['synthetic_predictions'])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "pyexp_obj = pickle.load(open(pyExp_dir+'openstack_rulefit_crossoverinterpolation_2a4e50caeaa271937a23ab7c052c7e9c47c1933f_200_rules.pkl','rb'))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['synthetic_data', 'synthetic_predictions', 'X_explain', 'y_explain', 'indep', 'dep', 'top_k_positive_rules', 'top_k_negative_rules', 'commit_id', 'local_model'])\n" ] } ], "source": [ "print(pyexp_obj.keys())" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "local_model = pyexp_obj['local_model']\n", "rule = local_model.get_rules()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# print(rule)\n", "rule = rule[rule['type']=='rule']" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " rule type coef \\\n", "13 ndev > -0.9049999713897705 & rsawr <= 0.075000... rule -0.035452 \n", "14 la <= 73.2599983215332 rule -0.005483 \n", "15 la > 104.8949966430664 & rrexp <= 425.71501159... rule -0.034989 \n", "16 la > 104.8949966430664 & ns > 1.32500004768371... rule -0.028039 \n", "17 ns > 1.4300000071525574 & la > 87.27999877929688 rule -0.008070 \n", ".. ... ... ... \n", "206 la > 49.97999954223633 & asawr <= 0.0049999998... rule -0.044510 \n", "207 la > 67.8499984741211 & rrexp > 386.1300048828... rule -0.076416 \n", "208 la > 55.260000228881836 & rsawr > 0.0899999998... rule 0.103231 \n", "209 la <= 55.14999961853027 rule -0.006513 \n", "210 age <= -0.10999999940395355 & ns <= 1.48000001... rule -0.044921 \n", "\n", " support importance \n", "13 0.029333 0.005982 \n", "14 0.157333 0.001996 \n", "15 0.120000 0.011370 \n", "16 0.029333 0.004731 \n", "17 0.074667 0.002121 \n", ".. ... ... \n", "206 0.226667 0.018635 \n", "207 0.168000 0.028570 \n", "208 0.754667 0.044419 \n", "209 0.136000 0.002233 \n", "210 0.090667 0.012898 \n", "\n", "[198 rows x 5 columns]\n" ] } ], "source": [ "print(rule)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ruletypecoefsupportimportance
27ns <= 1.350000023841858 & ent <= 0.90500000119...rule0.1501450.6346670.072299
125la > 23.53499984741211 & asawr > 0.00499999988...rule0.1167500.5013330.058375
24asawr <= 0.13499999791383743 & la > 51.75 & ns...rule0.1214000.6533330.057775
182ent <= 0.925000011920929 & asawr > 0.004999999...rule0.1124950.6080000.054920
126app <= 3.9550000429153442 & ns <= 1.4550000429...rule0.1102740.5946670.054140
..................
134la > 46.94999885559082 & ns > 1.5849999785423279rule-0.0039940.0666670.000996
204la <= 66.48500061035156rule0.0027350.1280000.000914
191la <= 56.47999954223633rule-0.0021930.1600000.000804
132ns > 1.6449999809265137 & la > 73.2599983215332rule0.0032840.0453330.000683
77la <= 67.8499984741211rule0.0001010.1653330.000038
\n", "

198 rows × 5 columns

\n", "
" ], "text/plain": [ " rule type coef \\\n", "27 ns <= 1.350000023841858 & ent <= 0.90500000119... rule 0.150145 \n", "125 la > 23.53499984741211 & asawr > 0.00499999988... rule 0.116750 \n", "24 asawr <= 0.13499999791383743 & la > 51.75 & ns... rule 0.121400 \n", "182 ent <= 0.925000011920929 & asawr > 0.004999999... rule 0.112495 \n", "126 app <= 3.9550000429153442 & ns <= 1.4550000429... rule 0.110274 \n", ".. ... ... ... \n", "134 la > 46.94999885559082 & ns > 1.5849999785423279 rule -0.003994 \n", "204 la <= 66.48500061035156 rule 0.002735 \n", "191 la <= 56.47999954223633 rule -0.002193 \n", "132 ns > 1.6449999809265137 & la > 73.2599983215332 rule 0.003284 \n", "77 la <= 67.8499984741211 rule 0.000101 \n", "\n", " support importance \n", "27 0.634667 0.072299 \n", "125 0.501333 0.058375 \n", "24 0.653333 0.057775 \n", "182 0.608000 0.054920 \n", "126 0.594667 0.054140 \n", ".. ... ... \n", "134 0.066667 0.000996 \n", "204 0.128000 0.000914 \n", "191 0.160000 0.000804 \n", "132 0.045333 0.000683 \n", "77 0.165333 0.000038 \n", "\n", "[198 rows x 5 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(rule.sort_values(by='importance',ascending=False))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ruletypecoefsupportimportance
540rtime > 40.69499969482422 & app > 2.0299999713...rule0.9770630.2833330.440281
588asawr > 0.044999999925494194 & la <= 2245.4250...rule0.9486510.3866670.461980
690app <= 3.975000023841858 & age <= 0.8499999940...rule0.9419520.0066670.076653
882nd > 2.3850001096725464 & rrexp <= 2276.844970...rule0.8661760.7000000.396932
527nrev <= 15.140000343322754 & nd > 4.2650001049...rule0.8539260.0166670.109319
..................
1054rsawr <= 0.3050000071525574 & rtime > 7.904999...rule-0.8892390.0966670.262773
980app <= 2.0049999952316284 & rrexp > 849.420013...rule-0.9096460.1100000.284619
1114la > 204.33499908447266 & la > 33.825000762939...rule-1.0498400.2633330.462393
331ndev <= 67.1349983215332 & age > 2.54999995231...rule-1.3399710.3800000.650404
347nd > 4.265000104904175 & nrev > 15.14000034332...rule-1.5041750.2133330.616202
\n", "

1863 rows × 5 columns

\n", "
" ], "text/plain": [ " rule type coef \\\n", "540 rtime > 40.69499969482422 & app > 2.0299999713... rule 0.977063 \n", "588 asawr > 0.044999999925494194 & la <= 2245.4250... rule 0.948651 \n", "690 app <= 3.975000023841858 & age <= 0.8499999940... rule 0.941952 \n", "882 nd > 2.3850001096725464 & rrexp <= 2276.844970... rule 0.866176 \n", "527 nrev <= 15.140000343322754 & nd > 4.2650001049... rule 0.853926 \n", "... ... ... ... \n", "1054 rsawr <= 0.3050000071525574 & rtime > 7.904999... rule -0.889239 \n", "980 app <= 2.0049999952316284 & rrexp > 849.420013... rule -0.909646 \n", "1114 la > 204.33499908447266 & la > 33.825000762939... rule -1.049840 \n", "331 ndev <= 67.1349983215332 & age > 2.54999995231... rule -1.339971 \n", "347 nd > 4.265000104904175 & nrev > 15.14000034332... rule -1.504175 \n", "\n", " support importance \n", "540 0.283333 0.440281 \n", "588 0.386667 0.461980 \n", "690 0.006667 0.076653 \n", "882 0.700000 0.396932 \n", "527 0.016667 0.109319 \n", "... ... ... \n", "1054 0.096667 0.262773 \n", "980 0.110000 0.284619 \n", "1114 0.263333 0.462393 \n", "331 0.380000 0.650404 \n", "347 0.213333 0.616202 \n", "\n", "[1863 rows x 5 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(rule.sort_values(by='coef',ascending=False))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ruletypecoefsupportimportance
27ns <= 1.350000023841858 & ent <= 0.90500000119...rule0.1501450.6346670.072299
24asawr <= 0.13499999791383743 & la > 51.75 & ns...rule0.1214000.6533330.057775
125la > 23.53499984741211 & asawr > 0.00499999988...rule0.1167500.5013330.058375
182ent <= 0.925000011920929 & asawr > 0.004999999...rule0.1124950.6080000.054920
126app <= 3.9550000429153442 & ns <= 1.4550000429...rule0.1102740.5946670.054140
..................
150asawr <= 0.004999999888241291rule-0.0826660.2533330.035953
45la > 46.64999961853027 & app > 3.9550000429153442rule-0.0826950.1493330.029474
22ns <= 1.4550000429153442 & app > 3.95500004291...rule-0.0881130.1573330.032083
177asawr > 0.13499999791383743 & la > 51.75 & ns ...rule-0.1001960.1413330.034905
136rtime > 96.99500274658203 & app > 2.9750000238...rule-0.1278150.0640000.031283
\n", "

198 rows × 5 columns

\n", "
" ], "text/plain": [ " rule type coef \\\n", "27 ns <= 1.350000023841858 & ent <= 0.90500000119... rule 0.150145 \n", "24 asawr <= 0.13499999791383743 & la > 51.75 & ns... rule 0.121400 \n", "125 la > 23.53499984741211 & asawr > 0.00499999988... rule 0.116750 \n", "182 ent <= 0.925000011920929 & asawr > 0.004999999... rule 0.112495 \n", "126 app <= 3.9550000429153442 & ns <= 1.4550000429... rule 0.110274 \n", ".. ... ... ... \n", "150 asawr <= 0.004999999888241291 rule -0.082666 \n", "45 la > 46.64999961853027 & app > 3.9550000429153442 rule -0.082695 \n", "22 ns <= 1.4550000429153442 & app > 3.95500004291... rule -0.088113 \n", "177 asawr > 0.13499999791383743 & la > 51.75 & ns ... rule -0.100196 \n", "136 rtime > 96.99500274658203 & app > 2.9750000238... rule -0.127815 \n", "\n", " support importance \n", "27 0.634667 0.072299 \n", "24 0.653333 0.057775 \n", "125 0.501333 0.058375 \n", "182 0.608000 0.054920 \n", "126 0.594667 0.054140 \n", ".. ... ... \n", "150 0.253333 0.035953 \n", "45 0.149333 0.029474 \n", "22 0.157333 0.032083 \n", "177 0.141333 0.034905 \n", "136 0.064000 0.031283 \n", "\n", "[198 rows x 5 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(rule.sort_values(by=['coef'],ascending=[False]))\n", "# sort by importance then get coef > 0" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['ns <= 1.350000023841858 & ent <= 0.9050000011920929 & asawr > 0.004999999888241291',\n", " 'asawr <= 0.13499999791383743 & la > 51.75 & ns <= 1.4300000071525574',\n", " 'la > 23.53499984741211 & asawr > 0.004999999888241291 & app <= 3.9000000953674316',\n", " 'ent <= 0.925000011920929 & asawr > 0.004999999888241291 & la > 26.139999389648438',\n", " 'app <= 3.9550000429153442 & ns <= 1.4550000429153442 & la > 82.54999923706055',\n", " 'ns <= 1.4749999642372131 & ndev > 3.1200000047683716 & la > 44.34499931335449',\n", " 'la > 87.41499710083008 & ent <= 0.9749999940395355 & la <= 1110.3800048828125',\n", " 'app <= 3.9550000429153442 & la > 46.64999961853027 & ns <= 1.4399999976158142',\n", " 'la > 55.260000228881836 & rsawr > 0.08999999985098839 & rtime <= 148.375',\n", " 'rsawr > 0.08500000089406967 & la > 88.8650016784668 & rrexp > -407.61500549316406',\n", " 'la > 85.48500061035156 & ent <= 0.9600000083446503 & la <= 975.3250122070312',\n", " 'la > 67.8499984741211 & rtime <= 72.5099983215332 & rrexp > 386.1300048828125',\n", " 'asawr > 0.004999999888241291 & la <= 917.9400024414062 & la > 55.760000228881836',\n", " 'ndev > -0.9049999713897705 & rsawr > 0.07500000111758709 & la > 56.35499954223633',\n", " 'la > 45.53499984741211 & app <= 3.975000023841858 & ns <= 1.4850000143051147',\n", " 'la > 13.130000591278076 & age > -2.5549999475479126 & ns <= 1.425000011920929',\n", " 'asawr > -0.014999999664723873 & la <= 1089.7150268554688 & la > 43.68000030517578',\n", " 'la > 104.8949966430664 & rrexp > 425.7150115966797 & ns <= 1.3250000476837158',\n", " 'la <= 1146.614990234375 & la > 46.77499961853027 & ent <= 0.9549999833106995',\n", " 'ns <= 1.3650000095367432 & asawr > 0.004999999888241291 & la > 15.87999963760376',\n", " 'la > 37.34499931335449 & nrev <= 46.760000228881836 & ns <= 1.4649999737739563',\n", " 'la > 12.25499963760376 & ns <= 1.4550000429153442 & ndev > -10.069999933242798',\n", " 'la > 84.95000076293945 & ent <= 0.9350000023841858 & asawr > -0.024999999441206455',\n", " 'la > 46.78999900817871 & ns <= 1.5 & ndev > -0.9549999833106995',\n", " 'ns <= 1.4449999928474426 & ent <= 0.9350000023841858 & la > -11.50499963760376',\n", " 'la > 44.26499938964844 & ns <= 1.4950000047683716 & rrexp > 423.87998962402344',\n", " 'la > 46.0049991607666 & rrexp > 426.10499572753906 & ns <= 1.4699999690055847',\n", " 'ent <= 0.925000011920929 & la > 83.50500106811523 & ns <= 1.4900000095367432',\n", " 'la > 87.27999877929688 & app <= 4.924999952316284 & ns <= 1.4300000071525574',\n", " 'ns <= 1.4800000190734863 & age > -0.10999999940395355 & la > 87.01499938964844',\n", " 'rtime <= 96.99500274658203 & la > 48.40999984741211',\n", " 'ent <= 0.9549999833106995 & ns <= 1.4800000190734863 & la > 83.02000045776367',\n", " 'ns <= 1.465000033378601 & age > -0.014999999664723873 & la > 55.06999969482422',\n", " 'asawr > 0.004999999888241291 & la > 49.97999954223633 & ns <= 1.465000033378601',\n", " 'la > 83.20499801635742 & ns <= 1.5049999952316284 & ent <= 0.9600000083446503',\n", " 'ns <= 1.4449999928474426 & asawr > 0.004999999888241291 & la > 51.55999946594238',\n", " 'ndev > -1.8149999976158142 & ns <= 1.5049999952316284 & la > 61.70000076293945',\n", " 'la > 46.709999084472656 & ndev > -7.425000190734863 & rtime <= 131.2300033569336',\n", " 'app <= 2.975000023841858 & rtime > 96.99500274658203 & la > 48.40999984741211',\n", " 'la > 55.13999938964844 & rtime <= 172.20000457763672 & ns <= 1.48499995470047',\n", " 'ns <= 1.39000004529953 & ent <= 0.9600000083446503 & la > 85.54500198364258',\n", " 'ent <= 0.925000011920929 & la > 51.55999946594238 & ns <= 1.4600000381469727',\n", " 'la > 46.54999923706055 & ent <= 0.8550000190734863 & ns <= 1.4399999976158142',\n", " 'la > 46.94999885559082 & app <= 4.974999904632568 & ns <= 1.5849999785423279',\n", " 'ns <= 1.6449999809265137 & asawr > 0.004999999888241291 & la > 73.2599983215332',\n", " 'rrexp > 430.3350067138672 & ns <= 1.5449999570846558 & la > 56.47999954223633',\n", " 'la > 70.68000030517578 & ns <= 1.3949999809265137 & ent <= 0.8149999976158142',\n", " 'ent <= 0.9350000023841858 & la > 46.64999961853027 & ns <= 1.3450000286102295',\n", " 'la > 55.14999961853027 & ns <= 1.465000033378601 & nrev > 6.015000104904175',\n", " 'age > -7.080000162124634 & la > 98.27499771118164 & ns <= 1.175000011920929',\n", " 'asawr > -0.014999999664723873 & ns <= 1.5 & la > 66.48500061035156',\n", " 'ns > 1.175000011920929 & la > 98.27499771118164',\n", " 'la <= 61.70000076293945',\n", " 'ns > 1.6449999809265137 & la > 73.2599983215332',\n", " 'la <= 66.48500061035156',\n", " 'la <= 67.8499984741211']" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(list(rule[rule['coef']>0].sort_values(by=['coef','importance'],ascending=[False,False])['rule']))" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "142" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(rule[rule['coef']<0].sort_values(by=['importance'],ascending=False))" ] } ], "metadata": { "kernelspec": { "display_name": "Python_Oat", "language": "python", "name": "env_oat" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }