{ "cells": [ { "cell_type": "markdown", "id": "animal-spencer", "metadata": { "execution": { "iopub.execute_input": "2021-03-25T06:28:01.095660Z", "iopub.status.busy": "2021-03-25T06:28:01.094646Z", "iopub.status.idle": "2021-03-25T06:28:01.098635Z", "shell.execute_reply": "2021-03-25T06:28:01.098635Z" } }, "source": [ "## Data / Model Preparation" ] }, { "cell_type": "code", "execution_count": 1, "id": "exposed-provincial", "metadata": { "execution": { "iopub.execute_input": "2021-03-26T04:55:04.226010Z", "iopub.status.busy": "2021-03-26T04:55:04.225034Z", "iopub.status.idle": "2021-03-26T04:55:05.795830Z", "shell.execute_reply": "2021-03-26T04:55:05.794853Z" } }, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "import pandas as pd\n", "from pyexplainer.pyexplainer_pyexplainer import PyExplainer\n", "import pickle\n", "import os\n", "\n", "\n", "cwd = os.getcwd()\n", "parent_dir = os.path.dirname(cwd)\n", "path_train = parent_dir + \"/tests/pyexplainer_test_data/activemq-5.0.0.zip\"\n", "training_data = pd.read_csv(path_train, index_col = 'File')\n", "\n", "dep = training_data.columns[-4]\n", "selected_features = [\"ADEV\", \"AvgCyclomaticModified\", \"AvgEssential\", \"AvgLineBlank\", \"AvgLineComment\",\n", " \"CountClassBase\", \"CountClassCoupled\", \"CountClassDerived\", \"CountDeclClass\",\n", " \"CountDeclClassMethod\", \"CountDeclClassVariable\", \"CountDeclInstanceVariable\",\n", " \"CountDeclMethodDefault\", \"CountDeclMethodPrivate\", \"CountDeclMethodProtected\",\n", " \"CountDeclMethodPublic\", \"CountInput_Mean\", \"CountInput_Min\", \"CountOutput_Min\", \"MAJOR_LINE\",\n", " \"MaxInheritanceTree\", \"MaxNesting_Min\", \"MINOR_COMMIT\", \"OWN_COMMIT\", \"OWN_LINE\",\n", " \"PercentLackOfCohesion\", \"RatioCommentToCode\"]\n", "all_cols = training_data.columns\n", "for col in all_cols:\n", " if col not in selected_features:\n", " all_cols = all_cols.drop(col)\n", "indep = all_cols\n", "\n", "X_train = training_data.loc[:, indep]\n", "y_train = training_data.loc[:, dep]\n", "\n", "blackbox_model = RandomForestClassifier(max_depth=3, random_state=0)\n", "blackbox_model.fit(X_train, y_train)\n", "\n", "class_label = ['Clean', 'Defect']\n", "\n", "path_test = parent_dir + \"/tests/pyexplainer_test_data/activemq-5.1.0.zip\"\n", "testing_data = pd.read_csv(path_test, index_col = 'File')\n", "X_test = testing_data.loc[:, indep]\n", "y_test = testing_data.loc[:, dep]" ] }, { "cell_type": "markdown", "id": "invisible-storage", "metadata": {}, "source": [ "## python unittest" ] }, { "cell_type": "code", "execution_count": 8, "id": "dressed-increase", "metadata": { "execution": { "iopub.execute_input": "2021-03-26T04:55:05.807542Z", "iopub.status.busy": "2021-03-26T04:55:05.803641Z", "iopub.status.idle": "2021-03-26T04:55:13.673592Z", "shell.execute_reply": "2021-03-26T04:55:13.673592Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\micha\\anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1fa7caf45a3c4bd8b37080066cc13961", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\micha\\anaconda3\\lib\\site-packages\\ipywidgets\\widgets\\widget.py:412: DeprecationWarning: Passing unrecognized arguments to super(SliderStyle).__init__(widget_width='60%').\n", "object.__init__() takes exactly one argument (the instance to initialize)\n", "This is deprecated in traitlets 4.2.This error will be raised in a future release of traitlets.\n", " super(Widget, self).__init__(**kwargs)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d33c1b39f6474736b461e03627dd9a97", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatSlider(value=4.0, continuous_update=False, description='#1 Decrease the values of AvgCyclomaticModified t…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "660a8a8bc4e94444af12f8dc3c9a5977", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output(layout=Layout(border='3px solid black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "." ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b026f1f4a11649e19dd521733678d361", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\micha\\anaconda3\\lib\\site-packages\\ipywidgets\\widgets\\widget.py:412: DeprecationWarning: Passing unrecognized arguments to super(SliderStyle).__init__(widget_width='60%').\n", "object.__init__() takes exactly one argument (the instance to initialize)\n", "This is deprecated in traitlets 4.2.This error will be raised in a future release of traitlets.\n", " super(Widget, self).__init__(**kwargs)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4f76728a616749ad9d78b0279da39316", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatSlider(value=1.0, continuous_update=False, description='#1 Decrease the values of CountClassCoupled to le…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8f75a2767c524145af404baafbbd129b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatSlider(value=4.0, continuous_update=False, description='#2 Decrease the values of CountDeclMethodProtecte…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fb4ff6d1d33e4e648502b03547daa98a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output(layout=Layout(border='3px solid black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ ".\n", "----------------------------------------------------------------------\n", "Ran 2 tests in 5.005s\n", "\n", "OK\n" ] } ], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "import pandas as pd\n", "from pyexplainer.pyexplainer_pyexplainer import PyExplainer\n", "import pickle\n", "import os\n", "import unittest\n", "\n", "\n", "class TestVisualisation(unittest.TestCase):\n", " \"\"\"unittest the visualisation stuff in pyexplainer\"\"\"\n", " \n", " def test_visualise_pyobject(self):\n", " def load_object(filename):\n", " with open(filename, 'rb') as file:\n", " object_o = pickle.load(file)\n", " return (object_o)\n", " # load rule obj\n", " if os.path.isfile('../tests/rule_objects/rule_object.pyobject'):\n", " loaded_rule_obj = load_object('../tests/rule_objects/rule_object.pyobject')\n", " py_explainer = PyExplainer(X_train,\n", " y_train,\n", " indep,\n", " dep,\n", " blackbox_model,\n", " class_label=class_label)\n", " py_explainer.visualise(loaded_rule_obj)\n", " \n", " def test_visualise_manually_created(self):\n", " for explain_index in range(14, 15):\n", " X_explain = X_test.iloc[[explain_index]]\n", " y_explain = y_test.iloc[[explain_index]]\n", " py_explainer = PyExplainer(X_train,\n", " y_train,\n", " indep,\n", " dep,\n", " blackbox_model,\n", " class_label=class_label)\n", " rule_object = py_explainer.explain(X_explain, y_explain)\n", " py_explainer.visualise(rule_object)\n", "\n", "\n", "if __name__ == '__main__':\n", " unittest.main(argv=['first-arg-is-ignored'], exit=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "prostate-wholesale", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "010eeece21e947e2bdcc8d6160bb530f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "07821dd5ff694b5d89e293929b0387b0": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": "3px solid black", "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0952734cd7c4498abdf15901eb7368cb": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "LabelModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_484825b1c6fb477ea3105f19391aa16e", "placeholder": "", "style": "IPY_MODEL_191dd160f0454ff0a821dce7ca60da01", "value": "7.0" } }, "0ef8ecf45aeb4fd88392deff0243a15f": { "model_module": "@jupyter-widgets/output", "model_module_version": "1.0.0", "model_name": "OutputModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/output", "_model_module_version": "1.0.0", "_model_name": "OutputModel", "_view_count": null, "_view_module": "@jupyter-widgets/output", "_view_module_version": "1.0.0", "_view_name": "OutputView", "layout": "IPY_MODEL_07821dd5ff694b5d89e293929b0387b0", "msg_id": "", "outputs": [ { "data": { "text/html": "\n \n \n \n
\n \n \n \n \n \n \n \n \n \n