{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "predictive_maintenance_grid_search_atds_v4_nov2021.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "xipxpNzPAl8f", "outputId": "774200e0-fe4c-4a48-bab6-f9719f204236" }, "source": [ "\n", "!pip install -I numpy==1.19.2\n", "!pip install snowflake-connector-python\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "!pip install -I pyarrow==5.0.0\n", "!pip install git+https://github.com/pattersonconsulting/ml_tools.git" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting numpy==1.19.2\n", " Downloading numpy-1.19.2-cp37-cp37m-manylinux2010_x86_64.whl (14.5 MB)\n", "\u001b[K |████████████████████████████████| 14.5 MB 10.8 MB/s \n", "\u001b[?25hInstalling collected packages: numpy\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\n", "albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.\u001b[0m\n", "Successfully installed numpy-1.19.5\n" ] }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "numpy" ] } } }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Collecting snowflake-connector-python\n", " Downloading snowflake_connector_python-2.7.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.9 MB)\n", "\u001b[K |████████████████████████████████| 14.9 MB 12.4 MB/s \n", "\u001b[?25hCollecting asn1crypto<2.0.0,>0.24.0\n", " Downloading asn1crypto-1.4.0-py2.py3-none-any.whl (104 kB)\n", "\u001b[K |████████████████████████████████| 104 kB 73.7 MB/s \n", "\u001b[?25hCollecting oscrypto<2.0.0\n", " Downloading oscrypto-1.2.1-py2.py3-none-any.whl (192 kB)\n", "\u001b[K |████████████████████████████████| 192 kB 85.6 MB/s \n", "\u001b[?25hRequirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from snowflake-connector-python) (2.10)\n", "Collecting pyOpenSSL<21.0.0,>=16.2.0\n", " Downloading pyOpenSSL-20.0.1-py2.py3-none-any.whl (54 kB)\n", "\u001b[K |████████████████████████████████| 54 kB 2.2 MB/s \n", "\u001b[?25hRequirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from snowflake-connector-python) (2021.10.8)\n", "Requirement already satisfied: setuptools>34.0.0 in /usr/local/lib/python3.7/dist-packages (from snowflake-connector-python) (57.4.0)\n", "Collecting pycryptodomex!=3.5.0,<4.0.0,>=3.2\n", " Downloading pycryptodomex-3.11.0-cp35-abi3-manylinux2010_x86_64.whl (1.9 MB)\n", "\u001b[K |████████████████████████████████| 1.9 MB 66.5 MB/s \n", "\u001b[?25hRequirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.7/dist-packages (from snowflake-connector-python) (2.0.7)\n", "Collecting cryptography<4.0.0,>=3.1.0\n", " Downloading cryptography-3.4.8-cp36-abi3-manylinux_2_24_x86_64.whl (3.0 MB)\n", "\u001b[K |████████████████████████████████| 3.0 MB 16.3 MB/s \n", "\u001b[?25hRequirement already satisfied: requests<3.0.0 in /usr/local/lib/python3.7/dist-packages (from snowflake-connector-python) (2.23.0)\n", "Collecting pyjwt<3.0.0\n", " Downloading PyJWT-2.3.0-py3-none-any.whl (16 kB)\n", "Requirement already satisfied: cffi<2.0.0,>=1.9 in /usr/local/lib/python3.7/dist-packages (from snowflake-connector-python) (1.15.0)\n", "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from snowflake-connector-python) (2018.9)\n", "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi<2.0.0,>=1.9->snowflake-connector-python) (2.21)\n", "Requirement already satisfied: six>=1.5.2 in /usr/local/lib/python3.7/dist-packages (from pyOpenSSL<21.0.0,>=16.2.0->snowflake-connector-python) (1.15.0)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0->snowflake-connector-python) (3.0.4)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0->snowflake-connector-python) (1.24.3)\n", "Installing collected packages: cryptography, asn1crypto, pyOpenSSL, pyjwt, pycryptodomex, oscrypto, snowflake-connector-python\n", "Successfully installed asn1crypto-1.4.0 cryptography-3.4.8 oscrypto-1.2.1 pyOpenSSL-20.0.1 pycryptodomex-3.11.0 pyjwt-2.3.0 snowflake-connector-python-2.7.1\n", "Collecting pyarrow==5.0.0\n", " Downloading pyarrow-5.0.0-cp37-cp37m-manylinux2014_x86_64.whl (23.6 MB)\n", "\u001b[K |████████████████████████████████| 23.6 MB 1.2 MB/s \n", "\u001b[?25hCollecting numpy>=1.16.6\n", " Downloading numpy-1.21.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)\n", "\u001b[K |████████████████████████████████| 15.7 MB 48.4 MB/s \n", "\u001b[?25hInstalling collected packages: numpy, pyarrow\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "yellowbrick 1.3.post1 requires numpy<1.20,>=1.16.0, but you have numpy 1.21.4 which is incompatible.\n", "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\n", "albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.\u001b[0m\n", "Successfully installed numpy-1.21.4 pyarrow-5.0.0\n" ] }, { "output_type": "display_data", "data": { "application/vnd.colab-display-data+json": { "pip_warning": { "packages": [ "numpy" ] } } }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Collecting git+https://github.com/pattersonconsulting/ml_tools.git\n", " Cloning https://github.com/pattersonconsulting/ml_tools.git to /tmp/pip-req-build-pxdcfi1a\n", " Running command git clone -q https://github.com/pattersonconsulting/ml_tools.git /tmp/pip-req-build-pxdcfi1a\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from ml-valuation==0.0.1) (1.1.5)\n", "Requirement already satisfied: sklearn in /usr/local/lib/python3.7/dist-packages (from ml-valuation==0.0.1) (0.0)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from ml-valuation==0.0.1) (3.2.2)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from ml-valuation==0.0.1) (1.21.4)\n", "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->ml-valuation==0.0.1) (2.8.2)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->ml-valuation==0.0.1) (0.11.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->ml-valuation==0.0.1) (1.3.2)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->ml-valuation==0.0.1) (3.0.6)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->ml-valuation==0.0.1) (1.15.0)\n", "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->ml-valuation==0.0.1) (2018.9)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (from sklearn->ml-valuation==0.0.1) (1.0.1)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->sklearn->ml-valuation==0.0.1) (3.0.0)\n", "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->sklearn->ml-valuation==0.0.1) (1.1.0)\n", "Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->sklearn->ml-valuation==0.0.1) (1.4.1)\n", "Building wheels for collected packages: ml-valuation\n", " Building wheel for ml-valuation (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for ml-valuation: filename=ml_valuation-0.0.1-py3-none-any.whl size=8800 sha256=206661f3f75698901c0c074a57ff1b75fa5768e41e87d9ec538fe360c980cb6a\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-y8az2rma/wheels/ce/52/e8/5f5de6a3a97eca5d2f9e453ecafb0f88f99054a1f2601f637e\n", "Successfully built ml-valuation\n", "Installing collected packages: ml-valuation\n", "Successfully installed ml-valuation-0.0.1\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "L0MjBet4Ravf" }, "source": [ "# import basic data science libraries\n", "\n", "import pandas as pd\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "import seaborn as sns\n", "from matplotlib.patches import Patch\n", "from matplotlib.lines import Line2D\n", "%matplotlib inline" ], "execution_count": 2, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "l2Nkk0_xAPYU" }, "source": [ "import sklearn_pandas\n", "from sklearn_pandas import DataFrameMapper, cross_val_score" ], "execution_count": 3, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "U3i_PN8URs-h", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "51bc12a7-e88e-426e-9288-098bd776ea73" }, "source": [ "\n", "# import required machine learning libraries\n", "\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", "from sklearn.svm import SVC\n", "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", "from sklearn.model_selection import KFold, cross_val_score, GridSearchCV, train_test_split\n", "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n", "from sklearn.metrics import average_precision_score\n", "from sklearn.metrics import precision_recall_curve\n", "from sklearn.metrics import plot_precision_recall_curve\n", "from sklearn.metrics import roc_curve, auc, confusion_matrix\n", "\n", "\n", "import sklearn.preprocessing, sklearn.decomposition, sklearn.linear_model, sklearn.pipeline, sklearn.metrics\n", "from sklearn.preprocessing import Normalizer, StandardScaler\n", "\n", "!pip install scikit-plot\n", "import scikitplot as skplt\n", "\n", "# classifiers\n", "from sklearn.ensemble import GradientBoostingClassifier\n", "from xgboost import XGBClassifier\n", "import lightgbm as lgb\n", "\n", "import warnings\n", "from sklearn.exceptions import DataConversionWarning\n", "warnings.filterwarnings(action='ignore', category=DataConversionWarning)\n", "\n", "import ml_valuation\n", "\n", "from ml_valuation import model_valuation\n", "from ml_valuation import model_visualization" ], "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting scikit-plot\n", " Downloading scikit_plot-0.3.7-py3-none-any.whl (33 kB)\n", "Requirement already satisfied: joblib>=0.10 in /usr/local/lib/python3.7/dist-packages (from scikit-plot) (1.1.0)\n", "Requirement already satisfied: scipy>=0.9 in /usr/local/lib/python3.7/dist-packages (from scikit-plot) (1.4.1)\n", "Requirement already satisfied: matplotlib>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from scikit-plot) (3.2.2)\n", "Requirement already satisfied: scikit-learn>=0.18 in /usr/local/lib/python3.7/dist-packages (from scikit-plot) (1.0.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.4.0->scikit-plot) (1.3.2)\n", "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.4.0->scikit-plot) (1.21.4)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.4.0->scikit-plot) (0.11.0)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.4.0->scikit-plot) (3.0.6)\n", "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.4.0->scikit-plot) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib>=1.4.0->scikit-plot) (1.15.0)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.18->scikit-plot) (3.0.0)\n", "Installing collected packages: scikit-plot\n", "Successfully installed scikit-plot-0.3.7\n" ] } ] }, { "cell_type": "code", "metadata": { "id": "SxcPzQwFR9Pk" }, "source": [ "#df = pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/00601/ai4i2020.csv\")\n", "#print( df.head() )\n", "\n", "#print( df.info() )\n", "import snowflake.connector\n", "import getpass\n", "\n", "# using a simpler way to use your login info without embedding it in the notebook \n", "# other enterprise connection patterns (e.g., SSO) are in the Snowflake docs: https://docs.snowflake.com/en/user-guide/python-connector-example.html\n", "snowflake_username = getpass.getpass(\"Enter Snowflake Username\")\n", "snowflake_pwd = getpass.getpass(\"Enter Snowflake Password\")\n", "snowflake_acct = 'nna57244.us-east-1'\n", "\n", "print(snowflake_username)\n", "print(snowflake_acct)\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "tOUWxQAy9iqH", "outputId": "bcae8571-4ca2-4b5c-b550-b078e9cf755a" }, "source": [ "\n", "ctx = snowflake.connector.connect(\n", " user=snowflake_username,\n", " password=snowflake_pwd,\n", " account=snowflake_acct\n", " )\n", "cs = ctx.cursor()\n", "try:\n", "\tcs.execute(\"SELECT current_version()\")\n", "\tone_row = cs.fetchone()\n", "\tprint(one_row[0])\n", "\n", "\n", "\n", "\t#cs.cursor().execute(\"USE WAREHOUSE tiny_warehouse_mg\")\n", "\tcs.execute(\"USE DATABASE PREDICTIVE_MAINTENANCE\") \n", "\n", "\n", "\t#cs.execute(\"SELECT count(TYPE) from RAW_DEVICE_DATA where MACHINE_FAILURE = 1;\")\n", "\t#one_row = cs.fetchone()\n", "\t#print(\"Records with Failures: \" + str(one_row[0]))\n", "\n", "\tquery_output = cs.execute( \"select TYPE, AIR_TEMPERATURE, PROCESS_TEMPERATURE, ROTATIONAL_SPEED, TORQUE, TOOL_WEAR, MACHINE_FAILURE from SUMMARY_SENSOR_DATA;\" )\n", "\n", "\tdf = query_output.fetch_pandas_all() #.to_csv(\"/path/to/write/table.csv\")\t\n", "\t#df.to_csv(\"./data/full_snowflake_dataset.csv\", index=False)\t\n", "\n", "\tprint( df )\n", "\n", "\n", "finally:\n", " cs.close()\n", "\n", "\n", "\n", "ctx.close()" ], "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "5.41.1\n", " TYPE AIR_TEMPERATURE ... TOOL_WEAR MACHINE_FAILURE\n", "0 M 298.1 ... 0.0 0\n", "1 L 298.2 ... 3.0 0\n", "2 L 298.1 ... 5.0 0\n", "3 L 298.2 ... 7.0 0\n", "4 L 298.2 ... 9.0 0\n", "... ... ... ... ... ...\n", "5245 M 298.8 ... 14.0 0\n", "5246 H 298.9 ... 17.0 0\n", "5247 M 299.0 ... 22.0 0\n", "5248 H 299.0 ... 25.0 0\n", "5249 M 299.0 ... 30.0 0\n", "\n", "[10000 rows x 7 columns]\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "id": "gKMs7_wySFaD", "outputId": "cb7b48b2-69d4-4bb7-8395-b52527381581" }, "source": [ "# map categorical variable 'diagnosis' into numeric\n", "\n", "df[\"TYPE\"] = df[\"TYPE\"].map({'H': 2, 'M': 1, 'L': 0})\n", "\n", "df.head()\n", "\n" ], "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
| \n", " | TYPE | \n", "AIR_TEMPERATURE | \n", "PROCESS_TEMPERATURE | \n", "ROTATIONAL_SPEED | \n", "TORQUE | \n", "TOOL_WEAR | \n", "MACHINE_FAILURE | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "1 | \n", "298.1 | \n", "308.6 | \n", "1551.0 | \n", "42.8 | \n", "0.0 | \n", "0 | \n", "
| 1 | \n", "0 | \n", "298.2 | \n", "308.7 | \n", "1408.0 | \n", "46.3 | \n", "3.0 | \n", "0 | \n", "
| 2 | \n", "0 | \n", "298.1 | \n", "308.5 | \n", "1498.0 | \n", "49.4 | \n", "5.0 | \n", "0 | \n", "
| 3 | \n", "0 | \n", "298.2 | \n", "308.6 | \n", "1433.0 | \n", "39.5 | \n", "7.0 | \n", "0 | \n", "
| 4 | \n", "0 | \n", "298.2 | \n", "308.7 | \n", "1408.0 | \n", "40.0 | \n", "9.0 | \n", "0 | \n", "