{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": 3
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python_defaultSpec_1597439255963",
   "display_name": "Python 3.8.4 64-bit"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is a simple demo notebook illustrating how OptimalFlow's autoCV module typically works on a classification problem."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install external packages in the Binder environment.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left [####################] 100.0%\n\n *optimalflow* autoCV Module ===> rgcv_CrossValidation with 5 folds:\nBest Parameters: {'fit_intercept': 'False'}\n\nBest CV Score: 0.803456180567801\n\nlgr -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 1.0s\nsvm -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 4.5s\nmlp -- Accuracy: 0.787 / Precision: 0.745 / Recall: 0.631 / Latency: 1.0s\nrf -- Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / Latency: 45.6s\nada -- Accuracy: 0.792 / Precision: 0.759 / Recall: 0.631 / Latency: 20.6s\ngb -- Accuracy: 0.815 / Precision: 0.796 / Recall: 0.662 / Latency: 3.2s\nxgb -- Accuracy: 0.815 / Precision: 0.786 / Recall: 0.677 / Latency: 2.0s\nlsvc -- Accuracy: 0.753 / Precision: 0.667 / Recall: 0.646 / Latency: 4.1s\nsgd -- Accuracy: 0.775 / Precision: 0.658 / Recall: 0.8 / Latency: 0.0s\nhgboost -- Accuracy: 0.815 / Precision: 0.82 / Recall: 0.631 / Latency: 9.1s\nrgcv -- Accuracy: 0.753 / Precision: 0.678 / Recall: 0.615 / Latency: 
1.0s\n"
    }
   ],
   "source": [
    "# Classification Demo\n",
    "import pandas as pd\n",
    "from optimalflow.autoCV import dynaClassifier, evaluate_model\n",
    "import joblib\n",
    "\n",
    "# Load the pre-split train / validation / test CSV files.\n",
    "tr_features = pd.read_csv('./data/classification/train_features.csv')\n",
    "tr_labels = pd.read_csv('./data/classification/train_labels.csv')\n",
    "val_features = pd.read_csv('./data/classification/val_features.csv')\n",
    "val_labels = pd.read_csv('./data/classification/val_labels.csv')\n",
    "te_features = pd.read_csv('./data/classification/test_features.csv')\n",
    "te_labels = pd.read_csv('./data/classification/test_labels.csv')\n",
    "\n",
    "# Short names of the estimators to cross-validate; reused below to reload the models.\n",
    "custom_ml = ['lgr', 'svm', 'mlp', 'rf', 'ada', 'gb', 'xgb', 'lsvc', 'sgd', 'hgboost', 'rgcv']\n",
    "\n",
    "clf_cv_demo = dynaClassifier(custom_estimators=custom_ml, random_state=13, cv_num=5)\n",
    "clf_cv_demo.fit(tr_features, tr_labels)\n",
    "\n",
    "# Reload one pickled model per estimator name.\n",
    "# NOTE(review): presumably fit() above wrote these files into ./pkl - confirm.\n",
    "# joblib.load unpickles its input, so only load files you trust.\n",
    "models = {\n",
    "    name: joblib.load('./pkl/{}_clf_model.pkl'.format(name))\n",
    "    for name in custom_ml\n",
    "}\n",
    "\n",
    "# Evaluate every model on the validation set; report a failure and move on\n",
    "# to the next model instead of aborting the whole comparison.\n",
    "for name, model in models.items():\n",
    "    try:\n",
    "        ml_evl = evaluate_model(model_type=\"cls\")\n",
    "        ml_evl.fit(name, model, val_features, val_labels)\n",
    "    except Exception as exc:\n",
    "        print(f\"Failed to evaluate the {name} model: {exc}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Now in Progress - Model Selection w/ Cross-validation: Estimate about 0.0 minutes left [####################] 100.0%\n\n *optimalflow* autoCV Module ===> rgcv_CrossValidation with 5 folds:\nBest Parameters: {'fit_intercept': 'False'}\n\nBest CV Score: 0.803456180567801\n\nlgr -- Accuracy: 0.775 / Precision: 0.712 / Recall: 0.646 / Latency: 3.0s\nsvm -- Accuracy: 0.747 / Precision: 0.672 / Recall: 0.6 / Latency: 4.4s\nmlp -- Accuracy: 0.77 / Precision: 0.7 / Recall: 0.646 / Latency: 5.6s\nrf -- Accuracy: 0.809 / Precision: 0.83 / Recall: 0.6 / Latency: 38.2s\nada -- Accuracy: 0.792 / Precision: 
0.759 / Recall: 0.631 / Latency: 21.1s\ngb -- Accuracy: 0.815 / Precision: 0.796 / Recall: 0.662 / Latency: 4.0s\nxgb -- Accuracy: 0.82 / Precision: 0.811 / Recall: 0.662 / Latency: 2.0s\nlsvc -- Accuracy: 0.747 / Precision: 0.661 / Recall: 0.631 / Latency: 5.2s\nsgd -- Accuracy: 0.64 / Precision: 0.6 / Recall: 0.046 / Latency: 0.0s\nhgboost -- Accuracy: 0.815 / Precision: 0.82 / Recall: 0.631 / Latency: 5.3s\nrgcv -- Accuracy: 0.753 / Precision: 0.678 / Recall: 0.615 / Latency: 6.3s\n"
    }
   ],
   "source": [
    "# fast Classification Demo\n",
    "import pandas as pd\n",
    "from optimalflow.autoCV import fastClassifier, evaluate_model\n",
    "import joblib\n",
    "\n",
    "# Load the pre-split train / validation / test CSV files.\n",
    "tr_features = pd.read_csv('./data/classification/train_features.csv')\n",
    "tr_labels = pd.read_csv('./data/classification/train_labels.csv')\n",
    "val_features = pd.read_csv('./data/classification/val_features.csv')\n",
    "val_labels = pd.read_csv('./data/classification/val_labels.csv')\n",
    "te_features = pd.read_csv('./data/classification/test_features.csv')\n",
    "te_labels = pd.read_csv('./data/classification/test_labels.csv')\n",
    "\n",
    "# Short names of the estimators to cross-validate; reused below to reload the models.\n",
    "custom_ml = ['lgr', 'svm', 'mlp', 'rf', 'ada', 'gb', 'xgb', 'lsvc', 'sgd', 'hgboost', 'rgcv']\n",
    "\n",
    "clf_cv_demo = fastClassifier(custom_estimators=custom_ml, random_state=13, cv_num=5, n_comb=12)\n",
    "clf_cv_demo.fit(tr_features, tr_labels)\n",
    "\n",
    "# Reload one pickled model per estimator name.\n",
    "# NOTE(review): presumably fit() above wrote these files into ./pkl - confirm.\n",
    "# joblib.load unpickles its input, so only load files you trust.\n",
    "models = {\n",
    "    name: joblib.load('./pkl/{}_clf_model.pkl'.format(name))\n",
    "    for name in custom_ml\n",
    "}\n",
    "\n",
    "# Evaluate every model on the validation set; report a failure and move on\n",
    "# to the next model instead of aborting the whole comparison.\n",
    "for name, model in models.items():\n",
    "    try:\n",
    "        ml_evl = evaluate_model(model_type=\"cls\")\n",
    "        ml_evl.fit(name, model, val_features, val_labels)\n",
    "    except Exception as exc:\n",
    "        print(f\"Failed to evaluate the {name} model: {exc}\")"
   ]
  }
 ]
}