{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Vega + Comet.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": { "id": "Tx5C-o8hy0y6" },
      "source": [
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install comet_ml"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "vIofsUO4zQdh" },
      "source": [
        "import comet_ml\n",
        "import getpass, os\n",
        "\n",
        "# Prompt for the key so it is never written into the notebook itself.\n",
        "os.environ[\"COMET_API_KEY\"] = getpass.getpass(\"Paste your COMET API KEY: \")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "SuF1W-4lAvn-" },
      "source": [
        "from sklearn.datasets import load_wine as load_data"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "Qd8TTfwrAzWA" },
      "source": [
        "dataset = load_data()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "WlVUGqViA9GD" },
      "source": [
        "X, y = dataset.data, dataset.target\n",
        "featurecols = dataset.feature_names"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "b6OnxD2PCAqr" },
      "source": [
        "import pandas as pd\n",
        "\n",
        "# Features plus the class label in a single frame, for logging and inspection.\n",
        "df = pd.DataFrame(X, columns=featurecols)\n",
        "df[\"target\"] = y"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "2KsTEoywGFt3" },
      "source": [
        "df.head()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "ork4vYk1bA1Z" },
      "source": [
        "# Log the raw dataset as a table in its own experiment, tagged \"dataset\".\n",
        "experiment = comet_ml.Experiment(project_name=\"comet-vega\")\n",
        "experiment.add_tag(\"dataset\")\n",
        "experiment.log_table(\"wine.json\", df, headers=False, orient=\"records\")\n",
        "experiment.end()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "shnDFgEdiMkw" },
      "source": [
        "df.shape"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "VTdFWCmfWjc4" },
      "source": [
        "from sklearn.ensemble import RandomForestClassifier\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.metrics import classification_report\n",
        "from sklearn.preprocessing import OneHotEncoder"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "XfCq1DRajftB" },
      "source": [
        "y.shape"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "EEpBfvyPavrv" },
      "source": [
        "# One seed shared by the train/test split and by every model trained below.\n",
        "RANDOM_STATE = 1\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "WawyXybEjvyW" },
      "source": [
        "y_test.shape"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "reLHUCNJWwfp" },
      "source": [
        "optimizer_config = {\n",
        "    # We pick the random search algorithm:\n",
        "    \"algorithm\": \"random\",\n",
        "\n",
        "    # Declare your hyperparameters in the Vizier-inspired format:\n",
        "    \"parameters\": {\n",
        "        \"n_estimators\": {\"type\": \"discrete\", \"values\": [10, 100, 500]},\n",
        "        \"max_depth\": {\"type\": \"discrete\", \"values\": [4, 6, 8]}\n",
        "    },\n",
        "\n",
        "    # Declare what we will be optimizing, and how:\n",
        "    \"spec\": {\n",
        "        \"metric\": \"accuracy\",\n",
        "        \"objective\": \"maximize\",\n",
        "    },\n",
        "}\n",
        "optimizer = comet_ml.Optimizer(optimizer_config)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "Y6ghfoVob8Gx" },
      "source": [
        "def create_feature_importance_df(model, feature_names):\n",
        "    \"\"\"Pair each feature name with the model's importance score for it.\n",
        "\n",
        "    Args:\n",
        "        model: Fitted estimator exposing ``feature_importances_``.\n",
        "        feature_names: Feature names; assumed to be ordered like the columns\n",
        "            the model was fit on.\n",
        "\n",
        "    Returns:\n",
        "        DataFrame with ``feature_name`` and ``feature_importance`` columns.\n",
        "    \"\"\"\n",
        "    return pd.DataFrame(\n",
        "        {\n",
        "            \"feature_name\": feature_names,\n",
        "            \"feature_importance\": model.feature_importances_,\n",
        "        }\n",
        "    )"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "oMlJU0r3ZK2a" },
      "source": [
        "# Train one random forest per hyperparameter combination proposed by the optimizer.\n",
        "for experiment in optimizer.get_experiments(project_name=\"comet-vega\"):\n",
        "    model = RandomForestClassifier(\n",
        "        random_state=RANDOM_STATE,\n",
        "        max_depth=experiment.get_parameter(\"max_depth\"),\n",
        "        n_estimators=experiment.get_parameter(\"n_estimators\"),\n",
        "    )\n",
        "\n",
        "    model.fit(X_train, y_train)\n",
        "    predictions = model.predict(X_test)\n",
        "\n",
        "    # Dict-valued report entries are logged as a group under a \"label_<key>\" prefix;\n",
        "    # scalar entries (e.g. the \"accuracy\" metric the optimizer targets) are logged directly.\n",
        "    report = classification_report(y_test, predictions, output_dict=True)\n",
        "    for k, v in report.items():\n",
        "        if isinstance(v, dict):\n",
        "            experiment.log_metrics(v, prefix=f\"label_{k}\")\n",
        "        else:\n",
        "            experiment.log_metric(k, v)\n",
        "\n",
        "    feature_importance = create_feature_importance_df(model, featurecols)\n",
        "    experiment.log_table(\"importance.json\", feature_importance, headers=False, orient=\"records\")\n",
        "\n",
        "    # End each run explicitly, as the dataset-logging cell does, so the final\n",
        "    # experiment is not left open in the notebook session.\n",
        "    experiment.end()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": { "id": "Q6OVXUrvqyCk" },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}