{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Tensorflow Model Analysis.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "TMTwnEtTHXE6"
      },
      "source": [
        "!pip install -U pip"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dWTbGtU2HkZA"
      },
      "source": [
        "!pip install tensorflow-model-analysis"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "l2pryucSqLOS"
      },
      "source": [
        "!pip install comet_ml"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "tMFDWlcEHmEo"
      },
      "source": [
        "import getpass, os\n",
        "import comet_ml\n",
        "import tensorflow_model_analysis as tfma"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yYhrTG1kffZB"
      },
      "source": [
        "os.environ[\"COMET_API_KEY\"] = getpass.getpass(\"Paste your COMET API KEY: \")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cViP4Q6_X323"
      },
      "source": [
        "# This setup was tested with TF 2.3 and TFMA 0.24 (using colab), but it should\n",
        "# also work with the latest release.\n",
        "import sys\n",
        "\n",
        "# Confirm that we're using Python 3\n",
        "assert sys.version_info.major==3, 'This notebook must be run using Python 3.'\n",
        "\n",
        "print('Installing TensorFlow')\n",
        "import tensorflow as tf\n",
        "print('TF version: {}'.format(tf.__version__))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qhnrUpHsWjlZ"
      },
      "source": [
        "# Get the Data\n",
        "\n",
        "import io, os, tempfile\n",
        "TAR_NAME = 'saved_models-2.2'\n",
        "BASE_DIR = tempfile.mkdtemp()\n",
        "DATA_DIR = os.path.join(BASE_DIR, TAR_NAME, 'data')\n",
        "MODELS_DIR = os.path.join(BASE_DIR, TAR_NAME, 'models')\n",
        "SCHEMA = os.path.join(BASE_DIR, TAR_NAME, 'schema.pbtxt')\n",
        "OUTPUT_DIR = os.path.join(BASE_DIR, 'output')\n",
        "MODEL_VERSION = '1'\n",
        "\n",
        "!curl -O https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/{TAR_NAME}.tar\n",
        "!tar xf {TAR_NAME}.tar\n",
        "!mv {TAR_NAME} {BASE_DIR}\n",
        "!rm {TAR_NAME}.tar\n",
        "\n",
        "print(\"Here's what we downloaded:\")\n",
        "!ls -R {BASE_DIR}"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PLRNn-H8Wmmq"
      },
      "source": [
        "# Setup Data Schema\n",
        "\n",
        "import tensorflow as tf\n",
        "from google.protobuf import text_format\n",
        "from tensorflow.python.lib.io import file_io\n",
        "from tensorflow_metadata.proto.v0 import schema_pb2\n",
        "from tensorflow.core.example import example_pb2\n",
        "\n",
        "schema = schema_pb2.Schema()\n",
        "contents = file_io.read_file_to_string(SCHEMA)\n",
        "schema = text_format.Parse(contents, schema)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "93uAo29pW4dJ"
      },
      "source": [
        "# Encode Data to TFRecords format using the Schema\n",
        "\n",
        "import csv\n",
        "\n",
        "datafile = os.path.join(DATA_DIR, 'eval', 'data.csv')\n",
        "reader = csv.DictReader(open(datafile, 'r'))\n",
        "examples = []\n",
        "for line in reader:\n",
        "  example = example_pb2.Example()\n",
        "  for feature in schema.feature:\n",
        "    key = feature.name\n",
        "    if feature.type == schema_pb2.FLOAT:\n",
        "      example.features.feature[key].float_list.value[:] = (\n",
        "          [float(line[key])] if len(line[key]) > 0 else [])\n",
        "    elif feature.type == schema_pb2.INT:\n",
        "      example.features.feature[key].int64_list.value[:] = (\n",
        "          [int(line[key])] if len(line[key]) > 0 else [])\n",
        "    elif feature.type == schema_pb2.BYTES:\n",
        "      example.features.feature[key].bytes_list.value[:] = (\n",
        "          [line[key].encode('utf8')] if len(line[key]) > 0 else [])\n",
        "  \n",
        "  big_tipper = float(line['tips']) > float(line['fare']) * 0.2\n",
        "  example.features.feature['big_tipper'].float_list.value[:] = [big_tipper]\n",
        "  examples.append(example)\n",
        "\n",
        "tfrecord_file = os.path.join(BASE_DIR, 'train_data.rio')\n",
        "with tf.io.TFRecordWriter(tfrecord_file) as writer:\n",
        "  for example in examples:\n",
        "    writer.write(example.SerializeToString())\n",
        "\n",
        "!ls {tfrecord_file}"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6q1RGsX5XAmy"
      },
      "source": [
        "import tensorflow_model_analysis as tfma\n",
        "\n",
        "# Setup tfma.EvalConfig settings\n",
        "keras_eval_config = text_format.Parse(\"\"\"\n",
        "  ## Model information\n",
        "  model_specs {\n",
        "    # For keras (and serving models) we need to add a `label_key`.\n",
        "    label_key: \"big_tipper\"\n",
        "  }\n",
        "\n",
        "  ## Post training metric information. These will be merged with any built-in\n",
        "  ## metrics from training.\n",
        "  metrics_specs {\n",
        "    metrics { class_name: \"ExampleCount\" }\n",
        "    metrics { class_name: \"BinaryAccuracy\" }\n",
        "    metrics { class_name: \"BinaryCrossentropy\" }\n",
        "    metrics { class_name: \"AUC\" }\n",
        "    metrics { class_name: \"AUCPrecisionRecall\" }\n",
        "    metrics { class_name: \"Precision\" }\n",
        "    metrics { class_name: \"Recall\" }\n",
        "    metrics { class_name: \"MeanLabel\" }\n",
        "    metrics { class_name: \"MeanPrediction\" }\n",
        "    metrics { class_name: \"Calibration\" }\n",
        "    metrics { class_name: \"CalibrationPlot\" }\n",
        "    metrics { class_name: \"ConfusionMatrixPlot\" }\n",
        "    # ... add additional metrics and plots ...\n",
        "  }\n",
        "\n",
        "  ## Slicing information\n",
        "  slicing_specs {}  # overall slice\n",
        "  slicing_specs {\n",
        "    feature_keys: [\"trip_start_hour\"]\n",
        "  }\n",
        "  slicing_specs {\n",
        "    feature_keys: [\"trip_start_day\"]\n",
        "  }\n",
        "  slicing_specs {\n",
        "    feature_values: {\n",
        "      key: \"trip_start_month\"\n",
        "      value: \"1\"\n",
        "    }\n",
        "  }\n",
        "  slicing_specs {\n",
        "    feature_keys: [\"trip_start_hour\", \"trip_start_day\"]\n",
        "  }\n",
        "\"\"\", tfma.EvalConfig())\n",
        "\n",
        "# Create a tfma.EvalSharedModel that points at our keras model.\n",
        "keras_model_path = os.path.join(MODELS_DIR, 'keras', MODEL_VERSION)\n",
        "keras_eval_shared_model = tfma.default_eval_shared_model(\n",
        "    eval_saved_model_path=keras_model_path,\n",
        "    eval_config=keras_eval_config)\n",
        "\n",
        "keras_output_path = os.path.join(OUTPUT_DIR, 'keras')\n",
        "\n",
        "# Run TFMA\n",
        "results = tfma.run_model_analysis(\n",
        "    eval_shared_model=keras_eval_shared_model,\n",
        "    eval_config=keras_eval_config,\n",
        "    data_location=tfrecord_file,\n",
        "    output_path=keras_output_path)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UuHkdVTzrJdg"
      },
      "source": [
        "%env COMET_PROJECT_NAME=tf-model-analysis\n",
        "%env COMET_AUTO_LOG_TFMA=1\n",
        "\n",
        "experiment = comet_ml.Experiment()\n",
        "experiment.log_parameter(\"model_version\", MODEL_VERSION)\n",
        "\n",
        "tfma.view.render_slicing_metrics(results)\n",
        "tfma.view.render_slicing_metrics(results, slicing_column='trip_start_day')\n",
        "tfma.view.render_plot(results)\n",
        "\n",
        "experiment.end()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XPtwDTlgsA19"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}