{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Tensorflow Model Analysis.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# TensorFlow Model Analysis with Comet\n",
        "\n",
        "This notebook downloads the `saved_models-2.2` archive (evaluation data, a schema and\n",
        "saved models), converts the evaluation CSV into a TFRecord file, evaluates the Keras\n",
        "model with TensorFlow Model Analysis (TFMA) against the derived `big_tipper` label, and\n",
        "renders the TFMA views inside a Comet experiment.\n",
        "\n",
        "Run the cells top to bottom on a fresh kernel."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Install dependencies\n",
        "\n",
        "`%pip` (rather than `!pip`) installs into the environment of the running kernel."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TMTwnEtTHXE6"
      },
      "source": [
        "%pip install -U pip"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dWTbGtU2HkZA"
      },
      "source": [
        "%pip install tensorflow-model-analysis"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "l2pryucSqLOS"
      },
      "source": [
        "%pip install comet_ml"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Imports and Comet credentials"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "tMFDWlcEHmEo"
      },
      "source": [
        "# NOTE: comet_ml is deliberately imported ahead of tensorflow_model_analysis.\n",
        "# Comet's auto-logging generally expects comet_ml to be imported before the\n",
        "# ML framework it instruments, so keep this order.\n",
        "import getpass, os\n",
        "import comet_ml\n",
        "import tensorflow_model_analysis as tfma"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yYhrTG1kffZB"
      },
      "source": [
        "# Prompt for the key instead of hardcoding it so it is never saved in the notebook.\n",
        "os.environ[\"COMET_API_KEY\"] = getpass.getpass(\"Paste your COMET API KEY: \")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cViP4Q6_X323"
      },
      "source": [
        "# This setup was tested with TF 2.3 and TFMA 0.24 (using colab), but it should\n",
        "# also work with the latest release.\n",
        "import sys\n",
        "\n",
        "# Confirm that we're using Python 3\n",
        "assert sys.version_info.major==3, 'This notebook must be run using Python 3.'\n",
        "\n",
        "print('Importing TensorFlow')\n",
        "import tensorflow as tf\n",
        "print('TF version: {}'.format(tf.__version__))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Download the data and models\n",
        "\n",
        "Everything is unpacked under a fresh temporary directory (`BASE_DIR`); the paths defined\n",
        "here are used by all later cells."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qhnrUpHsWjlZ"
      },
      "source": [
        "# Get the Data\n",
        "\n",
        "import io, os, tempfile\n",
        "TAR_NAME = 'saved_models-2.2'\n",
        "BASE_DIR = tempfile.mkdtemp()\n",
        "DATA_DIR = os.path.join(BASE_DIR, TAR_NAME, 'data')\n",
        "MODELS_DIR = os.path.join(BASE_DIR, TAR_NAME, 'models')\n",
        "SCHEMA = os.path.join(BASE_DIR, TAR_NAME, 'schema.pbtxt')\n",
        "OUTPUT_DIR = os.path.join(BASE_DIR, 'output')\n",
        "MODEL_VERSION = '1'\n",
        "\n",
        "# Download the archive into the working directory, unpack it, move the\n",
        "# extracted folder under BASE_DIR and delete the tarball.\n",
        "!curl -O https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/{TAR_NAME}.tar\n",
        "!tar xf {TAR_NAME}.tar\n",
        "!mv {TAR_NAME} {BASE_DIR}\n",
        "!rm {TAR_NAME}.tar\n",
        "\n",
        "print(\"Here's what we downloaded:\")\n",
        "!ls -R {BASE_DIR}"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Load the schema and encode the evaluation data as TFRecords"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PLRNn-H8Wmmq"
      },
      "source": [
        "# Setup Data Schema\n",
        "\n",
        "import tensorflow as tf\n",
        "from google.protobuf import text_format\n",
        "from tensorflow.python.lib.io import file_io\n",
        "from tensorflow_metadata.proto.v0 import schema_pb2\n",
        "from tensorflow.core.example import example_pb2\n",
        "\n",
        "# Parse the text-format schema shipped in the archive into a Schema proto.\n",
        "schema = schema_pb2.Schema()\n",
        "contents = file_io.read_file_to_string(SCHEMA)\n",
        "schema = text_format.Parse(contents, schema)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "93uAo29pW4dJ"
      },
      "source": [
        "# Encode Data to TFRecords format using the Schema\n",
        "\n",
        "import csv\n",
        "\n",
        "datafile = os.path.join(DATA_DIR, 'eval', 'data.csv')\n",
        "examples = []\n",
        "# Read the CSV inside a context manager so the file handle is closed once all\n",
        "# rows are parsed; newline='' is what the csv module recommends for files\n",
        "# handed to a reader.\n",
        "with open(datafile, 'r', newline='') as csv_file:\n",
        "  reader = csv.DictReader(csv_file)\n",
        "  for line in reader:\n",
        "    example = example_pb2.Example()\n",
        "    # Copy every schema feature into the Example using the list type that\n",
        "    # matches the schema; an empty CSV cell becomes an empty value list.\n",
        "    for feature in schema.feature:\n",
        "      key = feature.name\n",
        "      if feature.type == schema_pb2.FLOAT:\n",
        "        example.features.feature[key].float_list.value[:] = (\n",
        "            [float(line[key])] if len(line[key]) > 0 else [])\n",
        "      elif feature.type == schema_pb2.INT:\n",
        "        example.features.feature[key].int64_list.value[:] = (\n",
        "            [int(line[key])] if len(line[key]) > 0 else [])\n",
        "      elif feature.type == schema_pb2.BYTES:\n",
        "        example.features.feature[key].bytes_list.value[:] = (\n",
        "            [line[key].encode('utf8')] if len(line[key]) > 0 else [])\n",
        "\n",
        "    # Derived label: 1.0 when the tip is more than 20% of the fare, else 0.0.\n",
        "    # This is the `label_key` the EvalConfig below points at.\n",
        "    big_tipper = float(line['tips']) > float(line['fare']) * 0.2\n",
        "    example.features.feature['big_tipper'].float_list.value[:] = [float(big_tipper)]\n",
        "    examples.append(example)\n",
        "\n",
        "tfrecord_file = os.path.join(BASE_DIR, 'train_data.rio')\n",
        "with tf.io.TFRecordWriter(tfrecord_file) as writer:\n",
        "  for example in examples:\n",
        "    writer.write(example.SerializeToString())\n",
        "\n",
        "!ls {tfrecord_file}"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Configure and run TFMA\n",
        "\n",
        "The `EvalConfig` names the label, the post-training metrics and plots to compute, and\n",
        "the slices to compute them over. Results are written under `OUTPUT_DIR/keras`."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6q1RGsX5XAmy"
      },
      "source": [
        "import tensorflow_model_analysis as tfma\n",
        "\n",
        "# Setup tfma.EvalConfig settings\n",
        "keras_eval_config = text_format.Parse(\"\"\"\n",
        "  ## Model information\n",
        "  model_specs {\n",
        "    # For keras (and serving models) we need to add a `label_key`.\n",
        "    label_key: \"big_tipper\"\n",
        "  }\n",
        "\n",
        "  ## Post training metric information. These will be merged with any built-in\n",
        "  ## metrics from training.\n",
        "  metrics_specs {\n",
        "    metrics { class_name: \"ExampleCount\" }\n",
        "    metrics { class_name: \"BinaryAccuracy\" }\n",
        "    metrics { class_name: \"BinaryCrossentropy\" }\n",
        "    metrics { class_name: \"AUC\" }\n",
        "    metrics { class_name: \"AUCPrecisionRecall\" }\n",
        "    metrics { class_name: \"Precision\" }\n",
        "    metrics { class_name: \"Recall\" }\n",
        "    metrics { class_name: \"MeanLabel\" }\n",
        "    metrics { class_name: \"MeanPrediction\" }\n",
        "    metrics { class_name: \"Calibration\" }\n",
        "    metrics { class_name: \"CalibrationPlot\" }\n",
        "    metrics { class_name: \"ConfusionMatrixPlot\" }\n",
        "    # ... add additional metrics and plots ...\n",
        "  }\n",
        "\n",
        "  ## Slicing information\n",
        "  slicing_specs {}  # overall slice\n",
        "  slicing_specs {\n",
        "    feature_keys: [\"trip_start_hour\"]\n",
        "  }\n",
        "  slicing_specs {\n",
        "    feature_keys: [\"trip_start_day\"]\n",
        "  }\n",
        "  slicing_specs {\n",
        "    feature_values: {\n",
        "      key: \"trip_start_month\"\n",
        "      value: \"1\"\n",
        "    }\n",
        "  }\n",
        "  slicing_specs {\n",
        "    feature_keys: [\"trip_start_hour\", \"trip_start_day\"]\n",
        "  }\n",
        "\"\"\", tfma.EvalConfig())\n",
        "\n",
        "# Create a tfma.EvalSharedModel that points at our keras model.\n",
        "keras_model_path = os.path.join(MODELS_DIR, 'keras', MODEL_VERSION)\n",
        "keras_eval_shared_model = tfma.default_eval_shared_model(\n",
        "    eval_saved_model_path=keras_model_path,\n",
        "    eval_config=keras_eval_config)\n",
        "\n",
        "keras_output_path = os.path.join(OUTPUT_DIR, 'keras')\n",
        "\n",
        "# Run TFMA\n",
        "results = tfma.run_model_analysis(\n",
        "    eval_shared_model=keras_eval_shared_model,\n",
        "    eval_config=keras_eval_config,\n",
        "    data_location=tfrecord_file,\n",
        "    output_path=keras_output_path)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Render the TFMA views inside a Comet experiment\n",
        "\n",
        "`COMET_AUTO_LOG_TFMA=1` is set before the experiment is created; presumably this is what\n",
        "lets Comet capture the TFMA views rendered below (confirm against the Comet\n",
        "documentation)."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UuHkdVTzrJdg"
      },
      "source": [
        "%env COMET_PROJECT_NAME=tf-model-analysis\n",
        "%env COMET_AUTO_LOG_TFMA=1\n",
        "\n",
        "experiment = comet_ml.Experiment()\n",
        "try:\n",
        "  experiment.log_parameter(\"model_version\", MODEL_VERSION)\n",
        "\n",
        "  tfma.view.render_slicing_metrics(results)\n",
        "  tfma.view.render_slicing_metrics(results, slicing_column='trip_start_day')\n",
        "  tfma.view.render_plot(results)\n",
        "finally:\n",
        "  # Always close the experiment, even if one of the render calls raises,\n",
        "  # so a failed run does not leave the Comet session open.\n",
        "  experiment.end()"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}