{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Notebook setup" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from fastai import *\n", "from fastai.torch_core import *\n", "from fastai.vision import *\n", "from fastai.metrics import *\n", "from torchvision.models import resnet18" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from io import BytesIO" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "path = untar_data(URLs.MNIST_TINY)\n", "data = ImageDataBunch.from_folder(path)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# TBLogger" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# From https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514\n", "\"\"\"Simple example on how to log scalars and images to tensorboard without tensor ops.\n", "License: Copyleft\n", "\"\"\"\n", "#__author__ = \"Michael Gygli\"\n", "\n", "#import tensorflow as tf\n", "#from StringIO import StringIO\n", "#import matplotlib.pyplot as plt\n", "#import numpy as np\n", "\n", "class Logger(object):\n", " \"\"\"Logging in tensorboard without tensorflow ops.\"\"\"\n", "\n", " def __init__(self, log_dir):\n", " \"\"\"Creates a summary writer logging to log_dir.\"\"\"\n", " self.writer = tf.summary.FileWriter(log_dir)\n", "\n", " def log_scalar(self, tag, value, step):\n", " \"\"\"Log a scalar variable.\n", " Parameter\n", " ----------\n", " tag : basestring\n", " Name of the scalar\n", " value\n", " step : int\n", " training iteration\n", " \"\"\"\n", " summary = tf.Summary(value=[tf.Summary.Value(tag=tag,\n", " simple_value=value)])\n", " self.writer.add_summary(summary, step)\n", "\n", " def log_images(self, tag, images, step):\n", " \"\"\"Logs a list of images.\"\"\"\n", "\n", " im_summaries = []\n", " for nr, img in enumerate(images):\n", " # Write the image to a string\n", " s = StringIO()\n", " plt.imsave(s, img, format='png')\n", "\n", " # Create an Image object\n", " img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),\n", " height=img.shape[0],\n", " width=img.shape[1])\n", " # Create a Summary value\n", " im_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, nr),\n", " image=img_sum))\n", "\n", " # Create and write Summary\n", " summary = tf.Summary(value=im_summaries)\n", " self.writer.add_summary(summary, step)\n", " \n", "\n", " def log_histogram(self, tag, values, step, bins=1000):\n", " \"\"\"Logs the histogram of a list/vector of values.\"\"\"\n", " # Convert to a numpy array\n", " values = np.array(values)\n", " \n", " # Create histogram using numpy \n", " counts, bin_edges = np.histogram(values, bins=bins)\n", "\n", " # Fill fields of histogram proto\n", " hist = tf.HistogramProto()\n", " hist.min = float(np.min(values))\n", " hist.max = float(np.max(values))\n", " hist.num = int(np.prod(values.shape))\n", " hist.sum = float(np.sum(values))\n", " hist.sum_squares = float(np.sum(values**2))\n", "\n", " # Requires equal number as bins, where the first goes from -DBL_MAX to bin_edges[1]\n", " # See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto#L30\n", " # Thus, we drop the start of the first bin\n", " bin_edges = bin_edges[1:]\n", "\n", " # Add bin edges and counts\n", " for edge in bin_edges:\n", " hist.bucket_limit.append(edge)\n", " for c in counts:\n", " hist.bucket.append(c)\n", "\n", " # Create and write Summary\n", " summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])\n", " self.writer.add_summary(summary, step)\n", " self.writer.flush()" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "\"A `Callback` that saves tracked metrics into a log file for Tensorboard.\"\n", "# Based on https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514\n", "# and devforfu: https://nbviewer.jupyter.org/gist/devforfu/ea0b3fcfe194dad323c3762492b05cae\n", "# Contribution from MicPie\n", "\n", "#from ..torch_core import *\n", "#from ..basic_data import DataBunch\n", "#from ..callback import *\n", "#from ..basic_train import Learner, LearnerCallback\n", "#import tensorflow as tf\n", "\n", "__all__ = ['TBLogger']\n", "\n", "@dataclass\n", "class TBLogger(LearnerCallback):\n", " \"A `LearnerCallback` that saves history of metrics while training `learn` into log files for Tensorboard.\"\n", " \n", " log_dir:str = 'logs'\n", " log_name:str = 'data'\n", " log_scalar:bool = True # log scalar values for Tensorboard scalar summary\n", " log_hist:bool = True # log values and gradients of the parameters for Tensorboard histogram summary\n", " log_img:bool = False # log values for Tensorboard image summary\n", "\n", " def __post_init__(self): \n", " super().__post_init__()\n", " #def __init__(self):\n", " # super().__init__()\n", " self.path = self.learn.path\n", " (self.path/self.log_dir).mkdir(parents=True, exist_ok=True) # setup logs directory\n", " self.Log = Logger(str(self.path/self.log_dir/self.log_name))\n", " self.epoch = 0\n", " self.batch = 0\n", " self.log_grads = {}\n", " \n", " def on_backward_end(self, **kwargs:Any):\n", " self.batch = self.batch+1\n", " #print('\\nBatch: ',self.batch)\n", " \n", " if self.log_hist:\n", " for tag, value in learn.model.named_parameters():\n", " tag_grad = tag.replace('.', '/')+'/grad'\n", " \n", " if tag_grad in self.log_grads:\n", " #self.log_grads[tag_grad] += value.grad.data.cpu().detach().numpy()\n", " self.log_grads[tag_grad] = self.log_grads[tag_grad] + value.grad.data.cpu().detach().numpy() # gradients are summed up from every batch\n", " #print('if')\n", " else:\n", " self.log_grads[tag_grad] = value.grad.data.cpu().detach().numpy()\n", " #print('else')\n", " \n", " #print(tag_grad, self.log_grads[tag_grad].sum())\n", " return self.log_grads\n", " \n", " #def on_step_end(self, **kwards:Any):\n", " #print('Step end: ', self.log_grads)\n", "\n", " def on_epoch_end(self, epoch:int, smooth_loss:Tensor, last_metrics:MetricsList, **kwargs:Any) -> bool:\n", " last_metrics = ifnone(last_metrics, [])\n", " tr_info = {name: stat for name, stat in zip(self.learn.recorder.names, [epoch, smooth_loss] + last_metrics)}\n", " self.epoch = tr_info['epoch']\n", " self.batch = 0 # reset batch count\n", " #print('\\nEpoch: ',self.epoch)\n", " \n", " if self.log_scalar:\n", " for tag, value in tr_info.items():\n", " if tag == 'epoch': continue\n", " self.Log.log_scalar(tag, value, self.epoch+1)\n", " \n", " if self.log_hist:\n", " for tag, value in learn.model.named_parameters():\n", " \n", " tag = tag.replace('.', '/')\n", " self.Log.log_histogram(tag, value.data.cpu().numpy(), self.epoch+1)\n", " \n", " tag_grad = tag.replace('.', '/')+'/grad'\n", " self.Log.log_histogram(tag_grad, self.log_grads[tag_grad], self.epoch+1)\n", " #print(tag_grad, self.log_grads[tag_grad].sum())\n", " \n", " #if self.log_img:\n", " # for tag, value in learn.model.named_parameters():\n", " # \n", " # tag = tag.replace('.', '/')\n", " # self.Log.log_images(tag, value.data.cpu().numpy(), self.epoch+1)" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "# If you want to save the log files in a special directory use partial (default directory is 'data'):\n", "#TB = partial(TBLogger, log_name='name')" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "learn = Learner(data, simple_cnn((3, 16, 16, 2)), metrics=[accuracy, error_rate], callback_fns=[TBLogger])" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total time: 00:14\n", "epoch train_loss valid_loss accuracy error_rate\n", "1 0.688752 0.674943 0.741059 0.258941 (00:03)\n", "2 0.650126 0.518507 0.915594 0.084406 (00:01)\n", "3 0.539764 0.203503 0.942775 0.057225 (00:01)\n", "4 0.420268 0.148786 0.948498 0.051502 (00:01)\n", "5 0.350732 0.134421 0.942775 0.057225 (00:01)\n", "6 0.315148 0.123754 0.955651 0.044349 (00:01)\n", "7 0.277169 0.139031 0.945637 0.054363 (00:01)\n", "8 0.250696 0.121647 0.957082 0.042918 (00:01)\n", "9 0.229372 0.119526 0.961373 0.038627 (00:01)\n", "10 0.211137 0.119364 0.961373 0.038627 (00:01)\n", "\n" ] } ], "source": [ "learn.fit_one_cycle(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/04-utils/tensorboard/logger.py ?" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# https://github.com/lanpa/tensorboardX ??? ??? ???" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# To start Tensorboard run the following command in the directoy of the log file folder:\n", "# tensorboard --logdir=./logs\n", "# Then open localhost:6006 if you are on your local machine" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }