{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#|hide\n", "#| eval: false\n", "! [ -e /content ] && pip install -Uqq fastai # upgrade fastai on colab" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "---\n", "skip_exec: true\n", "---" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#|export\n", "from __future__ import annotations\n", "import tempfile\n", "from fastai.basics import *\n", "from fastai.learner import Callback" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#|hide\n", "from nbdev.showdoc import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#|default_exp callback.neptune" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Neptune.ai\n", "\n", "> Integration with [neptune.ai](https://www.neptune.ai).\n", "\n", "> [Track fastai experiments](https://ui.neptune.ai/o/neptune-ai/org/fastai-integration) like in this example project." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Registration" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1. Create account: [neptune.ai/register](https://neptune.ai/register).\n", "2. Export API token to the environment variable (more help [here](https://docs.neptune.ai/python-api/tutorials/get-started.html#copy-api-token)). In your terminal run:\n", "\n", "```\n", "export NEPTUNE_API_TOKEN='YOUR_LONG_API_TOKEN'\n", "```\n", "\n", "or append the command above to your `~/.bashrc` or `~/.bash_profile` files (**recommended**). More help is [here](https://docs.neptune.ai/python-api/tutorials/get-started.html#copy-api-token)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Installation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1. You need to install neptune-client. In your terminal run:\n", "\n", "```\n", "pip install neptune-client\n", "```\n", "\n", "or (alternative installation using conda). In your terminal run:\n", "\n", "```\n", "conda install neptune-client -c conda-forge\n", "```\n", "2. Install [psutil](https://psutil.readthedocs.io/en/latest/) to see hardware monitoring charts:\n", "\n", "```\n", "pip install psutil\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## How to use?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Key is to call `neptune.init()` before you create `Learner()` and call `neptune_create_experiment()`, before you fit the model.\n", "\n", "Use `NeptuneCallback` in your `Learner`, like this:\n", "\n", "```\n", "from fastai.callback.neptune import NeptuneCallback\n", "\n", "neptune.init('USERNAME/PROJECT_NAME') # specify project\n", "\n", "learn = Learner(dls, model,\n", " cbs=NeptuneCallback()\n", " )\n", "\n", "neptune.create_experiment() # start experiment\n", "learn.fit_one_cycle(1)\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#|export\n", "import neptune" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#|export\n", "class NeptuneCallback(Callback):\n", " \"Log losses, metrics, model weights, model architecture summary to neptune\"\n", " order = Recorder.order+1\n", " def __init__(self, log_model_weights=True, keep_experiment_running=False):\n", " self.log_model_weights = log_model_weights\n", " self.keep_experiment_running = keep_experiment_running\n", " self.experiment = None\n", "\n", " if neptune.project is None:\n", " raise ValueError('You did not initialize project in neptune.\\n',\n", " 'Please invoke `neptune.init(\"USERNAME/PROJECT_NAME\")` before this callback.')\n", "\n", " def before_fit(self):\n", " try:\n", " self.experiment = neptune.get_experiment()\n", " except ValueError:\n", " print('No active experiment. Please invoke `neptune.create_experiment()` before this callback.')\n", "\n", " try:\n", " self.experiment.set_property('n_epoch', str(self.learn.n_epoch))\n", " self.experiment.set_property('model_class', str(type(self.learn.model)))\n", " except: print(f'Did not log all properties. Check properties in the {neptune.get_experiment()}.')\n", "\n", " try:\n", " with tempfile.NamedTemporaryFile(mode='w') as f:\n", " with open(f.name, 'w') as g: g.write(repr(self.learn.model))\n", " self.experiment.log_artifact(f.name, 'model_summary.txt')\n", " except: print('Did not log model summary. Check if your model is PyTorch model.')\n", "\n", " if self.log_model_weights and not hasattr(self.learn, 'save_model'):\n", " print('Unable to log model to Neptune.\\n',\n", " 'Use \"SaveModelCallback\" to save model checkpoints that will be logged to Neptune.')\n", "\n", " def after_batch(self):\n", " # log loss and opt.hypers\n", " if self.learn.training:\n", " self.experiment.log_metric('batch__smooth_loss', self.learn.smooth_loss)\n", " self.experiment.log_metric('batch__loss', self.learn.loss)\n", " self.experiment.log_metric('batch__train_iter', self.learn.train_iter)\n", " for i, h in enumerate(self.learn.opt.hypers):\n", " for k, v in h.items(): self.experiment.log_metric(f'batch__opt.hypers.{k}', v)\n", "\n", " def after_epoch(self):\n", " # log metrics\n", " for n, v in zip(self.learn.recorder.metric_names, self.learn.recorder.log):\n", " if n not in ['epoch', 'time']: self.experiment.log_metric(f'epoch__{n}', v)\n", " if n == 'time': self.experiment.log_text(f'epoch__{n}', str(v))\n", "\n", " # log model weights\n", " if self.log_model_weights and hasattr(self.learn, 'save_model'):\n", " if self.learn.save_model.every_epoch:\n", " _file = join_path_file(f'{self.learn.save_model.fname}_{self.learn.save_model.epoch}',\n", " self.learn.path / self.learn.model_dir, ext='.pth')\n", " else:\n", " _file = join_path_file(self.learn.save_model.fname,\n", " self.learn.path / self.learn.model_dir, ext='.pth')\n", " self.experiment.log_artifact(_file)\n", "\n", " def after_fit(self):\n", " if not self.keep_experiment_running:\n", " try: self.experiment.stop()\n", " except: print('No neptune experiment to stop.')\n", " else:\n", " print(f'Your experiment (id: {self.experiment.id}, name: {self.experiment.name}) is left in the running state.\\n',\n", " 'You can log more data to it, like this: `neptune.log_metric()`')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "---\n", "\n", "### NeptuneCallback\n", "\n", "> NeptuneCallback (log_model_weights=True, keep_experiment_running=False)\n", "\n", "Log losses, metrics, model weights, model architecture summary to neptune" ], "text/plain": [ "" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "show_doc(NeptuneCallback)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "python3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 4 }