{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "stylegan2colab.ipynb", "provenance": [], "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "txN4BBJyl3f1", "colab_type": "text" }, "source": [ "Setup:\n", "\n", "1) Make sure GPU is enabled, go to edit->notebook settings->Hardware Accelerator GPU\n", "\n", "2) make a copy to your google drive, click on copy to drive in panel\n", "\n", "Note: colab will reset after 12 hours make sure to save your model checkpoints to google drive, then go to runtime->Factory reset runtime. Now copy your train model back into colab and start training again from the previous checkpoint.\n" ] }, { "cell_type": "code", "metadata": { "id": "gBBJvuEKYoDZ", "colab_type": "code", "colab": {} }, "source": [ "!git clone https://github.com/NVlabs/stylegan2.git" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "OLmRaFyBZD_W", "colab_type": "code", "colab": {} }, "source": [ "cd stylegan2" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "gsyrCYg2mRJ0", "colab_type": "text" }, "source": [ "check you are on a p100, if not do a factory reset runtime" ] }, { "cell_type": "code", "metadata": { "id": "TBW4hj9zYtxp", "colab_type": "code", "colab": {} }, "source": [ "!nvidia-smi" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "9Y4nW7qEY6KA", "colab_type": "code", "colab": {} }, "source": [ "!nvcc test_nvcc.cu -o test_nvcc -run" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "xNn4N5Vkh_JP", "colab_type": "text" }, "source": [ "get your dataset or use kaggle to fetch a dataset" ] }, { "cell_type": "code", "metadata": { "id": "_h4RZkSJiCYE", "colab_type": "code", "colab": {} }, "source": [ "import os\n", "os.environ['KAGGLE_USERNAME'] = \"xxxxxx\" # username from the json file\n", "os.environ['KAGGLE_KEY'] = \"xxxxxxxxxxxxxxxxxxxxxxxxxxxx\" # key from the json file\n", "!kaggle datasets download -d user/dataset-name" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "ZlhgfF1UUF54", "colab_type": "code", "colab": {} }, "source": [ "!pip install Augmentor" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "yaYCf-u_UT8E", "colab_type": "code", "colab": {} }, "source": [ "!mkdir outputresized" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "wTmpqrcgnyEs", "colab_type": "text" }, "source": [ "restart runtime for augmentor " ] }, { "cell_type": "code", "metadata": { "id": "d6xINd3ZUNIj", "colab_type": "code", "colab": {} }, "source": [ "import Augmentor\n", "p = Augmentor.Pipeline(\"/path/to/dataset/\", \"/content/stylegan2/outputresized/\", save_format=\"PNG\")" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "wJy-i8QqVNJc", "colab_type": "code", "colab": {} }, "source": [ "p.resize(probability=1.0, width=256, height=256)" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "3d8LlW5UVRNE", "colab_type": "code", "colab": {} }, "source": [ "p.process()" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "z_nT6SKeZrO5", "colab_type": "code", "colab": {} }, "source": [ "from fastai.vision import *\n", "verify_images('/content/stylegan2/outputresized/', delete=True)" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "ZB_nVogEOym6", "colab_type": "code", "colab": {} }, "source": [ "cd stylegan2/" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "x4E3BsjPZ7q_", "colab_type": "code", "colab": {} }, "source": [ "!python dataset_tool.py create_from_images /content/stylegan2/paintingtfrecord/ /content/stylegan2/outputresized/" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "OEKRePREZ_ps", "colab_type": "code", "colab": {} }, "source": [ "cd training/" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "8czKgqgeaBjV", "colab_type": "code", "colab": {} }, "source": [ "%%writefile training_loop.py\n", "# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.\n", "#\n", "# This work is made available under the Nvidia Source Code License-NC.\n", "# To view a copy of this license, visit\n", "# https://nvlabs.github.io/stylegan2/license.html\n", "\n", "\"\"\"Main training script.\"\"\"\n", "\n", "import numpy as np\n", "import tensorflow as tf\n", "import dnnlib\n", "import dnnlib.tflib as tflib\n", "from dnnlib.tflib.autosummary import autosummary\n", "\n", "from training import dataset\n", "from training import misc\n", "from metrics import metric_base\n", "\n", "#----------------------------------------------------------------------------\n", "# Just-in-time processing of training images before feeding them to the networks.\n", "\n", "def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):\n", " with tf.name_scope('DynamicRange'):\n", " x = tf.cast(x, tf.float32)\n", " x = misc.adjust_dynamic_range(x, drange_data, drange_net)\n", " if mirror_augment:\n", " with tf.name_scope('MirrorAugment'):\n", " x = tf.where(tf.random_uniform([tf.shape(x)[0]]) < 0.5, x, tf.reverse(x, [3]))\n", " with tf.name_scope('FadeLOD'): # Smooth crossfade between consecutive levels-of-detail.\n", " s = tf.shape(x)\n", " y = tf.reshape(x, [-1, s[1], s[2]//2, 2, s[3]//2, 2])\n", " y = tf.reduce_mean(y, axis=[3, 5], keepdims=True)\n", " y = tf.tile(y, [1, 1, 1, 2, 1, 2])\n", " y = tf.reshape(y, [-1, s[1], s[2], s[3]])\n", " x = tflib.lerp(x, y, lod - tf.floor(lod))\n", " with tf.name_scope('UpscaleLOD'): # Upscale to match the expected input/output size of the networks.\n", " s = tf.shape(x)\n", " factor = tf.cast(2 ** tf.floor(lod), tf.int32)\n", " x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])\n", " x = tf.tile(x, [1, 1, 1, factor, 1, factor])\n", " x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])\n", " return x, labels\n", "\n", "#----------------------------------------------------------------------------\n", "# Evaluate time-varying training parameters.\n", "\n", "def training_schedule(\n", " cur_nimg,\n", " training_set,\n", " lod_initial_resolution = None, # Image resolution used at the beginning.\n", " lod_training_kimg = 600, # Thousands of real images to show before doubling the resolution.\n", " lod_transition_kimg = 600, # Thousands of real images to show when fading in new layers.\n", " minibatch_size_base = 32, # Global minibatch size.\n", " minibatch_size_dict = {}, # Resolution-specific overrides.\n", " minibatch_gpu_base = 4, # Number of samples processed at a time by one GPU.\n", " minibatch_gpu_dict = {}, # Resolution-specific overrides.\n", " G_lrate_base = 0.002, # Learning rate for the generator.\n", " G_lrate_dict = {}, # Resolution-specific overrides.\n", " D_lrate_base = 0.002, # Learning rate for the discriminator.\n", " D_lrate_dict = {}, # Resolution-specific overrides.\n", " lrate_rampup_kimg = 0, # Duration of learning rate ramp-up.\n", " tick_kimg_base = 4, # Default interval of progress snapshots.\n", " tick_kimg_dict = {8:28, 16:24, 32:20, 64:16, 128:12, 256:8, 512:6, 1024:4}): # Resolution-specific overrides.\n", "\n", " # Initialize result dict.\n", " s = dnnlib.EasyDict()\n", " s.kimg = cur_nimg / 1000.0\n", "\n", " # Training phase.\n", " phase_dur = lod_training_kimg + lod_transition_kimg\n", " phase_idx = int(np.floor(s.kimg / phase_dur)) if phase_dur > 0 else 0\n", " phase_kimg = s.kimg - phase_idx * phase_dur\n", "\n", " # Level-of-detail and resolution.\n", " if lod_initial_resolution is None:\n", " s.lod = 0.0\n", " else:\n", " s.lod = training_set.resolution_log2\n", " s.lod -= np.floor(np.log2(lod_initial_resolution))\n", " s.lod -= phase_idx\n", " if lod_transition_kimg > 0:\n", " s.lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg\n", " s.lod = max(s.lod, 0.0)\n", " s.resolution = 2 ** (training_set.resolution_log2 - int(np.floor(s.lod)))\n", "\n", " # Minibatch size.\n", " s.minibatch_size = minibatch_size_dict.get(s.resolution, minibatch_size_base)\n", " s.minibatch_gpu = minibatch_gpu_dict.get(s.resolution, minibatch_gpu_base)\n", "\n", " # Learning rate.\n", " s.G_lrate = G_lrate_dict.get(s.resolution, G_lrate_base)\n", " s.D_lrate = D_lrate_dict.get(s.resolution, D_lrate_base)\n", " if lrate_rampup_kimg > 0:\n", " rampup = min(s.kimg / lrate_rampup_kimg, 1.0)\n", " s.G_lrate *= rampup\n", " s.D_lrate *= rampup\n", "\n", " # Other parameters.\n", " s.tick_kimg = tick_kimg_dict.get(s.resolution, tick_kimg_base)\n", " return s\n", "\n", "#----------------------------------------------------------------------------\n", "# Main training script.\n", "\n", "def training_loop(\n", " G_args = {}, # Options for generator network.\n", " D_args = {}, # Options for discriminator network.\n", " G_opt_args = {}, # Options for generator optimizer.\n", " D_opt_args = {}, # Options for discriminator optimizer.\n", " G_loss_args = {}, # Options for generator loss.\n", " D_loss_args = {}, # Options for discriminator loss.\n", " dataset_args = {}, # Options for dataset.load_dataset().\n", " sched_args = {}, # Options for train.TrainingSchedule.\n", " grid_args = {}, # Options for train.setup_snapshot_image_grid().\n", " metric_arg_list = [], # Options for MetricGroup.\n", " tf_config = {}, # Options for tflib.init_tf().\n", " data_dir = None, # Directory to load datasets from.\n", " G_smoothing_kimg = 10.0, # Half-life of the running average of generator weights.\n", " minibatch_repeats = 4, # Number of minibatches to run before adjusting training parameters.\n", " lazy_regularization = True, # Perform regularization as a separate training step?\n", " G_reg_interval = 4, # How often the perform regularization for G? Ignored if lazy_regularization=False.\n", " D_reg_interval = 16, # How often the perform regularization for D? Ignored if lazy_regularization=False.\n", " reset_opt_for_new_lod = True, # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?\n", " total_kimg = 50000, # Total length of the training, measured in thousands of real images.\n", " mirror_augment = False, # Enable mirror augment?\n", " drange_net = [-1,1], # Dynamic range used when feeding image data to the networks.\n", " image_snapshot_ticks = 1, # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'.\n", " network_snapshot_ticks = 1, # How often to save network snapshots? None = only save 'networks-final.pkl'.\n", " save_tf_graph = False, # Include full TensorFlow computation graph in the tfevents file?\n", " save_weight_histograms = False, # Include weight histograms in the tfevents file?\n", " resume_pkl = \"/path/to/pkl\", # Network pickle to resume training from, None = train from scratch.\n", " resume_kimg = 25000.0, # Assumed training progress at the beginning. Affects reporting and training schedule.\n", " resume_time = 0.0, # Assumed wallclock time at the beginning. Affects reporting.\n", " resume_with_new_nets = False): # Construct new networks according to G_args and D_args before resuming training?\n", "\n", " # Initialize dnnlib and TensorFlow.\n", " tflib.init_tf(tf_config)\n", " num_gpus = dnnlib.submit_config.num_gpus\n", "\n", " # Load training set.\n", " training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir), verbose=True, **dataset_args)\n", " grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args)\n", " misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('reals.png'), drange=training_set.dynamic_range, grid_size=grid_size)\n", "\n", " # Construct or load networks.\n", " with tf.device('/gpu:0'):\n", " if resume_pkl is None or resume_with_new_nets:\n", " print('Constructing networks...')\n", " G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args)\n", " D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args)\n", " Gs = G.clone('Gs')\n", " if resume_pkl is not None:\n", " print('Loading networks from \"%s\"...' % resume_pkl)\n", " rG, rD, rGs = misc.load_pkl(resume_pkl)\n", " if resume_with_new_nets: G.copy_vars_from(rG); D.copy_vars_from(rD); Gs.copy_vars_from(rGs)\n", " else: G = rG; D = rD; Gs = rGs\n", "\n", " # Print layers and generate initial image snapshot.\n", " G.print_layers(); D.print_layers()\n", " sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, **sched_args)\n", " grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])\n", " grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)\n", " misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.png'), drange=drange_net, grid_size=grid_size)\n", "\n", " # Setup training inputs.\n", " print('Building TensorFlow graph...')\n", " with tf.name_scope('Inputs'), tf.device('/cpu:0'):\n", " lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[])\n", " lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])\n", " minibatch_size_in = tf.placeholder(tf.int32, name='minibatch_size_in', shape=[])\n", " minibatch_gpu_in = tf.placeholder(tf.int32, name='minibatch_gpu_in', shape=[])\n", " minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in * num_gpus)\n", " Gs_beta = 0.5 ** tf.div(tf.cast(minibatch_size_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0\n", "\n", " # Setup optimizers.\n", " G_opt_args = dict(G_opt_args)\n", " D_opt_args = dict(D_opt_args)\n", " for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]:\n", " args['minibatch_multiplier'] = minibatch_multiplier\n", " args['learning_rate'] = lrate_in\n", " if lazy_regularization:\n", " mb_ratio = reg_interval / (reg_interval + 1)\n", " args['learning_rate'] *= mb_ratio\n", " if 'beta1' in args: args['beta1'] **= mb_ratio\n", " if 'beta2' in args: args['beta2'] **= mb_ratio\n", " G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)\n", " D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)\n", " G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)\n", " D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)\n", "\n", " # Build training graph for each GPU.\n", " data_fetch_ops = []\n", " for gpu in range(num_gpus):\n", " with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):\n", "\n", " # Create GPU-specific shadow copies of G and D.\n", " G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')\n", " D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')\n", "\n", " # Fetch training data via temporary variables.\n", " with tf.name_scope('DataFetch'):\n", " sched = training_schedule(cur_nimg=int(resume_kimg*1000), training_set=training_set, **sched_args)\n", " reals_var = tf.Variable(name='reals', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu] + training_set.shape))\n", " labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu, training_set.label_size]))\n", " reals_write, labels_write = training_set.get_minibatch_tf()\n", " reals_write, labels_write = process_reals(reals_write, labels_write, lod_in, mirror_augment, training_set.dynamic_range, drange_net)\n", " reals_write = tf.concat([reals_write, reals_var[minibatch_gpu_in:]], axis=0)\n", " labels_write = tf.concat([labels_write, labels_var[minibatch_gpu_in:]], axis=0)\n", " data_fetch_ops += [tf.assign(reals_var, reals_write)]\n", " data_fetch_ops += [tf.assign(labels_var, labels_write)]\n", " reals_read = reals_var[:minibatch_gpu_in]\n", " labels_read = labels_var[:minibatch_gpu_in]\n", "\n", " # Evaluate loss functions.\n", " lod_assign_ops = []\n", " if 'lod' in G_gpu.vars: lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]\n", " if 'lod' in D_gpu.vars: lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)]\n", " with tf.control_dependencies(lod_assign_ops):\n", " with tf.name_scope('G_loss'):\n", " G_loss, G_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, **G_loss_args)\n", " with tf.name_scope('D_loss'):\n", " D_loss, D_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, reals=reals_read, labels=labels_read, **D_loss_args)\n", "\n", " # Register gradients.\n", " if not lazy_regularization:\n", " if G_reg is not None: G_loss += G_reg\n", " if D_reg is not None: D_loss += D_reg\n", " else:\n", " if G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(G_reg * G_reg_interval), G_gpu.trainables)\n", " if D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(D_reg * D_reg_interval), D_gpu.trainables)\n", " G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables)\n", " D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables)\n", "\n", " # Setup training ops.\n", " data_fetch_op = tf.group(*data_fetch_ops)\n", " G_train_op = G_opt.apply_updates()\n", " D_train_op = D_opt.apply_updates()\n", " G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)\n", " D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)\n", " Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta)\n", "\n", " # Finalize graph.\n", " with tf.device('/gpu:0'):\n", " try:\n", " peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()\n", " except tf.errors.NotFoundError:\n", " peak_gpu_mem_op = tf.constant(0)\n", " tflib.init_uninitialized_vars()\n", "\n", " print('Initializing logs...')\n", " summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())\n", " if save_tf_graph:\n", " summary_log.add_graph(tf.get_default_graph())\n", " if save_weight_histograms:\n", " G.setup_weight_histograms(); D.setup_weight_histograms()\n", " metrics = metric_base.MetricGroup(metric_arg_list)\n", "\n", " print('Training for %d kimg...\\n' % total_kimg)\n", " dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg)\n", " maintenance_time = dnnlib.RunContext.get().get_last_update_interval()\n", " cur_nimg = int(resume_kimg * 1000)\n", " cur_tick = -1\n", " tick_start_nimg = cur_nimg\n", " prev_lod = -1.0\n", " running_mb_counter = 0\n", " while cur_nimg < total_kimg * 1000:\n", " if dnnlib.RunContext.get().should_stop(): break\n", "\n", " # Choose training parameters and configure training ops.\n", " sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, **sched_args)\n", " assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0\n", " training_set.configure(sched.minibatch_gpu, sched.lod)\n", " if reset_opt_for_new_lod:\n", " if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod):\n", " G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state()\n", " prev_lod = sched.lod\n", "\n", " # Run training ops.\n", " feed_dict = {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_size_in: sched.minibatch_size, minibatch_gpu_in: sched.minibatch_gpu}\n", " for _repeat in range(minibatch_repeats):\n", " rounds = range(0, sched.minibatch_size, sched.minibatch_gpu * num_gpus)\n", " run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0)\n", " run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0)\n", " cur_nimg += sched.minibatch_size\n", " running_mb_counter += 1\n", "\n", " # Fast path without gradient accumulation.\n", " if len(rounds) == 1:\n", " tflib.run([G_train_op, data_fetch_op], feed_dict)\n", " if run_G_reg:\n", " tflib.run(G_reg_op, feed_dict)\n", " tflib.run([D_train_op, Gs_update_op], feed_dict)\n", " if run_D_reg:\n", " tflib.run(D_reg_op, feed_dict)\n", "\n", " # Slow path with gradient accumulation.\n", " else:\n", " for _round in rounds:\n", " tflib.run(G_train_op, feed_dict)\n", " if run_G_reg:\n", " for _round in rounds:\n", " tflib.run(G_reg_op, feed_dict)\n", " tflib.run(Gs_update_op, feed_dict)\n", " for _round in rounds:\n", " tflib.run(data_fetch_op, feed_dict)\n", " tflib.run(D_train_op, feed_dict)\n", " if run_D_reg:\n", " for _round in rounds:\n", " tflib.run(D_reg_op, feed_dict)\n", "\n", " # Perform maintenance tasks once per tick.\n", " done = (cur_nimg >= total_kimg * 1000)\n", " if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:\n", " cur_tick += 1\n", " tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0\n", " tick_start_nimg = cur_nimg\n", " tick_time = dnnlib.RunContext.get().get_time_since_last_update()\n", " total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time\n", "\n", " # Report progress.\n", " print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f' % (\n", " autosummary('Progress/tick', cur_tick),\n", " autosummary('Progress/kimg', cur_nimg / 1000.0),\n", " autosummary('Progress/lod', sched.lod),\n", " autosummary('Progress/minibatch', sched.minibatch_size),\n", " dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)),\n", " autosummary('Timing/sec_per_tick', tick_time),\n", " autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),\n", " autosummary('Timing/maintenance_sec', maintenance_time),\n", " autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30)))\n", " autosummary('Timing/total_hours', total_time / (60.0 * 60.0))\n", " autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))\n", "\n", " # Save snapshots.\n", " if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done):\n", " grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)\n", " misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size)\n", " if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done):\n", " pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' % (cur_nimg // 1000))\n", " misc.save_pkl((G, D, Gs), pkl)\n", " metrics.run(pkl, run_dir=dnnlib.make_run_dir_path(), data_dir=dnnlib.convert_path(data_dir), num_gpus=num_gpus, tf_config=tf_config)\n", "\n", " # Update summaries and RunContext.\n", " metrics.update_autosummaries()\n", " tflib.autosummary.save_summaries(summary_log, cur_nimg)\n", " dnnlib.RunContext.get().update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg)\n", " maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time\n", "\n", " # Save final snapshot.\n", " misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl'))\n", "\n", " # All done.\n", " summary_log.close()\n", " training_set.close()\n", "\n", "#----------------------------------------------------------------------------" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "hKZVo6DtaDfm", "colab_type": "code", "colab": {} }, "source": [ "cd .." ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "iTSDh9jmaFTR", "colab_type": "code", "colab": {} }, "source": [ "%%writefile run_training.py\n", "# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.\n", "#\n", "# This work is made available under the Nvidia Source Code License-NC.\n", "# To view a copy of this license, visit\n", "# https://nvlabs.github.io/stylegan2/license.html\n", "\n", "import argparse\n", "import copy\n", "import os\n", "import sys\n", "\n", "import dnnlib\n", "from dnnlib import EasyDict\n", "\n", "from metrics.metric_defaults import metric_defaults\n", "\n", "#----------------------------------------------------------------------------\n", "\n", "_valid_configs = [\n", " # Table 1\n", " 'config-a', # Baseline StyleGAN\n", " 'config-b', # + Weight demodulation\n", " 'config-c', # + Lazy regularization\n", " 'config-d', # + Path length regularization\n", " 'config-e', # + No growing, new G & D arch.\n", " 'config-f', # + Large networks (default)\n", "\n", " # Table 2\n", " 'config-e-Gorig-Dorig', 'config-e-Gorig-Dresnet', 'config-e-Gorig-Dskip',\n", " 'config-e-Gresnet-Dorig', 'config-e-Gresnet-Dresnet', 'config-e-Gresnet-Dskip',\n", " 'config-e-Gskip-Dorig', 'config-e-Gskip-Dresnet', 'config-e-Gskip-Dskip',\n", "]\n", "\n", "#----------------------------------------------------------------------------\n", "\n", "def run(dataset, data_dir, result_dir, config_id, num_gpus, total_kimg, gamma, mirror_augment, metrics):\n", " train = EasyDict(run_func_name='training.training_loop.training_loop') # Options for training loop.\n", " G = EasyDict(func_name='training.networks_stylegan2.G_main') # Options for generator network.\n", " D = EasyDict(func_name='training.networks_stylegan2.D_stylegan2') # Options for discriminator network.\n", " G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer.\n", " D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer.\n", " G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg') # Options for generator loss.\n", " D_loss = EasyDict(func_name='training.loss.D_logistic_r1') # Options for discriminator loss.\n", " sched = EasyDict() # Options for TrainingSchedule.\n", " grid = EasyDict(size='8k', layout='random') # Options for setup_snapshot_image_grid().\n", " sc = dnnlib.SubmitConfig() # Options for dnnlib.submit_run().\n", " tf_config = {'rnd.np_random_seed': 1000} # Options for tflib.init_tf().\n", "\n", " train.data_dir = data_dir\n", " train.total_kimg = total_kimg\n", " train.mirror_augment = mirror_augment\n", " train.image_snapshot_ticks = train.network_snapshot_ticks = 1\n", " sched.G_lrate_base = sched.D_lrate_base = 0.002\n", " sched.minibatch_size_base = 32\n", " sched.minibatch_gpu_base = 4\n", " D_loss.gamma = 10\n", " metrics = [metric_defaults[x] for x in metrics]\n", " desc = 'stylegan2'\n", "\n", " desc += '-' + dataset\n", " dataset_args = EasyDict(tfrecord_dir=dataset)\n", "\n", " assert num_gpus in [1, 2, 4, 8]\n", " sc.num_gpus = num_gpus\n", " desc += '-%dgpu' % num_gpus\n", "\n", " assert config_id in _valid_configs\n", " desc += '-' + config_id\n", "\n", " # Configs A-E: Shrink networks to match original StyleGAN.\n", " if config_id != 'config-f':\n", " G.fmap_base = D.fmap_base = 8 << 10\n", "\n", " # Config E: Set gamma to 100 and override G & D architecture.\n", " if config_id.startswith('config-e'):\n", " D_loss.gamma = 100\n", " if 'Gorig' in config_id: G.architecture = 'orig'\n", " if 'Gskip' in config_id: G.architecture = 'skip' # (default)\n", " if 'Gresnet' in config_id: G.architecture = 'resnet'\n", " if 'Dorig' in config_id: D.architecture = 'orig'\n", " if 'Dskip' in config_id: D.architecture = 'skip'\n", " if 'Dresnet' in config_id: D.architecture = 'resnet' # (default)\n", "\n", " # Configs A-D: Enable progressive growing and switch to networks that support it.\n", " if config_id in ['config-a', 'config-b', 'config-c', 'config-d']:\n", " sched.lod_initial_resolution = 8\n", " sched.G_lrate_base = sched.D_lrate_base = 0.001\n", " sched.G_lrate_dict = sched.D_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}\n", " sched.minibatch_size_base = 32 # (default)\n", " sched.minibatch_size_dict = {8: 256, 16: 128, 32: 64, 64: 32}\n", " sched.minibatch_gpu_base = 4 # (default)\n", " sched.minibatch_gpu_dict = {8: 32, 16: 16, 32: 8, 64: 4}\n", " G.synthesis_func = 'G_synthesis_stylegan_revised'\n", " D.func_name = 'training.networks_stylegan2.D_stylegan'\n", "\n", " # Configs A-C: Disable path length regularization.\n", " if config_id in ['config-a', 'config-b', 'config-c']:\n", " G_loss = EasyDict(func_name='training.loss.G_logistic_ns')\n", "\n", " # Configs A-B: Disable lazy regularization.\n", " if config_id in ['config-a', 'config-b']:\n", " train.lazy_regularization = False\n", "\n", " # Config A: Switch to original StyleGAN networks.\n", " if config_id == 'config-a':\n", " G = EasyDict(func_name='training.networks_stylegan.G_style')\n", " D = EasyDict(func_name='training.networks_stylegan.D_basic')\n", "\n", " if gamma is not None:\n", " D_loss.gamma = gamma\n", "\n", " sc.submit_target = dnnlib.SubmitTarget.LOCAL\n", " sc.local.do_not_copy_source_files = True\n", " kwargs = EasyDict(train)\n", " kwargs.update(G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt, G_loss_args=G_loss, D_loss_args=D_loss)\n", " kwargs.update(dataset_args=dataset_args, sched_args=sched, grid_args=grid, metric_arg_list=metrics, tf_config=tf_config)\n", " kwargs.submit_config = copy.deepcopy(sc)\n", " kwargs.submit_config.run_dir_root = result_dir\n", " kwargs.submit_config.run_desc = desc\n", " dnnlib.submit_run(**kwargs)\n", "\n", "#----------------------------------------------------------------------------\n", "\n", "def _str_to_bool(v):\n", " if isinstance(v, bool):\n", " return v\n", " if v.lower() in ('yes', 'true', 't', 'y', '1'):\n", " return True\n", " elif v.lower() in ('no', 'false', 'f', 'n', '0'):\n", " return False\n", " else:\n", " raise argparse.ArgumentTypeError('Boolean value expected.')\n", "\n", "def _parse_comma_sep(s):\n", " if s is None or s.lower() == 'none' or s == '':\n", " return []\n", " return s.split(',')\n", "\n", "#----------------------------------------------------------------------------\n", "\n", "_examples = '''examples:\n", "\n", " # Train StyleGAN2 using the FFHQ dataset\n", " python %(prog)s --num-gpus=8 --data-dir=~/datasets --config=config-f --dataset=ffhq --mirror-augment=true\n", "\n", "valid configs:\n", "\n", " ''' + ', '.join(_valid_configs) + '''\n", "\n", "valid metrics:\n", "\n", " ''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + '''\n", "\n", "'''\n", "\n", "def main():\n", " parser = argparse.ArgumentParser(\n", " description='Train StyleGAN2.',\n", " epilog=_examples,\n", " formatter_class=argparse.RawDescriptionHelpFormatter\n", " )\n", " parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')\n", " parser.add_argument('--data-dir', help='Dataset root directory', required=True)\n", " parser.add_argument('--dataset', help='Training dataset', required=True)\n", " parser.add_argument('--config', help='Training config (default: %(default)s)', default='config-f', required=True, dest='config_id', metavar='CONFIG')\n", " parser.add_argument('--num-gpus', help='Number of GPUs (default: %(default)s)', default=1, type=int, metavar='N')\n", " parser.add_argument('--total-kimg', help='Training length in thousands of images (default: %(default)s)', metavar='KIMG', default=50000, type=int)\n", " parser.add_argument('--gamma', help='R1 regularization weight (default is config dependent)', default=None, type=float)\n", " parser.add_argument('--mirror-augment', help='Mirror augment (default: %(default)s)', default=False, metavar='BOOL', type=_str_to_bool)\n", " parser.add_argument('--metrics', help='Comma-separated list of metrics or \"none\" (default: %(default)s)', default='none', type=_parse_comma_sep)\n", "\n", " args = parser.parse_args()\n", "\n", " if not os.path.exists(args.data_dir):\n", " print ('Error: dataset root directory does not exist.')\n", " sys.exit(1)\n", "\n", " if args.config_id not in _valid_configs:\n", " print ('Error: --config value must be one of: ', ', '.join(_valid_configs))\n", " sys.exit(1)\n", "\n", " for metric in args.metrics:\n", " if metric not in metric_defaults:\n", " print ('Error: unknown metric \\'%s\\'' % metric)\n", " sys.exit(1)\n", "\n", " run(**vars(args))\n", "\n", "#----------------------------------------------------------------------------\n", "\n", "if __name__ == \"__main__\":\n", " main()\n", "\n", "#----------------------------------------------------------------------------\n" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "yrRKDJeVaHXd", "colab_type": "code", "colab": {} }, "source": [ "!python run_training.py --num-gpus=1 --data-dir=/content/stylegan2/ --dataset=paintingtfrecord --total-kimg 50000 --config=config-f --metrics none" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "b-qlPg14oExW", "colab_type": "text" }, "source": [ "generate interpolation video from https://github.com/tkarras/progressive_growing_of_gans/blob/master/util_scripts.py" ] }, { "cell_type": "code", "metadata": { "id": "x4EXpOCgtRDI", "colab_type": "code", "colab": {} }, "source": [ "!pip install Pillow==4.3.0" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "rknAVkFhaMsq", "colab_type": "code", "colab": {} }, "source": [ "import os\n", "import time\n", "import re\n", "import bisect\n", "from collections import OrderedDict\n", "import numpy as np\n", "import tensorflow as tf\n", "import scipy.ndimage\n", "import scipy.misc\n", "\n", "from training import misc\n", "from dnnlib.tflib import tfutil\n", "import training.dataset\n", "# Generate MP4 video of random interpolations using a previously trained network.\n", "\n", "def generate_interpolation_video(snapshot=None, grid_size=[1,1], image_shrink=1, image_zoom=1, duration_sec=60.0, smoothing_sec=1.0, mp4=None, mp4_fps=30, mp4_codec='libx265', mp4_bitrate='16M', random_seed=1000, minibatch_size=8):\n", " network_pkl = misc.open_file_or_url(snapshot)\n", " if mp4 is None:\n", " mp4 = 'interps.mp4'\n", " num_frames = int(np.rint(duration_sec * mp4_fps))\n", " random_state = np.random.RandomState(random_seed)\n", "\n", " print('Loading network from \"%s\"...' % network_pkl)\n", " G, D, Gs = misc.load_pkl(snapshot)\n", "\n", " print('Generating latent vectors...')\n", " shape = [num_frames, np.prod(grid_size)] + Gs.input_shape[1:] # [frame, image, channel, component]\n", " all_latents = random_state.randn(*shape).astype(np.float32)\n", " all_latents = scipy.ndimage.gaussian_filter(all_latents, [smoothing_sec * mp4_fps] + [0] * len(Gs.input_shape), mode='wrap')\n", " all_latents /= np.sqrt(np.mean(np.square(all_latents)))\n", "\n", " # Frame generation func for moviepy.\n", " def make_frame(t):\n", " frame_idx = int(np.clip(np.round(t * mp4_fps), 0, num_frames - 1))\n", " latents = all_latents[frame_idx]\n", " labels = np.zeros([latents.shape[0], 0], np.float32)\n", " images = Gs.run(latents, labels, minibatch_size=minibatch_size, num_gpus=1, out_mul=127.5, out_add=127.5, out_shrink=image_shrink, out_dtype=np.uint8)\n", " grid = misc.create_image_grid(images, grid_size).transpose(1, 2, 0) # HWC\n", " if image_zoom > 1:\n", " grid = scipy.ndimage.zoom(grid, [image_zoom, image_zoom, 1], order=0)\n", " if grid.shape[2] == 1:\n", " grid = grid.repeat(3, 2) # grayscale => RGB\n", " return grid\n", "\n", " # Generate video.\n", " import moviepy.editor # pip install moviepy\n", " result_subdir = '/content/stylegan2/'\n", " moviepy.editor.VideoClip(make_frame, duration=duration_sec).write_videofile(os.path.join(result_subdir, mp4), fps=mp4_fps, codec='libx264', bitrate=mp4_bitrate)\n", " open(os.path.join(result_subdir, '_done.txt'), 'wt').close()\n" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "JJ5XzLHVtl37", "colab_type": "code", "colab": {} }, "source": [ "import dnnlib.tflib as tflib\n", "tflib.init_tf()" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "hT8F9EDRaWt8", "colab_type": "code", "colab": {} }, "source": [ "generate_interpolation_video('/path/to/pkl')" ], "execution_count": 0, "outputs": [] } ] }