eager_styletransfer • keras

Source: https://github.com/rstudio/keras/blob/master/vignettes/examples/eager_styletransfer.R
This is the companion code to the post “Neural style transfer with eager execution and Keras” on the TensorFlow for R blog.
https://blogs.rstudio.com/tensorflow/posts/2018-09-09-eager-style-transfer
library(keras)
use_implementation("tensorflow")
use_session_with_seed(7777, disable_gpu = FALSE, disable_parallel_cpu = FALSE)
library(tensorflow)
tfe_enable_eager_execution(device_policy = "silent")

library(purrr)
library(glue)

img_shape <- c(128, 128, 3)
content_path <- "isar.jpg"
style_path <- "The_Great_Wave_off_Kanagawa.jpg"


num_iterations <- 2000
content_weight <- 100
style_weight <- 0.8
total_variation_weight <- 0.01

content_image <-
  image_load(content_path, target_size = img_shape[1:2])
content_image %>% image_to_array() %>%
  `/`(., 255) %>%
  as.raster() %>%  plot()

style_image <-
  image_load(style_path, target_size = img_shape[1:2])
style_image %>% image_to_array() %>%
  `/`(., 255) %>%
  as.raster() %>%  plot()


load_and_process_image <- function(path) {
  img <- image_load(path, target_size = img_shape[1:2]) %>%
    image_to_array() %>%
    k_expand_dims(axis = 1) %>%
    imagenet_preprocess_input()
}

deprocess_image <- function(x) {
  x <- x[1, , ,]
  # Remove zero-center by mean pixel
  x[, , 1] <- x[, , 1] + 103.939
  x[, , 2] <- x[, , 2] + 116.779
  x[, , 3] <- x[, , 3] + 123.68
  # 'BGR'->'RGB'
  x <- x[, , c(3, 2, 1)]
  x[x > 255] <- 255
  x[x < 0] <- 0
  x[] <- as.integer(x) / 255
  x
}

content_layers <- c("block5_conv2")
style_layers = c("block1_conv1",
                 "block2_conv1",
                 "block3_conv1",
                 "block4_conv1",
                 "block5_conv1")
num_content_layers <- length(content_layers)
num_style_layers <- length(style_layers)

get_model <- function() {
  vgg <- application_vgg19(include_top = FALSE, weights = "imagenet")
  vgg$trainable <- FALSE
  style_outputs <-
    map(style_layers, function(layer)
      vgg$get_layer(layer)$output)
  content_outputs <-
    map(content_layers, function(layer)
      vgg$get_layer(layer)$output)
  model_outputs <- c(style_outputs, content_outputs)
  keras_model(vgg$input, model_outputs)
}

content_loss <- function(content_image, target) {
  k_sum(k_square(target - content_image))
}

gram_matrix <- function(x) {
  features <- k_batch_flatten(k_permute_dimensions(x, c(3, 1, 2)))
  gram <- k_dot(features, k_transpose(features))
  gram
}

style_loss <- function(gram_target, combination) {
  gram_comb <- gram_matrix(combination)
  k_sum(k_square(gram_target - gram_comb)) / (4 * (img_shape[3] ^ 2) * (img_shape[1] * img_shape[2]) ^
                                                2)
}

total_variation_loss <- function(image) {
  y_ij  <- image[1:(img_shape[1] - 1L), 1:(img_shape[2] - 1L),]
  y_i1j <- image[2:(img_shape[1]), 1:(img_shape[2] - 1L),]
  y_ij1 <- image[1:(img_shape[1] - 1L), 2:(img_shape[2]),]
  a <- k_square(y_ij - y_i1j)
  b <- k_square(y_ij - y_ij1)
  k_sum(k_pow(a + b, 1.25))
}

get_feature_representations <-
  function(model, content_path, style_path) {
    # dim == (1, 128, 128, 3)
    style_image <-
      load_and_process_image(style_path) %>% k_cast("float32")
    # dim == (1, 128, 128, 3)
    content_image <-
      load_and_process_image(content_path) %>% k_cast("float32")
    # dim == (2, 128, 128, 3)
    stack_images <-
      k_concatenate(list(style_image, content_image), axis = 1)
    # length(model_outputs) == 6
    # dim(model_outputs[[1]]) = (2, 128, 128, 64)
    # dim(model_outputs[[6]]) = (2, 8, 8, 512)
    model_outputs <- model(stack_images)
    style_features <- model_outputs[1:num_style_layers] %>%
      map(function(batch)
        batch[1, , , ])
    content_features <-
      model_outputs[(num_style_layers + 1):(num_style_layers + num_content_layers)] %>%
      map(function(batch)
        batch[2, , , ])
    list(style_features, content_features)
  }

compute_loss <-
  function(model,
           loss_weights,
           init_image,
           gram_style_features,
           content_features) {
    c(style_weight, content_weight) %<-% loss_weights
    model_outputs <- model(init_image)
    style_output_features <- model_outputs[1:num_style_layers]
    content_output_features <-
      model_outputs[(num_style_layers + 1):(num_style_layers + num_content_layers)]
    
    weight_per_style_layer <- 1 / num_style_layers
    style_score <- 0
    # str(style_zip, max.level = 1)
    # dim(style_zip[[5]][[1]]) == (512, 512)
    style_zip <-
      transpose(list(gram_style_features, style_output_features))
    for (l in 1:length(style_zip)) {
      # for l == 1:
      # dim(target_style) == (64, 64)
      # dim(comb_style) == (1, 128, 128, 64)
      c(target_style, comb_style) %<-% style_zip[[l]]
      style_score <-
        style_score + weight_per_style_layer * style_loss(target_style, comb_style[1, , , ])
    }
    
    weight_per_content_layer <- 1 / num_content_layers
    content_score <- 0
    content_zip <-
      transpose(list(content_features, content_output_features))
    for (l in 1:length(content_zip)) {
      # dim(comb_content) ==  (1, 8, 8, 512)
      # dim(target_content) == (8, 8, 512)
      c(target_content, comb_content) %<-% content_zip[[l]]
      content_score <-
        content_score + weight_per_content_layer * content_loss(comb_content[1, , , ], target_content)
    }
    
    variation_loss <- total_variation_loss(init_image[1, , ,])
    style_score <- style_score * style_weight
    content_score <- content_score * content_weight
    variation_score <- variation_loss * total_variation_weight
    
    loss <- style_score + content_score + variation_score
    list(loss, style_score, content_score, variation_score)
  }

compute_grads <-
  function(model,
           loss_weights,
           init_image,
           gram_style_features,
           content_features) {
    with(tf$GradientTape() %as% tape, {
      scores <-
        compute_loss(model,
                     loss_weights,
                     init_image,
                     gram_style_features,
                     content_features)
    })
    total_loss <- scores[[1]]
    list(tape$gradient(total_loss, init_image), scores)
  }

run_style_transfer <- function(content_path,
                               style_path) {
  model <- get_model()
  walk(model$layers, function(layer)
    layer$trainable = FALSE)
  
  c(style_features, content_features) %<-% get_feature_representations(model, content_path, style_path)
  # dim(gram_style_features[[1]]) == (64, 64)
  # we compute this once, in advance
  gram_style_features <-
    map(style_features, function(feature)
      gram_matrix(feature))
  
  init_image <- load_and_process_image(content_path)
  init_image <-
    tf$contrib$eager$Variable(init_image, dtype = "float32")
  
  optimizer <-
    tf$train$AdamOptimizer(learning_rate = 1,
                           beta1 = 0.99,
                           epsilon = 1e-1)
  
  c(best_loss, best_image) %<-% list(Inf, NULL)
  loss_weights <- list(style_weight, content_weight)
  
  start_time <- Sys.time()
  global_start <- Sys.time()
  
  norm_means <- c(103.939, 116.779, 123.68)
  min_vals <- -norm_means
  max_vals <- 255 - norm_means
  
  for (i in seq_len(num_iterations)) {
    # dim(grads) == (1, 128, 128, 3)
    c(grads, all_losses) %<-% compute_grads(model,
                                            loss_weights,
                                            init_image,
                                            gram_style_features,
                                            content_features)
    c(loss, style_score, content_score, variation_score) %<-% all_losses
    optimizer$apply_gradients(list(tuple(grads, init_image)))
    clipped <- tf$clip_by_value(init_image, min_vals, max_vals)
    init_image$assign(clipped)
    
    end_time <- Sys.time()
    
    if (k_cast_to_floatx(loss) < best_loss) {
      best_loss <- k_cast_to_floatx(loss)
      best_image <- init_image
    }
    
    if (i %% 50 == 0) {
      glue("Iteration: {i}") %>% print()
      glue(
        "Total loss: {k_cast_to_floatx(loss)}, style loss: {k_cast_to_floatx(style_score)},
        content loss: {k_cast_to_floatx(content_score)}, total variation loss: {k_cast_to_floatx(variation_score)},
        time for 1 iteration: {(Sys.time() - start_time) %>% round(2)}"
      ) %>% print()
      
      if (i %% 100 == 0) {
        png(paste0("style_epoch_", i, ".png"))
        plot_image <- best_image$numpy()
        plot_image <- deprocess_image(plot_image)
        plot(as.raster(plot_image), main = glue("Iteration {i}"))
        dev.off()
      }
    }
  }
  
  glue("Total time: {Sys.time() - global_start} seconds") %>% print()
  list(best_image, best_loss)
}

c(best_image, best_loss) %<-% run_style_transfer(content_path, style_path)