## ----echo=knitr::is_html_output()--------------------------------------------
#| code-summary: "Load libraries"
source("code/setup.R")


## ----echo=knitr::is_html_output()--------------------------------------------
#| code-summary: "Code to simulate data examples"
# Toy examples
set.seed(1071)
n1 <- 162
vc1 <- matrix(c(1, -0.7, -0.7, 1), ncol=2, byrow=TRUE)
dc1 <- rmvn(n=n1, p=2, mn=c(-2, -2), vc=vc1)
vc2 <- matrix(c(1, -0.4, -0.4, 1)*2, ncol=2, byrow=TRUE)
n2 <- 138
dc2 <- rmvn(n=n2, p=2, mn=c(2, 2), vc=vc2)
df1 <- data.frame(x1=mulgar:::scale2(c(dc1[,1], dc2[,1])), 
                 x2=mulgar:::scale2(c(dc1[,2], dc2[,2])), 
                 cl = factor(c(rep("A", n1), 
                               rep("B", n2))))
dc1 <- sphere.hollow(p=2, n=n1)$points*3 + 
  c(rnorm(n1, sd=0.3), rnorm(n1, sd=0.3))
dc2 <- sphere.solid.random(p=2, n=n2)$points
df2 <- data.frame(x1=mulgar:::scale2(c(dc1[,1], dc2[,1])), 
                  x2=mulgar:::scale2(c(dc1[,2], dc2[,2])), 
                  cl = factor(c(rep("A", n1), 
                               rep("B", n2))))


## ----------------------------------------------------------------------------
#| code-fold: false
#| message: false
library(classifly)
library(e1071)
df1_svm <- svm(cl~., data=df1, 
                     probability=TRUE, 
                     kernel="linear", 
               scale=FALSE)
df1_svm_e <- explore(df1_svm, df1)

df2_svm <- svm(cl~., data=df2,  
                     probability=TRUE, 
                     kernel="radial")
df2_svm_e <- explore(df2_svm, df2)


## ----echo=knitr::is_html_output()--------------------------------------------
#| label: fig-svm-toy
#| fig-cap: "SVM classifier fit overlaid on two simulated data examples: (a) groups with different variance-covariance, fitted using a linear kernel, (b) groups with non-linear separation, fitted using a radial kernel. The band of points shown as '+' mark the SVM boundary, and points marked by 'x' are the support vectors used to define the boundary. "
#| fig-width: 8
#| fig-height: 4
#| out-width: 100%
#| code-summary: "Code to make plots"
s1 <- ggplot() + 
  geom_point(data=df1, aes(x=x1, y=x2, colour=cl),
             shape=20) +
  scale_colour_discrete_divergingx(palette="Zissou 1") +
  geom_point(data=df1_svm_e[(!df1_svm_e$.BOUNDARY)&(df1_svm_e$.TYPE=="simulated"),], 
             aes(x=x1, y=x2, colour=cl), shape=3) +
  geom_point(data=df1[df1_svm$index,], 
             aes(x=x1, y=x2, colour=cl), 
             shape=4, size=4) +
  theme_minimal() +
  theme(aspect.ratio=1, legend.position = "none") +
  ggtitle("(a)")

s2 <- ggplot() + 
  geom_point(data=df2, aes(x=x1, y=x2, colour=cl), shape=20) +
  scale_colour_discrete_divergingx(palette="Zissou 1") +
  geom_point(data=df2_svm_e[(!df2_svm_e$.BOUNDARY)&(df2_svm_e$.TYPE=="simulated"),], 
             aes(x=x1, y=x2, colour=cl), 
             shape=3) +
  geom_point(data=df2[df2_svm$index,], 
             aes(x=x1, y=x2, colour=cl), 
             shape=4, size=4) +
  theme_minimal() +
  theme(aspect.ratio=1, legend.position = "none") +
  ggtitle("(b)")

s1+s2


## ----------------------------------------------------------------------------
#| code-fold: false
df1_svm$index


## ----------------------------------------------------------------------------
#| code-fold: false
df1_svm$coefs


## ----------------------------------------------------------------------------
#| code-fold: false
df1_svm$rho


## ----------------------------------------------------------------------------
#| code-fold: false
w = t(df1_svm$SV) %*% df1_svm$coefs
w


## ----------------------------------------------------------------------------
#| eval: false
#| code-fold: false
# s1 + geom_abline(intercept=df1_svm$rho/w[2],
#                  slope=-w[1]/w[2])


## ----------------------------------------------------------------------------
#| code-fold: false
#| warning: false
#| message: false
load("data/penguins_sub.rda")
chinstrap <- penguins_sub %>%
  filter(species == "Chinstrap") %>%
  select(-species) %>%
  mutate_if(is.numeric, mulgar:::scale2)
chinstrap_svm <- svm(sex~., data=chinstrap, 
                     kernel="linear",
                     probability=TRUE, 
                     scale=FALSE)
chinstrap_svm_e <- explore(chinstrap_svm, chinstrap)


## ----echo=knitr::is_html_output()--------------------------------------------
#| eval: false
#| code-summary: "Code to make the tours"
# # Tour raw data
# animate_xy(chinstrap[,1:4], col=chinstrap$sex)
# # Add all SVs, including bounded
# c_pch <- rep(20, nrow(chinstrap))
# c_pch[chinstrap_svm$index] <- 4
# animate_xy(chinstrap[,1:4], col=chinstrap$sex, pch=c_pch)
# # Only show the SVs with |coefs| < 1
# c_pch <- rep(20, nrow(chinstrap))
# c_pch[chinstrap_svm$index[abs(chinstrap_svm$coefs)<1]] <- 4
# c_cex <- rep(1, nrow(chinstrap))
# c_cex[chinstrap_svm$index[abs(chinstrap_svm$coefs)<1]] <- 2
# animate_xy(chinstrap[,1:4], col=chinstrap$sex,
#            pch=c_pch, cex=c_cex)
# render_gif(chinstrap[,1:4],
#            grand_tour(),
#            display_xy(col=chinstrap$sex, pch=c_pch, cex=c_cex),
#            gif_file="gifs/chinstrap_svs.gif",
#            width=400,
#            height=400,
#            frames=500)
# 
# # Tour the separating hyperplane also
# symbols <- c(3, 20)
# c_pch <- symbols[as.numeric(chinstrap_svm_e$.TYPE[!chinstrap_svm_e$.BOUNDARY])]
# animate_xy(chinstrap_svm_e[!chinstrap_svm_e$.BOUNDARY,1:4],
#            col=chinstrap_svm_e$sex[!chinstrap_svm_e$.BOUNDARY],
#            pch=c_pch)
# render_gif(chinstrap_svm_e[!chinstrap_svm_e$.BOUNDARY,1:4],
#            grand_tour(),
#            display_xy(col=chinstrap_svm_e$sex[!chinstrap_svm_e$.BOUNDARY], pch=c_pch),
#            gif_file="gifs/chinstrap_svm.gif",
#            width=400,
#            height=400,
#            frames=500)


## ----------------------------------------------------------------------------
#| code-fold: false
t(chinstrap_svm$SV) %*% chinstrap_svm$coefs


## ----------------------------------------------------------------------------
#| code-fold: false
set.seed(1022)
prj1 <- mulgar::norm_vec(t(chinstrap_svm$SV) %*%
                           chinstrap_svm$coefs)
prj2 <- basis_random(4, 1)
prj <- orthonormalise(cbind(prj1, prj2))
prj


## ----echo=knitr::is_html_output()--------------------------------------------
#| eval: false
#| code-summary: "Code to conduct the radial tours"
# animate_xy(chinstrap_svm_e[!chinstrap_svm_e$.BOUNDARY,1:4],
#            tour_path = radial_tour(start=prj, mvar = 2),
#            col=chinstrap_svm_e$sex[!chinstrap_svm_e$.BOUNDARY],
#            pch=c_pch)
# render_gif(chinstrap_svm_e[!chinstrap_svm_e$.BOUNDARY,1:4],
#            radial_tour(start=prj, mvar = 2),
#            display_xy(col=chinstrap_svm_e$sex[!chinstrap_svm_e$.BOUNDARY], pch=c_pch),
#            gif_file="gifs/chinstrap_rad_bd.gif",
#            apf = 1/30,
#            width=400,
#            height=400,
#            frames=500)
# render_gif(chinstrap_svm_e[!chinstrap_svm_e$.BOUNDARY,1:4],
#            radial_tour(start=prj, mvar = 1),
#            display_xy(col=chinstrap_svm_e$sex[!chinstrap_svm_e$.BOUNDARY], pch=c_pch),
#            gif_file="gifs/chinstrap_rad_bl.gif",
#            apf = 1/30,
#            width=400,
#            height=400,
#            frames=500)
# render_gif(chinstrap_svm_e[!chinstrap_svm_e$.BOUNDARY,1:4],
#            radial_tour(start=prj, mvar = 3),
#            display_xy(col=chinstrap_svm_e$sex[!chinstrap_svm_e$.BOUNDARY], pch=c_pch),
#            gif_file="gifs/chinstrap_rad_fl.gif",
#            apf = 1/30,
#            width=400,
#            height=400,
#            frames=500)
# render_gif(chinstrap_svm_e[!chinstrap_svm_e$.BOUNDARY,1:4],
#            radial_tour(start=prj, mvar = 4),
#            display_xy(col=chinstrap_svm_e$sex[!chinstrap_svm_e$.BOUNDARY], pch=c_pch),
#            gif_file="gifs/chinstrap_rad_bm.gif",
#            apf = 1/30,
#            width=400,
#            height=400,
#            frames=500)


## ----------------------------------------------------------------------------
#| eval: false
#| echo: false
# data(bushfires)
# 
# bushfires_sub <- dplyr::select(bushfires, log_dist_cfa,
#                                log_dist_road,
#                                cause) %>%
#   filter(cause %in% c("lightning", "arson")) %>%
#   mutate(cause = as.factor(cause))
# 
# ggplot(bushfires_sub, aes(log_dist_cfa, log_dist_road, color = cause)) +
#   geom_point()
# 
# bf_lda <- lda(cause ~ ., bushfires_sub)#, prior = c(0.5, 0.5))
# bf_lda_b <- as.data.frame(explore(bf_lda, bushfires_sub))
# ggplot(filter(bf_lda_b, .BOUNDARY != TRUE)) +
#   geom_point(aes(x=log_dist_cfa, y=log_dist_road, colour=cause, shape=.TYPE)) +
#   #scale_color_discrete_divergingx("Zissou 1") +
#   scale_shape_manual(values=c(46, 16)) +
#   theme_minimal() +
#   theme(aspect.ratio = 1, legend.position = "none")
# 
# bf_svm <- svm(cause ~ ., bushfires_sub, kernel = "linear",
#               probability = TRUE)
# bf_svm_b <- as.data.frame(explore(bf_svm, bushfires_sub))
# ggplot(filter(bf_svm_b, .BOUNDARY != TRUE)) +
#   geom_point(aes(x=log_dist_cfa, y=log_dist_road, colour=cause, shape=.TYPE)) +
#   #scale_color_discrete_divergingx("Zissou 1") +
#   scale_shape_manual(values=c(46, 16)) +
#   theme_minimal() +
#   theme(aspect.ratio = 1, legend.position = "none")
# 
# bushfires_sub_2 <- dplyr::select(bushfires, #amaxt90,
#                                  amaxt180, amaxt720,
#                                  log_dist_cfa, log_dist_road,
#                                  cause) %>%
#   filter(cause %in% c("lightning", "arson")) %>%
#   mutate(cause = as.factor(cause))
# 
# bf_2_svm <- svm(cause ~ ., data = bushfires_sub_2,
#                 kernel = "linear", probability = TRUE)
# 
# b1_1 <- t(bf_2_svm$SV) %*% bf_2_svm$coefs
# # it seems most variables are important, maybe amaxt180
# 
# bf_2_svm_e <- explore(bf_2_svm, bushfires_sub_2, n = 50000)
# 
# symbols <- c(46, 20)
# c_pch <- symbols[as.numeric(bf_2_svm_e$.TYPE[!bf_2_svm_e$.BOUNDARY])]
# animate_xy(bf_2_svm_e[!bf_2_svm_e$.BOUNDARY,1:4],
#            col=as.factor(bf_2_svm_e$cause[!bf_2_svm_e$.BOUNDARY]),
#            pch = c_pch)
# # we see that the hyperplane is > 2D and that amaxt180 is less important
# # we also see that main feature in the data is distribution for one group (lightning)
# # and it is difficult to separate the groups because they fall along similar direction
# # now let's check with radial tour
# set.seed(383)
# b_start <- orthonormalise(cbind(b1_1, basis_random(4,1)))
# animate_xy(bf_2_svm_e[!bf_2_svm_e$.BOUNDARY,1:4],
#            tour_path = radial_tour(start = b_start,
#                                    mvar = c(3,4)),
#            col=bf_2_svm_e$cause[!bf_2_svm_e$.BOUNDARY],
#            pch=c_pch)
# # because we use a combination of variables we do not see
# # plane turning when rotating out only one of the variables,
# # but we can see that combinations of the last three is what is
# # important
# 
# data("sketches_train")
# banana_v_boomerang <- filter(sketches_train,
#                           word %in% c("banana", "boomerang"))
# bvb_pca <- prcomp(banana_v_boomerang[,1:784])
# ggscree(bvb_pca, q=25, guide=FALSE) # keeping first 5
# # keeping first 9 seems like a good idea, but would require very large n
# bvb_pc <- as.data.frame(bvb_pca$x[,1:5])
# bvb_pc$word <- droplevels(banana_v_boomerang$word)
# 
# bvb_pc <- mutate_if(bvb_pc, is.numeric, mulgar:::scale2)
# 
# animate_xy(bvb_pc[,1:5], col=bvb_pc$word)
# 
# 
# 
# bvb_svm_l <- svm(word ~ ., data = bvb_pc,
#                 kernel = "linear", probability = TRUE)
# sum(predict(bvb_svm_l) != bvb_pc$word) # compare accuracy as well
# bvb_svm_r <- svm(word ~ ., data = bvb_pc,
#                 kernel = "radial", probability = TRUE)
# sum(predict(bvb_svm_r) != bvb_pc$word)
# 
# bvb_svm_l_e <- explore(bvb_svm_l, bvb_pc, n = 20000)
# 
# symbols <- c(46, 20)
# c_pch <- symbols[as.numeric(bvb_svm_l_e$.TYPE[!bvb_svm_l_e$.BOUNDARY])]
# animate_xy(bvb_svm_l_e[!bvb_svm_l_e$.BOUNDARY,1:5],
#            col=bvb_svm_l_e$word[!bvb_svm_l_e$.BOUNDARY],
#            pch = c_pch)
# # for linear SVM we can use projections to understand the shape
# 
# bvb_svm_r_e <- explore(bvb_svm_r, bvb_pc, n = 20000)
# animate_slice(bvb_svm_r_e[bvb_svm_r_e$.TYPE=="simulated" &
#                             !bvb_svm_r_e$.BOUNDARY,1:5],
#               col=bvb_svm_r_e$word[bvb_svm_r_e$.TYPE=="simulated" &
#                                      !bvb_svm_r_e$.BOUNDARY], v_rel = 10)
# # for radial kernel better use slice tour to see shape of the decision boundary
# # here we only look at simulated points on the boundary, clear that this is not linear
#