## ----echo=knitr::is_html_output()--------------------------------------------
#| code-summary: "Load libraries"
source("code/setup.R")
## ----echo=knitr::is_html_output()--------------------------------------------
#| message: false
#| code-summary: Code for spiral plots
library(Rdimtools)
library(cardinalR)
n_obs <- 1652
set.seed(259)
swiss <- swiss_roll(n_obs, num_noise = 0)
# Standardising ruins structure
# swiss <- apply(swiss, 2, function(x) (x-mean(x))/sd(x))
swiss[,3] <- swiss[,3] * 3 # This scale produces more recognisable results
# Compute distance from (0,0) in first two coordinates - spiral
swiss <- cbind(swiss, sqrt(swiss[,1]^2 +
swiss[,2]^2))
colnames(swiss) <- c("x", "y", "z", "d")
swiss_tbl <- as_tibble(swiss)
swiss_pca <- prcomp(swiss[,1:3], scale=FALSE)
swiss_tbl <- bind_cols(swiss_tbl, as_tibble(swiss_pca$x))
spiral1 <- ggplot(swiss_tbl,
aes(x=PC2,
y=PC3,
colour=d)) +
geom_point() +
scale_colour_continuous_divergingx(palette="Zissou 1",
mid=median(swiss_tbl$d)) +
theme_minimal() +
theme(aspect.ratio=1,
axis.text = element_blank(),
legend.position = "none")
swiss_iso <- do.isomap(swiss[,1:3],
ndim=2, type=c("knn", 10),
weight=FALSE)$Y
colnames(swiss_iso) <- c("iso1", "iso2")
swiss_iso <- as_tibble(swiss_iso) |>
bind_cols(swiss_tbl)
spiral2 <- ggplot(swiss_iso,
aes(x=iso1,
y=iso2,
colour=d)) +
geom_point() +
scale_colour_continuous_divergingx(palette="Zissou 1",
mid=median(swiss_iso$d)) +
theme_minimal() +
theme(aspect.ratio=1,
axis.text = element_blank(),
legend.position = "none")
#spiral1 + spiral2 + plot_layout(ncol=2)
## ----------------------------------------------------------------------------
#| fig-width: 4
#| fig-height: 4
#| out-width: 100%
#| fig-cap: "PCA"
#| fig-alt: "This is an untitled chart has x-axis 'PC2' and y-axis 'PC3' with no labels or legend. Colour is used to show a variable d. The chart is a set of 1652 solid circle points laid out in a spiral pattern, with colours starting with one end of the rainbow smoothly transitioning the the other end of the rainbow scale at the other end of the spiral."
#| echo: false
spiral1
## ----------------------------------------------------------------------------
#| fig-width: 4
#| fig-height: 4
#| out-width: 100%
#| fig-cap: "isomap"
#| fig-alt: "This is an untitled chart has x-axis 'PC2' and y-axis 'PC3' with no labels or legend. Colour is used to show a variable d. The chart is a set of 1652 solid circle points laid out in a slightly squashed rectangle, with colours starting with one end of the rainbow on the left and smoothly transitioning the the other end of the rainbow scale at the right."
#| echo: false
spiral2
## ----------------------------------------------------------------------------
#| fig-width: 4
#| fig-height: 4
#| out-width: 100%
#| fig-cap: "PCA"
#| fig-alt: "This is an untitled chart has x-axis 'PC2' and y-axis 'PC3' with no labels or legend. Colour is used to show a variable d. The chart is a set of 1652 solid circle points laid out in a spiral pattern, with colours starting with one end of the rainbow smoothly transitioning the the other end of the rainbow scale at the other end of the spiral."
#| echo: false
spiral1
## ----------------------------------------------------------------------------
#| fig-width: 4
#| fig-height: 4
#| out-width: 100%
#| fig-cap: "isomap"
#| fig-alt: "This is an untitled chart has x-axis 'PC2' and y-axis 'PC3' with no labels or legend. Colour is used to show a variable d. The chart is a set of 1652 solid circle points laid out in a slightly squashed rectangle, with colours starting with one end of the rainbow on the left and smoothly transitioning the the other end of the rainbow scale at the right."
#| echo: false
spiral2
## ----------------------------------------------------------------------------
#| eval: false
#| echo: false
# swiss_tbl <- as_tibble(swiss)
# spiral1 <- ggplot(swiss_tbl,
# aes(x=x,
# y=y,
# colour=d)) +
# geom_point() +
# scale_colour_continuous_divergingx(palette="Zissou 1",
# mid=median(swiss_tbl$d)) +
# theme_minimal() +
# theme(aspect.ratio=1,
# axis.text = element_blank(),
# legend.position = "none")
# swiss_pca <- prcomp(swiss[,1:3], scale=FALSE)
# ggscatmat(swiss_pca$x)
# ggplot(swiss_pca$x, aes(x=PC1, y=PC2)) + geom_point()
# swiss_mds <- cmdscale(dist(swiss))
# colnames(swiss_mds) <- c("mds1", "mds2")
# swiss_mds <- as_tibble(swiss_mds) |>
# bind_cols(as_tibble(swiss))
# ggplot(swiss_mds, aes(x=mds1, y=mds2, colour=d)) +
# geom_point() +
# scale_colour_continuous_divergingx(palette="Zissou 1",
# mid=median(swiss_mds$d)) +
# theme(aspect.ratio=1)
# swiss_nmds <- monoMDS(dist(swiss[,1:3]),
# model = "local")$points
# colnames(swiss_nmds) <- c("nmds1", "nmds2")
# swiss_nmds <- as_tibble(swiss_nmds) |>
# bind_cols(swiss_tbl)
# ggplot(swiss_nmds, aes(x=nmds1, y=nmds2, colour=d)) +
# geom_point() +
# scale_colour_continuous_divergingx(palette="Zissou 1",
# mid=median(swiss_mds$d)) +
# theme(aspect.ratio=1)
#
# animate_xy(swiss[,1:3])
# animate_xy(swiss[,1:3], guided_tour(holes()))
# set.seed(432)
# render_gif(swiss[,1:3],
# grand_tour(),
# display_xy(axes="bottomleft"),
# gif_file = "gifs/swiss.gif",
# frames = 500,
# width = 300,
# height = 300)
#
# swiss_lle <- do.lle(swiss2,
# ndim=2, type=c("knn", 20), # type=c("enn", 0.5),
# weight=FALSE)$Y
# colnames(swiss_lle) <- c("lle1", "lle2")
# swiss_lle <- as_tibble(swiss_lle) |>
# bind_cols(swiss_tbl)
# ggplot(swiss_lle, aes(x=lle1, y=lle2, colour=d)) +
# geom_point() +
# scale_colour_continuous_divergingx(palette="Zissou 1",
# mid=median(swiss_lle$d)) +
# theme(aspect.ratio=1)
#
# swiss_df <- bind_cols(swiss, swiss_iso, swiss_lle)
# shared_swiss_df <- SharedData$new(swiss_df)
# p1 <- ggplot(shared_swiss_df, aes(x=iso1, y=iso2)) +
# geom_point()
# gp1 <- ggplotly(p1, width=500, height=500) |>
# highlight(on = "plotly_selected",
# off = "plotly_doubleclick")
# p2 <- ggplot(shared_swiss_df, aes(x=x, y=y)) +
# geom_point()
# gp2 <- ggplotly(p2, width=500, height=500) |>
# highlight(on = "plotly_selected",
# off = "plotly_doubleclick")
# p3 <- ggplot(shared_swiss_df, aes(x=lle1, y=lle2)) +
# geom_point()
# gp3 <- ggplotly(p3, width=500, height=500) |>
# highlight(on = "plotly_selected",
# off = "plotly_doubleclick")
#
# bscols(
# gp1, gp2, gp3,
# widths = c(4, 4, 4)
# )
#
# swiss_df <- bind_cols(swiss, swiss_iso)
# shared_swiss_df <- SharedData$new(swiss_df)
# p1 <- ggplot(shared_swiss_df, aes(x=iso1, y=iso2)) +
# geom_point()
# gp1 <- ggplotly(p1, width=300, height=300) |>
# highlight(on = "plotly_selected",
# off = "plotly_doubleclick")
# p2 <- ggplot(shared_swiss_df, aes(x=x, y=y)) +
# geom_point()
# gp2 <- ggplotly(p2, width=300, height=300) |>
# highlight(on = "plotly_selected",
# off = "plotly_doubleclick")
#
# bscols(
# gp1, gp2,
# widths = c(5, 5)
# )
## ----echo=knitr::is_html_output()--------------------------------------------
#| label: fig-nldr-clusters
#| fig-cap: "Two non-linear embeddings of the non-linear clusters data: (a) t-SNE, (b) UMAP. One suggests five clusters and the other four, and also disagree on the cluster shapes."
#| fig-alt: "This chart has two plots titled '(a) t-SNE', with x-axis 'tsne1' and y-axis 'tsne2', and '(b) UMAP' with x-axis 'umap1' anf y-axis 'umap2'. The chart '(a)' is a set of 1268 solid circle points arranged with a strip going from bottom let to top right, two C-shapes above and below the line and two small concetration of dots at the middle bottom and top. The points in chart '(b)' are arranged like a tilted smiley face."
#| fig-width: 8
#| fig-height: 4
#| out-width: 100%
#| message: false
#| code-summary: "Code to generate the 2D non-linear representation"
library(Rtsne)
library(uwot)
set.seed(44)
cnl_tsne <- Rtsne(clusters_nonlin)
cnl_umap <- umap(clusters_nonlin)
n1 <- ggplot(as.data.frame(cnl_tsne$Y), aes(x=V1, y=V2)) +
geom_point() +
xlab("tsne1") +
ylab("tsne2") +
ggtitle("(a) t-SNE") +
theme_minimal() +
theme(aspect.ratio=1,
axis.text = element_blank())
n2 <- ggplot(as.data.frame(cnl_umap), aes(x=V1, y=V2)) +
geom_point() +
xlab("umap1") +
ylab("umap2") +
ggtitle("(b) UMAP") +
theme_minimal() +
theme(aspect.ratio=1,
axis.text = element_blank())
n1 + n2
## ----------------------------------------------------------------------------
#| eval: false
#| echo: false
#| code-summary: "Code to create animated gif"
# render_gif(clusters_nonlin,
# grand_tour(),
# display_xy(),
# gif_file = "gifs/clusters_nonlin.gif",
# frames = 500,
# width = 300,
# height = 300)
## ----------------------------------------------------------------------------
#| message: FALSE
#| eval: false
#| code-fold: false
# umap_df <- data.frame(umapX = cnl_umap[, 1],
# umapY = cnl_umap[, 2])
# limn_tour_link(
# umap_df,
# clusters_nonlin,
# cols = x1:x4
# )
## ----------------------------------------------------------------------------
#| message: false
#| eval: false
#| code-fold: false
# umap_df <- data.frame(umapX = cnl_umap[, 1],
# umapY = cnl_umap[, 2])
# cnl_df <- bind_cols(clusters_nonlin, umap_df)
# shared_cnl <- SharedData$new(cnl_df)
#
# detour_plot <- detour(shared_cnl, tour_aes(
# projection = starts_with("x"))) |>
# tour_path(grand_tour(2),
# max_bases=50, fps = 60) |>
# show_scatter(alpha = 0.7, axes = FALSE,
# width = "100%", height = "450px")
#
# umap_plot <- plot_ly(shared_cnl,
# x = ~umapX,
# y = ~umapY,
# color = I("black"),
# height = 450) %>%
# highlight(on = "plotly_selected",
# off = "plotly_doubleclick") %>%
# add_trace(type = "scatter",
# mode = "markers")
#
# bscols(
# detour_plot, umap_plot,
# widths = c(5, 6)
# )
## ----------------------------------------------------------------------------
#| message: false
#| eval: false
#| code-summary: "Code to run liminal on the fake trees data"
# data(fake_trees)
# set.seed(2020)
# tsne <- Rtsne::Rtsne(
# dplyr::select(fake_trees,
# dplyr::starts_with("dim")))
# tsne_df <- data.frame(tsneX = tsne$Y[, 1],
# tsneY = tsne$Y[, 2])
# limn_tour_link(
# tsne_df,
# fake_trees,
# cols = dim1:dim10,
# color = branches
# )
## ----------------------------------------------------------------------------
#| message: false
#| eval: false
#| echo: false
# set.seed(1240)
# cnl_tsne <- Rtsne(clusters_nonlin)
# cnl_umap <- umap(clusters_nonlin)
# n1 <- ggplot(as.data.frame(cnl_tsne$Y), aes(x=V1, y=V2)) +
# geom_point() +
# ggtitle("(a) t-SNE") +
# theme_minimal() +
# theme(aspect.ratio=1)
# n2 <- ggplot(as.data.frame(cnl_umap), aes(x=V1, y=V2)) +
# geom_point() +
# ggtitle("(b) UMAP") +
# theme_minimal() +
# theme(aspect.ratio=1)
# n1 + n2
## ----------------------------------------------------------------------------
#| message: false
# Answer to Q2
library(Rdimtools)
library(cardinalR)
n_obs <- 1652
set.seed(259)
swiss <- swiss_roll(n_obs, num_noise = 0)
# Standardise
swiss_std <- apply(swiss, 2, function(x) (x-mean(x))/sd(x))
# Compute distance from (0,0) in first two coordinates - spiral
swiss_std <- cbind(swiss_std, sqrt(swiss[,1]^2 +
swiss[,2]^2))
colnames(swiss_std) <- c("x", "y", "z", "d")
swiss_tbl <- as_tibble(swiss_std)
swiss_pca <- prcomp(swiss_std[,1:3], scale=FALSE)
swiss_tbl <- bind_cols(swiss_tbl, as_tibble(swiss_pca$x))
spiral1 <- ggplot(swiss_tbl,
aes(x=PC1,
y=PC3,
colour=d)) +
geom_point() +
scale_colour_continuous_divergingx(palette="Zissou 1",
mid=median(swiss_tbl$d)) +
theme_minimal() +
theme(aspect.ratio=1,
axis.text = element_blank(),
legend.position = "none")
swiss_iso <- do.isomap(swiss_std[,1:3],
ndim=2, type=c("knn", 30),
weight=FALSE)$Y
colnames(swiss_iso) <- c("iso1", "iso2")
swiss_iso <- as_tibble(swiss_iso) |>
bind_cols(swiss_tbl)
spiral2 <- ggplot(swiss_iso,
aes(x=iso1,
y=iso2,
colour=d)) +
geom_point() +
scale_colour_continuous_divergingx(palette="Zissou 1",
mid=median(swiss_iso$d)) +
theme_minimal() +
theme(aspect.ratio=1,
axis.text = element_blank(),
legend.position = "none")
spiral1 + spiral2 + plot_layout(ncol=2)
## ----------------------------------------------------------------------------
#| message: false
# Answer to Q3
n_obs <- 1652
set.seed(259)
swiss <- swiss_roll(n_obs, num_noise = 0)
swiss[,3] <- swiss[,3] * 3
swiss <- cbind(swiss, sqrt(swiss[,1]^2 +
swiss[,2]^2))
colnames(swiss) <- c("x", "y", "z", "d")
swiss_tbl <- as_tibble(swiss)
set.seed(111)
swiss_tsne <- Rtsne::Rtsne(swiss[,1:3])$Y
colnames(swiss_tsne) <- c("tsne1", "tsne2")
swiss_tsne <- as_tibble(swiss_tsne) |>
bind_cols(swiss_tbl)
swiss_tsne <- ggplot(swiss_tsne,
aes(x=tsne1,
y=tsne2,
colour=d)) +
geom_point() +
scale_colour_continuous_divergingx(palette="Zissou 1",
mid=median(swiss_tbl$d)) +
theme_minimal() +
theme(aspect.ratio=1,
axis.text = element_blank(),
legend.position = "none")
set.seed(111)
swiss_umap <- uwot::umap(swiss[,1:3])
colnames(swiss_umap) <- c("umap1", "umap2")
swiss_umap <- as_tibble(swiss_umap) |>
bind_cols(swiss_tbl)
swiss_umap <- ggplot(swiss_umap,
aes(x=umap1,
y=umap2,
colour=d)) +
geom_point() +
scale_colour_continuous_divergingx(palette="Zissou 1",
mid=median(swiss_tbl$d)) +
theme_minimal() +
theme(aspect.ratio=1,
axis.text = element_blank(),
legend.position = "none")
swiss_tsne + swiss_umap + plot_layout(ncol=2)
## ----------------------------------------------------------------------------
#| message: false
#| eval: false
#| echo: false
# load("data/penguins_sub.rda")
#
# set.seed(2022)
# p_tsne <- Rtsne::Rtsne(penguins_sub[,2:5])
# p_tsne_df <- data.frame(tsneX = p_tsne$Y[, 1], tsneY = p_tsne$Y[, 2])
# limn_tour_link(
# p_tsne_df,
# penguins_sub,
# cols = bl:bm,
# color = species
# )
# # The t-SNE mapping of the penguins data inaccurately splits one of the clusters. The three clusters are clearly distinct when viewed with the tour.
## ----------------------------------------------------------------------------
#| message: false
#| eval: false
#| echo: false
# data(multicluster)
# animate_xy(multicluster[,2:11])
#
# set.seed(129)
# mc_umap <- uwot::umap(multicluster[,2:11])
# colnames(mc_umap) <- c("umap1", "umap2")
# mc_umap <- as_tibble(mc_umap)
# ggplot(mc_umap, aes(x=umap1,
# y=umap2)) +
# geom_point() +
# theme_minimal() +
# theme(aspect.ratio=1,
# axis.text = element_blank())
## ----------------------------------------------------------------------------
#| message: false
#| eval: false
#| echo: false
# data(sketches_train)
# #animate_xy(sketches_train[,1:784])
#
# set.seed(137)
# sketches_umap <- uwot::umap(sketches_train[,1:784])
# colnames(sketches_umap) <- c("umap1", "umap2")
# sketches_umap <- as_tibble(sketches_umap) |>
# mutate(word = sketches_train$word)
# ggplot(sketches_umap, aes(x=umap1,
# y=umap2,
# colour=word)) +
# geom_point() +
# scale_colour_discrete_divergingx(palette = "Zissou 1") +
# theme_minimal() +
# theme(aspect.ratio=1,
# axis.text = element_blank())
## ----------------------------------------------------------------------------
#| label: pbmc
#| message: false
#| eval: false
#| echo: false
# pbmc <- readRDS("data/pbmc_pca_50.rds")
#
# # t-SNE
# set.seed(1041)
# p_tsne <- Rtsne::Rtsne(pbmc[,1:15])
# p_tsne_df <- data.frame(tsneX = p_tsne$Y[, 1], tsneY = p_tsne$Y[, 2])
# ggplot(p_tsne_df, aes(x=tsneX, y=tsneY)) + geom_point()
#
# # UMAP
# set.seed(1045)
# p_umap <- uwot::umap(pbmc[,1:15])
# p_umap_df <- data.frame(umapX = p_umap[, 1], umapY = p_umap[, 2])
# ggplot(p_umap_df, aes(x=umapX, y=umapY)) + geom_point()