{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "Attaching package: ‘dplyr’\n", "\n", "The following objects are masked from ‘package:data.table’:\n", "\n", " between, first, last\n", "\n", "The following objects are masked from ‘package:stats’:\n", "\n", " filter, lag\n", "\n", "The following objects are masked from ‘package:base’:\n", "\n", " intersect, setdiff, setequal, union\n", "\n", "Loading required package: MASS\n", "\n", "Attaching package: ‘MASS’\n", "\n", "The following object is masked from ‘package:dplyr’:\n", "\n", " select\n", "\n", "Loading required package: ggplot2\n", "\n", "Attaching package: ‘cowplot’\n", "\n", "The following object is masked from ‘package:ggplot2’:\n", "\n", " ggsave\n", "\n", "Loading required package: magrittr\n", "\n", "Attaching package: ‘ggpubr’\n", "\n", "The following object is masked from ‘package:cowplot’:\n", "\n", " get_legend\n", "\n" ] } ], "source": [ "library(data.table)\n", "library(dplyr)\n", "library(Matrix)\n", "library(BuenColors)\n", "library(stringr)\n", "library(cowplot)\n", "library(ggpubr)\n", "library(umap)\n", "library(RColorBrewer)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### define functions" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "read_FM <- function(filename){\n", " df_FM = data.frame(readRDS(filename),stringsAsFactors=FALSE,check.names=FALSE)\n", " rownames(df_FM) <- make.names(rownames(df_FM), unique=TRUE)\n", " df_FM[is.na(df_FM)] <- 0\n", " return(df_FM)\n", "}\n", "\n", "run_pca <- function(mat,num_pcs=50,remove_first_PC=FALSE,scale=FALSE,center=FALSE){\n", " set.seed(2019) \n", " mat = as.matrix(mat)\n", " SVD = irlba(mat, num_pcs, num_pcs,scale=scale,center=center)\n", " sk_diag = matrix(0, nrow=num_pcs, ncol=num_pcs)\n", " diag(sk_diag) = SVD$d\n", " if(remove_first_PC){\n", " sk_diag[1,1] = 0\n", " SVD_vd = (sk_diag %*% t(SVD$v))[2:num_pcs,]\n", " }else{\n", " SVD_vd = sk_diag %*% t(SVD$v)\n", " }\n", " return(SVD_vd)\n", "}\n", "\n", "elbow_plot <- function(mat,num_pcs=50,scale=FALSE,center=FALSE,title='',width=3,height=3){\n", " set.seed(2019) \n", " mat = data.matrix(mat)\n", " SVD = irlba(mat, num_pcs, num_pcs,scale=scale,center=center)\n", " options(repr.plot.width=width, repr.plot.height=height)\n", " df_plot = data.frame(PC=1:num_pcs, SD=SVD$d);\n", "# print(SVD$d[1:num_pcs])\n", " p <- ggplot(df_plot, aes(x = PC, y = SD)) +\n", " geom_point(col=\"#cd5c5c\",size = 1) + \n", " ggtitle(title)\n", " return(p)\n", "}\n", "\n", "run_umap <- function(fm_mat){\n", " umap_object = umap(t(fm_mat),random_state = 2019)\n", " df_umap = umap_object$layout\n", " return(df_umap)\n", "}\n", "\n", "\n", "plot_umap <- function(df_umap,labels,title='UMAP',colormap=colormap){\n", " set.seed(2019) \n", " df_umap = data.frame(cbind(df_umap,labels),stringsAsFactors = FALSE)\n", " colnames(df_umap) = c('umap1','umap2','label')\n", " df_umap$umap1 = as.numeric(df_umap$umap1)\n", " df_umap$umap2 = as.numeric(df_umap$umap2)\n", " options(repr.plot.width=4, repr.plot.height=4)\n", " p <- ggplot(shuf(df_umap), aes(x = umap1, y = umap2, color = label)) +\n", " geom_point(size = 1) + scale_color_manual(values = colormap) +\n", " ggtitle(title)\n", " return(p)\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Input" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "workdir = './output_new/'\n", "\n", "path_umap = paste0(workdir,'umap_rds/')\n", "system(paste0('mkdir -p ',path_umap))\n", "\n", "path_fm = paste0(workdir,'feature_matrices/')\n", "system(paste0('mkdir -p ',path_fm))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "metadata <- read.table('../input/metadata.tsv',\n", " header = TRUE,\n", " stringsAsFactors=FALSE,quote=\"\",row.names=1)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "path_fm_old = './output/feature_matrices/'\n", "path_umap_old = './output/umap_rds/'" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "