wrote:
> There are 2 different Synap channels (2 different antibodies were
> used), so that part is fine.
> And 2 different VGluT1 channels (same antibody but done at different
> times)
> The NOS channel is the same, so count it as one even though it appears
> twice. It is listed two times because it can be found at both excitatory
> and inhibitory synapses. This is where your count of 25 comes, even
> though there are 24 channels.
> I would also add the 2 Synap channels to the Inhibitory presynaptic
> category - there is supposed to be synapsin there, but at lower levels
> compared to excitatory presynaptic category.
- Note: The order of the channels are given by line `227` in the `kohenen.m` file which can be found in the dropbox folder.
- `Synap` and `Synap` have been augmented to `Synap_1` and `Synap_2` for clarity.
- `VGlut1` and `VGlut1` have been augmented to `VGlut1_t1` and `VGlut1_t2` to distinguish between the different times (which are unknown).
```{r cc_data, eval=TRUE}
feat <- fread("../Data/synapsinR_7thA.tif.Pivots.txt.2011Features.txt",showProgress=FALSE)
dim(feat)
### Setting a seed and creating an index vector
### to select half of the data
set.seed(2^13)
half1 <- sample(dim(feat)[1],dim(feat)[1]/2)
feat <- feat[half1,]
channel <- c('Synap_1','Synap_2','VGlut1_t1','VGlut1_t2','VGlut2','Vglut3',
'psd','glur2','nmdar1','nr2b','gad','VGAT',
'PV','Gephyr','GABAR1','GABABR','CR1','5HT1A',
'NOS','TH','VACht','Synapo','tubuli','DAPI')
## NOS as ex.post
#channel.type <- c('ex.pre','ex.pre','ex.pre','ex.pre','ex.pre','in.pre.small',
# 'ex.post','ex.post','ex.post','ex.post','in.pre','in.pre',
# 'in.pre','in.post','in.post','in.post','in.pre.small','other',
# 'ex.post','other','other','ex.post','none','none')
#
## NOS as in.post
channel.type <- c('ex.pre','ex.pre','ex.pre','ex.pre','ex.pre','in.pre.small',
'ex.post','ex.post','ex.post','ex.post','in.pre','in.pre',
'in.pre','in.post','in.post','in.post','in.pre.small','other',
'in.post','other','other','ex.post','none','none')
nchannel <- length(channel)
nfeat <- ncol(feat) / nchannel
fchannel <- as.numeric(factor(channel.type,
levels= c("ex.pre","ex.post","in.pre","in.post","in.pre.small","other","none")
))
ford <- order(fchannel)
#ccol <- rainbow(max(fchannel))[fchannel]
Syncol <- c("#197300","#5ed155","#990000","#cc0000","#ff9933","mediumblue","gold")
ccol <- Syncol[fchannel]
exType <- factor(c(rep("ex",11),rep("in",6),rep("other",7)),ordered=TRUE)
exCol<-exType;levels(exCol) <- c("#197300","#990000","mediumblue");
exCol <- as.character(exCol)
fname <- as.vector(sapply(channel,function(x) paste0(x,paste0("F",0:5))))
names(feat) <- fname
fcol <- rep(ccol, each=6)
mycol <- colorpanel(100, "purple", "black", "green")
mycol2 <- matlab.like(nchannel)
```
## Transformations
Each of the channels has an arbitrary independent linear transformation and thus
converting to z-scores or quantiles is required.
Selecting only the `f0` features, we will use both the feature vector and a
transformed version, transforming by adding 1 and taking
the log base 10 for each column and scaling.
```{r cc_featF0, eval=TRUE}
f0 <- seq(1,ncol(feat),by=nfeat)
fRaw <- subset(feat, select=f0)
#fLog <- apply(X=(fRaw+1),2,log10)
#fLogScaled <- scale(flog, center=TRUE, scale=TRUE)
eps <- 2*.Machine$double.eps
f01 <- apply(X=fRaw, 2, function(x){((x-min(x))/(max(x)-min(x)))})
fRank <- apply(fRaw,2,rank,ties.method='average')
fLog <- apply(f01,2,function(x){ log10(x+eps)})
```
# Correlations
## Raw
```{r cc_corRaw, eval=TRUE, fig.cap=fig$cap("corRaw", "Correlation on raw data, reordered by synapse type.")}
tmp <- as.numeric(table(fchannel))
cmatRaw <- cor(fRaw)
corrplot(cmatRaw[ford,ford],method="color",tl.col=ccol[ford])
corrRect(tmp,col=Syncol,lwd=4)
```
## Log
```{r cc_corLog, eval=TRUE, fig.cap=fig$cap("corLog", "Correlation on log data, reordered by synapse type.")}
cmatLog <- cor(fLog)
corrplot(cmatLog[ford,ford],method="color",tl.col=ccol[ford])
corrRect(tmp,col=Syncol,lwd=4)
```
## Rank
```{r cc_corRank, eval=TRUE, fig.cap=fig$cap("corRank", "Correlation on rank data, reordered by synapse type.")}
cmatRank <- cor(fRank)
corrplot(cmatRank[ford,ford],method="color",tl.col=ccol[ford])
corrRect(tmp,col=Syncol,lwd=4)
```
# PCA and Scree Plots on the Correlation Matrices
Note that centering and scaling are set to `FALSE`.
```{r pcaRaw, eval=TRUE}
pcaRaw <- prcomp(cmatRaw,scale=FALSE, center=FALSE)
elRaw <- getElbows(pcaRaw$sdev, plot=FALSE)
```
```{r pcaLog, eval=TRUE}
pcaLog <- prcomp(cmatLog,scale=FALSE, center=FALSE)
elLog <- getElbows(pcaLog$sdev, plot=FALSE)
```
```{r pcaRank, eval=TRUE}
pcaRank <- prcomp(cmatRank,scale=FALSE, center=FALSE)
elRank <- getElbows(pcaRank$sdev, plot=FALSE)
```
```{r cc-scree-plots, eval=TRUE, w=8,h=4, fig.cap=fig$cap("scree-plots","Scree plots")}
par(mfrow=c(1,3))
plot(pcaRaw$sdev, type='b'); title("Scree plot: Raw Cor. Matrix")
points(cbind(elRaw,pcaRaw$sdev[elRaw]), col='red', pch=20)
plot(pcaLog$sdev, type='b'); title("Scree plot: Log Cor. Matrix")
points(cbind(elLog,pcaLog$sdev[elLog]), col='red', pch=20)
plot(pcaRank$sdev, type='b'); title("Scree plot: Rank Cor. Matrix")
points(cbind(elRank,pcaRank$sdev[elRank]), col='red', pch=20)
```
# PAMK
Running `pamk` for $K = 7$.
## PAMK on Raw data
```{r cc_pamkRaw, eval=TRUE, fig.cap=fig$cap("silRaw", "Average silhouette width plot on Raw Cor. Matrix")}
K <- 7
pamRaw7 <- pamk(pcaRaw$x[,1:elRaw[2]],krange=K,usepam=FALSE,critout=FALSE)
```
```{r cc_aggRaw, eval=TRUE}
aggRaw <- aggregate(pcaRaw$x[,1:elRaw[2]],by=list(lab=pamRaw7$pamo$clustering),FUN=mean)
aggRaw <- as.matrix(aggRaw[,-1])
rownames(aggRaw) <- clusterFraction(pamRaw7$pamo$clustering)
```
```{r cc_heatmapRaw,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("heat-raw", "Heat Map of Raw Data")}
heatmap.2(as.matrix(aggRaw), trace="none",col=mycol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
## PAMK on Log Data
```{r cc_pamkLog, eval=TRUE, fig.cap=fig$cap("silLog", "Average silhouette width plot on Log Cor. Matrix")}
pamLog7 <- pamk(pcaLog$x[,1:elLog[2]],krange=K,usepam=FALSE,critout=FALSE)
#plot(pamLog5$crit, type='b')
```
```{r cc_aggLog, eval=TRUE}
aggLog <- aggregate(pcaLog$x[,1:elLog[2]],by=list(lab=pamLog7$pamo$clustering),FUN=mean)
aggLog <- as.matrix(aggLog[,-1])
rownames(aggLog) <- clusterFraction(pamLog7$pamo$clustering)
```
```{r cc_heatmapLog,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("heat-log", "Heat Map of Log Data")}
heatmap.2(as.matrix(aggLog), trace="none",col=mycol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
## PAMK on Ranked Data
```{r cc_pamkRank, eval=TRUE, fig.cap=fig$cap("silRank", "Average silhouette width plot on Rank Cor. Matrix")}
pamRank7 <- pamk(pcaRank$x[,1:elRank[2]],krange=K,usepam=FALSE,critout=FALSE)
#plot(pamRank5$crit, type='b')
```
```{r cc_aggRank, eval=TRUE}
aggRank <- aggregate(pcaRank$x[,1:elRank[2]],by=list(lab=pamRank7$pamo$clustering),FUN=mean)
aggRank <- as.matrix(aggRank[,-1])
rownames(aggRank) <- clusterFraction(pamRank7$pamo$clustering)
```
```{r cc_heatmapRank,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("heat-rank", "Heat Map of Ranked Data")}
heatmap.2(as.matrix(aggRank), trace="none",col=mycol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
## ARI
We are considering the channel types to be the "ground truth" here.
```{r cc_truth}
truth <- as.numeric(factor(channel.type))
```
Calculating the ARI for the raw data.
```{r ariRaw}
xRaw <- as.numeric(pamRaw7$pamo$clustering)
ariRaw <- foreach(i = 1:1e4, .combine='c') %dopar%{
y <- sample(xRaw)
mclust::adjustedRandIndex(truth,y)
}
```
```{r ariLog}
xLog <- as.numeric(pamLog7$pamo$clustering)
ariLog <- foreach(i = 1:1e4, .combine='c') %dopar%{
y <- sample(xLog)
mclust::adjustedRandIndex(truth,y)
}
```
```{r ariRank}
xRank <- as.numeric(pamRank7$pamo$clustering)
ariRank <- foreach(i = 1:1e4, .combine='c') %dopar%{
y <- sample(xRank)
mclust::adjustedRandIndex(truth,y)
}
```
```{r cc_ariPlots, w=9,h=4, fig.cap=fig$cap("ariPlot","ARI plots")}
par(mfrow=c(1,3))
hist(ariRaw)
abline(v = adjustedRandIndex(truth,xRaw), lwd=2, col='red')
hist(ariLog)
abline(v = adjustedRandIndex(truth,xLog), lwd=2, col='red')
hist(ariRank)
abline(v = adjustedRandIndex(truth,xRank), lwd=2, col='red')
```
```{r cc_ariNum}
adjustedRandIndex(truth, xRaw)
adjustedRandIndex(truth, xLog)
adjustedRandIndex(truth, xRank)
```
## ARI vs Embedding dimension.
Here we investigeta how ARI varies with the embedding dimension.
### Functions
```{r cc_f1}
avd <- function(pcaObj,elbow,truth){
###
### Given a pca object with an elbow as upperbound
### compute the ARI of truth versus the pamk clustering for each
### dimension from 1:elbow
###
require(foreach)
out <- foreach(i = 1:elbow, .combine="rbind") %do% {
pmk <- pamk(pcaObj$x[,1:i], krange=K, usepam=FALSE, critout=FALSE)
data.frame(Embedding_Dimension=i,ARI=adjustedRandIndex(truth, pmk$pamo$clustering))
}
return(out)
}
```
### ARI Plots
```{r arivdimRaw, eval=TRUE, w=9,h=4, fig.cap=fig$cap("ariVdim", "ARI vs DIM")}
avdRaw <- avd(pcaRaw, elRaw[2], truth)
avdLog <- avd(pcaLog, elLog[2], truth)
avdRank <- avd(pcaRank, elRank[2], truth)
avdRaw$group <- "Raw"
avdLog$group <- "Log"
avdRank$group <- "Rank"
avdDat <- data.frame(rbind(avdRaw, avdLog, avdRank))
#par(mfrow=c(1,3))
#plot(avdRaw, type='b', main="Raw Data")
#plot(avdLog, type='b', main="Log Data")
#plot(avdRank, type='b', main="Rank Data")
p1 <- ggplot(data=avdDat, aes(x=Embedding_Dimension, y=ARI,group=group,color=group)) +
geom_line(size=1.5) + geom_point(size=3) +
theme(axis.text=element_text(size=18)) + ggtitle("ARI vs. DIM")
print(p1)
```
# Plots of embeddings
First let's look at the 3-d embeddings and make some decisions from
there.
```{r cc_3dEmb, h=6,w=6}
pairs(pcaRaw$x[,1:3],col=exCol,pch=as.numeric(exType)+15,cex=2,main="Raw")
pairs(pcaLog$x[,1:3],col=exCol, pch=as.numeric(exType)+15,cex=2,main="Log")
pairs(pcaRank$x[,1:3],col=exCol,pch=as.numeric(exType)+15,cex=2,main="Rank")
```
```{r cc_3dRaw}
pca <- pcaRaw$x[,1:3]
rgl::plot3d(pca[,1],pca[,2],pca[,3],type='s', col=exCol, size=1, main="Raw")
rgl::rgl.texts(pca[,1],pca[,2],pca[,3], channel[ford], col=exCol, adj=c(0,1.5))
subid <- currentSubscene3d()
rglwidget(elementId="plot3dRaw")
```
```{r cc_3dLog}
pca <- pcaLog$x
rgl::plot3d(pca[,1],pca[,2],pca[,3],type='s', col=exCol, size=1, main="Log")
rgl::rgl.texts(pca[,1],pca[,2],pca[,3], channel[ford], col=exCol, adj=c(0,1.5))
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLog")
```
```{r cc_3dRank}
pca <- pcaRank$x[,1:3]
rgl::plot3d(pca[,1],pca[,2],pca[,3],type='s', col=exCol, size=3, main="Rank")
rgl::rgl.texts(pca[,1],pca[,2],pca[,3], channel[ford], col=exCol, adj=c(0,1.5))
subid <- currentSubscene3d()
rglwidget(elementId="plot3dRank")
```
# Pick $K=3$.
## PAMK=3 on Raw data
```{r cc_pamk3Raw, eval=TRUE, fig.cap=fig$cap("silRaw", "Average silhouette width plot on Raw Cor. Matrix")}
K <- 3
pamRaw3 <- pamk(pcaRaw$x[,1:elRaw[2]],krange=K,usepam=FALSE,critout=FALSE)
```
```{r cc_agg3Raw, eval=TRUE}
agg3Raw <- aggregate(pcaRaw$x[,1:elRaw[2]],by=list(lab=pamRaw3$pamo$clustering),FUN=mean)
agg3Raw <- as.matrix(agg3Raw[,-1])
rownames(agg3Raw) <- clusterFraction(pamRaw3$pamo$clustering)
```
```{r cc_heatmap3Raw,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("heat-3raw", "Heat Map of Raw Data")}
heatmap.2(as.matrix(agg3Raw), trace="none",col=mycol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
## PAMK=3 on Log Data
```{r cc_pamk3Log, eval=TRUE, fig.cap=fig$cap("silLog", "Average silhouette width plot on Log Cor. Matrix")}
pamLog3 <- pamk(pcaLog$x[,1:elLog[2]],krange=K,usepam=FALSE,critout=FALSE)
#plot(pamLog5$crit, type='b')
```
```{r cc_agg3Log, eval=TRUE}
agg3Log <- aggregate(pcaLog$x[,1:elLog[2]],by=list(lab=pamLog3$pamo$clustering),FUN=mean)
agg3Log <- as.matrix(agg3Log[,-1])
rownames(agg3Log) <- clusterFraction(pamLog3$pamo$clustering)
```
```{r cc_heatmap3Log,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("heat-3log", "Heat Map of Log Data")}
heatmap.2(as.matrix(agg3Log), trace="none",col=mycol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
## PAMK=3 on Ranked Data
```{r cc_pamk3Rank, eval=TRUE, fig.cap=fig$cap("silRank", "Average silhouette width plot on Rank Cor. Matrix")}
pamRank3 <- pamk(pcaRank$x[,1:elRank[2]],krange=K,usepam=FALSE,critout=FALSE)
#plot(pamRank5$crit, type='b')
```
```{r cc_agg3Rank, eval=TRUE}
agg3Rank <- aggregate(pcaRank$x[,1:elRank[2]],by=list(lab=pamRank3$pamo$clustering),FUN=mean)
agg3Rank <- as.matrix(agg3Rank[,-1])
rownames(agg3Rank) <- clusterFraction(pamRank3$pamo$clustering)
```
```{r cc_heatmap3Rank,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("heat-3rank", "Heat Map of Ranked Data")}
heatmap.2(as.matrix(agg3Rank), trace="none",col=mycol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
## ARI Plots for $K=3$
```{r arivdim3Raw, eval=TRUE, w=9,h=4, fig.cap=fig$cap("ariVdim", "ARI vs DIM")}
avd3Raw <- avd(pcaRaw,elRaw[2],truth)
avd3Log <- avd(pcaLog,elLog[2],truth)
avd3Rank <- avd(pcaRank,elRank[2],truth)
avd3Raw$group <- "Raw"
avd3Log$group <- "Log"
avd3Rank$group <- "Rank"
avd3Dat <- data.frame(rbind(avd3Raw, avd3Log, avd3Rank))
p2 <- ggplot(data=avd3Dat, aes(x=Embedding_Dimension, y=ARI,group=group,color=group)) +
geom_line(size=1.5) + geom_point(size=3) +
theme(axis.text=element_text(size=18)) + ggtitle("ARI vs. DIM")
print(p2)
```
```{r test, eval=FALSE}
pca <- pcaRaw$x[,1:3]
rgl::plot3d(pca[,1],pca[,2],pca[,3],type='s', col=exCol, size=1, main="Raw")
rgl::rgl.texts(pca[,1],pca[,2],pca[,3], channel[ford], col=exCol, adj=c(0,1.5))
subid <- currentSubscene3d()
rglwidget(elementId="plot3dRaw")
open3d()
plot3d( cube3d(col = "green") )
M <- par3d("userMatrix")
if (!rgl.useNULL())
play3d( par3dinterp(time = (0:2)*4, userMatrix = list(M,
rotate3d(M, pi/2, 1, 0, 0),
rotate3d(M, pi/2, 0, 0, 1) ) ),
duration = 18 )
movie3d( spin3d(), duration = 5)
subid <- currentSubscene3d()
rglwidget(elementId="plot3dRank")
```