wrote:
There are 2 different Synap channels (2 different antibodies were
used), so that part is fine.
And 2 different VGluT1 channels (same antibody but done at different
times)
The NOS channel is the same, so count it as one even though it appears
twice. It is listed two times because it can be found at both excitatory
and inhibitory synapses. This is where your count of 25 comes, even
though there are 24 channels.
I would also add the 2 Synap channels to the Inhibitory presynaptic
category - there is supposed to be synapsin there, but at lower levels
compared to excitatory presynaptic category.
- Note: The order of the channels are given by line `227` in the `kohenen.m` file which can be found in the dropbox folder.
- `Synap` and `Synap` have been augmented to `Synap_1` and `Synap_2` for clarity.
- `VGlut1` and `VGlut1` have been augmented to `VGlut1_t1` and `VGlut1_t2` to distinguish between the different times (which are unknown).
```{r cc_data, eval=TRUE}
feat <- fread("../Data/synapsinR_7thA.tif.Pivots.txt.2011Features.txt",showProgress=FALSE)
dim(feat)
channel <- c('Synap_1','Synap_2','VGlut1_t1','VGlut1_t2','VGlut2','Vglut3',
'psd','glur2','nmdar1','nr2b','gad','VGAT',
'PV','Gephyr','GABAR1','GABABR','CR1','5HT1A',
'NOS','TH','VACht','Synapo','tubuli','DAPI')
channel.type <- c('ex.pre','ex.pre','ex.pre','ex.pre','ex.pre','in.pre.small',
'ex.post','ex.post','ex.post','ex.post','in.pre','in.pre',
'in.pre','in.post','in.post','in.post','in.pre.small','other',
'ex.post','other','other','ex.post','none','none')
nchannel <- length(channel)
nfeat <- ncol(feat) / nchannel
fchannel <- as.numeric(factor(channel.type,
levels= c("ex.pre","ex.post","in.pre","in.post","in.pre.small","other","none")
))
ford <- order(fchannel)
#ccol <- rainbow(max(fchannel))[fchannel]
Syncol <- c("#197300","#5ed155","#660000","#cc0000","#ff9933","mediumblue","gold")
ccol <- Syncol[fchannel]
fname <- as.vector(sapply(channel,function(x) paste0(x,paste0("F",0:5))))
names(feat) <- fname
fcol <- rep(ccol, each=6)
#mycol <- colorpanel(100,"blue","grey","red")
#mycol <- redgreen(100)
#mycol <- colorpanel(100, "purple", "black", "blue")
mycol <- colorpanel(100, "purple", "black", "green")
mycol2 <- matlab.like(nchannel)
```
## Transformations
Each of the channels has an arbitrary independent linear transformation and thus
converting to z-scores or quantiles is required.
Selecting only the `f0` features, we will use both the raw data and a
transformed version, transforming by adding 1 and taking
the log base 10 for each column and scaling.
```{r cc_featF0, eval=TRUE}
f0 <- seq(1,ncol(feat),by=nfeat)
featF0 <- subset(feat, select=f0)
#featF0s <- scale(featF0, scale=TRUE, center=TRUE)
#flog <- apply(X=(featF0+1),2,log10)
#f01 <- data.table(apply(X=featF0, 2, function(x){((x-min(x))/(max(x)-min(x)))}))
#qs <- apply(f01, 2, quantile, probs=c(.025,.5,.75,.99))
### Tunrcating to the inner 98%
tmpG <- featF0[,lapply(.SD,function(x){x >= quantile(x,prob=.01)})]
tmpL <- featF0[,lapply(.SD,function(x){x <= quantile(x,prob=1 - 0.01)})]
ind <- apply(tmpG,1,all)
ind2 <- apply(tmpL,1,all)
fil98F0 <- featF0[ind & ind2,]
### taking log_10
flog <- log10(fil98F0)
fs <- scale(flog, center=TRUE, scale=TRUE)
### scaling between [0,1]
f01 <- data.table(apply(X=flog, 2, function(x){((x-min(x))/(max(x)-min(x)))}))
```
The data
### Kernel Density Estimates of the marginals
```{r cc_kde1, eval=TRUE, cache=FALSE,w=8,h=8,fig.cap=fig$cap("f0kde1","Kernel density estimates for each channel, on scaled data.")}
df <- melt(as.matrix(flog))
names(df) <- c("ind","channel","value")
ts <- 22
gg1 <- ggplot(df, aes(x=value)) +
scale_color_manual(values=ccol) +
scale_fill_manual(values=ccol) +
geom_histogram(aes(y=..density..,group=channel,colour=channel),bins=100) +
geom_density(aes(group=channel, color=channel),size=1.5) +
facet_wrap(~ channel, scale='fixed') +
theme(plot.title=element_text(size=ts),
axis.title.x=element_text(size=ts),
axis.title.y=element_text(size=ts),
legend.title=element_text(size=ts),
legend.text=element_text(size=ts-2),
axis.text=element_text(size=ts-2),
strip.text=element_text(size=ts))+
ggtitle("Kernel Density Estimates of f01 transformed data ")
print(gg1)
```
```{r cc_kde2, eval=TRUE, cache=FALSE,w=8,h=6,fig.cap=fig$cap("f0kde2","Kernel density estimates for each channel, on f01 data.")}
df <- melt(as.matrix(f01))
names(df) <- c("ind","channel","value")
ts <- 22
gg2 <- ggplot(df, aes(x=value)) +
scale_color_manual(values=ccol) +
scale_fill_manual(values=ccol) +
geom_histogram(aes(y=..density..,group=channel,colour=channel),bins=21) +
geom_density(aes(group=channel, color=channel),size=1.5) +
facet_wrap(~ channel, scale='fixed') +
theme(plot.title=element_text(size=ts),
axis.title.x=element_text(size=ts),
axis.title.y=element_text(size=ts),
legend.title=element_text(size=ts),
legend.text=element_text(size=ts-2),
axis.text=element_text(size=ts-2),
strip.text=element_text(size=ts))+
ggtitle("Kernel Density Estimates of f01 transformed data ")
print(gg2)
df <- melt(as.matrix(flog))
names(df) <- c("ind","channel","value")
gg3 <- ggplot(df, aes(x=value)) +
scale_color_manual(values=ccol) +
geom_density(aes(group=channel, colour=channel))+
facet_wrap( ~ channel, scale='free')+
ggtitle("Kernel Density Estimates on log transformed data.")
print(gg3)
```
```{r cc_kde3, eval=TRUE, cache=FALSE,w=8,h=4,fig.cap=fig$cap("f0kde3","Kernel density estimates for each channel, on log transformed scaled data.")}
df <- melt(as.matrix(fs))
names(df) <- c("ind","channel","value")
ggplot(df, aes(x=value)) +
scale_color_manual(values=ccol) +
geom_density(aes(group=channel, colour=channel))
```
## K-Means Level 1
** Note that a seed is being set for the random initialization of K-means. **
```{r cc_kmeans,eval=TRUE,echo=TRUE}
K1 <- 12 ## Set the upperbound for k-means.
## Run kmeans on the untransformed data
kvecF0 <- foreach(i = 1:K1) %dopar% {
set.seed(2^13 - 1)
kmeans(featF0,centers=i)
}
## Run kmeans on the
## log \circ scale transformed data
kvecfs <- foreach(i = 1:K1) %dopar% {
set.seed(2^13 - 1)
kmeans(fs,centers=i)
}
```
Here we calculate the `bic` from a user defined function and
pick the maximum.
```{r cc_bic2}
bicF0 <- kbic(kvecF0)
bicfs1 <- kbic(kvecfs)
mxF0 <- t(c(which(bicF0 == max(bicF0)), max(bicF0)))
mxfs1 <- t(c(which(bicfs1 == max(bicfs1)), max(bicfs1)))
```
```{r cc_plot-bic2,w=8,h=5}
par(mfrow=c(1,2))
plot(bicF0, type='b', main="BIC on raw data.")
points(mxF0[,1], mxF0[,2], col='red', pch=20)
plot(bicfs1, type='b', main="BIC on scaled data.")
points(mxfs1[,1], mxfs1[,2], col='red', pch=20)
```
### Heat maps: Untransformed Data
For the following we manualy choose 2 clusters.
```{r cc_agg, eval=TRUE}
## Formatting data for heatmap
feat2 <- aggregate(featF0,by=list(lab=kvecF0[[2]]$cluster),FUN=mean)
feat2 <- as.matrix(feat2[,-1])
rownames(feat2) <- clusterFraction(kvecF0[[2]])
ford <- order(fchannel)
```
```{r cc_km1-heatmap,eval=TRUE,w=6,h=6,fig.cap=fig$cap("heat1", "Heatmap of the cluster means vs channels. Rows and columns are rearranged according to hclust.")}
heatmap.2(as.matrix(feat2), trace="none",col=mycol,colCol=ccol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
```{r cc_km1-heatmapSorted,eval=TRUE,w=6,h=6,fig.cap=fig$cap("heat1", "Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.")}
heatmap.2(as.matrix(feat2[,ford]),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90)
```
### Heat maps: Transformed Data
```{r cc_aggfs, eval=TRUE}
## Formatting data for heatmap
feat2fs <- aggregate(fs,by=list(lab=kvecfs[[2]]$cluster),FUN=mean)
feat2fs <- as.matrix(feat2fs[,-1])
rownames(feat2fs) <- clusterFraction(kvecfs[[2]])
ford <- order(fchannel)
```
```{r cc_km1-heatmapFS,eval=TRUE,w=6,h=6,fig.cap=fig$cap("heat1", "Heatmap of the cluster means vs channels. Rows and columns are rearranged according to hclust.")}
heatmap.2(as.matrix(feat2fs), trace="none",col=mycol,colCol=ccol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
```{r cc_km1-heatmapSortedFS,eval=TRUE,w=6,h=6,fig.cap=fig$cap("heat1", "Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.")}
heatmap.2(as.matrix(feat2fs[,ford]),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90)
```
## K-Means Level 2
```{r cc_level21, eval=TRUE, echo=TRUE}
synOnly<-channel.type!= "none" & channel.type!="other"
fs11 <- fs[kvecfs[[2]]$cluster==1,][,synOnly]
fs12 <- fs[kvecfs[[2]]$cluster==2,][,synOnly]
dim(fs11)
dim(fs12)
## Run kmeans on the untransformed data
kvecL21synOnly <- foreach(i = 1:K1) %dopar% {
set.seed(2^13 - 1)
kmeans(fs11,centers=i)
}
```
## PAMK: Level 1
The `pamk` function performs something similar to K-means.
```{r cc_PAMkmeans1, eval=TRUE}
pamout <- pamk(fs, krange=1:K1,usepam=FALSE,critout=FALSE)
labs <- pamout$pamobject$clustering
```
```{r cc_depth1.4,eval=TRUE,echo=TRUE,fig.cap=fig$cap("pamk1", "Average silhouette width plot of the level 1 clustering.")}
plot(pamout$crit,type="b")
```
```{r cc_f5, eval=TRUE}
feat5 <- aggregate(fs,by=list(lab=pamout$pamo$clustering),FUN=mean)
feat5 <- as.matrix(feat5[,-1])
rownames(feat5) <- clusterFraction(labs)
### Reordering based on synapse type.
ford <- order(fchannel)
```
```{r cc_heat,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("depth1-heat", "Default Sorting")}
heatmap.2(as.matrix(feat5), trace="none",col=mycol,colCol=ccol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
```{r cc_sorted,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("depth1-heat-sorted", "Sorted by Synapse type")}
heatmap.2(as.matrix(feat5[,ford]),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90)
```
## PAMK: Level 2
### PAMK: Level 2.1
We begin by splitting the data into the two clusters as
calculated above.
```{r cc_fs-lv2, eval=TRUE}
fs1 <- fs[pamout$pamobject$clustering==1,]
fs2 <- fs[pamout$pamobject$clustering==2,]
```
```{r cc_PAMkmeans2, eval=TRUE}
pamout21 <- pamk(fs1, krange=1:K1,usepam=FALSE,critout=FALSE)
labs21 <- pamout21$pamobject$clustering
```
```{r cc_sil-level2,eval=TRUE,echo=TRUE,fig.cap=fig$cap("pamk2", "Average silhouette width plot of the level 2 clustering.")}
plot(pamout21$crit,type="b")
```
```{r cc_f21, eval=TRUE}
agg21 <- aggregate(fs1,by=list(lab=pamout21$pamo$clustering),FUN=mean)
agg21 <- as.matrix(agg21[,-1])
cs <- paste(paste0("C", 1:pamout21$nc),
foreach(i =1:pamout21$nc,.combine=c)%do%{(sprintf("%.2g",sum(labs21==i)/length(labs21)))})
rownames(agg21) <- cs
```
```{r cc_heat21,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("depth21-heat", "Default Sorting")}
heatmap.2(as.matrix(agg21), trace="none",col=mycol,colCol=ccol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
```{r cc_sorted21,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("depth21-heat-sorted", "Sorted by Synapse type")}
heatmap.2(as.matrix(agg21[,ford]),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90)
```
### PAMK: Level 2.2
```{r cc_PAMkmeans22, eval=TRUE}
pamout22 <- pamk(fs2, krange=1:K1,usepam=FALSE,critout=FALSE)
labs22 <- pamout22$pamobject$clustering
```
```{r cc_sil-level22,eval=TRUE,echo=TRUE,fig.cap=fig$cap("pamk2", "Average silhouette width plot of the level 2 clustering.")}
plot(pamout22$crit,type="b")
```
```{r cc_f22, eval=TRUE}
agg22 <- aggregate(fs2,by=list(lab=pamout22$pamo$clustering),FUN=mean)
agg22 <- as.matrix(agg22[,-1])
cs <- paste(paste0("C", 1:pamout22$nc),
foreach(i =1:pamout22$nc,.combine=c)%do%{(sprintf("%.2g",sum(labs22==i)/length(labs22)))})
rownames(agg22) <- cs
```
```{r cc_heat22,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("depth21-heat", "Default Sorting")}
heatmap.2(as.matrix(agg22), trace="none",col=rev(mycol),colCol=ccol,cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90) #
```
```{r cc_sorted22,eval=TRUE,w=6,h=6,echo=TRUE,fig.cap=fig$cap("depth21-heat-sorted", "Sorted by Synapse type")}
heatmap.2(as.matrix(agg22[,ford]),dendrogram='row',Colv=NA,trace="none", col=rev(mycol),colCol=ccol[ford],cexRow=0.8, keysize=1,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90)
```
```{r cc_corFS, eval=TRUE, fig.cap=fig$cap("corFS", "Correlation on log ยบ scaled data, reordered by synapse type.")}
tmp <- as.numeric(table(fchannel))
cmat <- cor(fs)
corrplot(cmat[ford,ford],method="color",tl.col=ccol[ford])
corrRect(tmp,col=Syncol,lwd=4)
```
# Exploring pair-wise relationships
```{r lattice1, eval=FALSE,w=25,h=25, fig.cap=fig$cap("lat", "Lattice Regression plots.")}
set.seed(2^13)
s1 <- sample(1e6,1e4)
dm <- as.matrix(f01[s1,])
c1 <- combn(24,2)
d2 <- foreach(i = 1:dim(c1)[2],.combine=rbind)%do%{
colset1 <- as.matrix(dm[,c1[,i]])
colnames(colset1) <- NULL
l <- paste(channel[c1[,i]][2],channel[c1[,i][1]],sep='~')
data.frame(colset1,l,check.names=FALSE)
}
colnames(d2) <- c("x", "y", "g")
p1 <- xyplot(y ~ x | g, data=d2,
panel=function(x,y,...){
panel.xyplot(x,y,...)
fit <- lm(y ~ x)
panel.lines(x,fitted(fit), col.line='red', lwd=2)
})
print(p1)
p2 <- xyplot(y ~ x | g, data=d2, type=c('p', 'smooth'), col.line='red',lwd=3,pch='.',scales = list(y = list(relation = "free"),x = list(relation = "free")))
pdf('~/Desktop/tmp1.pdf', height=40,width=60)
print(p1)
dev.off()
tmp <- as.matrix(f01[s1,])
plot(Synap_2F0 ~ Synap_1F0, data=f01)
chull(tmp)
polygon(tmp[chull(tmp),])
bagplot(tmp[,c(1,2)])
```