# Functions to calculate dissimilarity between sets of communities using the metrics corrected for undersampling # Based in Chao et al. (2005) # Currently only Jaccard implemented (sorensen will probably be implemented in future versions) # Uses only matrix operations (no for loops), it is slightly faster than the function available in the 'fossil' package # Easy to use with multiple sampling sites (output in the same format as 'vegdist' in the 'vegan' package) # Creator: CSDambros 06-Apr-2016 chaodist<-function(comm,method="jaccard"){ if(method!="jaccard"){warning("Only jaccard currently implemented, switching to jaccard")} mn<-rowSums(comm) pi<-comm/mn mndiv<-(mn-1)/mn commPA<-comm>0 f1<-tcrossprod(comm==1,commPA) #faster than f1<-(comm==1)%*%t(commPA) f2<-tcrossprod(comm==2,commPA) #faster than f2<-(comm==2)%*%t(commPA) f2[f2==0]<-1 # 1 if no doubletons P1<-tcrossprod(pi,commPA) #faster than P1<-pi%*%t(commPA) P2<-t(t(f1/(2*f2))*mndiv) P3<-tcrossprod(pi,1-commPA) #faster than P3<-pi%*%t(1-commPA) U<-P1+P2*P3 U[U>1]<-1 UV<-t(U)*U Jmat<-UV/((U+t(U))-(UV)) Jmat[is.nan(Jmat)]<-0 return(1-as.dist(Jmat)) } #USE # #chaodist(comm) #comm is a matrix with objects (usually sampling sites) in rows and attributes as columns (usually species)