library(factoextra)
library(ggplot2)
library(ggdendro)
library(cluster)

## Cluster the standardized drought dataset hierarchically (Ward D2) and draw
## the result as a horizontal dendrogram whose leaf labels are colored by
## cluster membership.

## Number of clusters the tree is cut into.
n.clusters <- 4
## Height at which the dashed reference line is drawn on the dendrogram.
## NOTE(review): hard-coded; presumably picked for this dataset so that it
## separates the clusters -- re-check if the data or n.clusters changes.
cut.height <- 18

## Read in the drought dataset. It contains unique identifiers, names,
## lat-lon values, and the drought anomalies for each month (Jul 98 - Jun 01)
drought.data.raw <- read.csv("path_to_dataset")

## Transpose drought data
drought.data <- t(drought.data.raw)

## Ensure data is in matrix form
drought.data.matrix <- as.matrix(drought.data)

## Save the city names.
## The original read them from `trim_matrix`, which is never defined; the
## transposed matrix is used instead because its first column is the one
## removed before scaling below.
## NOTE(review): assumes column 1 of the transposed matrix holds the station
## names -- confirm against the CSV layout.
city.names <- as.character(drought.data.matrix[, 1])

## Drop the name column and keep the remaining values as a matrix.
z <- drought.data.matrix[, -1, drop = FALSE]

## Transposing a data frame that has any character column yields a character
## matrix, so convert the values back to numbers before doing arithmetic.
## Entries that are not numeric become NA (with a warning).
storage.mode(z) <- "double"

## Scale the values of the drought dataset to standardize it
means <- apply(z, 2, mean)
sds <- apply(z, 2, sd)
drought.matrix.scaled <- scale(z, center = means, scale = sds)

## Compute the dissimilarity matrix
dist.matrix <- dist(drought.matrix.scaled, method = "euclidean")

## Create agglomerative clustering data with Ward D2 method
hclust.data <- hclust(dist.matrix, method = "ward.D2")

## Assign the weather station names to the dataset
hclust.data$labels <- city.names

## Create the dendrogram using the ggdendro library
dendro.data <- dendro_data(hclust.data)

## Cut the tree into n.clusters groups
clusters <- cutree(hclust.data, k = n.clusters)

## Create a data frame to preserve the names and colors of the grouped clusters
cluster.df <- data.frame(label = names(clusters), cluster = factor(clusters))

## dendro.data[["labels"]] has the labels; merge with cluster.df based on the
## label column so every leaf label carries its cluster
dendro.data[["labels"]] <- merge(
  dendro.data[["labels"]],
  cluster.df,
  by = "label"
)

## Plot the dendrogram; note use of color = cluster in geom_text
ggplot() +
  geom_segment(
    data = segment(dendro.data),
    aes(x = x, y = y, xend = xend, yend = yend)
  ) +
  geom_text(
    data = label(dendro.data),
    aes(x = x, y = y, label = label, color = cluster),
    hjust = 0, # constant setting, so it lives outside aes()
    size = 3
  ) +
  coord_flip() +
  scale_y_reverse(expand = c(0.2, 0)) +
  geom_hline(yintercept = cut.height, linetype = 2) +
  labs(title = "Dendrogram of Cumulative Drought Values in Kentucky, July 1998 - June 2001") +
  theme(
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_rect(fill = "white"),
    panel.grid = element_blank()
  )