## online resource: http://arxiv.org/pdf/1105.0121.pdf

## load 'agnes' function
library(cluster)

## Example with 'airquality' data
data("airquality")
aq <- subset(
  airquality,
  !is.na(Ozone) & !is.na(Solar.R),
  select = c("Ozone", "Solar.R")
)
plot(aq$Ozone, aq$Solar.R, pch = 20, xlab = "Ozone", ylab = "Solar.R")

## Plot a two-column data set and outline each agglomerative cluster with
## its convex hull.
##
## dat: two-column data frame (or matrix) of points to cluster and plot.
## k:   number of clusters to cut the agnes tree into.
## ...: forwarded to cluster::agnes (e.g. metric=, method=).
##
## Returns (invisibly) the integer cluster assignment from cutree(), one
## entry per row of 'dat'.
plot_agg <- function(dat, k = 1, ...) {
  ctr <- cutree(agnes(dat, ...), k)
  plot(dat, pch = 20)
  for (cls in unique(ctr)) {
    ## Subset the data that was actually clustered ('dat'), not the global
    ## 'aq'; index by row so this works for data frames and matrices alike.
    pts <- dat[ctr == cls, , drop = FALSE]
    hull <- chull(pts)
    ## Repeat the first hull vertex to close the polygon.
    lines(pts[c(hull, hull[1]), , drop = FALSE])
  }
  invisible(ctr)
}

plot_agg(aq, k = 50)
plot_agg(aq, k = 30)
plot_agg(aq, k = 10)
plot_agg(aq, k = 5)

plot_agg(aq, k = 5, metric = "euclidean", method = "average")
plot_agg(aq, k = 5, metric = "euclidean", method = "complete")
plot_agg(aq, k = 5, metric = "euclidean", method = "single")
plot_agg(aq, k = 5, metric = "manhattan", method = "average")
plot_agg(aq, k = 5, metric = "manhattan", method = "complete")
plot_agg(aq, k = 5, metric = "manhattan", method = "single")

## compute within cluster scatter (W)
##
## k:   number of clusters to cut the agnes tree into.
## dat: data to cluster.
## ...: forwarded to cluster::agnes (e.g. metric=, method=).
##
## Returns the sum, over clusters, of all pairwise dissimilarities between
## members of the same cluster (each unordered pair counted once).
comp_scat <- function(k = 1, dat, ...) {
  agg <- agnes(dat, keep.diss = TRUE, ...)
  dis <- as.matrix(agg$diss)
  ctr <- cutree(agg, k)
  per_cluster <- vapply(
    unique(ctr),
    function(cls) {
      idx <- which(ctr == cls)
      ## The full matrix is symmetric, so every pair appears twice.
      sum(dis[idx, idx]) / 2
    },
    numeric(1)
  )
  sum(per_cluster)
}

n_clusters <- seq_len(nrow(aq))
plot(
  n_clusters,
  vapply(n_clusters, comp_scat, numeric(1), dat = aq),
  xlab = "Number of Clusters",
  ylab = "Within-cluster Scatter (W)",
  type = "b",
  pch = 20
)