# Speedup factor of LDR # source() or copy/paste this file for a quick demo # Instead of kmeans(X), call kmeans(t(ldr(t(X))) ldr=function(x) { #rows are features, columns are samples n = ncol(x) if (nrow(x) <= n) return(x) S = qr(x - x[,n]) R = qr.R(S)[,S$pivot] return(R[-n,]) } # Compare run times n = 300 p = 20e3 nstart = 25 # Create a data matrix with 3 clusters X = matrix(rnorm(n*p), n, p) X[1:(n/3), ] = X[1:(n/3), ] + 2 X[(n/3):(2*n/3), ] = X[(n/3):(2*n/3), ] - 2 # The original X stx = system.time(kmx <- kmeans(X, 3, nstart=nstart)) # The LDR-transformed version of X will be called Y Y = t(ldr(t(X))) # just before calling the clustering function sty1 = system.time(kmy <- kmeans(Y, 3, nstart=nstart)) sty2 = system.time(kmy <- kmeans(t(ldr(t(X))), 3, nstart=nstart)) # This adds the LDR time print(kmx$withinss) # compare print(kmy$withinss) # compare print(table(kmx$cluster, kmy$cluster)) # compare #print(stx[1]) # original X #print(sty1[1]) # with ldr print(c("Speed up factor:", stx[1] / sty1[1]), quote=0) #print(sty2[1]) # with ldr print(c("Speed up factor counting the LDR time:", stx[1] / sty2[1]), quote=0)