1 files changed, 108 insertions, 116 deletions
diff --git a/measures.R b/measures.R
index 99aa4df..057a114 100644
--- a/measures.R
+++ b/measures.R
@@ -1,30 +1,5 @@
-stress <- function(Dx, Dy) {
-  if (any(Dx != t(Dx)) || any(Dy != t(Dy))) {
-    stop("Dx and Dy must be symmetric")
-  }
-
-  Dx <- as.matrix(Dx)
-  Dy <- as.matrix(Dy)
-  if (nrow(Dx) != nrow(Dy)) {
-    stop("Dx and Dy must have the same number of elements")
-  }
-
-  n <- nrow(Dx)
-  s <- vector("numeric", n)
-  for (i in 1:n) {
-    s[i] <- 0
-    for (j in 1:n) {
-      if (i == j) {
-        next
-      }
-
-      s[i] = s[i] + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j]
-    }
-    s[i] = s[i] / sum(D[i, ])
-  }
-
-  s
-}
+# Measures used as manipulation targets.
+# NOTE: This file is only a library, see run.R for more details.
 
 NP <- function(Dx, Dy, k = 9) {
   if (any(Dx != t(Dx)) || any(Dy != t(Dy))) {
@@ -53,112 +28,129 @@ NP <- function(Dx, Dy, k = 9) {
   preservation
 }
 
+#silhouette <- function(Dy, labels) {
+#  if (any(t(Dy) != Dy)) {
+#    stop("Dy must be symmetric")
+#  }
+#
+#  Dy <- as.matrix(Dy)
+#  n <- nrow(Dy)
+#  cohesion <- vector("numeric", n)
+#  separation <- vector("numeric", n)
+#
+#  for (i in 1:n) {
+#    label <- labels[i]
+#    separation[i] <- min(Dy[i, labels != label])
+#    cohesion[i] <- mean(Dy[i, labels[-i] == label])
+#  }
+#
+#  (separation - cohesion) / max(separation, cohesion)
+#}
 silhouette <- function(Dy, labels) {
-  if (any(t(Dy) != Dy)) {
-    stop("Dy must be symmetric")
-  }
-
-  Dy <- as.matrix(Dy)
   n <- nrow(Dy)
-  cohesion <- vector("numeric", n)
-  separation <- vector("numeric", n)
-
-  for (i in 1:n) {
-    label <- labels[i]
-    separation[i] <- min(Dy[i, labels != label])
-    cohesion[i] <- mean(Dy[i, labels[-i] == label])
+  if (n != length(labels)) {
+    stop("Number of labels doesn't match number of points")
   }
 
-  silh <- (separation - cohesion) / max(separation, cohesion)
-}
-
-d2p <- function(D, perplexity = 9, tol = 1e-5, max.tries = 50) {
-  if (any(D != t(D))) {
-    stop("D must be symmetric")
+  A_labels <- list()
+  B_labels <- list()
+  unique.labels <- unique(labels)
+  for (l in unique.labels) {
+    A_labels[[l]] <- labels == l
+    B_labels[[l]] <- labels != l
   }
 
-  D <- as.matrix(D)
-  P <- matrix(data=0, nrow=nrow(D), ncol=ncol(D))
-  n <- nrow(D)
-  beta <- rep(1, n)
-  logU <- log(perplexity)
-
+  # This factor excludes self comparisons when computing cohesion
+  m.factor <- n / (n-1)
+  s <- vector("numeric", n)
   for (i in 1:n) {
-    #denom <- sum(exp(-D[i, ] / sigmas))
-    #P[i, ] <- exp(-D[i, ] / sigmas) / denom
-
-    betaMin <- -Inf
-    betaMax <-  Inf
-    Di <- D[i, -i]
-
-    tries <- 0
-    repeat {
-      Pi <- exp(-Di * beta[i])
-      sumPi <- sum(Pi)
-      H <- log(sumPi) + beta[i] * sum(Di * Pi) / sumPi
-      Pi <- Pi / sumPi
-      Hdiff <- H - logU
-
-      if (abs(Hdiff) < tol || tries > max.tries) {
-        break
-      }
-
-      if (Hdiff > 0) {
-        betaMin <- beta[i]
-        beta[i] <- if (is.finite(betaMax)) {
-                     (beta[i] + betaMax) / 2
-                   } else {
-                     beta[i] * 2
-                   }
-      } else {
-        betaMax <- beta[i]
-        beta[i] <- if (is.finite(betaMin)) {
-                     (beta[i] + betaMin) / 2
-                   } else {
-                     beta[i] / 2
-                   }
-      }
+    label_i <- labels[i]
 
-      tries <- tries + 1
+    a <- m.factor * mean(Dy[i, A_labels[[label_i]]]) # cohesion
+    b <- Inf
+    for (l in unique(labels)) {
+      b <- min(mean(Dy[i, B_labels[[l]]]), b) # separation
     }
 
-    P[i, -i] <- Pi
+    s[i] <- (b - a) / max(b, a)
   }
 
-  list(P = P, beta = beta) # sigmas = sqrt(1 / beta)
+  s
 }
 
-d2p.beta <- function(D, beta) {
-  if (any(D != t(D))) {
-    stop("D must be symmetric")
-  }
+#stress <- function(Dx, Dy) {
+#  if (any(Dx != t(Dx)) || any(Dy != t(Dy))) {
+#    stop("Dx and Dy must be symmetric")
+#  }
+#
+#  Dx <- as.matrix(Dx)
+#  Dy <- as.matrix(Dy)
+#  if (nrow(Dx) != nrow(Dy)) {
+#    stop("Dx and Dy must have the same number of elements")
+#  }
+#
+#  n <- nrow(Dx)
+#  s <- vector("numeric", n)
+#  for (i in 1:n) {
+#    s[i] <- 0
+#    for (j in 1:n) {
+#      if (i == j) {
+#        next
+#      }
+#
+#      s[i] = s[i] + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j]
+#    }
+#    s[i] = s[i] / sum(D[i, ])
+#  }
+#
+#  s
+#}
+stress <- function(Dx, Dy) {
+  n <- nrow(Dx)
 
-  D <- as.matrix(D)
-  n <- nrow(D)
-  P <- matrix(data=0, nrow=nrow(D), ncol=ncol(D))
-  for (i in 1:n) {
-    P[i, -i] <- exp(-D[i, -i] * beta[i])
-    P[i, -i] <- P[i, -i] / sum(P[i, -i])
+  C <- 0
+  D <- 0
+  for (i in 1:(n-1)) {
+    for (j in (i + 1):n) {
+      C <- C + Dx[i, j]
+      D <- D + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j]
+      if (is.nan(D)) {
+        loginfo("%d, %d", i, j)
+        loginfo("%f", Dx[i, j])
+        stop("NaN")
+      }
+    }
   }
 
-  P
+  D / C
 }
 
-klDivergence <- function(P, Q, eps = 1e-12) {
-  if (nrow(P) != ncol(P) || nrow(Q) != ncol(Q)) {
-    stop("P and Q must be square")
-  }
-  if (nrow(P) != nrow(Q)) {
-    stop("P and Q must have the same number of elements")
-  }
-
-  P[P < eps] <- eps
-  Q[Q < eps] <- eps
-  n <- nrow(P)
-  d <- vector("numeric", n)
-  for (i in 1:n) {
-    d[i] <- sum(P[i, -i] * log(P[i, -i] / Q[i, -i]))
-  }
-
-  d
+# NOTE: This function requires the 'klmeasure' binary from:
+# http://research.cs.aalto.fi/pml/software/dredviz/
+smoothed.pr <- function(Dx, Dy, k) {
+  # Create SOM_PAK file for Dx
+  Dx.fname <- tempfile()
+  Dx.f <- file(Dx.fname, "w")
+  cat(sprintf("%d\n", ncol(Dx)), file=Dx.f)
+  write.table(Dx, Dx.f, col.names=F, row.names=F)
+  close(Dx.f)
+
+  # Create SOM_PAK file for Dy
+  Dy.fname <- tempfile()
+  Dy.f <- file(Dy.fname, "w")
+  cat(sprintf("%d\n", ncol(Dy)), file=Dy.f)
+  write.table(Dy, Dy.f, col.names=F, row.names=F)
+  close(Dy.f)
+
+  output <- system2("./klmeasure",
+                    stdout=T,
+                    args=c("--datadist", Dx.fname,
+                           "--projdist", Dy.fname,
+                           "--neighbors", sprintf("%d", k)))
+  output <- strsplit(output[2], " | ", fixed=T)
+  output <- unlist(output)
+
+  file.remove(Dx.fname, Dy.fname)
+
+  list(s.precision=as.double(output[1]), s.recall=as.double(output[2]))
 }