aboutsummaryrefslogtreecommitdiff
path: root/measures.R
diff options
context:
space:
mode:
Diffstat (limited to 'measures.R')
-rw-r--r--measures.R224
1 files changed, 108 insertions, 116 deletions
diff --git a/measures.R b/measures.R
index 99aa4df..057a114 100644
--- a/measures.R
+++ b/measures.R
@@ -1,30 +1,5 @@
-stress <- function(Dx, Dy) {
- if (any(Dx != t(Dx)) || any(Dy != t(Dy))) {
- stop("Dx and Dy must be symmetric")
- }
-
- Dx <- as.matrix(Dx)
- Dy <- as.matrix(Dy)
- if (nrow(Dx) != nrow(Dy)) {
- stop("Dx and Dy must have the same number of elements")
- }
-
- n <- nrow(Dx)
- s <- vector("numeric", n)
- for (i in 1:n) {
- s[i] <- 0
- for (j in 1:n) {
- if (i == j) {
- next
- }
-
- s[i] = s[i] + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j]
- }
- s[i] = s[i] / sum(D[i, ])
- }
-
- s
-}
+# Measures used as manipulation targets.
+# NOTE: This file is only a library, see run.R for more details.
NP <- function(Dx, Dy, k = 9) {
if (any(Dx != t(Dx)) || any(Dy != t(Dy))) {
@@ -53,112 +28,129 @@ NP <- function(Dx, Dy, k = 9) {
preservation
}
+#silhouette <- function(Dy, labels) {
+# if (any(t(Dy) != Dy)) {
+# stop("Dy must be symmetric")
+# }
+#
+# Dy <- as.matrix(Dy)
+# n <- nrow(Dy)
+# cohesion <- vector("numeric", n)
+# separation <- vector("numeric", n)
+#
+# for (i in 1:n) {
+# label <- labels[i]
+# separation[i] <- min(Dy[i, labels != label])
+# cohesion[i] <- mean(Dy[i, labels[-i] == label])
+# }
+#
+# (separation - cohesion) / max(separation, cohesion)
+#}
silhouette <- function(Dy, labels) {
- if (any(t(Dy) != Dy)) {
- stop("Dy must be symmetric")
- }
-
- Dy <- as.matrix(Dy)
n <- nrow(Dy)
- cohesion <- vector("numeric", n)
- separation <- vector("numeric", n)
-
- for (i in 1:n) {
- label <- labels[i]
- separation[i] <- min(Dy[i, labels != label])
- cohesion[i] <- mean(Dy[i, labels[-i] == label])
+ if (n != length(labels)) {
+ stop("Number of labels doesn't match number of points")
}
- silh <- (separation - cohesion) / max(separation, cohesion)
-}
-
-d2p <- function(D, perplexity = 9, tol = 1e-5, max.tries = 50) {
- if (any(D != t(D))) {
- stop("D must be symmetric")
+ A_labels <- list()
+ B_labels <- list()
+ unique.labels <- unique(labels)
+ for (l in unique.labels) {
+ A_labels[[l]] <- labels == l
+ B_labels[[l]] <- labels != l
}
- D <- as.matrix(D)
- P <- matrix(data=0, nrow=nrow(D), ncol=ncol(D))
- n <- nrow(D)
- beta <- rep(1, n)
- logU <- log(perplexity)
-
+ # This factor excludes self comparisons when computing cohesion
+ m.factor <- n / (n-1)
+ s <- vector("numeric", n)
for (i in 1:n) {
- #denom <- sum(exp(-D[i, ] / sigmas))
- #P[i, ] <- exp(-D[i, ] / sigmas) / denom
-
- betaMin <- -Inf
- betaMax <- Inf
- Di <- D[i, -i]
-
- tries <- 0
- repeat {
- Pi <- exp(-Di * beta[i])
- sumPi <- sum(Pi)
- H <- log(sumPi) + beta[i] * sum(Di * Pi) / sumPi
- Pi <- Pi / sumPi
- Hdiff <- H - logU
-
- if (abs(Hdiff) < tol || tries > max.tries) {
- break
- }
-
- if (Hdiff > 0) {
- betaMin <- beta[i]
- beta[i] <- if (is.finite(betaMax)) {
- (beta[i] + betaMax) / 2
- } else {
- beta[i] * 2
- }
- } else {
- betaMax <- beta[i]
- beta[i] <- if (is.finite(betaMin)) {
- (beta[i] + betaMin) / 2
- } else {
- beta[i] / 2
- }
- }
+ label_i <- labels[i]
- tries <- tries + 1
+ a <- m.factor * mean(Dy[i, A_labels[[label_i]]]) # cohesion
+ b <- Inf
+ for (l in unique(labels)) {
+ b <- min(mean(Dy[i, B_labels[[l]]]), b) # separation
}
- P[i, -i] <- Pi
+ s[i] <- (b - a) / max(b, a)
}
- list(P = P, beta = beta) # sigmas = sqrt(1 / beta)
+ s
}
-d2p.beta <- function(D, beta) {
- if (any(D != t(D))) {
- stop("D must be symmetric")
- }
+#stress <- function(Dx, Dy) {
+# if (any(Dx != t(Dx)) || any(Dy != t(Dy))) {
+# stop("Dx and Dy must be symmetric")
+# }
+#
+# Dx <- as.matrix(Dx)
+# Dy <- as.matrix(Dy)
+# if (nrow(Dx) != nrow(Dy)) {
+# stop("Dx and Dy must have the same number of elements")
+# }
+#
+# n <- nrow(Dx)
+# s <- vector("numeric", n)
+# for (i in 1:n) {
+# s[i] <- 0
+# for (j in 1:n) {
+# if (i == j) {
+# next
+# }
+#
+# s[i] = s[i] + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j]
+# }
+# s[i] = s[i] / sum(D[i, ])
+# }
+#
+# s
+#}
+stress <- function(Dx, Dy) {
+ n <- nrow(Dx)
- D <- as.matrix(D)
- n <- nrow(D)
- P <- matrix(data=0, nrow=nrow(D), ncol=ncol(D))
- for (i in 1:n) {
- P[i, -i] <- exp(-D[i, -i] * beta[i])
- P[i, -i] <- P[i, -i] / sum(P[i, -i])
+ C <- 0
+ D <- 0
+ for (i in 1:(n-1)) {
+ for (j in (i + 1):n) {
+ C <- C + Dx[i, j]
+ D <- D + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j]
+ if (is.nan(D)) {
+ loginfo("%d, %d", i, j)
+ loginfo("%f", Dx[i, j])
+ stop("NaN")
+ }
+ }
}
- P
+ D / C
}
-klDivergence <- function(P, Q, eps = 1e-12) {
- if (nrow(P) != ncol(P) || nrow(Q) != ncol(Q)) {
- stop("P and Q must be square")
- }
- if (nrow(P) != nrow(Q)) {
- stop("P and Q must have the same number of elements")
- }
-
- P[P < eps] <- eps
- Q[Q < eps] <- eps
- n <- nrow(P)
- d <- vector("numeric", n)
- for (i in 1:n) {
- d[i] <- sum(P[i, -i] * log(P[i, -i] / Q[i, -i]))
- }
-
- d
+# NOTE: This function requires the 'klmeasure' binary from:
+# http://research.cs.aalto.fi/pml/software/dredviz/
+smoothed.pr <- function(Dx, Dy, k) {
+ # Create SOM_PAK file for Dx
+ Dx.fname <- tempfile()
+ Dx.f <- file(Dx.fname, "w")
+ cat(sprintf("%d\n", ncol(Dx)), file=Dx.f)
+ write.table(Dx, Dx.f, col.names=F, row.names=F)
+ close(Dx.f)
+
+ # Create SOM_PAK file for Dy
+ Dy.fname <- tempfile()
+ Dy.f <- file(Dy.fname, "w")
+ cat(sprintf("%d\n", ncol(Dy)), file=Dy.f)
+ write.table(Dy, Dy.f, col.names=F, row.names=F)
+ close(Dy.f)
+
+ output <- system2("./klmeasure",
+ stdout=T,
+ args=c("--datadist", Dx.fname,
+ "--projdist", Dy.fname,
+ "--neighbors", sprintf("%d", k)))
+ output <- strsplit(output[2], " | ", fixed=T)
+ output <- unlist(output)
+
+ file.remove(Dx.fname, Dy.fname)
+
+ list(s.precision=as.double(output[1]), s.recall=as.double(output[2]))
}