From fbf8d82cdd3720c4bbf2a94035b6779e56d73448 Mon Sep 17 00:00:00 2001 From: Samuel Fadel Date: Thu, 18 Aug 2016 23:15:39 -0300 Subject: Updated with all the code used to run experiments. --- measures.R | 224 +++++++++++++++++++++++++++++-------------------------------- 1 file changed, 108 insertions(+), 116 deletions(-) (limited to 'measures.R') diff --git a/measures.R b/measures.R index 99aa4df..057a114 100644 --- a/measures.R +++ b/measures.R @@ -1,30 +1,5 @@ -stress <- function(Dx, Dy) { - if (any(Dx != t(Dx)) || any(Dy != t(Dy))) { - stop("Dx and Dy must be symmetric") - } - - Dx <- as.matrix(Dx) - Dy <- as.matrix(Dy) - if (nrow(Dx) != nrow(Dy)) { - stop("Dx and Dy must have the same number of elements") - } - - n <- nrow(Dx) - s <- vector("numeric", n) - for (i in 1:n) { - s[i] <- 0 - for (j in 1:n) { - if (i == j) { - next - } - - s[i] = s[i] + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j] - } - s[i] = s[i] / sum(D[i, ]) - } - - s -} +# Measures used as manipulation targets. +# NOTE: This file is only a library, see run.R for more details. NP <- function(Dx, Dy, k = 9) { if (any(Dx != t(Dx)) || any(Dy != t(Dy))) { @@ -53,112 +28,129 @@ NP <- function(Dx, Dy, k = 9) { preservation } +#silhouette <- function(Dy, labels) { +# if (any(t(Dy) != Dy)) { +# stop("Dy must be symmetric") +# } +# +# Dy <- as.matrix(Dy) +# n <- nrow(Dy) +# cohesion <- vector("numeric", n) +# separation <- vector("numeric", n) +# +# for (i in 1:n) { +# label <- labels[i] +# separation[i] <- min(Dy[i, labels != label]) +# cohesion[i] <- mean(Dy[i, labels[-i] == label]) +# } +# +# (separation - cohesion) / max(separation, cohesion) +#} silhouette <- function(Dy, labels) { - if (any(t(Dy) != Dy)) { - stop("Dy must be symmetric") - } - - Dy <- as.matrix(Dy) n <- nrow(Dy) - cohesion <- vector("numeric", n) - separation <- vector("numeric", n) - - for (i in 1:n) { - label <- labels[i] - separation[i] <- min(Dy[i, labels != label]) - cohesion[i] <- mean(Dy[i, labels[-i] == label]) + if (n != length(labels)) { + stop("Number of labels doesn't match number of points") } - silh <- (separation - cohesion) / max(separation, cohesion) -} - -d2p <- function(D, perplexity = 9, tol = 1e-5, max.tries = 50) { - if (any(D != t(D))) { - stop("D must be symmetric") + A_labels <- list() + B_labels <- list() + unique.labels <- unique(labels) + for (l in unique.labels) { + A_labels[[l]] <- labels == l + B_labels[[l]] <- labels != l } - D <- as.matrix(D) - P <- matrix(data=0, nrow=nrow(D), ncol=ncol(D)) - n <- nrow(D) - beta <- rep(1, n) - logU <- log(perplexity) - + # This factor excludes self comparisons when computing cohesion + m.factor <- n / (n-1) + s <- vector("numeric", n) for (i in 1:n) { - #denom <- sum(exp(-D[i, ] / sigmas)) - #P[i, ] <- exp(-D[i, ] / sigmas) / denom - - betaMin <- -Inf - betaMax <- Inf - Di <- D[i, -i] - - tries <- 0 - repeat { - Pi <- exp(-Di * beta[i]) - sumPi <- sum(Pi) - H <- log(sumPi) + beta[i] * sum(Di * Pi) / sumPi - Pi <- Pi / sumPi - Hdiff <- H - logU - - if (abs(Hdiff) < tol || tries > max.tries) { - break - } - - if (Hdiff > 0) { - betaMin <- beta[i] - beta[i] <- if (is.finite(betaMax)) { - (beta[i] + betaMax) / 2 - } else { - beta[i] * 2 - } - } else { - betaMax <- beta[i] - beta[i] <- if (is.finite(betaMin)) { - (beta[i] + betaMin) / 2 - } else { - beta[i] / 2 - } - } + label_i <- labels[i] - tries <- tries + 1 + a <- m.factor * mean(Dy[i, A_labels[[label_i]]]) # cohesion + b <- Inf + for (l in unique(labels)) { + b <- min(mean(Dy[i, B_labels[[l]]]), b) # separation } - P[i, -i] <- Pi + s[i] <- (b - a) / max(b, a) } - list(P = P, beta = beta) # sigmas = sqrt(1 / beta) + s } -d2p.beta <- function(D, beta) { - if (any(D != t(D))) { - stop("D must be symmetric") - } +#stress <- function(Dx, Dy) { +# if (any(Dx != t(Dx)) || any(Dy != t(Dy))) { +# stop("Dx and Dy must be symmetric") +# } +# +# Dx <- as.matrix(Dx) +# Dy <- as.matrix(Dy) +# if (nrow(Dx) != nrow(Dy)) { +# stop("Dx and Dy must have the same number of elements") +# } +# +# n <- nrow(Dx) +# s <- vector("numeric", n) +# for (i in 1:n) { +# s[i] <- 0 +# for (j in 1:n) { +# if (i == j) { +# next +# } +# +# s[i] = s[i] + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j] +# } +# s[i] = s[i] / sum(D[i, ]) +# } +# +# s +#} +stress <- function(Dx, Dy) { + n <- nrow(Dx) - D <- as.matrix(D) - n <- nrow(D) - P <- matrix(data=0, nrow=nrow(D), ncol=ncol(D)) - for (i in 1:n) { - P[i, -i] <- exp(-D[i, -i] * beta[i]) - P[i, -i] <- P[i, -i] / sum(P[i, -i]) + C <- 0 + D <- 0 + for (i in 1:(n-1)) { + for (j in (i + 1):n) { + C <- C + Dx[i, j] + D <- D + (Dx[i, j] - Dy[i, j])^2 / Dx[i, j] + if (is.nan(D)) { + loginfo("%d, %d", i, j) + loginfo("%f", Dx[i, j]) + stop("NaN") + } + } } - P + D / C } -klDivergence <- function(P, Q, eps = 1e-12) { - if (nrow(P) != ncol(P) || nrow(Q) != ncol(Q)) { - stop("P and Q must be square") - } - if (nrow(P) != nrow(Q)) { - stop("P and Q must have the same number of elements") - } - - P[P < eps] <- eps - Q[Q < eps] <- eps - n <- nrow(P) - d <- vector("numeric", n) - for (i in 1:n) { - d[i] <- sum(P[i, -i] * log(P[i, -i] / Q[i, -i])) - } - - d +# NOTE: This function requires the 'klmeasure' binary from: +# http://research.cs.aalto.fi/pml/software/dredviz/ +smoothed.pr <- function(Dx, Dy, k) { + # Create SOM_PAK file for Dx + Dx.fname <- tempfile() + Dx.f <- file(Dx.fname, "w") + cat(sprintf("%d\n", ncol(Dx)), file=Dx.f) + write.table(Dx, Dx.f, col.names=F, row.names=F) + close(Dx.f) + + # Create SOM_PAK file for Dy + Dy.fname <- tempfile() + Dy.f <- file(Dy.fname, "w") + cat(sprintf("%d\n", ncol(Dy)), file=Dy.f) + write.table(Dy, Dy.f, col.names=F, row.names=F) + close(Dy.f) + + output <- system2("./klmeasure", + stdout=T, + args=c("--datadist", Dx.fname, + "--projdist", Dy.fname, + "--neighbors", sprintf("%d", k))) + output <- strsplit(output[2], " | ", fixed=T) + output <- unlist(output) + + file.remove(Dx.fname, Dy.fname) + + list(s.precision=as.double(output[1]), s.recall=as.double(output[2])) } -- cgit v1.2.3