From 9b51886d702b22018db08ba8a30ab47d36f64aac Mon Sep 17 00:00:00 2001 From: Samuel Fadel Date: Thu, 18 Aug 2016 23:14:43 -0300 Subject: Renamed main script file. --- tests.R | 250 ---------------------------------------------------------------- 1 file changed, 250 deletions(-) delete mode 100644 tests.R (limited to 'tests.R') diff --git a/tests.R b/tests.R deleted file mode 100644 index 4c073a0..0000000 --- a/tests.R +++ /dev/null @@ -1,250 +0,0 @@ -require(ggplot2) -require(gridExtra) -require(mp) - -source("measures.R") - -#automated.m <- function(D, labels) { -# D.m <- D -# for (label in unique(labels)) { -# same.label <- labels == label -# D.m[same.label, same.label] <- D[same.label, same.label] * 0.1 -# } -# -# D.m -#} -automated.m <- function(Xs, labels) { - n <- nrow(Xs) - p <- ncol(Xs) - Xs <- cbind(Xs, matrix(data=0, nrow=n, ncol=p)) - for (label in unique(labels)) { - for (j in 1:p) { - Xs[labels == label, j + p] <- mean(Xs[labels == label, j]) - } - } - - dist(Xs) -} - -color_scale.blue_orange <- function(name) { - scale_colour_gradient(name = name, high = "#376092", low = "#e46c0a", space = "Lab") -} - -color_scale.gradient2 <- function(name) { - scale_colour_gradient2(name = name, mid = "#dddddd", space = "Lab") -} - -test <- function(file, suffix, output.dir) { - message("Testing dataset: ", file) - dataset <- read.table(file) - - # Extract labels - labels <- dataset[, ncol(dataset)] - classes <- as.factor(labels) - X <- dataset[, -ncol(dataset)] - - n <- nrow(X) - - # Calculate distances (X) and normalize - message("\tCalculating dist(X)") - Dx <- dist(X) - Dx <- Dx / mean(Dx) - Dx <- as.matrix(Dx) - - # Sample dataset - sample.indices <- sample(n, 3*sqrt(n)) - classes.s <- as.factor(labels[sample.indices]) - - # Automatic sample positioning - message("\tCalculating Ys") - Dx.s <- Dx[sample.indices, sample.indices] - Ys <- forceScheme(Dx.s) - - # LAMP - message("\tCalculating Y") - Y <- lamp(X, sample.indices, Ys) - - # Calculate distances (Y) and normalize - message("\tCalculating dist(Y)") - Dy <- dist(Y) - Dy <- Dy / mean(Dy) - Dy <- as.matrix(Dy) - - message("\tCalculating P and Q") - prob <- d2p(Dx^2) - P <- prob$P - Q <- d2p.beta(Dy^2, prob$beta) - - # Calculate measures - message("\tCalculating measures") - np <- NP(Dx, Dy) - silh <- silhouette(Dy, classes) - precision <- klDivergence(Q, P) - recall <- klDivergence(P, Q) - - measures <- rbind(data.frame(mean=mean(np), median=median(np), sd=sd(np)), - data.frame(mean=mean(silh), median=median(silh), sd=sd(silh)), - data.frame(mean=mean(precision), median=median(precision), sd=sd(precision)), - data.frame(mean=mean(recall), median=median(recall), sd=sd(recall))) - write.table(measures, paste(output.dir, suffix, "-measures.csv", sep=""), row.names=F) - - if (!(all(is.finite(np)) && - all(is.finite(silh)) && - all(is.finite(precision)) && - all(is.finite(recall)))) { - stop("Non-finite measures found") - } - - # Plot results - message("\tPlotting results") - shape_scale <- scale_shape_manual(name = "Classe", values = 1:nlevels(classes)) - Ys <- cbind(as.data.frame(Ys), classes.s) - Y <- cbind(as.data.frame(Y), classes, np, silh, precision, recall) - p.s <- ggplot(Ys) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes.s, colour = classes.s)) + - shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) - ggsave(paste(output.dir, "subsample-", suffix, ".pdf", sep=""), p.s, width=5, height=5) - - p <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = classes)) + - shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) - ggsave(paste(output.dir, suffix, ".pdf", sep=""), p, width=5, height=5) - - p.np <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = np)) + - shape_scale + color_scale.blue_orange("NP") - ggsave(paste(output.dir, "np-", suffix, ".pdf", sep=""), p.np, width=5, height=5) - - p.silh <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = silh)) + - shape_scale + color_scale.blue_orange("Silhueta") - ggsave(paste(output.dir, "silh-", suffix, ".pdf", sep=""), p.silh, width=5, height=5) - - p.precision <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = precision)) + - shape_scale + color_scale.blue_orange("Precisão") - ggsave(paste(output.dir, "precision-", suffix, ".pdf", sep=""), p.precision, width=5, height=5) - - p.recall <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = recall)) + - shape_scale + color_scale.blue_orange("Revocação") - ggsave(paste(output.dir, "recall-", suffix, ".pdf", sep=""), p.recall, width=5, height=5) - - - pdf(paste(output.dir, "all-", suffix, ".pdf", sep=""), width = 12, height = 16) - grid.arrange(p.s, p, p.np, p.silh, p.precision, p.recall, ncol = 2) - dev.off() - - # Perform manipulation - message("\tCalculating Ys.m") - Dx.m <- automated.m(X[sample.indices, ], labels[sample.indices]) - Ys.m <- forceScheme(Dx.m, Ys[, 1:2]) - - # LAMP - message("\tCalculating Y.m") - Y.m <- lamp(X, sample.indices, Ys.m) - - # Calculate distances (Y.m) and normalize - message("\tCalculating dist(Y.m)") - Dy <- dist(Y.m) - Dy <- Dy / mean(Dy) - Dy <- as.matrix(Dy) - - message("\tCalculating Q") - Q <- d2p.beta(Dy^2, prob$beta) - - # Calculate measures - message("\tCalculating measures") - np <- NP(Dx, Dy) - np - silh <- silhouette(Dy, classes) - silh - precision <- klDivergence(Q, P) - precision - recall <- klDivergence(P, Q) - recall - - measures <- rbind(data.frame(mean=mean(np), median=median(np), sd=sd(np)), - data.frame(mean=mean(silh), median=median(silh), sd=sd(silh)), - data.frame(mean=mean(precision), median=median(precision), sd=sd(precision)), - data.frame(mean=mean(recall), median=median(recall), sd=sd(recall))) - write.table(measures, paste(output.dir, suffix, "-manip-measures.csv", sep=""), row.names=F) - - if (!(all(is.finite(np)) && - all(is.finite(silh)) && - all(is.finite(precision)) && - all(is.finite(recall)))) { - stop("Non-finite measures found") - } - - # Plot results - message("\tPlotting results") - Ys.m <- cbind(as.data.frame(Ys.m), classes.s) - Y.m <- cbind(as.data.frame(Y.m), classes, np, silh, precision, recall) - pm.s <- ggplot(Ys.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes.s, colour = classes.s)) + - shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) - ggsave(paste(output.dir, "manip-subsample-", suffix, ".pdf", sep=""), pm.s, width=5, height=5) - - pm <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = classes)) + - shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) - ggsave(paste(output.dir, "manip-", suffix, ".pdf", sep=""), pm, width=5, height=5) - - p.np <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = np)) + - shape_scale + color_scale.gradient2("NP") - ggsave(paste(output.dir, "manip-np-", suffix, ".pdf", sep=""), p.np, width=5, height=5) - - p.silh <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = silh)) + - shape_scale + color_scale.gradient2("Silhueta") - ggsave(paste(output.dir, "manip-silh-", suffix, ".pdf", sep=""), p.silh, width=5, height=5) - - p.precision <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = precision)) + - shape_scale + color_scale.gradient2("Precisão") - ggsave(paste(output.dir, "manip-precision-", suffix, ".pdf", sep=""), p.precision, width=5, height=5) - - p.recall <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = recall)) + - shape_scale + color_scale.gradient2("Revocação") - ggsave(paste(output.dir, "manip-recall-", suffix, ".pdf", sep=""), p.recall, width=5, height=5) - - pdf(paste(output.dir, "original-manip-", suffix, ".pdf", sep=""), width = 10, height = 8) - grid.arrange(p.s, p, pm.s, pm, ncol = 2) - dev.off() - - pdf(paste(output.dir, "manip-measures-", suffix, ".pdf", sep=""), width = 10, height = 8) - grid.arrange(p.np, p.silh, p.precision, p.recall, ncol = 2) - dev.off() - - pdf(paste(output.dir, "manip-all-", suffix, ".pdf", sep=""), width = 12, height = 16) - grid.arrange(pm.s, pm, p.np, p.silh, p.precision, p.recall, ncol = 2) - dev.off() -} - -#test(file = "datasets/iris.tbl", suffix = "iris", "tests/") -test(file = "datasets/wdbc.tbl", suffix = "wdbc", "tests/") -test(file = "datasets/segmentation.tbl", suffix = "segmentation", "tests/") -test(file = "datasets/glass.tbl", suffix = "glass", "tests/") -- cgit v1.2.3