From 9b51886d702b22018db08ba8a30ab47d36f64aac Mon Sep 17 00:00:00 2001 From: Samuel Fadel Date: Thu, 18 Aug 2016 23:14:43 -0300 Subject: Renamed main script file. --- run.R | 250 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ tests.R | 250 ---------------------------------------------------------------- 2 files changed, 250 insertions(+), 250 deletions(-) create mode 100644 run.R delete mode 100644 tests.R diff --git a/run.R b/run.R new file mode 100644 index 0000000..4c073a0 --- /dev/null +++ b/run.R @@ -0,0 +1,250 @@ +require(ggplot2) +require(gridExtra) +require(mp) + +source("measures.R") + +#automated.m <- function(D, labels) { +# D.m <- D +# for (label in unique(labels)) { +# same.label <- labels == label +# D.m[same.label, same.label] <- D[same.label, same.label] * 0.1 +# } +# +# D.m +#} +automated.m <- function(Xs, labels) { + n <- nrow(Xs) + p <- ncol(Xs) + Xs <- cbind(Xs, matrix(data=0, nrow=n, ncol=p)) + for (label in unique(labels)) { + for (j in 1:p) { + Xs[labels == label, j + p] <- mean(Xs[labels == label, j]) + } + } + + dist(Xs) +} + +color_scale.blue_orange <- function(name) { + scale_colour_gradient(name = name, high = "#376092", low = "#e46c0a", space = "Lab") +} + +color_scale.gradient2 <- function(name) { + scale_colour_gradient2(name = name, mid = "#dddddd", space = "Lab") +} + +test <- function(file, suffix, output.dir) { + message("Testing dataset: ", file) + dataset <- read.table(file) + + # Extract labels + labels <- dataset[, ncol(dataset)] + classes <- as.factor(labels) + X <- dataset[, -ncol(dataset)] + + n <- nrow(X) + + # Calculate distances (X) and normalize + message("\tCalculating dist(X)") + Dx <- dist(X) + Dx <- Dx / mean(Dx) + Dx <- as.matrix(Dx) + + # Sample dataset + sample.indices <- sample(n, 3*sqrt(n)) + classes.s <- as.factor(labels[sample.indices]) + + # Automatic sample positioning + message("\tCalculating Ys") + Dx.s <- Dx[sample.indices, sample.indices] + Ys <- forceScheme(Dx.s) + + # LAMP + message("\tCalculating Y") + Y <- lamp(X, sample.indices, Ys) + + # Calculate distances (Y) and normalize + message("\tCalculating dist(Y)") + Dy <- dist(Y) + Dy <- Dy / mean(Dy) + Dy <- as.matrix(Dy) + + message("\tCalculating P and Q") + prob <- d2p(Dx^2) + P <- prob$P + Q <- d2p.beta(Dy^2, prob$beta) + + # Calculate measures + message("\tCalculating measures") + np <- NP(Dx, Dy) + silh <- silhouette(Dy, classes) + precision <- klDivergence(Q, P) + recall <- klDivergence(P, Q) + + measures <- rbind(data.frame(mean=mean(np), median=median(np), sd=sd(np)), + data.frame(mean=mean(silh), median=median(silh), sd=sd(silh)), + data.frame(mean=mean(precision), median=median(precision), sd=sd(precision)), + data.frame(mean=mean(recall), median=median(recall), sd=sd(recall))) + write.table(measures, paste(output.dir, suffix, "-measures.csv", sep=""), row.names=F) + + if (!(all(is.finite(np)) && + all(is.finite(silh)) && + all(is.finite(precision)) && + all(is.finite(recall)))) { + stop("Non-finite measures found") + } + + # Plot results + message("\tPlotting results") + shape_scale <- scale_shape_manual(name = "Classe", values = 1:nlevels(classes)) + Ys <- cbind(as.data.frame(Ys), classes.s) + Y <- cbind(as.data.frame(Y), classes, np, silh, precision, recall) + p.s <- ggplot(Ys) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes.s, colour = classes.s)) + + shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) + ggsave(paste(output.dir, "subsample-", suffix, ".pdf", sep=""), p.s, width=5, height=5) + + p <- ggplot(Y) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = classes)) + + shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) + ggsave(paste(output.dir, suffix, ".pdf", sep=""), p, width=5, height=5) + + p.np <- ggplot(Y) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = np)) + + shape_scale + color_scale.blue_orange("NP") + ggsave(paste(output.dir, "np-", suffix, ".pdf", sep=""), p.np, width=5, height=5) + + p.silh <- ggplot(Y) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = silh)) + + shape_scale + color_scale.blue_orange("Silhueta") + ggsave(paste(output.dir, "silh-", suffix, ".pdf", sep=""), p.silh, width=5, height=5) + + p.precision <- ggplot(Y) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = precision)) + + shape_scale + color_scale.blue_orange("Precisão") + ggsave(paste(output.dir, "precision-", suffix, ".pdf", sep=""), p.precision, width=5, height=5) + + p.recall <- ggplot(Y) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = recall)) + + shape_scale + color_scale.blue_orange("Revocação") + ggsave(paste(output.dir, "recall-", suffix, ".pdf", sep=""), p.recall, width=5, height=5) + + + pdf(paste(output.dir, "all-", suffix, ".pdf", sep=""), width = 12, height = 16) + grid.arrange(p.s, p, p.np, p.silh, p.precision, p.recall, ncol = 2) + dev.off() + + # Perform manipulation + message("\tCalculating Ys.m") + Dx.m <- automated.m(X[sample.indices, ], labels[sample.indices]) + Ys.m <- forceScheme(Dx.m, Ys[, 1:2]) + + # LAMP + message("\tCalculating Y.m") + Y.m <- lamp(X, sample.indices, Ys.m) + + # Calculate distances (Y.m) and normalize + message("\tCalculating dist(Y.m)") + Dy <- dist(Y.m) + Dy <- Dy / mean(Dy) + Dy <- as.matrix(Dy) + + message("\tCalculating Q") + Q <- d2p.beta(Dy^2, prob$beta) + + # Calculate measures + message("\tCalculating measures") + np <- NP(Dx, Dy) - np + silh <- silhouette(Dy, classes) - silh + precision <- klDivergence(Q, P) - precision + recall <- klDivergence(P, Q) - recall + + measures <- rbind(data.frame(mean=mean(np), median=median(np), sd=sd(np)), + data.frame(mean=mean(silh), median=median(silh), sd=sd(silh)), + data.frame(mean=mean(precision), median=median(precision), sd=sd(precision)), + data.frame(mean=mean(recall), median=median(recall), sd=sd(recall))) + write.table(measures, paste(output.dir, suffix, "-manip-measures.csv", sep=""), row.names=F) + + if (!(all(is.finite(np)) && + all(is.finite(silh)) && + all(is.finite(precision)) && + all(is.finite(recall)))) { + stop("Non-finite measures found") + } + + # Plot results + message("\tPlotting results") + Ys.m <- cbind(as.data.frame(Ys.m), classes.s) + Y.m <- cbind(as.data.frame(Y.m), classes, np, silh, precision, recall) + pm.s <- ggplot(Ys.m) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes.s, colour = classes.s)) + + shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) + ggsave(paste(output.dir, "manip-subsample-", suffix, ".pdf", sep=""), pm.s, width=5, height=5) + + pm <- ggplot(Y.m) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = classes)) + + shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) + ggsave(paste(output.dir, "manip-", suffix, ".pdf", sep=""), pm, width=5, height=5) + + p.np <- ggplot(Y.m) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = np)) + + shape_scale + color_scale.gradient2("NP") + ggsave(paste(output.dir, "manip-np-", suffix, ".pdf", sep=""), p.np, width=5, height=5) + + p.silh <- ggplot(Y.m) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = silh)) + + shape_scale + color_scale.gradient2("Silhueta") + ggsave(paste(output.dir, "manip-silh-", suffix, ".pdf", sep=""), p.silh, width=5, height=5) + + p.precision <- ggplot(Y.m) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = precision)) + + shape_scale + color_scale.gradient2("Precisão") + ggsave(paste(output.dir, "manip-precision-", suffix, ".pdf", sep=""), p.precision, width=5, height=5) + + p.recall <- ggplot(Y.m) + + theme_bw() + + labs(x = "", y = "") + + geom_point(aes(x = V1, y = V2, shape = classes, colour = recall)) + + shape_scale + color_scale.gradient2("Revocação") + ggsave(paste(output.dir, "manip-recall-", suffix, ".pdf", sep=""), p.recall, width=5, height=5) + + pdf(paste(output.dir, "original-manip-", suffix, ".pdf", sep=""), width = 10, height = 8) + grid.arrange(p.s, p, pm.s, pm, ncol = 2) + dev.off() + + pdf(paste(output.dir, "manip-measures-", suffix, ".pdf", sep=""), width = 10, height = 8) + grid.arrange(p.np, p.silh, p.precision, p.recall, ncol = 2) + dev.off() + + pdf(paste(output.dir, "manip-all-", suffix, ".pdf", sep=""), width = 12, height = 16) + grid.arrange(pm.s, pm, p.np, p.silh, p.precision, p.recall, ncol = 2) + dev.off() +} + +#test(file = "datasets/iris.tbl", suffix = "iris", "tests/") +test(file = "datasets/wdbc.tbl", suffix = "wdbc", "tests/") +test(file = "datasets/segmentation.tbl", suffix = "segmentation", "tests/") +test(file = "datasets/glass.tbl", suffix = "glass", "tests/") diff --git a/tests.R b/tests.R deleted file mode 100644 index 4c073a0..0000000 --- a/tests.R +++ /dev/null @@ -1,250 +0,0 @@ -require(ggplot2) -require(gridExtra) -require(mp) - -source("measures.R") - -#automated.m <- function(D, labels) { -# D.m <- D -# for (label in unique(labels)) { -# same.label <- labels == label -# D.m[same.label, same.label] <- D[same.label, same.label] * 0.1 -# } -# -# D.m -#} -automated.m <- function(Xs, labels) { - n <- nrow(Xs) - p <- ncol(Xs) - Xs <- cbind(Xs, matrix(data=0, nrow=n, ncol=p)) - for (label in unique(labels)) { - for (j in 1:p) { - Xs[labels == label, j + p] <- mean(Xs[labels == label, j]) - } - } - - dist(Xs) -} - -color_scale.blue_orange <- function(name) { - scale_colour_gradient(name = name, high = "#376092", low = "#e46c0a", space = "Lab") -} - -color_scale.gradient2 <- function(name) { - scale_colour_gradient2(name = name, mid = "#dddddd", space = "Lab") -} - -test <- function(file, suffix, output.dir) { - message("Testing dataset: ", file) - dataset <- read.table(file) - - # Extract labels - labels <- dataset[, ncol(dataset)] - classes <- as.factor(labels) - X <- dataset[, -ncol(dataset)] - - n <- nrow(X) - - # Calculate distances (X) and normalize - message("\tCalculating dist(X)") - Dx <- dist(X) - Dx <- Dx / mean(Dx) - Dx <- as.matrix(Dx) - - # Sample dataset - sample.indices <- sample(n, 3*sqrt(n)) - classes.s <- as.factor(labels[sample.indices]) - - # Automatic sample positioning - message("\tCalculating Ys") - Dx.s <- Dx[sample.indices, sample.indices] - Ys <- forceScheme(Dx.s) - - # LAMP - message("\tCalculating Y") - Y <- lamp(X, sample.indices, Ys) - - # Calculate distances (Y) and normalize - message("\tCalculating dist(Y)") - Dy <- dist(Y) - Dy <- Dy / mean(Dy) - Dy <- as.matrix(Dy) - - message("\tCalculating P and Q") - prob <- d2p(Dx^2) - P <- prob$P - Q <- d2p.beta(Dy^2, prob$beta) - - # Calculate measures - message("\tCalculating measures") - np <- NP(Dx, Dy) - silh <- silhouette(Dy, classes) - precision <- klDivergence(Q, P) - recall <- klDivergence(P, Q) - - measures <- rbind(data.frame(mean=mean(np), median=median(np), sd=sd(np)), - data.frame(mean=mean(silh), median=median(silh), sd=sd(silh)), - data.frame(mean=mean(precision), median=median(precision), sd=sd(precision)), - data.frame(mean=mean(recall), median=median(recall), sd=sd(recall))) - write.table(measures, paste(output.dir, suffix, "-measures.csv", sep=""), row.names=F) - - if (!(all(is.finite(np)) && - all(is.finite(silh)) && - all(is.finite(precision)) && - all(is.finite(recall)))) { - stop("Non-finite measures found") - } - - # Plot results - message("\tPlotting results") - shape_scale <- scale_shape_manual(name = "Classe", values = 1:nlevels(classes)) - Ys <- cbind(as.data.frame(Ys), classes.s) - Y <- cbind(as.data.frame(Y), classes, np, silh, precision, recall) - p.s <- ggplot(Ys) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes.s, colour = classes.s)) + - shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) - ggsave(paste(output.dir, "subsample-", suffix, ".pdf", sep=""), p.s, width=5, height=5) - - p <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = classes)) + - shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) - ggsave(paste(output.dir, suffix, ".pdf", sep=""), p, width=5, height=5) - - p.np <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = np)) + - shape_scale + color_scale.blue_orange("NP") - ggsave(paste(output.dir, "np-", suffix, ".pdf", sep=""), p.np, width=5, height=5) - - p.silh <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = silh)) + - shape_scale + color_scale.blue_orange("Silhueta") - ggsave(paste(output.dir, "silh-", suffix, ".pdf", sep=""), p.silh, width=5, height=5) - - p.precision <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = precision)) + - shape_scale + color_scale.blue_orange("Precisão") - ggsave(paste(output.dir, "precision-", suffix, ".pdf", sep=""), p.precision, width=5, height=5) - - p.recall <- ggplot(Y) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = recall)) + - shape_scale + color_scale.blue_orange("Revocação") - ggsave(paste(output.dir, "recall-", suffix, ".pdf", sep=""), p.recall, width=5, height=5) - - - pdf(paste(output.dir, "all-", suffix, ".pdf", sep=""), width = 12, height = 16) - grid.arrange(p.s, p, p.np, p.silh, p.precision, p.recall, ncol = 2) - dev.off() - - # Perform manipulation - message("\tCalculating Ys.m") - Dx.m <- automated.m(X[sample.indices, ], labels[sample.indices]) - Ys.m <- forceScheme(Dx.m, Ys[, 1:2]) - - # LAMP - message("\tCalculating Y.m") - Y.m <- lamp(X, sample.indices, Ys.m) - - # Calculate distances (Y.m) and normalize - message("\tCalculating dist(Y.m)") - Dy <- dist(Y.m) - Dy <- Dy / mean(Dy) - Dy <- as.matrix(Dy) - - message("\tCalculating Q") - Q <- d2p.beta(Dy^2, prob$beta) - - # Calculate measures - message("\tCalculating measures") - np <- NP(Dx, Dy) - np - silh <- silhouette(Dy, classes) - silh - precision <- klDivergence(Q, P) - precision - recall <- klDivergence(P, Q) - recall - - measures <- rbind(data.frame(mean=mean(np), median=median(np), sd=sd(np)), - data.frame(mean=mean(silh), median=median(silh), sd=sd(silh)), - data.frame(mean=mean(precision), median=median(precision), sd=sd(precision)), - data.frame(mean=mean(recall), median=median(recall), sd=sd(recall))) - write.table(measures, paste(output.dir, suffix, "-manip-measures.csv", sep=""), row.names=F) - - if (!(all(is.finite(np)) && - all(is.finite(silh)) && - all(is.finite(precision)) && - all(is.finite(recall)))) { - stop("Non-finite measures found") - } - - # Plot results - message("\tPlotting results") - Ys.m <- cbind(as.data.frame(Ys.m), classes.s) - Y.m <- cbind(as.data.frame(Y.m), classes, np, silh, precision, recall) - pm.s <- ggplot(Ys.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes.s, colour = classes.s)) + - shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) - ggsave(paste(output.dir, "manip-subsample-", suffix, ".pdf", sep=""), pm.s, width=5, height=5) - - pm <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = classes)) + - shape_scale + scale_color_manual(name = "Classe", values = 1:nlevels(classes)) - ggsave(paste(output.dir, "manip-", suffix, ".pdf", sep=""), pm, width=5, height=5) - - p.np <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = np)) + - shape_scale + color_scale.gradient2("NP") - ggsave(paste(output.dir, "manip-np-", suffix, ".pdf", sep=""), p.np, width=5, height=5) - - p.silh <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = silh)) + - shape_scale + color_scale.gradient2("Silhueta") - ggsave(paste(output.dir, "manip-silh-", suffix, ".pdf", sep=""), p.silh, width=5, height=5) - - p.precision <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = precision)) + - shape_scale + color_scale.gradient2("Precisão") - ggsave(paste(output.dir, "manip-precision-", suffix, ".pdf", sep=""), p.precision, width=5, height=5) - - p.recall <- ggplot(Y.m) + - theme_bw() + - labs(x = "", y = "") + - geom_point(aes(x = V1, y = V2, shape = classes, colour = recall)) + - shape_scale + color_scale.gradient2("Revocação") - ggsave(paste(output.dir, "manip-recall-", suffix, ".pdf", sep=""), p.recall, width=5, height=5) - - pdf(paste(output.dir, "original-manip-", suffix, ".pdf", sep=""), width = 10, height = 8) - grid.arrange(p.s, p, pm.s, pm, ncol = 2) - dev.off() - - pdf(paste(output.dir, "manip-measures-", suffix, ".pdf", sep=""), width = 10, height = 8) - grid.arrange(p.np, p.silh, p.precision, p.recall, ncol = 2) - dev.off() - - pdf(paste(output.dir, "manip-all-", suffix, ".pdf", sep=""), width = 12, height = 16) - grid.arrange(pm.s, pm, p.np, p.silh, p.precision, p.recall, ncol = 2) - dev.off() -} - -#test(file = "datasets/iris.tbl", suffix = "iris", "tests/") -test(file = "datasets/wdbc.tbl", suffix = "wdbc", "tests/") -test(file = "datasets/segmentation.tbl", suffix = "segmentation", "tests/") -test(file = "datasets/glass.tbl", suffix = "glass", "tests/") -- cgit v1.2.3