# Provenance (recovered from the git patch this script was distributed in):
#   commit   b255338295587246292dc978e7d4d5687ee01fb4
#   author   Samuel Fadel
#   date     Fri, 19 Aug 2016 14:20:57 -0300
#   subject  Scripts and other files for building all datasets.
#   path     datasets/faces/faces_extract.py (new file, blob 3e8b4f3)
# The same patch also creates datasets/faces/source (blob e89da9b), whose
# only line is: http://isomap.stanford.edu/datasets.html
"""Fetch the Isomap faces data and export it as text tables and PNG images.

When run as a script this module:

1. Downloads ``DATA_URL`` (unless ``DATA_FNAME`` or its ``.Z`` archive is
   already present), verifies the archive's SHA-256 digest and uncompresses
   it with the external ``uncompress`` program.
2. Loads the resulting ``.mat`` file and writes:
   - ``IMG_FNAME``: the raw image matrix, one image per row;
   - ``IMG_DIR/<i>.png``: every image as a 64x64 PNG;
   - ``LIGHTS_FNAME`` and ``POSES_FNAME``: the ``lights`` and ``poses``
     arrays, transposed;
   - ``PCA_FNAME``: the images projected onto 256 whitened PCA components.

Progress and errors are logged to ``faces_extract.log``.
"""

import hashlib
import logging
import os
import os.path
import subprocess
import sys

import numpy as np
import sklearn.decomposition
import wget
from scipy.io import loadmat
# NOTE(review): scipy.misc.imsave was removed from recent SciPy releases;
# this import needs an older SciPy or a replacement image writer -- confirm
# against the SciPy version used to build the datasets.
from scipy.misc import imsave
from sklearn.decomposition import PCA


# Original data
DATA_URL = "http://isomap.stanford.edu/face_data.mat.Z"
SHA256_DIGEST = "9c5bc75f204071bbd340aa3ff584757ec784b0630206e526d4cd3809f2650a8a"

# Local name
DATA_FNAME = "face_data.mat"

# Output files/directories
IMG_DIR = "images"
IMG_FNAME = "face_raw.tbl"
LIGHTS_FNAME = "face_lights.tbl"
POSES_FNAME = "face_poses.tbl"
PCA_FNAME = "faces.tbl"


def _abort(message):
    """Log *message* as an error and terminate with exit status 1."""
    logging.error(message)
    sys.exit(1)


def _sha256_hexdigest(path, chunk_size=65536):
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in *chunk_size*-byte pieces so that it never has to be
    held in memory as a whole.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _ensure_data():
    """Make sure ``DATA_FNAME`` exists, downloading/uncompressing if needed.

    Exits with status 1 when the archive's digest does not match
    ``SHA256_DIGEST`` or when ``uncompress`` cannot be run successfully.
    """
    if os.path.exists(DATA_FNAME):
        return

    compressed = "{}.Z".format(DATA_FNAME)
    if not os.path.exists(compressed):
        logging.info("Downloading faces data from '{}'".format(DATA_URL))
        wget.download(DATA_URL, compressed)

    # The archive is verified even when it was already on disk, since a
    # previous download may have been interrupted.
    logging.info("Checking SHA-256 digest")
    if _sha256_hexdigest(compressed) != SHA256_DIGEST:
        _abort("File seems corrupted; aborting")

    logging.info("Uncompressing data into '{}'".format(DATA_FNAME))
    try:
        # check_call raises on a non-zero exit status, so a failed
        # decompression is reported here instead of surfacing later as a
        # confusing error while loading the .mat file.
        subprocess.check_call(["uncompress", compressed])
    except (OSError, subprocess.CalledProcessError) as err:
        _abort("Could not uncompress '{}' ({}); aborting".format(compressed,
                                                                 err))


def _write_images(face_images):
    """Write every column of *face_images* to ``IMG_DIR`` as a 64x64 PNG.

    ``IMG_DIR`` is created when missing; the script exits with status 1 if
    that path exists but is not a directory.
    """
    if not os.path.exists(IMG_DIR):
        logging.info("Creating directory {}".format(IMG_DIR))
        os.makedirs(IMG_DIR, 0o755)
    elif not os.path.isdir(IMG_DIR):
        _abort("File {} exists; aborting".format(IMG_DIR))

    logging.info("Writing image files to {}".format(IMG_DIR))
    # Each column holds one flattened image; iterating the transpose yields
    # them one at a time.
    for i, pixels in enumerate(face_images.T):
        path = os.path.join(IMG_DIR, "{}.png".format(i))
        imsave(path, pixels.reshape(64, 64).T)


def main():
    """Run the full extraction pipeline described in the module docstring."""
    logging.basicConfig(filename="faces_extract.log",
                        format="%(levelname)s:%(message)s",
                        level=logging.INFO)

    # Get original data
    _ensure_data()

    # We have the original data; proceed
    logging.info("Loading faces data")
    faces = loadmat(DATA_FNAME)
    face_images = faces["images"]

    logging.info("Writing image table data to {}".format(IMG_FNAME))
    np.savetxt(IMG_FNAME, face_images.T, fmt="%f")

    _write_images(face_images)

    logging.info("Writing lights data to {}".format(LIGHTS_FNAME))
    np.savetxt(LIGHTS_FNAME, faces["lights"].T, fmt="%f")

    logging.info("Writing poses data to {}".format(POSES_FNAME))
    np.savetxt(POSES_FNAME, faces["poses"].T, fmt="%f")

    logging.info("Writing PCA-whitened data to {}".format(PCA_FNAME))
    X = PCA(n_components=256, whiten=True).fit_transform(face_images.T)
    np.savetxt(PCA_FNAME, X, fmt="%f")


if __name__ == "__main__":
    main()