about | summary | refs | log | tree | commit | diff
path: root/datasets/faces
diff options
context:
space:
mode:
Diffstat (limited to 'datasets/faces')
-rw-r--r-- datasets/faces/faces_extract.py 81
-rw-r--r-- datasets/faces/source 1
2 files changed, 82 insertions, 0 deletions
diff --git a/datasets/faces/faces_extract.py b/datasets/faces/faces_extract.py
new file mode 100644
index 0000000..3e8b4f3
--- /dev/null
+++ b/datasets/faces/faces_extract.py
@@ -0,0 +1,81 @@
+from scipy.io import loadmat
+from scipy.misc import imsave
+from sklearn.decomposition import PCA
+
+import hashlib
+import logging
+import numpy as np
+import os
+import os.path
+import sklearn.decomposition
+import subprocess
+import wget
+
+
+# Original data: compressed .mat download and its expected SHA-256 digest
+DATA_URL = "http://isomap.stanford.edu/face_data.mat.Z"
+SHA256_DIGEST = "9c5bc75f204071bbd340aa3ff584757ec784b0630206e526d4cd3809f2650a8a"
+
+# Local name of the uncompressed data file (the download is stored as this + ".Z")
+DATA_FNAME = "face_data.mat"
+
+# Output files/directories, all relative to the current working directory
+IMG_DIR = "images"  # directory receiving one PNG per face image
+IMG_FNAME = "face_raw.tbl"  # text table of the raw "images" data
+LIGHTS_FNAME = "face_lights.tbl"  # text table of the "lights" data
+POSES_FNAME = "face_poses.tbl"  # text table of the "poses" data
+PCA_FNAME = "faces.tbl"  # text table of the PCA-whitened image data
+
+
+if __name__ == "__main__":
+ logging.basicConfig(filename="faces_extract.log",
+ format="%(levelname)s:%(message)s",
+ level=logging.INFO)
+
+ # Get original data
+ if not os.path.exists(DATA_FNAME):
+ if not os.path.exists("{}.Z".format(DATA_FNAME)):
+ logging.info("Downloading faces data from '{}'".format(DATA_URL))
+ wget.download(DATA_URL, "{}.Z".format(DATA_FNAME))
+
+ logging.info("Checking SHA-1 digest")
+ with open("{}.Z".format(DATA_FNAME), "rb") as f:
+ if hashlib.sha256(f.read()).hexdigest() != SHA256_DIGEST:
+ logging.error("File seems corrupted; aborting")
+ exit(1)
+
+ logging.info("Uncompressing data into '{}'".format(DATA_FNAME))
+ subprocess.call(["uncompress", "{}.Z".format(DATA_FNAME)])
+
+ # We have the original data; proceed
+ logging.info("Loading faces data")
+ faces = loadmat(DATA_FNAME)
+
+ face_images = faces["images"]
+ logging.info("Writing image table data to {}".format(IMG_FNAME))
+ np.savetxt(IMG_FNAME, face_images.T, fmt="%f")
+
+ if not os.path.exists(IMG_DIR):
+ logging.info("Creating directory {}".format(IMG_DIR))
+ os.makedirs(IMG_DIR, 0o755)
+ elif not os.path.isdir(IMG_DIR):
+ logging.error("File {} exists; aborting".format(IMG_DIR))
+ exit(1)
+
+ logging.info("Writing image files to {}".format(IMG_DIR))
+ for i in range(face_images.shape[1]):
+ image = face_images[:, i]
+ image = image.reshape(64, 64).T
+ path = os.path.join(IMG_DIR, "{}.png".format(i))
+ imsave(path, image)
+
+ logging.info("Writing lights data to {}".format(LIGHTS_FNAME))
+ np.savetxt(LIGHTS_FNAME, faces["lights"].T, fmt="%f")
+
+ logging.info("Writing poses data to {}".format(POSES_FNAME))
+ np.savetxt(POSES_FNAME, faces["poses"].T, fmt="%f")
+
+ logging.info("Writing PCA-whitened data to {}".format(PCA_FNAME))
+ X = faces["images"].T
+ X = PCA(n_components=256, whiten=True).fit_transform(X)
+ np.savetxt(PCA_FNAME, X, fmt="%f")
diff --git a/datasets/faces/source b/datasets/faces/source
new file mode 100644
index 0000000..e89da9b
--- /dev/null
+++ b/datasets/faces/source
@@ -0,0 +1 @@
+http://isomap.stanford.edu/datasets.html