aboutsummaryrefslogtreecommitdiff
path: root/datasets/faces/faces_extract.py
blob: 3e8b4f3795ed7342a2bd956ebb2278c1acddbf00 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from scipy.io import loadmat
from scipy.misc import imsave
from sklearn.decomposition import PCA

import hashlib
import logging
import numpy as np
import os
import os.path
import sklearn.decomposition
import subprocess
import wget


# Where the compressed source archive lives, and the SHA-256 hex digest the
# downloaded archive is compared against.
DATA_URL = "http://isomap.stanford.edu/face_data.mat.Z"
SHA256_DIGEST = "9c5bc75f204071bbd340aa3ff584757ec784b0630206e526d4cd3809f2650a8a"

# Name of the local data file; the archive name is this name plus ".Z".
DATA_FNAME = "face_data.mat"

# Names of the files and the directory this script produces.
IMG_DIR = "images"
IMG_FNAME = "face_raw.tbl"
LIGHTS_FNAME = "face_lights.tbl"
POSES_FNAME = "face_poses.tbl"
PCA_FNAME = "faces.tbl"


if __name__ == "__main__":
    # Script entry point.  Fetches and verifies the faces archive when the
    # uncompressed data file is not present yet, then exports its contents:
    # the raw image table, one PNG per image, the "lights" and "poses"
    # tables, and a PCA-whitened version of the image table.
    # Exits with status 1 if the archive fails verification, cannot be
    # uncompressed, or the image directory name is taken by a regular file.
    logging.basicConfig(filename="faces_extract.log",
                        format="%(levelname)s:%(message)s",
                        level=logging.INFO)

    # The archive is stored next to the data file with a ".Z" suffix.
    archive_fname = "{}.Z".format(DATA_FNAME)

    # Get original data
    if not os.path.exists(DATA_FNAME):
        if not os.path.exists(archive_fname):
            logging.info("Downloading faces data from '%s'", DATA_URL)
            wget.download(DATA_URL, archive_fname)

        # Verify the archive (freshly downloaded or left over from an earlier
        # run) before handing it to an external tool.
        logging.info("Checking SHA-256 digest")
        with open(archive_fname, "rb") as f:
            digest = hashlib.sha256(f.read()).hexdigest()
        if digest != SHA256_DIGEST:
            logging.error("File seems corrupted; aborting")
            raise SystemExit(1)

        logging.info("Uncompressing data into '%s'", DATA_FNAME)
        try:
            # check_call raises on a non-zero exit status; OSError covers the
            # case where the `uncompress` executable cannot be started at all.
            subprocess.check_call(["uncompress", archive_fname])
        except (OSError, subprocess.CalledProcessError) as err:
            logging.error("Could not uncompress '%s' (%s); aborting",
                          archive_fname, err)
            raise SystemExit(1)

    # We have the original data; proceed
    logging.info("Loading faces data")
    faces = loadmat(DATA_FNAME)

    # One image per column; the tables below are written transposed so that
    # each output row corresponds to one image.
    face_images = faces["images"]
    logging.info("Writing image table data to %s", IMG_FNAME)
    np.savetxt(IMG_FNAME, face_images.T, fmt="%f")

    if not os.path.exists(IMG_DIR):
        logging.info("Creating directory %s", IMG_DIR)
        os.makedirs(IMG_DIR, 0o755)
    elif not os.path.isdir(IMG_DIR):
        logging.error("File %s exists; aborting", IMG_DIR)
        raise SystemExit(1)

    # NOTE(review): scipy.misc.imsave is no longer available in recent SciPy
    # releases -- confirm the SciPy version this script is meant to run with.
    logging.info("Writing image files to %s", IMG_DIR)
    for i, column in enumerate(face_images.T):
        # Each column is a flattened 64x64 image; the transpose after the
        # reshape presumably undoes MATLAB's column-major pixel order --
        # TODO confirm against the dataset description.
        image = column.reshape(64, 64).T
        path = os.path.join(IMG_DIR, "{}.png".format(i))
        imsave(path, image)

    logging.info("Writing lights data to %s", LIGHTS_FNAME)
    np.savetxt(LIGHTS_FNAME, faces["lights"].T, fmt="%f")

    logging.info("Writing poses data to %s", POSES_FNAME)
    np.savetxt(POSES_FNAME, faces["poses"].T, fmt="%f")

    # Project the images (one per row) onto 256 whitened principal components.
    logging.info("Writing PCA-whitened data to %s", PCA_FNAME)
    X = PCA(n_components=256, whiten=True).fit_transform(face_images.T)
    np.savetxt(PCA_FNAME, X, fmt="%f")