1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
from scipy.io import loadmat
from scipy.misc import imsave
from sklearn.decomposition import PCA
import hashlib
import logging
import numpy as np
import os
import os.path
import sklearn.decomposition
import subprocess
import wget
# --- Remote source -------------------------------------------------------
# Compressed (.Z) MATLAB file and the SHA-256 hex digest the downloaded
# archive is compared against before it is uncompressed.
DATA_URL = "http://isomap.stanford.edu/face_data.mat.Z"
SHA256_DIGEST = (
    "9c5bc75f204071bbd340aa3ff584757e"
    "c784b0630206e526d4cd3809f2650a8a"
)

# --- Local copy ----------------------------------------------------------
# Name of the uncompressed data file; the archive is stored next to it
# under the same name with a ".Z" suffix.
DATA_FNAME = "face_data.mat"

# --- Outputs -------------------------------------------------------------
IMG_DIR = "images"                # directory receiving one PNG per image
IMG_FNAME = "face_raw.tbl"        # text table of the raw image data
LIGHTS_FNAME = "face_lights.tbl"  # text table of the "lights" variable
POSES_FNAME = "face_poses.tbl"    # text table of the "poses" variable
PCA_FNAME = "faces.tbl"           # text table of the PCA-whitened images
def sha256_of_file(path, chunk_size=1 << 20):
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in blocks of *chunk_size* bytes so the whole archive
    never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter(callable, sentinel) keeps reading until read() returns b"".
        for block in iter(lambda: stream.read(chunk_size), b""):
            digest.update(block)
    return digest.hexdigest()


def fetch_original_data():
    """Make sure the uncompressed data file DATA_FNAME exists locally.

    Nothing is done when DATA_FNAME is already present.  Otherwise the
    compressed archive ("<DATA_FNAME>.Z") is downloaded from DATA_URL unless
    a copy is already on disk, compared against SHA256_DIGEST, and expanded
    with the external ``uncompress`` program.

    Raises:
        SystemExit: with status 1 when the digest does not match or when
            the archive cannot be uncompressed.
    """
    if os.path.exists(DATA_FNAME):
        return

    archive = "{}.Z".format(DATA_FNAME)
    if not os.path.exists(archive):
        logging.info("Downloading faces data from '%s'", DATA_URL)
        wget.download(DATA_URL, archive)

    # The digest being checked is SHA-256, so the log message says so.
    logging.info("Checking SHA-256 digest")
    if sha256_of_file(archive) != SHA256_DIGEST:
        logging.error("File seems corrupted; aborting")
        raise SystemExit(1)

    logging.info("Uncompressing data into '%s'", DATA_FNAME)
    # Fail here with a clear message instead of letting loadmat() trip over
    # a missing file later on.
    try:
        status = subprocess.call(["uncompress", archive])
    except OSError as err:
        logging.error("Could not run 'uncompress' (%s); aborting", err)
        raise SystemExit(1)
    if status != 0:
        logging.error("'uncompress' exited with status %d; aborting", status)
        raise SystemExit(1)


def write_image_files(face_images):
    """Save every column of *face_images* as "<index>.png" inside IMG_DIR.

    IMG_DIR is created (mode 0o755) when it does not exist yet.

    Raises:
        SystemExit: with status 1 when IMG_DIR exists but is not a
            directory.
    """
    if not os.path.exists(IMG_DIR):
        logging.info("Creating directory %s", IMG_DIR)
        os.makedirs(IMG_DIR, 0o755)
    elif not os.path.isdir(IMG_DIR):
        logging.error("File %s exists; aborting", IMG_DIR)
        raise SystemExit(1)

    logging.info("Writing image files to %s", IMG_DIR)
    # Iterating over the transpose yields one column (= one image) at a time.
    for index, column in enumerate(face_images.T):
        # Each column is reshaped to 64x64 and transposed before saving
        # (presumably to undo MATLAB's column-major layout -- confirm).
        pixels = column.reshape(64, 64).T
        # NOTE(review): scipy.misc.imsave was deprecated in SciPy 1.0 and
        # removed in 1.2; this call needs an old SciPy or a port to another
        # image writer.  Left unchanged to avoid adding a new dependency.
        imsave(os.path.join(IMG_DIR, "{}.png".format(index)), pixels)


def main():
    """Convert the faces data set into text tables and PNG images.

    Progress and errors are logged to "faces_extract.log".  The steps are,
    in order: obtain the data file, dump the raw images as a table, write
    one PNG per image, dump the "lights" and "poses" variables, and finally
    dump a 256-component PCA-whitened version of the images.
    """
    logging.basicConfig(filename="faces_extract.log",
                        format="%(levelname)s:%(message)s",
                        level=logging.INFO)

    # Get original data
    fetch_original_data()

    # We have the original data; proceed
    logging.info("Loading faces data")
    faces = loadmat(DATA_FNAME)
    face_images = faces["images"]

    logging.info("Writing image table data to %s", IMG_FNAME)
    np.savetxt(IMG_FNAME, face_images.T, fmt="%f")

    write_image_files(face_images)

    logging.info("Writing lights data to %s", LIGHTS_FNAME)
    np.savetxt(LIGHTS_FNAME, faces["lights"].T, fmt="%f")

    logging.info("Writing poses data to %s", POSES_FNAME)
    np.savetxt(POSES_FNAME, faces["poses"].T, fmt="%f")

    logging.info("Writing PCA-whitened data to %s", PCA_FNAME)
    # One row per image, as in the raw table written above.
    whitened = PCA(n_components=256, whiten=True).fit_transform(face_images.T)
    np.savetxt(PCA_FNAME, whitened, fmt="%f")


if __name__ == "__main__":
    main()
|