from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from matplotlib import pyplot as plt
import numpy as np
from time import time
# Load the LFW faces dataset (cached under ./scikit_learn_data/), restricted
# via min_faces_per_person=100 and rescaled with resize=0.5.
lfw = fetch_lfw_people(
    data_home='./scikit_learn_data/',
    min_faces_per_person=100,
    resize=0.5,
)

# Print the available fields and the shape of each main array.
print('lfw:', lfw.keys())
for _label, _array in (('data:', lfw.data),
                       ('images:', lfw.images),
                       ('target:', lfw.target)):
    print(_label, _array.shape)

# Feature matrix (flattened images) and integer class labels.
X = lfw.data
y = lfw.target

# Hold out 25% of the samples for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

n_train = X_train.shape[0]  # number of training samples
v, h = lfw.images.shape[1:3]  # vertical / horizontal size of each image

# Number of training samples and number of test samples.
print('num of train data:', n_train,
      '\nnum of test data:', X_test.shape[0])
# --- Baseline: linear SVM trained directly on the raw pixel features ---
linSVC = svm.LinearSVC()

# Time the training step.
t_start = time()
linSVC.fit(X_train, y_train)
print("time %0.3fs" % (time() - t_start))

# Time prediction on the held-out test split.
t_start = time()
y_pred = linSVC.predict(X_test)
print("time %0.3fs" % (time() - t_start))

# Accuracy on the test split. Reuses y_pred instead of calling
# linSVC.score(), which would run predict() on X_test a second time.
print('----------\n 検証用データの正解率', metrics.accuracy_score(y_test, y_pred))
print('----------\n classification_report\n',
      metrics.classification_report(y_test, y_pred, target_names=lfw.target_names))

# Compute the confusion matrix once and reuse it for printing and plotting.
cmat_raw = metrics.confusion_matrix(y_test, y_pred)
print('----------\n confusion_matrix\n', cmat_raw)

plt.rcParams["figure.figsize"] = (10, 10)
metrics.ConfusionMatrixDisplay(
    confusion_matrix=cmat_raw,
    display_labels=lfw.target_names,
).plot(cmap='YlGn', values_format='d')
# Forward slash: '.\c...' contained an invalid escape sequence and, on
# non-Windows systems, produced a file literally named '.\cmat1.jpg'.
plt.savefig('./cmat1.jpg')
from sklearn import decomposition

# --- PCA: keep as many components as needed to explain 95% of the variance ---
pca = decomposition.PCA(n_components=0.95, svd_solver='full')

# Time fitting the PCA basis on the training data only.
t_start = time()
pca.fit(X_train)
print("time %0.3fs" % (time() - t_start))
print('----------\n 主成分の数:', pca.n_components_)

# Time projecting both splits onto the principal components.
t_start = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t_start))

# Refit the same LinearSVC object on the reduced features
# (this replaces the model previously fitted on raw pixels).
t_start = time()
linSVC.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t_start))

# Time prediction on the projected test split.
t_start = time()
y_pred_pca = linSVC.predict(X_test_pca)
print("time %0.3fs" % (time() - t_start))

# Accuracy on the test split. Reuses y_pred_pca instead of calling
# linSVC.score(), which would run predict() a second time.
print('----------\n 検証用データの正解率', metrics.accuracy_score(y_test, y_pred_pca))
print('----------\n classification_report\n',
      metrics.classification_report(y_test, y_pred_pca, target_names=lfw.target_names))

# Compute the confusion matrix once and reuse it for printing and plotting.
cmat_pca = metrics.confusion_matrix(y_test, y_pred_pca)
print('----------\n confusion_matrix\n', cmat_pca)

plt.rcParams["figure.figsize"] = (10, 10)
metrics.ConfusionMatrixDisplay(
    confusion_matrix=cmat_pca,
    display_labels=lfw.target_names,
).plot(cmap='YlGn', values_format='d')
# Forward slash: '.\c...' contained an invalid escape sequence and, on
# non-Windows systems, produced a file literally named '.\cmat2.jpg'.
plt.savefig('./cmat2.jpg')
# --- Show the leading principal components as images ---
nmax = 30  # maximum number of components to display
n_comp = pca.components_.shape[0]  # number of components PCA actually kept

# Never index past the components that exist.
n_show = min(nmax, n_comp)
# Grid is 5 columns wide; the row count must be an int for plt.subplot
# (the original `nmax/5+1` is a float under Python 3).
n_rows = nmax // 5 + 1

plt.rcParams["figure.figsize"] = (12, 2 * nmax / 5 + 1)
# Open a fresh figure so the subplots are not drawn on top of whatever
# figure is still current (e.g. a previously plotted confusion matrix).
plt.figure()

# Reshape once: each row of components_ becomes a (v, h) image.
component_images = pca.components_.reshape(n_comp, v, h)
for i in range(n_show):
    subplt = plt.subplot(n_rows, 5, i + 1)
    subplt.imshow(component_images[i], cmap='gray')
plt.savefig('./pca.jpg')
plt.show()
# --- Compare original training images with their PCA reconstructions ---
plt.rcParams["figure.figsize"] = (8, 6)
# Open a fresh figure explicitly: on non-interactive backends plt.show()
# does not close the previous figure, so plt.subplot would otherwise draw
# onto it and the saved image would contain leftovers.
plt.figure()

# Map the reduced features back to pixel space (lossy reconstruction).
X_train_inv = pca.inverse_transform(X_train_pca)

# Reshape once instead of on every loop iteration.
original_images = X_train.reshape(n_train, v, h)
restored_images = X_train_inv.reshape(n_train, v, h)

# Top row: originals; bottom row: reconstructions of the same samples.
for i in range(min(5, n_train)):
    plt.subplot(2, 5, i + 1).imshow(original_images[i], cmap='gray')
    plt.subplot(2, 5, i + 6).imshow(restored_images[i], cmap='gray')
plt.savefig('./pca_loss.jpg')
plt.show()
# Plot the explained-variance ratio of each component together with its
# cumulative sum (the cumulative curve is drawn with circle markers).
variance_ratio = pca.explained_variance_ratio_
cumulative_ratio = np.cumsum(variance_ratio)

plt.plot(variance_ratio)
plt.plot(cumulative_ratio, marker="o")

# x spans the component indices, y spans the ratio range [0, 1].
plt.xlim((0, n_comp))
plt.ylim((0, 1.0))
plt.show()
# --- 2-D scatter of the training data on the first two principal components ---
# Open a fresh figure so the scatter is not drawn onto axes left over from
# an earlier plot (which may have clamped axis limits).
plt.figure()

# Person name of every training sample, computed once outside the loop.
train_names = lfw.target_names[y_train]
for name in lfw.target_names:
    # Boolean mask selecting the training samples of this person.
    mask = train_names == name
    plt.scatter(X_train_pca[mask, 0], X_train_pca[mask, 1], label=name, alpha=0.5)
plt.legend(loc=3)
# Forward slash: '.\p...' contained an invalid escape sequence and, on
# non-Windows systems, produced a file literally named '.\pca_2d.jpg'.
plt.savefig('./pca_2d.jpg')
plt.show()
# --- 3-D scatter of the training data on the first three principal components ---
# Importing Axes3D registers the '3d' projection on older Matplotlib versions.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

fig = plt.figure()
# Create the 3-D axes through the figure: constructing Axes3D(fig) directly
# no longer attaches the axes to the figure on recent Matplotlib releases,
# which leaves the saved plot empty.
ax = fig.add_subplot(111, projection='3d')

# Person name of every training sample, computed once outside the loop.
train_names = lfw.target_names[y_train]
for name in lfw.target_names:
    # Boolean mask selecting the training samples of this person.
    mask = train_names == name
    ax.scatter(
        X_train_pca[mask, 0],
        X_train_pca[mask, 1],
        X_train_pca[mask, 2],
        label=name,
        alpha=0.5,
    )
ax.legend(loc=3)
# Forward slash: '.\p...' contained an invalid escape sequence and, on
# non-Windows systems, produced a file literally named '.\pca_3d.jpg'.
plt.savefig('./pca_3d.jpg')
plt.show()