from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split # 学習データと検証用データ分割用ライブラリ
from sklearn.preprocessing import StandardScaler # 標準化ライブラリ
from sklearn import decomposition # 主成分分析用ライブラリ
from sklearn import svm # SVC用ライブラリ
from sklearn import metrics # 指標データ用ライブラリ
from matplotlib import pyplot as plt
# Load the LFW face data set, restricted to people with at least 100
# pictures; `resize=0.5` is the resize ratio applied to every face image.
lfw = fetch_lfw_people(
    data_home='./scikit_learn_data/',
    min_faces_per_person=100,
    resize=0.5,
)
X = lfw.data
y = lfw.target

# Hold out 25% of the samples for validation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Vertical / horizontal image size, needed later to turn the flat feature
# rows back into 2-D pictures.
v = lfw.images.shape[1]
h = lfw.images.shape[2]
# Number of training samples.
n_train = X_train.shape[0]

# Number of training samples and number of validation samples.
print('num of train data:', n_train, '\nnum of test data:', X_test.shape[0])
# --- Experiment 1: PCA + linear SVM on the raw (non-standardized) features ---

# A float n_components makes PCA keep as many components as are needed to
# explain that fraction (95%) of the variance. PCA is fitted on the training
# split only and then applied to both splits.
pca = decomposition.PCA(n_components=0.95, svd_solver='full', random_state=0)
pca.fit(X_train)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
# Training data mapped back to pixel space from the reduced representation.
# NOTE(review): not used anywhere in the visible script; kept in case later
# code inspects it.
X_train_inv = pca.inverse_transform(X_train_pca)
print('----------\n 主成分の数:', pca.n_components_)

# Linear SVM trained on the PCA-reduced features.
linSVC = svm.LinearSVC()
linSVC.fit(X_train_pca, y_train)
y_pred_pca = linSVC.predict(X_test_pca)
print('----------\n 検証用データの正解率', linSVC.score(X_test_pca, y_test))
print('----------\n classification_report\n',
      metrics.classification_report(y_test, y_pred_pca, target_names=lfw.target_names))

# Compute the confusion matrix once and reuse it for printing and plotting.
cmat_nostd = metrics.confusion_matrix(y_test, y_pred_pca)
print('----------\n confusion_matrix\n', cmat_nostd)

# plot() creates its own figure, so the figure size has to be set beforehand.
plt.rcParams["figure.figsize"] = (10, 10)
metrics.ConfusionMatrixDisplay(
    confusion_matrix=cmat_nostd,
    display_labels=lfw.target_names,
).plot(cmap='YlGn', values_format='d')
# Forward slash: '.\c...' contained an invalid escape sequence and, outside
# Windows, the backslash would have become part of the file name.
plt.savefig('./cmat_nostd.jpg')
# --- Experiment 2: standardize, then PCA + linear SVM ---

# The scaler is fitted on the training split only, and the same
# transformation is then applied to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Same PCA settings as the non-standardized run (keep 95% of the variance),
# fitted this time on the standardized training data.
pca = decomposition.PCA(n_components=0.95, svd_solver='full', random_state=0)
pca.fit(X_train_std)
X_train_std_pca = pca.transform(X_train_std)
X_test_std_pca = pca.transform(X_test_std)
# Standardized training data mapped back from the reduced representation.
# NOTE(review): not used anywhere in the visible script; kept in case later
# code inspects it.
X_train_std_inv = pca.inverse_transform(X_train_std_pca)
print('----------\n 主成分の数:', pca.n_components_)

# Re-fit the existing LinearSVC instance on the standardized PCA features.
linSVC.fit(X_train_std_pca, y_train)
y_pred_std_pca = linSVC.predict(X_test_std_pca)
print('----------\n 検証用データの正解率', linSVC.score(X_test_std_pca, y_test))
print('----------\n classification_report\n',
      metrics.classification_report(y_test, y_pred_std_pca, target_names=lfw.target_names))

# Compute the confusion matrix once and reuse it for printing and plotting.
cmat_std = metrics.confusion_matrix(y_test, y_pred_std_pca)
print('----------\n confusion_matrix\n', cmat_std)

# plot() creates its own figure, so the figure size has to be set beforehand.
plt.rcParams["figure.figsize"] = (10, 10)
metrics.ConfusionMatrixDisplay(
    confusion_matrix=cmat_std,
    display_labels=lfw.target_names,
).plot(cmap='YlGn', values_format='d')
# Forward slash: '.\c...' contained an invalid escape sequence and, outside
# Windows, the backslash would have become part of the file name.
plt.savefig('./cmat_std.jpg')
# --- Visual comparison: first five training faces, raw vs. standardized ---

plt.rcParams["figure.figsize"] = (15, 10)
# Start a fresh figure. Without it plt.subplot() would add the axes to the
# figure that is current at this point (the last confusion-matrix plot), and
# the figsize set above would never take effect.
plt.figure()

# The 4x5 grid is kept from the original layout; only the top two rows are
# filled: row 1 shows raw images, row 2 the standardized ones.
for i in range(5):
    subplt = plt.subplot(4, 5, i + 1)
    # Each feature row is a flattened v x h image.
    subplt.imshow(X_train[i].reshape(v, h), cmap='gray')
for i in range(5):
    subplt = plt.subplot(4, 5, (i + 1) + 5)
    subplt.imshow(X_train_std[i].reshape(v, h), cmap='gray')

plt.savefig('./std.jpg')
plt.show()