ディープラーニングによる猫画像分類モデルの構築

必要なライブラリ

本プロジェクトでは、以下のライブラリを使用します。

NumPy: Pythonでの科学計算のための基本ライブラリ
h5py: HDF5（.h5）形式のファイルに保存されたデータセットを読み書きするためのライブラリ
Matplotlib: Pythonでグラフを描画するためのライブラリ
PIL: 画像処理のためのライブラリ
SciPy: 画像処理のための追加機能を提供するライブラリ

プロジェクトの概要

このプロジェクトでは、ロジスティック回帰アルゴリズムを使用して、画像に猫が含まれているかどうかを分類するモデルを構築します。以下の手順で進めます。

データの読み込み
データの前処理
モデルの実装
モデルの最適化
モデルの評価
予測の実行

実装手順

1. ライブラリのインポート

# 必要なライブラリのインポート
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from dataset_loader import load_cat_dataset

# Jupyter Notebook内でグラフを表示する設定
%matplotlib inline

2. データの読み込み

# Load the cat / not-cat image dataset and unpack its five parts
(
    train_images,
    train_labels,
    test_images,
    test_labels,
    class_names,
) = load_cat_dataset()

# What each name holds:
# train_images: image data used for training
# train_labels: labels for the training images
# test_images: image data used for testing
# test_labels: labels for the test images
# class_names: class names (["not-cat", "cat"])

3. サンプル画像の表示

# Index of the training example to inspect
image_index = 5
# Render the selected image
plt.imshow(train_images[image_index])
# Print the raw label together with the decoded class name it maps to
sample_label = train_labels[:, image_index]
sample_class_name = class_names[np.squeeze(sample_label)].decode('utf-8')
print(f"ラベル: {sample_label}, これは'{sample_class_name}'の画像です。")

4. データセットの次元情報の取得

# Sample counts are read from the leading axis of each image array
num_train_samples, num_test_samples = train_images.shape[0], test_images.shape[0]

# Image side length, read from axis 1 (the images are treated as square,
# so one value serves as both width and height)
image_size = train_images.shape[1]

for summary_line in (
    f"訓練サンプル数: {num_train_samples}",
    f"テストサンプル数: {num_test_samples}",
    f"画像サイズ: {image_size}x{image_size}",
):
    print(summary_line)

5. データの形状変換

# Flatten every training image into one row, then transpose so that each
# column is one sample: (image_size * image_size * 3, num_train_samples)
train_images_flat = np.transpose(
    train_images.reshape((train_images.shape[0], -1))
)

# Same layout for the test set: (image_size * image_size * 3, num_test_samples)
test_images_flat = np.transpose(
    test_images.reshape((test_images.shape[0], -1))
)

6. データの正規化

# Scale pixel values into the 0-1 range by dividing by 255
train_images_normalized = np.divide(train_images_flat, 255.0)
test_images_normalized = np.divide(test_images_flat, 255.0)

7. シグモイド関数の実装

def sigmoid(z):
    """
    Evaluate the logistic sigmoid 1 / (1 + exp(-z)) element-wise.

    Arguments:
    z -- a scalar or a NumPy array of any size

    Returns:
    s -- sigmoid(z)
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

8. パラメータの初期化

def initialize_parameters(dim):
    """
    Create an all-zero weight column vector and a zero bias.

    Arguments:
    dim -- number of entries in the weight vector

    Returns:
    w -- zero-initialised weight vector of shape (dim, 1)
    b -- bias initialised to 0 (a scalar)
    """
    b = 0
    w = np.zeros(shape=(dim, 1))

    # Sanity checks on the freshly created parameters
    assert w.shape == (dim, 1)
    assert isinstance(b, (float, int))

    return w, b

9. コスト関数と勾配の計算

def forward_backward_propagation(w, b, X, Y):
    """
    Run one forward pass (activations and cost) and one backward pass
    (gradients) of logistic regression.

    Arguments:
    w -- weight vector; its shape must allow np.dot(w.T, X)
    b -- bias added to every pre-activation
    X -- data (features x number of samples)
    Y -- ground-truth labels, combined element-wise with the activations

    Returns:
    gradients -- dict with "dw" (same shape as w) and "db" (scalar)
    cost -- mean cross-entropy cost, squeezed to a 0-d value

    Note the return order: gradients first, then cost.
    """
    m = X.shape[1]  # number of samples

    # Forward pass
    A = sigmoid(np.dot(w.T, X) + b)

    # Clamp the activations strictly inside (0, 1) for the logarithms only.
    # A saturated sigmoid returns exactly 0.0 or 1.0, and the resulting
    # 0 * log(0) term would turn the whole cost into NaN. The bounds are the
    # floats adjacent to 0 and 1, so every other activation is left untouched.
    lowest = np.nextafter(0.0, 1.0)
    highest = np.nextafter(1.0, 0.0)
    A_clipped = np.clip(A, lowest, highest)
    cost = -1/m * np.sum(Y * np.log(A_clipped) + (1 - Y) * np.log(1 - A_clipped))

    # Backward pass (uses the unclipped activations)
    dw = 1/m * np.dot(X, (A - Y).T)
    db = 1/m * np.sum(A - Y)

    # Shape / dtype sanity checks
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())

    gradients = {"dw": dw,
                 "db": db}

    return gradients, cost

10. 最適化処理

def optimize_parameters(w, b, X, Y, iterations, learning_rate, print_cost=False):
    """
    Optimise w and b with plain gradient descent.

    Arguments:
    w -- weight vector
    b -- bias
    X -- training data
    Y -- training labels
    iterations -- number of gradient-descent steps to run
    learning_rate -- step size applied to each gradient
    print_cost -- when True, print the cost every 100 iterations

    Returns:
    params -- dict with the final "w" and "b"
    gradients -- gradient dict from the last forward/backward pass; when no
                 iteration runs, the gradients at the unchanged input parameters
    costs -- list of costs recorded every 100 iterations (i = 0, 100, 200, ...)
    """
    costs = []
    gradients = None

    for i in range(iterations):
        gradients, cost = forward_backward_propagation(w, b, X, Y)

        # Gradient-descent update (rebinding, so the caller's arrays are untouched)
        w = w - learning_rate * gradients["dw"]
        b = b - learning_rate * gradients["db"]

        # Record, and optionally report, the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print(f"反復 {i} 回後のコスト: {cost}")

    if gradients is None:
        # The loop body never ran (iterations <= 0). Previously this path hit an
        # UnboundLocalError on return; evaluate the gradients once instead so
        # the documented return value is always a populated dict.
        gradients, _ = forward_backward_propagation(w, b, X, Y)

    params = {"w": w,
              "b": b}

    return params, gradients, costs

11. 予測関数

def predict(w, b, X):
    """
    Predict binary labels (0 or 1) with learned logistic-regression parameters.

    Arguments:
    w -- weight vector; reshaped to (X.shape[0], 1) before use
    b -- bias
    X -- data to predict on, one sample per column

    Returns:
    predictions -- float array of shape (1, number of samples) holding 1.0
                   where the predicted probability is >= 0.5 and 0.0 otherwise
    """
    m = X.shape[1]
    w = w.reshape(X.shape[0], 1)

    # Predicted probabilities
    A = sigmoid(np.dot(w.T, X) + b)

    # Threshold all probabilities in one vectorised step instead of looping
    # over the columns in Python; astype(float) keeps the 0.0 / 1.0 float
    # output the element-wise version produced.
    predictions = (A >= 0.5).astype(float)

    assert(predictions.shape == (1, m))

    return predictions

12. モデルの統合

def create_model(X_train, Y_train, X_test, Y_test, iterations=2000, learning_rate=0.5, print_cost=False):
    """
    Build a logistic-regression model: initialise, optimise, predict, report.

    Arguments:
    X_train -- training data
    Y_train -- training labels
    X_test -- test data
    Y_test -- test labels
    iterations -- number of optimisation iterations
    learning_rate -- learning rate passed to the optimiser
    print_cost -- flag forwarded to the optimiser to print the cost

    Returns:
    model -- dict with the cost history, the train/test predictions, the
             learned "w" and "b", and the hyperparameters that were used
    """
    # Start from zero parameters sized to the number of input features
    initial_w, initial_b = initialize_parameters(X_train.shape[0])

    # Fit the parameters; the returned gradients are not needed here
    params, _, costs = optimize_parameters(
        initial_w, initial_b, X_train, Y_train, iterations, learning_rate, print_cost
    )
    w, b = params["w"], params["b"]

    # Predict on both splits with the fitted parameters
    train_predictions = predict(w, b, X_train)
    test_predictions = predict(w, b, X_test)

    # Accuracy in percent = 100 minus the mean absolute prediction error in percent
    train_error = np.mean(np.abs(train_predictions - Y_train))
    test_error = np.mean(np.abs(test_predictions - Y_test))
    train_accuracy = 100 - train_error * 100
    test_accuracy = 100 - test_error * 100

    print(f"訓練精度: {train_accuracy:.2f}%")
    print(f"テスト精度: {test_accuracy:.2f}%")

    # Bundle everything the caller may want to inspect
    return dict(
        costs=costs,
        test_predictions=test_predictions,
        train_predictions=train_predictions,
        w=w,
        b=b,
        learning_rate=learning_rate,
        iterations=iterations,
    )

13. モデルの実行

# Build and train the model on the normalised data, printing the cost as it goes
model_info = create_model(
    X_train=train_images_normalized,
    Y_train=train_labels,
    X_test=test_images_normalized,
    Y_test=test_labels,
    iterations=2000,
    learning_rate=0.005,
    print_cost=True,
)

14. サンプル画像の予測

# Index of the test example to inspect
sample_index = 1
# Rebuild the (image_size, image_size, 3) picture from its flattened column and show it
sample_image = test_images_normalized[:, sample_index].reshape((image_size, image_size, 3))
plt.imshow(sample_image)
# Report the true label next to the class name of the stored prediction
prediction = int(model_info["test_predictions"][0, sample_index])
true_label = test_labels[0, sample_index]
predicted_class_name = class_names[prediction].decode('utf-8')
print(f"正解ラベル: {true_label}, 予測結果: '{predicted_class_name}'")

15. 学習曲線のプロット

# Cost history recorded during training, squeezed to a plain array
cost_history = np.squeeze(model_info['costs'])
learning_rate_used = model_info['learning_rate']

# Plot the learning curve
plt.plot(cost_history)
plt.xlabel('反復回数（100回ごと）')
plt.ylabel('コスト')
plt.title(f"学習率 = {learning_rate_used}")
plt.show()

16. 学習率の選択

# Train one model per candidate learning rate
learning_rates = [0.01, 0.001, 0.0001]
models = {}

for lr in learning_rates:
    print(f"学習率: {lr}")
    models[str(lr)] = create_model(train_images_normalized, train_labels, 
                                   test_images_normalized, test_labels, 
                                   iterations=1500, learning_rate=lr, print_cost=False)
    print("\n" + "-"*50 + "\n")

# Overlay the cost histories so the learning rates can be compared
for lr in learning_rates:
    plt.plot(np.squeeze(models[str(lr)]["costs"]), 
             label=f"学習率 = {models[str(lr)]['learning_rate']}")

plt.ylabel('コスト')
# The cost history holds one entry per 100 iterations, so the x axis counts
# hundreds of iterations; the label now says so, matching the single-model plot.
plt.xlabel('反復回数（100回ごと）')
plt.title('学習率による学習曲線の比較')
plt.legend()
plt.show()

タグ: ディープラーニング ロジスティック回帰 画像分類 Python NumPy

5月14日 22:30 投稿

異端開発室

ディープラーニングによる猫画像分類モデルの構築

必要なライブラリ

プロジェクトの概要

実装手順

1. ライブラリのインポート

2. データの読み込み

3. サンプル画像の表示

4. データセットの次元情報の取得

5. データの形状変換

6. データの正規化

7. シグモイド関数の実装

8. パラメータの初期化

9. コスト関数と勾配の計算

10. 最適化処理

11. 予測関数

12. モデルの統合

13. モデルの実行

14. サンプル画像の予測

15. 学習曲線のプロット

16. 学習率の選択

ホットタグ