Kerasの主な活用ポイントを確認:
- BatchNormalization:バッチごとに前レイヤーの活性化を正規化し、学習速度を向上
  (各バッチで前レイヤーの出力を正規化し、平均が0、標準偏差が1に近づくように変換を適用)
- TimeDistributed:時系列データの各時点に同じレイヤー(ここではDense)を適用し、複雑なパターンを捉える能力を高める
- Bidirectional:Kerasで提供される、RNNを双方向化するためのラッパーレイヤー
Keras関連ライブラリのインポート:
from keras.layers import (
    Activation,
    BatchNormalization,
    Bidirectional,
    Conv1D,
    Dense,
    GRU,
    Input,
    TimeDistributed,
)
from keras.models import Model
基本RNNアーキテクチャ
def base_rnn_arch(feature_dim, num_classes=29):
    """Build the baseline model: one GRU followed directly by a softmax.

    Args:
        feature_dim: Size of the feature vector at each time step.
        num_classes: Number of GRU units, which is also the width of the
            softmax output at each time step.

    Returns:
        A Keras ``Model`` with an extra ``output_length`` attribute: a
        callable that returns its argument unchanged.
    """
    # The time axis is left as None so sequences of any length are accepted.
    audio_in = Input(name='audio_input', shape=(None, feature_dim))

    recurrent = GRU(
        units=num_classes,
        return_sequences=True,
        implementation=2,
        name='rnn_layer',
    )
    scores = recurrent(audio_in)
    probs = Activation('softmax', name='output_layer')(scores)

    net = Model(inputs=audio_in, outputs=probs)
    net.output_length = lambda x: x
    return net
RNN + TimeDistributedの組み合わせ
def rnn_td_arch(feature_dim, hidden_units, num_classes=29):
    """Build a GRU -> BatchNormalization -> TimeDistributed(Dense) model.

    Args:
        feature_dim: Size of the feature vector at each time step.
        hidden_units: Number of units in the GRU layer.
        num_classes: Width of the per-time-step Dense/softmax output.

    Returns:
        A Keras ``Model`` with an extra ``output_length`` attribute: a
        callable that returns its argument unchanged.
    """
    # The time axis is left as None so sequences of any length are accepted.
    audio_in = Input(name='audio_input', shape=(None, feature_dim))

    recurrent = GRU(
        hidden_units,
        return_sequences=True,
        implementation=2,
        name='rnn',
    )
    seq = recurrent(audio_in)
    seq = BatchNormalization(name='bn_layer')(seq)

    # The same Dense projection is applied at every time step.
    per_step_dense = TimeDistributed(Dense(num_classes), name='td_dense')
    logits = per_step_dense(seq)
    probs = Activation('softmax', name='output')(logits)

    net = Model(inputs=audio_in, outputs=probs)
    net.output_length = lambda x: x
    return net
CNN + RNN + TimeDistributedの統合モデル
def _conv_output_length(input_length, kernel_size, padding, stride):
    """Return how many time steps a 1-D convolution emits (dilation 1).

    Args:
        input_length: Number of input time steps.
        kernel_size: Convolution window width, as an int or a one-element
            tuple/list.
        padding: Padding mode: ``'same'``, ``'causal'`` or ``'valid'``
            (case-insensitive).
        stride: Convolution stride as an int.

    Returns:
        The number of output time steps.

    Raises:
        ValueError: If ``padding`` is not one of the supported modes.
    """
    mode = padding.lower()
    if mode in ('same', 'causal'):
        # Padding keeps the unstrided length equal to the input length.
        unstrided = input_length
    elif mode == 'valid':
        # Without padding, only windows that fit entirely are evaluated.
        if isinstance(kernel_size, (list, tuple)):
            width = kernel_size[0]
        else:
            width = kernel_size
        unstrided = input_length - width + 1
    else:
        raise ValueError(f"unsupported padding mode: {padding!r}")
    # Ceiling division by the stride.
    return (unstrided + stride - 1) // stride


def cnn_rnn_arch(feature_dim, num_filters, kernel_size, stride, padding, hidden_units, num_classes=29):
    """Build a Conv1D -> BN -> GRU -> BN -> TimeDistributed(Dense) model.

    Args:
        feature_dim: Size of the feature vector at each time step.
        num_filters: Number of Conv1D filters.
        kernel_size: Conv1D window width.
        stride: Conv1D stride along the time axis.
        padding: Conv1D padding mode.
        hidden_units: Number of units in the GRU layer.
        num_classes: Width of the per-time-step Dense/softmax output.

    Returns:
        A Keras ``Model`` with an extra ``output_length`` attribute: a
        callable mapping an input sequence length to the number of time
        steps the model emits after the convolution.
    """
    # The time axis is left as None so sequences of any length are accepted.
    input_tensor = Input(shape=(None, feature_dim), name='audio_input')
    conv_layer = Conv1D(
        num_filters,
        kernel_size,
        strides=stride,
        padding=padding,
        activation='relu',
        name='conv',
    )(input_tensor)
    bn_conv = BatchNormalization(name='bn_conv')(conv_layer)
    rnn_layer = GRU(hidden_units, return_sequences=True, implementation=2, name='rnn')(bn_conv)
    bn_rnn = BatchNormalization(name='bn_rnn')(rnn_layer)
    # The same Dense projection is applied at every time step.
    td_dense = TimeDistributed(Dense(num_classes), name='td_dense')(bn_rnn)
    output = Activation('softmax', name='output')(td_dense)
    model = Model(inputs=input_tensor, outputs=output)
    # The length must account for padding and kernel width, not only the
    # stride: with 'valid' padding the convolution drops kernel_size - 1 steps.
    model.output_length = lambda x: _conv_output_length(x, kernel_size, padding, stride)
    return model
深層RNN + TimeDistributed
def deep_rnn_arch(feature_dim, hidden_units, layers, num_classes=29):
    """Build a stack of GRUs followed by BN and TimeDistributed(Dense).

    Args:
        feature_dim: Size of the feature vector at each time step.
        hidden_units: Number of units in every GRU layer.
        layers: How many GRU layers to stack.
        num_classes: Width of the per-time-step Dense/softmax output.

    Returns:
        A Keras ``Model`` with an extra ``output_length`` attribute: a
        callable that returns its argument unchanged.
    """
    # The time axis is left as None so sequences of any length are accepted.
    audio_in = Input(name='audio_input', shape=(None, feature_dim))

    # Feed each GRU the full sequence output of the previous one.
    hidden = audio_in
    for depth in range(layers):
        recurrent = GRU(
            hidden_units,
            return_sequences=True,
            implementation=2,
            name=f'rnn_{depth}',
        )
        hidden = recurrent(hidden)

    # A single batch normalization is applied after the whole stack.
    normalized = BatchNormalization(name='bn')(hidden)
    per_step_dense = TimeDistributed(Dense(num_classes), name='td_dense')
    logits = per_step_dense(normalized)
    probs = Activation('softmax', name='output')(logits)

    net = Model(inputs=audio_in, outputs=probs)
    net.output_length = lambda x: x
    return net
双方向RNN + TimeDistributed
def bidir_rnn_arch(feature_dim, hidden_units, num_classes=29):
    """Build a bidirectional GRU -> BN -> TimeDistributed(Dense) model.

    Relies on ``Bidirectional`` being imported from ``keras.layers`` at the
    top of the file.

    Args:
        feature_dim: Size of the feature vector at each time step.
        hidden_units: Number of units in the wrapped GRU.
        num_classes: Width of the per-time-step Dense/softmax output.

    Returns:
        A Keras ``Model`` with an extra ``output_length`` attribute: a
        callable that returns its argument unchanged.
    """
    # The time axis is left as None so sequences of any length are accepted.
    input_tensor = Input(shape=(None, feature_dim), name='audio_input')
    # implementation=2 matches the GRU configuration of the other builders
    # in this file.
    bidir_layer = Bidirectional(
        GRU(hidden_units, return_sequences=True, implementation=2),
        name='bidir',
    )(input_tensor)
    bn_layer = BatchNormalization(name='bn')(bidir_layer)
    # The same Dense projection is applied at every time step.
    td_dense = TimeDistributed(Dense(num_classes), name='td_dense')(bn_layer)
    output = Activation('softmax', name='output')(td_dense)
    model = Model(inputs=input_tensor, outputs=output)
    model.output_length = lambda x: x
    return model