ニューラルネットワーク構成要素の実装
全体構造
このプロジェクトではjoelnetという名前のシンプルなニューラルネットワークライブラリを実装します。このライブラリは主要なニューラルネットワーク要素を含んでおり、PyTorchなどのフレームワークの基本概念を理解するのに役立ちます。
テンソルの定義
from numpy import ndarray as Tensor
ここではGPUによる高速化は必要ないため、NumPyのndarray型をそのままTensorという別名で使用します。ndarrayは多次元配列を表現するのに適しています。
レイヤーの実装
from typing import Dict, Callable
import numpy as np
from joelnet.tensor import Tensor
class Layer:
    """Base class for the building blocks of a network.

    A layer owns two dictionaries keyed by parameter name: ``params`` holds
    the parameter tensors and ``grads`` holds the gradients stored for them.
    Subclasses must override both ``forward`` and ``backward``.
    """

    def __init__(self) -> None:
        # Both dictionaries start empty; subclasses fill them in.
        self.grads: Dict[str, Tensor] = {}
        self.params: Dict[str, Tensor] = {}

    def forward(self, inputs: Tensor) -> Tensor:
        """Compute the outputs for ``inputs``; not implemented here."""
        raise NotImplementedError

    def backward(self, grad: Tensor) -> Tensor:
        """Propagate ``grad`` back through the layer; not implemented here."""
        raise NotImplementedError
class Linear(Layer):
    """Affine layer computing ``inputs @ w + b``.

    ``w`` has shape ``(input_size, output_size)`` and ``b`` has shape
    ``(output_size,)``; both are initialised from ``np.random.randn``.
    """

    def __init__(self, input_size: int, output_size: int) -> None:
        super().__init__()
        # Draw the weights first, then the bias, from a standard normal.
        self.params["w"] = np.random.randn(input_size, output_size)
        self.params["b"] = np.random.randn(output_size)

    def forward(self, inputs: Tensor) -> Tensor:
        """Return ``inputs @ w + b`` and remember ``inputs`` for backward."""
        # backward() needs the inputs to compute the weight gradient.
        self.inputs = inputs
        weights = self.params["w"]
        bias = self.params["b"]
        return inputs @ weights + bias

    def backward(self, grad: Tensor) -> Tensor:
        """Store the gradients for ``w`` and ``b``; return the input gradient.

        ``grad`` is the gradient with respect to this layer's output.
        Presumably its first axis is the batch axis (it is summed over
        axis 0 for the bias gradient) -- confirm against callers.
        """
        self.grads["b"] = np.sum(grad, axis=0)
        self.grads["w"] = self.inputs.T @ grad
        weights = self.params["w"]
        return grad @ weights.T
# Type of the callables handed to Activation: takes a Tensor, returns a Tensor.
F = Callable[[Tensor], Tensor]
class Activation(Layer):
    """Layer that applies a function ``f`` to its inputs.

    ``f_prime`` is the callable used in the backward pass; it is evaluated
    at the inputs saved by the most recent ``forward`` call.
    """

    def __init__(self, f: F, f_prime: F) -> None:
        super().__init__()
        self.f = f
        self.f_prime = f_prime

    def forward(self, inputs: Tensor) -> Tensor:
        """Return ``f(inputs)`` and remember ``inputs`` for backward."""
        self.inputs = inputs
        return self.f(inputs)

    def backward(self, grad: Tensor) -> Tensor:
        """Return ``f_prime(saved inputs) * grad``."""
        local_slope = self.f_prime(self.inputs)
        return local_slope * grad
def tanh(x: Tensor) -> Tensor:
    """Return the hyperbolic tangent of ``x`` as computed by ``np.tanh``."""
    activated = np.tanh(x)
    return activated
def tanh_prime(x: Tensor) -> Tensor:
    """Return ``1 - tanh(x) ** 2``, the derivative of tanh evaluated at ``x``."""
    return 1 - np.tanh(x) ** 2
class Tanh(Activation):
    """Activation layer wired up with ``tanh`` and ``tanh_prime``."""

    def __init__(self) -> None:
        super().__init__(f=tanh, f_prime=tanh_prime)
ニューラルネットワーク本体
from typing import Sequence, Iterator, Tuple
from joelnet.tensor import Tensor
from joelnet.layers import Layer
class NeuralNet:
    """A network made of a sequence of layers applied one after another."""

    def __init__(self, layers: Sequence[Layer]) -> None:
        self.layers = layers

    def forward(self, inputs: Tensor) -> Tensor:
        """Feed ``inputs`` through every layer in order; return the result."""
        activations = inputs
        for layer in self.layers:
            activations = layer.forward(activations)
        return activations

    def backward(self, grad: Tensor) -> Tensor:
        """Feed ``grad`` through every layer in reverse order; return the result."""
        upstream = grad
        for layer in reversed(self.layers):
            upstream = layer.backward(upstream)
        return upstream

    def params_and_grads(self) -> Iterator[Tuple[Tensor, Tensor]]:
        """Yield ``(param, grad)`` for every named parameter of every layer.

        The gradient is looked up in ``layer.grads`` under the parameter's
        name, so a missing entry raises ``KeyError``.
        """
        for layer in self.layers:
            for name, param in layer.params.items():
                yield param, layer.grads[name]
損失関数
import numpy as np
from joelnet.tensor import Tensor
class Loss:
    """Interface for loss functions; both methods must be overridden."""

    def loss(self, predicted: Tensor, actual: Tensor) -> float:
        """Return the loss value for ``predicted`` against ``actual``."""
        raise NotImplementedError

    def grad(self, predicted: Tensor, actual: Tensor) -> Tensor:
        """Return the gradient of the loss for ``predicted`` against ``actual``."""
        raise NotImplementedError
class MSE(Loss):
    """Squared-error loss.

    Note that ``loss`` returns the *sum* of the squared differences; the
    code does not divide by the number of elements.
    """

    def loss(self, predicted: Tensor, actual: Tensor) -> float:
        """Return ``np.sum((predicted - actual) ** 2)``."""
        residual = predicted - actual
        return np.sum(residual ** 2)

    def grad(self, predicted: Tensor, actual: Tensor) -> Tensor:
        """Return ``2 * (predicted - actual)``."""
        residual = predicted - actual
        return 2 * residual
最適化アルゴリズム
from joelnet.nn import NeuralNet
class Optimizer:
    """Interface for optimizers; ``step`` must be overridden."""

    def step(self, net: NeuralNet) -> None:
        """Update ``net`` for one step; not implemented here."""
        raise NotImplementedError
class SGD(Optimizer):
    """Gradient-descent optimizer with a fixed learning rate ``lr``."""

    def __init__(self, lr: float = 0.01) -> None:
        self.lr = lr

    def step(self, net: NeuralNet) -> None:
        """Subtract ``lr * grad`` from every parameter yielded by ``net``."""
        for weights, gradient in net.params_and_grads():
            # Augmented assignment is deliberate: it updates the yielded
            # object itself rather than rebinding the loop variable to a
            # new one.
            weights -= self.lr * gradient
データ処理
from typing import Iterator, NamedTuple
import numpy as np
from joelnet.tensor import Tensor
class Batch(NamedTuple):
    """One batch of data: ``inputs`` paired with the matching ``targets``."""

    inputs: Tensor
    targets: Tensor
class DataIterator:
    """Interface for callables that turn a dataset into ``Batch`` objects."""

    def __call__(self, inputs: Tensor, targets: Tensor) -> Iterator[Batch]:
        """Return an iterator of batches; not implemented here."""
        raise NotImplementedError
class BatchIterator(DataIterator):
    """Yield consecutive slices of ``batch_size`` items as ``Batch`` objects.

    When ``shuffle`` is true, the *order of the batches* is shuffled; the
    items inside each batch stay contiguous and in their original order.
    """

    def __init__(self, batch_size: int = 32, shuffle: bool = True) -> None:
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __call__(self, inputs: Tensor, targets: Tensor) -> Iterator[Batch]:
        """Generate batches covering ``inputs`` and the matching ``targets``.

        The last slice may hold fewer than ``batch_size`` items when
        ``len(inputs)`` is not a multiple of ``batch_size``.
        """
        # Offsets at which each batch begins: 0, batch_size, 2 * batch_size, ...
        starts = np.arange(0, len(inputs), self.batch_size)
        if self.shuffle:
            np.random.shuffle(starts)

        for start in starts:
            stop = start + self.batch_size
            yield Batch(inputs[start:stop], targets[start:stop])
トレーニング処理
from typing import Optional

from joelnet.tensor import Tensor
from joelnet.nn import NeuralNet
from joelnet.loss import Loss, MSE
from joelnet.optim import Optimizer, SGD
from joelnet.data import DataIterator, BatchIterator
def train(net: NeuralNet,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: Optional[DataIterator] = None,
          loss: Optional[Loss] = None,
          optimizer: Optional[Optimizer] = None) -> None:
    """Train ``net`` on ``inputs``/``targets`` for ``num_epochs`` epochs.

    Each epoch walks over the batches produced by ``iterator`` and, for each
    batch, runs a forward pass, adds the batch loss to the epoch total, runs
    a backward pass on the loss gradient, and lets ``optimizer`` update the
    network. After every epoch the epoch index and its accumulated loss are
    printed.

    Args:
        net: The network to train; it is updated in place by the optimizer.
        inputs: Training inputs, passed to ``iterator``.
        targets: Training targets, passed to ``iterator``.
        num_epochs: Number of passes over the data.
        iterator: Batch producer; a new ``BatchIterator()`` when ``None``.
        loss: Loss function; a new ``MSE()`` when ``None``.
        optimizer: Parameter updater; a new ``SGD()`` when ``None``.
    """
    # Build the defaults per call. Instances placed directly in the signature
    # would be created once at definition time and shared by every call.
    if iterator is None:
        iterator = BatchIterator()
    if loss is None:
        loss = MSE()
    if optimizer is None:
        optimizer = SGD()

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            predicted = net.forward(batch.inputs)
            epoch_loss += loss.loss(predicted, batch.targets)
            grad = loss.grad(predicted, batch.targets)
            net.backward(grad)
            optimizer.step(net)
        print(epoch, epoch_loss)
Fizz-Buzz問題の解決
問題の定義
- 1から100の数値を入力として受け取る(学習には101から1023の数値を使い、1から100は評価専用とする)
- 3で割り切れる場合は"fizz"
- 5で割り切れる場合は"buzz"
- 3と5の両方で割り切れる場合は"fizzbuzz"
- それ以外の場合は数値をそのまま出力
トレーニングと推論のコード
from typing import List
import numpy as np
import pickle
from joelnet.train import train
from joelnet.nn import NeuralNet
from joelnet.layers import Linear, Tanh
from joelnet.optim import SGD
def fizz_buzz_encode(x: int) -> List[int]:
    """Return the one-hot class vector for ``x``.

    The position of the 1 encodes the class: index 0 for a plain number,
    1 for multiples of 3, 2 for multiples of 5 and 3 for multiples of 15.
    """
    # Check 15 first so that multiples of both 3 and 5 land in class 3.
    if x % 15 == 0:
        hot = 3
    elif x % 5 == 0:
        hot = 2
    elif x % 3 == 0:
        hot = 1
    else:
        hot = 0
    return [int(position == hot) for position in range(4)]
def binary_encode(x: int, num_bits: int = 10) -> List[int]:
    """Return the ``num_bits`` lowest bits of ``x``, least significant first.

    Args:
        x: The integer to encode.
        num_bits: Length of the returned list. The default of 10 matches the
            previously hard-coded width.

    Returns:
        A list of ``num_bits`` integers, each 0 or 1; element ``i`` is bit
        ``i`` of ``x``.
    """
    return [(x >> i) & 1 for i in range(num_bits)]
# Train on 101..1023 only; 1..100 is used solely by the evaluation loop below.
training_numbers = range(101, 1024)
inputs = np.array([binary_encode(number) for number in training_numbers])
targets = np.array([fizz_buzz_encode(number) for number in training_numbers])

# Network shape: 10 inputs -> 50 hidden units with tanh -> 4 outputs.
net = NeuralNet([
    Linear(input_size=10, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4),
])

train(net, inputs, targets, num_epochs=50000, optimizer=SGD(lr=0.001))

print("save model")
# Write the trained network to disk, then read it straight back.
with open('fizzbuzz.pkl', 'wb') as f:
    pickle.dump(net, f)

with open('fizzbuzz.pkl', 'rb') as f:
    loaded_net = pickle.load(f)

# Print, for each number, the predicted label next to the expected label.
for x in range(1, 101):
    predicted = loaded_net.forward(binary_encode(x))
    predicted_idx = np.argmax(predicted)
    actual_idx = np.argmax(fizz_buzz_encode(x))
    # Index 0 stands for "the number itself", so the list depends on x.
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]
    print(x, labels[predicted_idx], labels[actual_idx])