diff --git a/dataset/mnist.py b/dataset/mnist.py index 4b8c464..2b09b9e 100644 --- a/dataset/mnist.py +++ b/dataset/mnist.py @@ -61,6 +61,6 @@ def load_data(data_path: str, flatten: bool = False) -> Tuple[np.ndarray, np.nda train_images = load_image_file(os.path.join(data_path, train_images_filename), flatten=flatten) train_labels = load_label_file(os.path.join(data_path, train_labels_filename)) test_images = load_image_file(os.path.join(data_path, test_images_filename), flatten=flatten) - test_labels = load_label_file(os.path.join(data_path, train_labels_filename)) + test_labels = load_label_file(os.path.join(data_path, test_labels_filename)) return train_images, train_labels, test_images, test_labels diff --git a/layers.py b/layers.py index 2300e3c..4648b70 100644 --- a/layers.py +++ b/layers.py @@ -50,7 +50,9 @@ class Conv2d(Layer): self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, **kwargs) self.batch_norm = nn.BatchNorm2d( - out_channels, eps=0.001, momentum=Layer.BATCH_NORM_MOMENTUM) if self.batch_norm else None + out_channels, + momentum=Layer.BATCH_NORM_MOMENTUM, + track_running_stats=not Layer.BATCH_NORM_TRAINING) if self.batch_norm else None def forward(self, input_data: torch.Tensor) -> torch.Tensor: return super().forward(self.conv(input_data)) @@ -62,7 +64,9 @@ class Linear(Layer): self.fc = nn.Linear(in_channels, out_channels, **kwargs) self.batch_norm = nn.BatchNorm1d( - out_channels, eps=0.001, momentum=Layer.BATCH_NORM_MOMENTUM) if self.batch_norm else None + out_channels, + momentum=Layer.BATCH_NORM_MOMENTUM, + track_running_stats=not Layer.BATCH_NORM_TRAINING) if self.batch_norm else None def forward(self, input_data: torch.Tensor) -> torch.Tensor: return super().forward(self.fc(input_data)) diff --git a/train.py b/train.py index 6d6b256..bfd324d 100644 --- a/train.py +++ b/train.py @@ -20,6 +20,9 @@ def parameter_summary(network: torch.nn.Module) -> List[Tuple[str, Tuple[int], s for name, param in network.named_parameters(): numpy = param.detach().cpu().numpy() parameter_info.append((name, numpy.shape, human_size(numpy.size * numpy.dtype.itemsize))) + for name, param in network.named_buffers(): + numpy = param.detach().cpu().numpy() + parameter_info.append((name, numpy.shape, human_size(numpy.size * numpy.dtype.itemsize))) return parameter_info