Fix MNIST test data loading, add named buffer summary

This commit is contained in:
Corentin Risselin 2020-04-17 12:08:16 +09:00
commit 7db99ffa51
3 changed files with 10 additions and 3 deletions

View file

@@ -61,6 +61,6 @@ def load_data(data_path: str, flatten: bool = False) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
train_images = load_image_file(os.path.join(data_path, train_images_filename), flatten=flatten)
train_labels = load_label_file(os.path.join(data_path, train_labels_filename))
test_images = load_image_file(os.path.join(data_path, test_images_filename), flatten=flatten)
test_labels = load_label_file(os.path.join(data_path, train_labels_filename))
test_labels = load_label_file(os.path.join(data_path, test_labels_filename))
return train_images, train_labels, test_images, test_labels

View file

@@ -50,7 +50,9 @@ class Conv2d(Layer):
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, **kwargs)
self.batch_norm = nn.BatchNorm2d(
out_channels, eps=0.001, momentum=Layer.BATCH_NORM_MOMENTUM) if self.batch_norm else None
out_channels,
momentum=Layer.BATCH_NORM_MOMENTUM,
track_running_stats=not Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
def forward(self, input_data: torch.Tensor) -> torch.Tensor:
return super().forward(self.conv(input_data))
@@ -62,7 +64,9 @@ class Linear(Layer):
self.fc = nn.Linear(in_channels, out_channels, **kwargs)
self.batch_norm = nn.BatchNorm1d(
out_channels, eps=0.001, momentum=Layer.BATCH_NORM_MOMENTUM) if self.batch_norm else None
out_channels,
momentum=Layer.BATCH_NORM_MOMENTUM,
track_running_stats=not Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
def forward(self, input_data: torch.Tensor) -> torch.Tensor:
return super().forward(self.fc(input_data))

View file

@@ -20,6 +20,9 @@ def parameter_summary(network: torch.nn.Module) -> List[Tuple[str, Tuple[int], str]]:
for name, param in network.named_parameters():
numpy = param.detach().cpu().numpy()
parameter_info.append((name, numpy.shape, human_size(numpy.size * numpy.dtype.itemsize)))
for name, param in network.named_buffers():
numpy = param.detach().cpu().numpy()
parameter_info.append((name, numpy.shape, human_size(numpy.size * numpy.dtype.itemsize)))
return parameter_info