Fix MNIST test data loading, add named buffer summary
This commit is contained in:
parent
ced13a4351
commit
7db99ffa51
3 changed files with 10 additions and 3 deletions
|
|
@ -61,6 +61,6 @@ def load_data(data_path: str, flatten: bool = False) -> Tuple[np.ndarray, np.nda
|
|||
train_images = load_image_file(os.path.join(data_path, train_images_filename), flatten=flatten)
|
||||
train_labels = load_label_file(os.path.join(data_path, train_labels_filename))
|
||||
test_images = load_image_file(os.path.join(data_path, test_images_filename), flatten=flatten)
|
||||
test_labels = load_label_file(os.path.join(data_path, train_labels_filename))
|
||||
test_labels = load_label_file(os.path.join(data_path, test_labels_filename))
|
||||
|
||||
return train_images, train_labels, test_images, test_labels
|
||||
|
|
|
|||
|
|
@ -50,7 +50,9 @@ class Conv2d(Layer):
|
|||
|
||||
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, **kwargs)
|
||||
self.batch_norm = nn.BatchNorm2d(
|
||||
out_channels, eps=0.001, momentum=Layer.BATCH_NORM_MOMENTUM) if self.batch_norm else None
|
||||
out_channels,
|
||||
momentum=Layer.BATCH_NORM_MOMENTUM,
|
||||
track_running_stats=not Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
|
||||
|
||||
def forward(self, input_data: torch.Tensor) -> torch.Tensor:
    """Run the 2-D convolution, then hand the result to the base class.

    `Layer.forward` applies whatever shared post-processing the base
    class defines (presumably batch norm / activation — see Layer).
    """
    convolved = self.conv(input_data)
    return super().forward(convolved)
|
||||
|
|
@ -62,7 +64,9 @@ class Linear(Layer):
|
|||
|
||||
self.fc = nn.Linear(in_channels, out_channels, **kwargs)
|
||||
self.batch_norm = nn.BatchNorm1d(
|
||||
out_channels, eps=0.001, momentum=Layer.BATCH_NORM_MOMENTUM) if self.batch_norm else None
|
||||
out_channels,
|
||||
momentum=Layer.BATCH_NORM_MOMENTUM,
|
||||
track_running_stats=not Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
|
||||
|
||||
def forward(self, input_data: torch.Tensor) -> torch.Tensor:
    """Apply the fully-connected layer, then the base class's forward.

    `Layer.forward` performs the shared post-processing step
    (presumably batch norm / activation — see Layer).
    """
    projected = self.fc(input_data)
    return super().forward(projected)
|
||||
|
|
|
|||
3
train.py
3
train.py
|
|
@ -20,6 +20,9 @@ def parameter_summary(network: torch.nn.Module) -> List[Tuple[str, Tuple[int], s
|
|||
for name, param in network.named_parameters():
|
||||
numpy = param.detach().cpu().numpy()
|
||||
parameter_info.append((name, numpy.shape, human_size(numpy.size * numpy.dtype.itemsize)))
|
||||
for name, param in network.named_buffers():
|
||||
numpy = param.detach().cpu().numpy()
|
||||
parameter_info.append((name, numpy.shape, human_size(numpy.size * numpy.dtype.itemsize)))
|
||||
return parameter_info
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue