Vision Transformer
This commit is contained in:
parent 90abb84710
commit 06db437aa4
2 changed files with 200 additions and 30 deletions
layers.py: 60 lines changed
@@ -38,20 +38,35 @@ class Layer(nn.Module):
         output = self.batch_norm(output)
         return output
 
+    @staticmethod
+    def add_weight_decay(module: nn.Module, weight_decay: float, exclude=()):
+        decay = []
+        no_decay = []
+        for name, param in module.named_parameters():
+            if not param.requires_grad:
+                continue
+            if len(param.shape) == 1 or name.endswith('.bias') or name in exclude:
+                no_decay.append(param)
+            else:
+                decay.append(param)
+        return [
+            {'params': no_decay, 'weight_decay': 0.0},
+            {'params': decay, 'weight_decay': weight_decay}]
 
 
 class Linear(Layer):
     def __init__(self, in_channels: int, out_channels: int, activation=0, use_batch_norm: bool = None, **kwargs):
         super().__init__(activation)
 
-        self.fc = nn.Linear(in_channels, out_channels, bias=not self.batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
+        self.linear = nn.Linear(in_channels, out_channels, bias=not use_batch_norm, **kwargs)
         self.batch_norm = nn.BatchNorm1d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
             track_running_stats=Layer.BATCH_NORM_TRAINING) if use_batch_norm else None
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
-        return super().forward(self.fc(input_data))
+        return super().forward(self.linear(input_data))
 
 
 class Conv1d(Layer):
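The new `add_weight_decay` helper builds the two parameter groups that optimizers such as `torch.optim.AdamW` accept, so decay is applied only to weight matrices while biases and 1-D parameters (e.g. batch-norm scales) are exempt. A minimal usage sketch, assuming `Layer` is importable from this file; the model and hyperparameters are illustrative, not part of this commit:

    import torch
    import torch.nn as nn
    from layers import Layer  # assumes this module's file name

    # Hypothetical model; any nn.Module works.
    model = nn.Sequential(nn.Linear(16, 32), nn.BatchNorm1d(32), nn.Linear(32, 10))

    # Biases and 1-D parameters get weight_decay=0.0; everything else gets the requested decay.
    param_groups = Layer.add_weight_decay(model, weight_decay=1e-4)
    optimizer = torch.optim.AdamW(param_groups, lr=1e-3)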
@@ -59,9 +74,9 @@ class Conv1d(Layer):
                  stride: Union[int, Tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
         super().__init__(activation)
 
-        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride,
-                              bias=not self.use_batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
+        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride,
+                              bias=not use_batch_norm, **kwargs)
         self.batch_norm = nn.BatchNorm1d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
@@ -72,30 +87,30 @@ class Conv1d(Layer):
 
 
 class Conv2d(Layer):
-    def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3,
-                 stride: Union[int, Tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
-        super().__init__(activation, use_batch_norm)
+    def __init__(self, in_channels: int, out_channels: int, kernel_size: Union[int, tuple[int, int]] = 3,
+                 stride: Union[int, tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
+        super().__init__(activation)
 
-        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride,
-                              bias=not self.use_batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride,
+                              bias=not use_batch_norm, **kwargs)
         self.batch_norm = nn.BatchNorm2d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
-            track_running_stats=Layer.BATCH_NORM_TRAINING) if self.use_batch_norm else None
+            track_running_stats=Layer.BATCH_NORM_TRAINING) if use_batch_norm else None
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
         return super().forward(self.conv(input_data))
 
 
 class Conv3d(Layer):
-    def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3,
+    def __init__(self, in_channels: int, out_channels: int, kernel_size: Union[int, tuple[int, int, int]] = 3,
                  stride: Union[int, Tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
         super().__init__(activation)
 
-        self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride,
-                              bias=not self.use_batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
+        self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride,
+                              bias=not use_batch_norm, **kwargs)
         self.batch_norm = nn.BatchNorm3d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
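A brief aside on the `bias=not use_batch_norm` pattern these layers share: batch norm subtracts the per-channel batch mean, so a constant bias added by the preceding conv or linear layer is cancelled exactly and would only waste parameters. A small self-contained check (names here are illustrative, not from this commit):

    import torch
    import torch.nn as nn

    torch.manual_seed(0)
    conv = nn.Conv2d(3, 8, 3, bias=True)
    bn = nn.BatchNorm2d(8)
    bn.train()  # use batch statistics

    x = torch.randn(4, 3, 16, 16)
    with torch.no_grad():
        y_with_bias = bn(conv(x))
        conv.bias.zero_()              # remove the bias entirely
        y_without_bias = bn(conv(x))

    # Outputs match: the mean subtraction absorbs any constant per-channel shift.
    print(torch.allclose(y_with_bias, y_without_bias, atol=1e-5))  # True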
@@ -110,10 +125,10 @@ class Deconv2d(Layer):
                  stride: Union[int, Tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
         super().__init__(activation)
 
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
         self.deconv = nn.ConvTranspose2d(
             in_channels, out_channels, kernel_size, stride=stride,
-            bias=not self.use_batch_norm, **kwargs)
+            bias=not use_batch_norm, **kwargs)
         self.batch_norm = nn.BatchNorm2d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
@@ -121,3 +136,18 @@ class Deconv2d(Layer):
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
         return super().forward(self.deconv(input_data))
+
+
+class DropPath(nn.Module):
+    def __init__(self, drop_prob=None):
+        super().__init__()
+        self.drop_prob = drop_prob
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        if not self.drop_prob or not self.training:  # identity when disabled (None/0.0) or in eval mode
+            return input_data
+        keep_prob = 1 - self.drop_prob
+        shape = (input_data.shape[0],) + (1,) * (input_data.ndim - 1)
+        random_tensor = keep_prob + torch.rand(shape, dtype=input_data.dtype, device=input_data.device)
+        random_tensor.floor_()  # binarize: 1 with probability keep_prob, else 0
+        return input_data.div(keep_prob) * random_tensor
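The new `DropPath` implements stochastic depth: during training it zeroes an entire residual branch for a random subset of samples and rescales the survivors by `1 / keep_prob`, so the expected output is unchanged. It is typically wrapped around the residual branch of a transformer or ResNet block; a minimal wiring sketch (the block below is hypothetical, not part of this commit, and assumes `DropPath` is importable from this file):

    import torch
    import torch.nn as nn
    from layers import DropPath  # assumes this module's file name

    class ResidualMLP(nn.Module):
        def __init__(self, dim: int, drop_prob: float = 0.1):
            super().__init__()
            self.mlp = nn.Sequential(nn.Linear(dim, dim), nn.GELU(), nn.Linear(dim, dim))
            self.drop_path = DropPath(drop_prob)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # The skip connection is always kept; only the branch is randomly dropped.
            return x + self.drop_path(self.mlp(x))

    block = ResidualMLP(32)
    block.train()
    out = block(torch.randn(8, 32))  # branch output is zeroed for some rows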