Merge branch 'hoel-bagard/torch_utils-BatchNormModifications'
Commit: 90abb84710
2 changed files with 71 additions and 37 deletions

layers.py (68 lines changed)
@@ -2,36 +2,25 @@ from typing import Union, Tuple
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 
 from .utils.logger import DummyLogger
 
 
-class LayerInfo():
-    def __init__(self):
-        self.memory = 0.0
-        self.ops = 0.0
-        self.output = 0.0
-
-
 class Layer(nn.Module):
     # Default layer arguments
-    ACTIVATION = F.leaky_relu
+    ACTIVATION = torch.nn.LeakyReLU
+    ACTIVATION_KWARGS = {"negative_slope": 0.1}
 
-    BATCH_NORM = True
+    USE_BATCH_NORM = True
     BATCH_NORM_TRAINING = True
     BATCH_NORM_MOMENTUM = 0.01
 
     IS_TRAINING = False
     METRICS = False
-    VERBOSE = 0
     LOGGER = DummyLogger()
 
-    def __init__(self, activation, batch_norm):
+    def __init__(self, activation):
         super().__init__()
-        self.name = 'Layer'
-        self.info = LayerInfo()
 
         # Preload default
         if activation == 0:
             activation = Layer.ACTIVATION
@@ -39,26 +28,27 @@ class Layer(nn.Module):
             self.activation = activation()
         else:
             self.activation = activation
-        self.batch_norm = Layer.BATCH_NORM if batch_norm is None else batch_norm
+        self.batch_norm: torch.nn._BatchNorm
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
         output = input_data
-        if self.activation is not None:
+        if self.activation:
             output = self.activation(output)
-        if self.batch_norm is not None:
+        if self.batch_norm:
             output = self.batch_norm(output)
         return output
 
 
 class Linear(Layer):
-    def __init__(self, in_channels: int, out_channels: int, activation=0, batch_norm=None, **kwargs):
-        super().__init__(activation, batch_norm)
+    def __init__(self, in_channels: int, out_channels: int, activation=0, use_batch_norm: bool = None, **kwargs):
+        super().__init__(activation)
 
         self.fc = nn.Linear(in_channels, out_channels, bias=not self.batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
         self.batch_norm = nn.BatchNorm1d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
-            track_running_stats=Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
+            track_running_stats=Layer.BATCH_NORM_TRAINING) if use_batch_norm else None
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
         return super().forward(self.fc(input_data))
@@ -66,15 +56,16 @@ class Linear(Layer):
 
 class Conv1d(Layer):
     def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3,
-                 stride: Union[int, Tuple[int, int]] = 1, activation=0, batch_norm=None, **kwargs):
-        super().__init__(activation, batch_norm)
+                 stride: Union[int, Tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
+        super().__init__(activation)
 
         self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride,
-                              bias=not self.batch_norm, **kwargs)
+                              bias=not self.use_batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
         self.batch_norm = nn.BatchNorm1d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
-            track_running_stats=Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
+            track_running_stats=Layer.BATCH_NORM_TRAINING) if use_batch_norm else None
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
         return super().forward(self.conv(input_data))
@@ -82,15 +73,16 @@ class Conv1d(Layer):
 
 class Conv2d(Layer):
     def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3,
-                 stride: Union[int, Tuple[int, int]] = 1, activation=0, batch_norm=None, **kwargs):
-        super().__init__(activation, batch_norm)
+                 stride: Union[int, Tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
+        super().__init__(activation)
 
         self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride,
-                              bias=not self.batch_norm, **kwargs)
+                              bias=not self.use_batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
         self.batch_norm = nn.BatchNorm2d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
-            track_running_stats=Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
+            track_running_stats=Layer.BATCH_NORM_TRAINING) if use_batch_norm else None
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
         return super().forward(self.conv(input_data))
@@ -98,15 +90,16 @@ class Conv2d(Layer):
 
 class Conv3d(Layer):
     def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3,
-                 stride: Union[int, Tuple[int, int]] = 1, activation=0, batch_norm=None, **kwargs):
-        super().__init__(activation, batch_norm)
+                 stride: Union[int, Tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
+        super().__init__(activation)
 
         self.conv = nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride,
-                              bias=not self.batch_norm, **kwargs)
+                              bias=not self.use_batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
         self.batch_norm = nn.BatchNorm3d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
-            track_running_stats=Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
+            track_running_stats=Layer.BATCH_NORM_TRAINING) if use_batch_norm else None
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
         return super().forward(self.conv(input_data))
@@ -114,16 +107,17 @@ class Conv3d(Layer):
 
 class Deconv2d(Layer):
     def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3,
-                 stride: Union[int, Tuple[int, int]] = 1, activation=0, batch_norm=None, **kwargs):
-        super().__init__(activation, batch_norm)
+                 stride: Union[int, Tuple[int, int]] = 1, activation=0, use_batch_norm: bool = None, **kwargs):
+        super().__init__(activation)
 
         self.deconv = nn.ConvTranspose2d(
             in_channels, out_channels, kernel_size, stride=stride,
-            bias=not self.batch_norm, **kwargs)
+            bias=not self.use_batch_norm, **kwargs)
+        use_batch_norm = Layer.USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
         self.batch_norm = nn.BatchNorm2d(
             out_channels,
             momentum=Layer.BATCH_NORM_MOMENTUM,
-            track_running_stats=Layer.BATCH_NORM_TRAINING) if self.batch_norm else None
+            track_running_stats=Layer.BATCH_NORM_TRAINING) if use_batch_norm else None
 
     def forward(self, input_data: torch.Tensor) -> torch.Tensor:
         return super().forward(self.deconv(input_data))
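For context, the construction pattern that the new use_batch_norm flag toggles looks roughly like the sketch below. This is a minimal standalone example using plain torch.nn, not the repository's classes; the helper name conv_bn_act and the module-level constants are illustrative and only mirror the class attributes shown in the diff above.

import torch
import torch.nn as nn

# Defaults mirroring the Layer class attributes in the diff (illustrative only).
USE_BATCH_NORM = True          # mirrors Layer.USE_BATCH_NORM
BATCH_NORM_MOMENTUM = 0.01     # mirrors Layer.BATCH_NORM_MOMENTUM
BATCH_NORM_TRAINING = True     # mirrors Layer.BATCH_NORM_TRAINING


def conv_bn_act(in_channels: int, out_channels: int, use_batch_norm: bool = None) -> nn.Sequential:
    # None means "fall back to the global default", as in the new layer constructors.
    use_batch_norm = USE_BATCH_NORM if use_batch_norm is None else use_batch_norm
    layers = [nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=not use_batch_norm)]
    if use_batch_norm:
        layers.append(nn.BatchNorm2d(out_channels,
                                     momentum=BATCH_NORM_MOMENTUM,
                                     track_running_stats=BATCH_NORM_TRAINING))
    layers.append(nn.LeakyReLU(negative_slope=0.1))  # mirrors ACTIVATION / ACTIVATION_KWARGS
    return nn.Sequential(*layers)


block = conv_bn_act(3, 16)
out = block(torch.randn(2, 3, 32, 32))  # -> torch.Size([2, 16, 32, 32])

Dropping the convolution bias when batch norm follows is the usual choice, since the batch-norm shift parameter makes a separate bias redundant.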
transformer/vision_transformer.py (new file, 40 lines)

@@ -0,0 +1,40 @@
+import torch
+import torch.nn as nn
+
+
+class Attention(nn.Module):
+    def __init__(self, dim: int, head_count: int = None, qkv_bias: bool = False, qk_scale: float = None,
+                 attention_drop: float = None, projection_drop: float = None):
+        super().__init__()
+        self.head_count = head_count
+        head_dim = dim // head_count
+        self.scale = qk_scale or head_dim ** -0.5
+
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        self.attention_drop = nn.Dropout(
+            attention_drop if attention_drop is not None else VisionTransformer.ATTENTION_DROP)
+        self.projector = nn.Linear(dim, dim)
+        self.projection_drop = nn.Dropout(
+            projection_drop if projection_drop is not None else VisionTransformer.PROJECTION_DROP)
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        batch_size, sequence_length, channel_count = input_data.shape
+        qkv = self.qkv(input_data).reshape(
+            batch_size, sequence_length, 3, self.head_count, channel_count // self.head_count).permute(
+                2, 0, 3, 1, 4)
+        # (output shape: 3, batch_size, head_count, sequence_length, channel_count / head_count)
+        query, key, value = qkv[0], qkv[1], qkv[2]
+        attention = self.attention_drop(((query @ key.transpose(-2, -1)) * self.scale).softmax(dim=-1))
+        return self.projection_drop(self.projector(
+            (attention @ value).transpose(1, 2).reshape(batch_size, sequence_length, channel_count)))
+
+
+class VisionTransformer(nn.Module):
+    HEAD_COUNT = 8
+    MLP_RATIO = 4.0
+    QKV_BIAS = False
+    ATTENTION_DROP = 0.0
+    PROJECTION_DROP = 0.0
+
+    def __init__(self, dim: int, head_count: int, mlp_ratio: float = None,
+                 qkv_bias: bool = None
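The diff view cuts off in the middle of VisionTransformer.__init__, but the attention computation in Attention.forward can be checked in isolation. A minimal standalone sketch under assumed shapes (batch, sequence, channels); the names qkv_proj and out_proj below are illustrative and not part of the repository:

import torch
import torch.nn as nn

batch_size, sequence_length, dim, head_count = 2, 5, 64, 8
head_dim = dim // head_count
scale = head_dim ** -0.5

qkv_proj = nn.Linear(dim, dim * 3, bias=False)
out_proj = nn.Linear(dim, dim)

x = torch.randn(batch_size, sequence_length, dim)
# Project to q/k/v and split heads: (3, batch, heads, sequence, head_dim).
qkv = qkv_proj(x).reshape(batch_size, sequence_length, 3, head_count, head_dim).permute(2, 0, 3, 1, 4)
query, key, value = qkv[0], qkv[1], qkv[2]

# Scaled dot-product attention over the sequence dimension, then merge heads.
attention = ((query @ key.transpose(-2, -1)) * scale).softmax(dim=-1)
output = out_proj((attention @ value).transpose(1, 2).reshape(batch_size, sequence_length, dim))
print(output.shape)  # torch.Size([2, 5, 64])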