Avoid use_batch_norm as a layer instance variable
parent fe11f3e6d5
commit 770a9a4f82
2 changed files with 57 additions and 14 deletions
transformer/vision_transformer.py  (new file, 40 additions)
@@ -0,0 +1,40 @@
import torch
import torch.nn as nn


class Attention(nn.Module):
    def __init__(self, dim: int, head_count: int = None, qkv_bias: bool = False, qk_scale: float = None,
                 attention_drop: float = None, projection_drop: float = None):
        super().__init__()
        # Fall back to the class-level default, matching the pattern used for the dropout rates
        self.head_count = head_count if head_count is not None else VisionTransformer.HEAD_COUNT
        head_dim = dim // self.head_count
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attention_drop = nn.Dropout(
            attention_drop if attention_drop is not None else VisionTransformer.ATTENTION_DROP)
        self.projector = nn.Linear(dim, dim)
        self.projection_drop = nn.Dropout(
            projection_drop if projection_drop is not None else VisionTransformer.PROJECTION_DROP)

    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
        batch_size, sequence_length, channel_count = input_data.shape
        # Project to queries, keys and values in a single linear pass, then split into heads
        qkv = self.qkv(input_data).reshape(
            batch_size, sequence_length, 3, self.head_count, channel_count // self.head_count).permute(
            2, 0, 3, 1, 4)
        # (output shape: 3, batch_size, head_count, sequence_length, channel_count / head_count)
        query, key, value = qkv[0], qkv[1], qkv[2]
        # Scaled dot-product attention weights, softmax over the key dimension
        attention = self.attention_drop(((query @ key.transpose(-2, -1)) * self.scale).softmax(dim=-1))
        # Merge the heads back together and apply the output projection
        return self.projection_drop(self.projector(
            (attention @ value).transpose(1, 2).reshape(batch_size, sequence_length, channel_count)))


class VisionTransformer(nn.Module):
    HEAD_COUNT = 8
    MLP_RATIO = 4.0
    QKV_BIAS = False
    ATTENTION_DROP = 0.0
    PROJECTION_DROP = 0.0

    def __init__(self, dim: int, head_count: int, mlp_ratio: float = None,
                 qkv_bias: bool = None
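
As committed, the file ends mid-signature in VisionTransformer.__init__ and will not import yet. For reference, a minimal, hypothetical smoke test for the Attention block above, assuming the class gets completed so the module parses; the dimensions are illustrative and not part of the commit:

import torch

# Hypothetical usage, not from this commit: run the Attention block on a
# ViT-like token sequence (batch of 2, 197 patch tokens, embedding dim 64).
attention = Attention(dim=64, head_count=8)  # dim must be divisible by head_count
tokens = torch.randn(2, 197, 64)             # (batch_size, sequence_length, channel_count)
output = attention(tokens)
assert output.shape == tokens.shape          # self-attention preserves the token shape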