From 268429fa1a85659890db4ddbf54186edae8a4863 Mon Sep 17 00:00:00 2001
From: Corentin Risselin
Date: Tue, 31 Mar 2020 13:46:01 +0900
Subject: [PATCH] Layers, batch generator, memory

---
 .gitignore               |   1 +
 layers.py                |  59 ++++++++++++
 train.py                 |  20 +++++
 utils/batch_generator.py | 189 +++++++++++++++++++++++++++++++++++++++
 utils/memory.py          |   8 ++
 5 files changed, 277 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 layers.py
 create mode 100644 train.py
 create mode 100644 utils/batch_generator.py
 create mode 100644 utils/memory.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7e99e36
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
\ No newline at end of file
diff --git a/layers.py b/layers.py
new file mode 100644
index 0000000..728f026
--- /dev/null
+++ b/layers.py
@@ -0,0 +1,59 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .utils.logger import DummyLogger
+
+
+class LayerInfo():
+    def __init__(self):  # per-layer bookkeeping: memory / op / output counters
+        self.memory = 0.0
+        self.ops = 0.0
+        self.output = 0.0
+
+
+class Layer(nn.Module):
+    # Default layer arguments
+    ACTIVATION = F.leaky_relu
+
+    BATCH_NORM = True
+    BATCH_NORM_TRAINING = False
+    BATCH_NORM_DECAY = 0.95
+
+    REGULARIZER = None
+
+    PADDING = 'SAME'
+
+    IS_TRAINING = False
+    METRICS = False
+    VERBOSE = 0
+    LOGGER = DummyLogger()
+
+    def __init__(self, activation, batch_norm):
+        super(Layer, self).__init__()
+        self.name = 'Layer'
+        self.info = LayerInfo()
+
+        # Preload default
+        self.activation = Layer.ACTIVATION if activation == 0 else activation
+        self.batch_norm = Layer.BATCH_NORM if batch_norm is None else batch_norm  # bool flag until subclass installs a module
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        output = input_data
+        if self.activation is not None:
+            output = self.activation(output)
+        if callable(self.batch_norm):  # was `is not None`: would call the bool flag True(output) -> TypeError
+            output = self.batch_norm(output)
+        return output
+
+
+class Conv2d(Layer):
+    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1,
+                 activation=0, batch_norm=None, **kwargs):
+        super(Conv2d, self).__init__(activation, batch_norm)
+
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, **kwargs)
+        self.batch_norm = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.01) if self.batch_norm else None
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        return super().forward(self.conv(input_data))
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..157704d
--- /dev/null
+++ b/train.py
@@ -0,0 +1,20 @@
+from typing import List, Tuple
+
+import torch
+
+from .utils.memory import human_size
+
+
+def parameter_summary(network: torch.nn.Module) -> List[Tuple[str, Tuple[int, ...], str]]:
+    """ Returns network parameter
+
+    Returns a list of tuple: name, shape (tuple of ints), size (string)
+
+    Args:
+        network (torch.nn.Module): network to parse
+    """
+    parameter_info = []
+    for name, param in network.named_parameters():
+        numpy = param.detach().cpu().numpy()
+        parameter_info.append((name, numpy.shape, human_size(numpy.size * numpy.dtype.itemsize)))
+    return parameter_info
diff --git a/utils/batch_generator.py b/utils/batch_generator.py
new file mode 100644
index 0000000..2b72e93
--- /dev/null
+++ b/utils/batch_generator.py
@@ -0,0 +1,189 @@
+import math
+import os
+from typing import Optional, Tuple
+
+import numpy as np
+
+
+class BatchGenerator:
+
+    def __init__(self, data, label, batch_size, data_processor=None, label_processor=None,
+                 shuffle=True, preload=False, save=None, left_right_flip=False):
+        self.batch_size = batch_size
+        self.shuffle = shuffle
+        self.left_right_flip = left_right_flip
+
+        if not preload:
+            self.data_processor = data_processor
+            self.label_processor = label_processor
+            self.data = data
+            self.label = label
+        else:
+            self.data_processor = None
+            self.label_processor = None
+
+            if save and os.path.exists(save + '_data.npy'):
+                self.data = np.load(save + '_data.npy', allow_pickle=True)
+                self.label = np.load(save + '_label.npy', allow_pickle=True)
+            else:
+                if data_processor:
+                    self.data = np.asarray([data_processor(entry) for entry in data])
+                else:
+                    self.data = data
+                if label_processor:
+                    self.label = np.asarray([label_processor(entry) for entry in label])
+                else:
+                    self.label = label
+                if save:
+                    np.save(save + '_data.npy', self.data, allow_pickle=True)
+                    np.save(save + '_label.npy', self.label, allow_pickle=True)
+
+        self.step_per_epoch = math.ceil(len(self.data) / batch_size)
+
+        self.epoch = 0
+        self.global_step = -1
+        self.step = -1
+
+        self.batch_data: Optional[np.ndarray] = None
+        self.batch_label: Optional[np.ndarray] = None
+
+        self.index_list = np.arange(len(self.data))
+        if shuffle:
+            np.random.shuffle(self.index_list)
+
+    def next_batch(self) -> Tuple[np.ndarray, np.ndarray]:
+        if self.step >= self.step_per_epoch - 1:  # step start at 0
+            self.step = 0
+            self.epoch += 1
+            if self.shuffle:
+                np.random.shuffle(self.index_list)
+        else:
+            self.step += 1
+
+        self.global_step += 1
+        # Loading data
+        if self.data_processor is not None:
+            self.batch_data = []
+            for entry in self.index_list[self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_data.append(self.data_processor(self.data[entry]))
+            self.batch_data = np.asarray(self.batch_data)
+        else:
+            self.batch_data = self.data[
+                self.index_list[self.step * self.batch_size: (self.step + 1) * self.batch_size]]
+        # Loading label
+        if self.label_processor is not None:
+            self.batch_label = []
+            for entry in self.index_list[self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_label.append(self.label_processor(self.label[entry]))
+            self.batch_label = np.asarray(self.batch_label)
+        else:
+            self.batch_label = self.label[
+                self.index_list[self.step * self.batch_size: (self.step + 1) * self.batch_size]]
+        # print('next_batch : epoch {}, step {}/{}, data {}, label {}'.format(
+        #     self.epoch, self.step, self.step_per_epoch - 1, self.batch_data.shape, self.batch_label.shape))
+        if self.left_right_flip and np.random.uniform() > 0.5:  # assumes axis 2 is width (NHWC) -- TODO confirm
+            self.batch_data = self.batch_data[:, :, ::-1]
+            self.batch_label = self.batch_label[:, :, ::-1]
+        return self.batch_data, self.batch_label
+
+
+class SequenceGenerator:
+
+    def __init__(self, data, label, sequence_size, batch_size, data_processor=None, label_processor=None,
+                 preload=False, shuffle=True, save=None):
+        self.sequence_size = sequence_size
+        self.batch_size = batch_size
+        self.shuffle = shuffle
+
+        self.index_list = []
+        for sequence_index in range(len(data)):
+            start_indices = np.expand_dims(
+                np.arange(len(data[sequence_index]) - sequence_size, dtype=np.int64),  # uint8 wrapped past 255
+                axis=-1)
+            start_indices = np.insert(start_indices, 0, sequence_index, axis=1)
+            self.index_list.append(start_indices)
+        self.index_list = np.concatenate(self.index_list, axis=0)
+        self.step_per_epoch = math.ceil(len(self.index_list) / batch_size)
+
+        if not preload:
+            self.data_processor = data_processor
+            self.label_processor = label_processor
+            self.data = data
+            self.label = label
+        else:
+            self.data_processor = None
+            self.label_processor = None
+
+            if save and os.path.exists(save + '_data.npy'):
+                self.data = np.load(save + '_data.npy', allow_pickle=True)
+                self.label = np.load(save + '_label.npy', allow_pickle=True)
+            else:
+                if data_processor:
+                    self.data = np.asarray(
+                        [np.asarray([data_processor(entry) for entry in serie]) for serie in data])
+                else:
+                    self.data = data
+                if label_processor:
+                    self.label = np.asarray(
+                        [np.asarray([label_processor(entry) for entry in serie]) for serie in label])
+                else:
+                    self.label = label
+
+                if save:
+                    np.save(save + '_data.npy', self.data, allow_pickle=True)
+                    np.save(save + '_label.npy', self.label, allow_pickle=True)
+
+        self.epoch = 0
+        self.global_step = -1
+        self.step = -1
+
+        self.batch_data: Optional[np.ndarray] = None
+        self.batch_label: Optional[np.ndarray] = None
+
+        if shuffle:
+            np.random.shuffle(self.index_list)
+
+    def next_batch(self) -> Tuple[np.ndarray, np.ndarray]:
+        if self.step >= self.step_per_epoch - 1:  # step start at 0
+            self.step = 0
+            self.epoch += 1
+            if self.shuffle:
+                np.random.shuffle(self.index_list)
+        else:
+            self.step += 1
+
+        self.global_step += 1
+        # Loading data
+        if self.data_processor is not None:
+            self.batch_data = []
+            for sequence_index, start_index in self.index_list[
+                    self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_data.append(
+                    [self.data_processor(input_data)
+                     for input_data in self.data[sequence_index][start_index: start_index + self.sequence_size]])
+            self.batch_data = np.asarray(self.batch_data)
+        else:
+            self.batch_data = []
+            for sequence_index, start_index in self.index_list[
+                    self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_data.append(
+                    self.data[sequence_index][start_index: start_index + self.sequence_size])
+            self.batch_data = np.asarray(self.batch_data)
+        # Loading label
+        if self.label_processor is not None:
+            self.batch_label = []
+            for sequence_index, start_index in self.index_list[
+                    self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_label.append(
+                    [self.label_processor(input_data)
+                     for input_data in self.label[sequence_index][start_index: start_index + self.sequence_size]])
+            self.batch_label = np.asarray(self.batch_label)
+        else:
+            self.batch_label = []
+            for sequence_index, start_index in self.index_list[
+                    self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_label.append(
+                    self.label[sequence_index][start_index: start_index + self.sequence_size])
+            self.batch_label = np.asarray(self.batch_label)
+
+        return self.batch_data, self.batch_label
diff --git a/utils/memory.py b/utils/memory.py
new file mode 100644
index 0000000..be26ffc
--- /dev/null
+++ b/utils/memory.py
@@ -0,0 +1,8 @@
+def human_size(byte_count: int) -> str:
+    """Output byte amount in human readable format"""
+    amount = float(byte_count)
+    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi']:
+        if amount < 1024.0:
+            break
+        amount /= 1024.0
+    return f'{amount:.2f}{unit}B'