From 268429fa1a85659890db4ddbf54186edae8a4863 Mon Sep 17 00:00:00 2001
From: Corentin Risselin
Date: Tue, 31 Mar 2020 13:46:01 +0900
Subject: [PATCH] Layers, batch generator, memory

---
 .gitignore               |   1 +
 layers.py                |  59 ++++++++++++
 train.py                 |  20 +++++
 utils/batch_generator.py | 189 +++++++++++++++++++++++++++++++++++++++
 utils/memory.py          |   8 ++
 5 files changed, 277 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 layers.py
 create mode 100644 train.py
 create mode 100644 utils/batch_generator.py
 create mode 100644 utils/memory.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7e99e36
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
\ No newline at end of file
diff --git a/layers.py b/layers.py
new file mode 100644
index 0000000..728f026
--- /dev/null
+++ b/layers.py
@@ -0,0 +1,59 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .utils.logger import DummyLogger
+
+
+class LayerInfo():
+    def __init__(self):  # per-layer bookkeeping: memory / op / output counters
+        self.memory = 0.0
+        self.ops = 0.0
+        self.output = 0.0
+
+
+class Layer(nn.Module):
+    # Default layer arguments
+    ACTIVATION = F.leaky_relu
+
+    BATCH_NORM = True
+    BATCH_NORM_TRAINING = False
+    BATCH_NORM_DECAY = 0.95
+
+    REGULARIZER = None
+
+    PADDING = 'SAME'
+
+    IS_TRAINING = False
+    METRICS = False
+    VERBOSE = 0
+    LOGGER = DummyLogger()
+
+    def __init__(self, activation, batch_norm):
+        super(Layer, self).__init__()
+        self.name = 'Layer'
+        self.info = LayerInfo()
+
+        # Preload default
+        self.activation = Layer.ACTIVATION if activation == 0 else activation
+        self.batch_norm = Layer.BATCH_NORM if batch_norm is None else batch_norm  # bool flag until subclass installs a module
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        output = input_data
+        if self.activation is not None:
+            output = self.activation(output)
+        if callable(self.batch_norm):  # was `is not None`: would call the bool flag True(output) -> TypeError
+            output = self.batch_norm(output)
+        return output
+
+
+class Conv2d(Layer):
+    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1,
+                 activation=0, batch_norm=None, **kwargs):
+        super(Conv2d, self).__init__(activation, batch_norm)
+
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, **kwargs)
+        self.batch_norm = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.01) if self.batch_norm else None
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        return super().forward(self.conv(input_data))
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..157704d
--- /dev/null
+++ b/train.py
@@ -0,0 +1,20 @@
+from typing import List, Tuple
+
+import torch
+
+from .utils.memory import human_size
+
+
+def parameter_summary(network: torch.nn.Module) -> List[Tuple[str, Tuple[int, ...], str]]:
+    """ Returns network parameter
+
+    Returns a list of tuple: name, shape (tuple of ints), size (string)
+
+    Args:
+        network (torch.nn.Module): network to parse
+    """
+    parameter_info = []
+    for name, param in network.named_parameters():
+        numpy = param.detach().cpu().numpy()
+        parameter_info.append((name, numpy.shape, human_size(numpy.size * numpy.dtype.itemsize)))
+    return parameter_info
diff --git a/utils/batch_generator.py b/utils/batch_generator.py
new file mode 100644
index 0000000..2b72e93
--- /dev/null
+++ b/utils/batch_generator.py
@@ -0,0 +1,189 @@
+import math
+import os
+from typing import Optional, Tuple
+
+import numpy as np
+
+
+class BatchGenerator:
+
+    def __init__(self, data, label, batch_size, data_processor=None, label_processor=None,
+                 shuffle=True, preload=False, save=None, left_right_flip=False):
+        self.batch_size = batch_size
+        self.shuffle = shuffle
+        self.left_right_flip = left_right_flip
+
+        if not preload:
+            self.data_processor = data_processor
+            self.label_processor = label_processor
+            self.data = data
+            self.label = label
+        else:
+            self.data_processor = None
+            self.label_processor = None
+
+            if save and os.path.exists(save + '_data.npy'):
+                self.data = np.load(save + '_data.npy', allow_pickle=True)
+                self.label = np.load(save + '_label.npy', allow_pickle=True)
+            else:
+                if data_processor:
+                    self.data = np.asarray([data_processor(entry) for entry in data])
+                else:
+                    self.data = data
+                if label_processor:
+                    self.label = np.asarray([label_processor(entry) for entry in label])
+                else:
+                    self.label = label
+                if save:
+                    np.save(save + '_data.npy', self.data, allow_pickle=True)
+                    np.save(save + '_label.npy', self.label, allow_pickle=True)
+
+        self.step_per_epoch = math.ceil(len(self.data) / batch_size)
+
+        self.epoch = 0
+        self.global_step = -1
+        self.step = -1
+
+        self.batch_data: Optional[np.ndarray] = None
+        self.batch_label: Optional[np.ndarray] = None
+
+        self.index_list = np.arange(len(self.data))
+        if shuffle:
+            np.random.shuffle(self.index_list)
+
+    def next_batch(self) -> Tuple[np.ndarray, np.ndarray]:
+        if self.step >= self.step_per_epoch - 1:  # step start at 0
+            self.step = 0
+            self.epoch += 1
+            if self.shuffle:
+                np.random.shuffle(self.index_list)
+        else:
+            self.step += 1
+
+        self.global_step += 1
+        # Loading data
+        if self.data_processor is not None:
+            self.batch_data = []
+            for entry in self.index_list[self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_data.append(self.data_processor(self.data[entry]))
+            self.batch_data = np.asarray(self.batch_data)
+        else:
+            self.batch_data = self.data[
+                self.index_list[self.step * self.batch_size: (self.step + 1) * self.batch_size]]
+        # Loading label
+        if self.label_processor is not None:
+            self.batch_label = []
+            for entry in self.index_list[self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_label.append(self.label_processor(self.label[entry]))
+            self.batch_label = np.asarray(self.batch_label)
+        else:
+            self.batch_label = self.label[
+                self.index_list[self.step * self.batch_size: (self.step + 1) * self.batch_size]]
+        # print('next_batch : epoch {}, step {}/{}, data {}, label {}'.format(
+        #     self.epoch, self.step, self.step_per_epoch - 1, self.batch_data.shape, self.batch_label.shape))
+        if self.left_right_flip and np.random.uniform() > 0.5:  # assumes axis 2 is width (NHWC) -- TODO confirm
+            self.batch_data = self.batch_data[:, :, ::-1]
+            self.batch_label = self.batch_label[:, :, ::-1]
+        return self.batch_data, self.batch_label
+
+
+class SequenceGenerator:
+
+    def __init__(self, data, label, sequence_size, batch_size, data_processor=None, label_processor=None,
+                 preload=False, shuffle=True, save=None):
+        self.sequence_size = sequence_size
+        self.batch_size = batch_size
+        self.shuffle = shuffle
+
+        self.index_list = []
+        for sequence_index in range(len(data)):
+            start_indices = np.expand_dims(
+                np.arange(len(data[sequence_index]) - sequence_size, dtype=np.int64),  # uint8 wrapped past 255
+                axis=-1)
+            start_indices = np.insert(start_indices, 0, sequence_index, axis=1)
+            self.index_list.append(start_indices)
+        self.index_list = np.concatenate(self.index_list, axis=0)
+        self.step_per_epoch = math.ceil(len(self.index_list) / batch_size)
+
+        if not preload:
+            self.data_processor = data_processor
+            self.label_processor = label_processor
+            self.data = data
+            self.label = label
+        else:
+            self.data_processor = None
+            self.label_processor = None
+
+            if save and os.path.exists(save + '_data.npy'):
+                self.data = np.load(save + '_data.npy', allow_pickle=True)
+                self.label = np.load(save + '_label.npy', allow_pickle=True)
+            else:
+                if data_processor:
+                    self.data = np.asarray(
+                        [np.asarray([data_processor(entry) for entry in serie]) for serie in data])
+                else:
+                    self.data = data
+                if label_processor:
+                    self.label = np.asarray(
+                        [np.asarray([label_processor(entry) for entry in serie]) for serie in label])
+                else:
+                    self.label = label
+
+                if save:
+                    np.save(save + '_data.npy', self.data, allow_pickle=True)
+                    np.save(save + '_label.npy', self.label, allow_pickle=True)
+
+        self.epoch = 0
+        self.global_step = -1
+        self.step = -1
+
+        self.batch_data: Optional[np.ndarray] = None
+        self.batch_label: Optional[np.ndarray] = None
+
+        if shuffle:
+            np.random.shuffle(self.index_list)
+
+    def next_batch(self) -> Tuple[np.ndarray, np.ndarray]:
+        if self.step >= self.step_per_epoch - 1:  # step start at 0
+            self.step = 0
+            self.epoch += 1
+            if self.shuffle:
+                np.random.shuffle(self.index_list)
+        else:
+            self.step += 1
+
+        self.global_step += 1
+        # Loading data
+        if self.data_processor is not None:
+            self.batch_data = []
+            for sequence_index, start_index in self.index_list[
+                    self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_data.append(
+                    [self.data_processor(input_data)
+                     for input_data in self.data[sequence_index][start_index: start_index + self.sequence_size]])
+            self.batch_data = np.asarray(self.batch_data)
+        else:
+            self.batch_data = []
+            for sequence_index, start_index in self.index_list[
+                    self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_data.append(
+                    self.data[sequence_index][start_index: start_index + self.sequence_size])
+            self.batch_data = np.asarray(self.batch_data)
+        # Loading label
+        if self.label_processor is not None:
+            self.batch_label = []
+            for sequence_index, start_index in self.index_list[
+                    self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_label.append(
+                    [self.label_processor(input_data)
+                     for input_data in self.label[sequence_index][start_index: start_index + self.sequence_size]])
+            self.batch_label = np.asarray(self.batch_label)
+        else:
+            self.batch_label = []
+            for sequence_index, start_index in self.index_list[
+                    self.step * self.batch_size:(self.step + 1) * self.batch_size]:
+                self.batch_label.append(
+                    self.label[sequence_index][start_index: start_index + self.sequence_size])
+            self.batch_label = np.asarray(self.batch_label)
+
+        return self.batch_data, self.batch_label
diff --git a/utils/memory.py b/utils/memory.py
new file mode 100644
index 0000000..be26ffc
--- /dev/null
+++ b/utils/memory.py
@@ -0,0 +1,8 @@
+def human_size(byte_count: int) -> str:
+    """Output byte amount in human readable format"""
+    amount = float(byte_count)
+    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi']:
+        if amount < 1024.0:
+            break
+        amount /= 1024.0
+    return f'{amount:.2f}{unit}B'