Add SSD

2021-05-17 21:05:15 +09:00 · 2021-05-17 21:05:15 +09:00 · 092f4acc3b
commit 092f4acc3b
parent 8d13de5711
2 changed files with 251 additions and 0 deletions
--- a/ssd/box.py
+++ b/ssd/box.py
@ -0,0 +1,86 @@
+import numpy as np
+
+
+def create_box(y_pos: float, x_pos: float, height: float, width: float) -> tuple[float, float, float, float]:
+    """Clip a center-format box with ``check_rectangle`` and return it in center format.
+
+    The box is expanded to its ``(y_min, x_min, y_max, x_max)`` corners, passed
+    through ``check_rectangle``, and converted back, so the returned center and
+    size describe the clipped box rather than the requested one.
+
+    Args:
+        y_pos: Vertical center of the box.
+        x_pos: Horizontal center of the box.
+        height: Box height.
+        width: Box width.
+
+    Returns:
+        ``(center_y, center_x, height, width)`` of the clipped box.
+    """
+    half_height = height / 2
+    half_width = width / 2
+    top, left, bottom, right = check_rectangle(
+        y_pos - half_height, x_pos - half_width, y_pos + half_height, x_pos + half_width)
+    center_y = (top + bottom) / 2
+    center_x = (left + right) / 2
+    return center_y, center_x, bottom - top, right - left
+
+
+def check_rectangle(y_min: float, x_min: float, y_max: float, x_max: float) -> tuple[float, float, float, float]:
+    """Clamp every corner coordinate of a rectangle into the range [0, 1].
+
+    Each coordinate is clamped independently; the function does not reorder
+    the corners or check that the minimum is below the maximum.
+
+    Returns:
+        ``(y_min, x_min, y_max, x_max)`` with every value limited to [0, 1].
+    """
+    def _clamp_min_corner(value):
+        # Negative values snap to 0; only values strictly above 1 snap to 1.
+        if value < 0:
+            return 0
+        return 1 if value > 1 else value
+
+    def _clamp_max_corner(value):
+        # Negative values snap to 0; values at or above 1 snap to 1.
+        if value < 0:
+            return 0
+        return 1 if value >= 1 else value
+
+    return (_clamp_min_corner(y_min), _clamp_min_corner(x_min),
+            _clamp_max_corner(y_max), _clamp_max_corner(x_max))
+
+
+def get_boxes(predictions: np.ndarray, anchors: np.ndarray, class_index: int) -> np.ndarray:
+    """Decode predicted offsets against their anchors and attach one score column.
+
+    Columns 0-3 of ``predictions`` are read as ``(dy, dx, log_h, log_w)``:
+    the center offsets are scaled by the anchor size and added to the anchor
+    center, and the size terms are exponentiated and multiplied by the anchor
+    size. Each decoded box is then clipped to the unit square and converted
+    back to center format.
+
+    Args:
+        predictions: 2-D array with one row per anchor; columns 0-3 hold the
+            box regression values and ``class_index`` selects the score column.
+        anchors: 2-D array of ``(center_y, center_x, height, width)`` rows.
+        class_index: Column of ``predictions`` whose values are appended as
+            the fifth column of the result.
+
+    Returns:
+        Array of shape ``(len(anchors), 5)`` holding
+        ``(center_y, center_x, height, width, score)`` rows. An empty input
+        yields an empty ``(0, 5)`` array.
+    """
+    # Work in a float64 buffer regardless of the input dtypes.
+    decoded = np.zeros(anchors.shape)
+    decoded[:, 0] = (predictions[:, 0] * anchors[:, 2]) + anchors[:, 0]
+    decoded[:, 1] = (predictions[:, 1] * anchors[:, 3]) + anchors[:, 1]
+    decoded[:, 2] = np.exp(predictions[:, 2]) * anchors[:, 2]
+    decoded[:, 3] = np.exp(predictions[:, 3]) * anchors[:, 3]
+
+    # Clip the corners to [0, 1] for all boxes at once instead of looping over
+    # rows in Python; this also keeps the result 2-D when there are no boxes.
+    half_heights = decoded[:, 2] / 2
+    half_widths = decoded[:, 3] / 2
+    y_min = np.clip(decoded[:, 0] - half_heights, 0, 1)
+    x_min = np.clip(decoded[:, 1] - half_widths, 0, 1)
+    y_max = np.clip(decoded[:, 0] + half_heights, 0, 1)
+    x_max = np.clip(decoded[:, 1] + half_widths, 0, 1)
+    clipped = np.stack(
+        [(y_min + y_max) / 2, (x_min + x_max) / 2, y_max - y_min, x_max - x_min], axis=1)
+
+    return np.concatenate([clipped, predictions[:, class_index:class_index + 1]], axis=1)
+
+
+def fast_nms(boxes: np.ndarray, min_iou: float) -> np.ndarray:
+    """Greedy non-maximum suppression over center-format boxes.
+
+    The highest-scoring remaining box is kept, every other remaining box whose
+    IoU with it exceeds ``min_iou`` is discarded, and the process repeats
+    until no boxes remain.
+
+    Args:
+        boxes: 2-D array whose rows are
+            ``(center_y, center_x, height, width, score)``.
+        min_iou: IoU threshold; a box overlapping a kept box by more than this
+            value is suppressed.
+
+    Returns:
+        The kept rows of ``boxes`` in descending score order. An empty input
+        is returned unchanged as an (empty) array.
+    """
+    boxes = np.asarray(boxes)
+    if len(boxes) == 0:
+        # Return an array (not a list) so callers always get the same type.
+        return boxes
+
+    # Convert the center format to corner coordinates.
+    half_heights = boxes[:, 2] / 2
+    half_widths = boxes[:, 3] / 2
+    y_min = boxes[:, 0] - half_heights
+    y_max = boxes[:, 0] + half_heights
+    x_min = boxes[:, 1] - half_widths
+    x_max = boxes[:, 1] + half_widths
+    scores = boxes[:, 4]
+
+    areas = (x_max - x_min) * (y_max - y_min)
+    # Ascending sort: the best remaining candidate is always the last entry.
+    order = np.argsort(scores)
+
+    picked = []
+    while order.size > 0:
+        best = order[-1]
+        rest = order[:-1]
+        picked.append(best)
+
+        # Clamp each intersection side at zero: for boxes that are disjoint on
+        # both axes the raw differences are both negative and their product
+        # would otherwise look like a positive overlap.
+        inter_heights = np.maximum(
+            0, np.minimum(y_max[best], y_max[rest]) - np.maximum(y_min[best], y_min[rest]))
+        inter_widths = np.maximum(
+            0, np.minimum(x_max[best], x_max[rest]) - np.maximum(x_min[best], x_min[rest]))
+        inter_areas = inter_heights * inter_widths
+
+        union_areas = (areas[rest] + areas[best]) - inter_areas
+        # Degenerate (zero-area) pairs get an IoU of 0 instead of dividing by zero.
+        iou = np.divide(
+            inter_areas, union_areas,
+            out=np.zeros_like(inter_areas, dtype=float), where=union_areas > 0)
+
+        # Keep only the candidates that do not overlap the picked box too much.
+        order = rest[~(iou > min_iou)]
+
+    return boxes[picked]
--- a/ssd/ssd.py
+++ b/ssd/ssd.py
@ -0,0 +1,165 @@
+import colorsys
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from .box import check_rectangle
+from ..layers import Conv2d
+
+
+class SSD(nn.Module):
+    """Single-shot detector head stacked on top of a base feature network.
+
+    The base network output is fed through a chain of extra convolution
+    layers; a ``Detector`` reads the feature map at each stage and all detector
+    outputs are concatenated into one ``(batch, anchors, location_dim +
+    class_count)`` tensor whose class part is softmax-normalized.
+    """
+
+    class Detector(nn.Module):
+        """3x3 convolution head whose output is permuted to a channels-last layout."""
+
+        def __init__(self, input_features: int, output_features: int):
+            super().__init__()
+            self.conv = Conv2d(input_features, output_features, kernel_size=3, padding=1,
+                               batch_norm=False, activation=None)
+            # Result of the most recent forward pass.
+            self.output = None
+
+        def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+            # Move axis 1 (presumably channels, NCHW -> NHWC) to the end so the
+            # per-cell predictions are contiguous in the last axis.
+            self.output = self.conv(input_data).permute(0, 2, 3, 1)
+            return self.output
+
+    class DetectorMerge(nn.Module):
+        """Apply a softmax to the class part of the concatenated detector outputs."""
+
+        def __init__(self, location_dimmension: int):
+            super().__init__()
+            self.location_dim = location_dimmension
+
+        def forward(self, detector_outputs: torch.Tensor) -> torch.Tensor:
+            # The first ``location_dim`` entries of the last axis pass through
+            # untouched; the remaining entries are normalized with a softmax.
+            return torch.cat(
+                [detector_outputs[:, :, :self.location_dim],
+                 torch.softmax(detector_outputs[:, :, self.location_dim:], dim=2)], dim=2)
+
+    class AnchorInfo:
+        """Descriptive record for a single anchor box."""
+
+        def __init__(self, center: tuple[float, float], size: tuple[float, float],
+                     index: int, layer_index: int, map_index: tuple[int, int], color_index: int,
+                     ratio: float, size_factor: float):
+            self.index = index
+            self.layer_index = layer_index
+            self.map_index = map_index
+            self.color_index = color_index
+            self.ratio = ratio
+            self.size_factor = size_factor
+            self.center = center
+            self.size = size
+            # Corner form (y_min, x_min, y_max, x_max) as returned by check_rectangle.
+            self.box = check_rectangle(
+                center[0] - (size[0] / 2), center[1] - (size[1] / 2),
+                center[0] + (size[0] / 2), center[1] + (size[1] / 2))
+
+        def __repr__(self):
+            return (f'{self.__class__.__name__}'
+                    f'(index:{self.index}, layer:{self.layer_index}, coord:{self.map_index}'
+                    f', center:({self.center[0]:.03f}, {self.center[1]:.03f})'
+                    f', size:({self.size[0]:.03f}, {self.size[1]:.03f})'
+                    f', ratio:{self.ratio:.03f}, size_factor:{self.size_factor:.03f}'
+                    f', y:[{self.box[0]:.03f}:{self.box[2]:.03f}]'
+                    f', x:[{self.box[1]:.03f}:{self.box[3]:.03f}])')
+
+        def __array__(self, dtype=None, copy=None):
+            # Accept the optional ``dtype``/``copy`` arguments NumPy may pass to
+            # ``__array__``; a fresh array is built on every call.
+            return np.array([*self.center, *self.size], dtype=dtype)
+
+    def __init__(self, base_network: nn.Module, input_sample: torch.Tensor, classes: list[str],
+                 location_dimmension: int, layer_channels: list[int], layer_box_ratios: list[list[float]],
+                 layer_args: list[dict], box_size_factors: list[float]):
+        """Build the extra layers, the detectors and the anchor set.
+
+        Args:
+            base_network: Feature extractor applied to the input first.
+            input_sample: Example input, run through ``base_network`` once to
+                discover the feature-map shapes; ``shape[3] / shape[2]`` is
+                used as the image aspect ratio.
+            classes: Class names; a leading ``'none'`` entry is prepended.
+            location_dimmension: Number of box-regression values per anchor.
+            layer_channels: Output channel count of each extra layer.
+            layer_box_ratios: For each layer, the aspect ratios of its anchors.
+            layer_args: For each layer, keyword arguments for its ``Conv2d``;
+                an entry containing the key ``'disable'`` adds a detector but
+                no convolution.
+            box_size_factors: Anchor size factors, paired with the ratios of
+                each layer.
+        """
+        super().__init__()
+
+        self.location_dim = location_dimmension
+        self.classes = ['none'] + classes
+        self.class_count = len(self.classes)
+        # Read shapes straight from the tensors: converting to NumPy fails for
+        # tensors that live on another device or require grad.
+        self.base_input_shape = tuple(input_sample.shape[1:])
+        self.base_network = base_network
+        # Only the shape of this output is needed, so skip autograd tracking.
+        with torch.no_grad():
+            sample_output = base_network(input_sample)
+        self.base_output_shape = list(sample_output.shape[-3:])
+
+        outputs_per_anchor = self.class_count + self.location_dim
+        layer_convs: list[nn.Module] = []
+        layer_detectors: list[SSD.Detector] = []
+        last_feature_count = self.base_output_shape[0]
+        for layer_index, (output_features, kwargs) in enumerate(zip(layer_channels, layer_args)):
+            # NOTE(review): forward() indexes layer_convs by detector position,
+            # so a 'disable' entry anywhere but at the end shifts the
+            # convolutions relative to their detectors -- confirm with callers.
+            if 'disable' not in kwargs:
+                layer_convs.append(Conv2d(last_feature_count, output_features, **kwargs))
+            layer_detectors.append(SSD.Detector(
+                last_feature_count, outputs_per_anchor * len(layer_box_ratios[layer_index])))
+            last_feature_count = output_features
+        self.layer_convs = nn.ModuleList(layer_convs)
+        self.layer_detectors = nn.ModuleList(layer_detectors)
+
+        self.merge = self.DetectorMerge(location_dimmension)
+
+        self.anchors_numpy, self.anchor_info, self.box_colors = self._create_anchors(
+            sample_output, self.layer_convs, self.layer_detectors, layer_box_ratios, box_size_factors,
+            input_sample.shape[3] / input_sample.shape[2])
+        self.anchors = torch.from_numpy(self.anchors_numpy)
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        """Return the merged predictions of every detector for ``input_data``.
+
+        Each detector output is flattened to ``(batch, -1, class_count +
+        location_dim)``, the results are concatenated along axis 1 and the
+        class part is softmax-normalized by ``DetectorMerge``.
+        """
+        head = self.base_network(input_data)
+        detector_outputs = []
+        for layer_index, detector in enumerate(self.layer_detectors):
+            detector_out = detector(head)
+            detector_outputs.append(detector_out.reshape(
+                detector_out.size(0), -1, self.class_count + self.location_dim))
+            # There may be fewer convolutions than detectors.
+            if layer_index < len(self.layer_convs):
+                head = self.layer_convs[layer_index](head)
+        detector_outputs = torch.cat(detector_outputs, 1)
+        return self.merge(detector_outputs)
+
+    def _apply(self, fn, *args, **kwargs):
+        # ``anchors`` is a plain attribute rather than a registered buffer, so
+        # apply ``fn`` to it by hand whenever the module itself is converted.
+        super()._apply(fn, *args, **kwargs)
+        self.anchors = fn(self.anchors)
+        return self
+
+    @staticmethod
+    def _create_anchors(
+            base_output: torch.Tensor, layers: nn.ModuleList, detectors: nn.ModuleList,
+            layer_box_ratios: list[list[float]], box_size_factors: list[float],
+            image_ratio: float) -> tuple[np.ndarray, list['SSD.AnchorInfo'], list[np.ndarray]]:
+        """Generate one anchor per (grid cell, ratio) for every detector layer.
+
+        Args:
+            base_output: Base-network output used as the first feature map.
+            layers: Extra convolution layers applied between detectors.
+            detectors: Detector modules; their output grid defines the cells.
+            layer_box_ratios: For each layer, the aspect ratios of its anchors.
+            box_size_factors: Size factors zipped with the ratios of each
+                layer (``zip`` stops at the shorter of the two sequences).
+            image_ratio: Input image width divided by its height.
+
+        Returns:
+            A float32 array of ``(center_y, center_x, height, width)`` rows,
+            the matching list of ``AnchorInfo`` records, and one uint8 RGB
+            color per anchor.
+        """
+        anchors = []
+        anchor_info: list[SSD.AnchorInfo] = []
+        box_colors: list[np.ndarray] = []
+        head = base_output
+
+        for layer_index, detector in enumerate(detectors):
+            # Only the grid size of each stage is needed, so skip autograd.
+            with torch.no_grad():
+                # Detector output is channels-last; axes 1 and 2 are the grid.
+                detector_output = detector(head)
+                if layer_index < len(layers):
+                    head = layers[layer_index](head)
+
+            detector_rows = detector_output.size()[1]
+            detector_cols = detector_output.size()[2]
+            color_index = 0
+            layer_ratios = layer_box_ratios[layer_index]
+            for index_y in range(detector_rows):
+                center_y = (index_y + 0.5) / detector_rows
+                for index_x in range(detector_cols):
+                    center_x = (index_x + 0.5) / detector_cols
+                    for ratio, size_factor in zip(layer_ratios, box_size_factors):
+                        # The hue grows with a per-layer running counter.
+                        box_colors.append((np.asarray(colorsys.hsv_to_rgb(
+                            color_index / len(layer_ratios), 1.0, 1.0)) * 255).astype(np.uint8))
+                        color_index += 1
+                        unit_box_size = size_factor / max(detector_rows, detector_cols)
+                        # Divide the ratio by the image aspect ratio before
+                        # splitting it between width and height.
+                        aspect = math.sqrt(ratio / image_ratio)
+                        anchor_width = unit_box_size * aspect
+                        anchor_height = unit_box_size / aspect
+                        anchor_info.append(SSD.AnchorInfo(
+                            (center_y, center_x),
+                            (anchor_height, anchor_width),
+                            len(anchors),
+                            layer_index,
+                            (index_y, index_x),
+                            len(box_colors) - 1,
+                            ratio,
+                            size_factor
+                        ))
+                        anchors.append([center_y, center_x, anchor_height, anchor_width])
+        return np.asarray(anchors, dtype=np.float32), anchor_info, box_colors