diff --git a/ssd/box.py b/ssd/box.py
new file mode 100644
index 0000000..e255fe4
--- /dev/null
+++ b/ssd/box.py
@@ -0,0 +1,91 @@
+import numpy as np
+
+
+def create_box(y_pos: float, x_pos: float, height: float, width: float) -> tuple[float, float, float, float]:
+    """Clamp a center-format (y, x, h, w) box to the unit square and return it re-centered."""
+    y_min, x_min, y_max, x_max = check_rectangle(
+        y_pos - (height / 2), x_pos - (width / 2), y_pos + (height / 2), x_pos + (width / 2))
+    return (y_min + y_max) / 2, (x_min + x_max) / 2, y_max - y_min, x_max - x_min
+
+
+def check_rectangle(y_min: float, x_min: float, y_max: float, x_max: float) -> tuple[float, float, float, float]:
+    """Clamp corner coordinates to the [0, 1] unit square."""
+    if y_min < 0:
+        y_min = 0
+    if x_min < 0:
+        x_min = 0
+    if y_min > 1:
+        y_min = 1
+    if x_min > 1:
+        x_min = 1
+    if y_max < 0:
+        y_max = 0
+    if x_max < 0:
+        x_max = 0
+    if y_max > 1:
+        y_max = 1
+    if x_max > 1:
+        x_max = 1
+    return y_min, x_min, y_max, x_max
+
+
+def get_boxes(predictions: np.ndarray, anchors: np.ndarray, class_index: int) -> np.ndarray:
+    """Decode predicted offsets against anchors into (N, 5) rows of y, x, h, w, score."""
+    boxes = np.zeros(anchors.shape)
+    boxes[:, 0] = (predictions[:, 0] * anchors[:, 2]) + anchors[:, 0]
+    boxes[:, 1] = (predictions[:, 1] * anchors[:, 3]) + anchors[:, 1]
+    boxes[:, 2] = np.exp(predictions[:, 2]) * anchors[:, 2]
+    boxes[:, 3] = np.exp(predictions[:, 3]) * anchors[:, 3]
+    # clamp every decoded box to the unit square
+    boxes = np.asarray([create_box(*box) for box in boxes])
+
+    # append the confidence for the requested class column as a fifth column
+    return np.concatenate([boxes, predictions[:, class_index:class_index + 1]], axis=1)
+
+
+def fast_nms(boxes: np.ndarray, min_iou: float) -> np.ndarray:
+    """Greedy non-maximum suppression on (y, x, h, w, score) rows; keeps the highest-score boxes."""
+    # if there are no boxes, return an empty result of the same type
+    if len(boxes) == 0:
+        return boxes[:0]
+
+    # initialize the list of picked indexes
+    pick = []
+
+    # grab the corner coordinates of the bounding boxes
+    y_min = boxes[:, 0] - (boxes[:, 2] / 2)
+    y_max = boxes[:, 0] + (boxes[:, 2] / 2)
+    x_min = boxes[:, 1] - (boxes[:, 3] / 2)
+    x_max = boxes[:, 1] + (boxes[:, 3] / 2)
+    scores = boxes[:, 4]
+
+    # compute the areas and sort ascending by score (the best box is taken from the end)
+    areas = (x_max - x_min) * (y_max - y_min)
+    idxs = np.argsort(scores)
+
+    # keep looping while some indexes still remain in the indexes
+    # list
+    while len(idxs) > 0:
+        # grab the last index in the indexes list and add the
+        # index value to the list of picked indexes
+        last = len(idxs) - 1
+        i = idxs[last]
+        pick.append(i)
+
+        inter_tops = np.maximum(y_min[i], y_min[idxs[:last]])
+        inter_bottoms = np.minimum(y_max[i], y_max[idxs[:last]])
+        inter_lefts = np.maximum(x_min[i], x_min[idxs[:last]])
+        inter_rights = np.minimum(x_max[i], x_max[idxs[:last]])
+        # clamp negative extents to zero so disjoint boxes cannot yield a positive area
+        inter_areas = np.maximum(0, inter_rights - inter_lefts) * np.maximum(0, inter_bottoms - inter_tops)
+
+        # compute the ratio of overlap (intersection over union)
+        union_area = (areas[idxs[:last]] + areas[i]) - inter_areas
+        overlap = inter_areas / union_area
+
+        # drop the picked index and every remaining box overlapping it by more than min_iou
+        idxs = np.delete(
+            idxs, np.concatenate(([last], np.where(overlap > min_iou)[0])))
+
+    # return only the boxes that survived suppression
+    return boxes[pick]
diff --git a/ssd/ssd.py b/ssd/ssd.py
new file mode 100644
index 0000000..eba064d
--- /dev/null
+++ b/ssd/ssd.py
@@ -0,0 +1,164 @@
+import colorsys
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from .box import check_rectangle
+from ..layers import Conv2d
+
+
+class SSD(nn.Module):
+    """Single Shot MultiBox Detector head stacked on top of an arbitrary base network."""
+
+    class Detector(nn.Module):
+        """3x3 conv mapping a feature map to per-anchor location + class predictions."""
+
+        def __init__(self, input_features: int, output_features: int):
+            super().__init__()
+            self.conv = Conv2d(input_features, output_features, kernel_size=3, padding=1,
+                               batch_norm=False, activation=None)
+            self.output = None  # last forward result, kept for inspection
+
+        def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+            # NCHW -> NHWC so each spatial cell's predictions are contiguous
+            self.output = self.conv(input_data).permute(0, 2, 3, 1)
+            return self.output
+
+    class DetectorMerge(nn.Module):
+        """Applies softmax to the class part of the concatenated detector outputs."""
+
+        def __init__(self, location_dimmension: int):
+            super().__init__()
+            self.location_dim = location_dimmension
+
+        def forward(self, detector_outputs: torch.Tensor) -> torch.Tensor:
+            return torch.cat(
+                [detector_outputs[:, :, :self.location_dim],
+                 torch.softmax(detector_outputs[:, :, self.location_dim:], dim=2)], dim=2)
+
+    class AnchorInfo:
+        """Metadata describing a single anchor (default box) and where it came from."""
+
+        def __init__(self, center: tuple[float, float], size: tuple[float, float],
+                     index: int, layer_index: int, map_index: tuple[int, int], color_index: int,
+                     ratio: float, size_factor: float):
+            self.index = index
+            self.layer_index = layer_index
+            self.map_index = map_index
+            self.color_index = color_index
+            self.ratio = ratio
+            self.size_factor = size_factor
+            self.center = center
+            self.size = size
+            self.box = check_rectangle(
+                center[0] - (size[0] / 2), center[1] - (size[1] / 2),
+                center[0] + (size[0] / 2), center[1] + (size[1] / 2))
+
+        def __repr__(self):
+            return (f'{self.__class__.__name__}'
+                    f'(index:{self.index}, layer:{self.layer_index}, coord:{self.map_index}'
+                    f', center:({self.center[0]:.03f}, {self.center[1]:.03f})'
+                    f', size:({self.size[0]:.03f}, {self.size[1]:.03f})'
+                    f', ratio:{self.ratio:.03f}, size_factor:{self.size_factor:.03f})'
+                    f', y:[{self.box[0]:.03f}:{self.box[2]:.03f}]'
+                    f', x:[{self.box[1]:.03f}:{self.box[3]:.03f}])')
+
+        def __array__(self):
+            return np.array([*self.center, *self.size])
+
+    def __init__(self, base_network: nn.Module, input_sample: torch.Tensor, classes: list[str],
+                 location_dimmension: int, layer_channels: list[int], layer_box_ratios: list[list[float]],
+                 layer_args: list[dict], box_size_factors: list[float]):
+        super().__init__()
+
+        self.location_dim = location_dimmension
+        self.classes = ['none'] + classes  # class 0 is reserved for background
+        self.class_count = len(self.classes)
+        self.base_input_shape = input_sample.numpy().shape[1:]
+        self.base_network = base_network
+        sample_output = base_network(input_sample)
+        self.base_output_shape = list(sample_output.detach().numpy().shape)[-3:]
+
+        layer_convs: list[nn.Module] = []
+        layer_detectors: list[SSD.Detector] = []
+        last_feature_count = self.base_output_shape[0]
+        for layer_index, (output_features, kwargs) in enumerate(zip(layer_channels, layer_args)):
+            if 'disable' not in kwargs:
+                layer_convs.append(Conv2d(last_feature_count, output_features, **kwargs))
+                layer_detectors.append(SSD.Detector(
+                    last_feature_count, (self.class_count + self.location_dim) * len(layer_box_ratios[layer_index])))
+                # only advance the channel count when a conv was actually added,
+                # so a disabled layer does not desync in/out channels
+                last_feature_count = output_features
+        self.layer_convs = nn.ModuleList(layer_convs)
+        self.layer_detectors = nn.ModuleList(layer_detectors)
+
+        self.merge = self.DetectorMerge(location_dimmension)
+
+        self.anchors_numpy, self.anchor_info, self.box_colors = self._create_anchors(
+            sample_output, self.layer_convs, self.layer_detectors, layer_box_ratios, box_size_factors,
+            input_sample.shape[3] / input_sample.shape[2])
+        self.anchors = torch.from_numpy(self.anchors_numpy)
+
+    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
+        """Run the base network plus every extra layer; returns merged (N, anchors, loc+classes)."""
+        head = self.base_network(input_data)
+        detector_outputs = []
+        for layer_index, detector in enumerate(self.layer_detectors):
+            detector_out = detector(head)
+            detector_outputs.append(detector_out.reshape(
+                detector_out.size(0), -1, self.class_count + self.location_dim))
+            if layer_index < len(self.layer_convs):
+                head = self.layer_convs[layer_index](head)
+        detector_outputs = torch.cat(detector_outputs, 1)
+        return self.merge(detector_outputs)
+
+    def _apply(self, fn):
+        # keep the anchor tensor on the same device/dtype as the module parameters
+        super()._apply(fn)
+        self.anchors = fn(self.anchors)
+        return self
+
+    @staticmethod
+    def _create_anchors(
+            base_output: torch.Tensor, layers: nn.ModuleList, detectors: nn.ModuleList,
+            layer_box_ratios: list[list[float]], box_size_factors: list[float],
+            image_ratio: float) -> tuple[np.ndarray, list['SSD.AnchorInfo'], list[np.ndarray]]:
+        """Walk the detector pyramid once to enumerate every anchor, its metadata and a display color."""
+        anchors = []
+        anchor_info: list[SSD.AnchorInfo] = []
+        box_colors: list[np.ndarray] = []
+        head = base_output
+
+        for layer_index, detector in enumerate(detectors):
+            detector_output = detector(head)  # N, H, W, C after the permute in Detector.forward
+            if layer_index < len(layers):
+                head = layers[layer_index](head)
+
+            detector_rows = detector_output.size()[1]
+            detector_cols = detector_output.size()[2]
+            color_index = 0
+            layer_ratios = layer_box_ratios[layer_index]
+            for index_y in range(detector_rows):
+                center_y = (index_y + 0.5) / detector_rows
+                for index_x in range(detector_cols):
+                    center_x = (index_x + 0.5) / detector_cols
+                    for ratio, size_factor in zip(layer_ratios, box_size_factors):
+                        box_colors.append((np.asarray(colorsys.hsv_to_rgb(
+                            color_index / len(layer_ratios), 1.0, 1.0)) * 255).astype(np.uint8))
+                        color_index += 1
+                        unit_box_size = size_factor / max(detector_rows, detector_cols)
+                        anchor_width = unit_box_size * math.sqrt(ratio / image_ratio)
+                        anchor_height = unit_box_size / math.sqrt(ratio / image_ratio)
+                        anchor_info.append(SSD.AnchorInfo(
+                            (center_y, center_x),
+                            (anchor_height, anchor_width),
+                            len(anchors),
+                            layer_index,
+                            (index_y, index_x),
+                            len(box_colors) - 1,
+                            ratio,
+                            size_factor
+                        ))
+                        anchors.append([center_y, center_x, anchor_height, anchor_width])
+        return np.asarray(anchors, dtype=np.float32), anchor_info, box_colors