
Module kerod.layers.detection.rpn


View Source
from typing import List

import tensorflow as tf

import tensorflow.keras.layers as KL

from tensorflow.keras import initializers

from tensorflow.keras.losses import SparseCategoricalCrossentropy

from kerod.core.box_coder import encode_boxes_faster_rcnn

from kerod.core.losses import L1Loss

from kerod.core.matcher import Matcher

from kerod.core.sampling_ops import batch_sample_balanced_positive_negative

from kerod.core.similarity import IoUSimilarity

from kerod.core.standard_fields import BoxField

from kerod.core.target_assigner import TargetAssigner

from kerod.layers import Anchors

from kerod.layers.detection.abstract_detection_head import \

    AbstractDetectionHead

from kerod.utils.documentation import remove_unwanted_doc

__pdoc__ = {}

SAMPLING_SIZE = 256

SAMPLING_POSITIVE_RATIO = 0.5

class RegionProposalNetwork(AbstractDetectionHead):

    """It has been introduced in the [Faster R-CNN paper](https://arxiv.org/abs/1506.01497) and

    use the parameters from [Feature Pyramidal Networks for Object Detection](https://arxiv.org/abs/1612.03144).

    Arguments:

        anchor_ratios: The aspect ratios applied to the anchors at each location,

            e.g. (0.5, 1, 2)

    Call arguments:

        inputs: A list of tensors, the feature maps output by the feature pyramid

    Call returns:

        Tuple:

            - `localization_pred`: A list of 3-D logits tensors

            of shape [batch_size, num_anchors, 4]

            - `classification_pred`: A list of 3-D logits tensors

            of shape [batch_size, num_anchors, 2]

            - `anchors`: A list of tensors of shape [batch_size, num_anchors, (y_min, x_min, y_max, x_max)]

    """

    def __init__(self, anchor_ratios=(0.5, 1, 2), **kwargs):

        super().__init__(

            2,

            SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE,

                                          from_logits=True),

            L1Loss(reduction=tf.keras.losses.Reduction.NONE),

            multiples=len(anchor_ratios),

            kernel_initializer_classification_head=initializers.RandomNormal(stddev=0.01),

            kernel_initializer_box_prediction_head=initializers.RandomNormal(stddev=0.01),

            **kwargs)

        # Force each ground truth to match at least one anchor

        matcher = Matcher([0.3, 0.7], [0, -1, 1], allow_low_quality_matches=True)

        self.target_assigner = TargetAssigner(IoUSimilarity(),

                                              matcher,

                                              encode_boxes_faster_rcnn,

                                              dtype=self._compute_dtype)

        anchor_strides = (4, 8, 16, 32, 64)

        anchor_sizes = (32, 64, 128, 256, 512)

        self._anchor_ratios = anchor_ratios

        # Precompute a deterministic grid of anchors for each layer of the pyramid.

        # We will extract a subpart of the anchors according to the spatial size of each feature map

        self._anchors = [

            Anchors(stride, size, self._anchor_ratios)

            for stride, size in zip(anchor_strides, anchor_sizes)

        ]

    def build(self, input_shape):

        self.rpn_conv2d = KL.Conv2D(512, (3, 3),

                                    padding='same',

                                    kernel_initializer=self._kernel_initializer_classification_head,

                                    kernel_regularizer=self._kernel_regularizer)

        super().build(input_shape)

    def build_rpn_head(self, inputs):

        """Predictions for the classification and the regression

        Arguments:

            inputs: A tensor of  shape [batch_size, width, height, channel]

        Returns:

            Tuple:

                classification_head: a tensor of shape [batch_size, num_anchors, 2]

                localization_head: a tensor of shape [batch_size, num_anchors, 4]

        """

        batch_size = tf.shape(inputs)[0]

        rpn_conv2d = self.rpn_conv2d(inputs)

        classification_head, localization_head = self.build_detection_head(rpn_conv2d)

        classification_head = tf.reshape(classification_head, (batch_size, -1, 2))

        localization_head = tf.reshape(localization_head, (batch_size, -1, 4))

        return classification_head, localization_head

    def call(self, inputs: List[tf.Tensor]):

        """Create the computation graph for the rpn inference

        Arguments:

            inputs: A List of tensors the output of the pyramid

        Returns:

            Tuple:

                - `localization_pred`: A list of 3-D logits tensors

                of shape [batch_size, num_anchors, 4]

                - `classification_pred`: A list of 3-D logits tensors

                of shape [batch_size, num_anchors, 2]

                - `anchors`: A list of tensors of shape [batch_size, num_anchors, (y_min, x_min, y_max, x_max)]

        """

        anchors = [anchors(tensor) for tensor, anchors in zip(inputs, self._anchors)]

        rpn_predictions = [self.build_rpn_head(tensor) for tensor in inputs]

        localization_pred = [prediction[1] for prediction in rpn_predictions]

        classification_pred = [prediction[0] for prediction in rpn_predictions]

        return localization_pred, classification_pred, anchors

    def compute_loss(self, localization_pred, classification_pred, anchors, ground_truths):

        """Compute the loss

        Arguments:

            localization_pred: A list of tensors of shape [batch_size, num_anchors, 4].

            classification_pred: A list of tensors of shape [batch_size, num_anchors, 2]

            anchors: A list of tensors of shape [num_anchors, (y_min, x_min, y_max, x_max)]

            ground_truths: A dict

                - `BoxField.BOXES`: A 3-D tensor of shape [batch_size, num_gt, (y1, x1, y2, x2)]

                - `BoxField.LABELS`: A 2-D tensor of int32 and shape [batch_size, num_gt]

                - `BoxField.WEIGHTS`: A 2-D tensor of float and shape [batch_size, num_gt]

                - `BoxField.NUM_BOXES`: A 2-D tensor of int32 and shape [batch_size, 1]

                    which allows removing the padding created by tf.data.

                    Example: if batch_size=2 and this field equals tf.constant([[2], [1]], tf.int32),

                    then the second sample has a padding of 1

        Returns:

            Tuple:

                - `classification_loss`: A scalar in tf.float32

                - `localization_loss`: A scalar in tf.float32

        """

        localization_pred = tf.concat(localization_pred, 1)

        classification_pred = tf.concat(classification_pred, 1)

        anchors = tf.concat(anchors, 0)

        ground_truths = {

            # We add one because the background is not counted in ground_truths[BoxField.LABELS]

            BoxField.LABELS:

                ground_truths[BoxField.LABELS] + 1,

            BoxField.BOXES:

                ground_truths[BoxField.BOXES],

            BoxField.WEIGHTS:

                ground_truths[BoxField.WEIGHTS],

            BoxField.NUM_BOXES:

                ground_truths[BoxField.NUM_BOXES]

        }

        # Anchors are deterministic, so tile them to create a batch

        anchors = tf.tile(anchors[None], (tf.shape(ground_truths[BoxField.BOXES])[0], 1, 1))

        y_true, weights = self.target_assigner.assign({BoxField.BOXES: anchors}, ground_truths)

        y_true[BoxField.LABELS] = tf.minimum(y_true[BoxField.LABELS], 1)

        ## Compute metrics

        recall = compute_rpn_metrics(y_true[BoxField.LABELS], classification_pred,

                                     weights[BoxField.LABELS])

        self.add_metric(recall, name='rpn_recall', aggregation='mean')

        # All the boxes which are not -1 can be sampled

        labels = y_true[BoxField.LABELS] > 0

        sample_idx = batch_sample_balanced_positive_negative(

            weights[BoxField.LABELS],

            SAMPLING_SIZE,

            labels,

            positive_fraction=SAMPLING_POSITIVE_RATIO,

            dtype=self._compute_dtype)

        weights[BoxField.LABELS] = sample_idx * weights[BoxField.LABELS]

        weights[BoxField.BOXES] = sample_idx * weights[BoxField.BOXES]

        y_pred = {BoxField.LABELS: classification_pred, BoxField.BOXES: localization_pred}

        return self.compute_losses(y_true, y_pred, weights)

    def get_config(self):

        base_config = super().get_config()

        base_config['anchor_ratios'] = self._anchor_ratios

        return base_config

def compute_rpn_metrics(y_true: tf.Tensor, y_pred: tf.Tensor, weights: tf.Tensor):

    """Useful metrics that allows to track how behave the training of the rpn head.

    Arguments:

        y_true: A tensor of shape [batch_size, num_anchors] where 0 = background and

            1 = foreground.

        y_pred: A tensor of shape [batch_size, num_anchors, 2],

            representing the classification logits.

        weights: A tensor of shape [batch_size, num_anchors] marking which anchors contribute to the metric (non-zero) and which are ignored (0).

    Returns:

        tf.Tensor: The recall: among all the foreground anchors that had to be found, the fraction that were correctly predicted.

    """

    # Force the cast to avoid type issues when mixed precision is enabled

    y_true, y_pred, weights = tf.cast(y_true, tf.float32), tf.cast(y_pred, tf.float32), tf.cast(

        weights, tf.float32)

    # The weights can have decimal values; clip them to 0 or 1

    weights = tf.clip_by_value(tf.math.ceil(weights), 0, 1)

    masked_y_true = y_true * weights

    prediction = tf.cast(tf.argmax(y_pred, axis=-1, name='label_prediction'),

                         tf.float32) * weights  # 0 or 1

    correct = tf.cast(tf.equal(prediction, masked_y_true), tf.float32)

    fg_inds = tf.where(masked_y_true == 1)

    num_valid_anchor = tf.math.count_nonzero(masked_y_true)

    num_pos_foreground_prediction = tf.math.count_nonzero(tf.gather_nd(correct, fg_inds))

    recall = tf.truediv(num_pos_foreground_prediction, num_valid_anchor, name='recall')

    return recall

remove_unwanted_doc(RegionProposalNetwork, __pdoc__)

Variables

SAMPLING_POSITIVE_RATIO
SAMPLING_SIZE

Functions

compute_rpn_metrics

def compute_rpn_metrics(
    y_true: tensorflow.python.framework.ops.Tensor,
    y_pred: tensorflow.python.framework.ops.Tensor,
    weights: tensorflow.python.framework.ops.Tensor
)

Useful metric to track how the training of the RPN head behaves.

Parameters:

y_true: A tensor of shape [batch_size, num_anchors] where 0 = background and 1 = foreground.
y_pred: A tensor of shape [batch_size, num_anchors, 2] representing the classification logits.
weights: A tensor of shape [batch_size, num_anchors] marking which anchors contribute to the metric (non-zero) and which are ignored (0).

Returns:

tf.Tensor: The recall: among all the foreground anchors that had to be found, the fraction that were correctly predicted.
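A hedged toy example, with hand-made tensors that are not part of kerod, illustrating the recall computation: two of the weighted anchors are foreground and only one of them is classified correctly, so the metric evaluates to 0.5.

import tensorflow as tf

from kerod.layers.detection.rpn import compute_rpn_metrics

# One image, four anchors: anchors 0 and 2 are foreground, anchor 3 is ignored (weight 0)
y_true = tf.constant([[1., 0., 1., 1.]])
weights = tf.constant([[1., 1., 1., 0.]])

# Classification logits: the model finds anchor 0 but misses anchor 2
y_pred = tf.constant([[[0.1, 2.0], [3.0, -1.0], [2.5, 0.2], [0.0, 5.0]]])

recall = compute_rpn_metrics(y_true, y_pred, weights)  # 1 foreground found out of 2 -> 0.5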

Classes

RegionProposalNetwork

class RegionProposalNetwork(
    anchor_ratios=(0.5, 1, 2),
    **kwargs
)

It was introduced in the Faster R-CNN paper and uses the parameters from Feature Pyramid Networks for Object Detection.

Arguments

anchor_ratios: The aspect ratios applied to the anchors at each location, e.g. (0.5, 1, 2).

Call arguments

inputs: A list of tensors, the feature maps output by the feature pyramid.

Call returns

Tuple:
- localization_pred: A list of 3-D logits tensors of shape [batch_size, num_anchors, 4]
- classification_pred: A list of 3-D logits tensors of shape [batch_size, num_anchors, 2]
- anchors: A list of tensors of shape [batch_size, num_anchors, (y_min, x_min, y_max, x_max)]
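For orientation, a minimal usage sketch of the layer; the number of pyramid levels, the spatial sizes and the 256-channel feature maps are illustrative assumptions, not values imposed by kerod.

import tensorflow as tf

from kerod.layers.detection.rpn import RegionProposalNetwork

rpn = RegionProposalNetwork(anchor_ratios=(0.5, 1, 2))

# A fake 5-level feature pyramid (strides 4 to 64) for a batch of 2 images
pyramid = [tf.random.normal((2, size, size, 256)) for size in (200, 100, 50, 25, 13)]

localization_pred, classification_pred, anchors = rpn(pyramid)
# One entry per pyramid level, e.g. classification_pred[0] has shape [2, 200 * 200 * 3, 2]
# (3 anchors per location, one per ratio)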

Ancestors (in MRO)

  • kerod.layers.detection.abstract_detection_head.AbstractDetectionHead
  • tensorflow.python.keras.engine.base_layer.Layer
  • tensorflow.python.module.module.Module
  • tensorflow.python.training.tracking.tracking.AutoTrackable
  • tensorflow.python.training.tracking.base.Trackable
  • tensorflow.python.keras.utils.version_utils.LayerVersionSelector

Methods

build_detection_head

def build_detection_head(
    self,
    inputs
)

Build a detection head composed of a classification head and a box prediction head.

Parameters:

inputs: A tensor of shape [batch_size, H, W, C]

Returns:

Tuple:
- classification_head: a tensor of shape [batch_size, num_anchors, 2]
- localization_head: a tensor of shape [batch_size, num_anchors, 4]
View Source
    def build_detection_head(self, inputs):

        """ Build a detection head composed of a classification and box_detection.

        Arguments:

            inputs: A tensor of shape [batch_size, H, W, C]

        Returns:

            Tuple:

                classification_head: a tensor of shape [batch_size, num_anchors, 2]

                localization_head: a tensor of shape [batch_size, num_anchors, 4]

        """

        classification_head = self._conv_classification_head(inputs)

        box_prediction_head = self._conv_box_prediction_head(inputs)

        return classification_head, box_prediction_head

build_rpn_head

def build_rpn_head(
    self,
    inputs
)

Compute the classification and regression predictions for a single pyramid level

Parameters:

inputs: A tensor of shape [batch_size, width, height, channel]

Returns:

Tuple:
- classification_head: a tensor of shape [batch_size, num_anchors, 2]
- localization_head: a tensor of shape [batch_size, num_anchors, 4]
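A shape-only sketch, with assumed sizes (a 25x25 feature map and 3 anchor ratios), of the reshape performed by this method; class_map and box_map stand in for the outputs of build_detection_head.

import tensorflow as tf

batch_size, h, w, num_ratios = 2, 25, 25, 3

# Stand-ins for the per-location maps produced by the detection head
class_map = tf.random.normal((batch_size, h, w, num_ratios * 2))
box_map = tf.random.normal((batch_size, h, w, num_ratios * 4))

# Flatten the spatial grid and the ratios into a per-anchor axis
classification_head = tf.reshape(class_map, (batch_size, -1, 2))  # [2, 1875, 2]
localization_head = tf.reshape(box_map, (batch_size, -1, 4))      # [2, 1875, 4]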

build_segmentation_head

def build_segmentation_head(
    self,
    inputs
)

Build the segmentation head

Parameters:

inputs: A tensor of float and shape [N, H, W, C]

Returns:

tf.Tensor: A tensor of shape [N, H*2, W*2, num_classes - 1]
View Source
    def build_segmentation_head(self, inputs):

        """Build the detection head

        Arguments:

            inputs: A tensor of float and shape [N, H, W, C]

        Returns:

            tf.Tensor: A tensor of shape [N, H*2, W*2, num_classes - 1]

        """

        x = inputs

        for layer in self._segmentation_layers:

            x = layer(x)

        return x

call

def call(
    self,
    inputs: List[tensorflow.python.framework.ops.Tensor]
)

Create the computation graph for the RPN inference

Parameters:

inputs: A list of tensors, the feature maps output by the feature pyramid.

Returns:

Tuple:
- localization_pred: A list of 3-D logits tensors of shape [batch_size, num_anchors, 4]
- classification_pred: A list of 3-D logits tensors of shape [batch_size, num_anchors, 2]
- anchors: A list of tensors of shape [batch_size, num_anchors, (y_min, x_min, y_max, x_max)]

compute_loss

def compute_loss(
    self,
    localization_pred,
    classification_pred,
    anchors,
    ground_truths
)

Compute the loss

Parameters:

localization_pred: A list of tensors of shape [batch_size, num_anchors, 4].
classification_pred: A list of tensors of shape [batch_size, num_anchors, 2].
anchors: A list of tensors of shape [num_anchors, (y_min, x_min, y_max, x_max)].
ground_truths: A dict
- BoxField.BOXES: A 3-D tensor of shape [batch_size, num_gt, (y1, x1, y2, x2)]
- BoxField.LABELS: A 2-D tensor of int32 and shape [batch_size, num_gt]
- BoxField.WEIGHTS: A 2-D tensor of float and shape [batch_size, num_gt]
- BoxField.NUM_BOXES: A 2-D tensor of int32 and shape [batch_size, 1] which allows removing the
  padding created by tf.data. Example: if batch_size=2 and this field equals
  tf.constant([[2], [1]], tf.int32), then the second sample has a padding of 1.

Returns:

Tuple:
- classification_loss: A scalar in tf.float32
- localization_loss: A scalar in tf.float32
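A hedged sketch of the ground_truths dictionary described above; the box coordinates and labels are made up, and the second image carries one padded (dummy) box that BoxField.WEIGHTS and BoxField.NUM_BOXES mark as padding.

import tensorflow as tf

from kerod.core.standard_fields import BoxField

ground_truths = {
    BoxField.BOXES: tf.constant([
        [[10., 10., 50., 80.], [20., 30., 40., 70.]],  # image 1: 2 real boxes
        [[15., 25., 60., 90.], [0., 0., 0., 0.]],      # image 2: 1 real box + 1 padded box
    ]),
    BoxField.LABELS: tf.constant([[3, 7], [1, 0]], tf.int32),
    BoxField.WEIGHTS: tf.constant([[1., 1.], [1., 0.]]),
    BoxField.NUM_BOXES: tf.constant([[2], [1]], tf.int32),
}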

compute_losses

def compute_losses(
    self,
    y_true: Dict[str, tensorflow.python.framework.ops.Tensor],
    y_pred: Dict[str, tensorflow.python.framework.ops.Tensor],
    weights: Dict[str, tensorflow.python.framework.ops.Tensor]
) -> dict

Compute the losses of the object detection head.

Each dictionary is composed of the same keys (classification, localization, segmentation)

Parameters:

y_pred: A dict of tensors of shape [N, nb_boxes, num_output].
y_true: A dict of tensors of shape [N, nb_boxes, num_output].
weights: A dict of tensors of shape [N, nb_boxes, num_output]. These tensors are composed of one-hot vectors.

Returns:

dict: A dict of the different losses.
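A small numeric sketch, with made-up values, of the normalization visible in the source below: per-image losses are summed over the boxes, divided by the number of sampled boxes of that image (at least 1), and averaged over the batch.

import tensorflow as tf

# Weighted per-box losses for a batch of 2 images with 3 sampled slots each
per_box_loss = tf.constant([[0.2, 0.4, 0.0], [0.6, 0.0, 0.0]])
weights = tf.constant([[1., 1., 0.], [1., 0., 0.]])

normalizer = tf.maximum(tf.reduce_sum(weights, axis=1), 1.0)              # [2., 1.]
loss = tf.reduce_mean(tf.reduce_sum(per_box_loss, axis=1) / normalizer)   # (0.3 + 0.6) / 2 = 0.45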
View Source
    def compute_losses(self, y_true: Dict[str, tf.Tensor], y_pred: Dict[str, tf.Tensor],

                       weights: Dict[str, tf.Tensor]) -> dict:

        """Compute the losses of the object detection head.

        Each dictionary is composed of the same keys (classification, localization, segmentation)

        Arguments:

            y_pred: A dict of tensors of shape [N, nb_boxes, num_output].

            y_true: A dict of tensors of shape [N, nb_boxes, num_output].

            weights: A dict of tensors of shape [N, nb_boxes, num_output].

                These tensors are composed of one-hot vectors.

        Returns:

            dict : A dict of different losses

        """

        def _compute_loss(loss, loss_weight, target):

            losses = loss(tf.cast(y_true[target], tf.float32),

                          tf.cast(y_pred[target], tf.float32),

                          sample_weight=tf.cast(weights[target], tf.float32))

            return loss_weight * tf.reduce_mean(tf.reduce_sum(losses, axis=1) / normalizer)

        normalizer = tf.maximum(tf.reduce_sum(weights[BoxField.LABELS], axis=1), 1.0)

        normalizer = tf.cast(normalizer, tf.float32)

        classification_loss = _compute_loss(self._classification_loss,

                                            self._classification_loss_weight, BoxField.LABELS)

        self.add_metric(classification_loss,

                        name=f'{self.name}_classification_loss',

                        aggregation='mean')

        localization_loss = _compute_loss(self._localization_loss, self._localization_loss_weight,

                                          BoxField.BOXES)

        self.add_metric(localization_loss,

                        name=f'{self.name}_localization_loss',

                        aggregation='mean')

        self.add_loss([classification_loss, localization_loss])

        if self._use_mask:

            segmentation_loss = _compute_loss(self._segmentation_loss,

                                              self._segmentation_loss_weight, BoxField.MASKS)

            self.add_metric(segmentation_loss,

                            name=f'{self.name}_segmentation_loss',

                            aggregation='mean')

            self.add_loss(segmentation_loss)

            return {

                BoxField.LABELS: classification_loss,

                BoxField.BOXES: localization_loss,

                BoxField.MASKS: segmentation_loss

            }

        return {BoxField.LABELS: classification_loss, BoxField.BOXES: localization_loss}