Module kerod.layers.detection.rpn
None
None
View Source
from typing import List
import tensorflow as tf
import tensorflow.keras.layers as KL
from tensorflow.keras import initializers
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from kerod.core.box_coder import encode_boxes_faster_rcnn
from kerod.core.losses import L1Loss
from kerod.core.matcher import Matcher
from kerod.core.sampling_ops import batch_sample_balanced_positive_negative
from kerod.core.similarity import IoUSimilarity
from kerod.core.standard_fields import BoxField
from kerod.core.target_assigner import TargetAssigner
from kerod.layers import Anchors
from kerod.layers.detection.abstract_detection_head import \
AbstractDetectionHead
from kerod.utils.documentation import remove_unwanted_doc
# Handed to `remove_unwanted_doc` at the end of the module
# (presumably pdoc overrides hiding inherited documentation - TODO confirm).
__pdoc__ = {}
# Sample size passed to `batch_sample_balanced_positive_negative` when the RPN loss is computed.
SAMPLING_SIZE = 256
# Passed as `positive_fraction` to the same sampler.
SAMPLING_POSITIVE_RATIO = 0.5
class RegionProposalNetwork(AbstractDetectionHead):
    """It was introduced in the [Faster R-CNN paper](https://arxiv.org/abs/1506.01497) and
    uses the parameters from [Feature Pyramidal Networks for Object Detection](https://arxiv.org/abs/1612.03144).

    Arguments:
        anchor_ratios: The ratios are the different shapes that you want to apply on your anchors.
            e.g: (0.5, 1, 2)

    Call arguments:
        inputs: A list of tensors, the outputs of the pyramid.

    Call returns:
        Tuple:
            - `localization_pred`: A list of 3-D logits tensors
            of shape [batch_size, num_anchors, 4]
            - `classification_pred`: A list of 3-D logits tensors
            of shape [batch_size, num_anchors, 2]
            - `anchors`: A list with the output of one `Anchors` layer per pyramid level.
            `compute_loss` consumes each of them as a tensor of shape
            [num_anchors, (y_min, x_min, y_max, x_max)].
    """

    def __init__(self, anchor_ratios=(0.5, 1, 2), **kwargs):
        classification_loss = SparseCategoricalCrossentropy(
            reduction=tf.keras.losses.Reduction.NONE, from_logits=True)
        localization_loss = L1Loss(reduction=tf.keras.losses.Reduction.NONE)
        # Two classes for the head; one prediction "multiple" per anchor ratio.
        super().__init__(
            2,
            classification_loss,
            localization_loss,
            multiples=len(anchor_ratios),
            kernel_initializer_classification_head=initializers.RandomNormal(stddev=0.01),
            kernel_initializer_box_prediction_head=initializers.RandomNormal(stddev=0.01),
            **kwargs)

        # `allow_low_quality_matches` forces each ground truth to match at least one anchor.
        # NOTE(review): [0.3, 0.7] / [0, -1, 1] look like IoU thresholds and the labels
        # given to each IoU range - confirm against `Matcher`.
        anchor_matcher = Matcher([0.3, 0.7], [0, -1, 1], allow_low_quality_matches=True)
        self.target_assigner = TargetAssigner(IoUSimilarity(),
                                              anchor_matcher,
                                              encode_boxes_faster_rcnn,
                                              dtype=self._compute_dtype)

        self._anchor_ratios = anchor_ratios
        # One (stride, size) pair per level of the pyramid.
        strides_and_sizes = ((4, 32), (8, 64), (16, 128), (32, 256), (64, 512))
        # One deterministic anchor generator per level of the pyramid.
        self._anchors = [
            Anchors(level_stride, level_size, self._anchor_ratios)
            for level_stride, level_size in strides_and_sizes
        ]

    def build(self, input_shape):
        """Create the 3x3 convolution shared by every pyramid level, then build the parent head."""
        self.rpn_conv2d = KL.Conv2D(
            512,
            (3, 3),
            padding='same',
            kernel_initializer=self._kernel_initializer_classification_head,
            kernel_regularizer=self._kernel_regularizer,
        )
        super().build(input_shape)

    def build_rpn_head(self, inputs):
        """Predictions for the classification and the regression of one pyramid level.

        Arguments:
            inputs: A tensor of shape [batch_size, width, height, channel]

        Returns:
            Tuple:
                classification_head: a tensor of shape [batch_size, num_anchors, 2]
                localization_head: a tensor of shape [batch_size, num_anchors, 4]
        """
        batch_size = tf.shape(inputs)[0]
        features = self.rpn_conv2d(inputs)
        raw_classification, raw_localization = self.build_detection_head(features)
        # Flatten everything but the batch axis into a single anchor axis.
        classification_head = tf.reshape(raw_classification, [batch_size, -1, 2])
        localization_head = tf.reshape(raw_localization, [batch_size, -1, 4])
        return classification_head, localization_head

    def call(self, inputs: List[tf.Tensor]):
        """Create the computation graph for the rpn inference

        Arguments:
            inputs: A list of tensors, the outputs of the pyramid.

        Returns:
            Tuple:
                - `localization_pred`: A list of 3-D logits tensors
                of shape [batch_size, num_anchors, 4]
                - `classification_pred`: A list of 3-D logits tensors
                of shape [batch_size, num_anchors, 2]
                - `anchors`: A list with the output of one `Anchors` layer per pyramid level.
        """
        # The anchor layers are paired with the pyramid levels by position.
        anchors = [
            anchor_layer(feature_map) for feature_map, anchor_layer in zip(inputs, self._anchors)
        ]
        # Each entry is a (classification, localization) pair.
        heads = [self.build_rpn_head(feature_map) for feature_map in inputs]
        localization_pred = [localization for _, localization in heads]
        classification_pred = [classification for classification, _ in heads]
        return localization_pred, classification_pred, anchors

    def compute_loss(self, localization_pred, classification_pred, anchors, ground_truths):
        """Compute the loss

        Arguments:
            localization_pred: A list of tensors of shape [batch_size, num_anchors, 4].
            classification_pred: A list of tensors of shape [batch_size, num_anchors, 2]
            anchors: A list of tensors of shape [num_anchors, (y_min, x_min, y_max, x_max)]
            ground_truths: A dict
                - `BoxField.BOXES`: A 3-D tensor of shape [batch_size, num_gt, (y1, x1, y2, x2)]
                - `BoxField.LABELS`: The ground-truth labels, background not counted
                (this method adds one to them). Documented upstream as int32
                of shape [batch_size, num_gt].
                - `BoxField.WEIGHTS`: A tensor of float and shape [batch_size, num_gt]
                - `BoxField.NUM_BOXES`: A 2-D tensor of int32 and shape [batch_size, 1]
                which allows to remove the padding created by tf.Data.
                Example: if batch_size=2 and this field equal tf.constant([[2], [1]], tf.int32)
                then my second box has a padding of 1

        Returns:
            The value returned by `self.compute_losses`, called with the classification
            targets under `BoxField.LABELS` and the localization targets under `BoxField.BOXES`.
        """
        # Merge the pyramid levels: predictions along the anchor axis, anchors along axis 0.
        localization_pred = tf.concat(localization_pred, 1)
        classification_pred = tf.concat(classification_pred, 1)
        anchors = tf.concat(anchors, 0)

        # We add one because the background is not counted in ground_truths[BoxField.LABELS]
        shifted_labels = ground_truths[BoxField.LABELS] + 1
        ground_truths = {
            BoxField.LABELS: shifted_labels,
            BoxField.BOXES: ground_truths[BoxField.BOXES],
            BoxField.WEIGHTS: ground_truths[BoxField.WEIGHTS],
            BoxField.NUM_BOXES: ground_truths[BoxField.NUM_BOXES],
        }

        # The anchors are deterministic: replicate them once per element of the batch.
        anchors_with_batch_axis = anchors[tf.newaxis]
        batch_size = tf.shape(ground_truths[BoxField.BOXES])[0]
        batched_anchors = tf.tile(anchors_with_batch_axis, [batch_size, 1, 1])

        y_true, weights = self.target_assigner.assign({BoxField.BOXES: batched_anchors},
                                                      ground_truths)
        # Cap the labels at 1: any matched class becomes the single foreground label.
        y_true[BoxField.LABELS] = tf.minimum(y_true[BoxField.LABELS], 1)

        # Track the recall of the RPN as a metric.
        recall = compute_rpn_metrics(y_true[BoxField.LABELS], classification_pred,
                                     weights[BoxField.LABELS])
        self.add_metric(recall, name='rpn_recall', aggregation='mean')

        # Sample a balanced subset of anchors; the label weights tell which ones can be sampled.
        is_foreground = y_true[BoxField.LABELS] > 0
        sample_idx = batch_sample_balanced_positive_negative(
            weights[BoxField.LABELS],
            SAMPLING_SIZE,
            is_foreground,
            positive_fraction=SAMPLING_POSITIVE_RATIO,
            dtype=self._compute_dtype)
        # Anchors which were not sampled get a zero weight in both losses.
        for field in (BoxField.LABELS, BoxField.BOXES):
            weights[field] = sample_idx * weights[field]

        y_pred = {BoxField.LABELS: classification_pred, BoxField.BOXES: localization_pred}
        return self.compute_losses(y_true, y_pred, weights)

    def get_config(self):
        """Return the parent configuration extended with `anchor_ratios`."""
        config = super().get_config()
        config['anchor_ratios'] = self._anchor_ratios
        return config
def compute_rpn_metrics(y_true: tf.Tensor, y_pred: tf.Tensor, weights: tf.Tensor):
    """Metric which allows tracking how the training of the rpn head behaves.

    Arguments:
        y_true: A tensor vector with shape [batch_size, num_anchors] where 0 = background and
            1 = foreground.
        y_pred: A tensor of shape [batch_size, num_anchors, 2],
            representing the classification logits.
        weights: A tensor of shape [batch_size, num_anchors]. The weights are binarized
            (ceil, then clipped to [0, 1]); anchors with a zero weight are masked out.

    Returns:
        tf.Tensor: Recall, the fraction of the foreground anchors which are predicted
        as foreground. It is 0 (instead of NaN) when there is no foreground anchor.
    """
    # Force the cast to avoid type issue when the mixed precision is activated
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    weights = tf.cast(weights, tf.float32)
    # Sometimes the weights have decimal value we do not want that
    weights = tf.clip_by_value(tf.math.ceil(weights), 0, 1)

    masked_y_true = y_true * weights
    prediction = tf.cast(tf.argmax(y_pred, axis=-1, name='label_prediction'),
                         tf.float32) * weights  # 0 or 1
    correct = tf.cast(tf.equal(prediction, masked_y_true), tf.float32)

    # `tf.equal` is element-wise whatever the tensor-equality mode, unlike `==`.
    fg_inds = tf.where(tf.equal(masked_y_true, 1.0))
    num_foreground = tf.math.count_nonzero(masked_y_true)
    num_found_foreground = tf.math.count_nonzero(tf.gather_nd(correct, fg_inds))

    # A batch without any foreground anchor would give 0 / 0 = NaN and poison the
    # mean-aggregated metric; divide_no_nan returns 0 instead. The counts are int64:
    # they are cast to float64, the dtype `tf.truediv` produced for them.
    recall = tf.math.divide_no_nan(tf.cast(num_found_foreground, tf.float64),
                                   tf.cast(num_foreground, tf.float64),
                                   name='recall')
    return recall
# Runs at import time with the class and the module-level `__pdoc__` dict
# (presumably hides inherited members from the generated documentation - TODO confirm).
remove_unwanted_doc(RegionProposalNetwork, __pdoc__)
Variables
SAMPLING_POSITIVE_RATIO
SAMPLING_SIZE
Functions
compute_rpn_metrics
def compute_rpn_metrics(
y_true: tensorflow.python.framework.ops.Tensor,
y_pred: tensorflow.python.framework.ops.Tensor,
weights: tensorflow.python.framework.ops.Tensor
)
Useful metric that allows tracking how the training of the rpn head behaves.
Parameters:
Name | Description |
---|---|
y_true | A tensor vector with shape [batch_size, num_anchors] where 0 = background and 1 = foreground. |
y_pred | A tensor of shape [batch_size, num_anchors, 2], representing the classification logits. |
weights | A tensor of shape [batch_size, num_anchors]. The weights are binarized (ceil, then clipped to [0, 1]) and anchors with a zero weight are masked out. |
Returns:
Type | Description |
---|---|
tf.Tensor | Recall: among all the boxes that we had to find, how many did we find. |
View Source
def compute_rpn_metrics(y_true: tf.Tensor, y_pred: tf.Tensor, weights: tf.Tensor):
    """Metric which allows tracking how the training of the rpn head behaves.

    Arguments:
        y_true: A tensor vector with shape [batch_size, num_anchors] where 0 = background and
            1 = foreground.
        y_pred: A tensor of shape [batch_size, num_anchors, 2],
            representing the classification logits.
        weights: A tensor of shape [batch_size, num_anchors]. The weights are binarized
            (ceil, then clipped to [0, 1]); anchors with a zero weight are masked out.

    Returns:
        tf.Tensor: Recall, the fraction of the foreground anchors which are predicted
        as foreground. It is 0 (instead of NaN) when there is no foreground anchor.
    """
    # Force the cast to avoid type issue when the mixed precision is activated
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    weights = tf.cast(weights, tf.float32)
    # Sometimes the weights have decimal value we do not want that
    weights = tf.clip_by_value(tf.math.ceil(weights), 0, 1)

    masked_y_true = y_true * weights
    prediction = tf.cast(tf.argmax(y_pred, axis=-1, name='label_prediction'),
                         tf.float32) * weights  # 0 or 1
    correct = tf.cast(tf.equal(prediction, masked_y_true), tf.float32)

    # `tf.equal` is element-wise whatever the tensor-equality mode, unlike `==`.
    fg_inds = tf.where(tf.equal(masked_y_true, 1.0))
    num_foreground = tf.math.count_nonzero(masked_y_true)
    num_found_foreground = tf.math.count_nonzero(tf.gather_nd(correct, fg_inds))

    # A batch without any foreground anchor would give 0 / 0 = NaN and poison the
    # mean-aggregated metric; divide_no_nan returns 0 instead. The counts are int64:
    # they are cast to float64, the dtype `tf.truediv` produced for them.
    recall = tf.math.divide_no_nan(tf.cast(num_found_foreground, tf.float64),
                                   tf.cast(num_foreground, tf.float64),
                                   name='recall')
    return recall
Classes
RegionProposalNetwork
class RegionProposalNetwork(
anchor_ratios=(0.5, 1, 2),
**kwargs
)
It was introduced in the Faster R-CNN paper and uses the parameters from Feature Pyramid Networks for Object Detection.
Arguments
Name | Description |
---|---|
anchor_ratios | The ratios are the different shapes that you want to apply on your anchors. e.g: (0.5, 1, 2) |
Call arguments
Name | Description |
---|---|
inputs | A List of tensors the output of the pyramid |
Call returns
Type | Description |
---|---|
Tuple | - localization_pred : A list of 3-D logits tensors of shape [batch_size, num_anchors, 4] - classification_pred : A list of 3-D logits tensors of shape [batch_size, num_anchors, 2] - anchors : A list of tensors of shape [batch_size, num_anchors, (y_min, x_min, y_max, x_max)] |
Ancestors (in MRO)
- kerod.layers.detection.abstract_detection_head.AbstractDetectionHead
- tensorflow.python.keras.engine.base_layer.Layer
- tensorflow.python.module.module.Module
- tensorflow.python.training.tracking.tracking.AutoTrackable
- tensorflow.python.training.tracking.base.Trackable
- tensorflow.python.keras.utils.version_utils.LayerVersionSelector
Methods
build_detection_head
def build_detection_head(
self,
inputs
)
Build a detection head composed of a classification and box_detection.
Parameters:
Name | Description |
---|---|
inputs | A tensor of shape [batch_size, H, W, C] |
Returns:
Type | Description |
---|---|
Tuple | classification_head: a tensor of shape [batch_size, num_anchors, 2] localization_head: a tensor of shape [batch_size, num_anchors, 4] |
View Source
def build_detection_head(self, inputs):
    """Build a detection head composed of a classification and a box prediction branch.

    Both branches are applied to the same `inputs`.

    Arguments:
        inputs: A tensor of shape [batch_size, H, W, C]

    Returns:
        Tuple:
            classification_head: the output of `self._conv_classification_head`
            localization_head: the output of `self._conv_box_prediction_head`

        NOTE(review): upstream documents both outputs as [batch_size, num_anchors, 2] and
        [batch_size, num_anchors, 4], yet `build_rpn_head` reshapes them to these shapes
        itself - confirm the shape produced here.
    """
    return self._conv_classification_head(inputs), self._conv_box_prediction_head(inputs)
build_rpn_head
def build_rpn_head(
self,
inputs
)
Predictions for the classification and the regression
Parameters:
Name | Description |
---|---|
inputs | A tensor of shape [batch_size, width, height, channel] |
Returns:
Type | Description |
---|---|
Tuple | classification_head: a tensor of shape [batch_size, num_anchors, 2] localization_head: a tensor of shape [batch_size, num_anchors, 4] |
View Source
def build_rpn_head(self, inputs):
    """Predictions for the classification and the regression of one pyramid level.

    Arguments:
        inputs: A tensor of shape [batch_size, width, height, channel]

    Returns:
        Tuple:
            classification_head: a tensor of shape [batch_size, num_anchors, 2]
            localization_head: a tensor of shape [batch_size, num_anchors, 4]
    """
    batch_size = tf.shape(inputs)[0]
    features = self.rpn_conv2d(inputs)
    raw_classification, raw_localization = self.build_detection_head(features)
    # Flatten everything but the batch axis into a single anchor axis.
    classification_head = tf.reshape(raw_classification, [batch_size, -1, 2])
    localization_head = tf.reshape(raw_localization, [batch_size, -1, 4])
    return classification_head, localization_head
build_segmentation_head
def build_segmentation_head(
self,
inputs
)
Build the segmentation head
Parameters:
Name | Description |
---|---|
inputs | A tensor of float and shape [N, H, W, C] |
Returns:
Type | Description |
---|---|
tf.Tensor | A tensor of shape [N, H * 2, W * 2, num_classes - 1] |
View Source
def build_segmentation_head(self, inputs):
    """Build the segmentation head by chaining `self._segmentation_layers`.

    Arguments:
        inputs: A tensor of float and shape [N, H, W, C]

    Returns:
        tf.Tensor: A tensor and shape [N, H*2, W*2, num_classes - 1]
    """
    outputs = inputs
    # Each layer consumes the output of the previous one.
    for segmentation_layer in self._segmentation_layers:
        outputs = segmentation_layer(outputs)
    return outputs
call
def call(
self,
inputs: List[tensorflow.python.framework.ops.Tensor]
)
Create the computation graph for the rpn inference
Parameters:
Name | Description |
---|---|
inputs | A List of tensors the output of the pyramid |
Returns:
Type | Description |
---|---|
Tuple | - localization_pred : A list of 3-D logits tensors of shape [batch_size, num_anchors, 4] - classification_pred : A list of 3-D logits tensors of shape [batch_size, num_anchors, 2] - anchors : A list of tensors of shape [batch_size, num_anchors, (y_min, x_min, y_max, x_max)] |
View Source
def call(self, inputs: List[tf.Tensor]):
    """Create the computation graph for the rpn inference

    Arguments:
        inputs: A list of tensors, the outputs of the pyramid.

    Returns:
        Tuple:
            - `localization_pred`: A list of 3-D logits tensors
            of shape [batch_size, num_anchors, 4]
            - `classification_pred`: A list of 3-D logits tensors
            of shape [batch_size, num_anchors, 2]
            - `anchors`: A list with the output of one `Anchors` layer per pyramid level.
    """
    # The anchor layers are paired with the pyramid levels by position.
    anchors = [
        anchor_layer(feature_map) for feature_map, anchor_layer in zip(inputs, self._anchors)
    ]
    # Each entry is a (classification, localization) pair.
    heads = [self.build_rpn_head(feature_map) for feature_map in inputs]
    localization_pred = [localization for _, localization in heads]
    classification_pred = [classification for classification, _ in heads]
    return localization_pred, classification_pred, anchors
compute_loss
def compute_loss(
self,
localization_pred,
classification_pred,
anchors,
ground_truths
)
Compute the loss
Parameters:
Name | Description |
---|---|
localization_pred | A list of tensors of shape [batch_size, num_anchors, 4]. |
classification_pred | A list of tensors of shape [batch_size, num_anchors, 2] |
anchors | A list of tensors of shape [num_anchors, (y_min, x_min, y_max, x_max)] |
ground_truths | A dict - BoxField.LABELS : A 3-D tensor of shape [batch_size, num_gt, num_classes], - BoxField.BOXES : A 3-D tensor of shape [batch_size, num_gt, (y1, x1, y2, x2)] - BoxField.LABELS : A 2-D tensor of int32 and shape [batch_size, num_gt] - BoxField.WEIGHTS : A 2-D tensor of float and shape [batch_size, num_gt] - BoxField.NUM_BOXES : A 2-D tensor of int32 and shape [batch_size, 1] which allows to remove the padding created by tf.Data. Example: if batch_size=2 and this field equal tf.constant([[2], [1]], tf.int32) then my second box has a padding of 1 |
Returns:
Type | Description |
---|---|
Tuple | - classification_loss : A scalar in tf.float32 - localization_loss : A scalar in tf.float32 |
View Source
def compute_loss(self, localization_pred, classification_pred, anchors, ground_truths):
    """Compute the loss

    Arguments:
        localization_pred: A list of tensors of shape [batch_size, num_anchors, 4].
        classification_pred: A list of tensors of shape [batch_size, num_anchors, 2]
        anchors: A list of tensors of shape [num_anchors, (y_min, x_min, y_max, x_max)]
        ground_truths: A dict
            - `BoxField.BOXES`: A 3-D tensor of shape [batch_size, num_gt, (y1, x1, y2, x2)]
            - `BoxField.LABELS`: The ground-truth labels, background not counted
            (this method adds one to them). Documented upstream as int32
            of shape [batch_size, num_gt].
            - `BoxField.WEIGHTS`: A tensor of float and shape [batch_size, num_gt]
            - `BoxField.NUM_BOXES`: A 2-D tensor of int32 and shape [batch_size, 1]
            which allows to remove the padding created by tf.Data.
            Example: if batch_size=2 and this field equal tf.constant([[2], [1]], tf.int32)
            then my second box has a padding of 1

    Returns:
        The value returned by `self.compute_losses`, called with the classification
        targets under `BoxField.LABELS` and the localization targets under `BoxField.BOXES`.
    """
    # Merge the pyramid levels: predictions along the anchor axis, anchors along axis 0.
    localization_pred = tf.concat(localization_pred, 1)
    classification_pred = tf.concat(classification_pred, 1)
    anchors = tf.concat(anchors, 0)

    # We add one because the background is not counted in ground_truths[BoxField.LABELS]
    shifted_labels = ground_truths[BoxField.LABELS] + 1
    ground_truths = {
        BoxField.LABELS: shifted_labels,
        BoxField.BOXES: ground_truths[BoxField.BOXES],
        BoxField.WEIGHTS: ground_truths[BoxField.WEIGHTS],
        BoxField.NUM_BOXES: ground_truths[BoxField.NUM_BOXES],
    }

    # The anchors are deterministic: replicate them once per element of the batch.
    anchors_with_batch_axis = anchors[tf.newaxis]
    batch_size = tf.shape(ground_truths[BoxField.BOXES])[0]
    batched_anchors = tf.tile(anchors_with_batch_axis, [batch_size, 1, 1])

    y_true, weights = self.target_assigner.assign({BoxField.BOXES: batched_anchors},
                                                  ground_truths)
    # Cap the labels at 1: any matched class becomes the single foreground label.
    y_true[BoxField.LABELS] = tf.minimum(y_true[BoxField.LABELS], 1)

    # Track the recall of the RPN as a metric.
    recall = compute_rpn_metrics(y_true[BoxField.LABELS], classification_pred,
                                 weights[BoxField.LABELS])
    self.add_metric(recall, name='rpn_recall', aggregation='mean')

    # Sample a balanced subset of anchors; the label weights tell which ones can be sampled.
    is_foreground = y_true[BoxField.LABELS] > 0
    sample_idx = batch_sample_balanced_positive_negative(
        weights[BoxField.LABELS],
        SAMPLING_SIZE,
        is_foreground,
        positive_fraction=SAMPLING_POSITIVE_RATIO,
        dtype=self._compute_dtype)
    # Anchors which were not sampled get a zero weight in both losses.
    for field in (BoxField.LABELS, BoxField.BOXES):
        weights[field] = sample_idx * weights[field]

    y_pred = {BoxField.LABELS: classification_pred, BoxField.BOXES: localization_pred}
    return self.compute_losses(y_true, y_pred, weights)
compute_losses
def compute_losses(
self,
y_true: Dict[str, tensorflow.python.framework.ops.Tensor],
y_pred: Dict[str, tensorflow.python.framework.ops.Tensor],
weights: Dict[str, tensorflow.python.framework.ops.Tensor]
) -> dict
Compute the losses of the object detection head.
Each dictionary is composed of the same keys (classification, localization, segmentation)
Parameters:
Name | Description |
---|---|
y_pred | A dict of tensors of shape [N, nb_boxes, num_output]. |
y_true | A dict of tensors of shape [N, nb_boxes, num_output]. |
weights | A dict of tensors of shape [N, nb_boxes, num_output]. This tensor is composed of one hot vectors. |
Returns:
Type | Description |
---|---|
dict | A dict of different losses |
View Source
def compute_losses(self, y_true: Dict[str, tf.Tensor], y_pred: Dict[str, tf.Tensor],
                   weights: Dict[str, tf.Tensor]) -> dict:
    """Compute the losses of the object detection head.

    Each dictionary is composed of the same keys (classification, localization, segmentation).
    Every loss is also recorded as a mean-aggregated metric and registered with `add_loss`.

    Arguments:
        y_pred: A dict of tensors of shape [N, nb_boxes, num_output].
        y_true: A dict of tensors of shape [N, nb_boxes, num_output].
        weights: A dict of tensors of shape [N, nb_boxes, num_output].
            This tensor is composed of one hot vectors.

    Returns:
        dict : The classification loss under `BoxField.LABELS`, the localization loss under
        `BoxField.BOXES` and, when `self._use_mask` is set, the segmentation loss
        under `BoxField.MASKS`.
    """
    # Sum of the classification weights of each element of the batch, never below 1
    # so that the division is always defined.
    normalizer = tf.cast(tf.maximum(tf.reduce_sum(weights[BoxField.LABELS], axis=1), 1.0),
                         tf.float32)

    def _weighted_loss(loss_fn, loss_weight, target):
        # Everything is computed in float32, whatever the compute dtype of the layer.
        per_box = loss_fn(tf.cast(y_true[target], tf.float32),
                          tf.cast(y_pred[target], tf.float32),
                          sample_weight=tf.cast(weights[target], tf.float32))
        return loss_weight * tf.reduce_mean(tf.reduce_sum(per_box, axis=1) / normalizer)

    classification_loss = _weighted_loss(self._classification_loss,
                                         self._classification_loss_weight, BoxField.LABELS)
    self.add_metric(classification_loss,
                    name=f'{self.name}_classification_loss',
                    aggregation='mean')

    localization_loss = _weighted_loss(self._localization_loss, self._localization_loss_weight,
                                       BoxField.BOXES)
    self.add_metric(localization_loss,
                    name=f'{self.name}_localization_loss',
                    aggregation='mean')

    self.add_loss([classification_loss, localization_loss])
    losses = {BoxField.LABELS: classification_loss, BoxField.BOXES: localization_loss}

    if self._use_mask:
        segmentation_loss = _weighted_loss(self._segmentation_loss,
                                           self._segmentation_loss_weight, BoxField.MASKS)
        self.add_metric(segmentation_loss,
                        name=f'{self.name}_segmentation_loss',
                        aggregation='mean')
        self.add_loss(segmentation_loss)
        losses[BoxField.MASKS] = segmentation_loss

    return losses