Skip to content

Module kerod.core.box_ops



View Source
import tensorflow as tf

def transform_fpcoor_for_tf(boxes: tf.Tensor, tensor_shape: tuple, crop_shape: tuple) -> tf.Tensor:
    """Transform fpcoor boxes to a format to be used by tf.image.crop_and_resize.

    The way tf.image.crop_and_resize works (with normalized box):

        Initial point (the value of output[0]): x0_box * (W_img - 1)
        Spacing: w_box * (W_img - 1) / (W_crop - 1)

    Use the above grid to bilinear sample.

    However, what we want is (with fpcoor box):

        Spacing: w_box / W_crop
        Initial point: x0_box + spacing/2 - 0.5

    (-0.5 because bilinear sample (in my definition) assumes floating point coordinate
    (0.0, 0.0) is the same as pixel value (0, 0))

    This function has been taken from tensorpack.

    Arguments:

    - *boxes*: A Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]. These
    boxes have already been normalized in the feature space. The coordinates are not in
    the input image space.
    - *tensor_shape*: Height and width respectively
    - *crop_shape*: Height and width of the crop respectively

    Returns:

    A tensor of shape [N, ..., num_boxes, (y_min, x_min, y_max, x_max)]
    """
    y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)

    # Distance between two consecutive sampling points inside each box.
    spacing_h = (y_max - y_min) / tf.cast(crop_shape[0], boxes.dtype)
    spacing_w = (x_max - x_min) / tf.cast(crop_shape[1], boxes.dtype)

    # Normalizers are (size - 1), matching the grid described in the docstring.
    norm_h = tf.cast(tensor_shape[0] - 1, boxes.dtype)
    norm_w = tf.cast(tensor_shape[1] - 1, boxes.dtype)

    ny0 = (y_min + spacing_h / 2 - 0.5) / norm_h
    nx0 = (x_min + spacing_w / 2 - 0.5) / norm_w

    nh = spacing_h * tf.cast(crop_shape[0] - 1, boxes.dtype) / norm_h
    nw = spacing_w * tf.cast(crop_shape[1] - 1, boxes.dtype) / norm_w

    return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=-1)

def convert_to_center_coordinates(boxes: tf.Tensor) -> tf.Tensor:
    """Convert boxes from corner coordinates to center coordinates.

    y_min, x_min, y_max, x_max -> y_cent, x_cent, h, w

    Arguments:

    - *boxes*: A Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]

    Returns:

    A tensor of shape [N, ..., num_boxes, (ycenter, xcenter, height, width)]
    """
    y_min, x_min, y_max, x_max = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
    width = x_max - x_min
    height = y_max - y_min
    ycenter = y_min + height / 2.
    xcenter = x_min + width / 2.
    return tf.concat([ycenter, xcenter, height, width], axis=-1)

def convert_to_xyxy_coordinates(boxes: tf.Tensor) -> tf.Tensor:
    """Convert boxes from center coordinates to corner coordinates.

    y_cent, x_cent, h, w -> y_min, x_min, y_max, x_max

    Arguments:

    - *boxes*: A Tensor of shape [N, ..., (y_cent, x_cent, h, w)]

    Returns:

    A tensor of shape [N, ..., num_boxes, (y_min, x_min, y_max, x_max)]
    """
    y_cent, x_cent, h, w = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
    y_min = y_cent - 0.5 * h
    x_min = x_cent - 0.5 * w
    y_max = y_cent + 0.5 * h
    x_max = x_cent + 0.5 * w
    return tf.concat([y_min, x_min, y_max, x_max], axis=-1)

def compute_area(boxes: tf.Tensor) -> tf.Tensor:
    """Compute the area of boxes.

    Arguments:

    - *boxes*: Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]

    Returns:

    A tensor of shape [N, ..., num_boxes]
    """
    with tf.name_scope('Area'):
        y_min, x_min, y_max, x_max = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
        # The split keeps a trailing dimension of size 1; drop it from the result.
        return tf.squeeze((y_max - y_min) * (x_max - x_min), -1)

def compute_intersection(boxes1: tf.Tensor, boxes2: tf.Tensor, perm=None) -> tf.Tensor:
    """Compute pairwise intersection areas between boxes.

    Arguments:

    - *boxes1*: Tensor of shape [N, (y_min,x_min,y_max,x_max)] or
    [batch_size, N, (y_min,x_min,y_max,x_max)]
    - *boxes2*: Tensor of shape [M, (y_min,x_min,y_max,x_max)] or
    [batch_size, M, (y_min,x_min,y_max,x_max)]
    - *perm*: Permutation forwarded to `tf.transpose` for the coordinates of boxes2.
    `None` (the default) reverses the dimensions, which suits 2D inputs;
    `compute_giou` passes `(0, 2, 1)` for batched 3D inputs.

    Returns:

    A tensor with shape [N, M] (or [batch_size, N, M] for batched inputs)
    representing pairwise intersections
    """
    with tf.name_scope('Intersection'):
        y_min1, x_min1, y_max1, x_max1 = tf.split(value=boxes1, num_or_size_splits=4, axis=-1)
        y_min2, x_min2, y_max2, x_max2 = tf.split(value=boxes2, num_or_size_splits=4, axis=-1)
        zero = tf.convert_to_tensor(0.0, boxes1.dtype)

        # Transposing the boxes2 coordinates lets broadcasting build every (box1, box2) pair.
        all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2, perm=perm))
        all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2, perm=perm))
        # Clamp at zero: a negative extent means the pair does not overlap.
        intersect_heights = tf.maximum(zero, all_pairs_min_ymax - all_pairs_max_ymin)

        all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2, perm=perm))
        all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2, perm=perm))
        intersect_widths = tf.maximum(zero, all_pairs_min_xmax - all_pairs_max_xmin)

        return intersect_heights * intersect_widths

def compute_iou(boxes1: tf.Tensor, boxes2: tf.Tensor) -> tf.Tensor:
    """Computes pairwise intersection-over-union between boxes.

    In the result, the columns (x axis) correspond to boxes2 and the rows (y axis)
    to boxes1:

    ```python
    from kerod.core.box_ops import compute_iou
    import numpy as np

    boxes1 = np.array([[548.26666, 364.57202, 706.1333 , 524.472  ],
                       [473.6    , 547.924  , 565.3333 , 635.336  ],
                       [477.86664, 688.63605, 580.26666, 786.70795],
                       [497.06668, 750.464  , 576.     , 857.064  ]])
    boxes2 = np.array([[474.74518, 553.37256, 565.2548 , 598.62744],
                       [448., 736., 576., 864.],
                       [464., 672., 592., 800.],
                       [560., 368., 688., 496.]])

    compute_iou(boxes1, boxes2)
    ```

    Output:

    ```python
    <tf.Tensor: shape=(4, 4), dtype=float64, numpy=
    array([[0.        , 0.        , 0.        , 0.6490545 ],
           [0.51081317, 0.        , 0.        , 0.        ],
           [0.        , 0.23198337, 0.61294949, 0.        ],
           [0.        , 0.51356762, 0.18718853, 0.        ]])>
    ```

    Arguments:

    - *boxes1*: A 2D Tensor of shape [N, (y_min,x_min,y_max,x_max)] or a 3D
    Tensor of shape [batch_size, N, (y_min,x_min,y_max,x_max)]
    - *boxes2*: A 2D Tensor of shape [M, (y_min,x_min,y_max,x_max)] or a 3D
    Tensor of shape [batch_size, M, (y_min,x_min,y_max,x_max)]

    Returns:

    A tensor with shape [N, M] or [batch_size, N, M] representing pairwise iou scores.

    Raises:

    ValueError: If your tensor is different than 2D or 3D.
    """
    # compute_giou returns the plain IoU when mode is 'iou'.
    return compute_giou(boxes1, boxes2, mode='iou')

def compute_giou(boxes1: tf.Tensor, boxes2: tf.Tensor, mode: str = "giou") -> tf.Tensor:
    """Computes pairwise generalized intersection-over-union (GIoU) between boxes.

    Arguments:

    - *boxes1*: A 2D Tensor of shape [N, (y_min,x_min,y_max,x_max)] or a 3D
    Tensor of shape [batch_size, N, (y_min,x_min,y_max,x_max)]
    - *boxes2*: A 2D Tensor of shape [M, (y_min,x_min,y_max,x_max)] or a 3D
    Tensor of shape [batch_size, M, (y_min,x_min,y_max,x_max)]
    - *mode*: You can select iou or giou. `"iou"` returns the plain IoU; any other
    value is treated as giou.

    Returns:

    A tensor with shape [N, M] or [batch_size, N, M] representing pairwise iou
    (or giou) scores.

    Raises:

    ValueError: If your tensor is different than 2D or 3D.
    """
    with tf.name_scope(mode.upper()):
        rank = len(boxes1.shape)
        if rank == 2:
            perm = None
            which_dim_expands = 0
        elif rank == 3:
            perm = (0, 2, 1)
            which_dim_expands = 1
        else:
            raise ValueError('Compute Iou is only suppoted for 2D and 3D Tensor')

        intersections = compute_intersection(boxes1, boxes2, perm=perm)
        areas1 = compute_area(boxes1)
        areas2 = compute_area(boxes2)
        # Broadcast areas1 along the last axis and areas2 along the boxes1 axis so
        # that both line up with the pairwise intersections.
        unions = areas1[..., None] + tf.expand_dims(areas2, which_dim_expands) - intersections
        # Pairs without overlap get an IoU of exactly 0.
        iou = tf.where(intersections == 0, tf.zeros_like(intersections),
                       tf.truediv(intersections, unions))
        if mode == "iou":
            return iou

        # Smallest box enclosing each (box1, box2) pair.
        y_min1, x_min1, y_max1, x_max1 = tf.split(boxes1, 4, axis=-1)
        y_min2, x_min2, y_max2, x_max2 = tf.split(boxes2, 4, axis=-1)
        enclose_ymin = tf.minimum(y_min1, tf.transpose(y_min2, perm=perm))
        enclose_xmin = tf.minimum(x_min1, tf.transpose(x_min2, perm=perm))
        enclose_ymax = tf.maximum(y_max1, tf.transpose(y_max2, perm=perm))
        enclose_xmax = tf.maximum(x_max1, tf.transpose(x_max2, perm=perm))

        zero = tf.convert_to_tensor(0.0, boxes1.dtype)
        enclose_width = tf.maximum(zero, enclose_xmax - enclose_xmin)
        enclose_height = tf.maximum(zero, enclose_ymax - enclose_ymin)
        enclose_area = enclose_width * enclose_height

        # divide_no_nan yields 0 when the enclosing area is 0.
        giou = iou - tf.math.divide_no_nan((enclose_area - unions), enclose_area)
        return giou

def normalize_box_coordinates(boxes: tf.Tensor, height: int, width: int) -> tf.Tensor:
    """Normalize the boxes coordinates with image shape.

    Arguments:

    - *boxes*: Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]
    - *height*: An integer
    - *width*: An integer

    Returns:

    A tensor of shape [N, ..., (y_min,x_min,y_max,x_max)] where the y coordinates
    have been divided by `height` and the x coordinates by `width`. Gradients are
    stopped on the result.
    """
    y_min, x_min, y_max, x_max = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
    y_min = y_min / height
    x_min = x_min / width
    y_max = y_max / height
    x_max = x_max / width

    # Won't be backpropagated to rois anyway, but to save time
    boxes = tf.stop_gradient(tf.concat([y_min, x_min, y_max, x_max], axis=-1))
    return boxes

def clip_boxes(boxes: tf.Tensor, window: tf.Tensor) -> tf.Tensor:
    """Perform a clipping according to a window on the boxes.

    Arguments:

    - *boxes*: A tensor of shape [batch_size, num_boxes, (y_min, x_min, y_max, x_max)]
    - *window*: A tensor of shape [batch_size, (h, w)]

    Returns:

    A tensor of shape [batch_size, num_boxes, (y_min, x_min, y_max, x_max)]
    """
    # Lower bound: no coordinate may be negative.
    boxes = tf.maximum(boxes, tf.cast(0, boxes.dtype))
    # Upper bound: repeat (h, w) into (h, w, h, w) so that it lines up with
    # (y_min, x_min, y_max, x_max) and broadcasts over the num_boxes axis.
    m = tf.tile(tf.expand_dims(window, axis=1), [1, 1, 2])
    boxes = tf.minimum(boxes, tf.cast(m, boxes.dtype))
    return boxes

def flip_left_right(boxes: tf.Tensor) -> tf.Tensor:
    """[Taken from tensorflow models] Left-right flip the boxes.

    Arguments:

    - *boxes*: float tensor containing the bounding boxes -> [N, 4]
        (leading dimensions are allowed, the coordinates are read on the last axis).
        Boxes are in normalized form meaning their coordinates vary
        between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].

    Returns:

    Flipped boxes, with the same shape as `boxes`.
    """
    # Work on the last axis, like the other helpers of this module; for the
    # documented [N, 4] input this is the same as axis 1.
    ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
    # Mirroring around x = 0.5 swaps the roles of xmin and xmax.
    flipped_xmin = tf.subtract(1.0, xmax)
    flipped_xmax = tf.subtract(1.0, xmin)
    flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], -1)
    return flipped_boxes



def clip_boxes(
    boxes: tensorflow.python.framework.ops.Tensor,
    window: tensorflow.python.framework.ops.Tensor
) -> tensorflow.python.framework.ops.Tensor

Perform a clipping according to a window on the boxes.


  • boxes: A tensor of shape [batch_size, num_boxes, (y_min, x_min, y_max, x_max)]
  • window: A tensor of shape [batch_size, (h, w)]


A tensor of shape [batch_size, num_boxes, (y_min, x_min, y_max, x_max)]

View Source
def clip_boxes(boxes: tf.Tensor, window: tf.Tensor) -> tf.Tensor:
    """Perform a clipping according to a window on the boxes.

    Arguments:

    - *boxes*: A tensor of shape [batch_size, num_boxes, (y_min, x_min, y_max, x_max)]
    - *window*: A tensor of shape [batch_size, (h, w)]

    Returns:

    A tensor of shape [batch_size, num_boxes, (y_min, x_min, y_max, x_max)]
    """
    # Lower bound: no coordinate may be negative.
    boxes = tf.maximum(boxes, tf.cast(0, boxes.dtype))
    # Upper bound: repeat (h, w) into (h, w, h, w) so that it lines up with
    # (y_min, x_min, y_max, x_max) and broadcasts over the num_boxes axis.
    m = tf.tile(tf.expand_dims(window, axis=1), [1, 1, 2])
    boxes = tf.minimum(boxes, tf.cast(m, boxes.dtype))
    return boxes


def compute_area(
    boxes: tensorflow.python.framework.ops.Tensor
) -> tensorflow.python.framework.ops.Tensor

Compute the area of boxes.


  • boxes: Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]


A tensor of shape [N, ..., num_boxes]

View Source
def compute_area(boxes: tf.Tensor) -> tf.Tensor:
    """Compute the area of boxes.

    Arguments:

    - *boxes*: Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]

    Returns:

    A tensor of shape [N, ..., num_boxes]
    """
    with tf.name_scope('Area'):
        y_min, x_min, y_max, x_max = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
        # The split keeps a trailing dimension of size 1; drop it from the result.
        return tf.squeeze((y_max - y_min) * (x_max - x_min), -1)


def compute_giou(
    boxes1: tensorflow.python.framework.ops.Tensor,
    boxes2: tensorflow.python.framework.ops.Tensor,
    mode: str = 'giou'
) -> tensorflow.python.framework.ops.Tensor

Computes pairwise generalized intersection-over-union (GIoU) between boxes.


  • boxes1: A 2D Tensor of shape [N, (y_min,x_min,y_max,x_max)] or a or 3D Tensor of shape [batch_size, N, (y_min,x_min,y_max,x_max)]
  • boxes2: A 2D Tensor of shape [M, (y_min,x_min,y_max,x_max)] or a or 3D Tensor of shape [batch_size, M, (y_min,x_min,y_max,x_max)]
  • mode: You can select iou or giou.


A tensor with shape [N, M] or [batch_size, N, M] representing pairwise iou scores.


ValueError: If your tensor is different than 2D or 3D.

View Source
def compute_giou(boxes1: tf.Tensor, boxes2: tf.Tensor, mode: str = "giou") -> tf.Tensor:
    """Computes pairwise generalized intersection-over-union (GIoU) between boxes.

    Arguments:

    - *boxes1*: A 2D Tensor of shape [N, (y_min,x_min,y_max,x_max)] or a 3D
    Tensor of shape [batch_size, N, (y_min,x_min,y_max,x_max)]
    - *boxes2*: A 2D Tensor of shape [M, (y_min,x_min,y_max,x_max)] or a 3D
    Tensor of shape [batch_size, M, (y_min,x_min,y_max,x_max)]
    - *mode*: You can select iou or giou. `"iou"` returns the plain IoU; any other
    value is treated as giou.

    Returns:

    A tensor with shape [N, M] or [batch_size, N, M] representing pairwise iou
    (or giou) scores.

    Raises:

    ValueError: If your tensor is different than 2D or 3D.
    """
    with tf.name_scope(mode.upper()):
        rank = len(boxes1.shape)
        if rank == 2:
            perm = None
            which_dim_expands = 0
        elif rank == 3:
            perm = (0, 2, 1)
            which_dim_expands = 1
        else:
            raise ValueError('Compute Iou is only suppoted for 2D and 3D Tensor')

        intersections = compute_intersection(boxes1, boxes2, perm=perm)
        areas1 = compute_area(boxes1)
        areas2 = compute_area(boxes2)
        # Broadcast areas1 along the last axis and areas2 along the boxes1 axis so
        # that both line up with the pairwise intersections.
        unions = areas1[..., None] + tf.expand_dims(areas2, which_dim_expands) - intersections
        # Pairs without overlap get an IoU of exactly 0.
        iou = tf.where(intersections == 0, tf.zeros_like(intersections),
                       tf.truediv(intersections, unions))
        if mode == "iou":
            return iou

        # Smallest box enclosing each (box1, box2) pair.
        y_min1, x_min1, y_max1, x_max1 = tf.split(boxes1, 4, axis=-1)
        y_min2, x_min2, y_max2, x_max2 = tf.split(boxes2, 4, axis=-1)
        enclose_ymin = tf.minimum(y_min1, tf.transpose(y_min2, perm=perm))
        enclose_xmin = tf.minimum(x_min1, tf.transpose(x_min2, perm=perm))
        enclose_ymax = tf.maximum(y_max1, tf.transpose(y_max2, perm=perm))
        enclose_xmax = tf.maximum(x_max1, tf.transpose(x_max2, perm=perm))

        zero = tf.convert_to_tensor(0.0, boxes1.dtype)
        enclose_width = tf.maximum(zero, enclose_xmax - enclose_xmin)
        enclose_height = tf.maximum(zero, enclose_ymax - enclose_ymin)
        enclose_area = enclose_width * enclose_height

        # divide_no_nan yields 0 when the enclosing area is 0.
        giou = iou - tf.math.divide_no_nan((enclose_area - unions), enclose_area)
        return giou


def compute_intersection(
    boxes1: tensorflow.python.framework.ops.Tensor,
    boxes2: tensorflow.python.framework.ops.Tensor,
    perm=None
) -> tensorflow.python.framework.ops.Tensor

Compute pairwise intersection areas between boxes.


  • boxes1: Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]
  • boxes2: Tensor of shape [M, ..., (y_min,x_min,y_max,x_max)]


A tensor with shape [N, M] representing pairwise intersections

View Source
def compute_intersection(boxes1: tf.Tensor, boxes2: tf.Tensor, perm=None) -> tf.Tensor:
    """Compute pairwise intersection areas between boxes.

    Arguments:

    - *boxes1*: Tensor of shape [N, (y_min,x_min,y_max,x_max)] or
    [batch_size, N, (y_min,x_min,y_max,x_max)]
    - *boxes2*: Tensor of shape [M, (y_min,x_min,y_max,x_max)] or
    [batch_size, M, (y_min,x_min,y_max,x_max)]
    - *perm*: Permutation forwarded to `tf.transpose` for the coordinates of boxes2.
    `None` (the default) reverses the dimensions, which suits 2D inputs;
    `compute_giou` passes `(0, 2, 1)` for batched 3D inputs.

    Returns:

    A tensor with shape [N, M] (or [batch_size, N, M] for batched inputs)
    representing pairwise intersections
    """
    with tf.name_scope('Intersection'):
        y_min1, x_min1, y_max1, x_max1 = tf.split(value=boxes1, num_or_size_splits=4, axis=-1)
        y_min2, x_min2, y_max2, x_max2 = tf.split(value=boxes2, num_or_size_splits=4, axis=-1)
        zero = tf.convert_to_tensor(0.0, boxes1.dtype)

        # Transposing the boxes2 coordinates lets broadcasting build every (box1, box2) pair.
        all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2, perm=perm))
        all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2, perm=perm))
        # Clamp at zero: a negative extent means the pair does not overlap.
        intersect_heights = tf.maximum(zero, all_pairs_min_ymax - all_pairs_max_ymin)

        all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2, perm=perm))
        all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2, perm=perm))
        intersect_widths = tf.maximum(zero, all_pairs_min_xmax - all_pairs_max_xmin)

        return intersect_heights * intersect_widths


def compute_iou(
    boxes1: tensorflow.python.framework.ops.Tensor,
    boxes2: tensorflow.python.framework.ops.Tensor
) -> tensorflow.python.framework.ops.Tensor

Computes pairwise intersection-over-union between boxes.


In the result, the columns (x axis) correspond to boxes2 and the rows (y axis) to boxes1:

```python
from kerod.core.box_ops import compute_iou
import numpy as np

boxes1 = np.array([[548.26666, 364.57202, 706.1333 , 524.472  ],
                   [473.6    , 547.924  , 565.3333 , 635.336  ],
                   [477.86664, 688.63605, 580.26666, 786.70795],
                   [497.06668, 750.464  , 576.     , 857.064  ]])
boxes2 = np.array([[474.74518, 553.37256, 565.2548 , 598.62744],
                   [448., 736., 576., 864.],
                   [464., 672., 592., 800.],
                   [560., 368., 688., 496.]])

compute_iou(boxes1, boxes2)
```

Output:

```python
<tf.Tensor: shape=(4, 4), dtype=float64, numpy=
array([[0.        , 0.        , 0.        , 0.6490545 ],
       [0.51081317, 0.        , 0.        , 0.        ],
       [0.        , 0.23198337, 0.61294949, 0.        ],
       [0.        , 0.51356762, 0.18718853, 0.        ]])>
```


  • boxes1: A 2D Tensor of shape [N, (y_min,x_min,y_max,x_max)] or a 3D Tensor of shape [batch_size, N, (y_min,x_min,y_max,x_max)]
  • boxes2: A 2D Tensor of shape [M, (y_min,x_min,y_max,x_max)] or a 3D Tensor of shape [batch_size, M, (y_min,x_min,y_max,x_max)]

Returns:


A tensor with shape [N, M] or [batch_size, N, M] representing pairwise iou scores.


ValueError: If your tensor is different than 2D or 3D.

View Source
def compute_iou(boxes1: tf.Tensor, boxes2: tf.Tensor) -> tf.Tensor:
    """Computes pairwise intersection-over-union between boxes.

    In the result, the columns (x axis) correspond to boxes2 and the rows (y axis)
    to boxes1:

    ```python
    from kerod.core.box_ops import compute_iou
    import numpy as np

    boxes1 = np.array([[548.26666, 364.57202, 706.1333 , 524.472  ],
                       [473.6    , 547.924  , 565.3333 , 635.336  ],
                       [477.86664, 688.63605, 580.26666, 786.70795],
                       [497.06668, 750.464  , 576.     , 857.064  ]])
    boxes2 = np.array([[474.74518, 553.37256, 565.2548 , 598.62744],
                       [448., 736., 576., 864.],
                       [464., 672., 592., 800.],
                       [560., 368., 688., 496.]])

    compute_iou(boxes1, boxes2)
    ```

    Output:

    ```python
    <tf.Tensor: shape=(4, 4), dtype=float64, numpy=
    array([[0.        , 0.        , 0.        , 0.6490545 ],
           [0.51081317, 0.        , 0.        , 0.        ],
           [0.        , 0.23198337, 0.61294949, 0.        ],
           [0.        , 0.51356762, 0.18718853, 0.        ]])>
    ```

    Arguments:

    - *boxes1*: A 2D Tensor of shape [N, (y_min,x_min,y_max,x_max)] or a 3D
    Tensor of shape [batch_size, N, (y_min,x_min,y_max,x_max)]
    - *boxes2*: A 2D Tensor of shape [M, (y_min,x_min,y_max,x_max)] or a 3D
    Tensor of shape [batch_size, M, (y_min,x_min,y_max,x_max)]

    Returns:

    A tensor with shape [N, M] or [batch_size, N, M] representing pairwise iou scores.

    Raises:

    ValueError: If your tensor is different than 2D or 3D.
    """
    # compute_giou returns the plain IoU when mode is 'iou'.
    return compute_giou(boxes1, boxes2, mode='iou')


def convert_to_center_coordinates(
    boxes: tensorflow.python.framework.ops.Tensor
) -> tensorflow.python.framework.ops.Tensor

Convert boxes to their center coordinates

y_min, x_min, y_max, x_max -> y_cent, x_cent, h, w


  • boxes: A Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]


A tensor of shape [N, ..., num_boxes, (ycenter, xcenter, height, width)]

View Source
def convert_to_center_coordinates(boxes: tf.Tensor) -> tf.Tensor:
    """Convert boxes from corner coordinates to center coordinates.

    y_min, x_min, y_max, x_max -> y_cent, x_cent, h, w

    Arguments:

    - *boxes*: A Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]

    Returns:

    A tensor of shape [N, ..., num_boxes, (ycenter, xcenter, height, width)]
    """
    y_min, x_min, y_max, x_max = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
    width = x_max - x_min
    height = y_max - y_min
    ycenter = y_min + height / 2.
    xcenter = x_min + width / 2.
    return tf.concat([ycenter, xcenter, height, width], axis=-1)


def convert_to_xyxy_coordinates(
    boxes: tensorflow.python.framework.ops.Tensor
) -> tensorflow.python.framework.ops.Tensor

Convert boxes from center coordinates to corner coordinates

y_cent, x_cent, h, w -> y_min, x_min, y_max, x_max


  • boxes: A Tensor of shape [N, ..., (y_cent, x_cent, h, w)]


A tensor of shape [N, ..., num_boxes, (y_min, x_min, y_max, x_max)]

View Source
def convert_to_xyxy_coordinates(boxes: tf.Tensor) -> tf.Tensor:
    """Convert boxes from center coordinates to corner coordinates.

    y_cent, x_cent, h, w -> y_min, x_min, y_max, x_max

    Arguments:

    - *boxes*: A Tensor of shape [N, ..., (y_cent, x_cent, h, w)]

    Returns:

    A tensor of shape [N, ..., num_boxes, (y_min, x_min, y_max, x_max)]
    """
    y_cent, x_cent, h, w = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
    y_min = y_cent - 0.5 * h
    x_min = x_cent - 0.5 * w
    y_max = y_cent + 0.5 * h
    x_max = x_cent + 0.5 * w
    return tf.concat([y_min, x_min, y_max, x_max], axis=-1)


def flip_left_right(
    boxes: tensorflow.python.framework.ops.Tensor
) -> tensorflow.python.framework.ops.Tensor

[Taken from tensorflow models] Left-right flip the boxes.


  • boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes are in normalized form meaning their coordinates vary between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].


Flipped boxes.

View Source
def flip_left_right(boxes: tf.Tensor) -> tf.Tensor:
    """[Taken from tensorflow models] Left-right flip the boxes.

    Arguments:

    - *boxes*: float tensor containing the bounding boxes -> [N, 4]
        (leading dimensions are allowed, the coordinates are read on the last axis).
        Boxes are in normalized form meaning their coordinates vary
        between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].

    Returns:

    Flipped boxes, with the same shape as `boxes`.
    """
    # Work on the last axis, like the other helpers of this module; for the
    # documented [N, 4] input this is the same as axis 1.
    ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
    # Mirroring around x = 0.5 swaps the roles of xmin and xmax.
    flipped_xmin = tf.subtract(1.0, xmax)
    flipped_xmax = tf.subtract(1.0, xmin)
    flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], -1)
    return flipped_boxes


def normalize_box_coordinates(
    boxes,
    height: int,
    width: int
)

Normalize the boxes coordinates with image shape


  • boxes: Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]
  • height: An integer
  • width: An integer
View Source
def normalize_box_coordinates(boxes: tf.Tensor, height: int, width: int) -> tf.Tensor:
    """Normalize the boxes coordinates with image shape.

    Arguments:

    - *boxes*: Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]
    - *height*: An integer
    - *width*: An integer

    Returns:

    A tensor of shape [N, ..., (y_min,x_min,y_max,x_max)] where the y coordinates
    have been divided by `height` and the x coordinates by `width`. Gradients are
    stopped on the result.
    """
    y_min, x_min, y_max, x_max = tf.split(value=boxes, num_or_size_splits=4, axis=-1)
    y_min = y_min / height
    x_min = x_min / width
    y_max = y_max / height
    x_max = x_max / width

    # Won't be backpropagated to rois anyway, but to save time
    boxes = tf.stop_gradient(tf.concat([y_min, x_min, y_max, x_max], axis=-1))
    return boxes


def transform_fpcoor_for_tf(
    boxes: tensorflow.python.framework.ops.Tensor,
    tensor_shape: tuple,
    crop_shape: tuple
) -> tensorflow.python.framework.ops.Tensor

The way tf.image.crop_and_resize works (with normalized box):

  • Initial point (the value of output[0]): x0_box * (W_img - 1)
  • Spacing: w_box * (W_img - 1) / (W_crop - 1)

Use the above grid to bilinear sample.

However, what we want is (with fpcoor box):

  • Spacing: w_box / W_crop
  • Initial point: x0_box + spacing/2 - 0.5

(-0.5 because bilinear sample (in my definition) assumes floating point coordinate (0.0, 0.0) is the same as pixel value (0, 0))

This function transforms fpcoor boxes to a format to be used by tf.image.crop_and_resize. This function has been taken from tensorpack.


  • boxes: A Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]. These boxes have already been normalized in the feature space. The coordinates are not in the input image space.
  • tensor_shape: Height and width respectively
  • crop_shape: Height and width of the crop respectively


A tensor of shape [N, ..., num_boxes, (y_min, x_min, y_max, x_max)]

View Source
def transform_fpcoor_for_tf(boxes: tf.Tensor, tensor_shape: tuple, crop_shape: tuple) -> tf.Tensor:
    """Transform fpcoor boxes to a format to be used by tf.image.crop_and_resize.

    The way tf.image.crop_and_resize works (with normalized box):

        Initial point (the value of output[0]): x0_box * (W_img - 1)
        Spacing: w_box * (W_img - 1) / (W_crop - 1)

    Use the above grid to bilinear sample.

    However, what we want is (with fpcoor box):

        Spacing: w_box / W_crop
        Initial point: x0_box + spacing/2 - 0.5

    (-0.5 because bilinear sample (in my definition) assumes floating point coordinate
    (0.0, 0.0) is the same as pixel value (0, 0))

    This function has been taken from tensorpack.

    Arguments:

    - *boxes*: A Tensor of shape [N, ..., (y_min,x_min,y_max,x_max)]. These
    boxes have already been normalized in the feature space. The coordinates are not in
    the input image space.
    - *tensor_shape*: Height and width respectively
    - *crop_shape*: Height and width of the crop respectively

    Returns:

    A tensor of shape [N, ..., num_boxes, (y_min, x_min, y_max, x_max)]
    """
    y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)

    # Distance between two consecutive sampling points inside each box.
    spacing_h = (y_max - y_min) / tf.cast(crop_shape[0], boxes.dtype)
    spacing_w = (x_max - x_min) / tf.cast(crop_shape[1], boxes.dtype)

    # Normalizers are (size - 1), matching the grid described in the docstring.
    norm_h = tf.cast(tensor_shape[0] - 1, boxes.dtype)
    norm_w = tf.cast(tensor_shape[1] - 1, boxes.dtype)

    ny0 = (y_min + spacing_h / 2 - 0.5) / norm_h
    nx0 = (x_min + spacing_w / 2 - 0.5) / norm_w

    nh = spacing_h * tf.cast(crop_shape[0] - 1, boxes.dtype) / norm_h
    nw = spacing_w * tf.cast(crop_shape[1] - 1, boxes.dtype) / norm_w

    return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=-1)