From a62d21c1ce16b3f64ce56b86a61b5010b67094ef Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 00:16:34 -0400
Subject: [PATCH 01/49] add box_utils and unit tests

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 952 ++++++++++++++++++++++++++++++++++++++++
 tests/test_box_utils.py | 220 ++++++++++
 2 files changed, 1172 insertions(+)
 create mode 100644 monai/data/box_utils.py
 create mode 100644 tests/test_box_utils.py

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
new file mode 100644
index 0000000000..f2e4d5f944
--- /dev/null
+++ b/monai/data/box_utils.py
@@ -0,0 +1,952 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from copy import deepcopy
+from typing import Sequence, Union
+
+import numpy as np
+import torch
+
+import monai
+from monai.config.type_definitions import NdarrayOrTensor
+from monai.utils.module import look_up_option
+from monai.utils.type_conversion import convert_to_numpy, convert_to_tensor
+
+CORNER_CORNER_MODE = ["xyxy", "xyzxyz"]  # [2d_mode, 3d_mode], min corner followed by max corner
+XXYYZZ_MODE = ["xxyy", "xxyyzz"]  # [2d_mode, 3d_mode], (min, max) pair for each axis
+CORNER_SIZE_MODE = ["xywh", "xyzwhd"]  # [2d_mode, 3d_mode], min corner followed by box size
+CENTER_SIZE_MODE = ["ccwh", "cccwhd"]  # [2d_mode, 3d_mode], box center followed by box size
+
+STANDARD_MODE = CORNER_CORNER_MODE  # [2d_mode, 3d_mode]
+SUPPORT_MODE = CORNER_CORNER_MODE + XXYYZZ_MODE + CORNER_SIZE_MODE + CENTER_SIZE_MODE
+
+# TO_REMOVE = 0: the bottom-right (max) corner is not included in the box,
+#      i.e., when x_min=1, x_max=2, we have w = 1
+# TO_REMOVE = 1: the bottom-right (max) corner is included in the box,
+#       i.e., when x_min=1, x_max=2, we have w = 2
+# Currently only TO_REMOVE = 0 has been tested. Please use TO_REMOVE = 0
+TO_REMOVE = 0  # box size and corners are related by x_max - x_min = w - TO_REMOVE
+
+
+def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> list:
+    """
+    Convert a Sequence, torch.Tensor, or np.ndarray input to a list.
+    Args:
+        in_sequence: Sequence or torch.Tensor or np.ndarray
+    Returns:
+        in_sequence_list: a list built from a deep copy of in_sequence
+
+    """
+    in_sequence_list = deepcopy(in_sequence)
+    if torch.is_tensor(in_sequence):
+        in_sequence_list = in_sequence_list.detach().cpu().numpy().tolist()
+    elif isinstance(in_sequence, np.ndarray):
+        in_sequence_list = in_sequence_list.tolist()
+    elif not isinstance(in_sequence, list):
+        in_sequence_list = list(in_sequence_list)
+    return in_sequence_list
+
+
+def get_dimension(
+    bbox: Union[torch.Tensor, np.ndarray, None] = None,
+    image_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
+    mode: Union[str, None] = None,
+) -> int:
+    """
+    Get the spatial dimension for the given setting, checking that all given inputs agree.
+    Missing input is allowed. But at least one of the input values should be given.
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor
+        image_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+        mode: box mode, choose from SUPPORT_MODE. If mode is not given, it is not used to infer the dimension
+    Returns:
+        spatial_dimension: 2 or 3
+    """
+    spatial_dims = set()
+    if image_size is not None:
+        spatial_dims.add(len(image_size))
+    if mode is not None:
+        spatial_dims.add(len(mode) / 2)  # a float, it still compares equal to the int entries of the set
+    if bbox is not None:
+        spatial_dims.add(int(bbox.shape[1] / 2))
+    spatial_dims = list(spatial_dims)
+    if len(spatial_dims) == 0:
+        raise ValueError("At least one of bbox, image_size, and mode needs to be non-empty.")
+    elif len(spatial_dims) == 1:
+        spatial_dims = int(spatial_dims[0])
+        spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
+        return int(spatial_dims)
+    else:
+        raise ValueError("The dimension of bbox, image_size, mode should match with each other.")
+
+
+def get_standard_mode(spatial_dims: int) -> str:
+    """
+    Get the standard box mode name for the given spatial dimension
+    Args:
+        spatial_dims: 2 or 3, any other value raises ValueError
+    Returns:
+        mode name from STANDARD_MODE, its first entry for 2D and its second entry for 3D
+
+    """
+    if spatial_dims == 2:
+        return STANDARD_MODE[0]
+    elif spatial_dims == 3:
+        return STANDARD_MODE[1]
+    else:
+        raise ValueError(f"Images should have 2 or 3 dimensions, got {spatial_dims}")
+
+
+def point_interp(
+    point: NdarrayOrTensor, zoom: Union[Sequence[float], float]
+) -> Union[Sequence, torch.Tensor, np.ndarray]:
+    """
+    Scale point coordinates by a zoom factor, e.g. to move from one pixel/voxel size to another
+    Args:
+        point: point coordinate, Nx2 or Nx3, [x, y] or [x, y, z]
+        zoom: The zoom factor along the spatial axes.
+            If a float, zoom is the same for each spatial axis.
+            If a sequence, zoom should contain one value for each spatial axis.
+    Returns:
+        point2: zoomed point coordinate, does not share memory with original point
+    """
+    # make sure the number of spatial dimensions is 2 or 3
+    spatial_dims = point.shape[1]
+    spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
+
+    # compute new point, the results are written into a deep copy of point
+    point2 = deepcopy(point)
+    _zoom = monai.utils.misc.ensure_tuple_rep(zoom, spatial_dims)
+    for axis in range(0, spatial_dims):
+        point2[:, axis] = point[:, axis] * _zoom[axis]
+    return point2
+
+
+def box_interp(
+    bbox: NdarrayOrTensor, zoom: Union[Sequence[float], float], mode: Union[str, None] = None
+) -> torch.Tensor:
+    """
+    Interpolate bbox, i.e. scale the box coordinates by the zoom factor
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor
+        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        zoom: The zoom factor along the spatial axes.
+            If a float, zoom is the same for each spatial axis.
+            If a sequence, zoom should contain one value for each spatial axis.
+
+    Returns:
+        returned interpolated bbox has the same mode as bbox, does not share memory with original bbox
+    """
+    if mode is None:
+        mode = get_standard_mode(int(bbox.shape[1] / 2))
+    mode = look_up_option(mode, supported=SUPPORT_MODE)
+    spatial_dims = get_dimension(bbox=bbox, mode=mode)
+
+    # convert to standard mode
+    mode_standard = get_standard_mode(spatial_dims)
+    bbox_standard = box_convert_mode(bbox1=bbox, mode1=mode, mode2=mode_standard)
+
+    # zoom the min corner and the max corner separately
+    corner_lt = point_interp(bbox_standard[:, :spatial_dims], zoom)
+    corner_rb = point_interp(bbox_standard[:, spatial_dims:], zoom)
+
+    bbox_standard_interp = deepcopy(bbox_standard)
+    bbox_standard_interp[:, :spatial_dims] = corner_lt
+    bbox_standard_interp[:, spatial_dims:] = corner_rb
+
+    # convert back to the input mode
+    bbox2 = box_convert_mode(bbox1=bbox_standard_interp, mode1=mode_standard, mode2=mode)
+    return bbox2
+
+
+def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
+    """
+    This internal function outputs the corner coordinates of the bbox.
+    For the modes that store a box size, the size is clamped to be non-negative first.
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor
+        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+    Returns:
+        if 2D image, outputs (xmin, xmax, ymin, ymax)
+        if 3D images, outputs (xmin, xmax, ymin, ymax, zmin, zmax)
+        xmin for example, is a Nx1 tensor
+        if bbox is a numpy array, the result is passed through convert_to_numpy before it is returned
+
+    """
+    # convert numpy to tensor if needed
+    if isinstance(bbox, np.ndarray):
+        bbox = convert_to_tensor(bbox)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    box_dtype = bbox.dtype
+
+    def _non_negative(size):
+        # clamp a size to >= 0 and return it in box_dtype. Only float16 is computed in float32 (torch.clamp
+        # does not support float16); other dtypes are clamped as they are, so float64 keeps its precision
+        if size.dtype is torch.float16:
+            size = size.to(dtype=torch.float32)
+        return size.clamp(min=0).to(dtype=box_dtype)
+
+    if mode is None:
+        mode = get_standard_mode(int(bbox.shape[1] / 2))
+    mode = look_up_option(mode, supported=SUPPORT_MODE)
+
+    # split tensor into corners
+    if mode in ["xxyy", "xxyyzz"]:
+        split_result = bbox.split(1, dim=-1)
+    elif mode == "xyzxyz":
+        xmin, ymin, zmin, xmax, ymax, zmax = bbox.split(1, dim=-1)
+        split_result = (xmin, xmax, ymin, ymax, zmin, zmax)
+    elif mode == "xyxy":
+        xmin, ymin, xmax, ymax = bbox.split(1, dim=-1)
+        split_result = (xmin, xmax, ymin, ymax)
+    elif mode == "xyzwhd":
+        xmin, ymin, zmin, w, h, d = bbox.split(1, dim=-1)
+        split_result = (
+            xmin,
+            xmin + _non_negative(w - TO_REMOVE),
+            ymin,
+            ymin + _non_negative(h - TO_REMOVE),
+            zmin,
+            zmin + _non_negative(d - TO_REMOVE),
+        )
+    elif mode == "xywh":
+        xmin, ymin, w, h = bbox.split(1, dim=-1)
+        split_result = (xmin, xmin + _non_negative(w - TO_REMOVE), ymin, ymin + _non_negative(h - TO_REMOVE))
+    elif mode == "cccwhd":
+        xc, yc, zc, w, h, d = bbox.split(1, dim=-1)
+        split_result = (
+            xc - _non_negative((w - TO_REMOVE) / 2.0),
+            xc + _non_negative((w - TO_REMOVE) / 2.0),
+            yc - _non_negative((h - TO_REMOVE) / 2.0),
+            yc + _non_negative((h - TO_REMOVE) / 2.0),
+            zc - _non_negative((d - TO_REMOVE) / 2.0),
+            zc + _non_negative((d - TO_REMOVE) / 2.0),
+        )
+    elif mode == "ccwh":
+        xc, yc, w, h = bbox.split(1, dim=-1)
+        split_result = (
+            xc - _non_negative((w - TO_REMOVE) / 2.0),
+            xc + _non_negative((w - TO_REMOVE) / 2.0),
+            yc - _non_negative((h - TO_REMOVE) / 2.0),
+            yc + _non_negative((h - TO_REMOVE) / 2.0),
+        )
+    else:
+        raise RuntimeError("Should not be here")
+
+    # convert tensor back to numpy if needed
+    if numpy_bool:
+        split_result = convert_to_numpy(split_result)
+    return split_result
+
+
+def box_convert_mode(
+    bbox1: NdarrayOrTensor, mode1: Union[str, None] = None, mode2: Union[str, None] = None
+) -> NdarrayOrTensor:
+    """
+    This function converts bbox1 from mode1 to mode2
+    Args:
+        bbox1: source bounding box, Nx4 or Nx6 torch tensor
+        mode1: source box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        mode2: target box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+    Returns:
+        bbox2: bounding box with target mode, does not share memory with original bbox1
+    """
+
+    # convert numpy to tensor if needed
+    if isinstance(bbox1, np.ndarray):
+        bbox1 = convert_to_tensor(bbox1)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    # check whether the bbox and the new mode is valid
+    if mode1 is None:
+        mode1 = get_standard_mode(int(bbox1.shape[1] / 2))
+    if mode2 is None:
+        mode2 = get_standard_mode(int(bbox1.shape[1] / 2))
+    mode1 = look_up_option(mode1, supported=SUPPORT_MODE)
+    mode2 = look_up_option(mode2, supported=SUPPORT_MODE)
+
+    spatial_dims = get_dimension(bbox=bbox1, mode=mode1)
+    if len(mode1) != len(mode2):
+        raise ValueError("The dimension of the new mode should have the same spatial dimension as the old mode.")
+
+    # if mode not changed, return a deep copy of the original box
+    if mode1 == mode2:
+        bbox2 = deepcopy(bbox1)
+    # convert mode for bbox, the corners already come in (min, max) per axis order
+    elif mode2 in ["xxyy", "xxyyzz"]:
+        corners = split_into_corners(bbox1, mode1)
+        bbox2 = torch.cat(corners, dim=-1)
+    else:
+        if spatial_dims == 3:
+            xmin, xmax, ymin, ymax, zmin, zmax = split_into_corners(bbox1, mode1)
+            if mode2 == "xyzxyz":
+                bbox2 = torch.cat((xmin, ymin, zmin, xmax, ymax, zmax), dim=-1)
+            elif mode2 == "xyzwhd":
+                bbox2 = torch.cat(
+                    (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE),
+                    dim=-1,
+                )
+            elif mode2 == "cccwhd":
+                bbox2 = torch.cat(
+                    (
+                        (xmin + xmax + TO_REMOVE) / 2,
+                        (ymin + ymax + TO_REMOVE) / 2,
+                        (zmin + zmax + TO_REMOVE) / 2,
+                        xmax - xmin + TO_REMOVE,
+                        ymax - ymin + TO_REMOVE,
+                        zmax - zmin + TO_REMOVE,
+                    ),
+                    dim=-1,
+                )
+            else:
+                raise ValueError("We support only bbox mode in " + str(SUPPORT_MODE) + f", got {mode2}")
+        elif spatial_dims == 2:
+            xmin, xmax, ymin, ymax = split_into_corners(bbox1.clone(), mode1)
+            if mode2 == "xyxy":
+                bbox2 = torch.cat((xmin, ymin, xmax, ymax), dim=-1)
+            elif mode2 == "xywh":
+                bbox2 = torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
+            elif mode2 == "ccwh":
+                bbox2 = torch.cat(
+                    (
+                        (xmin + xmax + TO_REMOVE) / 2,
+                        (ymin + ymax + TO_REMOVE) / 2,
+                        xmax - xmin + TO_REMOVE,
+                        ymax - ymin + TO_REMOVE,
+                    ),
+                    dim=-1,
+                )
+            else:
+                raise ValueError("We support only bbox mode in " + str(SUPPORT_MODE) + f", got {mode2}")
+        else:
+            raise ValueError(f"Images should have 2 or 3 dimensions, got {spatial_dims}")
+
+    # convert tensor back to numpy if needed
+    if numpy_bool:
+        bbox2 = convert_to_numpy(bbox2)
+
+    return bbox2
+
+
+def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = None) -> NdarrayOrTensor:
+    """
+    Convert given bbox to standard mode, i.e. 'xyxy' for 2D boxes or 'xyzxyz' for 3D boxes
+    Args:
+        bbox: source bounding box, Nx4 or Nx6 torch tensor
+        mode: source box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+    Returns: the bbox in standard mode, as produced by box_convert_mode
+    """
+    if mode is None:
+        mode = get_standard_mode(int(bbox.shape[1] / 2))
+    mode = look_up_option(mode, supported=SUPPORT_MODE)
+    spatial_dims = get_dimension(bbox=bbox, mode=mode)
+    mode_standard = get_standard_mode(spatial_dims)
+    return box_convert_mode(bbox1=bbox, mode1=mode, mode2=mode_standard)
+
+
+def point_affine(
+    point: NdarrayOrTensor, affine: NdarrayOrTensor, include_shift: bool = True
+) -> Union[Sequence, torch.Tensor, np.ndarray]:
+    """
+    Apply an affine transform to point coordinates
+    Args:
+        point: point coordinate, Nx2 or Nx3, [x, y] or [x, y, z]
+        affine: affine transform, converted to the device and dtype of point before it is applied
+        include_shift: whether to apply the translation (shift) part of the affine transform
+    Returns:
+        point2: transformed point coordinate, does not share memory with original point
+    """
+    # make sure the number of spatial dimensions is 2 or 3
+    spatial_dims = point.shape[1]
+    spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
+
+    # convert numpy to tensor if needed
+    if isinstance(point, np.ndarray):
+        point = convert_to_tensor(point)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+    affine = convert_to_tensor(affine, device=point.device, dtype=point.dtype)
+
+    # compute new point
+    if include_shift:
+        # append 1 to form Nx(spatial_dims+1) vector, then transpose
+        point2 = torch.cat(
+            [point, torch.ones(point.shape[0], 1, device=point.device, dtype=point.dtype)], dim=1
+        ).transpose(0, 1)
+        # apply affine
+        point2 = torch.matmul(affine, point2)
+        # remove appended 1 and transpose back
+        point2 = point2[:spatial_dims, :].transpose(0, 1)
+    else:
+        # only the top-left spatial_dims x spatial_dims part of the affine is applied, no translation
+        point2 = point.transpose(0, 1)
+        point2 = torch.matmul(affine[:spatial_dims, :spatial_dims], point2)
+        point2 = point2.transpose(0, 1)
+
+    # convert tensor back to numpy if needed
+    if numpy_bool:
+        point2 = convert_to_numpy(point2)
+    return point2
+
+
+def box_affine(bbox: NdarrayOrTensor, affine: torch.Tensor, mode: Union[str, None] = None) -> torch.Tensor:
+    """
+    This function applies an affine matrix to the bbox.
+    Only the two extreme corners (or the center and the size) of each box are transformed, so the
+    result is presumably only meaningful for axis-aligned affines (scaling, flipping, translation).
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor
+        affine: affine matrix to be applied to the box coordinate, (spatial_dims+1)x(spatial_dims+1)
+        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+    Returns:
+        returned affine transformed bbox has the same mode as bbox, does not share memory with original bbox
+    """
+    # convert numpy to tensor if needed
+    if isinstance(bbox, np.ndarray):
+        bbox = convert_to_tensor(bbox)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+    affine = convert_to_tensor(affine, device=bbox.device, dtype=bbox.dtype)
+
+    if mode is None:
+        mode = get_standard_mode(int(bbox.shape[1] / 2))
+    mode = look_up_option(mode, supported=SUPPORT_MODE)
+    spatial_dims = get_dimension(bbox=bbox, mode=mode)
+
+    if mode in ["ccwh", "cccwhd"]:
+        # the center goes through the full affine, the size only through its linear part
+        cc = point_affine(bbox[:, :spatial_dims], affine, include_shift=True)
+        wh = point_affine(bbox[:, spatial_dims:], affine, include_shift=False).absolute()
+        bbox2 = torch.cat([cc, wh], dim=1)
+    elif mode in SUPPORT_MODE:
+        # corner based modes, including corner-size ones: a flip turns the min corner into a max corner,
+        # so both corners are transformed in standard mode and min/max are re-derived per axis
+        mode_standard = get_standard_mode(spatial_dims)
+        bbox_standard = box_convert_mode(bbox, mode1=mode, mode2=mode_standard)
+        lt = point_affine(bbox_standard[:, :spatial_dims], affine, include_shift=True)
+        rb = point_affine(bbox_standard[:, spatial_dims:], affine, include_shift=True)
+        corners = torch.stack([lt, rb], dim=2)
+        lt_new, _ = torch.min(corners, dim=2)
+        rb_new, _ = torch.max(corners, dim=2)
+        bbox2 = box_convert_mode(torch.cat([lt_new, rb_new], dim=1), mode1=mode_standard, mode2=mode)
+    else:
+        raise RuntimeError("Should not be here")
+
+    # convert tensor back to numpy if needed
+    if numpy_bool:
+        bbox2 = convert_to_numpy(bbox2)
+    return bbox2
+
+
+def box_clip_to_patch(
+    bbox: NdarrayOrTensor,
+    patch_box: Union[Sequence[int], torch.Tensor, np.ndarray],
+    remove_empty: bool = True,
+):
+    """
+    This function clips the bounding boxes to the patch and shifts them to patch coordinates.
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        patch_box: The coordinate of the target patch to clip, it follows standard mode
+        remove_empty: whether to remove the boxes that are actually empty
+    Returns:
+        new_bbox: updated box, does not share memory with original bbox
+        keep: boolean mask over the input boxes, True for the boxes kept in new_bbox. It is all True when
+            remove_empty=False, and an empty list when bbox contains no box
+    """
+    if bbox.shape[0] == 0:
+        return deepcopy(bbox), []
+
+    spatial_dims = get_dimension(bbox=bbox)
+    new_bbox = deepcopy(bbox)
+
+    # convert numpy to tensor if needed
+    if isinstance(new_bbox, np.ndarray):
+        new_bbox = convert_to_tensor(new_bbox)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    # convert to float32 since torch.clamp_ does not support float16
+    box_dtype = new_bbox.dtype
+    compute_dtype = torch.float32
+    if box_dtype is torch.float16:
+        new_bbox = new_bbox.to(dtype=compute_dtype)
+
+    # makes sure the bounding boxes are within the patch, then shift them to patch coordinates
+    for axis in range(0, spatial_dims):
+        new_bbox[:, axis].clamp_(min=patch_box[axis], max=patch_box[axis + spatial_dims] - TO_REMOVE)
+        new_bbox[:, axis + spatial_dims].clamp_(min=patch_box[axis], max=patch_box[axis + spatial_dims] - TO_REMOVE)
+        new_bbox[:, axis] -= patch_box[axis]
+        new_bbox[:, axis + spatial_dims] -= patch_box[axis]
+
+    # start from "keep everything" so that keep is defined even when remove_empty=False
+    keep = torch.ones(new_bbox.shape[0], dtype=torch.bool, device=new_bbox.device)
+    if remove_empty:
+        # a box is empty if it is thinner than 1 - TO_REMOVE along any axis
+        for axis in range(0, spatial_dims):
+            keep = keep & (new_bbox[:, axis + spatial_dims] >= new_bbox[:, axis] + 1 - TO_REMOVE)
+        new_bbox = new_bbox[keep]
+
+    # convert tensor back to numpy if needed
+    new_bbox = new_bbox.to(dtype=box_dtype)
+    if numpy_bool:
+        new_bbox = convert_to_numpy(new_bbox)
+
+    return new_bbox, keep
+
+
+def box_clip_to_image(
+    bbox: NdarrayOrTensor,
+    image_size: Union[Sequence[int], torch.Tensor, np.ndarray],
+    remove_empty: bool = True,
+):
+    """
+    This function makes sure the bounding boxes are within the image, i.e. the patch [0, ..., 0, *image_size].
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        image_size: spatial size of the image, length of 2 or 3
+        remove_empty: whether to remove the boxes that are actually empty
+    Returns: the (new_bbox, keep) pair produced by box_clip_to_patch
+    """
+    spatial_dims = get_dimension(bbox=bbox, image_size=image_size)
+    image_box = [0] * spatial_dims + convert_to_list(image_size)
+    return box_clip_to_patch(bbox, image_box, remove_empty)
+
+
+def box_area(bbox: NdarrayOrTensor) -> NdarrayOrTensor:
+    """
+    This function computes the area (2D) or volume (3D) of each box, and raises ValueError if it is NaN or Inf
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+    Returns:
+        area: 1-D tensor
+    """
+
+    spatial_dims = get_dimension(bbox=bbox)
+
+    # product of the box sizes over all the spatial axes
+    area = bbox[:, spatial_dims] - bbox[:, 0] + TO_REMOVE
+    for axis in range(1, spatial_dims):
+        area = area * (bbox[:, axis + spatial_dims] - bbox[:, axis] + TO_REMOVE)
+
+    if isinstance(area, np.ndarray):
+        area = convert_to_tensor(area)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    if area.isnan().any() or area.isinf().any():
+        if area.dtype is torch.float16:
+            raise ValueError("Box area is NaN or Inf. bbox is float16. Please change to float32 and test it again.")
+        else:
+            raise ValueError("Box area is NaN or Inf.")
+
+    if numpy_bool:
+        area = convert_to_numpy(area)
+    return area
+
+
+def box_iou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
+    """
+    Compute the intersection over union of two sets of boxes. This function is not differentiable.
+
+    IMPORTANT: Please run box_clip_to_image(bbox, image_size, remove_empty=True) before computing IoU
+
+    Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+    with slight modifications.
+
+    Args:
+        bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+
+    Returns:
+      (tensor) iou, sized [N,M].
+
+    Reference:
+      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
+    """
+
+    # convert numpy to tensor if needed, decided by the type of bbox1 only
+    if isinstance(bbox1, np.ndarray):
+        bbox1 = convert_to_tensor(bbox1)
+        bbox2 = convert_to_tensor(bbox2)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    spatial_dims = get_dimension(bbox=bbox1)
+
+    # we do computation with compute_dtype to avoid overflow
+    box_dtype = bbox1.dtype
+    compute_dtype = torch.float32
+
+    # compute area with float32
+    area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # [N]
+    area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # [M]
+
+    # get the left top and right bottom points for the NxM combinations
+    lt = torch.max(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
+        dtype=compute_dtype
+    )  # [N,M,spatial_dims] left top
+    rb = torch.min(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
+        dtype=compute_dtype
+    )  # [N,M,spatial_dims] right bottom
+    # compute size for the intersection region for the NxM combinations
+    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
+    inter = wh[:, :, 0]  # [N,M]
+    for axis in range(1, spatial_dims):
+        inter = inter * wh[:, :, axis]
+
+    # compute IoU and convert back to original box_dtype
+    iou = inter / (area1[:, None] + area2 - inter + torch.finfo(compute_dtype).eps)  # [N,M]
+    iou = iou.to(dtype=box_dtype)
+
+    if torch.isnan(iou).any() or torch.isinf(iou).any():
+        raise ValueError("Box IoU is NaN or Inf.")
+
+    # convert tensor back to numpy if needed
+    if numpy_bool:
+        iou = convert_to_numpy(iou)
+    return iou
+
+
+def box_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
+    """
+    Compute the generalized intersection over union of two sets of boxes. This function is not differentiable.
+
+    IMPORTANT: Please run box_clip_to_image(bbox, image_size, remove_empty=True) before computing GIoU
+
+    Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+    with slight modifications.
+
+    Args:
+        bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+
+    Returns:
+      (tensor) giou, sized [N,M].
+
+    Reference:
+      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
+    """
+    # convert numpy to tensor if needed, decided by the type of bbox1 only
+    if isinstance(bbox1, np.ndarray):
+        bbox1 = convert_to_tensor(bbox1)
+        bbox2 = convert_to_tensor(bbox2)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    spatial_dims = get_dimension(bbox=bbox1)
+
+    # we do computation with compute_dtype to avoid overflow
+    box_dtype = bbox1.dtype
+    compute_dtype = torch.float32
+
+    # compute area with float32
+    area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # [N]
+    area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # [M]
+
+    # get the left top and right bottom points for the NxM combinations
+    lt = torch.max(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
+        dtype=compute_dtype
+    )  # [N,M,spatial_dims] left top
+    rb = torch.min(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
+        dtype=compute_dtype
+    )  # [N,M,spatial_dims] right bottom
+    # compute size for the intersection region for the NxM combinations
+    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
+    inter = wh[:, :, 0]  # [N,M]
+    for axis in range(1, spatial_dims):
+        inter = inter * wh[:, :, axis]
+
+    # compute IoU in compute_dtype
+    union = area1[:, None] + area2 - inter
+    iou = inter / (union + torch.finfo(compute_dtype).eps)  # [N,M]
+
+    # enclosure: the smallest box that contains both boxes of a combination
+    lt = torch.min(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
+        dtype=compute_dtype
+    )  # [N,M,spatial_dims] left top
+    rb = torch.max(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
+        dtype=compute_dtype
+    )  # [N,M,spatial_dims] right bottom
+    # compute size for the enclosure region for the NxM combinations
+    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
+    enclosure = wh[:, :, 0]  # [N,M]
+    for axis in range(1, spatial_dims):
+        enclosure = enclosure * wh[:, :, axis]
+
+    giou = iou - (enclosure - union) / (enclosure + torch.finfo(compute_dtype).eps)
+    giou = giou.to(dtype=box_dtype)
+    if torch.isnan(giou).any() or torch.isinf(giou).any():
+        raise ValueError("Box GIoU is NaN or Inf.")
+
+    # convert tensor back to numpy if needed
+    if numpy_bool:
+        giou = convert_to_numpy(giou)
+    return giou
+
+
+def box_pair_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
+    """
+    Compute the generalized intersection over union of paired boxes. This function is not differentiable.
+
+    IMPORTANT: Please run box_clip_to_image(bbox, image_size, remove_empty=True) before computing GIoU
+
+    Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+    with slight modifications.
+
+    Args:
+        bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        bbox2: bounding box, Nx4 or Nx6 torch tensor, paired with bbox1. The box mode is assumed to be STANDARD_MODE
+
+    Returns:
+      (tensor) giou, sized [N].
+
+    Reference:
+      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
+    """
+
+    if bbox1.shape[0] != bbox2.shape[0]:
+        raise ValueError("bbox1 and bbox2 should be paired.")
+
+    # convert numpy to tensor if needed, decided by the type of bbox1 only
+    if isinstance(bbox1, np.ndarray):
+        bbox1 = convert_to_tensor(bbox1)
+        bbox2 = convert_to_tensor(bbox2)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    spatial_dims = get_dimension(bbox=bbox1)
+
+    # we do computation with compute_dtype to avoid overflow
+    box_dtype = bbox1.dtype
+    compute_dtype = torch.float32
+
+    # compute area
+    area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # [N]
+    area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # [N]
+
+    # get the left top and right bottom points for the N pairs
+    lt = torch.max(bbox1[:, :spatial_dims], bbox2[:, :spatial_dims]).to(
+        dtype=compute_dtype
+    )  # [N,spatial_dims] left top
+    rb = torch.min(bbox1[:, spatial_dims:], bbox2[:, spatial_dims:]).to(
+        dtype=compute_dtype
+    )  # [N,spatial_dims] right bottom
+    # compute size for the intersection region for the N pairs
+    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,spatial_dims]
+    inter = wh[:, 0]  # [N]
+    for axis in range(1, spatial_dims):
+        inter = inter * wh[:, axis]
+
+    # compute IoU in compute_dtype
+    union = area1 + area2 - inter
+    iou = inter / (union + torch.finfo(compute_dtype).eps)  # [N]
+
+    # enclosure: the smallest box that contains both boxes of a pair
+    lt = torch.min(bbox1[:, :spatial_dims], bbox2[:, :spatial_dims]).to(
+        dtype=compute_dtype
+    )  # [N,spatial_dims] left top
+    rb = torch.max(bbox1[:, spatial_dims:], bbox2[:, spatial_dims:]).to(
+        dtype=compute_dtype
+    )  # [N,spatial_dims] right bottom
+    # compute size for the enclosure region for the N pairs
+    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,spatial_dims]
+    enclosure = wh[:, 0]  # [N]
+    for axis in range(1, spatial_dims):
+        enclosure = enclosure * wh[:, axis]
+
+    giou = iou - (enclosure - union) / (enclosure + torch.finfo(compute_dtype).eps)
+    giou = giou.to(dtype=box_dtype)  # [N]
+    if torch.isnan(giou).any() or torch.isinf(giou).any():
+        raise ValueError("Box GIoU is NaN or Inf.")
+
+    # convert tensor back to numpy if needed
+    if numpy_bool:
+        giou = convert_to_numpy(giou)
+    return giou
+
+
+def non_max_suppression(
+    bbox: NdarrayOrTensor,
+    scores: NdarrayOrTensor,
+    nms_thresh: float,
+    max_proposals=-1,
+    box_overlap_metric="iou",
+):
+    """
+    written by Can Zhao, 2019
+    if there are no boxes, return an empty list
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+    """
+    look_up_option(box_overlap_metric, ["iou", "giou"])
+    look_up_option(bbox.shape[1], [4, 6]) // 2
+    if bbox.shape[0] == 0:
+        return []
+
+    if bbox.shape[0] != scores.shape[0]:
+        raise ValueError(
+            f"bbox and scores should have same length, got bbox shape {bbox.shape}, scores shape {scores.shape}"
+        )
+
+    # convert numpy to tensor if needed
+    if isinstance(bbox, np.ndarray):
+        bbox = convert_to_tensor(bbox)
+        scores = convert_to_tensor(scores)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    scores_sort, indices = torch.sort(scores, descending=True)
+    bbox_sort = deepcopy(bbox)[indices, :]
+
+    # initialize the list of picked indexes
+    pick = []
+    idxs = np.arange(0, bbox_sort.shape[0])
+    # keep looping while some indexes still remain in the indexes
+    # list
+    while len(idxs) > 0:
+        # grab the first index in the indexes list and add the
+        # index value to the list of picked indexes
+        i = idxs[0]
+        pick.append(i)
+        if len(pick) >= max_proposals >= 1:
+            break
+
+        # compute the IoU
+        if box_overlap_metric == "giou":
+            iou = box_giou(bbox_sort[idxs[1:], :], bbox_sort[i : i + 1, :])
+        else:
+            iou = box_iou(bbox_sort[idxs[1:], :], bbox_sort[i : i + 1, :])
+
+        # delete all indexes from the index list that have overlap > nms_thresh
+        idxs = np.delete(idxs, np.concatenate(([0], 1 + np.where(iou.cpu().numpy() > nms_thresh)[0])))
+
+    # return only the bounding boxes that were picked using the
+    # integer data type
+    pick_idx = indices[pick]
+    # convert tensor back to numpy if needed
+    if numpy_bool:
+        pick_idx = convert_to_numpy(pick_idx)
+    return pick_idx
+
+
+def box_center(bbox: NdarrayOrTensor) -> torch.Tensor:
+    """
+    Compute center point of bbox
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+    Returns:
+        Tensor: center points [N, dims]
+    """
+    spatial_dims = bbox.shape[1] // 2
+    centers = [(bbox[:, axis + spatial_dims] + bbox[:, axis]) / 2.0 for axis in range(spatial_dims)]
+
+    if isinstance(bbox, np.ndarray):
+        return np.stack(centers, axis=1)
+    else:
+        return torch.stack(centers, dim=1)
+
+
+def box_center_dist(bbox1: torch.Tensor, bbox2: torch.Tensor, euclidean: bool = True) -> Sequence[torch.Tensor]:
+    """
+    Distance of center points between two sets of bbox
+    Args:
+        bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        bbox2: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        euclidean: computed the euclidean distance otherwise it uses the l1
+            distance
+    Returns:
+        Tensor: the NxM matrix containing the pairwise
+            distances for every element in bbox1 and bbox2; [N, M]
+        Tensor: center points of bbox1
+        Tensor: center points of bbox2
+    """
+    # convert numpy to tensor if needed
+    if isinstance(bbox1, np.ndarray):
+        bbox1 = convert_to_tensor(bbox1)
+        bbox2 = convert_to_tensor(bbox2)
+        numpy_bool = True
+    else:
+        numpy_bool = False
+
+    box_dtype = bbox1.dtype
+    compute_dtype = torch.float32
+
+    center1 = box_center(bbox1.to(compute_dtype))  # [N, dims]
+    center2 = box_center(bbox2.to(compute_dtype))  # [M, dims]
+
+    if euclidean:
+        dists = (center1[:, None] - center2[None]).pow(2).sum(-1).sqrt()
+    else:
+        # before sum: [N, M, dims]
+        dists = (center1[:, None] - center2[None]).sum(-1)
+
+    # convert tensor back to numpy if needed
+    dists, center1, center2 = dists.to(box_dtype), center1.to(box_dtype), center2.to(box_dtype)
+    if numpy_bool:
+        dists, center1, center2 = convert_to_numpy(dists), convert_to_numpy(center1), convert_to_numpy(center2)
+    return dists, center1, center2
+
+
+def center_in_boxes(center: NdarrayOrTensor, bbox: NdarrayOrTensor, eps: float = 0.01) -> NdarrayOrTensor:
+    """
+    Checks which center points are within bbox
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        center: center points [N, dims]
+        eps: minimum distance to boarder of bbox
+    Returns:
+        Tensor: boolean array indicating which center points are within
+            the bbox [N]
+    """
+    spatial_dims = bbox.shape[1] // 2
+    axes = [center[:, axis] - bbox[:, axis] for axis in range(spatial_dims)] + [
+        bbox[:, axis + spatial_dims] - center[:, axis] for axis in range(spatial_dims)
+    ]
+    if isinstance(bbox, np.ndarray):
+        return np.stack(axes, axis=1).min(axis=1) > eps  # array[bool]
+    else:
+        return torch.stack(axes, dim=1).min(dim=1)[0] > eps  # Tensor[bool]
+
+
+def resize_boxes(
+    bbox: NdarrayOrTensor,
+    original_size: Union[Sequence, torch.Tensor, np.ndarray],
+    new_size: Union[Sequence, torch.Tensor, np.ndarray],
+) -> NdarrayOrTensor:
+    """
+    modified from torchvision
+    Args:
+        bbox: source bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+        original_size: source image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+        original_size: target image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+    """
+    if len(original_size) != len(new_size):
+        raise ValueError("The dimension of original image size should equal to the new image size")
+    spatial_dims = get_dimension(bbox, original_size)
+
+    original_size = convert_to_list(original_size)
+    new_size = convert_to_list(new_size)
+    zoom = [new_size[axis] / float(original_size[axis]) for axis in range(spatial_dims)]
+
+    return box_interp(bbox=bbox, zoom=zoom)
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
new file mode 100644
index 0000000000..f185a2016d
--- /dev/null
+++ b/tests/test_box_utils.py
@@ -0,0 +1,220 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+import unittest
+
+import numpy as np
+import torch
+from parameterized import parameterized
+
+from monai.data.box_utils import (
+    box_affine,
+    box_area,
+    box_center,
+    box_center_dist,
+    box_clip_to_image,
+    box_convert_mode,
+    box_convert_standard_mode,
+    box_giou,
+    box_interp,
+    box_iou,
+    box_pair_giou,
+    center_in_boxes,
+    convert_to_list,
+    non_max_suppression,
+    resize_boxes,
+)
+from monai.utils.type_conversion import convert_data_type
+from tests.utils import TEST_NDARRAYS, assert_allclose
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    bbox = [[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]
+    image_size = [4, 4, 4]
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzwhd", "half": False},
+            "xyzwhd",
+            p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
+            p([0, 12, 12]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzwhd", "half": True},
+            "xyzxyz",
+            p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 3, 3], [0, 1, 1, 2, 3, 4]]),
+            p([0, 12, 12]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzwhd", "half": False},
+            "xxyyzz",
+            p([[0, 0, 0, 0, 0, 0], [0, 2, 1, 3, 0, 3], [0, 2, 1, 3, 1, 4]]),
+            p([0, 12, 12]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzxyz", "half": False},
+            "xyzwhd",
+            p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 1, 3], [0, 1, 1, 2, 1, 2]]),
+            p([0, 6, 4]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzxyz", "half": True},
+            "xyzxyz",
+            p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
+            p([0, 6, 4]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzxyz", "half": False},
+            "xxyyzz",
+            p([[0, 0, 0, 0, 0, 0], [0, 2, 1, 2, 0, 3], [0, 2, 1, 2, 1, 3]]),
+            p([0, 6, 4]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xxyyzz", "half": False},
+            "xxyyzz",
+            p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
+            p([0, 2, 1]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xxyyzz", "half": True},
+            "xyzxyz",
+            p([[0, 0, 0, 0, 0, 0], [0, 0, 2, 1, 2, 3], [0, 1, 2, 1, 2, 3]]),
+            p([0, 2, 1]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"bbox": p(bbox), "image_size": image_size, "mode": "xxyyzz", "half": False},
+            "xyzwhd",
+            p([[0, 0, 0, 0, 0, 0], [0, 0, 2, 1, 2, 1], [0, 1, 2, 1, 1, 1]]),
+            p([0, 2, 1]),
+        ]
+    )
+
+
+class TestCreateBoxList(unittest.TestCase):
+    @parameterized.expand(TESTS)
+    def test_value(self, input_data, mode2, expected_box, expected_area):
+        bbox1 = convert_data_type(input_data["bbox"], dtype=np.float32)[0]
+        mode1 = input_data["mode"]
+        image_size = input_data["image_size"]
+        half_bool = input_data["half"]
+
+        # test float16
+        if half_bool:
+            bbox1 = convert_data_type(bbox1, dtype=np.float16)[0]
+            expected_box = convert_data_type(expected_box, dtype=np.float16)[0]
+
+        # test box_convert_mode, box_convert_standard_mode
+        result2 = box_convert_mode(bbox1=bbox1, mode1=mode1, mode2=mode2)
+        assert_allclose(result2, expected_box, type_test=True, device_test=True, atol=0.0)
+
+        result1 = box_convert_mode(bbox1=result2, mode1=mode2, mode2=mode1)
+        assert_allclose(result1, bbox1, type_test=True, device_test=True, atol=0.0)
+
+        result_standard = box_convert_standard_mode(bbox=bbox1, mode=mode1)
+        expected_box_standard = box_convert_standard_mode(bbox=expected_box, mode=mode2)
+        assert_allclose(result_standard, expected_box_standard, type_test=True, device_test=True, atol=0.0)
+
+        # test box_area, box_clip_to_image, convert_to_list
+        assert_allclose(box_area(result_standard), expected_area, type_test=True, device_test=True, atol=0.0)
+
+        result_standard_clip, _ = box_clip_to_image(result_standard, image_size, remove_empty=True)
+        np.testing.assert_equal(
+            isinstance(result_standard_clip, np.ndarray), isinstance(bbox1, np.ndarray), "numpy type"
+        )
+        np.testing.assert_equal(
+            isinstance(result_standard_clip, torch.Tensor), isinstance(bbox1, torch.Tensor), "torch type"
+        )
+        result_area_clip = convert_to_list(box_area(result_standard_clip))
+        expected_area_clip = list(filter(lambda num: num > 0, convert_to_list(expected_area)))
+        assert_allclose(result_area_clip, expected_area_clip, type_test=True, device_test=True, atol=0.0)
+
+        # test box_interp and box_affine, resize_boxes
+        zoom = [random.uniform(0.5, 5), random.uniform(0.5, 2), random.uniform(0.5, 5)]
+        new_size = [int(image_size[axis] * zoom[axis] + 0.5) for axis in range(3)]
+        zoom = [new_size[axis] / float(image_size[axis]) for axis in range(3)]
+
+        result_standard_interp = box_interp(bbox=result_standard, zoom=zoom)
+        result_standard_resize = resize_boxes(bbox=result_standard, original_size=image_size, new_size=new_size)
+        assert_allclose(result_standard_interp, result_standard_resize, type_test=True, device_test=True, atol=0.0)
+
+        result_area_interp = box_area(result_standard_interp)
+        expected_area_interp = expected_area * zoom[0] * zoom[1] * zoom[2]
+        assert_allclose(result_area_interp, expected_area_interp, type_test=True, device_test=True, atol=0.5)
+
+        affine = torch.diag(torch.Tensor(zoom + [1.0]))
+        result_affine = box_affine(bbox=bbox1, affine=affine, mode=mode1)
+        result_affine_standard = box_convert_standard_mode(bbox=result_affine, mode=mode1)
+        assert_allclose(
+            box_area(result_affine_standard), expected_area_interp, type_test=True, device_test=True, atol=0.5
+        )
+
+        # test box_center, center_in_boxes, box_center_dist
+        result_standard_center = box_center(result_standard)
+        expected_center = box_convert_mode(bbox1=bbox1, mode1=mode1, mode2="cccwhd")[:, :3]
+        assert_allclose(result_standard_center, expected_center, type_test=True, device_test=True, atol=0.0)
+
+        center = expected_center
+        center[2, :] += 10
+        result_center_in_boxes = center_in_boxes(center=center, bbox=result_standard)
+        assert_allclose(result_center_in_boxes, np.array([False, True, False]), type_test=False)
+
+        center_dist, _, _ = box_center_dist(bbox1=result_standard[1:2, :], bbox2=result_standard[1:1, :])
+        assert_allclose(center_dist, np.array([[]]), type_test=False)
+        center_dist, _, _ = box_center_dist(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
+        assert_allclose(center_dist, np.array([[0.0]]), type_test=False)
+        center_dist, _, _ = box_center_dist(bbox1=result_standard[0:1, :], bbox2=result_standard[0:1, :])
+        assert_allclose(center_dist, np.array([[0.0]]), type_test=False)
+
+        # test box_iou
+        iou_metrics = (box_iou, box_giou)  # type: ignore
+        for p in iou_metrics:
+            self_iou = p(bbox1=result_standard[1:2, :], bbox2=result_standard[1:1, :])
+            assert_allclose(self_iou, np.array([[]]), type_test=False)
+
+            self_iou = p(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
+            assert_allclose(self_iou, np.array([[1.0]]), type_test=False)
+
+        self_iou = box_pair_giou(bbox1=result_standard[1:1, :], bbox2=result_standard[1:1, :])
+        assert_allclose(self_iou, np.array([]), type_test=False)
+
+        self_iou = box_pair_giou(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
+        assert_allclose(self_iou, np.array([1.0]), type_test=False)
+
+        # test non_max_suppression
+        nms_box = non_max_suppression(
+            bbox=result_standard, scores=bbox1[:, 1] / 2.0, nms_thresh=1.0, box_overlap_metric="iou"
+        )
+        assert_allclose(nms_box, [1, 2, 0], type_test=False)
+
+        nms_box = non_max_suppression(
+            bbox=result_standard, scores=bbox1[:, 1] / 2.0, nms_thresh=-0.1, box_overlap_metric="iou"
+        )
+        assert_allclose(nms_box, [1], type_test=False)
+
+
+if __name__ == "__main__":
+    unittest.main()

From d1424bd30ddb4b667d693c04e9c8c10aa7d25cf7 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 00:38:13 -0400
Subject: [PATCH 02/49] reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index f2e4d5f944..03a6f1d0d9 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -199,14 +199,7 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
         split_result = bbox.split(1, dim=-1)
     elif mode == "xyzxyz":
         xmin, ymin, zmin, xmax, ymax, zmax = bbox.split(1, dim=-1)
-        split_result = (
-            xmin,
-            xmax,
-            ymin,
-            ymax,
-            zmin,
-            zmax,
-        )
+        split_result = (xmin, xmax, ymin, ymax, zmin, zmax)
     elif mode == "xyxy":
         xmin, ymin, xmax, ymax = bbox.split(1, dim=-1)
         split_result = (xmin, xmax, ymin, ymax)
@@ -454,9 +447,7 @@ def box_affine(bbox: NdarrayOrTensor, affine: torch.Tensor, mode: Union[str, Non
 
 
 def box_clip_to_patch(
-    bbox: NdarrayOrTensor,
-    patch_box: Union[Sequence[int], torch.Tensor, np.ndarray],
-    remove_empty: bool = True,
+    bbox: NdarrayOrTensor, patch_box: Union[Sequence[int], torch.Tensor, np.ndarray], remove_empty: bool = True
 ):
     """
     This function makes sure the bounding boxes are within the patch.
@@ -512,9 +503,7 @@ def box_clip_to_patch(
 
 
 def box_clip_to_image(
-    bbox: NdarrayOrTensor,
-    image_size: Union[Sequence[int], torch.Tensor, np.ndarray],
-    remove_empty: bool = True,
+    bbox: NdarrayOrTensor, image_size: Union[Sequence[int], torch.Tensor, np.ndarray], remove_empty: bool = True
 ):
     """
     This function makes sure the bounding boxes are within the image.
@@ -786,11 +775,7 @@ def box_pair_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTe
 
 
 def non_max_suppression(
-    bbox: NdarrayOrTensor,
-    scores: NdarrayOrTensor,
-    nms_thresh: float,
-    max_proposals=-1,
-    box_overlap_metric="iou",
+    bbox: NdarrayOrTensor, scores: NdarrayOrTensor, nms_thresh: float, max_proposals=-1, box_overlap_metric="iou"
 ):
     """
     written by Can Zhao, 2019

From 2a44c5464d111e674f023715e32eaed63e705850 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 11:59:14 -0400
Subject: [PATCH 03/49] split PR, add box utils for mode convert only

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 1319 ++++++++++++++++++++-------------------
 tests/test_box_utils.py |  182 +++---
 2 files changed, 748 insertions(+), 753 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 03a6f1d0d9..3fbf21a58b 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -15,8 +15,9 @@
 import numpy as np
 import torch
 
-import monai
 from monai.config.type_definitions import NdarrayOrTensor
+
+# from monai.utils.misc import ensure_tuple_rep
 from monai.utils.module import look_up_option
 from monai.utils.type_conversion import convert_to_numpy, convert_to_tensor
 
@@ -46,7 +47,7 @@ def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> l
 
     """
     in_sequence_list = deepcopy(in_sequence)
-    if torch.is_tensor(in_sequence):
+    if isinstance(in_sequence, torch.Tensor):
         in_sequence_list = in_sequence_list.detach().cpu().numpy().tolist()
     elif isinstance(in_sequence, np.ndarray):
         in_sequence_list = in_sequence_list.tolist()
@@ -70,18 +71,18 @@ def get_dimension(
     Returns:
         spatial_dimension: 2 or 3
     """
-    spatial_dims = set()
+    spatial_dims_set = set()
     if image_size is not None:
-        spatial_dims.add(len(image_size))
+        spatial_dims_set.add(len(image_size))
     if mode is not None:
-        spatial_dims.add(len(mode) / 2)
+        spatial_dims_set.add(int(len(mode) / 2))
     if bbox is not None:
-        spatial_dims.add(int(bbox.shape[1] / 2))
-    spatial_dims = list(spatial_dims)
-    if len(spatial_dims) == 0:
+        spatial_dims_set.add(int(bbox.shape[1] / 2))
+    spatial_dims_list = list(spatial_dims_set)
+    if len(spatial_dims_list) == 0:
         raise ValueError("At least one of bbox, image_size, and mode needs to be non-empty.")
-    elif len(spatial_dims) == 1:
-        spatial_dims = int(spatial_dims[0])
+    elif len(spatial_dims_list) == 1:
+        spatial_dims = int(spatial_dims_list[0])
         spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
         return int(spatial_dims)
     else:
@@ -105,68 +106,6 @@ def get_standard_mode(spatial_dims: int) -> str:
         raise ValueError(f"Images should have 2 or 3 dimensions, got {spatial_dims}")
 
 
-def point_interp(
-    point: NdarrayOrTensor, zoom: Union[Sequence[float], float]
-) -> Union[Sequence, torch.Tensor, np.ndarray]:
-    """
-    Convert point position from one pixel/voxel size to another pixel/voxel size
-    Args:
-        point: point coordinate, Nx2 or Nx3, [x, y] or [x, y, z]
-        zoom: The zoom factor along the spatial axes.
-            If a float, zoom is the same for each spatial axis.
-            If a sequence, zoom should contain one value for each spatial axis.
-    Returns:
-        point2: zoomed point coordinate, does not share memory with original point
-    """
-    # make sure the spatial dimensions of the inputs match with each other
-    spatial_dims = point.shape[1]
-    spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
-
-    # compute new point
-    point2 = deepcopy(point)
-    _zoom = monai.utils.misc.ensure_tuple_rep(zoom, spatial_dims)
-    for axis in range(0, spatial_dims):
-        point2[:, axis] = point[:, axis] * _zoom[axis]
-    return point2
-
-
-def box_interp(
-    bbox: NdarrayOrTensor, zoom: Union[Sequence[float], float], mode: Union[str, None] = None
-) -> torch.Tensor:
-    """
-    Interpolate bbox
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor
-        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-        zoom: The zoom factor along the spatial axes.
-            If a float, zoom is the same for each spatial axis.
-            If a sequence, zoom should contain one value for each spatial axis.
-
-    Returns:
-        returned interpolated bbox has the same mode as bbox, does not share memory with original bbox
-    """
-    if mode is None:
-        mode = get_standard_mode(int(bbox.shape[1] / 2))
-    mode = look_up_option(mode, supported=SUPPORT_MODE)
-    spatial_dims = get_dimension(bbox=bbox, mode=mode)
-
-    # convert to standard mode
-    mode_standard = get_standard_mode(spatial_dims)
-    bbox_standard = box_convert_mode(bbox1=bbox, mode1=mode, mode2=mode_standard)
-
-    # interp
-    corner_lt = point_interp(bbox_standard[:, :spatial_dims], zoom)
-    corner_rb = point_interp(bbox_standard[:, spatial_dims:], zoom)
-
-    bbox_standard_interp = deepcopy(bbox_standard)
-    bbox_standard_interp[:, :spatial_dims] = corner_lt
-    bbox_standard_interp[:, spatial_dims:] = corner_rb
-
-    # convert back
-    bbox2 = box_convert_mode(bbox1=bbox_standard_interp, mode1=mode_standard, mode2=mode)
-    return bbox2
-
-
 def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
     """
     This internal function outputs the corner coordinates of the bbox
@@ -350,588 +289,654 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
     return box_convert_mode(bbox1=bbox, mode1=mode, mode2=mode_standard)
 
 
-def point_affine(
-    point: NdarrayOrTensor, affine: NdarrayOrTensor, include_shift: bool = True
-) -> Union[Sequence, torch.Tensor, np.ndarray]:
-    """
-    Convert point position from one pixel/voxel size to another pixel/voxel size
-    Args:
-        point: point coordinate, Nx2 or Nx3, [x, y] or [x, y, z]
-        affine: affine transform
-        include_shift: does the func apply translation (shift) in the affine transform
-    Returns:
-        point2: transformed point coordinate, does not share memory with original point
-    """
-    # make sure the spatial dimensions of the inputs match with each other
-    spatial_dims = point.shape[1]
-    spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
-
-    # convert numpy to tensor if needed
-    if isinstance(point, np.ndarray):
-        point = convert_to_tensor(point)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-    affine = convert_to_tensor(affine, device=point.device, dtype=point.dtype)
-
-    # compute new point
-    if include_shift:
-        # append 1 to form Nx(spatial_dims+1) vector, then transpose
-        point2 = torch.cat(
-            [point, torch.ones(point.shape[0], 1, device=point.device, dtype=point.dtype)], dim=1
-        ).transpose(0, 1)
-        # apply affine
-        point2 = torch.matmul(affine, point2)
-        # remove appended 1 and transpose back
-        point2 = point2[:spatial_dims, :].transpose(0, 1)
-    else:
-        point2 = point.transpose(0, 1)
-        point2 = torch.matmul(affine[:spatial_dims, :spatial_dims], point2)
-        point2 = point2.transpose(0, 1)
-
-    # convert tensor back to numpy if needed
-    if numpy_bool:
-        point2 = convert_to_numpy(point2)
-    return point2
-
-
-def box_affine(bbox: NdarrayOrTensor, affine: torch.Tensor, mode: Union[str, None] = None) -> torch.Tensor:
-    """
-    This function applys affine matrixs to the bbox
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor
-        affine: affine matric to be applied to the box coordinate, (spatial_dims+1)x(spatial_dims+1)
-        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-    Returns:
-        returned affine transformed bbox has the same mode as bbox, does not share memory with original bbox
-    """
-    # convert numpy to tensor if needed
-    if isinstance(bbox, np.ndarray):
-        bbox = convert_to_tensor(bbox)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-    affine = convert_to_tensor(affine, device=bbox.device, dtype=bbox.dtype)
-
-    if mode is None:
-        mode = get_standard_mode(int(bbox.shape[1] / 2))
-    mode = look_up_option(mode, supported=SUPPORT_MODE)
-    spatial_dims = get_dimension(bbox=bbox, mode=mode)
-
-    if mode in ["xxyy", "xxyyzz", "xyxy", "xyzxyz"]:
-        # extract left top and right bottom, and apply affine
-        if mode in ["xxyy", "xxyyzz"]:
-            lt = point_affine(bbox[:, ::2], affine, include_shift=True)
-            rb = point_affine(bbox[:, 1::2], affine, include_shift=True)
-        if mode in ["xyxy", "xyzxyz"]:
-            lt = point_affine(bbox[:, :spatial_dims], affine, include_shift=True)
-            rb = point_affine(bbox[:, spatial_dims:], affine, include_shift=True)
-
-        lt_new, _ = torch.min(torch.stack([lt, rb], dim=2), dim=2)
-        rb_new, _ = torch.max(torch.stack([lt, rb], dim=2), dim=2)
-
-        bbox2 = box_convert_mode(torch.cat([lt_new, rb_new], dim=1), mode1=STANDARD_MODE[spatial_dims - 2], mode2=mode)
-
-    elif mode in ["ccwh", "cccwhd", "xywh", "xyzwhd"]:
-        cc = point_affine(bbox[:, :spatial_dims], affine, include_shift=True)
-        wh = point_affine(bbox[:, spatial_dims:], affine, include_shift=False).absolute()
-        bbox2 = torch.cat([cc, wh], dim=1)
-
-    else:
-        raise RuntimeError("Should not be here")
-
-    # convert tensor back to numpy if needed
-    if numpy_bool:
-        bbox2 = convert_to_numpy(bbox2)
-    return bbox2
-
-
-def box_clip_to_patch(
-    bbox: NdarrayOrTensor, patch_box: Union[Sequence[int], torch.Tensor, np.ndarray], remove_empty: bool = True
-):
-    """
-    This function makes sure the bounding boxes are within the patch.
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        patch_box: The coordinate of the target patch to clip, it follows standard mode
-        remove_empty: whether to remove the boxes that are actually empty
-    Returns:
-        new_bbox: updated box, does not share memory with original bbox
-        keep: the indice of the new_bbox regarding to input bbox. When remove_empty=True, only some of the boxes are kept
-    """
-    if bbox.shape[0] == 0:
-        return deepcopy(bbox), []
-
-    spatial_dims = get_dimension(bbox=bbox)
-    new_bbox = deepcopy(bbox)
-
-    # convert numpy to tensor if needed
-    if isinstance(new_bbox, np.ndarray):
-        new_bbox = convert_to_tensor(new_bbox)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-
-    # convert to float32 since torch.clamp_ does not support float16
-    box_dtype = new_bbox.dtype
-    compute_dtype = torch.float32
-    if box_dtype is torch.float16:
-        new_bbox = new_bbox.to(dtype=compute_dtype)
-
-    # makes sure the bounding boxes are within the image
-    for axis in range(0, spatial_dims):
-        new_bbox[:, axis].clamp_(min=patch_box[axis], max=patch_box[axis + spatial_dims] - TO_REMOVE)
-        new_bbox[:, axis + spatial_dims].clamp_(min=patch_box[axis], max=patch_box[axis + spatial_dims] - TO_REMOVE)
-        new_bbox[:, axis] -= patch_box[axis]
-        new_bbox[:, axis + spatial_dims] -= patch_box[axis]
-
-    # remove the boxes that are actually empty
-    if remove_empty:
-        keep = (new_bbox[:, spatial_dims] >= new_bbox[:, 0] + 1 - TO_REMOVE) & (
-            new_bbox[:, 1 + spatial_dims] >= new_bbox[:, 1] + 1 - TO_REMOVE
-        )
-        if spatial_dims == 3:
-            keep = keep & (new_bbox[:, 2 + spatial_dims] >= new_bbox[:, 2] + 1 - TO_REMOVE)
-        new_bbox = new_bbox[keep]
-
-    # convert tensor back to numpy if needed
-    new_bbox = new_bbox.to(dtype=box_dtype)
-    if numpy_bool:
-        new_bbox = convert_to_numpy(new_bbox)
-
-    return new_bbox, keep
-
-
-def box_clip_to_image(
-    bbox: NdarrayOrTensor, image_size: Union[Sequence[int], torch.Tensor, np.ndarray], remove_empty: bool = True
-):
-    """
-    This function makes sure the bounding boxes are within the image.
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        remove_empty: whether to remove the boxes that are actually empty
-    Returns:
-        updated box
-    """
-    spatial_dims = get_dimension(bbox=bbox, image_size=image_size)
-    image_box = [0] * spatial_dims + convert_to_list(image_size)
-    return box_clip_to_patch(bbox, image_box, remove_empty)
-
-
-def box_area(bbox: NdarrayOrTensor) -> NdarrayOrTensor:
-    """
-    This function computes the area of each box
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-    Returns:
-        area: 1-D tensor
-    """
-
-    spatial_dims = get_dimension(bbox=bbox)
-
-    area = bbox[:, spatial_dims] - bbox[:, 0] + TO_REMOVE
-    for axis in range(1, spatial_dims):
-        area = area * (bbox[:, axis + spatial_dims] - bbox[:, axis] + TO_REMOVE)
-
-    if isinstance(area, np.ndarray):
-        area = convert_to_tensor(area)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-
-    if area.isnan().any() or area.isinf().any():
-        if area.dtype is torch.float16:
-            raise ValueError("Box area is NaN or Inf. bbox is float16. Please change to float32 and test it again.")
-        else:
-            raise ValueError("Box area is NaN or Inf.")
-
-    if numpy_bool:
-        area = convert_to_numpy(area)
-    return area
-
-
-def box_iou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
-    """
-    Compute the intersection over union of two set of boxes. This function is not differentialable.
-
-    IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
-
-    Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
-    with slight modifications.
-
-    Args:
-        bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-
-    Returns:
-      (tensor) iou, sized [N,M].
-
-    Reference:
-      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
-    """
-
-    # convert numpy to tensor if needed
-    if isinstance(bbox1, np.ndarray):
-        bbox1 = convert_to_tensor(bbox1)
-        bbox2 = convert_to_tensor(bbox2)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-
-    spatial_dims = get_dimension(bbox=bbox1)
-
-    # we do computation with compute_dtype to avoid overflow
-    box_dtype = bbox1.dtype
-    compute_dtype = torch.float32
-
-    # compute area with float32
-    area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
-    area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Mx1
-
-    # get the left top and right bottom points for the NxM combinations
-    lt = torch.max(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
-        dtype=compute_dtype
-    )  # [N,M,spatial_dims] left top
-    rb = torch.min(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
-        dtype=compute_dtype
-    )  # [N,M,spatial_dims] right bottom
-    # compute size for the intersection region for the NxM combinations
-    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
-    inter = wh[:, :, 0]  # [N,M]
-    for axis in range(1, spatial_dims):
-        inter = inter * wh[:, :, axis]
-
-    # compute IoU and convert back to original box_dtype
-    iou = inter / (area1[:, None] + area2 - inter + torch.finfo(compute_dtype).eps)  # [N,M,spatial_dims]
-    iou = iou.to(dtype=box_dtype)
-
-    if torch.isnan(iou).any() or torch.isinf(iou).any():
-        raise ValueError("Box IoU is NaN or Inf.")
-
-    # convert tensor back to numpy if needed
-    if numpy_bool:
-        iou = convert_to_numpy(iou)
-    return iou
-
-
-def box_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
-    """
-    Compute the generalized intersection over union of two set of boxes. This function is not differentialable.
-
-    IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
-
-    Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
-    with slight modifications.
-
-    Args:
-        bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-
-    Returns:
-      (tensor) iou, sized [N,M].
-
-    Reference:
-      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
-    """
-    # convert numpy to tensor if needed
-    if isinstance(bbox1, np.ndarray):
-        bbox1 = convert_to_tensor(bbox1)
-        bbox2 = convert_to_tensor(bbox2)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-
-    spatial_dims = get_dimension(bbox=bbox1)
-
-    # we do computation with compute_dtype to avoid overflow
-    box_dtype = bbox1.dtype
-    compute_dtype = torch.float32
-
-    # compute area with float32
-    area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
-    area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Mx1
-
-    # get the left top and right bottom points for the NxM combinations
-    lt = torch.max(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
-        dtype=compute_dtype
-    )  # [N,M,spatial_dims] left top
-    rb = torch.min(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
-        dtype=compute_dtype
-    )  # [N,M,spatial_dims] right bottom
-    # compute size for the intersection region for the NxM combinations
-    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
-    inter = wh[:, :, 0]  # [N,M]
-    for axis in range(1, spatial_dims):
-        inter = inter * wh[:, :, axis]
-
-    # compute IoU and convert back to original box_dtype
-    union = area1[:, None] + area2 - inter
-    iou = inter / (union + torch.finfo(compute_dtype).eps)  # [N,M,spatial_dims]
-
-    # enclosure
-    lt = torch.min(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
-        dtype=compute_dtype
-    )  # [N,M,spatial_dims] left top
-    rb = torch.max(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
-        dtype=compute_dtype
-    )  # [N,M,spatial_dims] right bottom
-    # compute size for the intersection region for the NxM combinations
-    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
-    enclosure = wh[:, :, 0]  # [N,M]
-    for axis in range(1, spatial_dims):
-        enclosure = enclosure * wh[:, :, axis]
-
-    giou = iou - (enclosure - union) / (enclosure + torch.finfo(compute_dtype).eps)
-    giou = giou.to(dtype=box_dtype)
-    if torch.isnan(giou).any() or torch.isinf(giou).any():
-        raise ValueError("Box GIoU is NaN or Inf.")
-
-    # convert tensor back to numpy if needed
-    if numpy_bool:
-        giou = convert_to_numpy(giou)
-    return giou
-
-
-def box_pair_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
-    """
-    Compute the generalized intersection over union of two set of boxes. This function is not differentialable.
-
-    IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
-
-    Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
-    with slight modifications.
-
-    Args:
-        bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-
-    Returns:
-      (tensor) iou, sized [N].
-
-    Reference:
-      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
-    """
-
-    if bbox1.shape[0] != bbox2.shape[0]:
-        raise ValueError("bbox1 and bbox2 should be paired.")
-
-    # convert numpy to tensor if needed
-    if isinstance(bbox1, np.ndarray):
-        bbox1 = convert_to_tensor(bbox1)
-        bbox2 = convert_to_tensor(bbox2)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-
-    spatial_dims = get_dimension(bbox=bbox1)
-
-    # we do computation with compute_dtype to avoid overflow
-    box_dtype = bbox1.dtype
-    compute_dtype = torch.float32
-
-    # compute area
-    area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
-    area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Nx1
-
-    # get the left top and right bottom points for the NxM combinations
-    lt = torch.max(bbox1[:, :spatial_dims], bbox2[:, :spatial_dims]).to(
-        dtype=compute_dtype
-    )  # [N,spatial_dims] left top
-    rb = torch.min(bbox1[:, spatial_dims:], bbox2[:, spatial_dims:]).to(
-        dtype=compute_dtype
-    )  # [N,spatial_dims] right bottom
-    # compute size for the intersection region for the NxM combinations
-    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,spatial_dims]
-    inter = wh[:, 0]  # [N,M]
-    for axis in range(1, spatial_dims):
-        inter = inter * wh[:, axis]
-
-    # compute IoU and convert back to original box_dtype
-    union = area1 + area2 - inter
-    iou = inter / (union + torch.finfo(compute_dtype).eps)  # [N,spatial_dims]
-
-    # enclosure
-    lt = torch.min(bbox1[:, :spatial_dims], bbox2[:, :spatial_dims]).to(
-        dtype=compute_dtype
-    )  # [N,spatial_dims] left top
-    rb = torch.max(bbox1[:, spatial_dims:], bbox2[:, spatial_dims:]).to(
-        dtype=compute_dtype
-    )  # [N,spatial_dims] right bottom
-    # compute size for the intersection region for the NxM combinations
-    wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,spatial_dims]
-    enclosure = wh[:, 0]  # [N,M]
-    for axis in range(1, spatial_dims):
-        enclosure = enclosure * wh[:, axis]
-
-    giou = iou - (enclosure - union) / (enclosure + torch.finfo(compute_dtype).eps)
-    giou = giou.to(dtype=box_dtype)  # [N,spatial_dims]
-    if torch.isnan(giou).any() or torch.isinf(giou).any():
-        raise ValueError("Box GIoU is NaN or Inf.")
-
-    # convert tensor back to numpy if needed
-    if numpy_bool:
-        giou = convert_to_numpy(giou)
-    return giou
-
-
-def non_max_suppression(
-    bbox: NdarrayOrTensor, scores: NdarrayOrTensor, nms_thresh: float, max_proposals=-1, box_overlap_metric="iou"
-):
-    """
-    written by Can Zhao, 2019
-    if there are no boxes, return an empty list
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-    """
-    look_up_option(box_overlap_metric, ["iou", "giou"])
-    look_up_option(bbox.shape[1], [4, 6]) // 2
-    if bbox.shape[0] == 0:
-        return []
-
-    if bbox.shape[0] != scores.shape[0]:
-        raise ValueError(
-            f"bbox and scores should have same length, got bbox shape {bbox.shape}, scores shape {scores.shape}"
-        )
-
-    # convert numpy to tensor if needed
-    if isinstance(bbox, np.ndarray):
-        bbox = convert_to_tensor(bbox)
-        scores = convert_to_tensor(scores)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-
-    scores_sort, indices = torch.sort(scores, descending=True)
-    bbox_sort = deepcopy(bbox)[indices, :]
-
-    # initialize the list of picked indexes
-    pick = []
-    idxs = np.arange(0, bbox_sort.shape[0])
-    # keep looping while some indexes still remain in the indexes
-    # list
-    while len(idxs) > 0:
-        # grab the first index in the indexes list and add the
-        # index value to the list of picked indexes
-        i = idxs[0]
-        pick.append(i)
-        if len(pick) >= max_proposals >= 1:
-            break
-
-        # compute the IoU
-        if box_overlap_metric == "giou":
-            iou = box_giou(bbox_sort[idxs[1:], :], bbox_sort[i : i + 1, :])
-        else:
-            iou = box_iou(bbox_sort[idxs[1:], :], bbox_sort[i : i + 1, :])
-
-        # delete all indexes from the index list that have overlap > nms_thresh
-        idxs = np.delete(idxs, np.concatenate(([0], 1 + np.where(iou.cpu().numpy() > nms_thresh)[0])))
-
-    # return only the bounding boxes that were picked using the
-    # integer data type
-    pick_idx = indices[pick]
-    # convert tensor back to numpy if needed
-    if numpy_bool:
-        pick_idx = convert_to_numpy(pick_idx)
-    return pick_idx
-
-
-def box_center(bbox: NdarrayOrTensor) -> torch.Tensor:
-    """
-    Compute center point of bbox
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-    Returns:
-        Tensor: center points [N, dims]
-    """
-    spatial_dims = bbox.shape[1] // 2
-    centers = [(bbox[:, axis + spatial_dims] + bbox[:, axis]) / 2.0 for axis in range(spatial_dims)]
-
-    if isinstance(bbox, np.ndarray):
-        return np.stack(centers, axis=1)
-    else:
-        return torch.stack(centers, dim=1)
-
-
-def box_center_dist(bbox1: torch.Tensor, bbox2: torch.Tensor, euclidean: bool = True) -> Sequence[torch.Tensor]:
-    """
-    Distance of center points between two sets of bbox
-    Args:
-        bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        bbox2: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        euclidean: computed the euclidean distance otherwise it uses the l1
-            distance
-    Returns:
-        Tensor: the NxM matrix containing the pairwise
-            distances for every element in bbox1 and bbox2; [N, M]
-        Tensor: center points of bbox1
-        Tensor: center points of bbox2
-    """
-    # convert numpy to tensor if needed
-    if isinstance(bbox1, np.ndarray):
-        bbox1 = convert_to_tensor(bbox1)
-        bbox2 = convert_to_tensor(bbox2)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-
-    box_dtype = bbox1.dtype
-    compute_dtype = torch.float32
-
-    center1 = box_center(bbox1.to(compute_dtype))  # [N, dims]
-    center2 = box_center(bbox2.to(compute_dtype))  # [M, dims]
-
-    if euclidean:
-        dists = (center1[:, None] - center2[None]).pow(2).sum(-1).sqrt()
-    else:
-        # before sum: [N, M, dims]
-        dists = (center1[:, None] - center2[None]).sum(-1)
-
-    # convert tensor back to numpy if needed
-    dists, center1, center2 = dists.to(box_dtype), center1.to(box_dtype), center2.to(box_dtype)
-    if numpy_bool:
-        dists, center1, center2 = convert_to_numpy(dists), convert_to_numpy(center1), convert_to_numpy(center2)
-    return dists, center1, center2
-
-
-def center_in_boxes(center: NdarrayOrTensor, bbox: NdarrayOrTensor, eps: float = 0.01) -> NdarrayOrTensor:
-    """
-    Checks which center points are within bbox
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        center: center points [N, dims]
-        eps: minimum distance to boarder of bbox
-    Returns:
-        Tensor: boolean array indicating which center points are within
-            the bbox [N]
-    """
-    spatial_dims = bbox.shape[1] // 2
-    axes = [center[:, axis] - bbox[:, axis] for axis in range(spatial_dims)] + [
-        bbox[:, axis + spatial_dims] - center[:, axis] for axis in range(spatial_dims)
-    ]
-    if isinstance(bbox, np.ndarray):
-        return np.stack(axes, axis=1).min(axis=1) > eps  # array[bool]
-    else:
-        return torch.stack(axes, dim=1).min(dim=1)[0] > eps  # Tensor[bool]
-
-
-def resize_boxes(
-    bbox: NdarrayOrTensor,
-    original_size: Union[Sequence, torch.Tensor, np.ndarray],
-    new_size: Union[Sequence, torch.Tensor, np.ndarray],
-) -> NdarrayOrTensor:
-    """
-    modified from torchvision
-    Args:
-        bbox: source bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-        original_size: source image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
-        original_size: target image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
-    """
-    if len(original_size) != len(new_size):
-        raise ValueError("The dimension of original image size should equal to the new image size")
-    spatial_dims = get_dimension(bbox, original_size)
-
-    original_size = convert_to_list(original_size)
-    new_size = convert_to_list(new_size)
-    zoom = [new_size[axis] / float(original_size[axis]) for axis in range(spatial_dims)]
-
-    return box_interp(bbox=bbox, zoom=zoom)
+# def point_interp(
+#     point: NdarrayOrTensor, zoom: Union[Sequence[float], float]
+# ) -> Union[Sequence, torch.Tensor, np.ndarray]:
+#     """
+#     Convert point position from one pixel/voxel size to another pixel/voxel size
+#     Args:
+#         point: point coordinate, Nx2 or Nx3, [x, y] or [x, y, z]
+#         zoom: The zoom factor along the spatial axes.
+#             If a float, zoom is the same for each spatial axis.
+#             If a sequence, zoom should contain one value for each spatial axis.
+#     Returns:
+#         point2: zoomed point coordinate, does not share memory with original point
+#     """
+#     # make sure the spatial dimensions of the inputs match with each other
+#     spatial_dims = point.shape[1]
+#     spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
+
+#     # compute new point
+#     point2 = deepcopy(point)
+#     _zoom = ensure_tuple_rep(zoom, spatial_dims)
+#     for axis in range(0, spatial_dims):
+#         point2[:, axis] = point[:, axis] * _zoom[axis]
+#     return point2
+
+
+# def box_interp(
+#     bbox: NdarrayOrTensor, zoom: Union[Sequence[float], float], mode: Union[str, None] = None
+# ) -> torch.Tensor:
+#     """
+#     Interpolate bbox
+#     Args:
+#         bbox: bounding box, Nx4 or Nx6 torch tensor
+#         mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+#         zoom: The zoom factor along the spatial axes.
+#             If a float, zoom is the same for each spatial axis.
+#             If a sequence, zoom should contain one value for each spatial axis.
+
+#     Returns:
+#         returned interpolated bbox has the same mode as bbox, does not share memory with original bbox
+#     """
+#     if mode is None:
+#         mode = get_standard_mode(int(bbox.shape[1] / 2))
+#     mode = look_up_option(mode, supported=SUPPORT_MODE)
+#     spatial_dims = get_dimension(bbox=bbox, mode=mode)
+
+#     # convert to standard mode
+#     mode_standard = get_standard_mode(spatial_dims)
+#     bbox_standard = box_convert_mode(bbox1=bbox, mode1=mode, mode2=mode_standard)
+
+#     # interp
+#     corner_lt = point_interp(bbox_standard[:, :spatial_dims], zoom)
+#     corner_rb = point_interp(bbox_standard[:, spatial_dims:], zoom)
+
+#     bbox_standard_interp = deepcopy(bbox_standard)
+#     bbox_standard_interp[:, :spatial_dims] = corner_lt
+#     bbox_standard_interp[:, spatial_dims:] = corner_rb
+
+#     # convert back
+#     bbox2 = box_convert_mode(bbox1=bbox_standard_interp, mode1=mode_standard, mode2=mode)
+#     return bbox2
+
+# def point_affine(
+#     point: NdarrayOrTensor, affine: NdarrayOrTensor, include_shift: bool = True
+# ) -> Union[Sequence, torch.Tensor, np.ndarray]:
+#     """
+#     Apply an affine transform to point coordinates
+#     Args:
+#         point: point coordinate, Nx2 or Nx3, [x, y] or [x, y, z]
+#         affine: affine transform
+#         include_shift: does the func apply translation (shift) in the affine transform
+#     Returns:
+#         point2: transformed point coordinate, does not share memory with original point
+#     """
+#     # make sure the spatial dimensions of the inputs match with each other
+#     spatial_dims = point.shape[1]
+#     spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
+
+#     # convert numpy to tensor if needed
+#     if isinstance(point, np.ndarray):
+#         point = convert_to_tensor(point)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+#     affine = convert_to_tensor(affine, device=point.device, dtype=point.dtype)
+
+#     # compute new point
+#     if include_shift:
+#         # append 1 to form Nx(spatial_dims+1) vector, then transpose
+#         point2 = torch.cat(
+#             [point, torch.ones(point.shape[0], 1, device=point.device, dtype=point.dtype)], dim=1
+#         ).transpose(0, 1)
+#         # apply affine
+#         point2 = torch.matmul(affine, point2)
+#         # remove appended 1 and transpose back
+#         point2 = point2[:spatial_dims, :].transpose(0, 1)
+#     else:
+#         point2 = point.transpose(0, 1)
+#         point2 = torch.matmul(affine[:spatial_dims, :spatial_dims], point2)
+#         point2 = point2.transpose(0, 1)
+
+#     # convert tensor back to numpy if needed
+#     if numpy_bool:
+#         point2 = convert_to_numpy(point2)
+#     return point2
+
+
+# def box_affine(bbox: NdarrayOrTensor, affine: NdarrayOrTensor, mode: Union[str, None] = None) -> torch.Tensor:
+#     """
+#     This function applies an affine matrix to the bbox
+#     Args:
+#         bbox: bounding box, Nx4 or Nx6 torch tensor
+#         affine: affine matrix to be applied to the box coordinates, (spatial_dims+1)x(spatial_dims+1)
+#         mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+#     Returns:
+#         returned affine transformed bbox has the same mode as bbox, does not share memory with original bbox
+#     """
+#     # convert numpy to tensor if needed
+#     if isinstance(bbox, np.ndarray):
+#         bbox = convert_to_tensor(bbox)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+
+#     box_dtype = bbox.dtype
+#     compute_dtype = torch.float32
+#     if box_dtype is torch.float16:
+#         bbox = bbox.to(dtype=compute_dtype)
+#     affine = convert_to_tensor(affine, device=bbox.device, dtype=bbox.dtype)
+
+#     if mode is None:
+#         mode = get_standard_mode(int(bbox.shape[1] / 2))
+#     mode = look_up_option(mode, supported=SUPPORT_MODE)
+#     spatial_dims = get_dimension(bbox=bbox, mode=mode)
+
+#     if mode in ["xxyy", "xxyyzz", "xyxy", "xyzxyz"]:
+#         # extract left top and right bottom, and apply affine
+#         if mode in ["xxyy", "xxyyzz"]:
+#             lt = point_affine(bbox[:, ::2], affine, include_shift=True)
+#             rb = point_affine(bbox[:, 1::2], affine, include_shift=True)
+#         if mode in ["xyxy", "xyzxyz"]:
+#             lt = point_affine(bbox[:, :spatial_dims], affine, include_shift=True)
+#             rb = point_affine(bbox[:, spatial_dims:], affine, include_shift=True)
+
+#         lt_new, _ = torch.min(torch.stack([lt, rb], dim=2), dim=2)
+#         rb_new, _ = torch.max(torch.stack([lt, rb], dim=2), dim=2)
+
+#         bbox2 = box_convert_mode(torch.cat([lt_new, rb_new], dim=1), mode1=STANDARD_MODE[spatial_dims - 2], mode2=mode)
+
+#     elif mode in ["ccwh", "cccwhd", "xywh", "xyzwhd"]:
+#         cc = point_affine(bbox[:, :spatial_dims], affine, include_shift=True)
+#         wh = point_affine(bbox[:, spatial_dims:], affine, include_shift=False).absolute()
+#         bbox2 = torch.cat([cc, wh], dim=1)
+
+#     else:
+#         raise RuntimeError("Should not be here")
+
+#     # convert tensor back to numpy if needed
+#     if numpy_bool:
+#         bbox2 = convert_to_numpy(bbox2.to(dtype=box_dtype))
+#     return bbox2
+
+
+# def box_clip_to_patch(
+#     bbox: NdarrayOrTensor, patch_box: Union[Sequence[int], torch.Tensor, np.ndarray], remove_empty: bool = True
+# ):
+#     """
+#     This function makes sure the bounding boxes are within the patch.
+#     Args:
+#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         patch_box: The coordinate of the target patch to clip, it follows standard mode
+#         remove_empty: whether to remove the boxes that are actually empty
+#     Returns:
+#         new_bbox: updated box, does not share memory with original bbox
+#         keep: boolean mask over the input bbox marking the boxes kept in new_bbox. When remove_empty=True, only some of the boxes are kept
+#     """
+#     if bbox.shape[0] == 0:
+#         return deepcopy(bbox), []
+
+#     spatial_dims = get_dimension(bbox=bbox)
+#     new_bbox = deepcopy(bbox)
+
+#     # convert numpy to tensor if needed
+#     if isinstance(new_bbox, np.ndarray):
+#         new_bbox = convert_to_tensor(new_bbox)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+
+#     # convert to float32 since torch.clamp_ does not support float16
+#     box_dtype = new_bbox.dtype
+#     compute_dtype = torch.float32
+#     if box_dtype is torch.float16:
+#         new_bbox = new_bbox.to(dtype=compute_dtype)
+
+#     # makes sure the bounding boxes are within the image
+#     for axis in range(0, spatial_dims):
+#         new_bbox[:, axis].clamp_(min=patch_box[axis], max=patch_box[axis + spatial_dims] - TO_REMOVE)
+#         new_bbox[:, axis + spatial_dims].clamp_(min=patch_box[axis], max=patch_box[axis + spatial_dims] - TO_REMOVE)
+#         new_bbox[:, axis] -= patch_box[axis]
+#         new_bbox[:, axis + spatial_dims] -= patch_box[axis]
+
+#     # remove the boxes that are actually empty
+#     if remove_empty:
+#         keep = (new_bbox[:, spatial_dims] >= new_bbox[:, 0] + 1 - TO_REMOVE) & (
+#             new_bbox[:, 1 + spatial_dims] >= new_bbox[:, 1] + 1 - TO_REMOVE
+#         )
+#         if spatial_dims == 3:
+#             keep = keep & (new_bbox[:, 2 + spatial_dims] >= new_bbox[:, 2] + 1 - TO_REMOVE)
+#         new_bbox = new_bbox[keep]
+
+#     # convert tensor back to numpy if needed
+#     new_bbox = new_bbox.to(dtype=box_dtype)
+#     if numpy_bool:
+#         new_bbox = convert_to_numpy(new_bbox)
+
+#     return new_bbox, keep
+
+
+# def box_clip_to_image(
+#     bbox: NdarrayOrTensor, image_size: Union[Sequence[int], torch.Tensor, np.ndarray], remove_empty: bool = True
+# ):
+#     """
+#     This function makes sure the bounding boxes are within the image.
+#     Args:
+#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         remove_empty: whether to remove the boxes that are actually empty
+#     Returns:
+#         updated box
+#     """
+#     spatial_dims = get_dimension(bbox=bbox, image_size=image_size)
+#     image_box = [0] * spatial_dims + convert_to_list(image_size)
+#     return box_clip_to_patch(bbox, image_box, remove_empty)
+
+
+# def box_area(bbox: NdarrayOrTensor) -> NdarrayOrTensor:
+#     """
+#     This function computes the area of each box
+#     Args:
+#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#     Returns:
+#         area: 1-D tensor
+#     """
+
+#     spatial_dims = get_dimension(bbox=bbox)
+
+#     area = bbox[:, spatial_dims] - bbox[:, 0] + TO_REMOVE
+#     for axis in range(1, spatial_dims):
+#         area = area * (bbox[:, axis + spatial_dims] - bbox[:, axis] + TO_REMOVE)
+
+#     if isinstance(area, np.ndarray):
+#         area = convert_to_tensor(area)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+
+#     if area.isnan().any() or area.isinf().any():
+#         if area.dtype is torch.float16:
+#             raise ValueError("Box area is NaN or Inf. bbox is float16. Please change to float32 and test it again.")
+#         else:
+#             raise ValueError("Box area is NaN or Inf.")
+
+#     if numpy_bool:
+#         area = convert_to_numpy(area)
+#     return area
+
+
+# def box_iou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
+#     """
+#     Compute the intersection over union of two sets of boxes. This function is not differentiable.
+
+#     IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
+
+#     Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+#     with slight modifications.
+
+#     Args:
+#         bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+
+#     Returns:
+#       (tensor) iou, sized [N,M].
+
+#     Reference:
+#       https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
+#     """
+
+#     # convert numpy to tensor if needed
+#     if isinstance(bbox1, np.ndarray):
+#         bbox1 = convert_to_tensor(bbox1)
+#         bbox2 = convert_to_tensor(bbox2)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+
+#     spatial_dims = get_dimension(bbox=bbox1)
+
+#     # we do computation with compute_dtype to avoid overflow
+#     box_dtype = bbox1.dtype
+#     compute_dtype = torch.float32
+
+#     # compute area with float32
+#     area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
+#     area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Mx1
+
+#     # get the left top and right bottom points for the NxM combinations
+#     lt = torch.max(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
+#         dtype=compute_dtype
+#     )  # [N,M,spatial_dims] left top
+#     rb = torch.min(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
+#         dtype=compute_dtype
+#     )  # [N,M,spatial_dims] right bottom
+#     # compute size for the intersection region for the NxM combinations
+#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
+#     inter = wh[:, :, 0]  # [N,M]
+#     for axis in range(1, spatial_dims):
+#         inter = inter * wh[:, :, axis]
+
+#     # compute IoU and convert back to original box_dtype
+#     iou = inter / (area1[:, None] + area2 - inter + torch.finfo(compute_dtype).eps)  # [N,M,spatial_dims]
+#     iou = iou.to(dtype=box_dtype)
+
+#     if torch.isnan(iou).any() or torch.isinf(iou).any():
+#         raise ValueError("Box IoU is NaN or Inf.")
+
+#     # convert tensor back to numpy if needed
+#     if numpy_bool:
+#         iou = convert_to_numpy(iou)
+#     return iou
+
+
+# def box_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
+#     """
+#     Compute the generalized intersection over union of two sets of boxes. This function is not differentiable.
+
+#     IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
+
+#     Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+#     with slight modifications.
+
+#     Args:
+#         bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+
+#     Returns:
+#       (tensor) iou, sized [N,M].
+
+#     Reference:
+#       https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
+#     """
+#     # convert numpy to tensor if needed
+#     if isinstance(bbox1, np.ndarray):
+#         bbox1 = convert_to_tensor(bbox1)
+#         bbox2 = convert_to_tensor(bbox2)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+
+#     spatial_dims = get_dimension(bbox=bbox1)
+
+#     # we do computation with compute_dtype to avoid overflow
+#     box_dtype = bbox1.dtype
+#     compute_dtype = torch.float32
+
+#     # compute area with float32
+#     area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
+#     area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Mx1
+
+#     # get the left top and right bottom points for the NxM combinations
+#     lt = torch.max(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
+#         dtype=compute_dtype
+#     )  # [N,M,spatial_dims] left top
+#     rb = torch.min(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
+#         dtype=compute_dtype
+#     )  # [N,M,spatial_dims] right bottom
+#     # compute size for the intersection region for the NxM combinations
+#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
+#     inter = wh[:, :, 0]  # [N,M]
+#     for axis in range(1, spatial_dims):
+#         inter = inter * wh[:, :, axis]
+
+#     # compute IoU and convert back to original box_dtype
+#     union = area1[:, None] + area2 - inter
+#     iou = inter / (union + torch.finfo(compute_dtype).eps)  # [N,M,spatial_dims]
+
+#     # enclosure
+#     lt = torch.min(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
+#         dtype=compute_dtype
+#     )  # [N,M,spatial_dims] left top
+#     rb = torch.max(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
+#         dtype=compute_dtype
+#     )  # [N,M,spatial_dims] right bottom
+#     # compute size for the intersection region for the NxM combinations
+#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
+#     enclosure = wh[:, :, 0]  # [N,M]
+#     for axis in range(1, spatial_dims):
+#         enclosure = enclosure * wh[:, :, axis]
+
+#     giou = iou - (enclosure - union) / (enclosure + torch.finfo(compute_dtype).eps)
+#     giou = giou.to(dtype=box_dtype)
+#     if torch.isnan(giou).any() or torch.isinf(giou).any():
+#         raise ValueError("Box GIoU is NaN or Inf.")
+
+#     # convert tensor back to numpy if needed
+#     if numpy_bool:
+#         giou = convert_to_numpy(giou)
+#     return giou
+
+
+# def box_pair_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
+#     """
+#     Compute the generalized intersection over union of two set of boxes. This function is not differentialable.
+
+#     IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
+
+#     Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+#     with slight modifications.
+
+#     Args:
+#         bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+
+#     Returns:
+#       (tensor) iou, sized [N].
+
+#     Reference:
+#       https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
+#     """
+
+#     if bbox1.shape[0] != bbox2.shape[0]:
+#         raise ValueError("bbox1 and bbox2 should be paired.")
+
+#     # convert numpy to tensor if needed
+#     if isinstance(bbox1, np.ndarray):
+#         bbox1 = convert_to_tensor(bbox1)
+#         bbox2 = convert_to_tensor(bbox2)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+
+#     spatial_dims = get_dimension(bbox=bbox1)
+
+#     # we do computation with compute_dtype to avoid overflow
+#     box_dtype = bbox1.dtype
+#     compute_dtype = torch.float32
+
+#     # compute area
+#     area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
+#     area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Nx1
+
+#     # get the left top and right bottom points for the NxM combinations
+#     lt = torch.max(bbox1[:, :spatial_dims], bbox2[:, :spatial_dims]).to(
+#         dtype=compute_dtype
+#     )  # [N,spatial_dims] left top
+#     rb = torch.min(bbox1[:, spatial_dims:], bbox2[:, spatial_dims:]).to(
+#         dtype=compute_dtype
+#     )  # [N,spatial_dims] right bottom
+#     # compute size for the intersection region for the NxM combinations
+#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,spatial_dims]
+#     inter = wh[:, 0]  # [N,M]
+#     for axis in range(1, spatial_dims):
+#         inter = inter * wh[:, axis]
+
+#     # compute IoU and convert back to original box_dtype
+#     union = area1 + area2 - inter
+#     iou = inter / (union + torch.finfo(compute_dtype).eps)  # [N,spatial_dims]
+
+#     # enclosure
+#     lt = torch.min(bbox1[:, :spatial_dims], bbox2[:, :spatial_dims]).to(
+#         dtype=compute_dtype
+#     )  # [N,spatial_dims] left top
+#     rb = torch.max(bbox1[:, spatial_dims:], bbox2[:, spatial_dims:]).to(
+#         dtype=compute_dtype
+#     )  # [N,spatial_dims] right bottom
+#     # compute size for the intersection region for the NxM combinations
+#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,spatial_dims]
+#     enclosure = wh[:, 0]  # [N,M]
+#     for axis in range(1, spatial_dims):
+#         enclosure = enclosure * wh[:, axis]
+
+#     giou = iou - (enclosure - union) / (enclosure + torch.finfo(compute_dtype).eps)
+#     giou = giou.to(dtype=box_dtype)  # [N,spatial_dims]
+#     if torch.isnan(giou).any() or torch.isinf(giou).any():
+#         raise ValueError("Box GIoU is NaN or Inf.")
+
+#     # convert tensor back to numpy if needed
+#     if numpy_bool:
+#         giou = convert_to_numpy(giou)
+#     return giou
+
+
+# def non_max_suppression(
+#     bbox: NdarrayOrTensor, scores: NdarrayOrTensor, nms_thresh: float, max_proposals=-1, box_overlap_metric="iou"
+# ):
+#     """
+#     written by Can Zhao, 2019
+#     if there are no boxes, return an empty list
+#     Args:
+#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#     """
+#     look_up_option(box_overlap_metric, ["iou", "giou"])
+#     look_up_option(bbox.shape[1], [4, 6]) // 2
+#     if bbox.shape[0] == 0:
+#         return []
+
+#     if bbox.shape[0] != scores.shape[0]:
+#         raise ValueError(
+#             f"bbox and scores should have same length, got bbox shape {bbox.shape}, scores shape {scores.shape}"
+#         )
+
+#     # convert numpy to tensor if needed
+#     if isinstance(bbox, np.ndarray):
+#         bbox = convert_to_tensor(bbox)
+#         scores = convert_to_tensor(scores)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+
+#     scores_sort, indices = torch.sort(scores, descending=True)
+#     bbox_sort = deepcopy(bbox)[indices, :]
+
+#     # initialize the list of picked indexes
+#     pick = []
+#     idxs = np.arange(0, bbox_sort.shape[0])
+#     # keep looping while some indexes still remain in the indexes
+#     # list
+#     while len(idxs) > 0:
+#         # grab the first index in the indexes list and add the
+#         # index value to the list of picked indexes
+#         i = idxs[0]
+#         pick.append(i)
+#         if len(pick) >= max_proposals >= 1:
+#             break
+
+#         # compute the IoU
+#         if box_overlap_metric == "giou":
+#             iou = box_giou(bbox_sort[idxs[1:], :], bbox_sort[i : i + 1, :])
+#         else:
+#             iou = box_iou(bbox_sort[idxs[1:], :], bbox_sort[i : i + 1, :])
+
+#         # delete all indexes from the index list that have overlap > nms_thresh
+#         idxs = np.delete(idxs, np.concatenate(([0], 1 + np.where(iou.cpu().numpy() > nms_thresh)[0])))
+
+#     # return only the bounding boxes that were picked using the
+#     # integer data type
+#     pick_idx = indices[pick]
+#     # convert tensor back to numpy if needed
+#     if numpy_bool:
+#         pick_idx = convert_to_numpy(pick_idx)
+#     return pick_idx
+
+
+# def box_center(bbox: NdarrayOrTensor) -> torch.Tensor:
+#     """
+#     Compute center point of bbox
+#     Args:
+#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#     Returns:
+#         Tensor: center points [N, dims]
+#     """
+#     spatial_dims = bbox.shape[1] // 2
+#     centers = [(bbox[:, axis + spatial_dims] + bbox[:, axis]) / 2.0 for axis in range(spatial_dims)]
+
+#     if isinstance(bbox, np.ndarray):
+#         return np.stack(centers, axis=1)
+#     else:
+#         return torch.stack(centers, dim=1)
+
+
+# def box_center_dist(bbox1: torch.Tensor, bbox2: torch.Tensor, euclidean: bool = True) -> Sequence[torch.Tensor]:
+#     """
+#     Distance of center points between two sets of bbox
+#     Args:
+#         bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox2: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         euclidean: computed the euclidean distance otherwise it uses the l1
+#             distance
+#     Returns:
+#         Tensor: the NxM matrix containing the pairwise
+#             distances for every element in bbox1 and bbox2; [N, M]
+#         Tensor: center points of bbox1
+#         Tensor: center points of bbox2
+#     """
+#     # convert numpy to tensor if needed
+#     if isinstance(bbox1, np.ndarray):
+#         bbox1 = convert_to_tensor(bbox1)
+#         bbox2 = convert_to_tensor(bbox2)
+#         numpy_bool = True
+#     else:
+#         numpy_bool = False
+
+#     box_dtype = bbox1.dtype
+#     compute_dtype = torch.float32
+
+#     center1 = box_center(bbox1.to(compute_dtype))  # [N, dims]
+#     center2 = box_center(bbox2.to(compute_dtype))  # [M, dims]
+
+#     if euclidean:
+#         dists = (center1[:, None] - center2[None]).pow(2).sum(-1).sqrt()
+#     else:
+#         # before sum: [N, M, dims]
+#         dists = (center1[:, None] - center2[None]).sum(-1)
+
+#     # convert tensor back to numpy if needed
+#     dists, center1, center2 = dists.to(box_dtype), center1.to(box_dtype), center2.to(box_dtype)
+#     if numpy_bool:
+#         dists, center1, center2 = convert_to_numpy(dists), convert_to_numpy(center1), convert_to_numpy(center2)
+#     return dists, center1, center2
+
+
+# def center_in_boxes(center: NdarrayOrTensor, bbox: NdarrayOrTensor, eps: float = 0.01) -> NdarrayOrTensor:
+#     """
+#     Checks which center points are within bbox
+#     Args:
+#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         center: center points [N, dims]
+#         eps: minimum distance to boarder of bbox
+#     Returns:
+#         Tensor: boolean array indicating which center points are within
+#             the bbox [N]
+#     """
+#     spatial_dims = bbox.shape[1] // 2
+#     axes = [center[:, axis] - bbox[:, axis] for axis in range(spatial_dims)] + [
+#         bbox[:, axis + spatial_dims] - center[:, axis] for axis in range(spatial_dims)
+#     ]
+#     if isinstance(bbox, np.ndarray):
+#         return np.stack(axes, axis=1).min(axis=1) > eps  # array[bool]
+#     else:
+#         return torch.stack(axes, dim=1).min(dim=1)[0] > eps  # Tensor[bool]
+
+
+# def resize_boxes(
+#     bbox: NdarrayOrTensor,
+#     original_size: Union[Sequence, torch.Tensor, np.ndarray],
+#     new_size: Union[Sequence, torch.Tensor, np.ndarray],
+# ) -> NdarrayOrTensor:
+#     """
+#     modified from torchvision
+#     Args:
+#         bbox: source bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         original_size: source image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+#         original_size: target image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+#     """
+#     if len(original_size) != len(new_size):
+#         raise ValueError("The dimension of original image size should equal to the new image size")
+#     spatial_dims = get_dimension(bbox, original_size)
+
+#     original_size = convert_to_list(original_size)
+#     new_size = convert_to_list(new_size)
+#     zoom = [new_size[axis] / float(original_size[axis]) for axis in range(spatial_dims)]
+
+#     return box_interp(bbox=bbox, zoom=zoom)
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index f185a2016d..307988d3d0 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -9,33 +9,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import random
+# import random
 import unittest
 
 import numpy as np
-import torch
+
+# import torch
 from parameterized import parameterized
 
-from monai.data.box_utils import (
-    box_affine,
-    box_area,
-    box_center,
-    box_center_dist,
-    box_clip_to_image,
-    box_convert_mode,
-    box_convert_standard_mode,
-    box_giou,
-    box_interp,
-    box_iou,
-    box_pair_giou,
-    center_in_boxes,
-    convert_to_list,
-    non_max_suppression,
-    resize_boxes,
-)
+from monai.data.box_utils import box_convert_mode, box_convert_standard_mode
 from monai.utils.type_conversion import convert_data_type
 from tests.utils import TEST_NDARRAYS, assert_allclose
 
+# box_affine, box_area, box_center, box_center_dist, box_clip_to_image,
+# box_giou, box_interp, box_iou, box_pair_giou, center_in_boxes,
+# convert_to_list, non_max_suppression, resize_boxes,
+
+
 TESTS = []
 for p in TEST_NDARRAYS:
     bbox = [[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]
@@ -119,7 +109,7 @@ class TestCreateBoxList(unittest.TestCase):
     def test_value(self, input_data, mode2, expected_box, expected_area):
         bbox1 = convert_data_type(input_data["bbox"], dtype=np.float32)[0]
         mode1 = input_data["mode"]
-        image_size = input_data["image_size"]
+        # image_size = input_data["image_size"]
         half_bool = input_data["half"]
 
         # test float16
@@ -138,82 +128,82 @@ def test_value(self, input_data, mode2, expected_box, expected_area):
         expected_box_standard = box_convert_standard_mode(bbox=expected_box, mode=mode2)
         assert_allclose(result_standard, expected_box_standard, type_test=True, device_test=True, atol=0.0)
 
-        # test box_area, box_clip_to_image, convert_to_list
-        assert_allclose(box_area(result_standard), expected_area, type_test=True, device_test=True, atol=0.0)
-
-        result_standard_clip, _ = box_clip_to_image(result_standard, image_size, remove_empty=True)
-        np.testing.assert_equal(
-            isinstance(result_standard_clip, np.ndarray), isinstance(bbox1, np.ndarray), "numpy type"
-        )
-        np.testing.assert_equal(
-            isinstance(result_standard_clip, torch.Tensor), isinstance(bbox1, torch.Tensor), "torch type"
-        )
-        result_area_clip = convert_to_list(box_area(result_standard_clip))
-        expected_area_clip = list(filter(lambda num: num > 0, convert_to_list(expected_area)))
-        assert_allclose(result_area_clip, expected_area_clip, type_test=True, device_test=True, atol=0.0)
-
-        # test box_interp and box_affine, resize_boxes
-        zoom = [random.uniform(0.5, 5), random.uniform(0.5, 2), random.uniform(0.5, 5)]
-        new_size = [int(image_size[axis] * zoom[axis] + 0.5) for axis in range(3)]
-        zoom = [new_size[axis] / float(image_size[axis]) for axis in range(3)]
-
-        result_standard_interp = box_interp(bbox=result_standard, zoom=zoom)
-        result_standard_resize = resize_boxes(bbox=result_standard, original_size=image_size, new_size=new_size)
-        assert_allclose(result_standard_interp, result_standard_resize, type_test=True, device_test=True, atol=0.0)
-
-        result_area_interp = box_area(result_standard_interp)
-        expected_area_interp = expected_area * zoom[0] * zoom[1] * zoom[2]
-        assert_allclose(result_area_interp, expected_area_interp, type_test=True, device_test=True, atol=0.5)
-
-        affine = torch.diag(torch.Tensor(zoom + [1.0]))
-        result_affine = box_affine(bbox=bbox1, affine=affine, mode=mode1)
-        result_affine_standard = box_convert_standard_mode(bbox=result_affine, mode=mode1)
-        assert_allclose(
-            box_area(result_affine_standard), expected_area_interp, type_test=True, device_test=True, atol=0.5
-        )
-
-        # test box_center, center_in_boxes, box_center_dist
-        result_standard_center = box_center(result_standard)
-        expected_center = box_convert_mode(bbox1=bbox1, mode1=mode1, mode2="cccwhd")[:, :3]
-        assert_allclose(result_standard_center, expected_center, type_test=True, device_test=True, atol=0.0)
-
-        center = expected_center
-        center[2, :] += 10
-        result_center_in_boxes = center_in_boxes(center=center, bbox=result_standard)
-        assert_allclose(result_center_in_boxes, np.array([False, True, False]), type_test=False)
-
-        center_dist, _, _ = box_center_dist(bbox1=result_standard[1:2, :], bbox2=result_standard[1:1, :])
-        assert_allclose(center_dist, np.array([[]]), type_test=False)
-        center_dist, _, _ = box_center_dist(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
-        assert_allclose(center_dist, np.array([[0.0]]), type_test=False)
-        center_dist, _, _ = box_center_dist(bbox1=result_standard[0:1, :], bbox2=result_standard[0:1, :])
-        assert_allclose(center_dist, np.array([[0.0]]), type_test=False)
-
-        # test box_iou
-        iou_metrics = (box_iou, box_giou)  # type: ignore
-        for p in iou_metrics:
-            self_iou = p(bbox1=result_standard[1:2, :], bbox2=result_standard[1:1, :])
-            assert_allclose(self_iou, np.array([[]]), type_test=False)
-
-            self_iou = p(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
-            assert_allclose(self_iou, np.array([[1.0]]), type_test=False)
-
-        self_iou = box_pair_giou(bbox1=result_standard[1:1, :], bbox2=result_standard[1:1, :])
-        assert_allclose(self_iou, np.array([]), type_test=False)
-
-        self_iou = box_pair_giou(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
-        assert_allclose(self_iou, np.array([1.0]), type_test=False)
-
-        # test non_max_suppression
-        nms_box = non_max_suppression(
-            bbox=result_standard, scores=bbox1[:, 1] / 2.0, nms_thresh=1.0, box_overlap_metric="iou"
-        )
-        assert_allclose(nms_box, [1, 2, 0], type_test=False)
-
-        nms_box = non_max_suppression(
-            bbox=result_standard, scores=bbox1[:, 1] / 2.0, nms_thresh=-0.1, box_overlap_metric="iou"
-        )
-        assert_allclose(nms_box, [1], type_test=False)
+        # # test box_area, box_clip_to_image, convert_to_list
+        # assert_allclose(box_area(result_standard), expected_area, type_test=True, device_test=True, atol=0.0)
+
+        # result_standard_clip, _ = box_clip_to_image(result_standard, image_size, remove_empty=True)
+        # np.testing.assert_equal(
+        #     isinstance(result_standard_clip, np.ndarray), isinstance(bbox1, np.ndarray), "numpy type"
+        # )
+        # np.testing.assert_equal(
+        #     isinstance(result_standard_clip, torch.Tensor), isinstance(bbox1, torch.Tensor), "torch type"
+        # )
+        # result_area_clip = convert_to_list(box_area(result_standard_clip))
+        # expected_area_clip = list(filter(lambda num: num > 0, convert_to_list(expected_area)))
+        # assert_allclose(result_area_clip, expected_area_clip, type_test=True, device_test=True, atol=0.0)
+
+        # # test box_interp and box_affine, resize_boxes
+        # zoom = [random.uniform(0.5, 5), random.uniform(0.5, 2), random.uniform(0.5, 5)]
+        # new_size = [int(image_size[axis] * zoom[axis] + 0.5) for axis in range(3)]
+        # zoom = [new_size[axis] / float(image_size[axis]) for axis in range(3)]
+
+        # result_standard_interp = box_interp(bbox=result_standard, zoom=zoom)
+        # result_standard_resize = resize_boxes(bbox=result_standard, original_size=image_size, new_size=new_size)
+        # assert_allclose(result_standard_interp, result_standard_resize, type_test=True, device_test=True, atol=0.0)
+
+        # result_area_interp = box_area(result_standard_interp)
+        # expected_area_interp = expected_area * zoom[0] * zoom[1] * zoom[2]
+        # assert_allclose(result_area_interp, expected_area_interp, type_test=True, device_test=True, atol=0.5)
+
+        # affine = torch.diag(torch.Tensor(zoom + [1.0]))
+        # result_affine = box_affine(bbox=bbox1, affine=affine, mode=mode1)
+        # result_affine_standard = box_convert_standard_mode(bbox=result_affine, mode=mode1)
+        # assert_allclose(
+        #     box_area(result_affine_standard), expected_area_interp, type_test=True, device_test=True, atol=0.5
+        # )
+
+        # # test box_center, center_in_boxes, box_center_dist
+        # result_standard_center = box_center(result_standard)
+        # expected_center = box_convert_mode(bbox1=bbox1, mode1=mode1, mode2="cccwhd")[:, :3]
+        # assert_allclose(result_standard_center, expected_center, type_test=True, device_test=True, atol=0.0)
+
+        # center = expected_center
+        # center[2, :] += 10
+        # result_center_in_boxes = center_in_boxes(center=center, bbox=result_standard)
+        # assert_allclose(result_center_in_boxes, np.array([False, True, False]), type_test=False)
+
+        # center_dist, _, _ = box_center_dist(bbox1=result_standard[1:2, :], bbox2=result_standard[1:1, :])
+        # assert_allclose(center_dist, np.array([[]]), type_test=False)
+        # center_dist, _, _ = box_center_dist(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
+        # assert_allclose(center_dist, np.array([[0.0]]), type_test=False)
+        # center_dist, _, _ = box_center_dist(bbox1=result_standard[0:1, :], bbox2=result_standard[0:1, :])
+        # assert_allclose(center_dist, np.array([[0.0]]), type_test=False)
+
+        # # test box_iou
+        # iou_metrics = (box_iou, box_giou)  # type: ignore
+        # for p in iou_metrics:
+        #     self_iou = p(bbox1=result_standard[1:2, :], bbox2=result_standard[1:1, :])
+        #     assert_allclose(self_iou, np.array([[]]), type_test=False)
+
+        #     self_iou = p(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
+        #     assert_allclose(self_iou, np.array([[1.0]]), type_test=False)
+
+        # self_iou = box_pair_giou(bbox1=result_standard[1:1, :], bbox2=result_standard[1:1, :])
+        # assert_allclose(self_iou, np.array([]), type_test=False)
+
+        # self_iou = box_pair_giou(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
+        # assert_allclose(self_iou, np.array([1.0]), type_test=False)
+
+        # # test non_max_suppression
+        # nms_box = non_max_suppression(
+        #     bbox=result_standard, scores=bbox1[:, 1] / 2.0, nms_thresh=1.0, box_overlap_metric="iou"
+        # )
+        # assert_allclose(nms_box, [1, 2, 0], type_test=False)
+
+        # nms_box = non_max_suppression(
+        #     bbox=result_standard, scores=bbox1[:, 1] / 2.0, nms_thresh=-0.1, box_overlap_metric="iou"
+        # )
+        # assert_allclose(nms_box, [1], type_test=False)
 
 
 if __name__ == "__main__":

From 01d7fd912fe27e7c11bbd509aea0abd1ce2bc353 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 12:11:54 -0400
Subject: [PATCH 04/49] add examples in docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 61 +++++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 3fbf21a58b..897adffdf3 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -65,11 +65,17 @@ def get_dimension(
     Get spatial dimension for the giving setting.
     Missing input is allowed. But at least one of the input value should be given.
     Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor
+        bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
         image_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
         mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
         spatial_dimension: 2 or 3
+
+    Example:
+        bbox = torch.zeros(10,6)
+        get_dimension(bbox, mode="xyzxyz") will return 3
+        get_dimension(bbox, mode="xyzxyz", image_size=[100,200,200]) will return 3
+        get_dimension(mode="xyzxyz") will return 3
     """
     spatial_dims_set = set()
     if image_size is not None:
@@ -97,6 +103,9 @@ def get_standard_mode(spatial_dims: int) -> str:
     Returns:
         mode name, choose from STANDARD_MODE
 
+    Example:
+        get_standard_mode(spatial_dims = 2)
+
     """
     if spatial_dims == 2:
         return STANDARD_MODE[0]
@@ -110,13 +119,16 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
     """
     This internal function outputs the corner coordinates of the bbox
     Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor
+        bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
         mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
         if 2D image, outputs (xmin, xmax, ymin, ymax)
         if 3D images, outputs (xmin, xmax, ymin, ymax, zmin, zmax)
         xmin for example, is a Nx1 tensor
-
+    
+    Example:
+        bbox = torch.zeros(10,6)
+        split_into_corners(bbox, mode="cccwhd")
     """
     # convert numpy to tensor if needed
     if isinstance(bbox, np.ndarray):
@@ -188,11 +200,15 @@ def box_convert_mode(
     """
     This function converts the bbox1 in mode 1 to the mode2
     Args:
-        bbox1: source bounding box, Nx4 or Nx6 torch tensor
+        bbox1: source bounding box, Nx4 or Nx6 torch tensor or ndarray
         mode1: source box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
         mode2: target box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
         bbox2: bounding box with target mode, does not share memory with original bbox1
+
+    Example:
+        bbox = torch.zeros(10,6)
+        box_convert_mode(bbox1=bbox, mode1="xyzxyz", mode2="cccwhd")
     """
 
     # convert numpy to tensor if needed
@@ -277,9 +293,14 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
     """
     Convert given bbox to standard mode
     Args:
-        bbox: source bounding box, Nx4 or Nx6 torch tensor
+        bbox: source bounding box, Nx4 or Nx6 torch tensor or ndarray
         mode: source box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-    This function convert the bbox in mode 1 to 'xyxy' or 'xyzxyz'
+    Returns:
+        bbox2: bounding box with standard mode, does not share memory with original bbox1
+
+    Example:
+        bbox = torch.zeros(10,6)
+        box_convert_mode(bbox=bbox, mode="xxyyzz")
     """
     if mode is None:
         mode = get_standard_mode(int(bbox.shape[1] / 2))
@@ -320,7 +341,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     Interpolate bbox
 #     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor
+#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
 #         mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
 #         zoom: The zoom factor along the spatial axes.
 #             If a float, zoom is the same for each spatial axis.
@@ -399,7 +420,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     This function applys affine matrixs to the bbox
 #     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor
+#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
 #         affine: affine matric to be applied to the box coordinate, (spatial_dims+1)x(spatial_dims+1)
 #         mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
 #     Returns:
@@ -457,7 +478,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     This function makes sure the bounding boxes are within the patch.
 #     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #         patch_box: The coordinate of the target patch to clip, it follows standard mode
 #         remove_empty: whether to remove the boxes that are actually empty
 #     Returns:
@@ -513,7 +534,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     This function makes sure the bounding boxes are within the image.
 #     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #         remove_empty: whether to remove the boxes that are actually empty
 #     Returns:
 #         updated box
@@ -527,7 +548,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     This function computes the area of each box
 #     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #     Returns:
 #         area: 1-D tensor
 #     """
@@ -565,7 +586,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     with slight modifications.
 
 #     Args:
-#         bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox1: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
 
 #     Returns:
@@ -629,7 +650,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     with slight modifications.
 
 #     Args:
-#         bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox1: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
 
 #     Returns:
@@ -707,7 +728,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     with slight modifications.
 
 #     Args:
-#         bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox1: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
 
 #     Returns:
@@ -786,7 +807,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     written by Can Zhao, 2019
 #     if there are no boxes, return an empty list
 #     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #     """
 #     look_up_option(box_overlap_metric, ["iou", "giou"])
 #     look_up_option(bbox.shape[1], [4, 6]) // 2
@@ -844,7 +865,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     Compute center point of bbox
 #     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #     Returns:
 #         Tensor: center points [N, dims]
 #     """
@@ -861,8 +882,8 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     Distance of center points between two sets of bbox
 #     Args:
-#         bbox1: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-#         bbox2: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox1: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
+#         bbox2: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #         euclidean: computed the euclidean distance otherwise it uses the l1
 #             distance
 #     Returns:
@@ -902,7 +923,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     Checks which center points are within bbox
 #     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #         center: center points [N, dims]
 #         eps: minimum distance to boarder of bbox
 #     Returns:
@@ -927,7 +948,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
 #     """
 #     modified from torchvision
 #     Args:
-#         bbox: source bounding box, Nx4 or Nx6 torch tensor. The box mode is assumed to be STANDARD_MODE
+#         bbox: source bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
 #         original_size: source image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
 #         original_size: target image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
 #     """

From 208f8894aedd872183d353e6023215841bd7675f Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 12:30:33 -0400
Subject: [PATCH 05/49] clean

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 655 +---------------------------------------
 tests/test_box_utils.py |  77 -----
 2 files changed, 1 insertion(+), 731 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 897adffdf3..d8724d303c 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -125,7 +125,7 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
         if 2D image, outputs (xmin, xmax, ymin, ymax)
         if 3D images, outputs (xmin, xmax, ymin, ymax, zmin, zmax)
         xmin for example, is a Nx1 tensor
-    
+
     Example:
         bbox = torch.zeros(10,6)
         split_into_corners(bbox, mode="cccwhd")
@@ -308,656 +308,3 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
     spatial_dims = get_dimension(bbox=bbox, mode=mode)
     mode_standard = get_standard_mode(spatial_dims)
     return box_convert_mode(bbox1=bbox, mode1=mode, mode2=mode_standard)
-
-
-# def point_interp(
-#     point: NdarrayOrTensor, zoom: Union[Sequence[float], float]
-# ) -> Union[Sequence, torch.Tensor, np.ndarray]:
-#     """
-#     Convert point position from one pixel/voxel size to another pixel/voxel size
-#     Args:
-#         point: point coordinate, Nx2 or Nx3, [x, y] or [x, y, z]
-#         zoom: The zoom factor along the spatial axes.
-#             If a float, zoom is the same for each spatial axis.
-#             If a sequence, zoom should contain one value for each spatial axis.
-#     Returns:
-#         point2: zoomed point coordinate, does not share memory with original point
-#     """
-#     # make sure the spatial dimensions of the inputs match with each other
-#     spatial_dims = point.shape[1]
-#     spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
-
-#     # compute new point
-#     point2 = deepcopy(point)
-#     _zoom = ensure_tuple_rep(zoom, spatial_dims)
-#     for axis in range(0, spatial_dims):
-#         point2[:, axis] = point[:, axis] * _zoom[axis]
-#     return point2
-
-
-# def box_interp(
-#     bbox: NdarrayOrTensor, zoom: Union[Sequence[float], float], mode: Union[str, None] = None
-# ) -> torch.Tensor:
-#     """
-#     Interpolate bbox
-#     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
-#         mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-#         zoom: The zoom factor along the spatial axes.
-#             If a float, zoom is the same for each spatial axis.
-#             If a sequence, zoom should contain one value for each spatial axis.
-
-#     Returns:
-#         returned interpolated bbox has the same mode as bbox, does not share memory with original bbox
-#     """
-#     if mode is None:
-#         mode = get_standard_mode(int(bbox.shape[1] / 2))
-#     mode = look_up_option(mode, supported=SUPPORT_MODE)
-#     spatial_dims = get_dimension(bbox=bbox, mode=mode)
-
-#     # convert to standard mode
-#     mode_standard = get_standard_mode(spatial_dims)
-#     bbox_standard = box_convert_mode(bbox1=bbox, mode1=mode, mode2=mode_standard)
-
-#     # interp
-#     corner_lt = point_interp(bbox_standard[:, :spatial_dims], zoom)
-#     corner_rb = point_interp(bbox_standard[:, spatial_dims:], zoom)
-
-#     bbox_standard_interp = deepcopy(bbox_standard)
-#     bbox_standard_interp[:, :spatial_dims] = corner_lt
-#     bbox_standard_interp[:, spatial_dims:] = corner_rb
-
-#     # convert back
-#     bbox2 = box_convert_mode(bbox1=bbox_standard_interp, mode1=mode_standard, mode2=mode)
-#     return bbox2
-
-# def point_affine(
-#     point: NdarrayOrTensor, affine: NdarrayOrTensor, include_shift: bool = True
-# ) -> Union[Sequence, torch.Tensor, np.ndarray]:
-#     """
-#     Convert point position from one pixel/voxel size to another pixel/voxel size
-#     Args:
-#         point: point coordinate, Nx2 or Nx3, [x, y] or [x, y, z]
-#         affine: affine transform
-#         include_shift: does the func apply translation (shift) in the affine transform
-#     Returns:
-#         point2: transformed point coordinate, does not share memory with original point
-#     """
-#     # make sure the spatial dimensions of the inputs match with each other
-#     spatial_dims = point.shape[1]
-#     spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
-
-#     # convert numpy to tensor if needed
-#     if isinstance(point, np.ndarray):
-#         point = convert_to_tensor(point)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-#     affine = convert_to_tensor(affine, device=point.device, dtype=point.dtype)
-
-#     # compute new point
-#     if include_shift:
-#         # append 1 to form Nx(spatial_dims+1) vector, then transpose
-#         point2 = torch.cat(
-#             [point, torch.ones(point.shape[0], 1, device=point.device, dtype=point.dtype)], dim=1
-#         ).transpose(0, 1)
-#         # apply affine
-#         point2 = torch.matmul(affine, point2)
-#         # remove appended 1 and transpose back
-#         point2 = point2[:spatial_dims, :].transpose(0, 1)
-#     else:
-#         point2 = point.transpose(0, 1)
-#         point2 = torch.matmul(affine[:spatial_dims, :spatial_dims], point2)
-#         point2 = point2.transpose(0, 1)
-
-#     # convert tensor back to numpy if needed
-#     if numpy_bool:
-#         point2 = convert_to_numpy(point2)
-#     return point2
-
-
-# def box_affine(bbox: NdarrayOrTensor, affine: NdarrayOrTensor, mode: Union[str, None] = None) -> torch.Tensor:
-#     """
-#     This function applys affine matrixs to the bbox
-#     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
-#         affine: affine matric to be applied to the box coordinate, (spatial_dims+1)x(spatial_dims+1)
-#         mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-#     Returns:
-#         returned affine transformed bbox has the same mode as bbox, does not share memory with original bbox
-#     """
-#     # convert numpy to tensor if needed
-#     if isinstance(bbox, np.ndarray):
-#         bbox = convert_to_tensor(bbox)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-
-#     box_dtype = bbox.dtype
-#     compute_dtype = torch.float32
-#     if box_dtype is torch.float16:
-#         bbox = bbox.to(dtype=compute_dtype)
-#     affine = convert_to_tensor(affine, device=bbox.device, dtype=bbox.dtype)
-
-#     if mode is None:
-#         mode = get_standard_mode(int(bbox.shape[1] / 2))
-#     mode = look_up_option(mode, supported=SUPPORT_MODE)
-#     spatial_dims = get_dimension(bbox=bbox, mode=mode)
-
-#     if mode in ["xxyy", "xxyyzz", "xyxy", "xyzxyz"]:
-#         # extract left top and right bottom, and apply affine
-#         if mode in ["xxyy", "xxyyzz"]:
-#             lt = point_affine(bbox[:, ::2], affine, include_shift=True)
-#             rb = point_affine(bbox[:, 1::2], affine, include_shift=True)
-#         if mode in ["xyxy", "xyzxyz"]:
-#             lt = point_affine(bbox[:, :spatial_dims], affine, include_shift=True)
-#             rb = point_affine(bbox[:, spatial_dims:], affine, include_shift=True)
-
-#         lt_new, _ = torch.min(torch.stack([lt, rb], dim=2), dim=2)
-#         rb_new, _ = torch.max(torch.stack([lt, rb], dim=2), dim=2)
-
-#         bbox2 = box_convert_mode(torch.cat([lt_new, rb_new], dim=1), mode1=STANDARD_MODE[spatial_dims - 2], mode2=mode)
-
-#     elif mode in ["ccwh", "cccwhd", "xywh", "xyzwhd"]:
-#         cc = point_affine(bbox[:, :spatial_dims], affine, include_shift=True)
-#         wh = point_affine(bbox[:, spatial_dims:], affine, include_shift=False).absolute()
-#         bbox2 = torch.cat([cc, wh], dim=1)
-
-#     else:
-#         raise RuntimeError("Should not be here")
-
-#     # convert tensor back to numpy if needed
-#     if numpy_bool:
-#         bbox2 = convert_to_numpy(bbox2.to(dtype=box_dtype))
-#     return bbox2
-
-
-# def box_clip_to_patch(
-#     bbox: NdarrayOrTensor, patch_box: Union[Sequence[int], torch.Tensor, np.ndarray], remove_empty: bool = True
-# ):
-#     """
-#     This function makes sure the bounding boxes are within the patch.
-#     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         patch_box: The coordinate of the target patch to clip, it follows standard mode
-#         remove_empty: whether to remove the boxes that are actually empty
-#     Returns:
-#         new_bbox: updated box, does not share memory with original bbox
-#         keep: the indice of the new_bbox regarding to input bbox. When remove_empty=True, only some of the boxes are kept
-#     """
-#     if bbox.shape[0] == 0:
-#         return deepcopy(bbox), []
-
-#     spatial_dims = get_dimension(bbox=bbox)
-#     new_bbox = deepcopy(bbox)
-
-#     # convert numpy to tensor if needed
-#     if isinstance(new_bbox, np.ndarray):
-#         new_bbox = convert_to_tensor(new_bbox)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-
-#     # convert to float32 since torch.clamp_ does not support float16
-#     box_dtype = new_bbox.dtype
-#     compute_dtype = torch.float32
-#     if box_dtype is torch.float16:
-#         new_bbox = new_bbox.to(dtype=compute_dtype)
-
-#     # makes sure the bounding boxes are within the image
-#     for axis in range(0, spatial_dims):
-#         new_bbox[:, axis].clamp_(min=patch_box[axis], max=patch_box[axis + spatial_dims] - TO_REMOVE)
-#         new_bbox[:, axis + spatial_dims].clamp_(min=patch_box[axis], max=patch_box[axis + spatial_dims] - TO_REMOVE)
-#         new_bbox[:, axis] -= patch_box[axis]
-#         new_bbox[:, axis + spatial_dims] -= patch_box[axis]
-
-#     # remove the boxes that are actually empty
-#     if remove_empty:
-#         keep = (new_bbox[:, spatial_dims] >= new_bbox[:, 0] + 1 - TO_REMOVE) & (
-#             new_bbox[:, 1 + spatial_dims] >= new_bbox[:, 1] + 1 - TO_REMOVE
-#         )
-#         if spatial_dims == 3:
-#             keep = keep & (new_bbox[:, 2 + spatial_dims] >= new_bbox[:, 2] + 1 - TO_REMOVE)
-#         new_bbox = new_bbox[keep]
-
-#     # convert tensor back to numpy if needed
-#     new_bbox = new_bbox.to(dtype=box_dtype)
-#     if numpy_bool:
-#         new_bbox = convert_to_numpy(new_bbox)
-
-#     return new_bbox, keep
-
-
-# def box_clip_to_image(
-#     bbox: NdarrayOrTensor, image_size: Union[Sequence[int], torch.Tensor, np.ndarray], remove_empty: bool = True
-# ):
-#     """
-#     This function makes sure the bounding boxes are within the image.
-#     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         remove_empty: whether to remove the boxes that are actually empty
-#     Returns:
-#         updated box
-#     """
-#     spatial_dims = get_dimension(bbox=bbox, image_size=image_size)
-#     image_box = [0] * spatial_dims + convert_to_list(image_size)
-#     return box_clip_to_patch(bbox, image_box, remove_empty)
-
-
-# def box_area(bbox: NdarrayOrTensor) -> NdarrayOrTensor:
-#     """
-#     This function computes the area of each box
-#     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#     Returns:
-#         area: 1-D tensor
-#     """
-
-#     spatial_dims = get_dimension(bbox=bbox)
-
-#     area = bbox[:, spatial_dims] - bbox[:, 0] + TO_REMOVE
-#     for axis in range(1, spatial_dims):
-#         area = area * (bbox[:, axis + spatial_dims] - bbox[:, axis] + TO_REMOVE)
-
-#     if isinstance(area, np.ndarray):
-#         area = convert_to_tensor(area)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-
-#     if area.isnan().any() or area.isinf().any():
-#         if area.dtype is torch.float16:
-#             raise ValueError("Box area is NaN or Inf. bbox is float16. Please change to float32 and test it again.")
-#         else:
-#             raise ValueError("Box area is NaN or Inf.")
-
-#     if numpy_bool:
-#         area = convert_to_numpy(area)
-#     return area
-
-
-# def box_iou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
-#     """
-#     Compute the intersection over union of two set of boxes. This function is not differentialable.
-
-#     IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
-
-#     Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
-#     with slight modifications.
-
-#     Args:
-#         bbox1: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-
-#     Returns:
-#       (tensor) iou, sized [N,M].
-
-#     Reference:
-#       https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
-#     """
-
-#     # convert numpy to tensor if needed
-#     if isinstance(bbox1, np.ndarray):
-#         bbox1 = convert_to_tensor(bbox1)
-#         bbox2 = convert_to_tensor(bbox2)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-
-#     spatial_dims = get_dimension(bbox=bbox1)
-
-#     # we do computation with compute_dtype to avoid overflow
-#     box_dtype = bbox1.dtype
-#     compute_dtype = torch.float32
-
-#     # compute area with float32
-#     area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
-#     area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Mx1
-
-#     # get the left top and right bottom points for the NxM combinations
-#     lt = torch.max(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
-#         dtype=compute_dtype
-#     )  # [N,M,spatial_dims] left top
-#     rb = torch.min(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
-#         dtype=compute_dtype
-#     )  # [N,M,spatial_dims] right bottom
-#     # compute size for the intersection region for the NxM combinations
-#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
-#     inter = wh[:, :, 0]  # [N,M]
-#     for axis in range(1, spatial_dims):
-#         inter = inter * wh[:, :, axis]
-
-#     # compute IoU and convert back to original box_dtype
-#     iou = inter / (area1[:, None] + area2 - inter + torch.finfo(compute_dtype).eps)  # [N,M,spatial_dims]
-#     iou = iou.to(dtype=box_dtype)
-
-#     if torch.isnan(iou).any() or torch.isinf(iou).any():
-#         raise ValueError("Box IoU is NaN or Inf.")
-
-#     # convert tensor back to numpy if needed
-#     if numpy_bool:
-#         iou = convert_to_numpy(iou)
-#     return iou
-
-
-# def box_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
-#     """
-#     Compute the generalized intersection over union of two set of boxes. This function is not differentialable.
-
-#     IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
-
-#     Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
-#     with slight modifications.
-
-#     Args:
-#         bbox1: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-
-#     Returns:
-#       (tensor) iou, sized [N,M].
-
-#     Reference:
-#       https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
-#     """
-#     # convert numpy to tensor if needed
-#     if isinstance(bbox1, np.ndarray):
-#         bbox1 = convert_to_tensor(bbox1)
-#         bbox2 = convert_to_tensor(bbox2)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-
-#     spatial_dims = get_dimension(bbox=bbox1)
-
-#     # we do computation with compute_dtype to avoid overflow
-#     box_dtype = bbox1.dtype
-#     compute_dtype = torch.float32
-
-#     # compute area with float32
-#     area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
-#     area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Mx1
-
-#     # get the left top and right bottom points for the NxM combinations
-#     lt = torch.max(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
-#         dtype=compute_dtype
-#     )  # [N,M,spatial_dims] left top
-#     rb = torch.min(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
-#         dtype=compute_dtype
-#     )  # [N,M,spatial_dims] right bottom
-#     # compute size for the intersection region for the NxM combinations
-#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
-#     inter = wh[:, :, 0]  # [N,M]
-#     for axis in range(1, spatial_dims):
-#         inter = inter * wh[:, :, axis]
-
-#     # compute IoU and convert back to original box_dtype
-#     union = area1[:, None] + area2 - inter
-#     iou = inter / (union + torch.finfo(compute_dtype).eps)  # [N,M,spatial_dims]
-
-#     # enclosure
-#     lt = torch.min(bbox1[:, None, :spatial_dims], bbox2[:, :spatial_dims]).to(
-#         dtype=compute_dtype
-#     )  # [N,M,spatial_dims] left top
-#     rb = torch.max(bbox1[:, None, spatial_dims:], bbox2[:, spatial_dims:]).to(
-#         dtype=compute_dtype
-#     )  # [N,M,spatial_dims] right bottom
-#     # compute size for the intersection region for the NxM combinations
-#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,spatial_dims]
-#     enclosure = wh[:, :, 0]  # [N,M]
-#     for axis in range(1, spatial_dims):
-#         enclosure = enclosure * wh[:, :, axis]
-
-#     giou = iou - (enclosure - union) / (enclosure + torch.finfo(compute_dtype).eps)
-#     giou = giou.to(dtype=box_dtype)
-#     if torch.isnan(giou).any() or torch.isinf(giou).any():
-#         raise ValueError("Box GIoU is NaN or Inf.")
-
-#     # convert tensor back to numpy if needed
-#     if numpy_bool:
-#         giou = convert_to_numpy(giou)
-#     return giou
-
-
-# def box_pair_giou(bbox1: NdarrayOrTensor, bbox2: NdarrayOrTensor) -> NdarrayOrTensor:
-#     """
-#     Compute the generalized intersection over union of two set of boxes. This function is not differentialable.
-
-#     IMPORTANT: Please run box_clip_to_image(bbox, image_size, mode, remove_empty=True) before computing IoU
-
-#     Implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
-#     with slight modifications.
-
-#     Args:
-#         bbox1: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         bbox2: bounding box, Mx4 or Mx6 torch tensor. The box mode is assumed to be STANDARD_MODE
-
-#     Returns:
-#       (tensor) iou, sized [N].
-
-#     Reference:
-#       https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
-#     """
-
-#     if bbox1.shape[0] != bbox2.shape[0]:
-#         raise ValueError("bbox1 and bbox2 should be paired.")
-
-#     # convert numpy to tensor if needed
-#     if isinstance(bbox1, np.ndarray):
-#         bbox1 = convert_to_tensor(bbox1)
-#         bbox2 = convert_to_tensor(bbox2)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-
-#     spatial_dims = get_dimension(bbox=bbox1)
-
-#     # we do computation with compute_dtype to avoid overflow
-#     box_dtype = bbox1.dtype
-#     compute_dtype = torch.float32
-
-#     # compute area
-#     area1 = box_area(bbox=bbox1.to(dtype=compute_dtype))  # Nx1
-#     area2 = box_area(bbox=bbox2.to(dtype=compute_dtype))  # Nx1
-
-#     # get the left top and right bottom points for the NxM combinations
-#     lt = torch.max(bbox1[:, :spatial_dims], bbox2[:, :spatial_dims]).to(
-#         dtype=compute_dtype
-#     )  # [N,spatial_dims] left top
-#     rb = torch.min(bbox1[:, spatial_dims:], bbox2[:, spatial_dims:]).to(
-#         dtype=compute_dtype
-#     )  # [N,spatial_dims] right bottom
-#     # compute size for the intersection region for the NxM combinations
-#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,spatial_dims]
-#     inter = wh[:, 0]  # [N,M]
-#     for axis in range(1, spatial_dims):
-#         inter = inter * wh[:, axis]
-
-#     # compute IoU and convert back to original box_dtype
-#     union = area1 + area2 - inter
-#     iou = inter / (union + torch.finfo(compute_dtype).eps)  # [N,spatial_dims]
-
-#     # enclosure
-#     lt = torch.min(bbox1[:, :spatial_dims], bbox2[:, :spatial_dims]).to(
-#         dtype=compute_dtype
-#     )  # [N,spatial_dims] left top
-#     rb = torch.max(bbox1[:, spatial_dims:], bbox2[:, spatial_dims:]).to(
-#         dtype=compute_dtype
-#     )  # [N,spatial_dims] right bottom
-#     # compute size for the intersection region for the NxM combinations
-#     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,spatial_dims]
-#     enclosure = wh[:, 0]  # [N,M]
-#     for axis in range(1, spatial_dims):
-#         enclosure = enclosure * wh[:, axis]
-
-#     giou = iou - (enclosure - union) / (enclosure + torch.finfo(compute_dtype).eps)
-#     giou = giou.to(dtype=box_dtype)  # [N,spatial_dims]
-#     if torch.isnan(giou).any() or torch.isinf(giou).any():
-#         raise ValueError("Box GIoU is NaN or Inf.")
-
-#     # convert tensor back to numpy if needed
-#     if numpy_bool:
-#         giou = convert_to_numpy(giou)
-#     return giou
-
-
-# def non_max_suppression(
-#     bbox: NdarrayOrTensor, scores: NdarrayOrTensor, nms_thresh: float, max_proposals=-1, box_overlap_metric="iou"
-# ):
-#     """
-#     written by Can Zhao, 2019
-#     if there are no boxes, return an empty list
-#     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#     """
-#     look_up_option(box_overlap_metric, ["iou", "giou"])
-#     look_up_option(bbox.shape[1], [4, 6]) // 2
-#     if bbox.shape[0] == 0:
-#         return []
-
-#     if bbox.shape[0] != scores.shape[0]:
-#         raise ValueError(
-#             f"bbox and scores should have same length, got bbox shape {bbox.shape}, scores shape {scores.shape}"
-#         )
-
-#     # convert numpy to tensor if needed
-#     if isinstance(bbox, np.ndarray):
-#         bbox = convert_to_tensor(bbox)
-#         scores = convert_to_tensor(scores)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-
-#     scores_sort, indices = torch.sort(scores, descending=True)
-#     bbox_sort = deepcopy(bbox)[indices, :]
-
-#     # initialize the list of picked indexes
-#     pick = []
-#     idxs = np.arange(0, bbox_sort.shape[0])
-#     # keep looping while some indexes still remain in the indexes
-#     # list
-#     while len(idxs) > 0:
-#         # grab the first index in the indexes list and add the
-#         # index value to the list of picked indexes
-#         i = idxs[0]
-#         pick.append(i)
-#         if len(pick) >= max_proposals >= 1:
-#             break
-
-#         # compute the IoU
-#         if box_overlap_metric == "giou":
-#             iou = box_giou(bbox_sort[idxs[1:], :], bbox_sort[i : i + 1, :])
-#         else:
-#             iou = box_iou(bbox_sort[idxs[1:], :], bbox_sort[i : i + 1, :])
-
-#         # delete all indexes from the index list that have overlap > nms_thresh
-#         idxs = np.delete(idxs, np.concatenate(([0], 1 + np.where(iou.cpu().numpy() > nms_thresh)[0])))
-
-#     # return only the bounding boxes that were picked using the
-#     # integer data type
-#     pick_idx = indices[pick]
-#     # convert tensor back to numpy if needed
-#     if numpy_bool:
-#         pick_idx = convert_to_numpy(pick_idx)
-#     return pick_idx
-
-
-# def box_center(bbox: NdarrayOrTensor) -> torch.Tensor:
-#     """
-#     Compute center point of bbox
-#     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#     Returns:
-#         Tensor: center points [N, dims]
-#     """
-#     spatial_dims = bbox.shape[1] // 2
-#     centers = [(bbox[:, axis + spatial_dims] + bbox[:, axis]) / 2.0 for axis in range(spatial_dims)]
-
-#     if isinstance(bbox, np.ndarray):
-#         return np.stack(centers, axis=1)
-#     else:
-#         return torch.stack(centers, dim=1)
-
-
-# def box_center_dist(bbox1: torch.Tensor, bbox2: torch.Tensor, euclidean: bool = True) -> Sequence[torch.Tensor]:
-#     """
-#     Distance of center points between two sets of bbox
-#     Args:
-#         bbox1: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         bbox2: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         euclidean: computed the euclidean distance otherwise it uses the l1
-#             distance
-#     Returns:
-#         Tensor: the NxM matrix containing the pairwise
-#             distances for every element in bbox1 and bbox2; [N, M]
-#         Tensor: center points of bbox1
-#         Tensor: center points of bbox2
-#     """
-#     # convert numpy to tensor if needed
-#     if isinstance(bbox1, np.ndarray):
-#         bbox1 = convert_to_tensor(bbox1)
-#         bbox2 = convert_to_tensor(bbox2)
-#         numpy_bool = True
-#     else:
-#         numpy_bool = False
-
-#     box_dtype = bbox1.dtype
-#     compute_dtype = torch.float32
-
-#     center1 = box_center(bbox1.to(compute_dtype))  # [N, dims]
-#     center2 = box_center(bbox2.to(compute_dtype))  # [M, dims]
-
-#     if euclidean:
-#         dists = (center1[:, None] - center2[None]).pow(2).sum(-1).sqrt()
-#     else:
-#         # before sum: [N, M, dims]
-#         dists = (center1[:, None] - center2[None]).sum(-1)
-
-#     # convert tensor back to numpy if needed
-#     dists, center1, center2 = dists.to(box_dtype), center1.to(box_dtype), center2.to(box_dtype)
-#     if numpy_bool:
-#         dists, center1, center2 = convert_to_numpy(dists), convert_to_numpy(center1), convert_to_numpy(center2)
-#     return dists, center1, center2
-
-
-# def center_in_boxes(center: NdarrayOrTensor, bbox: NdarrayOrTensor, eps: float = 0.01) -> NdarrayOrTensor:
-#     """
-#     Checks which center points are within bbox
-#     Args:
-#         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         center: center points [N, dims]
-#         eps: minimum distance to boarder of bbox
-#     Returns:
-#         Tensor: boolean array indicating which center points are within
-#             the bbox [N]
-#     """
-#     spatial_dims = bbox.shape[1] // 2
-#     axes = [center[:, axis] - bbox[:, axis] for axis in range(spatial_dims)] + [
-#         bbox[:, axis + spatial_dims] - center[:, axis] for axis in range(spatial_dims)
-#     ]
-#     if isinstance(bbox, np.ndarray):
-#         return np.stack(axes, axis=1).min(axis=1) > eps  # array[bool]
-#     else:
-#         return torch.stack(axes, dim=1).min(dim=1)[0] > eps  # Tensor[bool]
-
-
-# def resize_boxes(
-#     bbox: NdarrayOrTensor,
-#     original_size: Union[Sequence, torch.Tensor, np.ndarray],
-#     new_size: Union[Sequence, torch.Tensor, np.ndarray],
-# ) -> NdarrayOrTensor:
-#     """
-#     modified from torchvision
-#     Args:
-#         bbox: source bounding box, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be STANDARD_MODE
-#         original_size: source image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
-#         original_size: target image size, Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
-#     """
-#     if len(original_size) != len(new_size):
-#         raise ValueError("The dimension of original image size should equal to the new image size")
-#     spatial_dims = get_dimension(bbox, original_size)
-
-#     original_size = convert_to_list(original_size)
-#     new_size = convert_to_list(new_size)
-#     zoom = [new_size[axis] / float(original_size[axis]) for axis in range(spatial_dims)]
-
-#     return box_interp(bbox=bbox, zoom=zoom)
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index 307988d3d0..b4bb6646cd 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -128,83 +128,6 @@ def test_value(self, input_data, mode2, expected_box, expected_area):
         expected_box_standard = box_convert_standard_mode(bbox=expected_box, mode=mode2)
         assert_allclose(result_standard, expected_box_standard, type_test=True, device_test=True, atol=0.0)
 
-        # # test box_area, box_clip_to_image, convert_to_list
-        # assert_allclose(box_area(result_standard), expected_area, type_test=True, device_test=True, atol=0.0)
-
-        # result_standard_clip, _ = box_clip_to_image(result_standard, image_size, remove_empty=True)
-        # np.testing.assert_equal(
-        #     isinstance(result_standard_clip, np.ndarray), isinstance(bbox1, np.ndarray), "numpy type"
-        # )
-        # np.testing.assert_equal(
-        #     isinstance(result_standard_clip, torch.Tensor), isinstance(bbox1, torch.Tensor), "torch type"
-        # )
-        # result_area_clip = convert_to_list(box_area(result_standard_clip))
-        # expected_area_clip = list(filter(lambda num: num > 0, convert_to_list(expected_area)))
-        # assert_allclose(result_area_clip, expected_area_clip, type_test=True, device_test=True, atol=0.0)
-
-        # # test box_interp and box_affine, resize_boxes
-        # zoom = [random.uniform(0.5, 5), random.uniform(0.5, 2), random.uniform(0.5, 5)]
-        # new_size = [int(image_size[axis] * zoom[axis] + 0.5) for axis in range(3)]
-        # zoom = [new_size[axis] / float(image_size[axis]) for axis in range(3)]
-
-        # result_standard_interp = box_interp(bbox=result_standard, zoom=zoom)
-        # result_standard_resize = resize_boxes(bbox=result_standard, original_size=image_size, new_size=new_size)
-        # assert_allclose(result_standard_interp, result_standard_resize, type_test=True, device_test=True, atol=0.0)
-
-        # result_area_interp = box_area(result_standard_interp)
-        # expected_area_interp = expected_area * zoom[0] * zoom[1] * zoom[2]
-        # assert_allclose(result_area_interp, expected_area_interp, type_test=True, device_test=True, atol=0.5)
-
-        # affine = torch.diag(torch.Tensor(zoom + [1.0]))
-        # result_affine = box_affine(bbox=bbox1, affine=affine, mode=mode1)
-        # result_affine_standard = box_convert_standard_mode(bbox=result_affine, mode=mode1)
-        # assert_allclose(
-        #     box_area(result_affine_standard), expected_area_interp, type_test=True, device_test=True, atol=0.5
-        # )
-
-        # # test box_center, center_in_boxes, box_center_dist
-        # result_standard_center = box_center(result_standard)
-        # expected_center = box_convert_mode(bbox1=bbox1, mode1=mode1, mode2="cccwhd")[:, :3]
-        # assert_allclose(result_standard_center, expected_center, type_test=True, device_test=True, atol=0.0)
-
-        # center = expected_center
-        # center[2, :] += 10
-        # result_center_in_boxes = center_in_boxes(center=center, bbox=result_standard)
-        # assert_allclose(result_center_in_boxes, np.array([False, True, False]), type_test=False)
-
-        # center_dist, _, _ = box_center_dist(bbox1=result_standard[1:2, :], bbox2=result_standard[1:1, :])
-        # assert_allclose(center_dist, np.array([[]]), type_test=False)
-        # center_dist, _, _ = box_center_dist(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
-        # assert_allclose(center_dist, np.array([[0.0]]), type_test=False)
-        # center_dist, _, _ = box_center_dist(bbox1=result_standard[0:1, :], bbox2=result_standard[0:1, :])
-        # assert_allclose(center_dist, np.array([[0.0]]), type_test=False)
-
-        # # test box_iou
-        # iou_metrics = (box_iou, box_giou)  # type: ignore
-        # for p in iou_metrics:
-        #     self_iou = p(bbox1=result_standard[1:2, :], bbox2=result_standard[1:1, :])
-        #     assert_allclose(self_iou, np.array([[]]), type_test=False)
-
-        #     self_iou = p(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
-        #     assert_allclose(self_iou, np.array([[1.0]]), type_test=False)
-
-        # self_iou = box_pair_giou(bbox1=result_standard[1:1, :], bbox2=result_standard[1:1, :])
-        # assert_allclose(self_iou, np.array([]), type_test=False)
-
-        # self_iou = box_pair_giou(bbox1=result_standard[1:2, :], bbox2=result_standard[1:2, :])
-        # assert_allclose(self_iou, np.array([1.0]), type_test=False)
-
-        # # test non_max_suppression
-        # nms_box = non_max_suppression(
-        #     bbox=result_standard, scores=bbox1[:, 1] / 2.0, nms_thresh=1.0, box_overlap_metric="iou"
-        # )
-        # assert_allclose(nms_box, [1, 2, 0], type_test=False)
-
-        # nms_box = non_max_suppression(
-        #     bbox=result_standard, scores=bbox1[:, 1] / 2.0, nms_thresh=-0.1, box_overlap_metric="iou"
-        # )
-        # assert_allclose(nms_box, [1], type_test=False)
-
 
 if __name__ == "__main__":
     unittest.main()

From 406a73cd2690f5ac997b8bad2967e3969131f4a7 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 12:35:21 -0400
Subject: [PATCH 06/49] correct typo

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index d8724d303c..611afae50f 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -296,7 +296,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
         bbox: source bounding box, Nx4 or Nx6 torch tensor or ndarray
         mode: source box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
-        bbox2: bounding box with standard mode, does not share memory with original bbox1
+        bbox2: bounding box with standard mode, does not share memory with original bbox
 
     Example:
         bbox = torch.zeros(10,6)

From 6cd14ae8fbd23f2e9983b9c0bcba081c0a6eaf9b Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 12:44:30 -0400
Subject: [PATCH 07/49] add comments to explain box mode

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 611afae50f..bf67fb57a9 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -21,13 +21,19 @@
 from monai.utils.module import look_up_option
 from monai.utils.type_conversion import convert_to_numpy, convert_to_tensor
 
-CORNER_CORNER_MODE = ["xyxy", "xyzxyz"]  # [2d_mode, 3d_mode]
-XXYYZZ_MODE = ["xxyy", "xxyyzz"]  # [2d_mode, 3d_mode]
-CORNER_SIZE_MODE = ["xywh", "xyzwhd"]  # [2d_mode, 3d_mode]
-CENTER_SIZE_MODE = ["ccwh", "cccwhd"]  # [2d_mode, 3d_mode]
-
-STANDARD_MODE = CORNER_CORNER_MODE  # [2d_mode, 3d_mode]
-SUPPORT_MODE = CORNER_CORNER_MODE + XXYYZZ_MODE + CORNER_SIZE_MODE + CENTER_SIZE_MODE
+# We support several box modes, i.e., representation of a bounding box
+CORNER_CORNER_MODE = ["xyxy", "xyzxyz"]  # [xmin, ymin, xmax, ymax] and [xmin, ymin, zmin, xmax, ymax, zmax]
+XXYYZZ_MODE = ["xxyy", "xxyyzz"]  # [xmin, xmax, ymin, ymax] and [xmin, xmax, ymin, ymax, zmin, zmax]
+CORNER_SIZE_MODE = ["xywh", "xyzwhd"]  # [xmin, ymin, xsize, ysize] and [xmin, ymin, zmin, xsize, ysize, zsize]
+CENTER_SIZE_MODE = [
+    "ccwh",
+    "cccwhd",
+]  # [xcenter, ycenter, x_size, y_size] and [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+
+STANDARD_MODE = CORNER_CORNER_MODE  # standard box modes supported by all the box util functions
+SUPPORT_MODE = (
+    CORNER_CORNER_MODE + XXYYZZ_MODE + CORNER_SIZE_MODE + CENTER_SIZE_MODE
+)  # supported box modes for some box util functions
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
 #      i.e., when x_min=1, x_max=2, we have w = 1

From 47b99e5e2c11890826952e48cdd20521151dcc67 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 12:47:05 -0400
Subject: [PATCH 08/49] correct typo

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index bf67fb57a9..fa1ababfbd 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -28,7 +28,7 @@
 CENTER_SIZE_MODE = [
     "ccwh",
     "cccwhd",
-]  # [xcenter, ycenter, x_size, y_size] and [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+]  # [xcenter, ycenter, xsize, ysize] and [xcenter, ycenter, zcenter, xsize, ysize, zsize]
 
 STANDARD_MODE = CORNER_CORNER_MODE  # standard box modes supported by all the box util functions
 SUPPORT_MODE = (
@@ -36,11 +36,11 @@
 )  # supported box modes for some box util functions
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
-#      i.e., when x_min=1, x_max=2, we have w = 1
+#      i.e., when xmin=1, xmax=2, we have w = 1
 # TO_REMOVE = 1  if in 'xxyy','xxyyzz' mode, the bottom-right corner is included in the box,
-#       i.e., when x_min=1, x_max=2, we have w = 2
+#       i.e., when xmin=1, xmax=2, we have w = 2
 # Currently only TO_REMOVE = 0 has been tested. Please use TO_REMOVE = 0
-TO_REMOVE = 0  # x_max-x_min = w -TO_REMOVE.
+TO_REMOVE = 0  # xmax-xmin = w -TO_REMOVE.
 
 
 def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> list:

From e86a6795bc15a7f395035f3cb2291e5f533aba95 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 13:08:16 -0400
Subject: [PATCH 09/49] correct docstring examples

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index fa1ababfbd..b9fb4698d6 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -78,7 +78,7 @@ def get_dimension(
         spatial_dimension: 2 or 3
 
     Example:
-        bbox = torch.zeros(10,6)
+        bbox = torch.ones(10,6)
         get_dimension(bbox, mode="xyzxyz") will return 3
         get_dimension(bbox, mode="xyzxyz", image_size=[100,200,200]) will return 3
         get_dimension(mode="xyzxyz") will return 3
@@ -133,7 +133,7 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
         xmin for example, is a Nx1 tensor
 
     Example:
-        bbox = torch.zeros(10,6)
+        bbox = torch.ones(10,6)
         split_into_corners(bbox, mode="cccwhd")
     """
     # convert numpy to tensor if needed
@@ -213,7 +213,7 @@ def box_convert_mode(
         bbox2: bounding box with target mode, does not share memory with original bbox1
 
     Example:
-        bbox = torch.zeros(10,6)
+        bbox = torch.ones(10,6)
         box_convert_mode(bbox1=bbox, mode1="xyzxyz", mode2="cccwhd")
     """
 
@@ -305,8 +305,8 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
         bbox2: bounding box with standard mode, does not share memory with original bbox
 
     Example:
-        bbox = torch.zeros(10,6)
-        box_convert_mode(bbox=bbox, mode="xxyyzz")
+        bbox = torch.ones(10,6)
+        box_convert_standard_mode(bbox=bbox, mode="xxyyzz")
     """
     if mode is None:
         mode = get_standard_mode(int(bbox.shape[1] / 2))

From 3b0c8ea9b5bfb37512789d681e94919b00e2ef41 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 13:19:56 -0400
Subject: [PATCH 10/49] use float literals when computing box centers

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index b9fb4698d6..f022905c91 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -256,9 +256,9 @@ def box_convert_mode(
             elif mode2 == "cccwhd":
                 bbox2 = torch.cat(
                     (
-                        (xmin + xmax + TO_REMOVE) / 2,
-                        (ymin + ymax + TO_REMOVE) / 2,
-                        (zmin + zmax + TO_REMOVE) / 2,
+                        (xmin + xmax + TO_REMOVE) / 2.,
+                        (ymin + ymax + TO_REMOVE) / 2.,
+                        (zmin + zmax + TO_REMOVE) / 2.,
                         xmax - xmin + TO_REMOVE,
                         ymax - ymin + TO_REMOVE,
                         zmax - zmin + TO_REMOVE,
@@ -276,8 +276,8 @@ def box_convert_mode(
             elif mode2 == "ccwh":
                 bbox2 = torch.cat(
                     (
-                        (xmin + xmax + TO_REMOVE) / 2,
-                        (ymin + ymax + TO_REMOVE) / 2,
+                        (xmin + xmax + TO_REMOVE) / 2.,
+                        (ymin + ymax + TO_REMOVE) / 2.,
                         xmax - xmin + TO_REMOVE,
                         ymax - ymin + TO_REMOVE,
                     ),

From 627dd0c1754e862f25535d75faa4d024d8e72e2a Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 11 May 2022 13:44:59 -0400
Subject: [PATCH 11/49] add function to check whether box size is non-negative

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 54 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 5 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index f022905c91..4d64e62acd 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -121,6 +121,48 @@ def get_standard_mode(spatial_dims: int) -> str:
         raise ValueError(f"Images should have 2 or 3 dimensions, got {spatial_dims}")
 
 
+def check_box_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
+    """
+    This function checks whether the bbox is valid.
+    It ensures the box size is non-negative.
+    Args:
+        bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
+        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+    Returns:
+        raise Error is mode is not supported
+        raise Error if box has negative size
+
+    Example:
+        bbox = torch.ones(10,6)
+        check_box_mode(bbox, mode="cccwhd")
+    """
+    if mode is None:
+        mode = get_standard_mode(int(bbox.shape[1] / 2))
+    mode = look_up_option(mode, supported=SUPPORT_MODE)
+    spatial_dims = get_dimension(bbox=bbox, mode=mode)
+
+    # we need box size to be non-negative
+    if mode in ["ccwh", "cccwhd", "xywh", "xyzwhd"]:
+        box_error = bbox[:, spatial_dims] < 0
+        for axis in range(1, spatial_dims):
+            box_error = box_error | (bbox[:, spatial_dims + axis] < 0)
+    elif mode in ["xxyy", "xxyyzz"]:
+        box_error = bbox[:, 1] < bbox[:, 0]
+        for axis in range(1, spatial_dims):
+            box_error = box_error | (bbox[:, 2 * axis + 1] < bbox[:, 2 * axis])
+    elif mode in ["xyxy", "xyzxyz"]:
+        box_error = bbox[:, spatial_dims] < bbox[:, 0]
+        for axis in range(1, spatial_dims):
+            box_error = box_error | (bbox[:, spatial_dims + axis] < bbox[:, axis])
+    else:
+        raise ValueError(f"Box mode {mode} not in {SUPPORT_MODE}.")
+
+    if box_error.sum() > 0:
+        raise ValueError("Given bbox has invalid values. The box size must be non-negative.")
+
+    return
+
+
 def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
     """
     This internal function outputs the corner coordinates of the bbox
@@ -217,6 +259,8 @@ def box_convert_mode(
         box_convert_mode(bbox1=bbox, mode1="xyzxyz", mode2="cccwhd")
     """
 
+    check_box_mode(bbox1, mode1)
+
     # convert numpy to tensor if needed
     if isinstance(bbox1, np.ndarray):
         bbox1 = convert_to_tensor(bbox1)
@@ -256,9 +300,9 @@ def box_convert_mode(
             elif mode2 == "cccwhd":
                 bbox2 = torch.cat(
                     (
-                        (xmin + xmax + TO_REMOVE) / 2.,
-                        (ymin + ymax + TO_REMOVE) / 2.,
-                        (zmin + zmax + TO_REMOVE) / 2.,
+                        (xmin + xmax + TO_REMOVE) / 2.0,
+                        (ymin + ymax + TO_REMOVE) / 2.0,
+                        (zmin + zmax + TO_REMOVE) / 2.0,
                         xmax - xmin + TO_REMOVE,
                         ymax - ymin + TO_REMOVE,
                         zmax - zmin + TO_REMOVE,
@@ -276,8 +320,8 @@ def box_convert_mode(
             elif mode2 == "ccwh":
                 bbox2 = torch.cat(
                     (
-                        (xmin + xmax + TO_REMOVE) / 2.,
-                        (ymin + ymax + TO_REMOVE) / 2.,
+                        (xmin + xmax + TO_REMOVE) / 2.0,
+                        (ymin + ymax + TO_REMOVE) / 2.0,
                         xmax - xmin + TO_REMOVE,
                         ymax - ymin + TO_REMOVE,
                     ),

From d1b0555977d22f0db1df3191f8995ce6b2400cb1 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Thu, 12 May 2022 11:15:19 -0400
Subject: [PATCH 12/49] simplify numpy-to-tensor conversion

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 169 ++++++++++++++++++----------------------
 tests/test_box_utils.py |   4 +-
 2 files changed, 79 insertions(+), 94 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 4d64e62acd..de5eed3437 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -16,10 +16,8 @@
 import torch
 
 from monai.config.type_definitions import NdarrayOrTensor
-
-# from monai.utils.misc import ensure_tuple_rep
 from monai.utils.module import look_up_option
-from monai.utils.type_conversion import convert_to_numpy, convert_to_tensor
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type, convert_to_numpy, convert_to_tensor
 
 # We support several box modes, i.e., representation of a bounding box
 CORNER_CORNER_MODE = ["xyxy", "xyzxyz"]  # [xmin, ymin, xmax, ymax] and [xmin, ymin, zmin, xmax, ymax, zmax]
@@ -31,7 +29,7 @@
 ]  # [xcenter, ycenter, xsize, ysize] and [xcenter, ycenter, zcenter, xsize, ysize, zsize]
 
 STANDARD_MODE = CORNER_CORNER_MODE  # standard box modes supported by all the box util functions
-SUPPORT_MODE = (
+SUPPORTED_MODE = (
     CORNER_CORNER_MODE + XXYYZZ_MODE + CORNER_SIZE_MODE + CENTER_SIZE_MODE
 )  # supported box modes for some box util functions
 
@@ -39,7 +37,7 @@
 #      i.e., when xmin=1, xmax=2, we have w = 1
 # TO_REMOVE = 1  if in 'xxyy','xxyyzz' mode, the bottom-right corner is included in the box,
 #       i.e., when xmin=1, xmax=2, we have w = 2
-# Currently only TO_REMOVE = 0 has been tested. Please use TO_REMOVE = 0
+# Currently only TO_REMOVE = 0 has been tested. Please keep TO_REMOVE = 0
 TO_REMOVE = 0  # xmax-xmin = w -TO_REMOVE.
 
 
@@ -49,17 +47,10 @@ def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> l
     Args:
         in_sequence: Sequence or torch.Tensor or np.ndarray
     Returns:
-        in_sequence_list: a list
+        a list
 
     """
-    in_sequence_list = deepcopy(in_sequence)
-    if isinstance(in_sequence, torch.Tensor):
-        in_sequence_list = in_sequence_list.detach().cpu().numpy().tolist()
-    elif isinstance(in_sequence, np.ndarray):
-        in_sequence_list = in_sequence_list.tolist()
-    elif not isinstance(in_sequence, list):
-        in_sequence_list = list(in_sequence_list)
-    return in_sequence_list
+    return in_sequence.tolist() if isinstance(in_sequence, (torch.Tensor, np.ndarray)) else list(in_sequence)
 
 
 def get_dimension(
@@ -70,15 +61,17 @@ def get_dimension(
     """
     Get spatial dimension for the giving setting.
     Missing input is allowed. But at least one of the input value should be given.
+    It raises ValueError if the dimensions of multiple inputs do not match with each other.
     Args:
         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
         image_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
-        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
         spatial_dimension: 2 or 3
 
     Example:
         bbox = torch.ones(10,6)
+        get_dimension(bbox, mode="xyxy") will raise ValueError
         get_dimension(bbox, mode="xyzxyz") will return 3
         get_dimension(bbox, mode="xyzxyz", image_size=[100,200,200]) will return 3
         get_dimension(mode="xyzxyz") will return 3
@@ -127,10 +120,11 @@ def check_box_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
     It ensures the box size is non-negative.
     Args:
         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
         raise Error is mode is not supported
-        raise Error if box has negative size
+        return False if box has negative size
+        return True if no issue found
 
     Example:
         bbox = torch.ones(10,6)
@@ -138,7 +132,7 @@ def check_box_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
     """
     if mode is None:
         mode = get_standard_mode(int(bbox.shape[1] / 2))
-    mode = look_up_option(mode, supported=SUPPORT_MODE)
+    mode = look_up_option(mode, supported=SUPPORTED_MODE)
     spatial_dims = get_dimension(bbox=bbox, mode=mode)
 
     # we need box size to be non-negative
@@ -155,12 +149,12 @@ def check_box_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
         for axis in range(1, spatial_dims):
             box_error = box_error | (bbox[:, spatial_dims + axis] < bbox[:, axis])
     else:
-        raise ValueError(f"Box mode {mode} not in {SUPPORT_MODE}.")
+        raise ValueError(f"Box mode {mode} not in {SUPPORTED_MODE}.")
 
     if box_error.sum() > 0:
-        raise ValueError("Given bbox has invalid values. The box size must be non-negative.")
+        return False
 
-    return
+    return True
 
 
 def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
@@ -168,7 +162,7 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
     This internal function outputs the corner coordinates of the bbox
     Args:
         bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode: box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
         if 2D image, outputs (xmin, xmax, ymin, ymax)
         if 3D images, outputs (xmin, xmax, ymin, ymax, zmin, zmax)
@@ -179,31 +173,27 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
         split_into_corners(bbox, mode="cccwhd")
     """
     # convert numpy to tensor if needed
-    if isinstance(bbox, np.ndarray):
-        bbox = convert_to_tensor(bbox)
-        numpy_bool = True
-    else:
-        numpy_bool = False
+    bbox_t, *_ = convert_data_type(bbox, torch.Tensor)
 
     # convert to float32 when computing torch.clamp, which does not support float16
-    box_dtype = bbox.dtype
+    box_dtype = bbox_t.dtype
     compute_dtype = torch.float32
 
     if mode is None:
-        mode = get_standard_mode(int(bbox.shape[1] / 2))
-    mode = look_up_option(mode, supported=SUPPORT_MODE)
+        mode = get_standard_mode(int(bbox_t.shape[1] / 2))
+    mode = look_up_option(mode, supported=SUPPORTED_MODE)
 
     # split tensor into corners
     if mode in ["xxyy", "xxyyzz"]:
-        split_result = bbox.split(1, dim=-1)
+        split_result = bbox_t.split(1, dim=-1)
     elif mode == "xyzxyz":
-        xmin, ymin, zmin, xmax, ymax, zmax = bbox.split(1, dim=-1)
+        xmin, ymin, zmin, xmax, ymax, zmax = bbox_t.split(1, dim=-1)
         split_result = (xmin, xmax, ymin, ymax, zmin, zmax)
     elif mode == "xyxy":
-        xmin, ymin, xmax, ymax = bbox.split(1, dim=-1)
+        xmin, ymin, xmax, ymax = bbox_t.split(1, dim=-1)
         split_result = (xmin, xmax, ymin, ymax)
     elif mode == "xyzwhd":
-        xmin, ymin, zmin, w, h, d = bbox.split(1, dim=-1)
+        xmin, ymin, zmin, w, h, d = bbox_t.split(1, dim=-1)
         split_result = (
             xmin,
             xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
@@ -213,10 +203,10 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
             zmin + (d - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
         )
     elif mode == "xywh":
-        xmin, ymin, w, h = bbox.split(1, dim=-1)
+        xmin, ymin, w, h = bbox_t.split(1, dim=-1)
         split_result = (xmin, xmin + (w - TO_REMOVE).clamp(min=0), ymin, ymin + (h - TO_REMOVE).clamp(min=0))
     elif mode == "cccwhd":
-        xc, yc, zc, w, h, d = bbox.split(1, dim=-1)
+        xc, yc, zc, w, h, d = bbox_t.split(1, dim=-1)
         split_result = (
             xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
             xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
@@ -226,7 +216,7 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
             zc + ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
         )
     elif mode == "ccwh":
-        xc, yc, w, h = bbox.split(1, dim=-1)
+        xc, yc, w, h = bbox_t.split(1, dim=-1)
         split_result = (
             xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
             xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
@@ -237,68 +227,64 @@ def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
         raise RuntimeError("Should not be here")
 
     # convert tensor back to numpy if needed
-    if numpy_bool:
-        split_result = convert_to_numpy(split_result)
+    split_result, *_ = convert_to_dst_type(src=split_result, dst=bbox)
     return split_result
 
 
 def box_convert_mode(
-    bbox1: NdarrayOrTensor, mode1: Union[str, None] = None, mode2: Union[str, None] = None
+    bbox: NdarrayOrTensor, src_mode: Union[str, None] = None, dst_mode: Union[str, None] = None
 ) -> NdarrayOrTensor:
     """
-    This function converts the bbox1 in mode 1 to the mode2
+    This function converts the bbox in src_mode to the dst_mode
     Args:
-        bbox1: source bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode1: source box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-        mode2: target box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        bbox: source bounding box, Nx4 or Nx6 torch tensor or ndarray
+        src_mode: source box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        dst_mode: target box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
-        bbox2: bounding box with target mode, does not share memory with original bbox1
+        bbox_dst: bounding box with target mode, does not share memory with original bbox
 
     Example:
         bbox = torch.ones(10,6)
-        box_convert_mode(bbox1=bbox, mode1="xyzxyz", mode2="cccwhd")
+        box_convert_mode(bbox=bbox, src_mode="xyzxyz", dst_mode="cccwhd")
     """
 
-    check_box_mode(bbox1, mode1)
+    if not check_box_mode(bbox, src_mode):
+        raise ValueError("Given bbox has invalid values. The box size must be non-negative.")
 
     # convert numpy to tensor if needed
-    if isinstance(bbox1, np.ndarray):
-        bbox1 = convert_to_tensor(bbox1)
-        numpy_bool = True
-    else:
-        numpy_bool = False
-
-    # check whether the bbox and the new mode is valid
-    if mode1 is None:
-        mode1 = get_standard_mode(int(bbox1.shape[1] / 2))
-    if mode2 is None:
-        mode2 = get_standard_mode(int(bbox1.shape[1] / 2))
-    mode1 = look_up_option(mode1, supported=SUPPORT_MODE)
-    mode2 = look_up_option(mode2, supported=SUPPORT_MODE)
-
-    spatial_dims = get_dimension(bbox=bbox1, mode=mode1)
-    if len(mode1) != len(mode2):
+    bbox_t, *_ = convert_data_type(bbox, torch.Tensor)
+
+    # check whether the bbox_t and the new mode is valid
+    if src_mode is None:
+        src_mode = get_standard_mode(int(bbox_t.shape[1] / 2))
+    if dst_mode is None:
+        dst_mode = get_standard_mode(int(bbox_t.shape[1] / 2))
+    src_mode = look_up_option(src_mode, supported=SUPPORTED_MODE)
+    dst_mode = look_up_option(dst_mode, supported=SUPPORTED_MODE)
+
+    spatial_dims = get_dimension(bbox=bbox_t, mode=src_mode)
+    if len(src_mode) != len(dst_mode):
         raise ValueError("The dimension of the new mode should have the same spatial dimension as the old mode.")
 
     # if mode not changed, return original box
-    if mode1 == mode2:
-        bbox2 = deepcopy(bbox1)
-    # convert mode for bbox
-    elif mode2 in ["xxyy", "xxyyzz"]:
-        corners = split_into_corners(bbox1, mode1)
-        bbox2 = torch.cat(corners, dim=-1)
+    if src_mode == dst_mode:
+        bbox_t_dst = deepcopy(bbox_t)
+    # convert mode for bbox_t
+    elif dst_mode in ["xxyy", "xxyyzz"]:
+        corners = split_into_corners(bbox_t, src_mode)
+        bbox_t_dst = torch.cat(corners, dim=-1)
     else:
         if spatial_dims == 3:
-            xmin, xmax, ymin, ymax, zmin, zmax = split_into_corners(bbox1, mode1)
-            if mode2 == "xyzxyz":
-                bbox2 = torch.cat((xmin, ymin, zmin, xmax, ymax, zmax), dim=-1)
-            elif mode2 == "xyzwhd":
-                bbox2 = torch.cat(
+            xmin, xmax, ymin, ymax, zmin, zmax = split_into_corners(bbox_t, src_mode)
+            if dst_mode == "xyzxyz":
+                bbox_t_dst = torch.cat((xmin, ymin, zmin, xmax, ymax, zmax), dim=-1)
+            elif dst_mode == "xyzwhd":
+                bbox_t_dst = torch.cat(
                     (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE),
                     dim=-1,
                 )
-            elif mode2 == "cccwhd":
-                bbox2 = torch.cat(
+            elif dst_mode == "cccwhd":
+                bbox_t_dst = torch.cat(
                     (
                         (xmin + xmax + TO_REMOVE) / 2.0,
                         (ymin + ymax + TO_REMOVE) / 2.0,
@@ -310,15 +296,15 @@ def box_convert_mode(
                     dim=-1,
                 )
             else:
-                raise ValueError("We support only bbox mode in " + str(SUPPORT_MODE) + f", got {mode2}")
+                raise ValueError("We support only bbox mode in " + str(SUPPORTED_MODE) + f", got {dst_mode}")
         elif spatial_dims == 2:
-            xmin, xmax, ymin, ymax = split_into_corners(bbox1.clone(), mode1)
-            if mode2 == "xyxy":
-                bbox2 = torch.cat((xmin, ymin, xmax, ymax), dim=-1)
-            elif mode2 == "xywh":
-                bbox2 = torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
-            elif mode2 == "ccwh":
-                bbox2 = torch.cat(
+            xmin, xmax, ymin, ymax = split_into_corners(bbox_t.clone(), src_mode)
+            if dst_mode == "xyxy":
+                bbox_t_dst = torch.cat((xmin, ymin, xmax, ymax), dim=-1)
+            elif dst_mode == "xywh":
+                bbox_t_dst = torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
+            elif dst_mode == "ccwh":
+                bbox_t_dst = torch.cat(
                     (
                         (xmin + xmax + TO_REMOVE) / 2.0,
                         (ymin + ymax + TO_REMOVE) / 2.0,
@@ -328,15 +314,14 @@ def box_convert_mode(
                     dim=-1,
                 )
             else:
-                raise ValueError("We support only bbox mode in " + str(SUPPORT_MODE) + f", got {mode2}")
+                raise ValueError("We support only bbox mode in " + str(SUPPORTED_MODE) + f", got {dst_mode}")
         else:
             raise ValueError(f"Images should have 2 or 3 dimensions, got {spatial_dims}")
 
     # convert tensor back to numpy if needed
-    if numpy_bool:
-        bbox2 = convert_to_numpy(bbox2)
+    bbox_dst, *_ = convert_to_dst_type(src=bbox_t_dst, dst=bbox)
 
-    return bbox2
+    return bbox_dst
 
 
 def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = None) -> NdarrayOrTensor:
@@ -344,9 +329,9 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
     Convert given bbox to standard mode
     Args:
         bbox: source bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode: source box mode, choose from SUPPORT_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        mode: source box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
     Returns:
-        bbox2: bounding box with standard mode, does not share memory with original bbox
+        bbox_standard: bounding box with standard mode, does not share memory with original bbox
 
     Example:
         bbox = torch.ones(10,6)
@@ -354,7 +339,7 @@ def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = No
     """
     if mode is None:
         mode = get_standard_mode(int(bbox.shape[1] / 2))
-    mode = look_up_option(mode, supported=SUPPORT_MODE)
+    mode = look_up_option(mode, supported=SUPPORTED_MODE)
     spatial_dims = get_dimension(bbox=bbox, mode=mode)
     mode_standard = get_standard_mode(spatial_dims)
-    return box_convert_mode(bbox1=bbox, mode1=mode, mode2=mode_standard)
+    return box_convert_mode(bbox=bbox, src_mode=mode, dst_mode=mode_standard)
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index b4bb6646cd..7a0f1f6fdb 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -118,10 +118,10 @@ def test_value(self, input_data, mode2, expected_box, expected_area):
             expected_box = convert_data_type(expected_box, dtype=np.float16)[0]
 
         # test box_convert_mode, box_convert_standard_mode
-        result2 = box_convert_mode(bbox1=bbox1, mode1=mode1, mode2=mode2)
+        result2 = box_convert_mode(bbox=bbox1, src_mode=mode1, dst_mode=mode2)
         assert_allclose(result2, expected_box, type_test=True, device_test=True, atol=0.0)
 
-        result1 = box_convert_mode(bbox1=result2, mode1=mode2, mode2=mode1)
+        result1 = box_convert_mode(bbox=result2, src_mode=mode2, dst_mode=mode1)
         assert_allclose(result1, bbox1, type_test=True, device_test=True, atol=0.0)
 
         result_standard = box_convert_standard_mode(bbox=bbox1, mode=mode1)

From a8f77b067c60ccef0952060f2496517ac9c7fc4c Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 04:27:06 -0400
Subject: [PATCH 13/49] rewrite box convert

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  | 322 +++++++++++++++++++++++++++++
 monai/data/box_utils.py | 437 ++++++++++++++--------------------------
 tests/test_box_utils.py |  35 ++--
 3 files changed, 494 insertions(+), 300 deletions(-)
 create mode 100644 monai/data/box_mode.py

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
new file mode 100644
index 0000000000..c676497216
--- /dev/null
+++ b/monai/data/box_mode.py
@@ -0,0 +1,322 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from copy import deepcopy
+from typing import Sequence, Union, Tuple
+from abc import ABC, abstractmethod
+
+import numpy as np
+import torch
+
+from monai.config.type_definitions import NdarrayOrTensor
+from monai.utils.module import look_up_option
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type, convert_to_numpy, convert_to_tensor
+
+# TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
+#      i.e., when xmin=1, xmax=2, we have w = 1
+# TO_REMOVE = 1 if in 'xxyy','xxyyzz' mode, the bottom-right corner is included in the box,
+#      i.e., when xmin=1, xmax=2, we have w = 2
+# Currently, only `TO_REMOVE = 0.` is supported
+TO_REMOVE = 0.  # box size and corners are related by w = xmax - xmin + TO_REMOVE
+
+class BoxMode:
+    """
+    Base class of a box mode, i.e., a representation of bounding boxes; subclasses implement the corner conversions.
+    """
+    def __init__(self):
+        # The mapping that maps spatial dimension to mode string name
+        self.dim_to_str_mapping = {2: None, 3: None}
+
+    def get_str_mode(self, spatial_dims: int) -> str:
+        """
+        Get the mode name for the given spatial dimension
+        Args:
+            spatial_dims: 2 or 3
+        Returns:
+            mode string name
+        Example:
+            boxmode.get_str_mode(spatial_dims = 2)
+        """
+        return self.dim_to_str_mapping[spatial_dims]
+
+    def get_dim_from_boxes(self, boxes: torch.Tensor) -> int:
+        """
+        Get spatial dimension for the given boxes
+        Raises ValueError when boxes.shape[1] is neither 4 nor 6.
+        Args:
+            boxes: bounding box, Nx4 or Nx6 torch tensor
+        Returns:
+            spatial_dims: 2 or 3
+        Example:
+            boxes = torch.ones(10,6)
+            boxmode.get_dim_from_boxes(boxes) will return 3
+        """
+        if int(boxes.shape[1]) not in (4, 6):
+            raise ValueError(f"Currently we support only boxes with shape [N,4] or [N,6], got boxes with shape {boxes.shape}.")
+        return int(boxes.shape[1] // 2)
+
+    def get_dim_from_corner(self, c: Sequence) -> int:
+        """
+        Get spatial dimension for the given box corners
+        Raises ValueError when len(c) is neither 4 nor 6.
+        Args:
+            c: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+        Returns:
+            spatial_dims: 2 or 3
+        Example:
+            c = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
+            boxmode.get_dim_from_corner(c) will return 2
+        """
+        if len(c) not in (4, 6):
+            raise ValueError(f"Currently we support only boxes with shape [N,4] or [N,6], got box corner tuple with length {len(c)}.")
+        return int(len(c) // 2)
+
+    def check_corner(self, c: Sequence) -> bool:
+        """
+        check the validity for the given box corners
+        Args:
+            c: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+        Returns:
+            bool, whether the box is valid, i.e., no max corner is smaller than its min corner
+        Example:
+            c = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
+            boxmode.check_corner(c) will return True
+        """
+        spatial_dims = self.get_dim_from_corner(c)
+        box_error = c[spatial_dims] < c[0]
+        for axis in range(1, spatial_dims):
+            box_error = box_error | (c[spatial_dims + axis] < c[axis])
+        return not bool(box_error.any())
+
+    @abstractmethod
+    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+        """
+        Return the box corners for the given boxes
+        Args:
+            boxes: bounding box, Nx4 or Nx6 torch tensor
+        Returns:
+            corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+        Example:
+            boxes = torch.ones(10,6)
+            boxmode.box_to_corner(boxes) will return a 6-element tuple, each element is a 10x1 tensor
+        """
+        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
+
+    @abstractmethod
+    def corner_to_box(self, corner: Sequence) -> torch.Tensor:
+        """
+        Return the boxes converted from the given box corners
+        Args:
+            corner: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+        Returns:
+            boxes: bounding box, Nx4 or Nx6 torch tensor
+        Example:
+            c = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
+            boxmode.corner_to_box(c) will return a 10x4 tensor
+        """
+        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
+
+
+class CornerCornerMode_TypeA(BoxMode):
+    """
+    Also represented as "xyxy" or "xyzxyz"
+    [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax]
+    """
+    def __init__(self):
+        super().__init__()
+        self.dim_to_str_mapping = {2: "xyxy", 3: "xyzxyz"}
+
+    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+        """
+        Split Nx4 or Nx6 boxes into a tuple of Nx1 corner tensors, ordered as in this mode.
+        Raises ValueError if the boxes are not [N,4] or [N,6], or if any max is smaller than its min.
+        """
+        # called for its shape validation only; the corner order equals the column order in this mode
+        self.get_dim_from_boxes(boxes)
+        corner = boxes.split(1, dim=-1)
+        if not self.check_corner(corner):
+            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+        return corner
+
+    def corner_to_box(self, c: Sequence) -> torch.Tensor:
+        """
+        Concatenate the corner tensors into Nx4 or Nx6 boxes; raises ValueError unless len(c) is 4 or 6.
+        """
+        self.get_dim_from_corner(c)  # validation only; the corners are already in this mode's column order
+        return torch.cat(tuple(c), dim=-1)
+
+class CornerCornerMode_TypeB(BoxMode):
+    """
+    Also represented as "xxyy" or "xxyyzz"
+    [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax]
+    """
+    def __init__(self):
+        super().__init__()
+        self.dim_to_str_mapping = {2: "xxyy", 3: "xxyyzz"}
+
+    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+        """Reorder the per-axis (min, max) columns into Nx1 corners (all mins, then all maxes)."""
+        if self.get_dim_from_boxes(boxes) == 3:
+            xmin, xmax, ymin, ymax, zmin, zmax = boxes.split(1, dim=-1)
+            corner = xmin, ymin, zmin, xmax, ymax, zmax
+        else:
+            xmin, xmax, ymin, ymax = boxes.split(1, dim=-1)
+            corner = xmin, ymin, xmax, ymax
+        if not self.check_corner(corner):
+            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+        return corner
+
+    def corner_to_box(self, c: Sequence) -> torch.Tensor:
+        """Interleave corners (all mins, then all maxes) back into per-axis (min, max) columns."""
+        spatial_dims = self.get_dim_from_corner(c)
+        if spatial_dims == 3:
+            return torch.cat((c[0], c[3], c[1], c[4], c[2], c[5]), dim=-1)
+        return torch.cat((c[0], c[2], c[1], c[3]), dim=-1)
+
+class CornerCornerMode_TypeC(BoxMode):
+    """
+    Also represented as "xyxy" or "xyxyzz"
+    [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax]
+    """
+    def __init__(self):
+        super().__init__()
+        self.dim_to_str_mapping = {2: "xyxy", 3: "xyxyzz"}
+
+    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+        """Reorder the box columns of this mode into Nx1 corners (all mins, then all maxes)."""
+        if self.get_dim_from_boxes(boxes) == 3:
+            xmin, ymin, xmax, ymax, zmin, zmax = boxes.split(1, dim=-1)
+            corner = xmin, ymin, zmin, xmax, ymax, zmax
+        else:
+            xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
+            corner = xmin, ymin, xmax, ymax
+        if not self.check_corner(corner):
+            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+        return corner
+
+    def corner_to_box(self, c: Sequence) -> torch.Tensor:
+        """Reorder corners (all mins, then all maxes) into the column order of this mode."""
+        spatial_dims = self.get_dim_from_corner(c)
+        if spatial_dims == 3:
+            return torch.cat((c[0], c[1], c[3], c[4], c[2], c[5]), dim=-1)
+        return torch.cat((c[0], c[1], c[2], c[3]), dim=-1)
+
+class CornerSizeMode(BoxMode):
+    """
+    Also represented as "xywh" or "xyzwhd"
+    [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
+    """
+    def __init__(self):
+        super().__init__()
+        self.dim_to_str_mapping = {2: "xywh", 3: "xyzwhd"}
+
+    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+        """
+        Return the box corners for the given corner-size boxes
+        Args:
+            boxes: bounding box, Nx4 or Nx6 torch tensor
+        Returns:
+            corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+        """
+        # convert to float32 when computing torch.clamp, which does not support float16
+        box_dtype = boxes.dtype
+        compute_dtype = torch.float32
+
+        spatial_dims = self.get_dim_from_boxes(boxes)
+        columns = boxes.split(1, dim=-1)
+        mins, sizes = columns[:spatial_dims], columns[spatial_dims:]
+        # max = min + size; negative sizes are clamped to 0 before they are added
+        maxes = tuple(
+            lo + (size - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            for lo, size in zip(mins, sizes)
+        )
+        corner = mins + maxes
+        if not self.check_corner(corner):
+            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+        return corner
+
+    def corner_to_box(self, c: Sequence) -> torch.Tensor:
+        """
+        Return corner-size boxes (Nx4 or Nx6 torch tensor) converted from the given box corners,
+        with c ordered as (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+        """
+        spatial_dims = self.get_dim_from_corner(c)
+        mins, maxes = tuple(c[:spatial_dims]), tuple(c[spatial_dims:])
+        # size = max - min + TO_REMOVE
+        sizes = tuple(hi - lo + TO_REMOVE for lo, hi in zip(mins, maxes))
+        return torch.cat(mins + sizes, dim=-1)
+
+
+class CenterSizeMode(BoxMode):
+    """
+    Also represented as "ccwh" or "cccwhd"
+    [xcenter, ycenter, xsize, ysize] or [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+    """
+    def __init__(self):
+        super().__init__()
+        self.dim_to_str_mapping = {2: "ccwh", 3: "cccwhd"}
+
+    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+        """
+        Return the box corners for the given center-size boxes
+        Args:
+            boxes: bounding box, Nx4 or Nx6 torch tensor
+        Returns:
+            corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+        Raises:
+            ValueError: if boxes is not [N,4] or [N,6]
+            ValueError: if the converted corners fail check_corner
+        Example:
+            boxes = torch.ones(10,6)
+            boxmode.box_to_corner(boxes) will return a 6-element tuple, each element is a 10x1 tensor
+        """
+        # convert to float32 when computing torch.clamp, which does not support float16
+        box_dtype = boxes.dtype
+        compute_dtype = torch.float32
+
+        spatial_dims = self.get_dim_from_boxes(boxes)
+        columns = boxes.split(1, dim=-1)
+        centers, sizes = columns[:spatial_dims], columns[spatial_dims:]
+        # half extent per axis, computed once and reused for min and max; negative values are clamped to 0
+        half_sizes = tuple(
+            ((size - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            for size in sizes
+        )
+        mins = tuple(center - half for center, half in zip(centers, half_sizes))
+        maxes = tuple(center + half for center, half in zip(centers, half_sizes))
+        corner = mins + maxes
+        if not self.check_corner(corner):
+            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+        return corner
+
+    def corner_to_box(self, c: Sequence) -> torch.Tensor:
+        """
+        Return the center-size boxes converted from the given box corners
+        Args:
+            c: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+        Returns:
+            boxes: bounding box, Nx4 or Nx6 torch tensor
+        Raises:
+            ValueError: if c does not have 4 or 6 elements
+        """
+        # validate the corner tuple in the same way as the other box modes
+        spatial_dims = self.get_dim_from_corner(c)
+        mins, maxes = tuple(c[:spatial_dims]), tuple(c[spatial_dims:])
+        # center = (min + max + TO_REMOVE) / 2, size = max - min + TO_REMOVE
+        centers = tuple((lo + hi + TO_REMOVE) / 2.0 for lo, hi in zip(mins, maxes))
+        sizes = tuple(hi - lo + TO_REMOVE for lo, hi in zip(mins, maxes))
+        return torch.cat(centers + sizes, dim=-1)
\ No newline at end of file
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index de5eed3437..2649316010 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -10,7 +10,8 @@
 # limitations under the License.
 
 from copy import deepcopy
-from typing import Sequence, Union
+from typing import Sequence, Union, Tuple
+from abc import ABC, abstractmethod
 
 import numpy as np
 import torch
@@ -18,28 +19,50 @@
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.utils.module import look_up_option
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type, convert_to_numpy, convert_to_tensor
+from monai.data import box_mode
+from monai.data.box_mode import (
+    BoxMode,
+    CornerCornerMode_TypeA, 
+    CornerCornerMode_TypeB, 
+    CornerCornerMode_TypeC, 
+    CornerSizeMode, 
+    CenterSizeMode
+    )
 
-# We support several box modes, i.e., representation of a bounding box
-CORNER_CORNER_MODE = ["xyxy", "xyzxyz"]  # [xmin, ymin, xmax, ymax] and [xmin, ymin, zmin, xmax, ymax, zmax]
-XXYYZZ_MODE = ["xxyy", "xxyyzz"]  # [xmin, xmax, ymin, ymax] and [xmin, xmax, ymin, ymax, zmin, zmax]
-CORNER_SIZE_MODE = ["xywh", "xyzwhd"]  # [xmin, ymin, xsize, ysize] and [xmin, ymin, zmin, xsize, ysize, zsize]
-CENTER_SIZE_MODE = [
-    "ccwh",
-    "cccwhd",
-]  # [xcenter, ycenter, xsize, ysize] and [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-
-STANDARD_MODE = CORNER_CORNER_MODE  # standard box modes supported by all the box util functions
-SUPPORTED_MODE = (
-    CORNER_CORNER_MODE + XXYYZZ_MODE + CORNER_SIZE_MODE + CENTER_SIZE_MODE
-)  # supported box modes for some box util functions
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
 # TO_REMOVE = 1  if in 'xxyy','xxyyzz' mode, the bottom-right corner is included in the box,
 #       i.e., when xmin=1, xmax=2, we have w = 2
-# Currently only TO_REMOVE = 0 has been tested. Please keep TO_REMOVE = 0
-TO_REMOVE = 0  # xmax-xmin = w -TO_REMOVE.
-
+# Currently, only `TO_REMOVE = 0` is supported
+TO_REMOVE = box_mode.TO_REMOVE
+
+# We support the conversion between several box modes, i.e., representations of a bounding box
+# BOXMODE_MAPPING maps each string box mode to an instance of the corresponding BoxMode class
+BOXMODE_MAPPING = {
+    "xyxy": CornerCornerMode_TypeA(), # [xmin, ymin, xmax, ymax]
+    "xyzxyz": CornerCornerMode_TypeA(), # [xmin, ymin, zmin, xmax, ymax, zmax]
+    "xxyy": CornerCornerMode_TypeB(), # [xmin, xmax, ymin, ymax]
+    "xxyyzz": CornerCornerMode_TypeB(), # [xmin, xmax, ymin, ymax, zmin, zmax]
+    "xyxyzz": CornerCornerMode_TypeC(), # [xmin, ymin, xmax, ymax, zmin, zmax]
+    "xywh": CornerSizeMode(), # [xmin, ymin, xsize, ysize]
+    "xyzwhd": CornerSizeMode(), # [xmin, ymin, zmin, xsize, ysize, zsize]
+    "ccwh": CenterSizeMode(), # [xcenter, ycenter, xsize, ysize]
+    "cccwhd": CenterSizeMode() # [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+}
+# The standard box mode class; get_boxmode instantiates it when no mode is given
+StandardMode = CornerCornerMode_TypeA
+
+
+def get_boxmode(mode: Union[str, BoxMode, None] = None) -> BoxMode:
+    """Resolve a mode spec: None -> StandardMode(), str -> BOXMODE_MAPPING lookup, BoxMode -> returned as is."""
+    if mode is None:
+        return StandardMode()
+    if isinstance(mode, BoxMode):
+        return mode
+    if isinstance(mode, str):
+        return BOXMODE_MAPPING[mode]
+    raise ValueError("mode has to be chosen from [str, BoxMode, None].")
 
 def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> list:
     """
@@ -53,293 +76,141 @@ def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> l
     return in_sequence.tolist() if isinstance(in_sequence, (torch.Tensor, np.ndarray)) else list(in_sequence)
 
 
-def get_dimension(
-    bbox: Union[torch.Tensor, np.ndarray, None] = None,
-    image_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
-    mode: Union[str, None] = None,
-) -> int:
-    """
-    Get spatial dimension for the giving setting.
-    Missing input is allowed. But at least one of the input value should be given.
-    It raises ValueError if the dimensions of multiple inputs do not match with each other.
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
-        image_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
-        mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-    Returns:
-        spatial_dimension: 2 or 3
-
-    Example:
-        bbox = torch.ones(10,6)
-        get_dimension(bbox, mode="xyxy") will raise ValueError
-        get_dimension(bbox, mode="xyzxyz") will return 3
-        get_dimension(bbox, mode="xyzxyz", image_size=[100,200,200]) will return 3
-        get_dimension(mode="xyzxyz") will return 3
-    """
-    spatial_dims_set = set()
-    if image_size is not None:
-        spatial_dims_set.add(len(image_size))
-    if mode is not None:
-        spatial_dims_set.add(int(len(mode) / 2))
-    if bbox is not None:
-        spatial_dims_set.add(int(bbox.shape[1] / 2))
-    spatial_dims_list = list(spatial_dims_set)
-    if len(spatial_dims_list) == 0:
-        raise ValueError("At least one of bbox, image_size, and mode needs to be non-empty.")
-    elif len(spatial_dims_list) == 1:
-        spatial_dims = int(spatial_dims_list[0])
-        spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
-        return int(spatial_dims)
-    else:
-        raise ValueError("The dimension of bbox, image_size, mode should match with each other.")
-
-
-def get_standard_mode(spatial_dims: int) -> str:
-    """
-    Get the mode name for the given spatial dimension
-    Args:
-        spatial_dims: 2 or 3
-    Returns:
-        mode name, choose from STANDARD_MODE
-
-    Example:
-        get_standard_mode(spatial_dims = 2)
-
-    """
-    if spatial_dims == 2:
-        return STANDARD_MODE[0]
-    elif spatial_dims == 3:
-        return STANDARD_MODE[1]
-    else:
-        raise ValueError(f"Images should have 2 or 3 dimensions, got {spatial_dims}")
-
-
-def check_box_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
-    """
-    This function checks whether the bbox is valid.
-    It ensures the box size is non-negative.
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-    Returns:
-        raise Error is mode is not supported
-        return False if box has negative size
-        return True if no issue found
-
-    Example:
-        bbox = torch.ones(10,6)
-        check_box_mode(bbox, mode="cccwhd")
-    """
-    if mode is None:
-        mode = get_standard_mode(int(bbox.shape[1] / 2))
-    mode = look_up_option(mode, supported=SUPPORTED_MODE)
-    spatial_dims = get_dimension(bbox=bbox, mode=mode)
-
-    # we need box size to be non-negative
-    if mode in ["ccwh", "cccwhd", "xywh", "xyzwhd"]:
-        box_error = bbox[:, spatial_dims] < 0
-        for axis in range(1, spatial_dims):
-            box_error = box_error | (bbox[:, spatial_dims + axis] < 0)
-    elif mode in ["xxyy", "xxyyzz"]:
-        box_error = bbox[:, 1] < bbox[:, 0]
-        for axis in range(1, spatial_dims):
-            box_error = box_error | (bbox[:, 2 * axis + 1] < bbox[:, 2 * axis])
-    elif mode in ["xyxy", "xyzxyz"]:
-        box_error = bbox[:, spatial_dims] < bbox[:, 0]
-        for axis in range(1, spatial_dims):
-            box_error = box_error | (bbox[:, spatial_dims + axis] < bbox[:, axis])
-    else:
-        raise ValueError(f"Box mode {mode} not in {SUPPORTED_MODE}.")
-
-    if box_error.sum() > 0:
-        return False
-
-    return True
-
-
-def split_into_corners(bbox: NdarrayOrTensor, mode: Union[str, None] = None):
-    """
-    This internal function outputs the corner coordinates of the bbox
-    Args:
-        bbox: bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-    Returns:
-        if 2D image, outputs (xmin, xmax, ymin, ymax)
-        if 3D images, outputs (xmin, xmax, ymin, ymax, zmin, zmax)
-        xmin for example, is a Nx1 tensor
-
-    Example:
-        bbox = torch.ones(10,6)
-        split_into_corners(bbox, mode="cccwhd")
-    """
-    # convert numpy to tensor if needed
-    bbox_t, *_ = convert_data_type(bbox, torch.Tensor)
-
-    # convert to float32 when computing torch.clamp, which does not support float16
-    box_dtype = bbox_t.dtype
-    compute_dtype = torch.float32
-
-    if mode is None:
-        mode = get_standard_mode(int(bbox_t.shape[1] / 2))
-    mode = look_up_option(mode, supported=SUPPORTED_MODE)
-
-    # split tensor into corners
-    if mode in ["xxyy", "xxyyzz"]:
-        split_result = bbox_t.split(1, dim=-1)
-    elif mode == "xyzxyz":
-        xmin, ymin, zmin, xmax, ymax, zmax = bbox_t.split(1, dim=-1)
-        split_result = (xmin, xmax, ymin, ymax, zmin, zmax)
-    elif mode == "xyxy":
-        xmin, ymin, xmax, ymax = bbox_t.split(1, dim=-1)
-        split_result = (xmin, xmax, ymin, ymax)
-    elif mode == "xyzwhd":
-        xmin, ymin, zmin, w, h, d = bbox_t.split(1, dim=-1)
-        split_result = (
-            xmin,
-            xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            ymin,
-            ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            zmin,
-            zmin + (d - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-        )
-    elif mode == "xywh":
-        xmin, ymin, w, h = bbox_t.split(1, dim=-1)
-        split_result = (xmin, xmin + (w - TO_REMOVE).clamp(min=0), ymin, ymin + (h - TO_REMOVE).clamp(min=0))
-    elif mode == "cccwhd":
-        xc, yc, zc, w, h, d = bbox_t.split(1, dim=-1)
-        split_result = (
-            xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            yc - ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            zc - ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            zc + ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-        )
-    elif mode == "ccwh":
-        xc, yc, w, h = bbox_t.split(1, dim=-1)
-        split_result = (
-            xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            yc - ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-            yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype),
-        )
-    else:
-        raise RuntimeError("Should not be here")
-
-    # convert tensor back to numpy if needed
-    split_result, *_ = convert_to_dst_type(src=split_result, dst=bbox)
-    return split_result
+# def get_dimension(
+#     boxes: Union[torch.Tensor, np.ndarray, None] = None,
+#     image_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
+#     mode: Union[str, None] = None,
+# ) -> int:
+#     """
+#     Get spatial dimension for the giving setting.
+#     Missing input is allowed. But at least one of the input value should be given.
+#     It raises ValueError if the dimensions of multiple inputs do not match with each other.
+#     Args:
+#         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
+#         image_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+#         mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+#     Returns:
+#         spatial_dimension: 2 or 3
+
+#     Example:
+#         boxes = torch.ones(10,6)
+#         get_dimension(boxes, mode="xyxy") will raise ValueError
+#         get_dimension(boxes, mode="xyzxyz") will return 3
+#         get_dimension(boxes, mode="xyzxyz", image_size=[100,200,200]) will return 3
+#         get_dimension(mode="xyzxyz") will return 3
+#     """
+#     spatial_dims_set = set()
+#     if image_size is not None:
+#         spatial_dims_set.add(len(image_size))
+#     if mode is not None:
+#         spatial_dims_set.add(int(len(mode) / 2))
+#     if boxes is not None:
+#         spatial_dims_set.add(int(boxes.shape[1] / 2))
+#     spatial_dims_list = list(spatial_dims_set)
+#     if len(spatial_dims_list) == 0:
+#         raise ValueError("At least one of boxes, image_size, and mode needs to be non-empty.")
+#     elif len(spatial_dims_list) == 1:
+#         spatial_dims = int(spatial_dims_list[0])
+#         spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
+#         return int(spatial_dims)
+#     else:
+#         raise ValueError("The dimension of boxes, image_size, mode should match with each other.")
+
+
+# def check_box_mode(boxes: NdarrayOrTensor, mode: Union[str, None] = None):
+#     """
+#     This function checks whether the boxes is valid.
+#     It ensures the box size is non-negative.
+#     Args:
+#         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
+#         mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+#     Returns:
+#         raise Error is mode is not supported
+#         return False if box has negative size
+#         return True if no issue found
+
+#     Example:
+#         boxes = torch.ones(10,6)
+#         check_box_mode(boxes, mode="cccwhd")
+#     """
+#     if mode is None:
+#         mode = get_standard_mode(int(boxes.shape[1] / 2))
+#     mode = look_up_option(mode, supported=SUPPORTED_MODE)
+#     spatial_dims = get_dimension(boxes=boxes, mode=mode)
+
+#     # we need box size to be non-negative
+#     if mode in ["ccwh", "cccwhd", "xywh", "xyzwhd"]:
+#         box_error = boxes[:, spatial_dims] < 0
+#         for axis in range(1, spatial_dims):
+#             box_error = box_error | (boxes[:, spatial_dims + axis] < 0)
+#     elif mode in ["xxyy", "xxyyzz"]:
+#         box_error = boxes[:, 1] < boxes[:, 0]
+#         for axis in range(1, spatial_dims):
+#             box_error = box_error | (boxes[:, 2 * axis + 1] < boxes[:, 2 * axis])
+#     elif mode in ["xyxy", "xyzxyz"]:
+#         box_error = boxes[:, spatial_dims] < boxes[:, 0]
+#         for axis in range(1, spatial_dims):
+#             box_error = box_error | (boxes[:, spatial_dims + axis] < boxes[:, axis])
+#     else:
+#         raise ValueError(f"Box mode {mode} not in {SUPPORTED_MODE}.")
+
+#     if box_error.sum() > 0:
+#         return False
+
+#     return True
 
 
 def box_convert_mode(
-    bbox: NdarrayOrTensor, src_mode: Union[str, None] = None, dst_mode: Union[str, None] = None
+    boxes: NdarrayOrTensor, src_mode: Union[str, BoxMode, None] = None, dst_mode: Union[str, BoxMode, None] = None
 ) -> NdarrayOrTensor:
     """
-    This function converts the bbox in src_mode to the dst_mode
+    This function converts the boxes in src_mode to the dst_mode
     Args:
-        bbox: source bounding box, Nx4 or Nx6 torch tensor or ndarray
-        src_mode: source box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-        dst_mode: target box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray
+        src_mode: source box mode. If mode is not given, this func will assume mode is StandardMode()
+        dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode()
     Returns:
-        bbox_dst: bounding box with target mode, does not share memory with original bbox
+        boxes_dst: bounding box with target mode, does not share memory with original boxes
 
     Example:
-        bbox = torch.ones(10,6)
-        box_convert_mode(bbox=bbox, src_mode="xyzxyz", dst_mode="cccwhd")
+        boxes = torch.ones(10,6)
+        box_convert_mode(boxes=boxes, src_mode="xyzxyz", dst_mode="cccwhd")
     """
 
-    if not check_box_mode(bbox, src_mode):
-        raise ValueError("Given bbox has invalid values. The box size must be non-negative.")
+    # if not check_box_mode(boxes, src_mode):
+    #     raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
 
-    # convert numpy to tensor if needed
-    bbox_t, *_ = convert_data_type(bbox, torch.Tensor)
-
-    # check whether the bbox_t and the new mode is valid
-    if src_mode is None:
-        src_mode = get_standard_mode(int(bbox_t.shape[1] / 2))
-    if dst_mode is None:
-        dst_mode = get_standard_mode(int(bbox_t.shape[1] / 2))
-    src_mode = look_up_option(src_mode, supported=SUPPORTED_MODE)
-    dst_mode = look_up_option(dst_mode, supported=SUPPORTED_MODE)
-
-    spatial_dims = get_dimension(bbox=bbox_t, mode=src_mode)
-    if len(src_mode) != len(dst_mode):
-        raise ValueError("The dimension of the new mode should have the same spatial dimension as the old mode.")
+    
 
     # if mode not changed, return original box
-    if src_mode == dst_mode:
-        bbox_t_dst = deepcopy(bbox_t)
-    # convert mode for bbox_t
-    elif dst_mode in ["xxyy", "xxyyzz"]:
-        corners = split_into_corners(bbox_t, src_mode)
-        bbox_t_dst = torch.cat(corners, dim=-1)
+    src_boxmode = get_boxmode(src_mode)
+    dst_boxmode = get_boxmode(dst_mode)
+    if type(src_boxmode) is type(dst_boxmode):
+        return deepcopy(boxes)
+    # convert mode
     else:
-        if spatial_dims == 3:
-            xmin, xmax, ymin, ymax, zmin, zmax = split_into_corners(bbox_t, src_mode)
-            if dst_mode == "xyzxyz":
-                bbox_t_dst = torch.cat((xmin, ymin, zmin, xmax, ymax, zmax), dim=-1)
-            elif dst_mode == "xyzwhd":
-                bbox_t_dst = torch.cat(
-                    (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE),
-                    dim=-1,
-                )
-            elif dst_mode == "cccwhd":
-                bbox_t_dst = torch.cat(
-                    (
-                        (xmin + xmax + TO_REMOVE) / 2.0,
-                        (ymin + ymax + TO_REMOVE) / 2.0,
-                        (zmin + zmax + TO_REMOVE) / 2.0,
-                        xmax - xmin + TO_REMOVE,
-                        ymax - ymin + TO_REMOVE,
-                        zmax - zmin + TO_REMOVE,
-                    ),
-                    dim=-1,
-                )
-            else:
-                raise ValueError("We support only bbox mode in " + str(SUPPORTED_MODE) + f", got {dst_mode}")
-        elif spatial_dims == 2:
-            xmin, xmax, ymin, ymax = split_into_corners(bbox_t.clone(), src_mode)
-            if dst_mode == "xyxy":
-                bbox_t_dst = torch.cat((xmin, ymin, xmax, ymax), dim=-1)
-            elif dst_mode == "xywh":
-                bbox_t_dst = torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
-            elif dst_mode == "ccwh":
-                bbox_t_dst = torch.cat(
-                    (
-                        (xmin + xmax + TO_REMOVE) / 2.0,
-                        (ymin + ymax + TO_REMOVE) / 2.0,
-                        xmax - xmin + TO_REMOVE,
-                        ymax - ymin + TO_REMOVE,
-                    ),
-                    dim=-1,
-                )
-            else:
-                raise ValueError("We support only bbox mode in " + str(SUPPORTED_MODE) + f", got {dst_mode}")
-        else:
-            raise ValueError(f"Images should have 2 or 3 dimensions, got {spatial_dims}")
-
-    # convert tensor back to numpy if needed
-    bbox_dst, *_ = convert_to_dst_type(src=bbox_t_dst, dst=bbox)
+        # convert numpy to tensor if needed
+        boxes_t, *_ = convert_data_type(boxes, torch.Tensor)
 
-    return bbox_dst
+        corners = src_boxmode.box_to_corner(boxes_t)
+        boxes_t_dst = dst_boxmode.corner_to_box(corners)
+    
+        # convert tensor back to numpy if needed
+        boxes_dst, *_ = convert_to_dst_type(src=boxes_t_dst, dst=boxes)
+        return boxes_dst
 
 
-def box_convert_standard_mode(bbox: NdarrayOrTensor, mode: Union[str, None] = None) -> NdarrayOrTensor:
+def box_convert_standard_mode(boxes: NdarrayOrTensor, mode: Union[str, BoxMode, None] = None) -> NdarrayOrTensor:
     """
-    Convert given bbox to standard mode
+    Convert given boxes to standard mode
     Args:
-        bbox: source bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode: source box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
+        boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray
+        mode: source box mode. If mode is not given, this func will assume mode is StandardMode()
     Returns:
-        bbox_standard: bounding box with standard mode, does not share memory with original bbox
+        boxes_standard: bounding box with standard mode, does not share memory with original boxes
 
     Example:
-        bbox = torch.ones(10,6)
-        box_convert_standard_mode(bbox=bbox, mode="xxyyzz")
+        boxes = torch.ones(10,6)
+        box_convert_standard_mode(boxes=boxes, mode="xxyyzz")
     """
-    if mode is None:
-        mode = get_standard_mode(int(bbox.shape[1] / 2))
-    mode = look_up_option(mode, supported=SUPPORTED_MODE)
-    spatial_dims = get_dimension(bbox=bbox, mode=mode)
-    mode_standard = get_standard_mode(spatial_dims)
-    return box_convert_mode(bbox=bbox, src_mode=mode, dst_mode=mode_standard)
+    return box_convert_mode(boxes=boxes, src_mode=mode, dst_mode=StandardMode())
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index 7a0f1f6fdb..679ad76d6a 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -28,11 +28,11 @@
 
 TESTS = []
 for p in TEST_NDARRAYS:
-    bbox = [[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]
+    boxes = [[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]
     image_size = [4, 4, 4]
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzwhd", "half": False},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzwhd", "half": False},
             "xyzwhd",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
             p([0, 12, 12]),
@@ -40,7 +40,7 @@
     )
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzwhd", "half": True},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzwhd", "half": True},
             "xyzxyz",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 3, 3], [0, 1, 1, 2, 3, 4]]),
             p([0, 12, 12]),
@@ -48,7 +48,7 @@
     )
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzwhd", "half": False},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzwhd", "half": False},
             "xxyyzz",
             p([[0, 0, 0, 0, 0, 0], [0, 2, 1, 3, 0, 3], [0, 2, 1, 3, 1, 4]]),
             p([0, 12, 12]),
@@ -56,7 +56,7 @@
     )
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzxyz", "half": False},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzxyz", "half": False},
             "xyzwhd",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 1, 3], [0, 1, 1, 2, 1, 2]]),
             p([0, 6, 4]),
@@ -64,7 +64,7 @@
     )
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzxyz", "half": True},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzxyz", "half": True},
             "xyzxyz",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
             p([0, 6, 4]),
@@ -72,7 +72,7 @@
     )
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xyzxyz", "half": False},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzxyz", "half": False},
             "xxyyzz",
             p([[0, 0, 0, 0, 0, 0], [0, 2, 1, 2, 0, 3], [0, 2, 1, 2, 1, 3]]),
             p([0, 6, 4]),
@@ -80,7 +80,7 @@
     )
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xxyyzz", "half": False},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xxyyzz", "half": False},
             "xxyyzz",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
             p([0, 2, 1]),
@@ -88,7 +88,7 @@
     )
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xxyyzz", "half": True},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xxyyzz", "half": True},
             "xyzxyz",
             p([[0, 0, 0, 0, 0, 0], [0, 0, 2, 1, 2, 3], [0, 1, 2, 1, 2, 3]]),
             p([0, 2, 1]),
@@ -96,7 +96,7 @@
     )
     TESTS.append(
         [
-            {"bbox": p(bbox), "image_size": image_size, "mode": "xxyyzz", "half": False},
+            {"boxes": p(boxes), "image_size": image_size, "mode": "xxyyzz", "half": False},
             "xyzwhd",
             p([[0, 0, 0, 0, 0, 0], [0, 0, 2, 1, 2, 1], [0, 1, 2, 1, 1, 1]]),
             p([0, 2, 1]),
@@ -107,25 +107,26 @@
 class TestCreateBoxList(unittest.TestCase):
     @parameterized.expand(TESTS)
     def test_value(self, input_data, mode2, expected_box, expected_area):
-        bbox1 = convert_data_type(input_data["bbox"], dtype=np.float32)[0]
+        expected_box = convert_data_type(expected_box, dtype=np.float32)[0]
+        boxes1 = convert_data_type(input_data["boxes"], dtype=np.float32)[0]
         mode1 = input_data["mode"]
         # image_size = input_data["image_size"]
         half_bool = input_data["half"]
 
         # test float16
         if half_bool:
-            bbox1 = convert_data_type(bbox1, dtype=np.float16)[0]
+            boxes1 = convert_data_type(boxes1, dtype=np.float16)[0]
             expected_box = convert_data_type(expected_box, dtype=np.float16)[0]
 
         # test box_convert_mode, box_convert_standard_mode
-        result2 = box_convert_mode(bbox=bbox1, src_mode=mode1, dst_mode=mode2)
+        result2 = box_convert_mode(boxes=boxes1, src_mode=mode1, dst_mode=mode2)
         assert_allclose(result2, expected_box, type_test=True, device_test=True, atol=0.0)
 
-        result1 = box_convert_mode(bbox=result2, src_mode=mode2, dst_mode=mode1)
-        assert_allclose(result1, bbox1, type_test=True, device_test=True, atol=0.0)
+        result1 = box_convert_mode(boxes=result2, src_mode=mode2, dst_mode=mode1)
+        assert_allclose(result1, boxes1, type_test=True, device_test=True, atol=0.0)
 
-        result_standard = box_convert_standard_mode(bbox=bbox1, mode=mode1)
-        expected_box_standard = box_convert_standard_mode(bbox=expected_box, mode=mode2)
+        result_standard = box_convert_standard_mode(boxes=boxes1, mode=mode1)
+        expected_box_standard = box_convert_standard_mode(boxes=expected_box, mode=mode2)
         assert_allclose(result_standard, expected_box_standard, type_test=True, device_test=True, atol=0.0)
 
 

From a149f5408d6cba9a3f2dd5f09d92718acc266650 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 04:29:12 -0400
Subject: [PATCH 14/49] reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  | 64 +++++++++++++++++++++++------------------
 monai/data/box_utils.py | 44 ++++++++++++++--------------
 2 files changed, 57 insertions(+), 51 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index c676497216..5fbbab9ea7 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -9,9 +9,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from copy import deepcopy
-from typing import Sequence, Union, Tuple
 from abc import ABC, abstractmethod
+from copy import deepcopy
+from typing import Sequence, Tuple, Union
 
 import numpy as np
 import torch
@@ -25,10 +25,11 @@
 # TO_REMOVE = 1  if in 'xxyy','xxyyzz' mode, the bottom-right corner is included in the box,
 #       i.e., when xmin=1, xmax=2, we have w = 2
 # Currently, only `TO_REMOVE = 0.` is supported
-TO_REMOVE = 0.  # xmax-xmin = w -TO_REMOVE.
+TO_REMOVE = 0.0  # xmax-xmin = w -TO_REMOVE.
+
 
 class BoxMode:
-    def __int__(self,):
+    def __int__(self):
         # The mapping that maps spatial dimension to mode string name
         self.dim_to_str_mapping = {2: None, 3: None}
 
@@ -55,9 +56,11 @@ def get_dim_from_boxes(self, boxes: torch.Tensor) -> int:
             boxes = torch.ones(10,6)
             boxmode.get_dim_from_boxes(boxes) will return 3
         """
-        if int(boxes.shape[1]) not in [4,6]:
-            raise ValueError(f"Currently we support only boxes with shape [N,4] or [N,6], got boxes with shape {boxes.shape}.")
-        spatial_dims = int(boxes.shape[1]//2)
+        if int(boxes.shape[1]) not in [4, 6]:
+            raise ValueError(
+                f"Currently we support only boxes with shape [N,4] or [N,6], got boxes with shape {boxes.shape}."
+            )
+        spatial_dims = int(boxes.shape[1] // 2)
         return spatial_dims
 
     def get_dim_from_corner(self, c: Sequence) -> int:
@@ -72,9 +75,11 @@ def get_dim_from_corner(self, c: Sequence) -> int:
             c = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
             boxmode.get_dim_from_corner(c) will return 2
         """
-        if len(c) not in [4,6]:
-            raise ValueError(f"Currently we support only boxes with shape [N,4] or [N,6], got box corner tuple with length {len(c)}.")
-        spatial_dims = int(len(c)//2)
+        if len(c) not in [4, 6]:
+            raise ValueError(
+                f"Currently we support only boxes with shape [N,4] or [N,6], got box corner tuple with length {len(c)}."
+            )
+        spatial_dims = int(len(c) // 2)
         return spatial_dims
 
     def check_corner(self, c: Sequence) -> bool:
@@ -90,7 +95,7 @@ def check_corner(self, c: Sequence) -> bool:
             boxmode.check_corner(c) will return True
         """
         spatial_dims = self.get_dim_from_corner(c)
-        box_error = c[spatial_dims] < c[ 0]
+        box_error = c[spatial_dims] < c[0]
         for axis in range(1, spatial_dims):
             box_error = box_error | (c[spatial_dims + axis] < c[axis])
         if box_error.sum() > 0:
@@ -133,10 +138,11 @@ class CornerCornerMode_TypeA(BoxMode):
     Also represented as "xyxy" or "xyzxyz"
     [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax]
     """
-    def __int__(self,):
+
+    def __int__(self):
         self.dim_to_str_mapping = {2: "xyxy", 3: "xyzxyx"}
 
-    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:        
+    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         spatial_dims = self.get_dim_from_boxes(boxes)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = boxes.split(1, dim=-1)
@@ -156,12 +162,14 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
         if spatial_dims == 2:
             return torch.cat((c[0], c[1], c[2], c[3]), dim=-1)
 
+
 class CornerCornerMode_TypeB(BoxMode):
     """
     Also represented as "xxyy" or "xxyyzz"
     [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax]
     """
-    def __int__(self,):
+
+    def __int__(self):
         self.dim_to_str_mapping = {2: "xxyy", 3: "xxyyzz"}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
@@ -184,13 +192,15 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
         if spatial_dims == 2:
             return torch.cat((c[0], c[2], c[1], c[3]), dim=-1)
 
+
 class CornerCornerMode_TypeC(BoxMode):
     """
     Also represented as "xyxy" or "xyxyzz"
     [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax]
     """
-    def __int__(self,):
-        self.dim_to_str_mapping = {2: "xyxy", 3: "xyxyzz"}        
+
+    def __int__(self):
+        self.dim_to_str_mapping = {2: "xyxy", 3: "xyxyzz"}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         spatial_dims = self.get_dim_from_boxes(boxes)
@@ -212,12 +222,14 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
         if spatial_dims == 2:
             return torch.cat((c[0], c[1], c[2], c[3]), dim=-1)
 
+
 class CornerSizeMode(BoxMode):
     """
     Also represented as "xywh" or "xyzwhd"
     [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
     """
-    def __int__(self,):
+
+    def __int__(self):
         self.dim_to_str_mapping = {2: "xywh", 3: "xyzwhd"}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
@@ -242,21 +254,16 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         else:
             raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
 
-
     def corner_to_box(self, c: Sequence) -> torch.Tensor:
         spatial_dims = self.get_dim_from_corner(c)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = c[0], c[1], c[2], c[3], c[4], c[5]
             return torch.cat(
-                    (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE),
-                    dim=-1,
-                )
+                (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE), dim=-1
+            )
         if spatial_dims == 2:
             xmin, ymin, xmax, ymax = c[0], c[1], c[2], c[3]
-            return torch.cat(
-                    (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE),
-                    dim=-1,
-                )
+            return torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
 
 
 class CenterSizeMode(BoxMode):
@@ -264,7 +271,8 @@ class CenterSizeMode(BoxMode):
     Also represented as "ccwh" or "cccwhd"
     [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
     """
-    def __int__(self,):
+
+    def __int__(self):
         self.dim_to_str_mapping = {2: "ccwh", 3: "cccwhd"}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
@@ -295,7 +303,7 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
             raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
 
     def corner_to_box(self, c: Sequence) -> torch.Tensor:
-        spatial_dims = int(len(c)//2)
+        spatial_dims = int(len(c) // 2)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = c[0], c[1], c[2], c[3], c[4], c[5]
             return torch.cat(
@@ -319,4 +327,4 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
                     ymax - ymin + TO_REMOVE,
                 ),
                 dim=-1,
-            )
\ No newline at end of file
+            )
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 2649316010..9b0c95b1e3 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -9,26 +9,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from copy import deepcopy
-from typing import Sequence, Union, Tuple
 from abc import ABC, abstractmethod
+from copy import deepcopy
+from typing import Sequence, Tuple, Union
 
 import numpy as np
 import torch
 
 from monai.config.type_definitions import NdarrayOrTensor
-from monai.utils.module import look_up_option
-from monai.utils.type_conversion import convert_data_type, convert_to_dst_type, convert_to_numpy, convert_to_tensor
 from monai.data import box_mode
 from monai.data.box_mode import (
     BoxMode,
-    CornerCornerMode_TypeA, 
-    CornerCornerMode_TypeB, 
-    CornerCornerMode_TypeC, 
-    CornerSizeMode, 
-    CenterSizeMode
-    )
-
+    CenterSizeMode,
+    CornerCornerMode_TypeA,
+    CornerCornerMode_TypeB,
+    CornerCornerMode_TypeC,
+    CornerSizeMode,
+)
+from monai.utils.module import look_up_option
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type, convert_to_numpy, convert_to_tensor
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
@@ -40,15 +39,15 @@
 # We support the conversion between several box modes, i.e., representation of a bounding box
 # BOXMODE_MAPPING maps string box mode to teh corresponding BoxMode class
 BOXMODE_MAPPING = {
-    "xyxy": CornerCornerMode_TypeA(), # [xmin, ymin, xmax, ymax]
-    "xyzxyz": CornerCornerMode_TypeA(), # [xmin, ymin, zmin, xmax, ymax, zmax]
-    "xxyy": CornerCornerMode_TypeB(), # [xmin, xmax, ymin, ymax]
-    "xxyyzz": CornerCornerMode_TypeB(), # [xmin, xmax, ymin, ymax, zmin, zmax]
-    "xyxyzz": CornerCornerMode_TypeC(), # [xmin, ymin, xmax, ymax, zmin, zmax]
-    "xywh": CornerSizeMode(), # [xmin, ymin, xsize, ysize]
-    "xyzwhd": CornerSizeMode(), # [xmin, ymin, zmin, xsize, ysize, zsize]
-    "ccwh": CenterSizeMode(), # [xcenter, ycenter, xsize, ysize]
-    "cccwhd": CenterSizeMode() # [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+    "xyxy": CornerCornerMode_TypeA(),  # [xmin, ymin, xmax, ymax]
+    "xyzxyz": CornerCornerMode_TypeA(),  # [xmin, ymin, zmin, xmax, ymax, zmax]
+    "xxyy": CornerCornerMode_TypeB(),  # [xmin, xmax, ymin, ymax]
+    "xxyyzz": CornerCornerMode_TypeB(),  # [xmin, xmax, ymin, ymax, zmin, zmax]
+    "xyxyzz": CornerCornerMode_TypeC(),  # [xmin, ymin, xmax, ymax, zmin, zmax]
+    "xywh": CornerSizeMode(),  # [xmin, ymin, xsize, ysize]
+    "xyzwhd": CornerSizeMode(),  # [xmin, ymin, zmin, xsize, ysize, zsize]
+    "ccwh": CenterSizeMode(),  # [xcenter, ycenter, xsize, ysize]
+    "cccwhd": CenterSizeMode(),  # [xcenter, ycenter, zcenter, xsize, ysize, zsize]
 }
 # The standard box mode we use in all the box util functions
 StandardMode = CornerCornerMode_TypeA
@@ -64,6 +63,7 @@ def get_boxmode(mode: Union[str, BoxMode, None] = None) -> BoxMode:
     else:
         raise ValueError("mode has to be chosen from [str, BoxMode, None].")
 
+
 def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> list:
     """
     Convert a torch.Tensor, or np array input to list
@@ -180,8 +180,6 @@ def box_convert_mode(
     # if not check_box_mode(boxes, src_mode):
     #     raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
 
-    
-
     # if mode not changed, return original box
     src_boxmode = get_boxmode(src_mode)
     dst_boxmode = get_boxmode(dst_mode)
@@ -194,7 +192,7 @@ def box_convert_mode(
 
         corners = src_boxmode.box_to_corner(boxes_t)
         boxes_t_dst = dst_boxmode.corner_to_box(corners)
-    
+
         # convert tensor back to numpy if needed
         boxes_dst, *_ = convert_to_dst_type(src=boxes_t_dst, dst=boxes)
         return boxes_dst

From 9f5e3f8fbd7ae1258dade215b333cd4eab63b5ee Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 04:42:05 -0400
Subject: [PATCH 15/49] reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  |  10 +--
 monai/data/box_utils.py | 132 +++++++++++++---------------------------
 tests/test_box_utils.py |  12 ++--
 3 files changed, 49 insertions(+), 105 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index 5fbbab9ea7..4307b52940 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -9,17 +9,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from abc import ABC, abstractmethod
-from copy import deepcopy
-from typing import Sequence, Tuple, Union
+from abc import abstractmethod
+from typing import Sequence, Tuple
 
-import numpy as np
 import torch
 
-from monai.config.type_definitions import NdarrayOrTensor
-from monai.utils.module import look_up_option
-from monai.utils.type_conversion import convert_data_type, convert_to_dst_type, convert_to_numpy, convert_to_tensor
-
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
 # TO_REMOVE = 1  if in 'xxyy','xxyyzz' mode, the bottom-right corner is included in the box,
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 9b0c95b1e3..1b4006269a 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -9,9 +9,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from abc import ABC, abstractmethod
 from copy import deepcopy
-from typing import Sequence, Tuple, Union
+from typing import Sequence, Union
 
 import numpy as np
 import torch
@@ -26,8 +25,8 @@
     CornerCornerMode_TypeC,
     CornerSizeMode,
 )
-from monai.utils.module import look_up_option
-from monai.utils.type_conversion import convert_data_type, convert_to_dst_type, convert_to_numpy, convert_to_tensor
+from monai.utils import look_up_option
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
@@ -76,91 +75,42 @@ def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> l
     return in_sequence.tolist() if isinstance(in_sequence, (torch.Tensor, np.ndarray)) else list(in_sequence)
 
 
-# def get_dimension(
-#     boxes: Union[torch.Tensor, np.ndarray, None] = None,
-#     image_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
-#     mode: Union[str, None] = None,
-# ) -> int:
-#     """
-#     Get spatial dimension for the giving setting.
-#     Missing input is allowed. But at least one of the input value should be given.
-#     It raises ValueError if the dimensions of multiple inputs do not match with each other.
-#     Args:
-#         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
-#         image_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
-#         mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-#     Returns:
-#         spatial_dimension: 2 or 3
-
-#     Example:
-#         boxes = torch.ones(10,6)
-#         get_dimension(boxes, mode="xyxy") will raise ValueError
-#         get_dimension(boxes, mode="xyzxyz") will return 3
-#         get_dimension(boxes, mode="xyzxyz", image_size=[100,200,200]) will return 3
-#         get_dimension(mode="xyzxyz") will return 3
-#     """
-#     spatial_dims_set = set()
-#     if image_size is not None:
-#         spatial_dims_set.add(len(image_size))
-#     if mode is not None:
-#         spatial_dims_set.add(int(len(mode) / 2))
-#     if boxes is not None:
-#         spatial_dims_set.add(int(boxes.shape[1] / 2))
-#     spatial_dims_list = list(spatial_dims_set)
-#     if len(spatial_dims_list) == 0:
-#         raise ValueError("At least one of boxes, image_size, and mode needs to be non-empty.")
-#     elif len(spatial_dims_list) == 1:
-#         spatial_dims = int(spatial_dims_list[0])
-#         spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
-#         return int(spatial_dims)
-#     else:
-#         raise ValueError("The dimension of boxes, image_size, mode should match with each other.")
-
-
-# def check_box_mode(boxes: NdarrayOrTensor, mode: Union[str, None] = None):
-#     """
-#     This function checks whether the boxes is valid.
-#     It ensures the box size is non-negative.
-#     Args:
-#         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
-#         mode: box mode, choose from SUPPORTED_MODE. If mode is not given, this func will assume mode is STANDARD_MODE
-#     Returns:
-#         raise Error is mode is not supported
-#         return False if box has negative size
-#         return True if no issue found
-
-#     Example:
-#         boxes = torch.ones(10,6)
-#         check_box_mode(boxes, mode="cccwhd")
-#     """
-#     if mode is None:
-#         mode = get_standard_mode(int(boxes.shape[1] / 2))
-#     mode = look_up_option(mode, supported=SUPPORTED_MODE)
-#     spatial_dims = get_dimension(boxes=boxes, mode=mode)
-
-#     # we need box size to be non-negative
-#     if mode in ["ccwh", "cccwhd", "xywh", "xyzwhd"]:
-#         box_error = boxes[:, spatial_dims] < 0
-#         for axis in range(1, spatial_dims):
-#             box_error = box_error | (boxes[:, spatial_dims + axis] < 0)
-#     elif mode in ["xxyy", "xxyyzz"]:
-#         box_error = boxes[:, 1] < boxes[:, 0]
-#         for axis in range(1, spatial_dims):
-#             box_error = box_error | (boxes[:, 2 * axis + 1] < boxes[:, 2 * axis])
-#     elif mode in ["xyxy", "xyzxyz"]:
-#         box_error = boxes[:, spatial_dims] < boxes[:, 0]
-#         for axis in range(1, spatial_dims):
-#             box_error = box_error | (boxes[:, spatial_dims + axis] < boxes[:, axis])
-#     else:
-#         raise ValueError(f"Box mode {mode} not in {SUPPORTED_MODE}.")
-
-#     if box_error.sum() > 0:
-#         return False
-
-#     return True
-
-
-def box_convert_mode(
+def get_dimension(
+    boxes: Union[torch.Tensor, np.ndarray, None] = None,
+    image_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
+) -> int:
+    """
+    Get spatial dimension for the giving setting.
+    Missing input is allowed. But at least one of the input value should be given.
+    It raises ValueError if the dimensions of multiple inputs do not match with each other.
+    Args:
+        boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
+        image_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+    Returns:
+        spatial_dimension: 2 or 3
+
+    Example:
+        boxes = torch.ones(10,6)
+        get_dimension(boxes, image_size=[100,200,200]) will return 3
+        get_dimension(boxes) will return 3
+    """
+    spatial_dims_set = set()
+    if image_size is not None:
+        spatial_dims_set.add(len(image_size))
+    if boxes is not None:
+        spatial_dims_set.add(int(boxes.shape[1] / 2))
+    spatial_dims_list = list(spatial_dims_set)
+    if len(spatial_dims_list) == 0:
+        raise ValueError("At least one of boxes, image_size, and mode needs to be non-empty.")
+    elif len(spatial_dims_list) == 1:
+        spatial_dims = int(spatial_dims_list[0])
+        spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
+        return int(spatial_dims)
+    else:
+        raise ValueError("The dimension of boxes, image_size, mode should match with each other.")
+
+
+def convert_box_mode(
     boxes: NdarrayOrTensor, src_mode: Union[str, BoxMode, None] = None, dst_mode: Union[str, BoxMode, None] = None
 ) -> NdarrayOrTensor:
     """
@@ -198,7 +148,7 @@ def box_convert_mode(
         return boxes_dst
 
 
-def box_convert_standard_mode(boxes: NdarrayOrTensor, mode: Union[str, BoxMode, None] = None) -> NdarrayOrTensor:
+def convert_box_to_standard_mode(boxes: NdarrayOrTensor, mode: Union[str, BoxMode, None] = None) -> NdarrayOrTensor:
     """
     Convert given boxes to standard mode
     Args:
@@ -211,4 +161,4 @@ def box_convert_standard_mode(boxes: NdarrayOrTensor, mode: Union[str, BoxMode,
         boxes = torch.ones(10,6)
         box_convert_standard_mode(boxes=boxes, mode="xxyyzz")
     """
-    return box_convert_mode(boxes=boxes, src_mode=mode, dst_mode=StandardMode())
+    return convert_box_mode(boxes=boxes, src_mode=mode, dst_mode=StandardMode())
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index 679ad76d6a..768efc14a5 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -17,7 +17,7 @@
 # import torch
 from parameterized import parameterized
 
-from monai.data.box_utils import box_convert_mode, box_convert_standard_mode
+from monai.data.box_utils import convert_box_mode, convert_box_to_standard_mode
 from monai.utils.type_conversion import convert_data_type
 from tests.utils import TEST_NDARRAYS, assert_allclose
 
@@ -118,15 +118,15 @@ def test_value(self, input_data, mode2, expected_box, expected_area):
             boxes1 = convert_data_type(boxes1, dtype=np.float16)[0]
             expected_box = convert_data_type(expected_box, dtype=np.float16)[0]
 
-        # test box_convert_mode, box_convert_standard_mode
-        result2 = box_convert_mode(boxes=boxes1, src_mode=mode1, dst_mode=mode2)
+        # test convert_box_mode, convert_box_to_standard_mode
+        result2 = convert_box_mode(boxes=boxes1, src_mode=mode1, dst_mode=mode2)
         assert_allclose(result2, expected_box, type_test=True, device_test=True, atol=0.0)
 
-        result1 = box_convert_mode(boxes=result2, src_mode=mode2, dst_mode=mode1)
+        result1 = convert_box_mode(boxes=result2, src_mode=mode2, dst_mode=mode1)
         assert_allclose(result1, boxes1, type_test=True, device_test=True, atol=0.0)
 
-        result_standard = box_convert_standard_mode(boxes=boxes1, mode=mode1)
-        expected_box_standard = box_convert_standard_mode(boxes=expected_box, mode=mode2)
+        result_standard = convert_box_to_standard_mode(boxes=boxes1, mode=mode1)
+        expected_box_standard = convert_box_to_standard_mode(boxes=expected_box, mode=mode2)
         assert_allclose(result_standard, expected_box_standard, type_test=True, device_test=True, atol=0.0)
 
 

From 2b10daf41ea7b58df9e3724418d3458bc70eb850 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 04:56:51 -0400
Subject: [PATCH 16/49] add enum

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  | 11 ++++++-----
 monai/data/box_utils.py | 19 ++++++++++---------
 monai/utils/enums.py    | 11 +++++++++++
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index 4307b52940..84d48798de 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -13,6 +13,7 @@
 from typing import Sequence, Tuple
 
 import torch
+from monai.utils.enums import BoundingBoxMode
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
@@ -134,7 +135,7 @@ class CornerCornerMode_TypeA(BoxMode):
     """
 
     def __int__(self):
-        self.dim_to_str_mapping = {2: "xyxy", 3: "xyzxyx"}
+        self.dim_to_str_mapping = {2: BoundingBoxMode.XYXY, 3: BoundingBoxMode.XYZXYZ}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         spatial_dims = self.get_dim_from_boxes(boxes)
@@ -164,7 +165,7 @@ class CornerCornerMode_TypeB(BoxMode):
     """
 
     def __int__(self):
-        self.dim_to_str_mapping = {2: "xxyy", 3: "xxyyzz"}
+        self.dim_to_str_mapping = {2: BoundingBoxMode.XXYY, 3: BoundingBoxMode.XXYYZZ}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         spatial_dims = self.get_dim_from_boxes(boxes)
@@ -194,7 +195,7 @@ class CornerCornerMode_TypeC(BoxMode):
     """
 
     def __int__(self):
-        self.dim_to_str_mapping = {2: "xyxy", 3: "xyxyzz"}
+        self.dim_to_str_mapping = {2: BoundingBoxMode.XYXY, 3: BoundingBoxMode.XYXYZZ}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         spatial_dims = self.get_dim_from_boxes(boxes)
@@ -224,7 +225,7 @@ class CornerSizeMode(BoxMode):
     """
 
     def __int__(self):
-        self.dim_to_str_mapping = {2: "xywh", 3: "xyzwhd"}
+        self.dim_to_str_mapping = {2: BoundingBoxMode.XYWH, 3: BoundingBoxMode.XYZWHD}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         # convert to float32 when computing torch.clamp, which does not support float16
@@ -267,7 +268,7 @@ class CenterSizeMode(BoxMode):
     """
 
     def __int__(self):
-        self.dim_to_str_mapping = {2: "ccwh", 3: "cccwhd"}
+        self.dim_to_str_mapping = {2: BoundingBoxMode.CCWH, 3: BoundingBoxMode.CCCWHD}
 
     def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         # convert to float32 when computing torch.clamp, which does not support float16
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 1b4006269a..31f80ba3d9 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -27,6 +27,7 @@
 )
 from monai.utils import look_up_option
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
+from monai.utils.enums import BoundingBoxMode
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
@@ -38,15 +39,15 @@
 # We support the conversion between several box modes, i.e., representation of a bounding box
 # BOXMODE_MAPPING maps string box mode to teh corresponding BoxMode class
 BOXMODE_MAPPING = {
-    "xyxy": CornerCornerMode_TypeA(),  # [xmin, ymin, xmax, ymax]
-    "xyzxyz": CornerCornerMode_TypeA(),  # [xmin, ymin, zmin, xmax, ymax, zmax]
-    "xxyy": CornerCornerMode_TypeB(),  # [xmin, xmax, ymin, ymax]
-    "xxyyzz": CornerCornerMode_TypeB(),  # [xmin, xmax, ymin, ymax, zmin, zmax]
-    "xyxyzz": CornerCornerMode_TypeC(),  # [xmin, ymin, xmax, ymax, zmin, zmax]
-    "xywh": CornerSizeMode(),  # [xmin, ymin, xsize, ysize]
-    "xyzwhd": CornerSizeMode(),  # [xmin, ymin, zmin, xsize, ysize, zsize]
-    "ccwh": CenterSizeMode(),  # [xcenter, ycenter, xsize, ysize]
-    "cccwhd": CenterSizeMode(),  # [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+    BoundingBoxMode.XYXY: CornerCornerMode_TypeA(),  # [xmin, ymin, xmax, ymax]
+    BoundingBoxMode.XYZXYZ: CornerCornerMode_TypeA(),  # [xmin, ymin, zmin, xmax, ymax, zmax]
+    BoundingBoxMode.XXYY: CornerCornerMode_TypeB(),  # [xmin, xmax, ymin, ymax]
+    BoundingBoxMode.XXYYZZ: CornerCornerMode_TypeB(),  # [xmin, xmax, ymin, ymax, zmin, zmax]
+    BoundingBoxMode.XYXYZZ: CornerCornerMode_TypeC(),  # [xmin, ymin, xmax, ymax, zmin, zmax]
+    BoundingBoxMode.XYWH: CornerSizeMode(),  # [xmin, ymin, xsize, ysize]
+    BoundingBoxMode.XYZWHD: CornerSizeMode(),  # [xmin, ymin, zmin, xsize, ysize, zsize]
+    BoundingBoxMode.CCWH: CenterSizeMode(),  # [xcenter, ycenter, xsize, ysize]
+    BoundingBoxMode.CCCWHD: CenterSizeMode(),  # [xcenter, ycenter, zcenter, xsize, ysize, zsize]
 }
 # The standard box mode we use in all the box util functions
 StandardMode = CornerCornerMode_TypeA
diff --git a/monai/utils/enums.py b/monai/utils/enums.py
index 8920f51a88..78a7653886 100644
--- a/monai/utils/enums.py
+++ b/monai/utils/enums.py
@@ -311,3 +311,14 @@ class JITMetadataKeys(Enum):
     TIMESTAMP = "timestamp"
     VERSION = "version"
     DESCRIPTION = "description"
+
+class BoundingBoxMode:
+    XYXY = "xyxy",  # [xmin, ymin, xmax, ymax]
+    XYZXYZ = "xyzxyz"  # [xmin, ymin, zmin, xmax, ymax, zmax]
+    XXYY = "xxyy"  # [xmin, xmax, ymin, ymax]
+    XXYYZZ = "xxyyzz"  # [xmin, xmax, ymin, ymax, zmin, zmax]
+    XYXYZZ = "xyxyzz"  # [xmin, ymin, xmax, ymax, zmin, zmax]
+    XYWH = "xywh"  # [xmin, ymin, xsize, ysize]
+    XYZWHD = "xyzwhd"  # [xmin, ymin, zmin, xsize, ysize, zsize]
+    CCWH = "ccwh"  # [xcenter, ycenter, xsize, ysize]
+    CCCWHD = "cccwhd"  # [xcenter, ycenter, zcenter, xsize, ysize, zsize]

From 9517c19427d82f7d09bbc30004a2c4de33b99cc5 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 04:59:20 -0400
Subject: [PATCH 17/49] change variable name

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 14 +++++++-------
 tests/test_box_utils.py | 22 +++++++++++-----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 31f80ba3d9..c25c428d21 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -78,7 +78,7 @@ def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> l
 
 def get_dimension(
     boxes: Union[torch.Tensor, np.ndarray, None] = None,
-    image_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
+    spatial_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
 ) -> int:
     """
     Get spatial dimension for the giving setting.
@@ -86,29 +86,29 @@ def get_dimension(
     It raises ValueError if the dimensions of multiple inputs do not match with each other.
     Args:
         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
-        image_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+        spatial_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
     Returns:
         spatial_dimension: 2 or 3
 
     Example:
         boxes = torch.ones(10,6)
-        get_dimension(boxes, image_size=[100,200,200]) will return 3
+        get_dimension(boxes, spatial_size=[100,200,200]) will return 3
         get_dimension(boxes) will return 3
     """
     spatial_dims_set = set()
-    if image_size is not None:
-        spatial_dims_set.add(len(image_size))
+    if spatial_size is not None:
+        spatial_dims_set.add(len(spatial_size))
     if boxes is not None:
         spatial_dims_set.add(int(boxes.shape[1] / 2))
     spatial_dims_list = list(spatial_dims_set)
     if len(spatial_dims_list) == 0:
-        raise ValueError("At least one of boxes, image_size, and mode needs to be non-empty.")
+        raise ValueError("At least one of boxes, spatial_size, and mode needs to be non-empty.")
     elif len(spatial_dims_list) == 1:
         spatial_dims = int(spatial_dims_list[0])
         spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
         return int(spatial_dims)
     else:
-        raise ValueError("The dimension of boxes, image_size, mode should match with each other.")
+        raise ValueError("The dimension of boxes, spatial_size, mode should match with each other.")
 
 
 def convert_box_mode(
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index 768efc14a5..fb776b2dc6 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -29,10 +29,10 @@
 TESTS = []
 for p in TEST_NDARRAYS:
     boxes = [[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]
-    image_size = [4, 4, 4]
+    spatial_size = [4, 4, 4]
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzwhd", "half": False},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzwhd", "half": False},
             "xyzwhd",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
             p([0, 12, 12]),
@@ -40,7 +40,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzwhd", "half": True},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzwhd", "half": True},
             "xyzxyz",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 3, 3], [0, 1, 1, 2, 3, 4]]),
             p([0, 12, 12]),
@@ -48,7 +48,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzwhd", "half": False},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzwhd", "half": False},
             "xxyyzz",
             p([[0, 0, 0, 0, 0, 0], [0, 2, 1, 3, 0, 3], [0, 2, 1, 3, 1, 4]]),
             p([0, 12, 12]),
@@ -56,7 +56,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzxyz", "half": False},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzxyz", "half": False},
             "xyzwhd",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 1, 3], [0, 1, 1, 2, 1, 2]]),
             p([0, 6, 4]),
@@ -64,7 +64,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzxyz", "half": True},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzxyz", "half": True},
             "xyzxyz",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
             p([0, 6, 4]),
@@ -72,7 +72,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xyzxyz", "half": False},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzxyz", "half": False},
             "xxyyzz",
             p([[0, 0, 0, 0, 0, 0], [0, 2, 1, 2, 0, 3], [0, 2, 1, 2, 1, 3]]),
             p([0, 6, 4]),
@@ -80,7 +80,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xxyyzz", "half": False},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xxyyzz", "half": False},
             "xxyyzz",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
             p([0, 2, 1]),
@@ -88,7 +88,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xxyyzz", "half": True},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xxyyzz", "half": True},
             "xyzxyz",
             p([[0, 0, 0, 0, 0, 0], [0, 0, 2, 1, 2, 3], [0, 1, 2, 1, 2, 3]]),
             p([0, 2, 1]),
@@ -96,7 +96,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "image_size": image_size, "mode": "xxyyzz", "half": False},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xxyyzz", "half": False},
             "xyzwhd",
             p([[0, 0, 0, 0, 0, 0], [0, 0, 2, 1, 2, 1], [0, 1, 2, 1, 1, 1]]),
             p([0, 2, 1]),
@@ -110,7 +110,7 @@ def test_value(self, input_data, mode2, expected_box, expected_area):
         expected_box = convert_data_type(expected_box, dtype=np.float32)[0]
         boxes1 = convert_data_type(input_data["boxes"], dtype=np.float32)[0]
         mode1 = input_data["mode"]
-        # image_size = input_data["image_size"]
+        # spatial_size = input_data["spatial_size"]
         half_bool = input_data["half"]
 
         # test float16

From 94d5e11d1b5b842dbe97c90e2051bf2cc465d26f Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 05:06:45 -0400
Subject: [PATCH 18/49] correct typo

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  |  1 +
 monai/data/box_utils.py | 18 +++++++++++++-----
 monai/utils/enums.py    |  3 ++-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index 84d48798de..2a544dadaa 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -13,6 +13,7 @@
 from typing import Sequence, Tuple
 
 import torch
+
 from monai.utils.enums import BoundingBoxMode
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index c25c428d21..b117cf6347 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -26,8 +26,8 @@
     CornerSizeMode,
 )
 from monai.utils import look_up_option
-from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 from monai.utils.enums import BoundingBoxMode
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
 # TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
@@ -37,7 +37,7 @@
 TO_REMOVE = box_mode.TO_REMOVE
 
 # We support the conversion between several box modes, i.e., representation of a bounding box
-# BOXMODE_MAPPING maps string box mode to teh corresponding BoxMode class
+# BOXMODE_MAPPING maps string box mode to the corresponding BoxMode class
 BOXMODE_MAPPING = {
     BoundingBoxMode.XYXY: CornerCornerMode_TypeA(),  # [xmin, ymin, xmax, ymax]
     BoundingBoxMode.XYZXYZ: CornerCornerMode_TypeA(),  # [xmin, ymin, zmin, xmax, ymax, zmax]
@@ -54,6 +54,17 @@
 
 
 def get_boxmode(mode: Union[str, BoxMode, None] = None) -> BoxMode:
+    """
+    This function returns BoxMode object from giving mode according to BOXMODE_MAPPING
+    Args:
+        mode: source box mode. If mode is not given, this func will assume mode is StandardMode()
+    Returns:
+        BoxMode object
+
+    Example:
+        mode = "xyzxyz"
+        get_boxmode(mode) will return CornerCornerMode_TypeA()
+    """
     if isinstance(mode, BoxMode):
         return mode
     elif isinstance(mode, str):
@@ -128,9 +139,6 @@ def convert_box_mode(
         box_convert_mode(boxes=boxes, src_mode="xyzxyz", dst_mode="cccwhd")
     """
 
-    # if not check_box_mode(boxes, src_mode):
-    #     raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
-
     # if mode not changed, return original box
     src_boxmode = get_boxmode(src_mode)
     dst_boxmode = get_boxmode(dst_mode)
diff --git a/monai/utils/enums.py b/monai/utils/enums.py
index 78a7653886..8d7c29c835 100644
--- a/monai/utils/enums.py
+++ b/monai/utils/enums.py
@@ -312,8 +312,9 @@ class JITMetadataKeys(Enum):
     VERSION = "version"
     DESCRIPTION = "description"
 
+
 class BoundingBoxMode:
-    XYXY = "xyxy",  # [xmin, ymin, xmax, ymax]
+    XYXY = ("xyxy",)  # [xmin, ymin, xmax, ymax]
     XYZXYZ = "xyzxyz"  # [xmin, ymin, zmin, xmax, ymax, zmax]
     XXYY = "xxyy"  # [xmin, xmax, ymin, ymax]
     XXYYZZ = "xxyyzz"  # [xmin, xmax, ymin, ymax, zmin, zmax]

From cc4eee5d27cd255fac7b971ae98e7be9604b63a6 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 05:08:40 -0400
Subject: [PATCH 19/49] change comment

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  | 4 ++--
 monai/data/box_utils.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index 2a544dadaa..ec60da2639 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -16,9 +16,9 @@
 
 from monai.utils.enums import BoundingBoxMode
 
-# TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
+# TO_REMOVE = 0 if the bottom-right corner pixel/voxel is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
-# TO_REMOVE = 1  if in 'xxyy','xxyyzz' mode, the bottom-right corner is included in the box,
+# TO_REMOVE = 1  if the bottom-right corner pixel/voxel is included in the box,
 #       i.e., when xmin=1, xmax=2, we have w = 2
 # Currently, only `TO_REMOVE = 0.` is supported
 TO_REMOVE = 0.0  # xmax-xmin = w -TO_REMOVE.
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index b117cf6347..4012a36a25 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -29,11 +29,11 @@
 from monai.utils.enums import BoundingBoxMode
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
-# TO_REMOVE = 0 if in 'xxyy','xxyyzz' mode, the bottom-right corner is not included in the box,
+# TO_REMOVE = 0 if the bottom-right corner pixel/voxel is not included in the box,
 #      i.e., when xmin=1, xmax=2, we have w = 1
-# TO_REMOVE = 1  if in 'xxyy','xxyyzz' mode, the bottom-right corner is included in the box,
+# TO_REMOVE = 1  if the bottom-right corner pixel/voxel is included in the box,
 #       i.e., when xmin=1, xmax=2, we have w = 2
-# Currently, only `TO_REMOVE = 0` is supported
+# Currently, only `TO_REMOVE = 0.` is supported
 TO_REMOVE = box_mode.TO_REMOVE
 
 # We support the conversion between several box modes, i.e., representation of a bounding box

From c72369fcbf9851b3fad6e8c05d191cb4c4cc6e49 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 05:15:43 -0400
Subject: [PATCH 20/49] reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  |  6 +++---
 monai/data/box_utils.py | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index ec60da2639..b2d3095333 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -129,7 +129,7 @@ def corner_to_box(self, corner: Sequence) -> torch.Tensor:
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
 
-class CornerCornerMode_TypeA(BoxMode):
+class CornerCornerModeTypeA(BoxMode):
     """
     Also represented as "xyxy" or "xyzxyz"
     [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax]
@@ -159,7 +159,7 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
             return torch.cat((c[0], c[1], c[2], c[3]), dim=-1)
 
 
-class CornerCornerMode_TypeB(BoxMode):
+class CornerCornerModeTypeB(BoxMode):
     """
     Also represented as "xxyy" or "xxyyzz"
     [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax]
@@ -189,7 +189,7 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
             return torch.cat((c[0], c[2], c[1], c[3]), dim=-1)
 
 
-class CornerCornerMode_TypeC(BoxMode):
+class CornerCornerModeTypeC(BoxMode):
     """
     Also represented as "xyxy" or "xyxyzz"
     [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax]
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 4012a36a25..c19a3b5088 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -20,9 +20,9 @@
 from monai.data.box_mode import (
     BoxMode,
     CenterSizeMode,
-    CornerCornerMode_TypeA,
-    CornerCornerMode_TypeB,
-    CornerCornerMode_TypeC,
+    CornerCornerModeTypeA,
+    CornerCornerModeTypeB,
+    CornerCornerModeTypeC,
     CornerSizeMode,
 )
 from monai.utils import look_up_option
@@ -39,18 +39,18 @@
 # We support the conversion between several box modes, i.e., representation of a bounding box
 # BOXMODE_MAPPING maps string box mode to the corresponding BoxMode class
 BOXMODE_MAPPING = {
-    BoundingBoxMode.XYXY: CornerCornerMode_TypeA(),  # [xmin, ymin, xmax, ymax]
-    BoundingBoxMode.XYZXYZ: CornerCornerMode_TypeA(),  # [xmin, ymin, zmin, xmax, ymax, zmax]
-    BoundingBoxMode.XXYY: CornerCornerMode_TypeB(),  # [xmin, xmax, ymin, ymax]
-    BoundingBoxMode.XXYYZZ: CornerCornerMode_TypeB(),  # [xmin, xmax, ymin, ymax, zmin, zmax]
-    BoundingBoxMode.XYXYZZ: CornerCornerMode_TypeC(),  # [xmin, ymin, xmax, ymax, zmin, zmax]
+    BoundingBoxMode.XYXY: CornerCornerModeTypeA(),  # [xmin, ymin, xmax, ymax]
+    BoundingBoxMode.XYZXYZ: CornerCornerModeTypeA(),  # [xmin, ymin, zmin, xmax, ymax, zmax]
+    BoundingBoxMode.XXYY: CornerCornerModeTypeB(),  # [xmin, xmax, ymin, ymax]
+    BoundingBoxMode.XXYYZZ: CornerCornerModeTypeB(),  # [xmin, xmax, ymin, ymax, zmin, zmax]
+    BoundingBoxMode.XYXYZZ: CornerCornerModeTypeC(),  # [xmin, ymin, xmax, ymax, zmin, zmax]
     BoundingBoxMode.XYWH: CornerSizeMode(),  # [xmin, ymin, xsize, ysize]
     BoundingBoxMode.XYZWHD: CornerSizeMode(),  # [xmin, ymin, zmin, xsize, ysize, zsize]
     BoundingBoxMode.CCWH: CenterSizeMode(),  # [xcenter, ycenter, xsize, ysize]
     BoundingBoxMode.CCCWHD: CenterSizeMode(),  # [xcenter, ycenter, zcenter, xsize, ysize, zsize]
 }
 # The standard box mode we use in all the box util functions
-StandardMode = CornerCornerMode_TypeA
+StandardMode = CornerCornerModeTypeA
 
 
 def get_boxmode(mode: Union[str, BoxMode, None] = None) -> BoxMode:
@@ -63,7 +63,7 @@ def get_boxmode(mode: Union[str, BoxMode, None] = None) -> BoxMode:
 
     Example:
         mode = "xyzxyz"
-        get_boxmode(mode) will return CornerCornerMode_TypeA()
+        get_boxmode(mode) will return CornerCornerModeTypeA()
     """
     if isinstance(mode, BoxMode):
         return mode

From 923f7711c9c4f6b6c52fb7fbb86189e1be3aec6e Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 05:17:19 -0400
Subject: [PATCH 21/49] typo

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/utils/enums.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/utils/enums.py b/monai/utils/enums.py
index 8d7c29c835..3153202b71 100644
--- a/monai/utils/enums.py
+++ b/monai/utils/enums.py
@@ -314,7 +314,7 @@ class JITMetadataKeys(Enum):
 
 
 class BoundingBoxMode:
-    XYXY = ("xyxy",)  # [xmin, ymin, xmax, ymax]
+    XYXY = "xyxy"  # [xmin, ymin, xmax, ymax]
     XYZXYZ = "xyzxyz"  # [xmin, ymin, zmin, xmax, ymax, zmax]
     XXYY = "xxyy"  # [xmin, xmax, ymin, ymax]
     XXYYZZ = "xxyyzz"  # [xmin, xmax, ymin, ymax, zmin, zmax]

From 581a329f3ad1c689d906be1643ff97de21795ccd Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 05:42:29 -0400
Subject: [PATCH 22/49] reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index b2d3095333..e4b5bc541d 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -27,7 +27,7 @@
 class BoxMode:
     def __int__(self):
         # The mapping that maps spatial dimension to mode string name
-        self.dim_to_str_mapping = {2: None, 3: None}
+        self.dim_to_str_mapping = {2: "", 3: ""}
 
     def get_str_mode(self, spatial_dims: int) -> str:
         """
@@ -143,19 +143,20 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = boxes.split(1, dim=-1)
             corner = xmin, ymin, zmin, xmax, ymax, zmax
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
             corner = xmin, ymin, xmax, ymax
         if self.check_corner(corner):
             return corner
         else:
             raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+            return
 
     def corner_to_box(self, c: Sequence) -> torch.Tensor:
         spatial_dims = self.get_dim_from_corner(c)
         if spatial_dims == 3:
             return torch.cat((c[0], c[1], c[2], c[3], c[4], c[5]), dim=-1)
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             return torch.cat((c[0], c[1], c[2], c[3]), dim=-1)
 
 
@@ -173,19 +174,20 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         if spatial_dims == 3:
             xmin, xmax, ymin, ymax, zmin, zmax = boxes.split(1, dim=-1)
             corner = xmin, ymin, zmin, xmax, ymax, zmax
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             xmin, xmax, ymin, ymax = boxes.split(1, dim=-1)
             corner = xmin, ymin, xmax, ymax
         if self.check_corner(corner):
             return corner
         else:
             raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+            return
 
     def corner_to_box(self, c: Sequence) -> torch.Tensor:
         spatial_dims = self.get_dim_from_corner(c)
         if spatial_dims == 3:
             return torch.cat((c[0], c[3], c[1], c[4], c[2], c[5]), dim=-1)
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             return torch.cat((c[0], c[2], c[1], c[3]), dim=-1)
 
 
@@ -203,19 +205,20 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         if spatial_dims == 3:
             xmin, ymin, xmax, ymax, zmin, zmax = boxes.split(1, dim=-1)
             corner = xmin, ymin, zmin, xmax, ymax, zmax
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
             corner = xmin, ymin, xmax, ymax
         if self.check_corner(corner):
             return corner
         else:
             raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+            return
 
     def corner_to_box(self, c: Sequence) -> torch.Tensor:
         spatial_dims = self.get_dim_from_corner(c)
         if spatial_dims == 3:
             return torch.cat((c[0], c[1], c[3], c[4], c[2], c[5]), dim=-1)
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             return torch.cat((c[0], c[1], c[2], c[3]), dim=-1)
 
 
@@ -240,7 +243,7 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
             ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             zmax = zmin + (d - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             corner = xmin, ymin, zmin, xmax, ymax, zmax
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             xmin, ymin, w, h = boxes.split(1, dim=-1)
             xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
@@ -249,6 +252,7 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
             return corner
         else:
             raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+            return
 
     def corner_to_box(self, c: Sequence) -> torch.Tensor:
         spatial_dims = self.get_dim_from_corner(c)
@@ -257,7 +261,7 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
             return torch.cat(
                 (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE), dim=-1
             )
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = c[0], c[1], c[2], c[3]
             return torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
 
@@ -286,7 +290,7 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
             zmin = zc - ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             zmax = zc + ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             corner = xmin, ymin, zmin, xmax, ymax, zmax
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             xc, yc, w, h = boxes.split(1, dim=-1)
             xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             xmax = xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
@@ -297,6 +301,7 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
             return corner
         else:
             raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+            return
 
     def corner_to_box(self, c: Sequence) -> torch.Tensor:
         spatial_dims = int(len(c) // 2)
@@ -313,7 +318,7 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
                 ),
                 dim=-1,
             )
-        if spatial_dims == 2:
+        elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = c[0], c[1], c[2], c[3]
             return torch.cat(
                 (

From 7508d90eb7904f35099ea39f5236e1bb210e3a47 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 12:15:56 -0400
Subject: [PATCH 23/49] reorganize class method and static method

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  | 239 ++++++++++++++--------------------------
 monai/data/box_utils.py | 128 +++++++++++++--------
 2 files changed, 167 insertions(+), 200 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index e4b5bc541d..8a952c2ee2 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -10,26 +10,34 @@
 # limitations under the License.
 
 from abc import abstractmethod
-from typing import Sequence, Tuple
+from typing import Dict, Sequence, Tuple
 
 import torch
 
+import monai
 from monai.utils.enums import BoundingBoxMode
 
-# TO_REMOVE = 0 if the bottom-right corner pixel/voxel is not included in the box,
-#      i.e., when xmin=1, xmax=2, we have w = 1
-# TO_REMOVE = 1  if the bottom-right corner pixel/voxel is included in the box,
-#       i.e., when xmin=1, xmax=2, we have w = 2
-# Currently, only `TO_REMOVE = 0.` is supported
+# TO_REMOVE = 0.0 if the bottom-right corner pixel/voxel is not included in the box,
+#      i.e., when xmin=1., xmax=2., we have w = 1.
+# TO_REMOVE = 1.0  if the bottom-right corner pixel/voxel is included in the box,
+#       i.e., when xmin=1., xmax=2., we have w = 2.
+# Currently, only `TO_REMOVE = 0.0` is supported
 TO_REMOVE = 0.0  # xmax-xmin = w -TO_REMOVE.
 
 
 class BoxMode:
-    def __int__(self):
-        # The mapping that maps spatial dimension to mode string name
-        self.dim_to_str_mapping = {2: "", 3: ""}
+    """
+    An abstract class of a ``BoxMode``.
+    A BoxMode is callable that converts box mode of boxes.
+    It always creates a copy and will not modify boxes in place,
+    the implementation should be aware of:
+        #. remember to define ``name`` which is a dictionary that maps spatial_dims to box mode string
+    """
+
+    name: Dict[int, str] = {}
 
-    def get_str_mode(self, spatial_dims: int) -> str:
+    @classmethod
+    def get_name(cls, spatial_dims: int) -> str:
         """
         Get the mode name for the given spatial dimension
         Args:
@@ -37,70 +45,12 @@ def get_str_mode(self, spatial_dims: int) -> str:
         Returns:
             mode string name
         Example:
-            boxmode.get_str_mode(spatial_dims = 2)
-        """
-        return self.dim_to_str_mapping[spatial_dims]
-
-    def get_dim_from_boxes(self, boxes: torch.Tensor) -> int:
-        """
-        Get spatial dimension for the given boxes
-        Args:
-            boxes: bounding box, Nx4 or Nx6 torch tensor
-        Returns:
-            spatial_dims: 2 or 3
-        Example:
-            boxes = torch.ones(10,6)
-            boxmode.get_dim_from_boxes(boxes) will return 3
-        """
-        if int(boxes.shape[1]) not in [4, 6]:
-            raise ValueError(
-                f"Currently we support only boxes with shape [N,4] or [N,6], got boxes with shape {boxes.shape}."
-            )
-        spatial_dims = int(boxes.shape[1] // 2)
-        return spatial_dims
-
-    def get_dim_from_corner(self, c: Sequence) -> int:
-        """
-        Get spatial dimension for the given box corners
-        Args:
-            c: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
-            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
-        Returns:
-            spatial_dims: 2 or 3
-        Example:
-            c = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
-            boxmode.get_dim_from_corner(c) will return 2
+            BoxMode.get_name(spatial_dims = 2)
         """
-        if len(c) not in [4, 6]:
-            raise ValueError(
-                f"Currently we support only boxes with shape [N,4] or [N,6], got box corner tuple with length {len(c)}."
-            )
-        spatial_dims = int(len(c) // 2)
-        return spatial_dims
-
-    def check_corner(self, c: Sequence) -> bool:
-        """
-        check the validity for the given box corners
-        Args:
-            c: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
-            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
-        Returns:
-            bool, whether the box is valid
-        Example:
-            c = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
-            boxmode.check_corner(c) will return True
-        """
-        spatial_dims = self.get_dim_from_corner(c)
-        box_error = c[spatial_dims] < c[0]
-        for axis in range(1, spatial_dims):
-            box_error = box_error | (c[spatial_dims + axis] < c[axis])
-        if box_error.sum() > 0:
-            return False
-        else:
-            return True
+        return cls.name[spatial_dims]
 
     @abstractmethod
-    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         """
         Return the box corners for the given boxes
         Args:
@@ -108,23 +58,25 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
         Returns:
             corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
         Example:
+            boxmode = BoxMode()
             boxes = torch.ones(10,6)
-            boxmode.box_to_corner(boxes) will a 6-element tuple, each element is a 10x1 tensor
+            boxmode.boxes_to_corners(boxes) will a 6-element tuple, each element is a 10x1 tensor
         """
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
     @abstractmethod
-    def corner_to_box(self, corner: Sequence) -> torch.Tensor:
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
         """
         Return the boxes converted from the given box corners
         Args:
-            c: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            corners: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
             (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
         Returns:
             boxes: bounding box, Nx4 or Nx6 torch tensor
         Example:
-            c = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
-            boxmode.corner_to_box(c) will return a 10x4 tensor
+            boxmode = BoxMode()
+            corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
+            boxmode.corners_to_boxes(corners) will return a 10x4 tensor
         """
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
@@ -135,29 +87,24 @@ class CornerCornerModeTypeA(BoxMode):
     [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax]
     """
 
-    def __int__(self):
-        self.dim_to_str_mapping = {2: BoundingBoxMode.XYXY, 3: BoundingBoxMode.XYZXYZ}
+    name = {2: BoundingBoxMode.XYXY, 3: BoundingBoxMode.XYZXYZ}
 
-    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
-        spatial_dims = self.get_dim_from_boxes(boxes)
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = boxes.split(1, dim=-1)
-            corner = xmin, ymin, zmin, xmax, ymax, zmax
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
-            corner = xmin, ymin, xmax, ymax
-        if self.check_corner(corner):
-            return corner
-        else:
-            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
-            return
-
-    def corner_to_box(self, c: Sequence) -> torch.Tensor:
-        spatial_dims = self.get_dim_from_corner(c)
+            corners = xmin, ymin, xmax, ymax
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
-            return torch.cat((c[0], c[1], c[2], c[3], c[4], c[5]), dim=-1)
+            return torch.cat((corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]), dim=-1)
         elif spatial_dims == 2:
-            return torch.cat((c[0], c[1], c[2], c[3]), dim=-1)
+            return torch.cat((corners[0], corners[1], corners[2], corners[3]), dim=-1)
 
 
 class CornerCornerModeTypeB(BoxMode):
@@ -166,29 +113,24 @@ class CornerCornerModeTypeB(BoxMode):
     [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax]
     """
 
-    def __int__(self):
-        self.dim_to_str_mapping = {2: BoundingBoxMode.XXYY, 3: BoundingBoxMode.XXYYZZ}
+    name = {2: BoundingBoxMode.XXYY, 3: BoundingBoxMode.XXYYZZ}
 
-    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
-        spatial_dims = self.get_dim_from_boxes(boxes)
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
         if spatial_dims == 3:
             xmin, xmax, ymin, ymax, zmin, zmax = boxes.split(1, dim=-1)
-            corner = xmin, ymin, zmin, xmax, ymax, zmax
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, xmax, ymin, ymax = boxes.split(1, dim=-1)
-            corner = xmin, ymin, xmax, ymax
-        if self.check_corner(corner):
-            return corner
-        else:
-            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
-            return
-
-    def corner_to_box(self, c: Sequence) -> torch.Tensor:
-        spatial_dims = self.get_dim_from_corner(c)
+            corners = xmin, ymin, xmax, ymax
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
-            return torch.cat((c[0], c[3], c[1], c[4], c[2], c[5]), dim=-1)
+            return torch.cat((corners[0], corners[3], corners[1], corners[4], corners[2], corners[5]), dim=-1)
         elif spatial_dims == 2:
-            return torch.cat((c[0], c[2], c[1], c[3]), dim=-1)
+            return torch.cat((corners[0], corners[2], corners[1], corners[3]), dim=-1)
 
 
 class CornerCornerModeTypeC(BoxMode):
@@ -197,29 +139,24 @@ class CornerCornerModeTypeC(BoxMode):
     [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax]
     """
 
-    def __int__(self):
-        self.dim_to_str_mapping = {2: BoundingBoxMode.XYXY, 3: BoundingBoxMode.XYXYZZ}
+    name = {2: BoundingBoxMode.XYXY, 3: BoundingBoxMode.XYXYZZ}
 
-    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
-        spatial_dims = self.get_dim_from_boxes(boxes)
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
         if spatial_dims == 3:
             xmin, ymin, xmax, ymax, zmin, zmax = boxes.split(1, dim=-1)
-            corner = xmin, ymin, zmin, xmax, ymax, zmax
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
-            corner = xmin, ymin, xmax, ymax
-        if self.check_corner(corner):
-            return corner
-        else:
-            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
-            return
-
-    def corner_to_box(self, c: Sequence) -> torch.Tensor:
-        spatial_dims = self.get_dim_from_corner(c)
+            corners = xmin, ymin, xmax, ymax
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
-            return torch.cat((c[0], c[1], c[3], c[4], c[2], c[5]), dim=-1)
+            return torch.cat((corners[0], corners[1], corners[3], corners[4], corners[2], corners[5]), dim=-1)
         elif spatial_dims == 2:
-            return torch.cat((c[0], c[1], c[2], c[3]), dim=-1)
+            return torch.cat((corners[0], corners[1], corners[2], corners[3]), dim=-1)
 
 
 class CornerSizeMode(BoxMode):
@@ -228,41 +165,36 @@ class CornerSizeMode(BoxMode):
     [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
     """
 
-    def __int__(self):
-        self.dim_to_str_mapping = {2: BoundingBoxMode.XYWH, 3: BoundingBoxMode.XYZWHD}
+    name = {2: BoundingBoxMode.XYWH, 3: BoundingBoxMode.XYZWHD}
 
-    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         # convert to float32 when computing torch.clamp, which does not support float16
         box_dtype = boxes.dtype
         compute_dtype = torch.float32
 
-        spatial_dims = self.get_dim_from_boxes(boxes)
+        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
         if spatial_dims == 3:
             xmin, ymin, zmin, w, h, d = boxes.split(1, dim=-1)
             xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             zmax = zmin + (d - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            corner = xmin, ymin, zmin, xmax, ymax, zmax
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, ymin, w, h = boxes.split(1, dim=-1)
             xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            corner = xmin, ymin, xmax, ymax
-        if self.check_corner(corner):
-            return corner
-        else:
-            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
-            return
-
-    def corner_to_box(self, c: Sequence) -> torch.Tensor:
-        spatial_dims = self.get_dim_from_corner(c)
+            corners = xmin, ymin, xmax, ymax
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
-            xmin, ymin, zmin, xmax, ymax, zmax = c[0], c[1], c[2], c[3], c[4], c[5]
+            xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
             return torch.cat(
                 (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE), dim=-1
             )
         elif spatial_dims == 2:
-            xmin, ymin, xmax, ymax = c[0], c[1], c[2], c[3]
+            xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
             return torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
 
 
@@ -272,15 +204,14 @@ class CenterSizeMode(BoxMode):
     [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
     """
 
-    def __int__(self):
-        self.dim_to_str_mapping = {2: BoundingBoxMode.CCWH, 3: BoundingBoxMode.CCCWHD}
+    name = {2: BoundingBoxMode.CCWH, 3: BoundingBoxMode.CCCWHD}
 
-    def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         # convert to float32 when computing torch.clamp, which does not support float16
         box_dtype = boxes.dtype
         compute_dtype = torch.float32
 
-        spatial_dims = self.get_dim_from_boxes(boxes)
+        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
         if spatial_dims == 3:
             xc, yc, zc, w, h, d = boxes.split(1, dim=-1)
             xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
@@ -289,24 +220,20 @@ def box_to_corner(self, boxes: torch.Tensor) -> Tuple:
             ymax = yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             zmin = zc - ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             zmax = zc + ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            corner = xmin, ymin, zmin, xmax, ymax, zmax
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xc, yc, w, h = boxes.split(1, dim=-1)
             xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             xmax = xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymin = yc - ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymax = yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            corner = xmin, ymin, xmax, ymax
-        if self.check_corner(corner):
-            return corner
-        else:
-            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
-            return
-
-    def corner_to_box(self, c: Sequence) -> torch.Tensor:
-        spatial_dims = int(len(c) // 2)
+            corners = xmin, ymin, xmax, ymax
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
-            xmin, ymin, zmin, xmax, ymax, zmax = c[0], c[1], c[2], c[3], c[4], c[5]
+            xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
             return torch.cat(
                 (
                     (xmin + xmax + TO_REMOVE) / 2.0,
@@ -319,7 +246,7 @@ def corner_to_box(self, c: Sequence) -> torch.Tensor:
                 dim=-1,
             )
         elif spatial_dims == 2:
-            xmin, ymin, xmax, ymax = c[0], c[1], c[2], c[3]
+            xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
             return torch.cat(
                 (
                     (xmin + xmax + TO_REMOVE) / 2.0,
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index c19a3b5088..e71b379604 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -9,8 +9,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import inspect
 from copy import deepcopy
-from typing import Sequence, Union
+from typing import Sequence, Type, Union
 
 import numpy as np
 import torch
@@ -29,52 +30,22 @@
 from monai.utils.enums import BoundingBoxMode
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
-# TO_REMOVE = 0 if the bottom-right corner pixel/voxel is not included in the box,
-#      i.e., when xmin=1, xmax=2, we have w = 1
-# TO_REMOVE = 1  if the bottom-right corner pixel/voxel is included in the box,
-#       i.e., when xmin=1, xmax=2, we have w = 2
-# Currently, only `TO_REMOVE = 0.` is supported
+# TO_REMOVE = 0.0 if the bottom-right corner pixel/voxel is not included in the box,
+#      i.e., when xmin=1., xmax=2., we have w = 1.
+# TO_REMOVE = 1.0  if the bottom-right corner pixel/voxel is included in the box,
+#       i.e., when xmin=1., xmax=2., we have w = 2.
+# Currently, only `TO_REMOVE = 0.0` is supported
 TO_REMOVE = box_mode.TO_REMOVE
 
+# We support 2_d or 3-D bounding boxes
+SUPPORTED_SPATIAL_DIMS = [2, 3]
+
 # We support the conversion between several box modes, i.e., representation of a bounding box
-# BOXMODE_MAPPING maps string box mode to the corresponding BoxMode class
-BOXMODE_MAPPING = {
-    BoundingBoxMode.XYXY: CornerCornerModeTypeA(),  # [xmin, ymin, xmax, ymax]
-    BoundingBoxMode.XYZXYZ: CornerCornerModeTypeA(),  # [xmin, ymin, zmin, xmax, ymax, zmax]
-    BoundingBoxMode.XXYY: CornerCornerModeTypeB(),  # [xmin, xmax, ymin, ymax]
-    BoundingBoxMode.XXYYZZ: CornerCornerModeTypeB(),  # [xmin, xmax, ymin, ymax, zmin, zmax]
-    BoundingBoxMode.XYXYZZ: CornerCornerModeTypeC(),  # [xmin, ymin, xmax, ymax, zmin, zmax]
-    BoundingBoxMode.XYWH: CornerSizeMode(),  # [xmin, ymin, xsize, ysize]
-    BoundingBoxMode.XYZWHD: CornerSizeMode(),  # [xmin, ymin, zmin, xsize, ysize, zsize]
-    BoundingBoxMode.CCWH: CenterSizeMode(),  # [xcenter, ycenter, xsize, ysize]
-    BoundingBoxMode.CCCWHD: CenterSizeMode(),  # [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-}
+SUPPORTED_MODES = [CornerCornerModeTypeA, CornerCornerModeTypeB, CornerCornerModeTypeC, CornerSizeMode, CenterSizeMode]
 # The standard box mode we use in all the box util functions
 StandardMode = CornerCornerModeTypeA
 
 
-def get_boxmode(mode: Union[str, BoxMode, None] = None) -> BoxMode:
-    """
-    This function returns BoxMode object from giving mode according to BOXMODE_MAPPING
-    Args:
-        mode: source box mode. If mode is not given, this func will assume mode is StandardMode()
-    Returns:
-        BoxMode object
-
-    Example:
-        mode = "xyzxyz"
-        get_boxmode(mode) will return CornerCornerModeTypeA()
-    """
-    if isinstance(mode, BoxMode):
-        return mode
-    elif isinstance(mode, str):
-        return BOXMODE_MAPPING[mode]
-    elif mode is None:
-        return StandardMode()
-    else:
-        raise ValueError("mode has to be chosen from [str, BoxMode, None].")
-
-
 def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> list:
     """
     Convert a torch.Tensor, or np array input to list
@@ -89,6 +60,7 @@ def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> l
 
 def get_dimension(
     boxes: Union[torch.Tensor, np.ndarray, None] = None,
+    corners: Union[Sequence, None] = None,
     spatial_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
 ) -> int:
     """
@@ -97,9 +69,11 @@ def get_dimension(
     It raises ValueError if the dimensions of multiple inputs do not match with each other.
     Args:
         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
-        spatial_size: Length of 2 or 3. Data format is list, or np.ndarray, or tensor of int
+        corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor or ndarray
+        spatial_size: The spatial size of the image where the boxes are attached.
+                len(spatial_size) should be 2 or 3. Data format is list, or np.ndarray, or tensor of int
     Returns:
-        spatial_dimension: 2 or 3
+        spatial_dims: 2 or 3
 
     Example:
         boxes = torch.ones(10,6)
@@ -109,7 +83,17 @@ def get_dimension(
     spatial_dims_set = set()
     if spatial_size is not None:
         spatial_dims_set.add(len(spatial_size))
+    if corners is not None:
+        if len(corners) not in [4, 6]:
+            raise ValueError(
+                f"Currently we support only boxes with shape [N,4] or [N,6], got box corner tuple with length {len(corners)}."
+            )
+        spatial_dims_set.add(len(corners) // 2)
     if boxes is not None:
+        if int(boxes.shape[1]) not in [4, 6]:
+            raise ValueError(
+                f"Currently we support only boxes with shape [N,4] or [N,6], got boxes with shape {boxes.shape}."
+            )
         spatial_dims_set.add(int(boxes.shape[1] / 2))
     spatial_dims_list = list(spatial_dims_set)
     if len(spatial_dims_list) == 0:
@@ -122,6 +106,55 @@ def get_dimension(
         raise ValueError("The dimension of boxes, spatial_size, mode should match with each other.")
 
 
+def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:
+    """
+    This function returns BoxMode object from giving mode according to BOXMODE_MAPPING
+    Args:
+        mode: source box mode. If mode is not given, this func will assume mode is StandardMode()
+    Returns:
+        BoxMode object
+
+    Example:
+        mode = "xyzxyz"
+        get_boxmode(mode) will return CornerCornerModeTypeA()
+    """
+    if isinstance(mode, BoxMode):
+        return mode
+    elif inspect.isclass(mode) and issubclass(mode, BoxMode):
+        return mode(*args, **kwargs)
+    elif isinstance(mode, str):
+        for m in SUPPORTED_MODES:
+            for n in SUPPORTED_SPATIAL_DIMS:
+                if m.get_name(n) == mode:
+                    return m(*args, **kwargs)
+    elif mode is None:
+        return StandardMode(*args, **kwargs)
+    else:
+        raise ValueError(f"Unsupported box mode: {mode}.")
+
+
+def check_corners(corners: Sequence) -> bool:
+    """
+    check the validity for the given box corners
+    Args:
+        corners: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+        (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+    Returns:
+        bool, whether the box is valid
+    Example:
+        corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
+        boxmode.check_corner(corners) will return True
+    """
+    spatial_dims = get_dimension(corners=corners)
+    box_error = corners[spatial_dims] < corners[0]
+    for axis in range(1, spatial_dims):
+        box_error = box_error | (corners[spatial_dims + axis] < corners[axis])
+    if box_error.sum() > 0:
+        return False
+    else:
+        return True
+
+
 def convert_box_mode(
     boxes: NdarrayOrTensor, src_mode: Union[str, BoxMode, None] = None, dst_mode: Union[str, BoxMode, None] = None
 ) -> NdarrayOrTensor:
@@ -149,8 +182,15 @@ def convert_box_mode(
         # convert numpy to tensor if needed
         boxes_t, *_ = convert_data_type(boxes, torch.Tensor)
 
-        corners = src_boxmode.box_to_corner(boxes_t)
-        boxes_t_dst = dst_boxmode.corner_to_box(corners)
+        # convert boxes to corners
+        corners = src_boxmode.boxes_to_corners(boxes_t)
+
+        # check validity of corners
+        if not check_corners(corners):
+            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+
+        # convert corners to boxes
+        boxes_t_dst = dst_boxmode.corners_to_boxes(corners)
 
         # convert tensor back to numpy if needed
         boxes_dst, *_ = convert_to_dst_type(src=boxes_t_dst, dst=boxes)

From 6c4361d8b552135e0ec87bb8ee144f302231f444 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 12:20:04 -0400
Subject: [PATCH 24/49] typo

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index e71b379604..07c4a2dec5 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -143,7 +143,7 @@ def check_corners(corners: Sequence) -> bool:
         bool, whether the box is valid
     Example:
         corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
-        boxmode.check_corner(corners) will return True
+        check_corner(corners) will return True
     """
     spatial_dims = get_dimension(corners=corners)
     box_error = corners[spatial_dims] < corners[0]

From 8a3e713f42e2f511f48f9595247e5f2ce44221c5 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 12:26:28 -0400
Subject: [PATCH 25/49] change to subclass of Enum

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py | 2 +-
 monai/utils/enums.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index 8a952c2ee2..d7ae7fd3b9 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -47,7 +47,7 @@ def get_name(cls, spatial_dims: int) -> str:
         Example:
             BoxMode.get_name(spatial_dims = 2)
         """
-        return cls.name[spatial_dims]
+        return cls.name[spatial_dims].value
 
     @abstractmethod
     def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
diff --git a/monai/utils/enums.py b/monai/utils/enums.py
index 3153202b71..bae5984b04 100644
--- a/monai/utils/enums.py
+++ b/monai/utils/enums.py
@@ -313,7 +313,7 @@ class JITMetadataKeys(Enum):
     DESCRIPTION = "description"
 
 
-class BoundingBoxMode:
+class BoundingBoxMode(Enum):
     XYXY = "xyxy"  # [xmin, ymin, xmax, ymax]
     XYZXYZ = "xyzxyz"  # [xmin, ymin, zmin, xmax, ymax, zmax]
     XXYY = "xxyy"  # [xmin, xmax, ymin, ymax]

From b052a7facc5a70b092e7546d5eba4effc32a0f09 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 12:33:08 -0400
Subject: [PATCH 26/49] cleanup import

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 07c4a2dec5..0dd60ec29d 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -27,7 +27,6 @@
     CornerSizeMode,
 )
 from monai.utils import look_up_option
-from monai.utils.enums import BoundingBoxMode
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
 # TO_REMOVE = 0.0 if the bottom-right corner pixel/voxel is not included in the box,

From 4bc7b13a39553b6473e35820caed31d4372af9a5 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 15:23:28 -0400
Subject: [PATCH 27/49] add docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 57 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 0dd60ec29d..a6a40ca4a0 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -70,7 +70,7 @@ def get_dimension(
         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
         corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor or ndarray
         spatial_size: The spatial size of the image where the boxes are attached.
-                len(spatial_size) should be 2 or 3. Data format is list, or np.ndarray, or tensor of int
+                len(spatial_size) should be in [2, 3].
     Returns:
         spatial_dims: 2 or 3
 
@@ -109,7 +109,7 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
     """
     This function returns BoxMode object from giving mode according to BOXMODE_MAPPING
     Args:
-        mode: source box mode. If mode is not given, this func will assume mode is StandardMode()
+        mode: source box mode. If mode is not given, this func will assume mode is StandardMode
     Returns:
         BoxMode object
 
@@ -155,14 +155,36 @@ def check_corners(corners: Sequence) -> bool:
 
 
 def convert_box_mode(
-    boxes: NdarrayOrTensor, src_mode: Union[str, BoxMode, None] = None, dst_mode: Union[str, BoxMode, None] = None
+    boxes: NdarrayOrTensor,
+    src_mode: Union[str, BoxMode, Type[BoxMode], None] = None,
+    dst_mode: Union[str, BoxMode, Type[BoxMode], None] = None,
 ) -> NdarrayOrTensor:
     """
     This function converts the boxes in src_mode to the dst_mode
     Args:
         boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray
-        src_mode: source box mode. If mode is not given, this func will assume mode is StandardMode()
-        dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode()
+        src_mode: source box mode. If mode is not given, this func will assume mode is StandardMode.
+            It can be:
+            #. str: choose from monai.utils.enums.BoundingBoxMode, including
+                "xyxy": [xmin, ymin, xmax, ymax]
+                "xyzxyz": [xmin, ymin, zmin, xmax, ymax, zmax]
+                "xxyy": [xmin, xmax, ymin, ymax]
+                "xxyyzz": [xmin, xmax, ymin, ymax, zmin, zmax]
+                "xyxyzz": [xmin, ymin, xmax, ymax, zmin, zmax]
+                "xywh": [xmin, ymin, xsize, ysize]
+                "xyzwhd": [xmin, ymin, zmin, xsize, ysize, zsize]
+                "ccwh": [xcenter, ycenter, xsize, ysize]
+                "cccwhd": [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+            #. BoxMode class: choose from
+                CornerCornerModeTypeA: "xyxy", "xyzxyz"
+                CornerCornerModeTypeB: "xxyy", "xxyyzz"
+                CornerCornerModeTypeC: "xyxy", "xyxyzz"
+                CornerSizeMode: "xywh", "xyzwhd"
+                CenterSizeMode: "ccwh", "cccwhd"
+            #. BoxMode instance
+            #. None: will assume mode is StandardMode
+        dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode.
+            Data type same as src_mode.
     Returns:
         boxes_dst: bounding box with target mode, does not share memory with original boxes
 
@@ -196,12 +218,33 @@ def convert_box_mode(
         return boxes_dst
 
 
-def convert_box_to_standard_mode(boxes: NdarrayOrTensor, mode: Union[str, BoxMode, None] = None) -> NdarrayOrTensor:
+def convert_box_to_standard_mode(
+    boxes: NdarrayOrTensor, mode: Union[str, BoxMode, Type[BoxMode], None] = None
+) -> NdarrayOrTensor:
     """
     Convert given boxes to standard mode
     Args:
         boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode: source box mode. If mode is not given, this func will assume mode is StandardMode()
+        mode: source box mode. If mode is not given, this func will assume mode is StandardMode
+        It can be:
+            #. str: choose from monai.utils.enums.BoundingBoxMode, including
+                "xyxy": [xmin, ymin, xmax, ymax]
+                "xyzxyz": [xmin, ymin, zmin, xmax, ymax, zmax]
+                "xxyy": [xmin, xmax, ymin, ymax]
+                "xxyyzz": [xmin, xmax, ymin, ymax, zmin, zmax]
+                "xyxyzz": [xmin, ymin, xmax, ymax, zmin, zmax]
+                "xywh": [xmin, ymin, xsize, ysize]
+                "xyzwhd": [xmin, ymin, zmin, xsize, ysize, zsize]
+                "ccwh": [xcenter, ycenter, xsize, ysize]
+                "cccwhd": [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+            #. BoxMode class: choose from
+                CornerCornerModeTypeA: "xyxy", "xyzxyz"
+                CornerCornerModeTypeB: "xxyy", "xxyyzz"
+                CornerCornerModeTypeC: "xyxy", "xyxyzz"
+                CornerSizeMode: "xywh", "xyzwhd"
+                CenterSizeMode: "ccwh", "cccwhd"
+            #. BoxMode instance
+            #. None: will assume mode is StandardMode
     Returns:
         boxes_standard: bounding box with standard mode, does not share memory with original boxes
 

From 956db2a4e22d581d336684fe7cd28ea083f43028 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 15:28:01 -0400
Subject: [PATCH 28/49] add docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 56 ++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index a6a40ca4a0..30380daf4b 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -166,21 +166,21 @@ def convert_box_mode(
         src_mode: source box mode. If mode is not given, this func will assume mode is StandardMode.
             It can be:
             #. str: choose from monai.utils.enums.BoundingBoxMode, including
-                "xyxy": [xmin, ymin, xmax, ymax]
-                "xyzxyz": [xmin, ymin, zmin, xmax, ymax, zmax]
-                "xxyy": [xmin, xmax, ymin, ymax]
-                "xxyyzz": [xmin, xmax, ymin, ymax, zmin, zmax]
-                "xyxyzz": [xmin, ymin, xmax, ymax, zmin, zmax]
-                "xywh": [xmin, ymin, xsize, ysize]
-                "xyzwhd": [xmin, ymin, zmin, xsize, ysize, zsize]
-                "ccwh": [xcenter, ycenter, xsize, ysize]
-                "cccwhd": [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+                "xyxy": boxes has format [xmin, ymin, xmax, ymax]
+                "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
+                "xxyy": boxes has format [xmin, xmax, ymin, ymax]
+                "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
+                "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
+                "xywh": boxes has format [xmin, ymin, xsize, ysize]
+                "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
+                "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
+                "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
             #. BoxMode class: choose from
-                CornerCornerModeTypeA: "xyxy", "xyzxyz"
-                CornerCornerModeTypeB: "xxyy", "xxyyzz"
-                CornerCornerModeTypeC: "xyxy", "xyxyzz"
-                CornerSizeMode: "xywh", "xyzwhd"
-                CenterSizeMode: "ccwh", "cccwhd"
+                CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
+                CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
+                CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
+                CornerSizeMode: equivalent to "xywh" or "xyzwhd"
+                CenterSizeMode: equivalent to "ccwh" or "cccwhd"
             #. BoxMode instance
             #. None: will assume mode is StandardMode
         dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode.
@@ -228,21 +228,21 @@ def convert_box_to_standard_mode(
         mode: source box mode. If mode is not given, this func will assume mode is StandardMode
         It can be:
             #. str: choose from monai.utils.enums.BoundingBoxMode, including
-                "xyxy": [xmin, ymin, xmax, ymax]
-                "xyzxyz": [xmin, ymin, zmin, xmax, ymax, zmax]
-                "xxyy": [xmin, xmax, ymin, ymax]
-                "xxyyzz": [xmin, xmax, ymin, ymax, zmin, zmax]
-                "xyxyzz": [xmin, ymin, xmax, ymax, zmin, zmax]
-                "xywh": [xmin, ymin, xsize, ysize]
-                "xyzwhd": [xmin, ymin, zmin, xsize, ysize, zsize]
-                "ccwh": [xcenter, ycenter, xsize, ysize]
-                "cccwhd": [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+                "xyxy": boxes has format [xmin, ymin, xmax, ymax]
+                "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
+                "xxyy": boxes has format [xmin, xmax, ymin, ymax]
+                "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
+                "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
+                "xywh": boxes has format [xmin, ymin, xsize, ysize]
+                "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
+                "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
+                "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
             #. BoxMode class: choose from
-                CornerCornerModeTypeA: "xyxy", "xyzxyz"
-                CornerCornerModeTypeB: "xxyy", "xxyyzz"
-                CornerCornerModeTypeC: "xyxy", "xyxyzz"
-                CornerSizeMode: "xywh", "xyzwhd"
-                CenterSizeMode: "ccwh", "cccwhd"
+                CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
+                CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
+                CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
+                CornerSizeMode: equivalent to "xywh" or "xyzwhd"
+                CenterSizeMode: equivalent to "ccwh" or "cccwhd"
             #. BoxMode instance
             #. None: will assume mode is StandardMode
     Returns:

From 696e19716f0d190f82a1a02f47f890c4b2d78fa0 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 15:30:20 -0400
Subject: [PATCH 29/49] typo

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 30380daf4b..6aeb6913ca 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -36,7 +36,7 @@
 # Currently, only `TO_REMOVE = 0.0` is supported
 TO_REMOVE = box_mode.TO_REMOVE
 
-# We support 2_d or 3-D bounding boxes
+# We support 2-D or 3-D bounding boxes
 SUPPORTED_SPATIAL_DIMS = [2, 3]
 
 # We support the conversion between several box modes, i.e., representation of a bounding box

From 0425cb9f1fcf37f0eda847a389c80bf84ec34a7e Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 15:33:33 -0400
Subject: [PATCH 30/49] update security check

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 6aeb6913ca..5fed7caebe 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -81,6 +81,10 @@ def get_dimension(
     """
     spatial_dims_set = set()
     if spatial_size is not None:
+        if len(spatial_size) not in SUPPORTED_SPATIAL_DIMS:
+            raise ValueError(
+                f"Currently we support only boxes on 2-D and 3-D images, got image spatial_size {spatial_size}."
+            )
         spatial_dims_set.add(len(spatial_size))
     if corners is not None:
         if len(corners) not in [4, 6]:
@@ -96,13 +100,13 @@ def get_dimension(
         spatial_dims_set.add(int(boxes.shape[1] / 2))
     spatial_dims_list = list(spatial_dims_set)
     if len(spatial_dims_list) == 0:
-        raise ValueError("At least one of boxes, spatial_size, and mode needs to be non-empty.")
+        raise ValueError("At least one of the inputs needs to be non-empty.")
     elif len(spatial_dims_list) == 1:
         spatial_dims = int(spatial_dims_list[0])
         spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
         return int(spatial_dims)
     else:
-        raise ValueError("The dimension of boxes, spatial_size, mode should match with each other.")
+        raise ValueError("The dimensions of multiple inputs should match with each other.")
 
 
 def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:

From d169e15d249e3f3d70af37e4e53486e73382a5e7 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 15:37:03 -0400
Subject: [PATCH 31/49] update docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 5fed7caebe..dd692cd0a5 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -168,7 +168,8 @@ def convert_box_mode(
     Args:
         boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray
         src_mode: source box mode. If mode is not given, this func will assume mode is StandardMode.
-            It can be:
+        dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode.
+        src_mode and dst_mode can be:
             #. str: choose from monai.utils.enums.BoundingBoxMode, including
                 "xyxy": boxes has format [xmin, ymin, xmax, ymax]
                 "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
@@ -187,14 +188,12 @@ def convert_box_mode(
                 CenterSizeMode: equivalent to "ccwh" or "cccwhd"
             #. BoxMode instance
             #. None: will assume mode is StandardMode
-        dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode.
-            Data type same as src_mode.
     Returns:
         boxes_dst: bounding box with target mode, does not share memory with original boxes
 
     Example:
         boxes = torch.ones(10,6)
-        box_convert_mode(boxes=boxes, src_mode="xyzxyz", dst_mode="cccwhd")
+        box_convert_mode(boxes=boxes, src_mode="xyzxyz", dst_mode=CenterSizeMode)
     """
 
     # if mode not changed, return original box

From 9ac7673ed1d31c7da39f17add9996407998daf41 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 15:55:07 -0400
Subject: [PATCH 32/49] update get_dimension, prepare for more box util funcs

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index dd692cd0a5..a4bc9faec1 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -59,6 +59,7 @@ def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> l
 
 def get_dimension(
     boxes: Union[torch.Tensor, np.ndarray, None] = None,
+    points: Union[torch.Tensor, np.ndarray, None] = None,
     corners: Union[Sequence, None] = None,
     spatial_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
 ) -> int:
@@ -68,6 +69,7 @@ def get_dimension(
     It raises ValueError if the dimensions of multiple inputs do not match with each other.
     Args:
         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
+        points: points, Nx2 or Nx3 torch tensor or ndarray
         corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor or ndarray
         spatial_size: The spatial size of the image where the boxes are attached.
                 len(spatial_size) should be in [2, 3].
@@ -77,27 +79,36 @@ def get_dimension(
     Example:
         boxes = torch.ones(10,6)
         get_dimension(boxes, spatial_size=[100,200,200]) will return 3
+        get_dimension(boxes, spatial_size=[100,200]) will raise ValueError
         get_dimension(boxes) will return 3
     """
     spatial_dims_set = set()
-    if spatial_size is not None:
-        if len(spatial_size) not in SUPPORTED_SPATIAL_DIMS:
+
+    if boxes is not None:
+        if int(boxes.shape[1]) not in [4, 6]:
             raise ValueError(
-                f"Currently we support only boxes on 2-D and 3-D images, got image spatial_size {spatial_size}."
+                f"Currently we support only boxes with shape [N,4] or [N,6], got boxes with shape {boxes.shape}."
             )
-        spatial_dims_set.add(len(spatial_size))
+        spatial_dims_set.add(int(boxes.shape[1] / 2))
+    if points is not None:
+        if int(points.shape[1]) not in SUPPORTED_SPATIAL_DIMS:
+            raise ValueError(
+                f"Currently we support only points with shape [N,2] or [N,3], got boxes with shape {points.shape}."
+            )
+        spatial_dims_set.add(int(points.shape[1]))
     if corners is not None:
         if len(corners) not in [4, 6]:
             raise ValueError(
                 f"Currently we support only boxes with shape [N,4] or [N,6], got box corner tuple with length {len(corners)}."
             )
         spatial_dims_set.add(len(corners) // 2)
-    if boxes is not None:
-        if int(boxes.shape[1]) not in [4, 6]:
+    if spatial_size is not None:
+        if len(spatial_size) not in SUPPORTED_SPATIAL_DIMS:
             raise ValueError(
-                f"Currently we support only boxes with shape [N,4] or [N,6], got boxes with shape {boxes.shape}."
+                f"Currently we support only boxes on 2-D and 3-D images, got image spatial_size {spatial_size}."
             )
-        spatial_dims_set.add(int(boxes.shape[1] / 2))
+        spatial_dims_set.add(len(spatial_size))
+
     spatial_dims_list = list(spatial_dims_set)
     if len(spatial_dims_list) == 0:
         raise ValueError("At least one of the inputs needs to be non-empty.")

From d7203ee4fa803e347718d360e24d0c62b64cda5a Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 21:32:25 -0400
Subject: [PATCH 33/49] add docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  |  5 ++++-
 monai/data/box_utils.py | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index d7ae7fd3b9..e90bd62689 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -34,7 +34,10 @@ class BoxMode:
         #. remember to define ``name`` which is a dictionary that maps spatial_dims to box mode string
     """
 
-    name: Dict[int, str] = {}
+    name: Dict[int, BoundingBoxMode] = {}
+
+    def __init__(self):
+        pass
 
     @classmethod
     def get_name(cls, spatial_dims: int) -> str:
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index a4bc9faec1..8064d2c2fa 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -125,6 +125,25 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
     This function returns BoxMode object from giving mode according to BOXMODE_MAPPING
     Args:
         mode: source box mode. If mode is not given, this func will assume mode is StandardMode
+        It can be:
+            #. str: choose from monai.utils.enums.BoundingBoxMode, including
+                "xyxy": boxes has format [xmin, ymin, xmax, ymax]
+                "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
+                "xxyy": boxes has format [xmin, xmax, ymin, ymax]
+                "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
+                "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
+                "xywh": boxes has format [xmin, ymin, xsize, ysize]
+                "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
+                "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
+                "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+            #. BoxMode class: choose from
+                CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
+                CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
+                CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
+                CornerSizeMode: equivalent to "xywh" or "xyzwhd"
+                CenterSizeMode: equivalent to "ccwh" or "cccwhd"
+            #. BoxMode instance
+            #. None: will assume mode is StandardMode
     Returns:
         BoxMode object
 

From 3c73f3c34cdbbbc4292a96feba2ecabea71b33ef Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Mon, 16 May 2022 22:39:26 -0400
Subject: [PATCH 34/49] reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_mode.py  | 45 +++++++++++++++++++++++------------------
 monai/data/box_utils.py | 11 ++++++----
 tests/test_box_utils.py | 11 +++++-----
 3 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index e90bd62689..51b20d1c2d 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -9,7 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from typing import Dict, Sequence, Tuple
 
 import torch
@@ -25,7 +25,7 @@
 TO_REMOVE = 0.0  # xmax-xmin = w -TO_REMOVE.
 
 
-class BoxMode:
+class BoxMode(ABC):
     """
     An abstract class of a ``BoxMode``.
     A BoxMode is callable that converts box mode of boxes.
@@ -34,11 +34,11 @@ class BoxMode:
         #. remember to define ``name`` which is a dictionary that maps spatial_dims to box mode string
     """
 
-    name: Dict[int, BoundingBoxMode] = {}
-
     def __init__(self):
         pass
 
+    name: Dict[int, BoundingBoxMode] = {}
+
     @classmethod
     def get_name(cls, spatial_dims: int) -> str:
         """
@@ -63,7 +63,7 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         Example:
             boxmode = BoxMode()
             boxes = torch.ones(10,6)
-            boxmode.boxes_to_corners(boxes) will a 6-element tuple, each element is a 10x1 tensor
+            boxmode.boxes_to_corners(boxes) will return a 6-element tuple, each element is a 10x1 tensor
         """
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
@@ -96,7 +96,7 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = boxes.split(1, dim=-1)
-            corners = xmin, ymin, zmin, xmax, ymax, zmax
+            return xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
             corners = xmin, ymin, xmax, ymax
@@ -105,9 +105,10 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
         spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
-            return torch.cat((corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]), dim=-1)
+            boxes = torch.cat((corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]), dim=-1)
         elif spatial_dims == 2:
-            return torch.cat((corners[0], corners[1], corners[2], corners[3]), dim=-1)
+            boxes = torch.cat((corners[0], corners[1], corners[2], corners[3]), dim=-1)
+        return boxes
 
 
 class CornerCornerModeTypeB(BoxMode):
@@ -122,7 +123,7 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
         if spatial_dims == 3:
             xmin, xmax, ymin, ymax, zmin, zmax = boxes.split(1, dim=-1)
-            corners = xmin, ymin, zmin, xmax, ymax, zmax
+            return xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, xmax, ymin, ymax = boxes.split(1, dim=-1)
             corners = xmin, ymin, xmax, ymax
@@ -131,9 +132,10 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
         spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
-            return torch.cat((corners[0], corners[3], corners[1], corners[4], corners[2], corners[5]), dim=-1)
+            boxes = torch.cat((corners[0], corners[3], corners[1], corners[4], corners[2], corners[5]), dim=-1)
         elif spatial_dims == 2:
-            return torch.cat((corners[0], corners[2], corners[1], corners[3]), dim=-1)
+            boxes = torch.cat((corners[0], corners[2], corners[1], corners[3]), dim=-1)
+        return boxes
 
 
 class CornerCornerModeTypeC(BoxMode):
@@ -148,7 +150,7 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
         if spatial_dims == 3:
             xmin, ymin, xmax, ymax, zmin, zmax = boxes.split(1, dim=-1)
-            corners = xmin, ymin, zmin, xmax, ymax, zmax
+            return xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
             corners = xmin, ymin, xmax, ymax
@@ -157,9 +159,10 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
         spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
-            return torch.cat((corners[0], corners[1], corners[3], corners[4], corners[2], corners[5]), dim=-1)
+            boxes = torch.cat((corners[0], corners[1], corners[3], corners[4], corners[2], corners[5]), dim=-1)
         elif spatial_dims == 2:
-            return torch.cat((corners[0], corners[1], corners[2], corners[3]), dim=-1)
+            boxes = torch.cat((corners[0], corners[1], corners[2], corners[3]), dim=-1)
+        return boxes
 
 
 class CornerSizeMode(BoxMode):
@@ -181,7 +184,7 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
             xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             zmax = zmin + (d - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            corners = xmin, ymin, zmin, xmax, ymax, zmax
+            return xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, ymin, w, h = boxes.split(1, dim=-1)
             xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
@@ -193,12 +196,13 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
         spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
-            return torch.cat(
+            boxes = torch.cat(
                 (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE), dim=-1
             )
         elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
-            return torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
+            boxes = torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
+        return boxes
 
 
 class CenterSizeMode(BoxMode):
@@ -223,7 +227,7 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
             ymax = yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             zmin = zc - ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             zmax = zc + ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            corners = xmin, ymin, zmin, xmax, ymax, zmax
+            return xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xc, yc, w, h = boxes.split(1, dim=-1)
             xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
@@ -237,7 +241,7 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
         spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
-            return torch.cat(
+            boxes = torch.cat(
                 (
                     (xmin + xmax + TO_REMOVE) / 2.0,
                     (ymin + ymax + TO_REMOVE) / 2.0,
@@ -250,7 +254,7 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
             )
         elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
-            return torch.cat(
+            boxes = torch.cat(
                 (
                     (xmin + xmax + TO_REMOVE) / 2.0,
                     (ymin + ymax + TO_REMOVE) / 2.0,
@@ -259,3 +263,4 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
                 ),
                 dim=-1,
             )
+        return boxes
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 8064d2c2fa..fc9ddb0ac9 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -153,17 +153,20 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
     """
     if isinstance(mode, BoxMode):
         return mode
-    elif inspect.isclass(mode) and issubclass(mode, BoxMode):
-        return mode(*args, **kwargs)
+
+    boxmode: Type[BoxMode]
+    if inspect.isclass(mode) and issubclass(mode, BoxMode):
+        boxmode = mode
     elif isinstance(mode, str):
         for m in SUPPORTED_MODES:
             for n in SUPPORTED_SPATIAL_DIMS:
                 if m.get_name(n) == mode:
-                    return m(*args, **kwargs)
+                    boxmode = m
     elif mode is None:
-        return StandardMode(*args, **kwargs)
+        boxmode = StandardMode
     else:
         raise ValueError(f"Unsupported box mode: {mode}.")
+    return boxmode(*args, **kwargs)
 
 
 def check_corners(corners: Sequence) -> bool:
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index fb776b2dc6..897f178fa2 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -17,6 +17,7 @@
 # import torch
 from parameterized import parameterized
 
+from monai.data.box_mode import CornerCornerModeTypeA, CornerCornerModeTypeB, CornerSizeMode
 from monai.data.box_utils import convert_box_mode, convert_box_to_standard_mode
 from monai.utils.type_conversion import convert_data_type
 from tests.utils import TEST_NDARRAYS, assert_allclose
@@ -33,7 +34,7 @@
     TESTS.append(
         [
             {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzwhd", "half": False},
-            "xyzwhd",
+            CornerSizeMode,
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
             p([0, 12, 12]),
         ]
@@ -56,7 +57,7 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzxyz", "half": False},
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": CornerCornerModeTypeA(), "half": False},
             "xyzwhd",
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 1, 3], [0, 1, 1, 2, 1, 2]]),
             p([0, 6, 4]),
@@ -64,8 +65,8 @@
     )
     TESTS.append(
         [
-            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzxyz", "half": True},
-            "xyzxyz",
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": CornerCornerModeTypeA, "half": True},
+            CornerCornerModeTypeA,
             p([[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]),
             p([0, 6, 4]),
         ]
@@ -73,7 +74,7 @@
     TESTS.append(
         [
             {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzxyz", "half": False},
-            "xxyyzz",
+            CornerCornerModeTypeB(),
             p([[0, 0, 0, 0, 0, 0], [0, 2, 1, 2, 0, 3], [0, 2, 1, 2, 1, 3]]),
             p([0, 6, 4]),
         ]

From 51c51660fb25f629fd77aed9519910ffd6932ce5 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Tue, 17 May 2022 14:01:34 -0400
Subject: [PATCH 35/49] update code and docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/__init__.py         |   1 +
 monai/data/box_mode.py         |  94 ++++++++++----------------
 monai/data/box_utils.py        | 116 +++++++++++++++------------------
 monai/utils/enums.py           |   2 +-
 monai/utils/type_conversion.py |  12 ++++
 tests/test_box_utils.py        |  37 +++++++++--
 6 files changed, 134 insertions(+), 128 deletions(-)

diff --git a/monai/data/__init__.py b/monai/data/__init__.py
index 63aa29df65..60574cb565 100644
--- a/monai/data/__init__.py
+++ b/monai/data/__init__.py
@@ -9,6 +9,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from .box_utils import convert_box_mode, convert_box_to_standard_mode
 from .csv_saver import CSVSaver
 from .dataloader import DataLoader
 from .dataset import (
diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index 51b20d1c2d..43cca85b74 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -15,7 +15,7 @@
 import torch
 
 import monai
-from monai.utils.enums import BoundingBoxMode
+from monai.utils.enums import BoxModeName
 
 # TO_REMOVE = 0.0 if the bottom-right corner pixel/voxel is not included in the box,
 #      i.e., when xmin=1., xmax=2., we have w = 1.
@@ -31,20 +31,17 @@ class BoxMode(ABC):
     A BoxMode is callable that converts box mode of boxes.
     It always creates a copy and will not modify boxes in place,
     the implementation should be aware of:
-        #. remember to define ``name`` which is a dictionary that maps spatial_dims to box mode string
+        #. remember to define ``name`` which is a dictionary that maps ``spatial_dims`` to the box mode name.
     """
 
-    def __init__(self):
-        pass
-
-    name: Dict[int, BoundingBoxMode] = {}
+    name: Dict[int, BoxModeName] = {}
 
     @classmethod
     def get_name(cls, spatial_dims: int) -> str:
         """
         Get the mode name for the given spatial dimension
         Args:
-            spatial_dims: 2 or 3
+            spatial_dims: number of spatial dimensions of the bounding box.
         Returns:
             mode string name
         Example:
@@ -55,11 +52,11 @@ def get_name(cls, spatial_dims: int) -> str:
     @abstractmethod
     def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         """
-        Return the box corners for the given boxes
+        Convert the bounding boxes of the current mode to corners.
         Args:
             boxes: bounding box, Nx4 or Nx6 torch tensor
         Returns:
-            corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor
         Example:
             boxmode = BoxMode()
             boxes = torch.ones(10,6)
@@ -70,9 +67,9 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
     @abstractmethod
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
         """
-        Return the boxes converted from the given box corners
+        Convert the given box corners to the bounding boxes of the current mode.
         Args:
-            corners: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor
             (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
         Returns:
             boxes: bounding box, Nx4 or Nx6 torch tensor
@@ -90,25 +87,13 @@ class CornerCornerModeTypeA(BoxMode):
     [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax]
     """
 
-    name = {2: BoundingBoxMode.XYXY, 3: BoundingBoxMode.XYZXYZ}
+    name = {2: BoxModeName.XYXY, 3: BoxModeName.XYZXYZ}
 
     def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
-        if spatial_dims == 3:
-            xmin, ymin, zmin, xmax, ymax, zmax = boxes.split(1, dim=-1)
-            return xmin, ymin, zmin, xmax, ymax, zmax
-        elif spatial_dims == 2:
-            xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
-            corners = xmin, ymin, xmax, ymax
-        return corners
+        return boxes.split(1, dim=-1)
 
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
-        if spatial_dims == 3:
-            boxes = torch.cat((corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]), dim=-1)
-        elif spatial_dims == 2:
-            boxes = torch.cat((corners[0], corners[1], corners[2], corners[3]), dim=-1)
-        return boxes
+        return torch.cat(corners, dim=-1)
 
 
 class CornerCornerModeTypeB(BoxMode):
@@ -117,25 +102,23 @@ class CornerCornerModeTypeB(BoxMode):
     [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax]
     """
 
-    name = {2: BoundingBoxMode.XXYY, 3: BoundingBoxMode.XXYYZZ}
+    name = {2: BoxModeName.XXYY, 3: BoxModeName.XXYYZZ}
 
     def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
+        spatial_dims = monai.data.box_utils.get_spatial_dims(boxes=boxes)
         if spatial_dims == 3:
             xmin, xmax, ymin, ymax, zmin, zmax = boxes.split(1, dim=-1)
             return xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
             xmin, xmax, ymin, ymax = boxes.split(1, dim=-1)
-            corners = xmin, ymin, xmax, ymax
-        return corners
+            return xmin, ymin, xmax, ymax
 
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
+        spatial_dims = monai.data.box_utils.get_spatial_dims(corners=corners)
         if spatial_dims == 3:
-            boxes = torch.cat((corners[0], corners[3], corners[1], corners[4], corners[2], corners[5]), dim=-1)
+            return torch.cat((corners[0], corners[3], corners[1], corners[4], corners[2], corners[5]), dim=-1)
         elif spatial_dims == 2:
-            boxes = torch.cat((corners[0], corners[2], corners[1], corners[3]), dim=-1)
-        return boxes
+            return torch.cat((corners[0], corners[2], corners[1], corners[3]), dim=-1)
 
 
 class CornerCornerModeTypeC(BoxMode):
@@ -144,25 +127,22 @@ class CornerCornerModeTypeC(BoxMode):
     [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax]
     """
 
-    name = {2: BoundingBoxMode.XYXY, 3: BoundingBoxMode.XYXYZZ}
+    name = {2: BoxModeName.XYXY, 3: BoxModeName.XYXYZZ}
 
     def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
+        spatial_dims = monai.data.box_utils.get_spatial_dims(boxes=boxes)
         if spatial_dims == 3:
             xmin, ymin, xmax, ymax, zmin, zmax = boxes.split(1, dim=-1)
             return xmin, ymin, zmin, xmax, ymax, zmax
         elif spatial_dims == 2:
-            xmin, ymin, xmax, ymax = boxes.split(1, dim=-1)
-            corners = xmin, ymin, xmax, ymax
-        return corners
+            return boxes.split(1, dim=-1)
 
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
+        spatial_dims = monai.data.box_utils.get_spatial_dims(corners=corners)
         if spatial_dims == 3:
-            boxes = torch.cat((corners[0], corners[1], corners[3], corners[4], corners[2], corners[5]), dim=-1)
+            return torch.cat((corners[0], corners[1], corners[3], corners[4], corners[2], corners[5]), dim=-1)
         elif spatial_dims == 2:
-            boxes = torch.cat((corners[0], corners[1], corners[2], corners[3]), dim=-1)
-        return boxes
+            return torch.cat(corners, dim=-1)
 
 
 class CornerSizeMode(BoxMode):
@@ -171,14 +151,14 @@ class CornerSizeMode(BoxMode):
     [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
     """
 
-    name = {2: BoundingBoxMode.XYWH, 3: BoundingBoxMode.XYZWHD}
+    name = {2: BoxModeName.XYWH, 3: BoxModeName.XYZWHD}
 
     def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         # convert to float32 when computing torch.clamp, which does not support float16
         box_dtype = boxes.dtype
         compute_dtype = torch.float32
 
-        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
+        spatial_dims = monai.data.box_utils.get_spatial_dims(boxes=boxes)
         if spatial_dims == 3:
             xmin, ymin, zmin, w, h, d = boxes.split(1, dim=-1)
             xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
@@ -189,20 +169,18 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
             xmin, ymin, w, h = boxes.split(1, dim=-1)
             xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            corners = xmin, ymin, xmax, ymax
-        return corners
+            return xmin, ymin, xmax, ymax
 
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
+        spatial_dims = monai.data.box_utils.get_spatial_dims(corners=corners)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
-            boxes = torch.cat(
+            return torch.cat(
                 (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE), dim=-1
             )
         elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
-            boxes = torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
-        return boxes
+            return torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
 
 
 class CenterSizeMode(BoxMode):
@@ -211,14 +189,14 @@ class CenterSizeMode(BoxMode):
     [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
     """
 
-    name = {2: BoundingBoxMode.CCWH, 3: BoundingBoxMode.CCCWHD}
+    name = {2: BoxModeName.CCWH, 3: BoxModeName.CCCWHD}
 
     def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         # convert to float32 when computing torch.clamp, which does not support float16
         box_dtype = boxes.dtype
         compute_dtype = torch.float32
 
-        spatial_dims = monai.data.box_utils.get_dimension(boxes=boxes)
+        spatial_dims = monai.data.box_utils.get_spatial_dims(boxes=boxes)
         if spatial_dims == 3:
             xc, yc, zc, w, h, d = boxes.split(1, dim=-1)
             xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
@@ -234,14 +212,13 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
             xmax = xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymin = yc - ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
             ymax = yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            corners = xmin, ymin, xmax, ymax
-        return corners
+            return xmin, ymin, xmax, ymax
 
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_dimension(corners=corners)
+        spatial_dims = monai.data.box_utils.get_spatial_dims(corners=corners)
         if spatial_dims == 3:
             xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
-            boxes = torch.cat(
+            return torch.cat(
                 (
                     (xmin + xmax + TO_REMOVE) / 2.0,
                     (ymin + ymax + TO_REMOVE) / 2.0,
@@ -254,7 +231,7 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
             )
         elif spatial_dims == 2:
             xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
-            boxes = torch.cat(
+            return torch.cat(
                 (
                     (xmin + xmax + TO_REMOVE) / 2.0,
                     (ymin + ymax + TO_REMOVE) / 2.0,
@@ -263,4 +240,3 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
                 ),
                 dim=-1,
             )
-        return boxes
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index fc9ddb0ac9..4d35cfde5b 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -17,7 +17,6 @@
 import torch
 
 from monai.config.type_definitions import NdarrayOrTensor
-from monai.data import box_mode
 from monai.data.box_mode import (
     BoxMode,
     CenterSizeMode,
@@ -29,13 +28,6 @@
 from monai.utils import look_up_option
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
-# TO_REMOVE = 0.0 if the bottom-right corner pixel/voxel is not included in the box,
-#      i.e., when xmin=1., xmax=2., we have w = 1.
-# TO_REMOVE = 1.0  if the bottom-right corner pixel/voxel is included in the box,
-#       i.e., when xmin=1., xmax=2., we have w = 2.
-# Currently, only `TO_REMOVE = 0.0` is supported
-TO_REMOVE = box_mode.TO_REMOVE
-
 # We support 2-D or 3-D bounding boxes
 SUPPORTED_SPATIAL_DIMS = [2, 3]
 
@@ -45,19 +37,7 @@
 StandardMode = CornerCornerModeTypeA
 
 
-def convert_to_list(in_sequence: Union[Sequence, torch.Tensor, np.ndarray]) -> list:
-    """
-    Convert a torch.Tensor, or np array input to list
-    Args:
-        in_sequence: Sequence or torch.Tensor or np.ndarray
-    Returns:
-        a list
-
-    """
-    return in_sequence.tolist() if isinstance(in_sequence, (torch.Tensor, np.ndarray)) else list(in_sequence)
-
-
-def get_dimension(
+def get_spatial_dims(
     boxes: Union[torch.Tensor, np.ndarray, None] = None,
     points: Union[torch.Tensor, np.ndarray, None] = None,
     corners: Union[Sequence, None] = None,
@@ -69,18 +49,18 @@ def get_dimension(
     It raises ValueError if the dimensions of multiple inputs do not match with each other.
     Args:
         boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
-        points: points, Nx2 or Nx3 torch tensor or ndarray
+        points: point coordinates, [x, y] or [x, y, z], Nx2 or Nx3 torch tensor or ndarray
         corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor or ndarray
         spatial_size: The spatial size of the image where the boxes are attached.
                 len(spatial_size) should be in [2, 3].
     Returns:
-        spatial_dims: 2 or 3
+        spatial_dims: number of spatial dimensions of the bounding box.
 
     Example:
         boxes = torch.ones(10,6)
-        get_dimension(boxes, spatial_size=[100,200,200]) will return 3
-        get_dimension(boxes, spatial_size=[100,200]) will raise ValueError
-        get_dimension(boxes) will return 3
+        get_spatial_dims(boxes, spatial_size=[100,200,200]) will return 3
+        get_spatial_dims(boxes, spatial_size=[100,200]) will raise ValueError
+        get_spatial_dims(boxes) will return 3
     """
     spatial_dims_set = set()
 
@@ -122,11 +102,11 @@ def get_dimension(
 
 def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:
     """
-    This function returns BoxMode object from giving mode according to BOXMODE_MAPPING
+    This function returns BoxMode object giving a representation of box mode
     Args:
-        mode: source box mode. If mode is not given, this func will assume mode is StandardMode
+        mode: a representation of box mode. If mode is not given, this func will assume mode is StandardMode
         It can be:
-            #. str: choose from monai.utils.enums.BoundingBoxMode, including
+            #. str: choose from monai.utils.enums.BoxModeName, for example,
                 "xyxy": boxes has format [xmin, ymin, xmax, ymax]
                 "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
                 "xxyy": boxes has format [xmin, xmax, ymin, ymax]
@@ -136,13 +116,18 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
                 "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
                 "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
                 "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from
+            #. BoxMode class: for example,
                 CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
                 CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
                 CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
                 CornerSizeMode: equivalent to "xywh" or "xyzwhd"
                 CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance
+            #. BoxMode instance: for example,
+                CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
+                CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
+                CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
+                CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
+                CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
             #. None: will assume mode is StandardMode
     Returns:
         BoxMode object
@@ -173,7 +158,7 @@ def check_corners(corners: Sequence) -> bool:
     """
     check the validity for the given box corners
     Args:
-        corners: corners of a box, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+        corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor
         (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
     Returns:
         bool, whether the box is valid
@@ -181,14 +166,11 @@ def check_corners(corners: Sequence) -> bool:
         corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
         check_corner(corners) will return True
     """
-    spatial_dims = get_dimension(corners=corners)
-    box_error = corners[spatial_dims] < corners[0]
-    for axis in range(1, spatial_dims):
-        box_error = box_error | (corners[spatial_dims + axis] < corners[axis])
-    if box_error.sum() > 0:
-        return False
-    else:
-        return True
+    spatial_dims = get_spatial_dims(corners=corners)
+    for axis in range(0, spatial_dims):
+        if (corners[spatial_dims + axis] < corners[axis]).sum() > 0:
+            return False
+    return True
 
 
 def convert_box_mode(
@@ -203,7 +185,7 @@ def convert_box_mode(
         src_mode: source box mode. If mode is not given, this func will assume mode is StandardMode.
         dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode.
         src_mode and dst_mode can be:
-            #. str: choose from monai.utils.enums.BoundingBoxMode, including
+            #. str: choose from monai.utils.enums.BoxModeName, for example,
                 "xyxy": boxes has format [xmin, ymin, xmax, ymax]
                 "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
                 "xxyy": boxes has format [xmin, xmax, ymin, ymax]
@@ -213,13 +195,18 @@ def convert_box_mode(
                 "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
                 "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
                 "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from
+            #. BoxMode class: for example,
                 CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
                 CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
                 CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
                 CornerSizeMode: equivalent to "xywh" or "xyzwhd"
                 CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance
+            #. BoxMode instance: for example,
+                CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
+                CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
+                CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
+                CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
+                CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
             #. None: will assume mode is StandardMode
     Returns:
         boxes_dst: bounding box with target mode, does not share memory with original boxes
@@ -228,30 +215,30 @@ def convert_box_mode(
         boxes = torch.ones(10,6)
         box_convert_mode(boxes=boxes, src_mode="xyzxyz", dst_mode=CenterSizeMode)
     """
-
-    # if mode not changed, return original box
     src_boxmode = get_boxmode(src_mode)
     dst_boxmode = get_boxmode(dst_mode)
+
+    # if mode not changed, return original boxes
     if type(src_boxmode) is type(dst_boxmode):
         return deepcopy(boxes)
-    # convert mode
-    else:
-        # convert numpy to tensor if needed
-        boxes_t, *_ = convert_data_type(boxes, torch.Tensor)
 
-        # convert boxes to corners
-        corners = src_boxmode.boxes_to_corners(boxes_t)
+    # convert box mode
+    # convert numpy to tensor if needed
+    boxes_t, *_ = convert_data_type(boxes, torch.Tensor)
+
+    # convert boxes to corners
+    corners = src_boxmode.boxes_to_corners(boxes_t)
 
-        # check validity of corners
-        if not check_corners(corners):
-            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+    # check validity of corners
+    if not check_corners(corners):
+        raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
 
-        # convert corners to boxes
-        boxes_t_dst = dst_boxmode.corners_to_boxes(corners)
+    # convert corners to boxes
+    boxes_t_dst = dst_boxmode.corners_to_boxes(corners)
 
-        # convert tensor back to numpy if needed
-        boxes_dst, *_ = convert_to_dst_type(src=boxes_t_dst, dst=boxes)
-        return boxes_dst
+    # convert tensor back to numpy if needed
+    boxes_dst, *_ = convert_to_dst_type(src=boxes_t_dst, dst=boxes)
+    return boxes_dst
 
 
 def convert_box_to_standard_mode(
@@ -263,7 +250,7 @@ def convert_box_to_standard_mode(
         boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray
         mode: source box mode. If mode is not given, this func will assume mode is StandardMode
         It can be:
-            #. str: choose from monai.utils.enums.BoundingBoxMode, including
+            #. str: choose from monai.utils.enums.BoxModeName, for example,
                 "xyxy": boxes has format [xmin, ymin, xmax, ymax]
                 "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
                 "xxyy": boxes has format [xmin, xmax, ymin, ymax]
@@ -273,13 +260,18 @@ def convert_box_to_standard_mode(
                 "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
                 "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
                 "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from
+            #. BoxMode class: for example,
                 CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
                 CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
                 CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
                 CornerSizeMode: equivalent to "xywh" or "xyzwhd"
                 CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance
+            #. BoxMode instance: for example,
+                CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
+                CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
+                CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
+                CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
+                CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
             #. None: will assume mode is StandardMode
     Returns:
         boxes_standard: bounding box with standard mode, does not share memory with original boxes
diff --git a/monai/utils/enums.py b/monai/utils/enums.py
index bae5984b04..12b1a4b86a 100644
--- a/monai/utils/enums.py
+++ b/monai/utils/enums.py
@@ -313,7 +313,7 @@ class JITMetadataKeys(Enum):
     DESCRIPTION = "description"
 
 
-class BoundingBoxMode(Enum):
+class BoxModeName(Enum):
     XYXY = "xyxy"  # [xmin, ymin, xmax, ymax]
     XYZXYZ = "xyzxyz"  # [xmin, ymin, zmin, xmax, ymax, zmax]
     XXYY = "xxyy"  # [xmin, xmax, ymin, ymax]
diff --git a/monai/utils/type_conversion.py b/monai/utils/type_conversion.py
index d5944e265b..f03e4f52b1 100644
--- a/monai/utils/type_conversion.py
+++ b/monai/utils/type_conversion.py
@@ -301,3 +301,15 @@ def convert_to_dst_type(
     else:
         output_type = type(dst)
     return convert_data_type(data=src, output_type=output_type, device=device, dtype=dtype, wrap_sequence=wrap_sequence)
+
+
+def convert_to_list(data: Union[Sequence, torch.Tensor, np.ndarray]) -> list:
+    """
+    Convert to list from `torch.Tensor`/`np.ndarray`/`list`/`tuple` etc.
+    Args:
+        data: data to be converted
+    Returns:
+        a list
+
+    """
+    return data.tolist() if isinstance(data, (torch.Tensor, np.ndarray)) else list(data)
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index 897f178fa2..3af6cfdf86 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -17,20 +17,29 @@
 # import torch
 from parameterized import parameterized
 
-from monai.data.box_mode import CornerCornerModeTypeA, CornerCornerModeTypeB, CornerSizeMode
+from monai.data.box_mode import (
+    CenterSizeMode,
+    CornerCornerModeTypeA,
+    CornerCornerModeTypeB,
+    CornerCornerModeTypeC,
+    CornerSizeMode,
+)
 from monai.data.box_utils import convert_box_mode, convert_box_to_standard_mode
 from monai.utils.type_conversion import convert_data_type
 from tests.utils import TEST_NDARRAYS, assert_allclose
 
-# box_affine, box_area, box_center, box_center_dist, box_clip_to_image,
-# box_giou, box_interp, box_iou, box_pair_giou, center_in_boxes,
-# convert_to_list, non_max_suppression, resize_boxes,
-
-
 TESTS = []
 for p in TEST_NDARRAYS:
     boxes = [[0, 0, 0, 0, 0, 0], [0, 1, 0, 2, 2, 3], [0, 1, 1, 2, 2, 3]]
     spatial_size = [4, 4, 4]
+    TESTS.append(
+        [
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "cccwhd", "half": False},
+            CornerSizeMode,
+            p([[0, 0, 0, 0, 0, 0], [-1, 0, -1.5, 2, 2, 3], [-1, 0, -0.5, 2, 2, 3]]),
+            p([0, 12, 12]),
+        ]
+    )
     TESTS.append(
         [
             {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzwhd", "half": False},
@@ -55,6 +64,14 @@
             p([0, 12, 12]),
         ]
     )
+    TESTS.append(
+        [
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xyzwhd", "half": False},
+            CornerCornerModeTypeC,
+            p([[0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 0, 3], [0, 1, 2, 3, 1, 4]]),
+            p([0, 12, 12]),
+        ]
+    )
     TESTS.append(
         [
             {"boxes": p(boxes), "spatial_size": spatial_size, "mode": CornerCornerModeTypeA(), "half": False},
@@ -103,6 +120,14 @@
             p([0, 2, 1]),
         ]
     )
+    TESTS.append(
+        [
+            {"boxes": p(boxes), "spatial_size": spatial_size, "mode": "xxyyzz", "half": False},
+            CenterSizeMode(),
+            p([[0, 0, 0, 0, 0, 0], [0.5, 1, 2.5, 1, 2, 1], [0.5, 1.5, 2.5, 1, 1, 1]]),
+            p([0, 2, 1]),
+        ]
+    )
 
 
 class TestCreateBoxList(unittest.TestCase):

From 35943270e2bf0fa6f40ef5c802ee3c3746b334eb Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Tue, 17 May 2022 14:07:40 -0400
Subject: [PATCH 36/49] clean code

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 tests/test_box_utils.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index 3af6cfdf86..9c2c7248b3 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -9,12 +9,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# import random
 import unittest
 
 import numpy as np
 
-# import torch
 from parameterized import parameterized
 
 from monai.data.box_mode import (
@@ -136,7 +134,6 @@ def test_value(self, input_data, mode2, expected_box, expected_area):
         expected_box = convert_data_type(expected_box, dtype=np.float32)[0]
         boxes1 = convert_data_type(input_data["boxes"], dtype=np.float32)[0]
         mode1 = input_data["mode"]
-        # spatial_size = input_data["spatial_size"]
         half_bool = input_data["half"]
 
         # test float16

From 571284334ec33f04617ecd0edc7385e28fbbb2d3 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Tue, 17 May 2022 15:04:17 -0400
Subject: [PATCH 37/49] isort test_box_utils

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 tests/test_box_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index 9c2c7248b3..5865183d6d 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -12,7 +12,6 @@
 import unittest
 
 import numpy as np
-
 from parameterized import parameterized
 
 from monai.data.box_mode import (

From c679de47e2cb9bd04ab1c1316624d3aa6e10da7e Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Tue, 17 May 2022 22:36:20 -0400
Subject: [PATCH 38/49] update docstring to generate docs

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 docs/source/data.rst    |  12 +++
 monai/data/box_utils.py | 171 +++++++++++++++++++++++-----------------
 2 files changed, 109 insertions(+), 74 deletions(-)

diff --git a/docs/source/data.rst b/docs/source/data.rst
index 02e8031117..6a8f7f581d 100644
--- a/docs/source/data.rst
+++ b/docs/source/data.rst
@@ -311,3 +311,15 @@ PatchWSIDataset
 ~~~~~~~~~~~~~~~
 .. autoclass:: monai.data.PatchWSIDataset
     :members:
+
+Box utility
+-----------
+
+convert_box_mode
+~~~~~~~~~~~~~~~~~
+.. autofunction:: monai.data.box_utils.convert_box_mode
+
+convert_box_to_standard_mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: monai.data.box_utils.convert_box_to_standard_mode
+
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 4d35cfde5b..2897a1a501 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -107,28 +107,28 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
         mode: a representation of box mode. If mode is not given, this func will assume mode is StandardMode
         It can be:
             #. str: choose from monai.utils.enums.BoxModeName, for example,
-                "xyxy": boxes has format [xmin, ymin, xmax, ymax]
-                "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
-                "xxyy": boxes has format [xmin, xmax, ymin, ymax]
-                "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
-                "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
-                "xywh": boxes has format [xmin, ymin, xsize, ysize]
-                "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
-                "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
-                "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: for example,
-                CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
-                CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
-                CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
-                CornerSizeMode: equivalent to "xywh" or "xyzwhd"
-                CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance: for example,
-                CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
-                CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
-                CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
-                CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
-                CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
-            #. None: will assume mode is StandardMode
+                - "xyxy": boxes has format [xmin, ymin, xmax, ymax]
+                - "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
+                - "xxyy": boxes has format [xmin, xmax, ymin, ymax]
+                - "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
+                - "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
+                - "xywh": boxes has format [xmin, ymin, xsize, ysize]
+                - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
+                - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
+                - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+            #. BoxMode class: choose from monai.data.box_mode, for example,
+                - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
+                - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
+                - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
+                - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
+                - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
+            #. BoxMode instance: choose from monai.data.box_mode, for example,
+                - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
+                - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
+                - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
+                - CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
+                - CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
+            #. None: will assume mode is StandardMode = CornerCornerModeTypeA
     Returns:
         BoxMode object
 
@@ -179,41 +179,53 @@ def convert_box_mode(
     dst_mode: Union[str, BoxMode, Type[BoxMode], None] = None,
 ) -> NdarrayOrTensor:
     """
-    This function converts the boxes in src_mode to the dst_mode
+    This function converts the boxes in src_mode to the dst_mode.
+
     Args:
-        boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray
+        boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray.
         src_mode: source box mode. If mode is not given, this func will assume mode is StandardMode.
         dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode.
+
+    Note:
+        StandardMode is equivalent to CornerCornerModeTypeA, or "xyxy", or "xyzxyz".
+
         src_mode and dst_mode can be:
             #. str: choose from monai.utils.enums.BoxModeName, for example,
-                "xyxy": boxes has format [xmin, ymin, xmax, ymax]
-                "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
-                "xxyy": boxes has format [xmin, xmax, ymin, ymax]
-                "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
-                "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
-                "xywh": boxes has format [xmin, ymin, xsize, ysize]
-                "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
-                "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
-                "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: for example,
-                CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
-                CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
-                CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
-                CornerSizeMode: equivalent to "xywh" or "xyzwhd"
-                CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance: for example,
-                CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
-                CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
-                CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
-                CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
-                CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
+                - "xyxy": boxes has format [xmin, ymin, xmax, ymax]
+                - "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
+                - "xxyy": boxes has format [xmin, xmax, ymin, ymax]
+                - "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
+                - "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
+                - "xywh": boxes has format [xmin, ymin, xsize, ysize]
+                - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
+                - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
+                - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+            #. BoxMode class: choose from monai.data.box_mode, for example,
+                - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
+                - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
+                - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
+                - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
+                - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
+            #. BoxMode instance: choose from monai.data.box_mode, for example,
+                - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
+                - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
+                - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
+                - CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
+                - CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
             #. None: will assume mode is StandardMode
+
     Returns:
         boxes_dst: bounding box with target mode, does not share memory with original boxes
 
     Example:
-        boxes = torch.ones(10,6)
-        box_convert_mode(boxes=boxes, src_mode="xyzxyz", dst_mode=CenterSizeMode)
+        .. code-block:: python
+
+            boxes = torch.ones(10,4)
+            # The following three lines are equivalent
+            # They convert boxes with format [xmin, ymin, xmax, ymax] to [xcenter, ycenter, xsize, ysize].
+            convert_box_mode(boxes=boxes, src_mode="xyxy", dst_mode="ccwh")
+            convert_box_mode(boxes=boxes, src_mode="xyxy", dst_mode=monai.data.box_mode.CenterSizeMode)
+            convert_box_mode(boxes=boxes, src_mode="xyxy", dst_mode=monai.data.box_mode.CenterSizeMode())
     """
     src_boxmode = get_boxmode(src_mode)
     dst_boxmode = get_boxmode(dst_mode)
@@ -245,39 +257,50 @@ def convert_box_to_standard_mode(
     boxes: NdarrayOrTensor, mode: Union[str, BoxMode, Type[BoxMode], None] = None
 ) -> NdarrayOrTensor:
     """
-    Convert given boxes to standard mode
+    Convert given boxes to standard mode.
+
     Args:
-        boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray
-        mode: source box mode. If mode is not given, this func will assume mode is StandardMode
-        It can be:
+        boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray.
+        mode: source box mode. If mode is not given, this func will assume mode is StandardMode.
+
+    Note:
+        StandardMode is equivalent to CornerCornerModeTypeA, or "xyxy", or "xyzxyz".
+
+        mode can be:
             #. str: choose from monai.utils.enums.BoxModeName, for example,
-                "xyxy": boxes has format [xmin, ymin, xmax, ymax]
-                "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
-                "xxyy": boxes has format [xmin, xmax, ymin, ymax]
-                "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
-                "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
-                "xywh": boxes has format [xmin, ymin, xsize, ysize]
-                "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
-                "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
-                "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: for example,
-                CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
-                CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
-                CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
-                CornerSizeMode: equivalent to "xywh" or "xyzwhd"
-                CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance: for example,
-                CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
-                CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
-                CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
-                CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
-                CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
+                - "xyxy": boxes has format [xmin, ymin, xmax, ymax]
+                - "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
+                - "xxyy": boxes has format [xmin, xmax, ymin, ymax]
+                - "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
+                - "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
+                - "xywh": boxes has format [xmin, ymin, xsize, ysize]
+                - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
+                - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
+                - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
+            #. BoxMode class: choose from monai.data.box_mode, for example,
+                - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
+                - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
+                - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
+                - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
+                - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
+            #. BoxMode instance: choose from monai.data.box_mode, for example,
+                - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
+                - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
+                - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
+                - CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
+                - CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
             #. None: will assume mode is StandardMode
+
     Returns:
         boxes_standard: bounding box with standard mode, does not share memory with original boxes
 
     Example:
-        boxes = torch.ones(10,6)
-        box_convert_standard_mode(boxes=boxes, mode="xxyyzz")
+        .. code-block:: python
+
+            boxes = torch.ones(10,6)
+            # The following two lines are equivalent
+            # They convert boxes with format [xmin, xmax, ymin, ymax, zmin, zmax] to [xmin, ymin, zmin, xmax, ymax, zmax]
+            convert_box_to_standard_mode(boxes=boxes, mode="xxyyzz")
+            convert_box_mode(boxes=boxes, src_mode="xxyyzz", dst_mode="xyzxyz")
     """
     return convert_box_mode(boxes=boxes, src_mode=mode, dst_mode=StandardMode())

From 30b7d32fc70ed1c3534493305531a966c3fb1ec9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 May 2022 02:38:22 +0000
Subject: [PATCH 39/49] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 docs/source/data.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/source/data.rst b/docs/source/data.rst
index 6a8f7f581d..0c2ba1931b 100644
--- a/docs/source/data.rst
+++ b/docs/source/data.rst
@@ -322,4 +322,3 @@ convert_box_mode
 convert_box_to_standard_mode
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autofunction:: monai.data.box_utils.convert_box_to_standard_mode
-

From ddbbc2a9a4ada3b9ac76f8d8742e1a786209efcc Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Tue, 17 May 2022 22:38:35 -0400
Subject: [PATCH 40/49] update comment

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 2897a1a501..6e7668678e 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -230,7 +230,7 @@ def convert_box_mode(
     src_boxmode = get_boxmode(src_mode)
     dst_boxmode = get_boxmode(dst_mode)
 
-    # if mode not changed, return original boxes
+    # if mode not changed, deepcopy the original boxes
     if type(src_boxmode) is type(dst_boxmode):
         return deepcopy(boxes)
 

From 8426e473ba1ca632b5be65a902d57aa8aca8c61a Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Tue, 17 May 2022 22:41:01 -0400
Subject: [PATCH 41/49] update __init__

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/utils/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/monai/utils/__init__.py b/monai/utils/__init__.py
index 429183b1a0..ec444e3038 100644
--- a/monai/utils/__init__.py
+++ b/monai/utils/__init__.py
@@ -37,6 +37,7 @@
     TransformBackends,
     UpsampleMode,
     Weight,
+    BoxModeName,
 )
 from .jupyter_utils import StatusMembers, ThreadContainer
 from .misc import (
@@ -85,6 +86,7 @@
 from .profiling import PerfContext, torch_profiler_full, torch_profiler_time_cpu_gpu, torch_profiler_time_end_to_end
 from .state_cacher import StateCacher
 from .type_conversion import (
+    convert_to_list,
     convert_data_type,
     convert_to_cupy,
     convert_to_dst_type,

From da7bebe2c761de1daa89e785e2cdf12cfec649a5 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Tue, 17 May 2022 22:46:47 -0400
Subject: [PATCH 42/49] reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/utils/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monai/utils/__init__.py b/monai/utils/__init__.py
index ec444e3038..76a05940cb 100644
--- a/monai/utils/__init__.py
+++ b/monai/utils/__init__.py
@@ -17,6 +17,7 @@
 from .enums import (
     Average,
     BlendMode,
+    BoxModeName,
     ChannelMatching,
     CommonKeys,
     DiceCEReduction,
@@ -37,7 +38,6 @@
     TransformBackends,
     UpsampleMode,
     Weight,
-    BoxModeName,
 )
 from .jupyter_utils import StatusMembers, ThreadContainer
 from .misc import (
@@ -86,10 +86,10 @@
 from .profiling import PerfContext, torch_profiler_full, torch_profiler_time_cpu_gpu, torch_profiler_time_end_to_end
 from .state_cacher import StateCacher
 from .type_conversion import (
-    convert_to_list,
     convert_data_type,
     convert_to_cupy,
     convert_to_dst_type,
+    convert_to_list,
     convert_to_numpy,
     convert_to_tensor,
     dtype_numpy_to_torch,

From 7912dfc37cb717a20efc3868b8b8ce193cddedab Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Tue, 17 May 2022 22:55:10 -0400
Subject: [PATCH 43/49] update comment in enum

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/utils/enums.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/monai/utils/enums.py b/monai/utils/enums.py
index 12b1a4b86a..e2f89b62a7 100644
--- a/monai/utils/enums.py
+++ b/monai/utils/enums.py
@@ -314,6 +314,9 @@ class JITMetadataKeys(Enum):
 
 
 class BoxModeName(Enum):
+    """
+    Box mode names.
+    """
     XYXY = "xyxy"  # [xmin, ymin, xmax, ymax]
     XYZXYZ = "xyzxyz"  # [xmin, ymin, zmin, xmax, ymax, zmax]
     XXYY = "xxyy"  # [xmin, xmax, ymin, ymax]

From 2cc46f847d02719c38e1fba549ffe2b1652acf54 Mon Sep 17 00:00:00 2001
From: monai-bot <monai.miccai2019@gmail.com>
Date: Wed, 18 May 2022 03:45:02 +0000
Subject: [PATCH 44/49] [MONAI] code formatting

Signed-off-by: monai-bot <monai.miccai2019@gmail.com>
---
 monai/data/box_utils.py | 2 +-
 monai/utils/enums.py    | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 6e7668678e..ea83b42ff4 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -231,7 +231,7 @@ def convert_box_mode(
     dst_boxmode = get_boxmode(dst_mode)
 
     # if mode not changed, deepcopy the original boxes
-    if type(src_boxmode) is type(dst_boxmode):
+    if isinstance(src_boxmode, type(dst_boxmode)):
         return deepcopy(boxes)
 
     # convert box mode
diff --git a/monai/utils/enums.py b/monai/utils/enums.py
index e2f89b62a7..a97ef3c7d7 100644
--- a/monai/utils/enums.py
+++ b/monai/utils/enums.py
@@ -317,6 +317,7 @@ class BoxModeName(Enum):
     """
     Box mode names.
     """
+
     XYXY = "xyxy"  # [xmin, ymin, xmax, ymax]
     XYZXYZ = "xyzxyz"  # [xmin, ymin, zmin, xmax, ymax, zmax]
     XXYY = "xxyy"  # [xmin, xmax, ymin, ymax]

From 70710444cf3b78b4c126ccded3ab9efbfd960037 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 18 May 2022 01:24:04 -0400
Subject: [PATCH 45/49] update docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 docs/source/data.rst    |  16 +++---
 monai/data/box_mode.py  | 104 +++++++++++++++++++++++++----------
 monai/data/box_utils.py | 116 +++++++++++++++++++---------------------
 monai/utils/enums.py    |   2 +
 4 files changed, 142 insertions(+), 96 deletions(-)

diff --git a/docs/source/data.rst b/docs/source/data.rst
index 0c2ba1931b..2737e8adc5 100644
--- a/docs/source/data.rst
+++ b/docs/source/data.rst
@@ -312,13 +312,15 @@ PatchWSIDataset
 .. autoclass:: monai.data.PatchWSIDataset
     :members:
 
-Box utility
------------
+Bounding box
+--------------------
 
-convert_box_mode
-~~~~~~~~~~~~~~~~~
-.. autofunction:: monai.data.box_utils.convert_box_mode
+Box mode
+~~~~~~~~~~
+.. automodule:: monai.data.box_mode
+    :members:
 
-convert_box_to_standard_mode
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Box mode converter
+~~~~~~~~~~~~~~~~~~
+.. autofunction:: monai.data.box_utils.convert_box_mode
 .. autofunction:: monai.data.box_utils.convert_box_to_standard_mode
diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
index 43cca85b74..bbf22e25f5 100644
--- a/monai/data/box_mode.py
+++ b/monai/data/box_mode.py
@@ -29,9 +29,10 @@ class BoxMode(ABC):
     """
     An abstract class of a ``BoxMode``.
     A BoxMode is callable that converts box mode of boxes.
-    It always creates a copy and will not modify boxes in place,
-    the implementation should be aware of:
-        #. remember to define ``name`` which is a dictionary that maps ``spatial_dims`` to the box mode name.
+    It always creates a copy and will not modify boxes in place.
+
+    Subclasses should be aware of the following:
+    the class variable ``name`` must be defined as a dictionary that maps ``spatial_dims`` to the box mode name.
     """
 
     name: Dict[int, BoxModeName] = {}
@@ -39,13 +40,13 @@ class BoxMode(ABC):
     @classmethod
     def get_name(cls, spatial_dims: int) -> str:
         """
-        Get the mode name for the given spatial dimension
+        Get the mode name for the given spatial dimension using class variable ``name``.
+
         Args:
             spatial_dims: number of spatial dimensions of the bounding box.
+
         Returns:
-            mode string name
-        Example:
-            BoxMode.get_name(spatial_dims = 2)
+            ``str``: mode string name
         """
         return cls.name[spatial_dims].value
 
@@ -53,14 +54,19 @@ def get_name(cls, spatial_dims: int) -> str:
     def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
         """
         Convert the bounding boxes of the current mode to corners.
+
         Args:
             boxes: bounding box, Nx4 or Nx6 torch tensor
+
         Returns:
-            corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor
+            ``Tuple``: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor.
+            It represents (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+
         Example:
-            boxmode = BoxMode()
-            boxes = torch.ones(10,6)
-            boxmode.boxes_to_corners(boxes) will return a 6-element tuple, each element is a 10x1 tensor
+            .. code-block:: python
+
+                boxes = torch.ones(10,6)
+                boxmode.boxes_to_corners(boxes) # will return a 6-element tuple, each element is a 10x1 tensor
         """
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
@@ -68,23 +74,35 @@ def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
     def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
         """
         Convert the given box corners to the bounding boxes of the current mode.
+
         Args:
-            corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor
-            (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+            corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor.
+                It represents (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+
         Returns:
-            boxes: bounding box, Nx4 or Nx6 torch tensor
+            ``Tensor``: bounding box, Nx4 or Nx6 torch tensor
+
         Example:
-            boxmode = BoxMode()
-            corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
-            boxmode.corners_to_boxes(corners) will return a 10x4 tensor
+            .. code-block:: python
+
+                corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
+                boxmode.corners_to_boxes(corners) # will return a 10x4 tensor
         """
         raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
 
 
 class CornerCornerModeTypeA(BoxMode):
     """
-    Also represented as "xyxy" or "xyzxyz"
-    [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax]
+    A subclass of ``BoxMode``.
+
+    Also represented as "xyxy" or "xyzxyz", with format of
+    [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax].
+
+    Note:
+        .. code-block:: python
+
+            CornerCornerModeTypeA.get_name(spatial_dims=2) # will return "xyxy"
+            CornerCornerModeTypeA.get_name(spatial_dims=3) # will return "xyzxyz"
     """
 
     name = {2: BoxModeName.XYXY, 3: BoxModeName.XYZXYZ}
@@ -98,8 +116,16 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
 
 class CornerCornerModeTypeB(BoxMode):
     """
-    Also represented as "xxyy" or "xxyyzz"
-    [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax]
+    A subclass of ``BoxMode``.
+
+    Also represented as "xxyy" or "xxyyzz", with format of
+    [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax].
+
+    Note:
+        .. code-block:: python
+
+            CornerCornerModeTypeB.get_name(spatial_dims=2) # will return "xxyy"
+            CornerCornerModeTypeB.get_name(spatial_dims=3) # will return "xxyyzz"
     """
 
     name = {2: BoxModeName.XXYY, 3: BoxModeName.XXYYZZ}
@@ -123,8 +149,16 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
 
 class CornerCornerModeTypeC(BoxMode):
     """
-    Also represented as "xyxy" or "xyxyzz"
-    [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax]
+    A subclass of ``BoxMode``.
+
+    Also represented as "xyxy" or "xyxyzz", with format of
+    [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax].
+
+    Note:
+        .. code-block:: python
+
+            CornerCornerModeTypeC.get_name(spatial_dims=2) # will return "xyxy"
+            CornerCornerModeTypeC.get_name(spatial_dims=3) # will return "xyxyzz"
     """
 
     name = {2: BoxModeName.XYXY, 3: BoxModeName.XYXYZZ}
@@ -147,8 +181,16 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
 
 class CornerSizeMode(BoxMode):
     """
-    Also represented as "xywh" or "xyzwhd"
-    [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
+    A subclass of ``BoxMode``.
+
+    Also represented as "xywh" or "xyzwhd", with format of
+    [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize].
+
+    Note:
+        .. code-block:: python
+
+            CornerSizeMode.get_name(spatial_dims=2) # will return "xywh"
+            CornerSizeMode.get_name(spatial_dims=3) # will return "xyzwhd"
     """
 
     name = {2: BoxModeName.XYWH, 3: BoxModeName.XYZWHD}
@@ -185,8 +227,16 @@ def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
 
 class CenterSizeMode(BoxMode):
     """
-    Also represented as "ccwh" or "cccwhd"
-    [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize]
+    A subclass of ``BoxMode``.
+
+    Also represented as "ccwh" or "cccwhd", with format of
+    [xcenter, ycenter, xsize, ysize] or [xcenter, ycenter, zcenter, xsize, ysize, zsize].
+
+    Note:
+        .. code-block:: python
+
+            CenterSizeMode.get_name(spatial_dims=2) # will return "ccwh"
+            CenterSizeMode.get_name(spatial_dims=3) # will return "cccwhd"
     """
 
     name = {2: BoxModeName.CCWH, 3: BoxModeName.CCCWHD}
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 6e7668678e..2426ce4afd 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -31,7 +31,7 @@
 # We support 2-D or 3-D bounding boxes
 SUPPORTED_SPATIAL_DIMS = [2, 3]
 
-# We support the conversion between several box modes, i.e., representation of a bounding box
+# We support the conversion between several box modes, i.e., representations of bounding boxes
 SUPPORTED_MODES = [CornerCornerModeTypeA, CornerCornerModeTypeB, CornerCornerModeTypeC, CornerSizeMode, CenterSizeMode]
 # The standard box mode we use in all the box util functions
 StandardMode = CornerCornerModeTypeA
@@ -47,20 +47,24 @@ def get_spatial_dims(
     Get spatial dimension for the giving setting.
     Missing input is allowed. But at least one of the input value should be given.
     It raises ValueError if the dimensions of multiple inputs do not match with each other.
+
     Args:
-        boxes: bounding box, Nx4 or Nx6 torch tensor or ndarray
+        boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray
         points: point coordinates, [x, y] or [x, y, z], Nx2 or Nx3 torch tensor or ndarray
         corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor or ndarray
         spatial_size: The spatial size of the image where the boxes are attached.
                 len(spatial_size) should be in [2, 3].
+
     Returns:
-        spatial_dims: number of spatial dimensions of the bounding box.
+        ``int``: spatial_dims, number of spatial dimensions of the bounding boxes.
 
     Example:
-        boxes = torch.ones(10,6)
-        get_spatial_dims(boxes, spatial_size=[100,200,200]) will return 3
-        get_spatial_dims(boxes, spatial_size=[100,200]) will raise ValueError
-        get_spatial_dims(boxes) will return 3
+        .. code-block:: python
+
+            boxes = torch.ones(10,6)
+            get_spatial_dims(boxes, spatial_size=[100,200,200]) # will return 3
+            get_spatial_dims(boxes, spatial_size=[100,200]) # will raise ValueError
+            get_spatial_dims(boxes) # will return 3
     """
     spatial_dims_set = set()
 
@@ -103,10 +107,15 @@ def get_spatial_dims(
 def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:
     """
     This function returns BoxMode object giving a representation of box mode
+
     Args:
-        mode: a representation of box mode. If mode is not given, this func will assume mode is StandardMode
-        It can be:
-            #. str: choose from monai.utils.enums.BoxModeName, for example,
+        mode: a representation of box mode. If it is not given, this func will assume it is ``StandardMode``.
+
+    Note:
+        ``StandardMode`` is equivalent to :class:`~monai.data.box_mode.CornerCornerModeTypeA`.
+
+        mode can be:
+            #. str: choose from :class:`~monai.utils.enums.BoxModeName`, for example,
                 - "xyxy": boxes has format [xmin, ymin, xmax, ymax]
                 - "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
                 - "xxyy": boxes has format [xmin, xmax, ymin, ymax]
@@ -116,25 +125,28 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
                 - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
                 - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
                 - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from monai.data.box_mode, for example,
+            #. BoxMode class: choose from the subclasses of :class:`~monai.data.box_mode.BoxMode`, for example,
                 - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
                 - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
                 - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
                 - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
                 - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance: choose from monai.data.box_mode, for example,
+            #. BoxMode object: choose from the subclasses of :class:`~monai.data.box_mode.BoxMode`, for example,
                 - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
                 - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
                 - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
                 - CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
                 - CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
-            #. None: will assume mode is StandardMode = CornerCornerModeTypeA
+            #. None: will assume mode is ``StandardMode``
+
     Returns:
         BoxMode object
 
     Example:
-        mode = "xyzxyz"
-        get_boxmode(mode) will return CornerCornerModeTypeA()
+        .. code-block:: python
+
+            mode = "xyzxyz"
+            get_boxmode(mode) # will return CornerCornerModeTypeA()
     """
     if isinstance(mode, BoxMode):
         return mode
@@ -154,17 +166,22 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
     return boxmode(*args, **kwargs)
 
 
-def check_corners(corners: Sequence) -> bool:
+def _check_corners(corners: Sequence) -> bool:
     """
-    check the validity for the given box corners
+    Internal function to check the validity for the given box corners
+
     Args:
         corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor
         (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+
     Returns:
-        bool, whether the box is valid
+        ``bool``: whether the box is valid
+
     Example:
-        corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
-        check_corner(corners) will return True
+        .. code-block:: python
+
+            corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
+            _check_corners(corners) # will return True
     """
     spatial_dims = get_spatial_dims(corners=corners)
     for axis in range(0, spatial_dims):
@@ -182,15 +199,15 @@ def convert_box_mode(
     This function converts the boxes in src_mode to the dst_mode.
 
     Args:
-        boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray.
-        src_mode: source box mode. If mode is not given, this func will assume mode is StandardMode.
-        dst_mode: target box mode. If mode is not given, this func will assume mode is StandardMode.
+        boxes: source bounding boxes, Nx4 or Nx6 torch tensor or ndarray.
+        src_mode: source box mode. If it is not given, this func will assume it is ``StandardMode``.
+        dst_mode: target box mode. If it is not given, this func will assume it is ``StandardMode``.
 
     Note:
-        StandardMode is equivalent to CornerCornerModeTypeA, or "xyxy", or "xyzxyz".
+        ``StandardMode`` is equivalent to :class:`~monai.data.box_mode.CornerCornerModeTypeA`.
 
-        src_mode and dst_mode can be:
-            #. str: choose from monai.utils.enums.BoxModeName, for example,
+        ``src_mode`` and ``dst_mode`` can be:
+            #. str: choose from :class:`~monai.utils.enums.BoxModeName`, for example,
                 - "xyxy": boxes has format [xmin, ymin, xmax, ymax]
                 - "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
                 - "xxyy": boxes has format [xmin, xmax, ymin, ymax]
@@ -200,22 +217,22 @@ def convert_box_mode(
                 - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
                 - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
                 - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from monai.data.box_mode, for example,
+            #. BoxMode class: choose from the subclasses of :class:`~monai.data.box_mode.BoxMode`, for example,
                 - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
                 - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
                 - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
                 - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
                 - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance: choose from monai.data.box_mode, for example,
+            #. BoxMode object: choose from the subclasses of :class:`~monai.data.box_mode.BoxMode`, for example,
                 - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
                 - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
                 - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
                 - CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
                 - CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
-            #. None: will assume mode is StandardMode
+            #. None: will assume mode is ``StandardMode``
 
     Returns:
-        boxes_dst: bounding box with target mode, does not share memory with original boxes
+        bounding boxes with target mode, with same format as ``boxes``, does not share memory with ``boxes``
 
     Example:
         .. code-block:: python
@@ -242,7 +259,7 @@ def convert_box_mode(
     corners = src_boxmode.boxes_to_corners(boxes_t)
 
     # check validity of corners
-    if not check_corners(corners):
+    if not _check_corners(corners):
         raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
 
     # convert corners to boxes
@@ -258,41 +275,16 @@ def convert_box_to_standard_mode(
 ) -> NdarrayOrTensor:
     """
     Convert given boxes to standard mode.
+    Standard mode is "xyxy" or "xyzxyz",
+    representing box format of [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax].
 
     Args:
-        boxes: source bounding box, Nx4 or Nx6 torch tensor or ndarray.
-        mode: source box mode. If mode is not given, this func will assume mode is StandardMode.
-
-    Note:
-        StandardMode is equivalent to CornerCornerModeTypeA, or "xyxy", or "xyzxyz".
-
-        mode can be:
-            #. str: choose from monai.utils.enums.BoxModeName, for example,
-                - "xyxy": boxes has format [xmin, ymin, xmax, ymax]
-                - "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
-                - "xxyy": boxes has format [xmin, xmax, ymin, ymax]
-                - "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
-                - "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
-                - "xywh": boxes has format [xmin, ymin, xsize, ysize]
-                - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
-                - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
-                - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from monai.data.box_mode, for example,
-                - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
-                - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
-                - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
-                - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
-                - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode instance: choose from monai.data.box_mode, for example,
-                - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
-                - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
-                - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
-                - CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
-                - CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
-            #. None: will assume mode is StandardMode
+        boxes: source bounding boxes, Nx4 or Nx6 torch tensor or ndarray.
+        mode: source box mode. If it is not given, this func will assume it is ``StandardMode``.
+            It follows the same format as ``src_mode`` and ``dst_mode`` in :func:`~monai.data.box_utils.convert_box_mode`.
 
     Returns:
-        boxes_standard: bounding box with standard mode, does not share memory with original boxes
+        bounding boxes with standard mode, with same format as ``boxes``, does not share memory with ``boxes``
 
     Example:
         .. code-block:: python
diff --git a/monai/utils/enums.py b/monai/utils/enums.py
index e2f89b62a7..0871e04c1f 100644
--- a/monai/utils/enums.py
+++ b/monai/utils/enums.py
@@ -36,6 +36,7 @@
     "PostFix",
     "ForwardMode",
     "TransformBackends",
+    "BoxModeName",
 ]
 
 
@@ -317,6 +318,7 @@ class BoxModeName(Enum):
     """
     Box mode names.
     """
+
     XYXY = "xyxy"  # [xmin, ymin, xmax, ymax]
     XYZXYZ = "xyzxyz"  # [xmin, ymin, zmin, xmax, ymax, zmax]
     XXYY = "xxyy"  # [xmin, xmax, ymin, ymax]

From 3806804fdc5a45f7f1240b74d96c4198deeebc49 Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 18 May 2022 02:56:00 -0400
Subject: [PATCH 46/49] combine box_mode and box_utils, reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 docs/source/data.rst    |   9 +-
 monai/data/box_mode.py  | 292 ------------------------------
 monai/data/box_utils.py | 381 ++++++++++++++++++++++++++++++++++------
 tests/test_box_utils.py |   5 +-
 4 files changed, 334 insertions(+), 353 deletions(-)
 delete mode 100644 monai/data/box_mode.py

diff --git a/docs/source/data.rst b/docs/source/data.rst
index 2737e8adc5..1fcf188b94 100644
--- a/docs/source/data.rst
+++ b/docs/source/data.rst
@@ -313,12 +313,17 @@ PatchWSIDataset
     :members:
 
 Bounding box
---------------------
+------------
 
 Box mode
 ~~~~~~~~~~
-.. automodule:: monai.data.box_mode
+.. autoclass:: monai.data.box_utils.BoxMode
     :members:
+.. autoclass:: monai.data.box_utils.CornerCornerModeTypeA
+.. autoclass:: monai.data.box_utils.CornerCornerModeTypeB
+.. autoclass:: monai.data.box_utils.CornerCornerModeTypeC
+.. autoclass:: monai.data.box_utils.CornerSizeMode
+.. autoclass:: monai.data.box_utils.CenterSizeMode
 
 Box mode converter
 ~~~~~~~~~~~~~~~~~~
diff --git a/monai/data/box_mode.py b/monai/data/box_mode.py
deleted file mode 100644
index bbf22e25f5..0000000000
--- a/monai/data/box_mode.py
+++ /dev/null
@@ -1,292 +0,0 @@
-# Copyright (c) MONAI Consortium
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#     http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from abc import ABC, abstractmethod
-from typing import Dict, Sequence, Tuple
-
-import torch
-
-import monai
-from monai.utils.enums import BoxModeName
-
-# TO_REMOVE = 0.0 if the bottom-right corner pixel/voxel is not included in the box,
-#      i.e., when xmin=1., xmax=2., we have w = 1.
-# TO_REMOVE = 1.0  if the bottom-right corner pixel/voxel is included in the box,
-#       i.e., when xmin=1., xmax=2., we have w = 2.
-# Currently, only `TO_REMOVE = 0.0` is supported
-TO_REMOVE = 0.0  # xmax-xmin = w -TO_REMOVE.
-
-
-class BoxMode(ABC):
-    """
-    An abstract class of a ``BoxMode``.
-    A BoxMode is callable that converts box mode of boxes.
-    It always creates a copy and will not modify boxes in place.
-
-    The implementation should be aware of:
-    remember to define class variable ``name`` which is a dictionary that maps ``spatial_dims`` to the box mode name.
-    """
-
-    name: Dict[int, BoxModeName] = {}
-
-    @classmethod
-    def get_name(cls, spatial_dims: int) -> str:
-        """
-        Get the mode name for the given spatial dimension using class variable ``name``.
-
-        Args:
-            spatial_dims: number of spatial dimensions of the bounding box.
-
-        Returns:
-            ``str``: mode string name
-        """
-        return cls.name[spatial_dims].value
-
-    @abstractmethod
-    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        """
-        Convert the bounding boxes of the current mode to corners.
-
-        Args:
-            boxes: bounding box, Nx4 or Nx6 torch tensor
-
-        Returns:
-            ``Tuple``: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor.
-            It represents (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
-
-        Example:
-            .. code-block:: python
-
-                boxes = torch.ones(10,6)
-                boxmode.boxes_to_corners(boxes) will return a 6-element tuple, each element is a 10x1 tensor
-        """
-        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
-
-    @abstractmethod
-    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        """
-        Convert the given box corners to the bounding boxes of the current mode.
-
-        Args:
-            corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor.
-                It represents (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
-
-        Returns:
-            ``Tensor``: bounding box, Nx4 or Nx6 torch tensor
-
-        Example:
-            .. code-block:: python
-
-                corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
-                boxmode.corners_to_boxes(corners) will return a 10x4 tensor
-        """
-        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
-
-
-class CornerCornerModeTypeA(BoxMode):
-    """
-    A subclass of ``BoxMode``.
-
-    Also represented as "xyxy" or "xyzxyz", with format of
-    [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax].
-
-    Note:
-        .. code-block:: python
-
-            CornerCornerModeTypeA.get_name(spatial_dims=2) # will return "xyxy"
-            CornerCornerModeTypeA.get_name(spatial_dims=3) # will return "xyzxyz"
-    """
-
-    name = {2: BoxModeName.XYXY, 3: BoxModeName.XYZXYZ}
-
-    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        return boxes.split(1, dim=-1)
-
-    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        return torch.cat(corners, dim=-1)
-
-
-class CornerCornerModeTypeB(BoxMode):
-    """
-    A subclass of ``BoxMode``.
-
-    Also represented as "xxyy" or "xxyyzz", with format of
-    [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax].
-
-    Note:
-        .. code-block:: python
-
-            CornerCornerModeTypeB.get_name(spatial_dims=2) # will return "xxyy"
-            CornerCornerModeTypeB.get_name(spatial_dims=3) # will return "xxyyzz"
-    """
-
-    name = {2: BoxModeName.XXYY, 3: BoxModeName.XXYYZZ}
-
-    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        spatial_dims = monai.data.box_utils.get_spatial_dims(boxes=boxes)
-        if spatial_dims == 3:
-            xmin, xmax, ymin, ymax, zmin, zmax = boxes.split(1, dim=-1)
-            return xmin, ymin, zmin, xmax, ymax, zmax
-        elif spatial_dims == 2:
-            xmin, xmax, ymin, ymax = boxes.split(1, dim=-1)
-            return xmin, ymin, xmax, ymax
-
-    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_spatial_dims(corners=corners)
-        if spatial_dims == 3:
-            return torch.cat((corners[0], corners[3], corners[1], corners[4], corners[2], corners[5]), dim=-1)
-        elif spatial_dims == 2:
-            return torch.cat((corners[0], corners[2], corners[1], corners[3]), dim=-1)
-
-
-class CornerCornerModeTypeC(BoxMode):
-    """
-    A subclass of ``BoxMode``.
-
-    Also represented as "xyxy" or "xyxyzz", with format of
-    [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax].
-
-    Note:
-        .. code-block:: python
-
-            CornerCornerModeTypeC.get_name(spatial_dims=2) # will return "xyxy"
-            CornerCornerModeTypeC.get_name(spatial_dims=3) # will return "xyxyzz"
-    """
-
-    name = {2: BoxModeName.XYXY, 3: BoxModeName.XYXYZZ}
-
-    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        spatial_dims = monai.data.box_utils.get_spatial_dims(boxes=boxes)
-        if spatial_dims == 3:
-            xmin, ymin, xmax, ymax, zmin, zmax = boxes.split(1, dim=-1)
-            return xmin, ymin, zmin, xmax, ymax, zmax
-        elif spatial_dims == 2:
-            return boxes.split(1, dim=-1)
-
-    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_spatial_dims(corners=corners)
-        if spatial_dims == 3:
-            return torch.cat((corners[0], corners[1], corners[3], corners[4], corners[2], corners[5]), dim=-1)
-        elif spatial_dims == 2:
-            return torch.cat(corners, dim=-1)
-
-
-class CornerSizeMode(BoxMode):
-    """
-    A subclass of ``BoxMode``.
-
-    Also represented as "xywh" or "xyzwhd", with format of
-    [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize].
-
-    Note:
-        .. code-block:: python
-
-            CornerSizeMode.get_name(spatial_dims=2) # will return "xywh"
-            CornerSizeMode.get_name(spatial_dims=3) # will return "xyzwhd"
-    """
-
-    name = {2: BoxModeName.XYWH, 3: BoxModeName.XYZWHD}
-
-    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        # convert to float32 when computing torch.clamp, which does not support float16
-        box_dtype = boxes.dtype
-        compute_dtype = torch.float32
-
-        spatial_dims = monai.data.box_utils.get_spatial_dims(boxes=boxes)
-        if spatial_dims == 3:
-            xmin, ymin, zmin, w, h, d = boxes.split(1, dim=-1)
-            xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            zmax = zmin + (d - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            return xmin, ymin, zmin, xmax, ymax, zmax
-        elif spatial_dims == 2:
-            xmin, ymin, w, h = boxes.split(1, dim=-1)
-            xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            return xmin, ymin, xmax, ymax
-
-    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_spatial_dims(corners=corners)
-        if spatial_dims == 3:
-            xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
-            return torch.cat(
-                (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE), dim=-1
-            )
-        elif spatial_dims == 2:
-            xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
-            return torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
-
-
-class CenterSizeMode(BoxMode):
-    """
-    A subclass of ``BoxMode``.
-
-    Also represented as "ccwh" or "cccwhd", with format of
-    [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize].
-
-    Note:
-        .. code-block:: python
-
-            CenterSizeMode.get_name(spatial_dims=2) # will return "ccwh"
-            CenterSizeMode.get_name(spatial_dims=3) # will return "cccwhd"
-    """
-
-    name = {2: BoxModeName.CCWH, 3: BoxModeName.CCCWHD}
-
-    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
-        # convert to float32 when computing torch.clamp, which does not support float16
-        box_dtype = boxes.dtype
-        compute_dtype = torch.float32
-
-        spatial_dims = monai.data.box_utils.get_spatial_dims(boxes=boxes)
-        if spatial_dims == 3:
-            xc, yc, zc, w, h, d = boxes.split(1, dim=-1)
-            xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            xmax = xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            ymin = yc - ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            ymax = yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            zmin = zc - ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            zmax = zc + ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            return xmin, ymin, zmin, xmax, ymax, zmax
-        elif spatial_dims == 2:
-            xc, yc, w, h = boxes.split(1, dim=-1)
-            xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            xmax = xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            ymin = yc - ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            ymax = yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
-            return xmin, ymin, xmax, ymax
-
-    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
-        spatial_dims = monai.data.box_utils.get_spatial_dims(corners=corners)
-        if spatial_dims == 3:
-            xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
-            return torch.cat(
-                (
-                    (xmin + xmax + TO_REMOVE) / 2.0,
-                    (ymin + ymax + TO_REMOVE) / 2.0,
-                    (zmin + zmax + TO_REMOVE) / 2.0,
-                    xmax - xmin + TO_REMOVE,
-                    ymax - ymin + TO_REMOVE,
-                    zmax - zmin + TO_REMOVE,
-                ),
-                dim=-1,
-            )
-        elif spatial_dims == 2:
-            xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
-            return torch.cat(
-                (
-                    (xmin + xmax + TO_REMOVE) / 2.0,
-                    (ymin + ymax + TO_REMOVE) / 2.0,
-                    xmax - xmin + TO_REMOVE,
-                    ymax - ymin + TO_REMOVE,
-                ),
-                dim=-1,
-            )
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 7bf386f3eb..af8393c202 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -10,27 +10,317 @@
 # limitations under the License.
 
 import inspect
+from abc import ABC, abstractmethod
 from copy import deepcopy
-from typing import Sequence, Type, Union
+from typing import Dict, Sequence, Tuple, Type, Union
 
 import numpy as np
 import torch
 
 from monai.config.type_definitions import NdarrayOrTensor
-from monai.data.box_mode import (
-    BoxMode,
-    CenterSizeMode,
-    CornerCornerModeTypeA,
-    CornerCornerModeTypeB,
-    CornerCornerModeTypeC,
-    CornerSizeMode,
-)
 from monai.utils import look_up_option
+from monai.utils.enums import BoxModeName
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
 # We support 2-D or 3-D bounding boxes
 SUPPORTED_SPATIAL_DIMS = [2, 3]
 
+
+# TO_REMOVE = 0.0 if the bottom-right corner pixel/voxel is not included in the box,
+#      i.e., when xmin=1., xmax=2., we have w = 1.
+# TO_REMOVE = 1.0  if the bottom-right corner pixel/voxel is included in the box,
+#       i.e., when xmin=1., xmax=2., we have w = 2.
+# Currently, only `TO_REMOVE = 0.0` is supported
+TO_REMOVE = 0.0  # xmax-xmin = w -TO_REMOVE.
+
+
+class BoxMode(ABC):
+    """
+    An abstract class of a ``BoxMode``.
+    A BoxMode is callable that converts box mode of boxes.
+    It always creates a copy and will not modify boxes in place.
+
+    The implementation should be aware of:
+    remember to define class variable ``name`` which is a dictionary that maps ``spatial_dims`` to the box mode name.
+    """
+
+    name: Dict[int, BoxModeName] = {}
+
+    @classmethod
+    def get_name(cls, spatial_dims: int) -> str:
+        """
+        Get the mode name for the given spatial dimension using class variable ``name``.
+
+        Args:
+            spatial_dims: number of spatial dimensions of the bounding box.
+
+        Returns:
+            ``str``: mode string name
+        """
+        return cls.name[spatial_dims].value
+
+    @abstractmethod
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        """
+        Convert the bounding boxes of the current mode to corners.
+
+        Args:
+            boxes: bounding box, Nx4 or Nx6 torch tensor
+
+        Returns:
+            ``Tuple``: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor.
+            It represents (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+
+        Example:
+            .. code-block:: python
+
+                boxes = torch.ones(10,6)
+                boxmode.boxes_to_corners(boxes) # will return a 6-element tuple, each element is a 10x1 tensor
+        """
+        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
+
+    @abstractmethod
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        """
+        Convert the given box corners to the bounding boxes of the current mode.
+
+        Args:
+            corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor.
+                It represents (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+
+        Returns:
+            ``Tensor``: bounding box, Nx4 or Nx6 torch tensor
+
+        Example:
+            .. code-block:: python
+
+                corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
+                boxmode.corners_to_boxes(corners) # will return a 10x4 tensor
+        """
+        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
+
+
+class CornerCornerModeTypeA(BoxMode):
+    """
+    A subclass of ``BoxMode``.
+
+    Also represented as "xyxy" or "xyzxyz", with format of
+    [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax].
+
+    Example:
+        .. code-block:: python
+
+            CornerCornerModeTypeA.get_name(spatial_dims=2) # will return "xyxy"
+            CornerCornerModeTypeA.get_name(spatial_dims=3) # will return "xyzxyz"
+    """
+
+    name = {2: BoxModeName.XYXY, 3: BoxModeName.XYZXYZ}
+
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        corners: Tuple
+        corners = boxes.split(1, dim=-1)
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        boxes: torch.Tensor
+        boxes = torch.cat(tuple(corners), dim=-1)
+        return boxes
+
+
+class CornerCornerModeTypeB(BoxMode):
+    """
+    A subclass of ``BoxMode``.
+
+    Also represented as "xxyy" or "xxyyzz", with format of
+    [xmin, xmax, ymin, ymax] or [xmin, xmax, ymin, ymax, zmin, zmax].
+
+    Example:
+        .. code-block:: python
+
+            CornerCornerModeTypeB.get_name(spatial_dims=2) # will return "xxyy"
+            CornerCornerModeTypeB.get_name(spatial_dims=3) # will return "xxyyzz"
+    """
+
+    name = {2: BoxModeName.XXYY, 3: BoxModeName.XXYYZZ}
+
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        corners: Tuple
+        spatial_dims = get_spatial_dims(boxes=boxes)
+        if spatial_dims == 3:
+            xmin, xmax, ymin, ymax, zmin, zmax = boxes.split(1, dim=-1)
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
+        elif spatial_dims == 2:
+            xmin, xmax, ymin, ymax = boxes.split(1, dim=-1)
+            corners = xmin, ymin, xmax, ymax
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        boxes: torch.Tensor
+        spatial_dims = get_spatial_dims(corners=corners)
+        if spatial_dims == 3:
+            boxes = torch.cat((corners[0], corners[3], corners[1], corners[4], corners[2], corners[5]), dim=-1)
+        elif spatial_dims == 2:
+            boxes = torch.cat((corners[0], corners[2], corners[1], corners[3]), dim=-1)
+        return boxes
+
+
+class CornerCornerModeTypeC(BoxMode):
+    """
+    A subclass of ``BoxMode``.
+
+    Also represented as "xyxy" or "xyxyzz", with format of
+    [xmin, ymin, xmax, ymax] or [xmin, ymin, xmax, ymax, zmin, zmax].
+
+    Example:
+        .. code-block:: python
+
+            CornerCornerModeTypeC.get_name(spatial_dims=2) # will return "xyxy"
+            CornerCornerModeTypeC.get_name(spatial_dims=3) # will return "xyxyzz"
+    """
+
+    name = {2: BoxModeName.XYXY, 3: BoxModeName.XYXYZZ}
+
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        corners: Tuple
+        spatial_dims = get_spatial_dims(boxes=boxes)
+        if spatial_dims == 3:
+            xmin, ymin, xmax, ymax, zmin, zmax = boxes.split(1, dim=-1)
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
+        elif spatial_dims == 2:
+            corners = boxes.split(1, dim=-1)
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        boxes: torch.Tensor
+        spatial_dims = get_spatial_dims(corners=corners)
+        if spatial_dims == 3:
+            boxes = torch.cat((corners[0], corners[1], corners[3], corners[4], corners[2], corners[5]), dim=-1)
+        elif spatial_dims == 2:
+            boxes = torch.cat(tuple(corners), dim=-1)
+        return boxes
+
+
+class CornerSizeMode(BoxMode):
+    """
+    A subclass of ``BoxMode``.
+
+    Also represented as "xywh" or "xyzwhd", with format of
+    [xmin, ymin, xsize, ysize] or [xmin, ymin, zmin, xsize, ysize, zsize].
+
+    Example:
+        .. code-block:: python
+
+            CornerSizeMode.get_name(spatial_dims=2) # will return "xywh"
+            CornerSizeMode.get_name(spatial_dims=3) # will return "xyzwhd"
+    """
+
+    name = {2: BoxModeName.XYWH, 3: BoxModeName.XYZWHD}
+
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        corners: Tuple
+        # convert to float32 when computing torch.clamp, which does not support float16
+        box_dtype = boxes.dtype
+        compute_dtype = torch.float32
+
+        spatial_dims = get_spatial_dims(boxes=boxes)
+        if spatial_dims == 3:
+            xmin, ymin, zmin, w, h, d = boxes.split(1, dim=-1)
+            xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            zmax = zmin + (d - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
+        elif spatial_dims == 2:
+            xmin, ymin, w, h = boxes.split(1, dim=-1)
+            xmax = xmin + (w - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            ymax = ymin + (h - TO_REMOVE).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            corners = xmin, ymin, xmax, ymax
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        boxes: torch.Tensor
+        spatial_dims = get_spatial_dims(corners=corners)
+        if spatial_dims == 3:
+            xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
+            boxes = torch.cat(
+                (xmin, ymin, zmin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE, zmax - zmin + TO_REMOVE), dim=-1
+            )
+        elif spatial_dims == 2:
+            xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
+            boxes = torch.cat((xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1)
+        return boxes
+
+
+class CenterSizeMode(BoxMode):
+    """
+    A subclass of ``BoxMode``.
+
+    Also represented as "ccwh" or "cccwhd", with format of
+    [xcenter, ycenter, xsize, ysize] or [xcenter, ycenter, zcenter, xsize, ysize, zsize].
+
+    Example:
+        .. code-block:: python
+
+            CenterSizeMode.get_name(spatial_dims=2) # will return "ccwh"
+            CenterSizeMode.get_name(spatial_dims=3) # will return "cccwhd"
+    """
+
+    name = {2: BoxModeName.CCWH, 3: BoxModeName.CCCWHD}
+
+    def boxes_to_corners(self, boxes: torch.Tensor) -> Tuple:
+        corners: Tuple
+        # convert to float32 when computing torch.clamp, which does not support float16
+        box_dtype = boxes.dtype
+        compute_dtype = torch.float32
+
+        spatial_dims = get_spatial_dims(boxes=boxes)
+        if spatial_dims == 3:
+            xc, yc, zc, w, h, d = boxes.split(1, dim=-1)
+            xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            xmax = xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            ymin = yc - ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            ymax = yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            zmin = zc - ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            zmax = zc + ((d - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            corners = xmin, ymin, zmin, xmax, ymax, zmax
+        elif spatial_dims == 2:
+            xc, yc, w, h = boxes.split(1, dim=-1)
+            xmin = xc - ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            xmax = xc + ((w - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            ymin = yc - ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            ymax = yc + ((h - TO_REMOVE) / 2.0).to(dtype=compute_dtype).clamp(min=0).to(dtype=box_dtype)
+            corners = xmin, ymin, xmax, ymax
+        return corners
+
+    def corners_to_boxes(self, corners: Sequence) -> torch.Tensor:
+        boxes: torch.Tensor
+        spatial_dims = get_spatial_dims(corners=corners)
+        if spatial_dims == 3:
+            xmin, ymin, zmin, xmax, ymax, zmax = corners[0], corners[1], corners[2], corners[3], corners[4], corners[5]
+            boxes = torch.cat(
+                (
+                    (xmin + xmax + TO_REMOVE) / 2.0,
+                    (ymin + ymax + TO_REMOVE) / 2.0,
+                    (zmin + zmax + TO_REMOVE) / 2.0,
+                    xmax - xmin + TO_REMOVE,
+                    ymax - ymin + TO_REMOVE,
+                    zmax - zmin + TO_REMOVE,
+                ),
+                dim=-1,
+            )
+        elif spatial_dims == 2:
+            xmin, ymin, xmax, ymax = corners[0], corners[1], corners[2], corners[3]
+            boxes = torch.cat(
+                (
+                    (xmin + xmax + TO_REMOVE) / 2.0,
+                    (ymin + ymax + TO_REMOVE) / 2.0,
+                    xmax - xmin + TO_REMOVE,
+                    ymax - ymin + TO_REMOVE,
+                ),
+                dim=-1,
+            )
+        return boxes
+
+
 # We support the conversion between several box modes, i.e., representation of a bounding boxes
 SUPPORTED_MODES = [CornerCornerModeTypeA, CornerCornerModeTypeB, CornerCornerModeTypeC, CornerSizeMode, CenterSizeMode]
 # The standard box mode we use in all the box util functions
@@ -44,7 +334,7 @@ def get_spatial_dims(
     spatial_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
 ) -> int:
     """
-    Get spatial dimension for the giving setting.
+    Get spatial dimension for the giving setting and the validity of them.
     Missing input is allowed. But at least one of the input value should be given.
     It raises ValueError if the dimensions of multiple inputs do not match with each other.
 
@@ -104,15 +394,15 @@ def get_spatial_dims(
         raise ValueError("The dimensions of multiple inputs should match with each other.")
 
 
-def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:
+def _get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:
     """
-    This function returns BoxMode object giving a representation of box mode
+    Internal function that returns BoxMode object giving a representation of box mode
 
     Args:
         mode: a representation of box mode. If it is not given, this func will assume it is ``StandardMode``.
 
     Note:
-        ``StandardMode`` is equivalent to :class:`~monai.data.box_mode.CornerCornerModeTypeA`.
+        ``StandardMode`` is equivalent to :class:`~monai.data.box_utils.CornerCornerModeTypeA`.
 
         mode can be:
             #. str: choose from :class:`~monai.utils.enums.BoxModeName`, for example,
@@ -125,13 +415,13 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
                 - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
                 - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
                 - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from the subclasses of :class:`~monai.data.box_mode.BoxMode`, for example,
+            #. BoxMode class: choose from the subclasses of :class:`~monai.data.box_utils.BoxMode`, for example,
                 - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
                 - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
                 - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
                 - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
                 - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode object: choose from the subclasses of :class:`~monai.data.box_mode.BoxMode`, for example,
+            #. BoxMode object: choose from the subclasses of :class:`~monai.data.box_utils.BoxMode`, for example,
                 - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
                 - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
                 - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
@@ -146,48 +436,23 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
         .. code-block:: python
 
             mode = "xyzxyz"
-            get_boxmode(mode) # will return CornerCornerModeTypeA()
+            _get_boxmode(mode) # will return CornerCornerModeTypeA()
     """
     if isinstance(mode, BoxMode):
         return mode
 
-    boxmode: Type[BoxMode]
     if inspect.isclass(mode) and issubclass(mode, BoxMode):
-        boxmode = mode
-    elif isinstance(mode, str):
+        return mode(*args, **kwargs)
+
+    if isinstance(mode, str):
         for m in SUPPORTED_MODES:
             for n in SUPPORTED_SPATIAL_DIMS:
                 if m.get_name(n) == mode:
-                    boxmode = m
-    elif mode is None:
-        boxmode = StandardMode
-    else:
-        raise ValueError(f"Unsupported box mode: {mode}.")
-    return boxmode(*args, **kwargs)
-
-
-def _check_corners(corners: Sequence) -> bool:
-    """
-    Internal function to check the validity for the given box corners
-
-    Args:
-        corners: corners of boxes, 4-element or 6-element tuple, each element is a Nx1 torch tensor
-        (xmin, ymin, xmax, ymax) or (xmin, ymin, zmin, xmax, ymax, zmax)
+                    return m(*args, **kwargs)
 
-    Returns:
-        ``bool``: whether the box is valid
-
-    Example:
-        .. code-block:: python
-
-            corners = (torch.ones(10,1), torch.ones(10,1), torch.ones(10,1), torch.ones(10,1))
-            check_corner(corners) will return True
-    """
-    spatial_dims = get_spatial_dims(corners=corners)
-    for axis in range(0, spatial_dims):
-        if (corners[spatial_dims + axis] < corners[axis]).sum() > 0:
-            return False
-    return True
+    if mode is not None:
+        raise ValueError(f"Unsupported box mode: {mode}.")
+    return StandardMode(*args, **kwargs)
 
 
 def convert_box_mode(
@@ -204,7 +469,7 @@ def convert_box_mode(
         dst_mode: target box mode. If it is not given, this func will assume it is ``StandardMode``.
 
     Note:
-        ``StandardMode`` is equivalent to :class:`~monai.data.box_mode.CornerCornerModeTypeA`.
+        ``StandardMode`` is equivalent to :class:`~monai.data.box_utils.CornerCornerModeTypeA`.
 
         ``src_mode`` and ``dst_mode`` can be:
             #. str: choose from :class:`~monai.utils.enums.BoxModeName`, for example,
@@ -217,13 +482,13 @@ def convert_box_mode(
                 - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
                 - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
                 - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from the subclasses of :class:`~monai.data.box_mode.BoxMode`, for example,
+            #. BoxMode class: choose from the subclasses of :class:`~monai.data.box_utils.BoxMode`, for example,
                 - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
                 - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
                 - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
                 - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
                 - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode object: choose from the subclasses of :class:`~monai.data.box_mode.BoxMode`, for example,
+            #. BoxMode object: choose from the subclasses of :class:`~monai.data.box_utils.BoxMode`, for example,
                 - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
                 - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
                 - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
@@ -241,11 +506,11 @@ def convert_box_mode(
             # The following three lines are equivalent
             # They convert boxes with format [xmin, ymin, xmax, ymax] to [xcenter, ycenter, xsize, ysize].
             box_convert_mode(boxes=boxes, src_mode="xyxy", dst_mode="ccwh")
-            box_convert_mode(boxes=boxes, src_mode="xyxy", dst_mode=monai.data.box_mode.CenterSizeMode)
-            box_convert_mode(boxes=boxes, src_mode="xyxy", dst_mode=monai.data.box_mode.CenterSizeMode())
+            box_convert_mode(boxes=boxes, src_mode="xyxy", dst_mode=monai.data.box_utils.CenterSizeMode)
+            box_convert_mode(boxes=boxes, src_mode="xyxy", dst_mode=monai.data.box_utils.CenterSizeMode())
     """
-    src_boxmode = get_boxmode(src_mode)
-    dst_boxmode = get_boxmode(dst_mode)
+    src_boxmode = _get_boxmode(src_mode)
+    dst_boxmode = _get_boxmode(dst_mode)
 
     # if mode not changed, deepcopy the original boxes
     if isinstance(src_boxmode, type(dst_boxmode)):
@@ -259,8 +524,10 @@ def convert_box_mode(
     corners = src_boxmode.boxes_to_corners(boxes_t)
 
     # check validity of corners
-    if not _check_corners(corners):
-        raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+    spatial_dims = get_spatial_dims(boxes=boxes_t)
+    for axis in range(0, spatial_dims):
+        if (corners[spatial_dims + axis] < corners[axis]).sum() > 0:
+            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
 
     # convert corners to boxes
     boxes_t_dst = dst_boxmode.corners_to_boxes(corners)
diff --git a/tests/test_box_utils.py b/tests/test_box_utils.py
index 5865183d6d..7dee6a6e60 100644
--- a/tests/test_box_utils.py
+++ b/tests/test_box_utils.py
@@ -14,14 +14,15 @@
 import numpy as np
 from parameterized import parameterized
 
-from monai.data.box_mode import (
+from monai.data.box_utils import (
     CenterSizeMode,
     CornerCornerModeTypeA,
     CornerCornerModeTypeB,
     CornerCornerModeTypeC,
     CornerSizeMode,
+    convert_box_mode,
+    convert_box_to_standard_mode,
 )
-from monai.data.box_utils import convert_box_mode, convert_box_to_standard_mode
 from monai.utils.type_conversion import convert_data_type
 from tests.utils import TEST_NDARRAYS, assert_allclose
 

From fc5f41b9a51e28ddf3dc533dfcd09830c56030ee Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 18 May 2022 03:01:01 -0400
Subject: [PATCH 47/49] reformat

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 monai/data/box_utils.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index af8393c202..44a9ed931b 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -394,9 +394,9 @@ def get_spatial_dims(
         raise ValueError("The dimensions of multiple inputs should match with each other.")
 
 
-def _get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:
+def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:
     """
-    Internal function that returns BoxMode object giving a representation of box mode
+    This function that return BoxMode object giving a representation of box mode
 
     Args:
         mode: a representation of box mode. If it is not given, this func will assume it is ``StandardMode``.
@@ -436,7 +436,7 @@ def _get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, *
         .. code-block:: python
 
             mode = "xyzxyz"
-            _get_boxmode(mode) # will return CornerCornerModeTypeA()
+            get_boxmode(mode) # will return CornerCornerModeTypeA()
     """
     if isinstance(mode, BoxMode):
         return mode
@@ -509,8 +509,8 @@ def convert_box_mode(
             box_convert_mode(boxes=boxes, src_mode="xyxy", dst_mode=monai.data.box_utils.CenterSizeMode)
             box_convert_mode(boxes=boxes, src_mode="xyxy", dst_mode=monai.data.box_utils.CenterSizeMode())
     """
-    src_boxmode = _get_boxmode(src_mode)
-    dst_boxmode = _get_boxmode(dst_mode)
+    src_boxmode = get_boxmode(src_mode)
+    dst_boxmode = get_boxmode(dst_mode)
 
     # if mode not changed, deepcopy the original boxes
     if isinstance(src_boxmode, type(dst_boxmode)):

From ea527e9b02bcd227eb0816981287865ac15f7cfd Mon Sep 17 00:00:00 2001
From: Can Zhao <canz@nvidia.com>
Date: Wed, 18 May 2022 12:38:43 -0400
Subject: [PATCH 48/49] update docstring

Signed-off-by: Can Zhao <canz@nvidia.com>
---
 docs/source/data.rst    |   1 +
 monai/data/box_utils.py | 102 ++++++++++++++++++++++------------------
 2 files changed, 56 insertions(+), 47 deletions(-)

diff --git a/docs/source/data.rst b/docs/source/data.rst
index 1fcf188b94..fad640b81c 100644
--- a/docs/source/data.rst
+++ b/docs/source/data.rst
@@ -327,5 +327,6 @@ Box mode
 
 Box mode converter
 ~~~~~~~~~~~~~~~~~~
+.. autofunction:: monai.data.box_utils.get_boxmode
 .. autofunction:: monai.data.box_utils.convert_box_mode
 .. autofunction:: monai.data.box_utils.convert_box_to_standard_mode
diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 44a9ed931b..08f97ebb88 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -10,6 +10,7 @@
 # limitations under the License.
 
 import inspect
+import warnings
 from abc import ABC, abstractmethod
 from copy import deepcopy
 from typing import Dict, Sequence, Tuple, Type, Union
@@ -22,6 +23,15 @@
 from monai.utils.enums import BoxModeName
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
+"""
+This utility module mainly supports rectangular bounding boxes with a few different parameterizations
+    and methods for converting between them.
+It provides reliable access to the spatial coordinates of the box vertices
+    in the "canonical ordering" -- [xmin, ymin, xmax, ymax] for 2D and [xmin, ymin, zmin, xmax, ymax, zmax] for 3D.
+We currently define this ordering as StandardMode
+    and the rest of the detection pipelines mainly assumes boxes in StandardMode.
+"""
+
 # We support 2-D or 3-D bounding boxes
 SUPPORTED_SPATIAL_DIMS = [2, 3]
 
@@ -37,13 +47,33 @@
 class BoxMode(ABC):
     """
     An abstract class of a ``BoxMode``.
-    A BoxMode is callable that converts box mode of boxes.
-    It always creates a copy and will not modify boxes in place.
+
+    A ``BoxMode`` is a callable that converts the box mode of ``boxes`` (Nx4 (2D) or Nx6 (3D) torch tensor or ndarray).
+    ``BoxMode`` has several subclasses that represent different box modes, including
+
+    - :class:`~monai.data.box_utils.CornerCornerModeTypeA`:
+      represents [xmin, ymin, xmax, ymax] for 2D and [xmin, ymin, zmin, xmax, ymax, zmax] for 3D
+    - :class:`~monai.data.box_utils.CornerCornerModeTypeB`:
+      represents [xmin, xmax, ymin, ymax] for 2D and [xmin, xmax, ymin, ymax, zmin, zmax] for 3D
+    - :class:`~monai.data.box_utils.CornerCornerModeTypeC`:
+      represents [xmin, ymin, xmax, ymax] for 2D and [xmin, ymin, xmax, ymax, zmin, zmax] for 3D
+    - :class:`~monai.data.box_utils.CornerSizeMode`:
+      represents [xmin, ymin, xsize, ysize] for 2D and [xmin, ymin, zmin, xsize, ysize, zsize] for 3D
+    - :class:`~monai.data.box_utils.CenterSizeMode`:
+      represents [xcenter, ycenter, xsize, ysize] for 2D and [xcenter, ycenter, zcenter, xsize, ysize, zsize] for 3D
+
+    We currently define ``StandardMode`` = :class:`~monai.data.box_utils.CornerCornerModeTypeA`,
+    and monai detection pipelines mainly assume ``boxes`` are in ``StandardMode``.
 
     The implementation should be aware of:
-    remember to define class variable ``name`` which is a dictionary that maps ``spatial_dims`` to the box mode name.
+
+    - remember to define class variable ``name``,
+      a dictionary that maps ``spatial_dims`` to :class:`~monai.utils.enums.BoxModeName`.
+    - :func:`~monai.data.box_utils.BoxMode.boxes_to_corners` and :func:`~monai.data.box_utils.BoxMode.corners_to_boxes`
+      should not modify inputs in place.
     """
 
+    # a dictionary that maps spatial_dims to monai.utils.enums.BoxModeName.
     name: Dict[int, BoxModeName] = {}
 
     @classmethod
@@ -334,7 +364,7 @@ def get_spatial_dims(
     spatial_size: Union[Sequence[int], torch.Tensor, np.ndarray, None] = None,
 ) -> int:
     """
-    Get spatial dimension for the giving setting and the validity of them.
+    Get spatial dimension for the given setting and check the validity of them.
     Missing input is allowed. But at least one of the input value should be given.
     It raises ValueError if the dimensions of multiple inputs do not match with each other.
 
@@ -358,6 +388,7 @@ def get_spatial_dims(
     """
     spatial_dims_set = set()
 
+    # Check the validity of each input and add its corresponding spatial_dims to spatial_dims_set
     if boxes is not None:
         if int(boxes.shape[1]) not in [4, 6]:
             raise ValueError(
@@ -383,26 +414,29 @@ def get_spatial_dims(
             )
         spatial_dims_set.add(len(spatial_size))
 
+    # Get spatial_dims from spatial_dims_set, which contains only unique values
     spatial_dims_list = list(spatial_dims_set)
     if len(spatial_dims_list) == 0:
         raise ValueError("At least one of the inputs needs to be non-empty.")
-    elif len(spatial_dims_list) == 1:
+
+    if len(spatial_dims_list) == 1:
         spatial_dims = int(spatial_dims_list[0])
         spatial_dims = look_up_option(spatial_dims, supported=[2, 3])
         return int(spatial_dims)
-    else:
-        raise ValueError("The dimensions of multiple inputs should match with each other.")
+
+    raise ValueError("The dimensions of multiple inputs should match with each other.")
 
 
 def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **kwargs) -> BoxMode:
     """
-    This function that return BoxMode object giving a representation of box mode
+    This function returns a :class:`~monai.data.box_utils.BoxMode` object given a representation of box mode
 
     Args:
-        mode: a representation of box mode. If it is not given, this func will assume it is ``StandardMode``.
+        mode: a representation of box mode. If it is not given, this func will assume it is ``StandardMode()``.
 
     Note:
-        ``StandardMode`` is equivalent to :class:`~monai.data.box_utils.CornerCornerModeTypeA`.
+        ``StandardMode`` = :class:`~monai.data.box_utils.CornerCornerModeTypeA`,
+        also represented as "xyxy" for 2D and "xyzxyz" for 3D.
 
         mode can be:
             #. str: choose from :class:`~monai.utils.enums.BoxModeName`, for example,
@@ -427,7 +461,7 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
                 - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
                 - CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
                 - CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
-            #. None: will assume mode is ``StandardMode``
+            #. None: will assume mode is ``StandardMode()``
 
     Returns:
         BoxMode object
@@ -447,7 +481,7 @@ def get_boxmode(mode: Union[str, BoxMode, Type[BoxMode], None] = None, *args, **
     if isinstance(mode, str):
         for m in SUPPORTED_MODES:
             for n in SUPPORTED_SPATIAL_DIMS:
-                if m.get_name(n) == mode:
+                if inspect.isclass(m) and issubclass(m, BoxMode) and m.get_name(n) == mode:
                     return m(*args, **kwargs)
 
     if mode is not None:
@@ -465,39 +499,13 @@ def convert_box_mode(
 
     Args:
         boxes: source bounding boxes, Nx4 or Nx6 torch tensor or ndarray.
-        src_mode: source box mode. If it is not given, this func will assume it is ``StandardMode``.
-        dst_mode: target box mode. If it is not given, this func will assume it is ``StandardMode``.
-
-    Note:
-        ``StandardMode`` is equivalent to :class:`~monai.data.box_utils.CornerCornerModeTypeA`.
-
-        ``src_mode`` and ``dst_mode`` can be:
-            #. str: choose from :class:`~monai.utils.enums.BoxModeName`, for example,
-                - "xyxy": boxes has format [xmin, ymin, xmax, ymax]
-                - "xyzxyz": boxes has format [xmin, ymin, zmin, xmax, ymax, zmax]
-                - "xxyy": boxes has format [xmin, xmax, ymin, ymax]
-                - "xxyyzz": boxes has format [xmin, xmax, ymin, ymax, zmin, zmax]
-                - "xyxyzz": boxes has format [xmin, ymin, xmax, ymax, zmin, zmax]
-                - "xywh": boxes has format [xmin, ymin, xsize, ysize]
-                - "xyzwhd": boxes has format [xmin, ymin, zmin, xsize, ysize, zsize]
-                - "ccwh": boxes has format [xcenter, ycenter, xsize, ysize]
-                - "cccwhd": boxes has format [xcenter, ycenter, zcenter, xsize, ysize, zsize]
-            #. BoxMode class: choose from the subclasses of :class:`~monai.data.box_utils.BoxMode`, for example,
-                - CornerCornerModeTypeA: equivalent to "xyxy" or "xyzxyz"
-                - CornerCornerModeTypeB: equivalent to "xxyy" or "xxyyzz"
-                - CornerCornerModeTypeC: equivalent to "xyxy" or "xyxyzz"
-                - CornerSizeMode: equivalent to "xywh" or "xyzwhd"
-                - CenterSizeMode: equivalent to "ccwh" or "cccwhd"
-            #. BoxMode object: choose from the subclasses of :class:`~monai.data.box_utils.BoxMode`, for example,
-                - CornerCornerModeTypeA(): equivalent to "xyxy" or "xyzxyz"
-                - CornerCornerModeTypeB(): equivalent to "xxyy" or "xxyyzz"
-                - CornerCornerModeTypeC(): equivalent to "xyxy" or "xyxyzz"
-                - CornerSizeMode(): equivalent to "xywh" or "xyzwhd"
-                - CenterSizeMode(): equivalent to "ccwh" or "cccwhd"
-            #. None: will assume mode is ``StandardMode``
+        src_mode: source box mode. If it is not given, this func will assume it is ``StandardMode()``.
+            It follows the same format as ``mode`` in :func:`~monai.data.box_utils.get_boxmode`.
+        dst_mode: target box mode. If it is not given, this func will assume it is ``StandardMode()``.
+            It follows the same format as ``mode`` in :func:`~monai.data.box_utils.get_boxmode`.
 
     Returns:
-        bounding boxes with target mode, with same format as ``boxes``, does not share memory with ``boxes``
+        bounding boxes with target mode, with same data type as ``boxes``, does not share memory with ``boxes``
 
     Example:
         .. code-block:: python
@@ -527,7 +535,7 @@ def convert_box_mode(
     spatial_dims = get_spatial_dims(boxes=boxes_t)
     for axis in range(0, spatial_dims):
         if (corners[spatial_dims + axis] < corners[axis]).sum() > 0:
-            raise ValueError("Given boxes has invalid values. The box size must be non-negative.")
+            warnings.warn("Given boxes has invalid values. The box size must be non-negative.")
 
     # convert corners to boxes
     boxes_t_dst = dst_boxmode.corners_to_boxes(corners)
@@ -547,11 +555,11 @@ def convert_box_to_standard_mode(
 
     Args:
         boxes: source bounding boxes, Nx4 or Nx6 torch tensor or ndarray.
-        mode: source box mode. If it is not given, this func will assume it is ``StandardMode``.
-            It follows the same format with ``src_mode`` and ``dst_mode`` in :func:`~monai.data.box_utils.convert_box_mode`.
+        mode: source box mode. If it is not given, this func will assume it is ``StandardMode()``.
+            It follows the same format as ``mode`` in :func:`~monai.data.box_utils.get_boxmode`.
 
     Returns:
-        bounding boxes with standard mode, with same format as ``boxes``, does not share memory with ``boxes``
+        bounding boxes with standard mode, with same data type as ``boxes``, does not share memory with ``boxes``
 
     Example:
         .. code-block:: python

From 61dc4abec75540d7e1183a8b303d017cb9860951 Mon Sep 17 00:00:00 2001
From: Wenqi Li <wenqil@nvidia.com>
Date: Wed, 18 May 2022 18:03:20 +0100
Subject: [PATCH 49/49] fixes docstring

Signed-off-by: Wenqi Li <wenqil@nvidia.com>
---
 monai/data/box_utils.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/monai/data/box_utils.py b/monai/data/box_utils.py
index 08f97ebb88..ca8dcd284a 100644
--- a/monai/data/box_utils.py
+++ b/monai/data/box_utils.py
@@ -9,6 +9,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""
+This utility module mainly supports rectangular bounding boxes with a few
+different parameterizations and methods for converting between them. It
+provides reliable access to the spatial coordinates of the box vertices in the
+"canonical ordering":
+[xmin, ymin, xmax, ymax] for 2D and [xmin, ymin, zmin, xmax, ymax, zmax] for 3D.
+We currently define this ordering as `monai.data.box_utils.StandardMode` and
+the rest of the detection pipelines mainly assume boxes in `StandardMode`.
+"""
+
 import inspect
 import warnings
 from abc import ABC, abstractmethod
@@ -23,15 +33,6 @@
 from monai.utils.enums import BoxModeName
 from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
-"""
-This utility module mainly supports rectangular bounding boxes with a few different parameterizations
-    and methods for converting between them.
-It provides reliable access to the spatial coordinates of the box vertices
-    in the "canonical ordering" -- [xmin, ymin, xmax, ymax] for 2D and [xmin, ymin, zmin, xmax, ymax, zmax] for 3D.
-We currently define this ordering as StandardMode
-    and the rest of the detection pipelines mainly assumes boxes in StandardMode.
-"""
-
 # We support 2-D or 3-D bounding boxes
 SUPPORTED_SPATIAL_DIMS = [2, 3]