From b0dab6a4cda2cfd64b8cb7eece47e9049977db58 Mon Sep 17 00:00:00 2001 From: geohotstan <135171913+geohotstan@users.noreply.github.com> Date: Mon, 11 Aug 2025 02:10:39 +0800 Subject: [PATCH] onnx Resize OP clean up (#11603) * start * slight clean up --- extra/onnx.py | 75 +++++++++++---------- test/external/external_test_onnx_backend.py | 3 + test/external/external_test_onnx_ops.py | 30 +++++++++ 3 files changed, 73 insertions(+), 35 deletions(-) diff --git a/extra/onnx.py b/extra/onnx.py index 565b7a3599..03b6a8b51e 100644 --- a/extra/onnx.py +++ b/extra/onnx.py @@ -703,52 +703,57 @@ def get_onnx_ops() -> dict[str, types.FunctionType|dict[OpSetId, types.FunctionT return x.triu(k_) if upper else x.tril(k_) def Resize(X:Tensor, roi:list[float]|None=None, scales:list[float]|None=None, sizes:list[int]|None=None, antialias:int=0, - axes:list[int]|None=None, coordinate_transformation_mode:str='half_pixel', cubic_coeff_a:float=-0.75, exclude_outside:int=0, - extrapolation_value:float=0.0, keep_aspect_ratio_policy:str='stretch', mode:str='nearest', nearest_mode:str='round_prefer_floor'): - def _apply_nearest_mode(index: Tensor, input_dim, mode: str): - if mode == "round_prefer_floor": index = (index - 0.5).ceil() - elif mode == "round_prefer_ceil": index = (index + 0.5).floor() - elif mode in ["floor", "ceil"]: index = getattr(index, mode)() - else: raise ValueError(f"invalid {nearest_mode=}") - return index.cast(dtypes.int32).clip(0, input_dim-1) - def _apply_transformation(index: Tensor, input_dim, scale_dim, mode): - # TODO: needs more testing, not confident in this - # NOTE: their reference implementation differ from the implementation in their reference docs - # https://github.com/onnx/onnx/blob/main/onnx/reference/ops/op_resize.py - # https://github.com/onnx/onnx/blob/main/docs/Operators.md#Resize - output_dim = scale_dim * input_dim - if mode == "half_pixel": index = (index + 0.5) / scale_dim - 0.5 - elif mode == "align_corners": index = index * (input_dim - 1) / (output_dim - 1) if output_dim != 1 else Tensor([0]) - elif mode == "asymmetric": index = index / scale_dim - elif mode == "pytorch_half_pixel": index = (index + 0.5) / scale_dim - 0.5 if output_dim != 1 else Tensor([-0.5]) - elif mode == "half_pixel_symmetric": index = input_dim / 2 * (1 - int(output_dim) / output_dim) + (index + 0.5) / scale_dim - 0.5 - else: raise NotImplementedError(f"invalid {coordinate_transformation_mode=}") - return index.clip(0, input_dim-1) + axes:list[int]|None=None, coordinate_transformation_mode:str='half_pixel', cubic_coeff_a:float=-0.75, exclude_outside:int=0, + extrapolation_value:float=0.0, keep_aspect_ratio_policy:str='stretch', mode:str='nearest', nearest_mode:str='round_prefer_floor'): + def _apply_transformation(input_sz, output_sz, scale_dim, mode): + index = Tensor.arange(output_sz, requires_grad=False, device=X.device) + if mode == "half_pixel": return (index + 0.5) / scale_dim - 0.5 + if mode == "align_corners": return index * (input_sz - 1) / (output_sz - 1) if output_sz != 1 else Tensor.zeros_like(index) + if mode == "asymmetric": return index / scale_dim + if mode == "pytorch_half_pixel": return ((index + 0.5) / scale_dim - 0.5) if output_sz != 1 else Tensor.zeros_like(index) + if mode == "half_pixel_symmetric": + output_dim_scaled = input_sz * scale_dim + return (input_sz / 2) * (1 - (output_sz / output_dim_scaled)) + (index + 0.5) / scale_dim - 0.5 + raise ValueError(f"invalid {coordinate_transformation_mode=}") - scales, sizes = (None if scales is None else scales[2-(X.ndim-len(scales)):]), (None if sizes is None else sizes[2-(X.ndim-len(sizes)):]) - # we pre permute the axes and permute back after resize - axes, input_shape, = (axes or list(range(X.ndim))), cast(tuple[int, ...], X.shape[2:]), + if antialias: raise NotImplementedError("antialias is not implemented") + axes = axes or list(range(X.ndim)) perm = [a for a in range(len(X.shape)) if a not in axes] + list(axes) + # we pre-permute the axes and permute back after resize + # the permute aligns X's axes to scales, sizes, and roi X = X.permute(*perm) + input_shape = cast(tuple[int, ...], X.shape[2:]) + if scales is not None: assert all(sc==1 for sc in scales[:-len(input_shape)]), "resizing batch_size dim or channel dim not supported" + if sizes is not None: assert tuple(sizes[:-2]) == tuple(X.shape[X.ndim-len(sizes):-2]), "resizing batch_size dim or channel dim not supported" + assert (scales is not None) ^ (sizes is not None), "only provide one of `scales` or `sizes`" + + scales, sizes = (None if scales is None else scales[-len(input_shape):]), (None if sizes is None else sizes[-len(input_shape):]) if sizes is not None: if keep_aspect_ratio_policy in ["not_larger", "not_smaller"]: scale_fxn = min if keep_aspect_ratio_policy == "not_larger" else max - scales = [scale_fxn([sizes[i] / input_shape[i] for i in range(len(input_shape)) if i+2 in axes])] * 2 - sizes = [int((scales[0] * input_shape[i]) + 0.5) if i+2 in axes else input_shape[i] for i in range(X.ndim-2)] - else: - scales = [size / input_shape for size, input_shape in zip(sizes, input_shape)] - else: - sizes = [int(sc*sh) for sc, sh in zip(scales, input_shape)] + scale = scale_fxn(sz / sh for sz,sh in zip(sizes, input_shape)) + sizes, scales = [int(scale * sh + 0.5) for sh in input_shape], [scale]*len(input_shape) + else: scales = [sz / sh for sz, sh in zip(sizes, input_shape)] + else: sizes = [int(sc * sh) for sc, sh in zip(scales, input_shape)] + + if all(sz == sh for sz, sh in zip(sizes, input_shape)): return X.permute(*argsort(perm)) if perm else X - # NOTE: this transformation makes it so that we can't just call Tensor.interpolate - # in Tensor.interpolate, we use indexes without any transformation indexes = [] - for shape, size, scale in zip(input_shape, sizes, scales): - indexes.append(_apply_transformation(Tensor.arange(size), shape, scale, coordinate_transformation_mode)) + for input_sz, output_sz, scale in zip(input_shape, sizes, scales): + indexes.append(_apply_transformation(input_sz, output_sz, scale, coordinate_transformation_mode)) + + if mode in ["nearest", "linear"]: indexes = [idx.clip(0, sz-1) for idx, sz in zip(indexes, input_shape)] if mode == "nearest": - indexes = [_apply_nearest_mode(index, shape, nearest_mode) for (index, shape) in zip(indexes, input_shape)] + mode_operations = { + "round_prefer_floor": lambda idx: (idx - 0.5).ceil(), + "round_prefer_ceil": lambda idx: (idx + 0.5).floor(), + "floor": lambda idx: idx.floor(), + "ceil": lambda idx: idx.ceil() + } + if nearest_mode not in mode_operations: raise ValueError(f"invalid {nearest_mode=}") + indexes = [mode_operations[nearest_mode](idx).int() for idx in indexes] X = X[(..., *Tensor.meshgrid(*indexes))] if mode == "linear": expand = list(X.shape) diff --git a/test/external/external_test_onnx_backend.py b/test/external/external_test_onnx_backend.py index b290a6d8ef..9f82d5d143 100644 --- a/test/external/external_test_onnx_backend.py +++ b/test/external/external_test_onnx_backend.py @@ -53,6 +53,7 @@ backend_test.exclude('test_dynamicquantizelinear_cpu') backend_test.exclude('test_dynamicquantizelinear_expanded_cpu') # BUG: ORT fails these with numerical error but we match ORT numerically +# see: https://onnx.ai/backend-scoreboard/onnxruntime_details_stable.html # tested in external_test_onnx_ops.py::TestMainOnnxOps.test_qlinearmatmul_2D_int8_float16 backend_test.exclude('test_qlinearmatmul_2D_int8_float16_cpu') # tested in external_test_onnx_ops.py::TestMainOnnxOps.test_qlinearmatmul_3D_int8_float16 @@ -65,6 +66,8 @@ backend_test.exclude('test_qlinearmatmul_3D_int8_float32_cpu') backend_test.exclude('test_maxunpool_export_with_output_shape_cpu') # tested in external_test_onnx_ops.py::TestMainOnnxOps.test_averagepool_3d_dilations_large_count_include_pad_is_1_ceil_mode_is_True backend_test.exclude('test_averagepool_3d_dilations_large_count_include_pad_is_1_ceil_mode_is_True_cpu') +# tested in external_test_onnx_ops.py::TestMainOnnxOps.test_resize_downsample_scales_linear_align_corners +backend_test.exclude('test_resize_downsample_scales_linear_align_corners_cpu') # about different dtypes if not is_dtype_supported(dtypes.float64): diff --git a/test/external/external_test_onnx_ops.py b/test/external/external_test_onnx_ops.py index 431743118e..4de19105bb 100644 --- a/test/external/external_test_onnx_ops.py +++ b/test/external/external_test_onnx_ops.py @@ -75,6 +75,36 @@ class TestMainOnnxOps(TestOnnxOps): outputs = ["y"] self.helper_test_single_op("Gather", inputs, attributes, outputs) + # NOTE: resize OP is sensitive to numerical errors + def _test_resize_scales(self, scale_values, **kwargs): + for sc in scale_values: + for ct_mode in ["half_pixel", "align_corners", "asymmetric", "pytorch_half_pixel", "half_pixel_symmetric"]: + with self.subTest(coordinate_transformation_mode=ct_mode, scale=sc, **kwargs): + X = np.array([[[[1, 2, 3, 4], + [5, 6, 7, 8], + [9,10,11,12]]]], dtype=np.float32) + scales = np.array([1.0, 1.0, sc, sc], dtype=np.float32) + inputs = {"X": X, "roi": np.array([], dtype=np.float32), "scales": scales} + attributes = {"coordinate_transformation_mode": ct_mode, **kwargs} + outputs = ["out"] + self.helper_test_single_op("Resize", inputs, attributes, outputs) + + def test_resize_linear_mode(self): + self._test_resize_scales([0.01, 0.25, 0.5, 0.51, 0.6, 1.0, 1.5, 2.0, 3.5, 20.0], mode="linear") + + def test_resize_nearest_mode(self): + # excluded 3.5 because some values divide into slight numerical differences, which when rounded gives wrong results + self._test_resize_scales([0.01, 0.25, 0.5, 0.51, 0.6, 1.0, 1.5, 2.0, 20.0], mode="nearest") + + def test_resize_downsample_scales_linear_align_corners(self): + # https://github.com/onnx/onnx/blob/main/docs/Operators.md#examples-131 + X = np.array([[[[1, 2, 3, 4], [5, 6, 7, 8]]]], dtype=np.float32) + scales = np.array([1.0, 1.0, 0.6, 0.6], dtype=np.float32) + inputs = {"X": X, "roi": np.array([], dtype=np.float32), "scales": scales} + attributes = {"mode": "linear", "coordinate_transformation_mode": "align_corners"} + outputs = ["out"] + self.helper_test_single_op("Resize", inputs, attributes, outputs) + def test_maxunpool_export_with_output_shape(self): # https://github.com/onnx/onnx/blob/main/docs/Operators.md#examples-91 xT = np.array([[[[5, 6], [7, 8]]]], dtype=np.float32)