fix: qlinearconv quant params (#16234)

* fix: qlinearconv quant params

* fix: simplify reshape

---------

Co-authored-by: Philipp Braun <braunphilipp@users.noreply.github.com>
This commit is contained in:
Philipp Braun
2026-05-20 21:31:41 +03:00
committed by GitHub
parent 19535df53c
commit a01d5918af
2 changed files with 20 additions and 16 deletions

View File

@@ -284,22 +284,23 @@ class TestMainOnnxOps(TestOnnxOps):
def test_qlinear_conv(self):
  """Check QLinearConv with per-tensor and per-output-channel weight quantization params.

  Covers uint8 (zero point 128) and int8 (zero point 0) inputs, a ones and a zeros
  int32 bias, and `w_scale` / `w_zero_point` given either as scalars
  (channel_shape=()) or as 1-D tensors with one entry per output channel
  (channel_shape=(32,)).
  """
  # the conv configuration is the same for every case, so build it once
  attributes = {'auto_pad': 'NOTSET', 'dilations': (1, 1), 'group': 1, 'kernel_shape': (3, 3), 'pads': (1, 1, 1, 1), 'strides': (2, 2)}
  outputs = ["out"]
  for dtype, zero_point in [(np.uint8, 128), (np.int8, 0)]:
    dtype_min, dtype_max = np.iinfo(dtype).min, np.iinfo(dtype).max
    for b in (np.ones([32], dtype=np.int32), np.zeros([32], dtype=np.int32)):
      # channel_shape=() is the scalar (per-tensor) case, (32,) is one value per output channel
      for channel_shape in [(), (32,)]:
        # bias is included in the label so a failing combination can be identified
        with self.subTest(dtype=dtype, zero_point=zero_point, channel_shape=channel_shape, bias=int(b[0])):
          inputs = {
            "x": np.random.randint(dtype_min, dtype_max + 1, [1, 3, 224, 224], dtype=dtype),
            "x_scale": np.array(np.random.uniform(0.01, 0.1), dtype=np.float32),
            "x_zero_point": np.array(zero_point, dtype=dtype),
            "w": np.random.randint(dtype_min, dtype_max + 1, [32, 3, 3, 3], dtype=dtype),
            "w_scale": np.random.uniform(0.01, 0.1, channel_shape).astype(np.float32),
            "w_zero_point": np.full(channel_shape, zero_point, dtype=dtype),
            "y_scale": np.array(np.random.uniform(0.01, 0.1), dtype=np.float32),
            "y_zero_point": np.array(zero_point, dtype=dtype),
            "b": b
          }
          self.helper_test_single_op("QLinearConv", inputs, attributes, outputs, atol=1) # occasionally inaccurate
def test_qlinear_matmul(self):
for dtype, zero_point in [(np.uint8, 128), (np.int8, 0)]:

View File

@@ -1229,6 +1229,9 @@ def get_onnx_ops() -> dict[str, types.FunctionType|dict[OpSetId, types.FunctionT
def QLinearConv(x:Tensor, x_scale:Tensor, x_zero_point:Tensor, w:Tensor, w_scale:Tensor, w_zero_point:Tensor, y_scale:Tensor,
                y_zero_point:Tensor, B:Tensor|None=None, **opts):
  """Quantized convolution: forwards to `_qlinearop_quantized` with `Conv` as the op.

  `w_scale` and `w_zero_point` may be scalars or 1-D tensors with one entry per
  output channel (see https://onnx.ai/onnx/operators/onnx__QLinearConv.html).
  Both are reshaped to the rank of `w` so the same code handles 1-D, 2-D and
  3-D convolutions; for a rank-4 weight the shapes are (-1, 1, 1, 1) and
  (1, -1, 1, 1).

  `B` is the optional bias and is passed on to the op as the keyword `B`,
  together with any remaining `opts`.
  """
  # one singleton axis for every weight axis after the leading (output-channel) axis
  trailing = [1] * (w.ndim - 1)
  # zero point lines up with axis 0 of w: (M, 1, ..., 1)
  w_zero_point = w_zero_point.reshape(-1, *trailing)
  # scale lines up with axis 1: (1, M, 1, ..., 1)
  # NOTE(review): presumably because it is applied to the conv output (N, M, ...) inside _qlinearop_quantized - confirm
  w_scale = w_scale.reshape(1, -1, *trailing[1:])
  return _qlinearop_quantized(Conv, [x,w], [x_zero_point,w_zero_point], [x_scale,w_scale], y_scale, y_zero_point, **{"B":B, **opts})
def QLinearMatMul(a:Tensor, a_scale:Tensor, a_zero_point:Tensor, b:Tensor, b_scale:Tensor, b_zero_point:Tensor, y_scale:Tensor,