fix: qlinearconv quant params (#16234)

* fix: qlinearconv quant params
* fix: simplify reshape

---------

Co-authored-by: Philipp Braun <braunphilipp@users.noreply.github.com>
2026-06-13 00:15:35 +08:00 · 2026-05-20 21:31:41 +03:00
parent 19535df53c
commit a01d5918af
2 changed files with 20 additions and 16 deletions
--- a/test/external/external_test_onnx_ops.py
+++ b/test/external/external_test_onnx_ops.py
@@ -284,22 +284,23 @@ class TestMainOnnxOps(TestOnnxOps):
  def test_qlinear_conv(self):
    for dtype, zero_point in [(np.uint8, 128), (np.int8, 0)]:
      for b in (np.ones([32], dtype=np.int32), np.zeros([32], dtype=np.int32)):
-        with self.subTest(dtype=dtype, zero_point=zero_point):
-          dtype_min, dtype_max = np.iinfo(dtype).min, np.iinfo(dtype).max
-          inputs = {
-            "x": np.random.randint(dtype_min, dtype_max + 1, [1, 3, 224, 224], dtype=dtype),
-            "x_scale": np.array(np.random.uniform(0.01, 0.1), dtype=np.float32),
-            "x_zero_point": np.array(zero_point, dtype=dtype),
-            "w": np.random.randint(dtype_min, dtype_max + 1, [32, 3, 3, 3], dtype=dtype),
-            "w_scale": np.array(np.random.uniform(0.01, 0.1), dtype=np.float32),
-            "w_zero_point": np.array(zero_point, dtype=dtype),
-            "y_scale": np.array(np.random.uniform(0.01, 0.1), dtype=np.float32),
-            "y_zero_point": np.array(zero_point, dtype=dtype),
-            "b": b
-          }
-          attributes = {'auto_pad': 'NOTSET', 'dilations': (1, 1), 'group': 1, 'kernel_shape': (3, 3), 'pads': (1, 1, 1, 1), 'strides': (2, 2)}
-          outputs = ["out"]
-          self.helper_test_single_op("QLinearConv", inputs, attributes, outputs, atol=1) # occasionally inaccurate
+        for channel_shape in [(), (32,)]: # w quant params as a scalar and as one value per output channel (w has 32)
+          with self.subTest(dtype=dtype, zero_point=zero_point, channel_shape=channel_shape):
+            dtype_min, dtype_max = np.iinfo(dtype).min, np.iinfo(dtype).max
+            inputs = {
+              "x": np.random.randint(dtype_min, dtype_max + 1, [1, 3, 224, 224], dtype=dtype),
+              "x_scale": np.array(np.random.uniform(0.01, 0.1), dtype=np.float32),
+              "x_zero_point": np.array(zero_point, dtype=dtype),
+              "w": np.random.randint(dtype_min, dtype_max + 1, [32, 3, 3, 3], dtype=dtype),
+              "w_scale": np.random.uniform(0.01, 0.1, channel_shape).astype(np.float32),
+              "w_zero_point": np.full(channel_shape, zero_point, dtype=dtype),
+              "y_scale": np.array(np.random.uniform(0.01, 0.1), dtype=np.float32),
+              "y_zero_point": np.array(zero_point, dtype=dtype),
+              "b": b
+            }
+            attributes = {'auto_pad': 'NOTSET', 'dilations': (1, 1), 'group': 1, 'kernel_shape': (3, 3), 'pads': (1, 1, 1, 1), 'strides': (2, 2)}
+            outputs = ["out"]
+            self.helper_test_single_op("QLinearConv", inputs, attributes, outputs, atol=1) # occasionally inaccurate, hence atol=1

  def test_qlinear_matmul(self):
    for dtype, zero_point in [(np.uint8, 128), (np.int8, 0)]:
--- a/tinygrad/nn/onnx.py
+++ b/tinygrad/nn/onnx.py
@@ -1229,6 +1229,9 @@ def get_onnx_ops() -> dict[str, types.FunctionType|dict[OpSetId, types.FunctionT

  def QLinearConv(x:Tensor, x_scale:Tensor, x_zero_point:Tensor, w:Tensor, w_scale:Tensor, w_zero_point:Tensor, y_scale:Tensor,
                  y_zero_point:Tensor, B:Tensor|None=None, **opts):
+    # quant params are scalar or per-output-channel (https://onnx.ai/onnx/operators/onnx__QLinearConv.html); rank follows w so N-D convs work
+    w_zero_point = w_zero_point.reshape(-1, *[1]*(w.ndim-1))  # channels on dim 0, matching w's output-channel axis
+    w_scale = w_scale.reshape(1, -1, *[1]*(w.ndim-2))  # channels on dim 1, as before; presumably broadcast against the conv output
    return _qlinearop_quantized(Conv, [x,w], [x_zero_point,w_zero_point], [x_scale,w_scale], y_scale, y_zero_point, **{"B":B, **opts})

  def QLinearMatMul(a:Tensor, a_scale:Tensor, a_zero_point:Tensor, b:Tensor, b_scale:Tensor, b_zero_point:Tensor, y_scale:Tensor,