diff --git a/extra/optimization/get_action_space.py b/extra/optimization/get_action_space.py index ea75f2fd37..81ac990e70 100644 --- a/extra/optimization/get_action_space.py +++ b/extra/optimization/get_action_space.py @@ -28,7 +28,12 @@ if __name__ == "__main__": # confirm linearize can be called twice uops1 = lin.linearize().uops uops2 = lin.linearize().uops - assert tuple(uops1) == tuple(uops2), f"uops mismatch {lin.colored_shape()}" + for x,y in zip(uops1.uops, uops2.uops): + # for some reason DEFINE_ACC is changing the arg + if x.op != y.op or x.dtype != y.dtype: # or x.arg != y.arg: + uops1.print() + uops2.print() + raise Exception(f"UOPS MISMATCH {x} {y}") print(len(tactions), len(actions)) print(sorted(list(tactions))) diff --git a/test/test_conv.py b/test/test_conv.py index d09469d222..5d14581a4b 100644 --- a/test/test_conv.py +++ b/test/test_conv.py @@ -42,6 +42,13 @@ class TestConv(unittest.TestCase): print(ret.numpy()) + def test_two_binops_no_rerun_small(self): + Tensor.no_grad = True + x = Tensor.rand(1,1,32,32) + w = Tensor.rand(1,1,3,3) + out = x.conv2d(w, padding=(1,1)) + np.testing.assert_allclose(out.relu().numpy(), np.maximum(out.numpy(), 0)) + def test_two_binops_no_rerun(self): Tensor.no_grad = True x = Tensor.randn(1,12,128,256) diff --git a/test/test_image_dtype.py b/test/test_image_dtype.py index 75ecdec20d..7bb3cad477 100644 --- a/test/test_image_dtype.py +++ b/test/test_image_dtype.py @@ -1,8 +1,7 @@ import unittest import numpy as np -from tinygrad import Device, dtypes, Tensor, Variable +from tinygrad import Device, dtypes, Tensor from tinygrad.dtype import ImageDType -from tinygrad.codegen.linearizer import to_image_idx @unittest.skipIf(Device.DEFAULT != "GPU", "only images on GPU") class TestImageDType(unittest.TestCase): @@ -30,6 +29,11 @@ class TestImageDType(unittest.TestCase): out = (it*2).realize() assert isinstance(out.lazydata.base.realized.dtype, ImageDType) + def test_sum(self): + it = Tensor.rand(8).cast(dtypes.imagef((1,2,4))).realize() + itn = it.numpy() + np.testing.assert_allclose(np.sum(itn), it.sum().numpy(), rtol=1e-6) + def test_shrink_max(self): it = Tensor.randn(8).cast(dtypes.imagef((1,2,4))).realize() imgv = it.numpy() @@ -64,14 +68,5 @@ class TestImageDType(unittest.TestCase): it = data.cast(dtypes.imageh((9,27,4))).realize() assert it.lazydata.base.realized._buf != b1 -class TestImageIdx(unittest.TestCase): - def test_to_image_idx_real1(self): - gidx0 = Variable('gidx0', 0, 511) - base_idx = (((gidx0*4)%32)*32)+((gidx0//8)%32) - base_valid = gidx0<256 - (idx, idy), valid = to_image_idx((4, 64, 4), base_idx, base_valid) - print(idx, idy, idx.min, idx.max, idy.min, idy.max, valid) - assert valid.min == 0 - if __name__ == '__main__': unittest.main() diff --git a/test/test_uops.py b/test/test_uops.py index eda191667a..12db0cd2cc 100644 --- a/test/test_uops.py +++ b/test/test_uops.py @@ -114,6 +114,10 @@ class TestFloatUOps(TestUOps): def test_where(self): self._test_top_fxn(TernaryOps.WHERE, lambda a,b,c: b if a!=0 else c, (dtypes.bool, dtypes.float, dtypes.float)) + @unittest.skipUnless(getenv("PYTHON"), "only python supports MULACC") + def test_mulacc(self): + self._test_top_fxn(TernaryOps.MULACC, lambda a,b,c: a*b+c, (dtypes.float, dtypes.float, dtypes.float)) + class TestNonFloatUOps(TestUOps): def test_neg_int32(self): self._test_uop_fxn(UnaryOps.NEG, lambda a: -a, (dtypes.int32, )) def test_add_int32(self): self._test_bop_fxn(BinaryOps.ADD, lambda a,b: int(a)+int(b), (dtypes.int32, dtypes.int32)) diff --git a/tinygrad/ops.py b/tinygrad/ops.py index 4d981dfa76..b753edcbaf 100644 --- a/tinygrad/ops.py +++ b/tinygrad/ops.py @@ -127,6 +127,7 @@ python_alu = { BinaryOps.XOR: operator.xor, BinaryOps.MAX: max, BinaryOps.CMPNE: operator.ne, BinaryOps.CMPLT: operator.lt, BinaryOps.OR: operator.or_, BinaryOps.AND: operator.and_, BinaryOps.MOD: lambda x,y: abs(int(x))%abs(int(y))*(1,-1)[x<0], BinaryOps.IDIV: lambda x, y: int(x/y) if y != 0 else x*math.inf, + TernaryOps.MULACC: lambda x,y,z: (x*y)+z, TernaryOps.WHERE: lambda x,y,z: y if x else z} def truncate_fp16(x):