# tensor tests that pass on NULL backend (no copyout needed)
import numpy as np
import unittest
from tinygrad import Tensor, Device, dtypes
from tinygrad.device import is_dtype_supported
from tinygrad.uop.ops import Ops, UOp
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.renderer.nir import NIRRenderer
from tinygrad.codegen import to_program
from tinygrad.dtype import DType

# shared random float32 inputs for the inference-mode tests
x_init = np.random.randn(1,3).astype(np.float32)
W_init = np.random.randn(3,3).astype(np.float32)
m_init = np.random.randn(1,3).astype(np.float32)

class TestTrainMode(unittest.TestCase):
  """Checks that `Tensor.training` is only set while a `Tensor.train()`-decorated function runs."""

  def test_train_mode(self):
    self.assertFalse(Tensor.training)
    @Tensor.train()
    def f():
      self.assertTrue(Tensor.training)
    f()
    # the flag must be restored once the decorated function returns
    self.assertFalse(Tensor.training)

class TestInferenceMode(unittest.TestCase):
  """Checks that a forward pass alone leaves every `.grad` as None."""

  def test_inference(self):
    x = Tensor(x_init)
    m = Tensor(m_init)
    W = Tensor(W_init)
    tmp = x.mul(m)
    mm = tmp.matmul(W)
    out = mm.relu()
    out = out.sum()
    # backward() is deliberately never called here, so no gradient should exist
    self.assertIsNone(x.grad)
    self.assertIsNone(m.grad)
    self.assertIsNone(tmp.grad)
    self.assertIsNone(mm.grad)
    self.assertIsNone(W.grad)
    self.assertTrue(W.requires_grad)

  def test_no_grad_mode_context_manager(self):
    x = Tensor(x_init)
    m = Tensor(m_init)
    W = Tensor(W_init)
    def f(x, m, W):
      tmp = x.mul(m)
      mm = tmp.matmul(W)
      out = mm.relu()
      out = out.sum()
      # backward() is deliberately never called here, so no gradient should exist
      self.assertIsNone(x.grad)
      self.assertIsNone(m.grad)
      self.assertIsNone(tmp.grad)
      self.assertIsNone(mm.grad)
      self.assertIsNone(W.grad)
    f(x, m, W)

class TestIdxUpcast(unittest.TestCase):
  """Checks the dtype chosen for the index value of a kernel's INDEX op (int vs long)."""

  def _find_op(self, ast: UOp, op: Ops):
    """Depth-first search of `ast` and its `src` tree; return the first node whose op is `op`, else None."""
    if ast.op is op: return ast
    for src in ast.src:
      if (ret:=self._find_op(src, op)) is not None: return ret
    return None

  def _schedule_render(self, a: Tensor):
    """Build a program for the first SINK item of `a`'s linear schedule and return `prg.src[2].src` as a tuple.

    The renderer is taken from the device of the buffer in `si.src[1]`.
    Fails the test if the schedule contains no SINK item.
    """
    linear, _ = a.linear_with_vars()
    for si in linear.src:
      ast = si.src[0]
      if ast.op is Ops.SINK:
        renderer = Device[si.src[1].buffer.device].renderer
        prg = to_program(ast, renderer)
        return tuple(prg.src[2].src)
    # without this, callers would hit an opaque TypeError when iterating over None
    self.fail("no SINK item found in the schedule")

  def _assert(self, dtype: DType, a: Tensor):
    """Render `a` and assert that the index value under the first STORE's INDEX has dtype `dtype`."""
    uops = self._schedule_render(a)
    # Assert the dtype of the INDEX value. This will need to be updated if the UOp spec changes.
    store = next((uop for uop in uops if uop.op is Ops.STORE), None)
    self.assertIsNotNone(store, "no STORE uop found in the rendered program")
    idx = self._find_op(store, Ops.INDEX)
    # PTX and NIR turn Ops.INDEX into pointer arithmetic earlier than cstyle, plus it's already cast to int64
    if not isinstance(Device[Device.DEFAULT].renderer, (PTXRenderer, NIRRenderer)):
      self.assertIsNotNone(idx, "no INDEX uop found under the STORE")
      idx_val = idx.src[1]
      self.assertIs(idx_val.dtype, dtype)

  # use expand to generate kernel that uses large idx
  def do_op_then_assert(self, dtype: DType, dim1, dim2, dim3):
    self._assert(dtype, Tensor.empty(dim1, dim2, 1).expand(-1, -1, dim3).contiguous())

  @unittest.skipUnless(is_dtype_supported(dtypes.long), "int64 is not supported")
  def test_overflow(self):
    # 2**11, 2**11, 2**11 -> 2**33 will overflow when indexed
    self.do_op_then_assert(dtypes.long, 2048, 2048, 2048)

  @unittest.skipUnless(is_dtype_supported(dtypes.long), "int64 is not supported")
  def test_overflow_sym(self):
    self.do_op_then_assert(dtypes.long, 2048, 2048, UOp.variable("dim3", 1, 2048).bind(32))

  def test_regular(self):
    self.do_op_then_assert(dtypes.int, 64, 64, 64)

  def test_regular_sym(self):
    self.do_op_then_assert(dtypes.int, 256, 256, UOp.variable("dim3", 1, 64).bind(32))

  @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, (PTXRenderer, NIRRenderer)), "PTX and NIR always converts Ops.INDEX to int64")
  def test_symfold(self):
    # This would cause an overflow, but after sym fold it's within int32
    a = Tensor.arange(65535)
    uops = self._schedule_render(a)
    self.assertTrue(all(uop.dtype is not dtypes.long for uop in uops))

  @unittest.skipIf(is_dtype_supported(dtypes.long), "int64 is supported")
  def test_int64_unsupported_overflow_sym(self):
    with self.assertRaises((KeyError, RuntimeError)):
      self.do_op_then_assert(dtypes.long, 2048, 2048, UOp.variable("dim3", 1, 2048).bind(32))

  @unittest.skipIf(is_dtype_supported(dtypes.long), "int64 is supported")
  @unittest.expectedFailure  # bug in gpu dims limiting
  def test_int64_unsupported_overflow(self):
    with self.assertRaises((KeyError, RuntimeError)):
      self.do_op_then_assert(dtypes.long, 2048, 2048, 2048)

  @unittest.skip("This is kept for reference, it requires large memory to run")
  def test_overflow_kernel_run(self):
    # This creates 2**11 * 2**11 * (2**9+10) int8 elements (just over 2**31), so it needs more than 2 GiB of memory
    # Modified example from issue 3271
    a = Tensor.empty(2**11, 2**11, 1, dtype=dtypes.int8).permute((2, 0, 1)).expand((2**9+10, -1, -1)).contiguous()
    a.realize()

class TestTensorUnique(unittest.TestCase):
  """Checks which realized tensors end up with distinct `uop.buffer` objects and which share one."""

  def test_empty_bufs_unique(self):
    a = Tensor.empty(10, 10).contiguous()
    b = Tensor.empty(10, 10).contiguous()
    Tensor.realize(a,b)
    self.assertIsNot(a.uop.buffer, b.uop.buffer)

  def test_zeros_bufs_unique_sep(self):
    # same as test_zeros_bufs_unique, but each tensor is realized in its own call
    a = Tensor.zeros(10, 10).contiguous()
    Tensor.realize(a)
    b = Tensor.zeros(10, 10).contiguous()
    Tensor.realize(b)
    self.assertIsNot(a.uop.buffer, b.uop.buffer)

  def test_zeros_bufs_unique(self):
    a = Tensor.zeros(10, 10).contiguous()
    b = Tensor.zeros(10, 10).contiguous()
    Tensor.realize(a,b)
    self.assertIsNot(a.uop.buffer, b.uop.buffer)

  def test_eye_bufs_unique(self):
    a = Tensor.eye(10).contiguous()
    b = Tensor.eye(10).contiguous()
    Tensor.realize(a,b)
    self.assertIsNot(a.uop.buffer, b.uop.buffer)

  def test_times_2_not_unique(self):
    # the same expression built twice on the same base is expected to share one buffer
    a = Tensor.zeros(10, 10).contiguous()
    b = a * 2
    c = a * 2
    Tensor.realize(b,c)
    self.assertIs(b.uop.buffer, c.uop.buffer)

class TestRand(unittest.TestCase):
  """Checks that scheduling a very large `Tensor.rand` does not raise."""

  def test_rand_large_tensor(self):
    # large tensor rand (num > uint32.max) should not crash in frontend
    Tensor.manual_seed(0)
    Tensor.rand(2**17, 2**17).schedule_linear()
    Tensor.rand(2**17, 2**17).schedule_linear()
    Tensor.rand(2**17, 2**17).schedule_linear()

class TestTensorConstLike(unittest.TestCase):
  """Checks shape, dtype, device and shard axis of `const_like`, and `full_like` on sharded tensors."""

  def test_const_like_shape(self):
    t = Tensor.ones(3, 4)
    c = t.const_like(0)
    self.assertEqual(c.shape, (3, 4))
    self.assertEqual(c.dtype, t.dtype)

  def test_const_like_multi_device(self):
    devs = ("NULL:0", "NULL:1")
    t = Tensor.ones(8, 4).shard(devs, axis=0)
    c = t.const_like(5)
    self.assertEqual(c.shape, (8, 4))
    self.assertEqual(c.device, t.device)
    self.assertEqual(c.uop.axis, 0)

  def test_full_like_device_on_multi_raises(self):
    # passing an explicit device to full_like on a sharded tensor must raise
    t = Tensor.ones(8, 4).shard(("NULL:0", "NULL:1"), axis=0)
    with self.assertRaises(RuntimeError):
      t.full_like(5, device="NULL")

class TestTensorDevice(unittest.TestCase):
  """Checks that a tensor created with a one-element device tuple can be added and realized."""

  def test_create_from_single_device_tuple(self):
    (Tensor([1.0], device=(Device.DEFAULT,)) + Tensor([2.0])).realize()

class TestTensorPad(unittest.TestCase):
  """Checks dtype promotion when padding with a value the tensor's dtype cannot hold."""

  # padding int tensor with float-only value (like -inf) must promote dtype to fit value
  def test_pad_int_with_neg_inf(self):
    t = Tensor.arange(9).reshape(1, 1, 3, 3)
    self.assertEqual(t.dtype, dtypes.int)
    r = t.pad((1, 2, 0, -1), value=-float('inf'))
    self.assertEqual(r.dtype, dtypes.float)
    self.assertEqual(r.shape, (1, 1, 2, 6))

class TestTensorDeviceMismatch(unittest.TestCase):
  """Checks that ops mixing tensors on "NULL" and "NULL:1" raise RuntimeError."""

  def test_gather(self):
    x = Tensor.empty(3, 4, device="NULL")
    idx = Tensor.zeros(3, 4, dtype=dtypes.int32, device="NULL:1")
    with self.assertRaises(RuntimeError):
      x.gather(0, idx)

  def test_scatter_index(self):
    x = Tensor.zeros(3, 4, device="NULL")
    idx = Tensor.zeros(3, 4, dtype=dtypes.int32, device="NULL:1")
    src = Tensor.ones(3, 4, device="NULL")
    with self.assertRaises(RuntimeError):
      x.scatter(0, idx, src)

  def test_scatter_src(self):
    x = Tensor.zeros(3, 4, device="NULL")
    idx = Tensor.zeros(3, 4, dtype=dtypes.int32, device="NULL")
    src = Tensor.ones(3, 4, device="NULL:1")
    with self.assertRaises(RuntimeError):
      x.scatter(0, idx, src)

  def test_getitem_tensor_index(self):
    x = Tensor.empty(4, 5, device="NULL")
    idx = Tensor([0, 1], dtype=dtypes.int32, device="NULL:1")
    with self.assertRaises(RuntimeError):
      x[idx]

  def test_sparse_categorical_crossentropy(self):
    x = Tensor.zeros(2, 3, device="NULL")
    Y = Tensor([0, 1], dtype=dtypes.int32, device="NULL:1")
    with self.assertRaises(RuntimeError):
      x.sparse_categorical_crossentropy(Y)

if __name__ == '__main__':
  unittest.main()