remove all run_schedule from tests (#15846)

This commit is contained in:
nimlgen
2026-04-21 12:02:10 +03:00
committed by GitHub
parent f9655af2a3
commit 01ac1c8c15
12 changed files with 82 additions and 76 deletions

View File

@@ -2,7 +2,8 @@ import unittest
import numpy as np
from tinygrad import Tensor, GlobalCounters, dtypes, nn, Device, Variable
from tinygrad.helpers import Context, getenv, DEV
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.realize import run_linear
from tinygrad.schedule import linear_to_schedule
from tinygrad.engine.realize import CompiledRunner, get_program
from tinygrad.schedule import ExecItem
from tinygrad.renderer import Estimates
@@ -54,9 +55,9 @@ class TestIndexing(unittest.TestCase):
with Context(NOOPT=1):
GlobalCounters.reset()
out = ((Tensor.arange(1,16385)-1)*needle).sum()
sched = out.schedule()
self.assertEqual(len(sched), 1)
run_schedule(sched)
linear, var_vals = out.linear_with_vars()
self.assertEqual(len(linear_to_schedule(linear)), 1)
run_linear(linear, var_vals)
self.assertEqual(out.item(), 1337)
def test_manual_index(self):
@@ -71,9 +72,9 @@ class TestIndexing(unittest.TestCase):
reshape_dataset = dataset.T.reshape(1, DDIM, DSET, 1).expand(4, DDIM, DSET, 1)
full = (rng==idxs).where(reshape_dataset, Tensor.zeros(4, DDIM, DSET, 1))
X = full.sum(axis=(2,3))
sched = X.schedule()
self.assertEqual(len(sched), 1)
run_schedule(sched)
linear, var_vals = X.linear_with_vars()
self.assertEqual(len(linear_to_schedule(linear)), 1)
run_linear(linear, var_vals)
assert GlobalCounters.global_ops < 4*DSET, f"too many ops {GlobalCounters.global_ops}"
np.testing.assert_allclose(real_index, X.numpy())
@@ -97,9 +98,9 @@ class TestIndexing(unittest.TestCase):
GlobalCounters.reset()
X = dataset[idxs]
assert X.shape == (4,DDIM)
sched = X.schedule()
self.assertEqual(len(sched), 1)
run_schedule(sched)
linear, var_vals = X.linear_with_vars()
self.assertEqual(len(linear_to_schedule(linear)), 1)
run_linear(linear, var_vals)
assert GlobalCounters.global_ops < 4*DSET, f"too many ops {GlobalCounters.global_ops}"
np.testing.assert_allclose(real_index, X.numpy())
@@ -112,9 +113,9 @@ class TestIndexing(unittest.TestCase):
GlobalCounters.reset()
X = dataset[idxs]
assert X.shape == (4,DDIM)
sched = X.schedule()
self.assertEqual(len(sched), 1)
run_schedule(sched)
linear, var_vals = X.linear_with_vars()
self.assertEqual(len(linear_to_schedule(linear)), 1)
run_linear(linear, var_vals)
assert GlobalCounters.global_ops < 4*DSET, f"too many ops {GlobalCounters.global_ops} != {4*DSET}"
np.testing.assert_allclose(real_index, X.numpy())
@unittest.skip("not ready")

View File

@@ -6,7 +6,7 @@ from tinygrad.tensor import Tensor
from tinygrad.helpers import Context, from_mv
from tinygrad.dtype import dtypes
from tinygrad.engine.jit import MultiGraphRunner
from tinygrad.schedule import linear_to_schedule
from tinygrad.engine.realize import run_linear
from tinygrad.uop.ops import UOp, Ops, buffers
from test.helpers import needs_second_gpu
@@ -49,7 +49,7 @@ def make_graph(graph_cls, calls:list[UOp]):
return graph_cls(cf, [])
def run_schedule(calls:list[UOp]):
for ei in linear_to_schedule(UOp(Ops.LINEAR, src=tuple(calls))): ei.lower().run({})
run_linear(UOp(Ops.LINEAR, src=tuple(calls)))
def zero_bufs(bufs):
for b in bufs:

View File

@@ -6,7 +6,8 @@ from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.uop.ops import UOp, Ops, GroupOp, AxisType
from tinygrad.device import Device, Buffer, is_dtype_supported
from tinygrad.tensor import Tensor, _to_np_dtype
from tinygrad.engine.realize import run_schedule, CompiledRunner, get_program
from tinygrad.engine.realize import run_linear, CompiledRunner, get_program
from tinygrad.schedule import linear_to_schedule
from tinygrad.helpers import Context, flatten, dedup, TC_SELECT, TC_OPT, DEV
from tinygrad.dtype import DType, dtypes, PtrDType, AddrSpace
from tinygrad.renderer.ptx import PTXRenderer
@@ -286,10 +287,10 @@ class TestLinearizer(unittest.TestCase):
a = Tensor.ones(4, 4).contiguous().realize()
b = a.shrink(((1, 2), None)).pad(((1, 2), None))
a.assign(b.where(2, a))
sched = a.schedule()
assert len(sched) == 1
sched_copy = sched[:]
run_schedule(sched)
linear, var_vals = a.linear_with_vars()
sched_copy = linear_to_schedule(linear)
assert len(sched_copy) == 1
run_linear(linear, var_vals)
np.testing.assert_equal(a.flatten().numpy(), [1.,1.,1.,1.,2.,2.,2.,2.,1.,1.,1.,1.,1.,1.,1.,1.])
program = get_program(replace_opts(sched_copy[-1].ast, []), renderer=Device[Device.DEFAULT].renderer)
assert not any(u.op == Ops.WHERE for u in program.uops), "found where where where should be folded"
@@ -388,8 +389,9 @@ class TestLinearizer(unittest.TestCase):
def helper_realized_ast(r:Tensor|list[Tensor]) -> tuple[UOp, list[Buffer]]:
if isinstance(r, Tensor): r = [r]
s = Tensor.schedule(*r)
run_schedule(s[:-1]) # run all kernels except the last one
linear, var_vals = Tensor.linear_with_vars(*r)
s = linear_to_schedule(linear)
run_linear(UOp(Ops.LINEAR, src=linear.src[:-1]), var_vals) # run all kernels except the last one
assert s[-1].ast.op is Ops.SINK, f"helper_realized_ast expects a SINK {s[-1]}"
# now all input buffers in s[-1] should be realized
# create fresh buffers for the outputs

View File

@@ -4,7 +4,8 @@ from tinygrad.device import is_dtype_supported
from tinygrad.uop.ops import Ops, UOp
from tinygrad.helpers import getenv, prod, Context
from tinygrad.nn.state import get_parameters, get_state_dict
from tinygrad.engine.realize import CompiledRunner, run_schedule
from tinygrad.engine.realize import CompiledRunner, run_linear
from tinygrad.schedule import linear_to_schedule
import numpy as np
from hypothesis import given, strategies as strat, settings
from test.helpers import not_support_multi_device, needs_second_gpu, slow, call_is_graph
@@ -192,11 +193,11 @@ class TestMultiTensor(unittest.TestCase):
# only shrink on the device that owns the shard, this is enabled by the mselect simplifier
for i in range(2):
xt = X[i*2:i*2+2].contiguous()
sched = xt.schedule()
#kernels = [s for s in sched if s.ast.op is Ops.SINK]
linear, var_vals = xt.linear_with_vars()
#kernels = [s for s in linear_to_schedule(linear) if s.ast.op is Ops.SINK]
#self.assertEqual(len(kernels), 1)
#self.assertEqual(kernels[0].bufs[0].device, devices_2[i])
run_schedule(sched)
run_linear(linear, var_vals)
np.testing.assert_equal(xt.numpy(), X_np[i*2:i*2+2])
@given(strat.sampled_from((devices_2, devices_3)),
@@ -784,9 +785,9 @@ class TestMultiTensor(unittest.TestCase):
def test_full_like_shrink_on_shard_axis(self):
t = Tensor.ones(16, 16, dtype=dtypes.int).shard(devices_2, axis=0)
out = Tensor.full_like(t, 2)[:, :8]
sched = out.schedule()
self.assertEqual(len(sched), 0)
run_schedule(sched)
linear, var_vals = out.linear_with_vars()
self.assertEqual(len(linear_to_schedule(linear)), 0)
run_linear(linear, var_vals)
self.assertEqual(out.tolist(), [[2]*8]*16)
def test_dropout_on_shard(self):
@@ -1138,10 +1139,10 @@ class TestMultiBufferView(unittest.TestCase):
"""Apply view_fn to both, verify zero compiled kernels and matching values."""
b_ref = view_fn(a_ref)
b_multi = view_fn(a_multi).contiguous()
sched = b_multi.schedule()
compiled = [si for si in sched if isinstance(si.prg, CompiledRunner)]
linear, var_vals = b_multi.linear_with_vars()
compiled = [si for si in linear_to_schedule(linear) if isinstance(si.prg, CompiledRunner)]
self.assertEqual(len(compiled), 0, f"expected zero compiled kernels, got {len(compiled)}")
run_schedule(sched)
run_linear(linear, var_vals)
np.testing.assert_equal(b_multi.numpy(), b_ref.numpy())
@unittest.skip("flaky on LLVM")
@@ -1168,10 +1169,10 @@ class TestMultiBufferView(unittest.TestCase):
def test_4_devices(self):
ref = Tensor.arange(8*12).reshape(8, 12).contiguous().realize()
a = Tensor.arange(8*12).reshape(8, 12).contiguous().shard(devices_4, axis=1).realize()
sched = a[5].contiguous().schedule()
compiled = [si for si in sched if isinstance(si.prg, CompiledRunner)]
linear, var_vals = a[5].contiguous().linear_with_vars()
compiled = [si for si in linear_to_schedule(linear) if isinstance(si.prg, CompiledRunner)]
self.assertEqual(len(compiled), 0)
run_schedule(sched)
run_linear(linear, var_vals)
np.testing.assert_equal(a[5].contiguous().numpy(), ref[5].numpy())
@unittest.skipIf(not_support_multi_device(), "need multi")

View File

@@ -8,7 +8,8 @@ from tinygrad.helpers import GlobalCounters, Context
from tinygrad.nn import Conv1d, ConvTranspose1d, Conv2d, ConvTranspose2d, Linear, Embedding
from tinygrad.nn import BatchNorm, LayerNorm, LayerNorm2d, GroupNorm, InstanceNorm, RMSNorm, LSTMCell
from tinygrad.nn.state import load_state_dict
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.realize import run_linear
from tinygrad.schedule import linear_to_schedule
from test.helpers import not_support_multi_device, needs_second_gpu, slow
@slow
@@ -431,17 +432,19 @@ class TestNN(unittest.TestCase):
a = Tensor([[1, 5, 9, 11],
[12, 19, 8, 1]])
result = layer(a)
schedule = result.schedule()
self.assertEqual(len([item for item in schedule if item.ast.op is Ops.SINK]), kcount, "first run realizes weight and embedding")
run_schedule(schedule)
linear, var_vals = result.linear_with_vars()
self.assertEqual(len([item for item in linear_to_schedule(linear) if item.ast.op is Ops.SINK]), kcount,
"first run realizes weight and embedding")
run_linear(linear, var_vals)
b = Tensor([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
result = layer(b)
schedule = result.schedule()
self.assertEqual(1, len([item for item in schedule if item.ast.op is Ops.SINK]), "second run realizes embedding only")
run_schedule(schedule)
linear, var_vals = result.linear_with_vars()
self.assertEqual(1, len([item for item in linear_to_schedule(linear) if item.ast.op is Ops.SINK]),
"second run realizes embedding only")
run_linear(linear, var_vals)
print(f"Embedding used {GlobalCounters.global_ops} ops")
self.assertLessEqual(GlobalCounters.global_ops, ops)

View File

@@ -12,7 +12,7 @@ from tinygrad.device import is_dtype_supported
from tinygrad.dtype import DType
from tinygrad.uop.ops import UOp, Ops, UPat
from tinygrad.helpers import CI, DEBUG, OSX, GlobalCounters, Context, getenv, all_same, temp
from tinygrad.engine.realize import CompiledRunner, run_schedule, run_linear
from tinygrad.engine.realize import CompiledRunner, run_linear
from tinygrad.schedule import linear_to_schedule
class KernelCountException(Exception): pass
@@ -49,8 +49,9 @@ def _test_conv2d(allowed:int, dtype:DType=dtypes.float):
w = Tensor.uniform(16, CIN, 3, 3, requires_grad=True).realize()
ret = Tensor.conv2d(img, w).relu().mean().backward()
dtypes.default_float = old_default_float
s = Tensor.schedule(ret, img.grad, w.grad)
run_schedule(s.copy())
linear, var_vals = Tensor.linear_with_vars(ret, img.grad, w.grad)
s = linear_to_schedule(linear)
run_linear(linear, var_vals)
cnt = len([si for si in s if si.ast.op is Ops.SINK])
assert cnt == allowed, f"expected {allowed} kernels, got {cnt}"
if getenv("CHECK", 1):
@@ -72,9 +73,9 @@ class TestSchedule(unittest.TestCase):
def test_arange_avgpool2d(self, kcount=1):
x = Tensor.arange(25).reshape(1,1,5,5).cast(dtypes.float32)
t = x.avg_pool2d(padding=1)
sched = t.schedule()
self.assertEqual(len(sched), kcount)
run_schedule(sched)
linear, var_vals = t.linear_with_vars()
self.assertEqual(len(linear_to_schedule(linear)), kcount)
run_linear(linear, var_vals)
import torch
torch_out = torch.nn.functional.avg_pool2d(torch.arange(25).reshape(1,1,5,5).float(), kernel_size=(2,2), padding=1).numpy()
np.testing.assert_allclose(t.numpy(), torch_out)
@@ -1053,8 +1054,9 @@ class TestSchedule(unittest.TestCase):
a2 = mop(a)
expected = (a+a2).tolist()
a.assign(a+a2)
kcount = len(sched:=a.schedule())
run_schedule(sched)
linear, var_vals = a.linear_with_vars()
kcount = len(linear_to_schedule(linear))
run_linear(linear, var_vals)
self.assertListEqual(a.tolist(), expected)
self.assertEqual(kcount, expected_kcount)
def test_setitem_permuted_sched(self): self.test_setitem_sched(lambda x: x.T, 2)
@@ -1353,9 +1355,9 @@ class TestCopyFolding(unittest.TestCase):
def test_copy_to_same_device_sched(self):
a = Tensor.ones(4).contiguous().realize().uop.buf_uop
t = Tensor(a.copy_to_device(a.device))
sched = t.schedule()
assert len([s for s in sched if s.ast.op is Ops.COPY]) == 0
run_schedule(sched)
linear, var_vals = t.linear_with_vars()
assert len([s for s in linear_to_schedule(linear) if s.ast.op is Ops.COPY]) == 0
run_linear(linear, var_vals)
assert t.uop.is_realized, f"didn't realize Tensor {t}"
self.assertListEqual(t.tolist(), [1.,1.,1.,1.])
@@ -1442,8 +1444,7 @@ class TestFusionOp(unittest.TestCase):
def test_expand_fuse(self):
bt = Tensor(np.ones((10, 1)), dtype=dtypes.float32)
out = (bt*2).expand(10,10).sum(1)
sched = out.schedule()
run_schedule(sched)
run_linear(*out.linear_with_vars())
outd = out.tolist()
assert all(x == 20.0 for x in outd)

View File

@@ -6,7 +6,7 @@ from tinygrad import Tensor, TinyJit, GlobalCounters, Device
from tinygrad.helpers import getenv, Context
from tinygrad.nn.optim import SGD
from tinygrad.nn.state import get_parameters
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.realize import run_linear
from extra.models import resnet
from examples.mlperf.initializers import Conv2dHeNormal, Linear
@@ -71,11 +71,11 @@ class BenchmarkResnetTrain(unittest.TestCase):
y = x.sequential(layer).contiguous().contiguous_backward()
y.sum().backward()
if getenv("ASSIGN", 1): sched, _ = Tensor.schedule_with_vars(y, x.grad, *optim.schedule_step())
else: sched, _ = Tensor.schedule_with_vars(y, x.grad, *[t.grad for t in optim.params])
if getenv("ASSIGN", 1): linear, var_vals = Tensor.linear_with_vars(y, x.grad, *optim.schedule_step())
else: linear, var_vals = Tensor.linear_with_vars(y, x.grad, *[t.grad for t in optim.params])
for _ in range(JITCNT):
run_schedule(list(sched))
run_linear(linear, var_vals)
CNT = getenv("CNT", 5)
best_tm = None

View File

@@ -6,7 +6,7 @@ import torch
from tinygrad import GlobalCounters, Tensor, Device
from tinygrad.helpers import getenv
from tinygrad.nn.state import get_parameters
from tinygrad.engine.realize import capturing, run_schedule
from tinygrad.engine.realize import capturing, run_linear
from tinygrad.schedule import linear_to_schedule
from tinygrad.tensor import _to_np_dtype
@@ -15,9 +15,8 @@ class CLCache:
self.allowed, self.strict, self.preclear, self.var_vals = allowed, strict, preclear, var_vals if var_vals is not None else {}
self.count = 0
def add_linear(self, linear, var_vals):
schedule = linear_to_schedule(linear)
self.count += len(schedule)
run_schedule(schedule, var_vals)
self.count += len(linear_to_schedule(linear))
run_linear(linear, var_vals)
def __enter__(self):
if self.preclear:
gc.collect()

View File

@@ -4,7 +4,7 @@ from tinygrad import Tensor, TinyJit, GlobalCounters, Device
from tinygrad.helpers import getenv, Context
from tinygrad.nn.optim import LAMB
from tinygrad.nn.state import get_parameters
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.realize import run_linear
from extra.models import bert
@@ -49,11 +49,11 @@ class BenchmarkBertTrain(unittest.TestCase):
y = layer(*inputs).contiguous().contiguous_backward()
y.sum().backward()
if getenv("ASSIGN", 1): sched, _ = Tensor.schedule_with_vars(y, *list(inputs), *optim.schedule_step())
else: sched, _ = Tensor.schedule_with_vars(y, *list(inputs), *[t.grad for t in optim.params])
if getenv("ASSIGN", 1): linear, var_vals = Tensor.linear_with_vars(y, *list(inputs), *optim.schedule_step())
else: linear, var_vals = Tensor.linear_with_vars(y, *list(inputs), *[t.grad for t in optim.params])
for _ in range(JITCNT):
run_schedule(sched)
run_linear(linear, var_vals)
CNT = getenv("CNT", 5)
best_tm = None

View File

@@ -3,7 +3,7 @@ import gc, inspect
import unittest
import numpy as np
from tinygrad.device import Buffer
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.realize import run_linear
from tinygrad.uop.ops import UOp
from tinygrad.tensor import Tensor
@@ -69,9 +69,8 @@ class TestGC(unittest.TestCase):
init = bufs_allocated()
x = Tensor.ones(256).contiguous().realize()
y = x+Tensor.ones(256).contiguous()
ys = y.schedule()
del x
run_schedule(ys)
run_linear(*y.linear_with_vars())
self.assertEqual(bufs_allocated()-init, 1)
del y
self.assertEqual(bufs_allocated()-init, 0)

View File

@@ -3,7 +3,7 @@ import gc, unittest, time
from tinygrad import nn, dtypes, Device, Tensor
from tinygrad.uop.ops import UOp, Ops, GroupOp, UPat, KernelInfo
from tinygrad.helpers import DEBUG, GlobalCounters, Context
from tinygrad.engine.realize import CompiledRunner, run_schedule, run_linear
from tinygrad.engine.realize import CompiledRunner, run_linear
from tinygrad.schedule import linear_to_schedule
class KernelCountException(Exception): pass
@@ -40,9 +40,9 @@ class TestBufferUOp(unittest.TestCase):
# the device Buffer remains unallocated until we run the schedule
self.assertFalse(buf.uop.buffer.is_allocated())
add = buf+1
sched = add.schedule()
linear, var_vals = add.linear_with_vars()
self.assertFalse(buf.uop.buffer.is_allocated())
run_schedule(sched)
run_linear(linear, var_vals)
self.assertTrue(buf.uop.buffer.is_allocated())
def test_buffer_has_unique_buffer(self):

View File

@@ -1,17 +1,17 @@
import unittest
from tinygrad import Tensor
from tinygrad.dtype import Invalid, dtypes
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.realize import run_linear
class TestInvalidTensor(unittest.TestCase):
def _invalid_test_helper(self, out, expected):
sched = out.schedule()
linear, var_vals = out.linear_with_vars()
buf = out.uop.buffer
buf.allocate()
sentinel = memoryview(bytearray(b'\x42' * buf.nbytes))
buf.copyin(sentinel)
before = buf.as_memoryview().cast(out.dtype.fmt).tolist()
run_schedule(sched)
run_linear(linear, var_vals)
ret = buf.as_memoryview().cast(out.dtype.fmt).tolist()
for i,v in enumerate(expected): self.assertEqual(ret[i], before[i] if v is None else v)