remove all run_schedule from tests (#15846)

2026-06-13 00:15:35 +08:00 · 2026-04-21 12:02:10 +03:00
parent f9655af2a3
commit 01ac1c8c15
12 changed files with 82 additions and 76 deletions
--- a/test/backend/test_arange.py
+++ b/test/backend/test_arange.py
@@ -2,7 +2,8 @@ import unittest
 import numpy as np
 from tinygrad import Tensor, GlobalCounters, dtypes, nn, Device, Variable
 from tinygrad.helpers import Context, getenv, DEV
-from tinygrad.engine.realize import run_schedule
+from tinygrad.engine.realize import run_linear
+from tinygrad.schedule import linear_to_schedule
 from tinygrad.engine.realize import CompiledRunner, get_program
 from tinygrad.schedule import ExecItem
 from tinygrad.renderer import Estimates
@@ -54,9 +55,9 @@ class TestIndexing(unittest.TestCase):
    with Context(NOOPT=1):
      GlobalCounters.reset()
      out = ((Tensor.arange(1,16385)-1)*needle).sum()
-      sched = out.schedule()
-      self.assertEqual(len(sched), 1)
-      run_schedule(sched)
+      linear, var_vals = out.linear_with_vars()
+      self.assertEqual(len(linear_to_schedule(linear)), 1)
+      run_linear(linear, var_vals)
    self.assertEqual(out.item(), 1337)

  def test_manual_index(self):
@@ -71,9 +72,9 @@ class TestIndexing(unittest.TestCase):
      reshape_dataset = dataset.T.reshape(1, DDIM, DSET, 1).expand(4, DDIM, DSET, 1)
      full = (rng==idxs).where(reshape_dataset, Tensor.zeros(4, DDIM, DSET, 1))
      X = full.sum(axis=(2,3))
-      sched = X.schedule()
-      self.assertEqual(len(sched), 1)
-      run_schedule(sched)
+      linear, var_vals = X.linear_with_vars()
+      self.assertEqual(len(linear_to_schedule(linear)), 1)
+      run_linear(linear, var_vals)
      assert GlobalCounters.global_ops < 4*DSET, f"too many ops {GlobalCounters.global_ops}"
    np.testing.assert_allclose(real_index, X.numpy())

@@ -97,9 +98,9 @@ class TestIndexing(unittest.TestCase):
      GlobalCounters.reset()
      X = dataset[idxs]
      assert X.shape == (4,DDIM)
-      sched = X.schedule()
-      self.assertEqual(len(sched), 1)
-      run_schedule(sched)
+      linear, var_vals = X.linear_with_vars()
+      self.assertEqual(len(linear_to_schedule(linear)), 1)
+      run_linear(linear, var_vals)
      assert GlobalCounters.global_ops < 4*DSET, f"too many ops {GlobalCounters.global_ops}"
    np.testing.assert_allclose(real_index, X.numpy())

@@ -112,9 +113,9 @@ class TestIndexing(unittest.TestCase):
      GlobalCounters.reset()
      X = dataset[idxs]
      assert X.shape == (4,DDIM)
-      sched = X.schedule()
-      self.assertEqual(len(sched), 1)
-      run_schedule(sched)
+      linear, var_vals = X.linear_with_vars()
+      self.assertEqual(len(linear_to_schedule(linear)), 1)
+      run_linear(linear, var_vals)
      assert GlobalCounters.global_ops < 4*DSET, f"too many ops {GlobalCounters.global_ops} != {4*DSET}"
    np.testing.assert_allclose(real_index, X.numpy())
  @unittest.skip("not ready")
--- a/test/backend/test_graph.py
+++ b/test/backend/test_graph.py
@@ -6,7 +6,7 @@ from tinygrad.tensor import Tensor
 from tinygrad.helpers import Context, from_mv
 from tinygrad.dtype import dtypes
 from tinygrad.engine.jit import MultiGraphRunner
-from tinygrad.schedule import linear_to_schedule
+from tinygrad.engine.realize import run_linear
 from tinygrad.uop.ops import UOp, Ops, buffers

 from test.helpers import needs_second_gpu
@@ -49,7 +49,7 @@ def make_graph(graph_cls, calls:list[UOp]):
  return graph_cls(cf, [])

 def run_schedule(calls:list[UOp]):
-  for ei in linear_to_schedule(UOp(Ops.LINEAR, src=tuple(calls))): ei.lower().run({})
+  run_linear(UOp(Ops.LINEAR, src=tuple(calls)))

 def zero_bufs(bufs):
  for b in bufs:
--- a/test/backend/test_linearizer.py
+++ b/test/backend/test_linearizer.py
@@ -6,7 +6,8 @@ from tinygrad.codegen.opt import Opt, OptOps
 from tinygrad.uop.ops import UOp, Ops, GroupOp, AxisType
 from tinygrad.device import Device, Buffer, is_dtype_supported
 from tinygrad.tensor import Tensor, _to_np_dtype
-from tinygrad.engine.realize import run_schedule, CompiledRunner, get_program
+from tinygrad.engine.realize import run_linear, CompiledRunner, get_program
+from tinygrad.schedule import linear_to_schedule
 from tinygrad.helpers import Context, flatten, dedup, TC_SELECT, TC_OPT, DEV
 from tinygrad.dtype import DType, dtypes, PtrDType, AddrSpace
 from tinygrad.renderer.ptx import PTXRenderer
@@ -286,10 +287,10 @@ class TestLinearizer(unittest.TestCase):
    a = Tensor.ones(4, 4).contiguous().realize()
    b = a.shrink(((1, 2), None)).pad(((1, 2), None))
    a.assign(b.where(2, a))
-    sched = a.schedule()
-    assert len(sched) == 1
-    sched_copy = sched[:]
-    run_schedule(sched)
+    linear, var_vals = a.linear_with_vars()
+    sched_copy = linear_to_schedule(linear)
+    assert len(sched_copy) == 1
+    run_linear(linear, var_vals)
    np.testing.assert_equal(a.flatten().numpy(), [1.,1.,1.,1.,2.,2.,2.,2.,1.,1.,1.,1.,1.,1.,1.,1.])
    program = get_program(replace_opts(sched_copy[-1].ast, []), renderer=Device[Device.DEFAULT].renderer)
    assert not any(u.op == Ops.WHERE for u in program.uops), "found where where where should be folded"
@@ -388,8 +389,9 @@ class TestLinearizer(unittest.TestCase):

 def helper_realized_ast(r:Tensor|list[Tensor]) -> tuple[UOp, list[Buffer]]:
  if isinstance(r, Tensor): r = [r]
-  s = Tensor.schedule(*r)
-  run_schedule(s[:-1])  # run all kernels except the last one
+  linear, var_vals = Tensor.linear_with_vars(*r)
+  s = linear_to_schedule(linear)
+  run_linear(UOp(Ops.LINEAR, src=linear.src[:-1]), var_vals)  # run all kernels except the last one
  assert s[-1].ast.op is Ops.SINK, f"helper_realized_ast expects a SINK {s[-1]}"
  # now all input buffers in s[-1] should be realized
  # create fresh buffers for the outputs
--- a/test/backend/test_multitensor.py
+++ b/test/backend/test_multitensor.py
@@ -4,7 +4,8 @@ from tinygrad.device import is_dtype_supported
 from tinygrad.uop.ops import Ops, UOp
 from tinygrad.helpers import getenv, prod, Context
 from tinygrad.nn.state import get_parameters, get_state_dict
-from tinygrad.engine.realize import CompiledRunner, run_schedule
+from tinygrad.engine.realize import CompiledRunner, run_linear
+from tinygrad.schedule import linear_to_schedule
 import numpy as np
 from hypothesis import given, strategies as strat, settings
 from test.helpers import not_support_multi_device, needs_second_gpu, slow, call_is_graph
@@ -192,11 +193,11 @@ class TestMultiTensor(unittest.TestCase):
    # only shrink on the device that owns the shard, this is enabled by the mselect simplifier
    for i in range(2):
      xt = X[i*2:i*2+2].contiguous()
-      sched = xt.schedule()
-      #kernels = [s for s in sched if s.ast.op is Ops.SINK]
+      linear, var_vals = xt.linear_with_vars()
+      #kernels = [s for s in linear_to_schedule(linear) if s.ast.op is Ops.SINK]
      #self.assertEqual(len(kernels), 1)
      #self.assertEqual(kernels[0].bufs[0].device, devices_2[i])
-      run_schedule(sched)
+      run_linear(linear, var_vals)
      np.testing.assert_equal(xt.numpy(), X_np[i*2:i*2+2])

  @given(strat.sampled_from((devices_2, devices_3)),
@@ -784,9 +785,9 @@ class TestMultiTensor(unittest.TestCase):
  def test_full_like_shrink_on_shard_axis(self):
    t = Tensor.ones(16, 16, dtype=dtypes.int).shard(devices_2, axis=0)
    out = Tensor.full_like(t, 2)[:, :8]
-    sched = out.schedule()
-    self.assertEqual(len(sched), 0)
-    run_schedule(sched)
+    linear, var_vals = out.linear_with_vars()
+    self.assertEqual(len(linear_to_schedule(linear)), 0)
+    run_linear(linear, var_vals)
    self.assertEqual(out.tolist(), [[2]*8]*16)

  def test_dropout_on_shard(self):
@@ -1138,10 +1139,10 @@ class TestMultiBufferView(unittest.TestCase):
    """Apply view_fn to both, verify zero compiled kernels and matching values."""
    b_ref = view_fn(a_ref)
    b_multi = view_fn(a_multi).contiguous()
-    sched = b_multi.schedule()
-    compiled = [si for si in sched if isinstance(si.prg, CompiledRunner)]
+    linear, var_vals = b_multi.linear_with_vars()
+    compiled = [si for si in linear_to_schedule(linear) if isinstance(si.prg, CompiledRunner)]
    self.assertEqual(len(compiled), 0, f"expected zero compiled kernels, got {len(compiled)}")
-    run_schedule(sched)
+    run_linear(linear, var_vals)
    np.testing.assert_equal(b_multi.numpy(), b_ref.numpy())

  @unittest.skip("flaky on LLVM")
@@ -1168,10 +1169,10 @@ class TestMultiBufferView(unittest.TestCase):
  def test_4_devices(self):
    ref = Tensor.arange(8*12).reshape(8, 12).contiguous().realize()
    a = Tensor.arange(8*12).reshape(8, 12).contiguous().shard(devices_4, axis=1).realize()
-    sched = a[5].contiguous().schedule()
-    compiled = [si for si in sched if isinstance(si.prg, CompiledRunner)]
+    linear, var_vals = a[5].contiguous().linear_with_vars()
+    compiled = [si for si in linear_to_schedule(linear) if isinstance(si.prg, CompiledRunner)]
    self.assertEqual(len(compiled), 0)
-    run_schedule(sched)
+    run_linear(linear, var_vals)
    np.testing.assert_equal(a[5].contiguous().numpy(), ref[5].numpy())

@unittest.skipIf(not_support_multi_device(), "need multi")
--- a/test/backend/test_nn.py
+++ b/test/backend/test_nn.py
@@ -8,7 +8,8 @@ from tinygrad.helpers import GlobalCounters, Context
 from tinygrad.nn import Conv1d, ConvTranspose1d, Conv2d, ConvTranspose2d, Linear, Embedding
 from tinygrad.nn import BatchNorm, LayerNorm, LayerNorm2d, GroupNorm, InstanceNorm, RMSNorm, LSTMCell
 from tinygrad.nn.state import load_state_dict
-from tinygrad.engine.realize import run_schedule
+from tinygrad.engine.realize import run_linear
+from tinygrad.schedule import linear_to_schedule
 from test.helpers import not_support_multi_device, needs_second_gpu, slow

@slow
@@ -431,17 +432,19 @@ class TestNN(unittest.TestCase):
    a = Tensor([[1, 5, 9, 11],
                [12, 19, 8, 1]])
    result = layer(a)
-    schedule = result.schedule()
-    self.assertEqual(len([item for item in schedule if item.ast.op is Ops.SINK]), kcount, "first run realizes weight and embedding")
-    run_schedule(schedule)
+    linear, var_vals = result.linear_with_vars()
+    self.assertEqual(len([item for item in linear_to_schedule(linear) if item.ast.op is Ops.SINK]), kcount,
+                     "first run realizes weight and embedding")
+    run_linear(linear, var_vals)

    b = Tensor([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])
    result = layer(b)
-    schedule = result.schedule()
-    self.assertEqual(1, len([item for item in schedule if item.ast.op is Ops.SINK]), "second run realizes embedding only")
-    run_schedule(schedule)
+    linear, var_vals = result.linear_with_vars()
+    self.assertEqual(1, len([item for item in linear_to_schedule(linear) if item.ast.op is Ops.SINK]),
+                     "second run realizes embedding only")
+    run_linear(linear, var_vals)
    print(f"Embedding used {GlobalCounters.global_ops} ops")
    self.assertLessEqual(GlobalCounters.global_ops, ops)

--- a/test/backend/test_schedule.py
+++ b/test/backend/test_schedule.py
@@ -12,7 +12,7 @@ from tinygrad.device import is_dtype_supported
 from tinygrad.dtype import DType
 from tinygrad.uop.ops import UOp, Ops, UPat
 from tinygrad.helpers import CI, DEBUG, OSX, GlobalCounters, Context, getenv, all_same, temp
-from tinygrad.engine.realize import CompiledRunner, run_schedule, run_linear
+from tinygrad.engine.realize import CompiledRunner, run_linear
 from tinygrad.schedule import linear_to_schedule

 class KernelCountException(Exception): pass
@@ -49,8 +49,9 @@ def _test_conv2d(allowed:int, dtype:DType=dtypes.float):
  w = Tensor.uniform(16, CIN, 3, 3, requires_grad=True).realize()
  ret = Tensor.conv2d(img, w).relu().mean().backward()
  dtypes.default_float = old_default_float
-  s = Tensor.schedule(ret, img.grad, w.grad)
-  run_schedule(s.copy())
+  linear, var_vals = Tensor.linear_with_vars(ret, img.grad, w.grad)
+  s = linear_to_schedule(linear)
+  run_linear(linear, var_vals)
  cnt = len([si for si in s if si.ast.op is Ops.SINK])
  assert cnt == allowed, f"expected {allowed} kernels, got {cnt}"
  if getenv("CHECK", 1):
@@ -72,9 +73,9 @@ class TestSchedule(unittest.TestCase):
  def test_arange_avgpool2d(self, kcount=1):
    x = Tensor.arange(25).reshape(1,1,5,5).cast(dtypes.float32)
    t = x.avg_pool2d(padding=1)
-    sched = t.schedule()
-    self.assertEqual(len(sched), kcount)
-    run_schedule(sched)
+    linear, var_vals = t.linear_with_vars()
+    self.assertEqual(len(linear_to_schedule(linear)), kcount)
+    run_linear(linear, var_vals)
    import torch
    torch_out = torch.nn.functional.avg_pool2d(torch.arange(25).reshape(1,1,5,5).float(), kernel_size=(2,2), padding=1).numpy()
    np.testing.assert_allclose(t.numpy(), torch_out)
@@ -1053,8 +1054,9 @@ class TestSchedule(unittest.TestCase):
    a2 = mop(a)
    expected = (a+a2).tolist()
    a.assign(a+a2)
-    kcount = len(sched:=a.schedule())
-    run_schedule(sched)
+    linear, var_vals = a.linear_with_vars()
+    kcount = len(linear_to_schedule(linear))
+    run_linear(linear, var_vals)
    self.assertListEqual(a.tolist(), expected)
    self.assertEqual(kcount, expected_kcount)
  def test_setitem_permuted_sched(self): self.test_setitem_sched(lambda x: x.T, 2)
@@ -1353,9 +1355,9 @@ class TestCopyFolding(unittest.TestCase):
  def test_copy_to_same_device_sched(self):
    a = Tensor.ones(4).contiguous().realize().uop.buf_uop
    t = Tensor(a.copy_to_device(a.device))
-    sched = t.schedule()
-    assert len([s for s in sched if s.ast.op is Ops.COPY]) == 0
-    run_schedule(sched)
+    linear, var_vals = t.linear_with_vars()
+    assert len([s for s in linear_to_schedule(linear) if s.ast.op is Ops.COPY]) == 0
+    run_linear(linear, var_vals)
    assert t.uop.is_realized, f"didn't realize Tensor {t}"
    self.assertListEqual(t.tolist(), [1.,1.,1.,1.])

@@ -1442,8 +1444,7 @@ class TestFusionOp(unittest.TestCase):
  def test_expand_fuse(self):
    bt = Tensor(np.ones((10, 1)), dtype=dtypes.float32)
    out = (bt*2).expand(10,10).sum(1)
-    sched = out.schedule()
-    run_schedule(sched)
+    run_linear(*out.linear_with_vars())
    outd = out.tolist()
    assert all(x == 20.0 for x in outd)

--- a/test/external/external_benchmark_resnet.py
+++ b/test/external/external_benchmark_resnet.py
@@ -6,7 +6,7 @@ from tinygrad import Tensor, TinyJit, GlobalCounters, Device
 from tinygrad.helpers import getenv, Context
 from tinygrad.nn.optim import SGD
 from tinygrad.nn.state import get_parameters
-from tinygrad.engine.realize import run_schedule
+from tinygrad.engine.realize import run_linear

 from extra.models import resnet
 from examples.mlperf.initializers import Conv2dHeNormal, Linear
@@ -71,11 +71,11 @@ class BenchmarkResnetTrain(unittest.TestCase):

      y = x.sequential(layer).contiguous().contiguous_backward()
      y.sum().backward()
-      if getenv("ASSIGN", 1): sched, _ = Tensor.schedule_with_vars(y, x.grad, *optim.schedule_step())
-      else: sched, _ = Tensor.schedule_with_vars(y, x.grad, *[t.grad for t in optim.params])
+      if getenv("ASSIGN", 1): linear, var_vals = Tensor.linear_with_vars(y, x.grad, *optim.schedule_step())
+      else: linear, var_vals = Tensor.linear_with_vars(y, x.grad, *[t.grad for t in optim.params])

      for _ in range(JITCNT):
-        run_schedule(list(sched))
+        run_linear(linear, var_vals)

    CNT = getenv("CNT", 5)
    best_tm = None
--- a/test/external/external_test_opt.py
+++ b/test/external/external_test_opt.py
@@ -6,7 +6,7 @@ import torch
 from tinygrad import GlobalCounters, Tensor, Device
 from tinygrad.helpers import getenv
 from tinygrad.nn.state import get_parameters
-from tinygrad.engine.realize import capturing, run_schedule
+from tinygrad.engine.realize import capturing, run_linear
 from tinygrad.schedule import linear_to_schedule
 from tinygrad.tensor import _to_np_dtype

@@ -15,9 +15,8 @@ class CLCache:
    self.allowed, self.strict, self.preclear, self.var_vals = allowed, strict, preclear, var_vals if var_vals is not None else {}
    self.count = 0
  def add_linear(self, linear, var_vals):
-    schedule = linear_to_schedule(linear)
-    self.count += len(schedule)
-    run_schedule(schedule, var_vals)
+    self.count += len(linear_to_schedule(linear))
+    run_linear(linear, var_vals)
  def __enter__(self):
    if self.preclear:
      gc.collect()
--- a/test/external/mlperf_bert/external_benchmark_bert.py
+++ b/test/external/mlperf_bert/external_benchmark_bert.py
@@ -4,7 +4,7 @@ from tinygrad import Tensor, TinyJit, GlobalCounters, Device
 from tinygrad.helpers import getenv, Context
 from tinygrad.nn.optim import LAMB
 from tinygrad.nn.state import get_parameters
-from tinygrad.engine.realize import run_schedule
+from tinygrad.engine.realize import run_linear

 from extra.models import bert

@@ -49,11 +49,11 @@ class BenchmarkBertTrain(unittest.TestCase):

      y = layer(*inputs).contiguous().contiguous_backward()
      y.sum().backward()
-      if getenv("ASSIGN", 1): sched, _ = Tensor.schedule_with_vars(y, *list(inputs), *optim.schedule_step())
-      else: sched, _ = Tensor.schedule_with_vars(y, *list(inputs), *[t.grad for t in optim.params])
+      if getenv("ASSIGN", 1): linear, var_vals = Tensor.linear_with_vars(y, *list(inputs), *optim.schedule_step())
+      else: linear, var_vals = Tensor.linear_with_vars(y, *list(inputs), *[t.grad for t in optim.params])

      for _ in range(JITCNT):
-        run_schedule(sched)
+        run_linear(linear, var_vals)

    CNT = getenv("CNT", 5)
    best_tm = None
--- a/test/null/test_gc.py
+++ b/test/null/test_gc.py
@@ -3,7 +3,7 @@ import gc, inspect
 import unittest
 import numpy as np
 from tinygrad.device import Buffer
-from tinygrad.engine.realize import run_schedule
+from tinygrad.engine.realize import run_linear
 from tinygrad.uop.ops import UOp
 from tinygrad.tensor import Tensor

@@ -69,9 +69,8 @@ class TestGC(unittest.TestCase):
    init = bufs_allocated()
    x = Tensor.ones(256).contiguous().realize()
    y = x+Tensor.ones(256).contiguous()
-    ys = y.schedule()
    del x
-    run_schedule(ys)
+    run_linear(*y.linear_with_vars())
    self.assertEqual(bufs_allocated()-init, 1)
    del y
    self.assertEqual(bufs_allocated()-init, 0)
--- a/test/null/test_schedule.py
+++ b/test/null/test_schedule.py
@@ -3,7 +3,7 @@ import gc, unittest, time
 from tinygrad import nn, dtypes, Device, Tensor
 from tinygrad.uop.ops import UOp, Ops, GroupOp, UPat, KernelInfo
 from tinygrad.helpers import DEBUG, GlobalCounters, Context
-from tinygrad.engine.realize import CompiledRunner, run_schedule, run_linear
+from tinygrad.engine.realize import CompiledRunner, run_linear
 from tinygrad.schedule import linear_to_schedule

 class KernelCountException(Exception): pass
@@ -40,9 +40,9 @@ class TestBufferUOp(unittest.TestCase):
    # the device Buffer remains unallocated until it's we run the schedule
    self.assertFalse(buf.uop.buffer.is_allocated())
    add = buf+1
-    sched = add.schedule()
+    linear, var_vals = add.linear_with_vars()
    self.assertFalse(buf.uop.buffer.is_allocated())
-    run_schedule(sched)
+    run_linear(linear, var_vals)
    self.assertTrue(buf.uop.buffer.is_allocated())

  def test_buffer_has_unique_buffer(self):
--- a/test/unit/test_invalid_tensor.py
+++ b/test/unit/test_invalid_tensor.py
@@ -1,17 +1,17 @@
 import unittest
 from tinygrad import Tensor
 from tinygrad.dtype import Invalid, dtypes
-from tinygrad.engine.realize import run_schedule
+from tinygrad.engine.realize import run_linear

 class TestInvalidTensor(unittest.TestCase):
  def _invalid_test_helper(self, out, expected):
-    sched = out.schedule()
+    linear, var_vals = out.linear_with_vars()
    buf = out.uop.buffer
    buf.allocate()
    sentinel = memoryview(bytearray(b'\x42' * buf.nbytes))
    buf.copyin(sentinel)
    before = buf.as_memoryview().cast(out.dtype.fmt).tolist()
-    run_schedule(sched)
+    run_linear(linear, var_vals)
    ret = buf.as_memoryview().cast(out.dtype.fmt).tolist()

    for i,v in enumerate(expected): self.assertEqual(ret[i], before[i] if v is None else v)