only use BUFFER_VIEW in disk [pr] (#8629)

* only use BUFFER_VIEW in disk [pr]
* delete can_view
* BUFFER_VIEW op on DISK
* remove that allow_buffer_view=False
* notes
* bitcast is a low-level op too
* this passes on AMD and LLVM
2026-06-13 00:15:35 +08:00 · 2025-01-15 12:34:15 -05:00
parent bae20e5043
commit a1f70ce7d0
5 changed files with 14 additions and 19 deletions
--- a/test/test_schedule.py
+++ b/test/test_schedule.py
@@ -13,7 +13,7 @@ from tinygrad.device import is_dtype_supported
 from tinygrad.dtype import DType, ImageDType
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View
-from tinygrad.ops import PatternMatcher, UOp, Ops, UPat, graph_rewrite, track_rewrites, view_supported_devices, symbolic_simple, merge_views
+from tinygrad.ops import PatternMatcher, UOp, Ops, UPat, graph_rewrite, track_rewrites, symbolic_simple, merge_views
 from tinygrad.helpers import CI, DEBUG, FUSE_ARANGE, GlobalCounters, getenv, SPLIT_REDUCEOP, unwrap, prod, Context
 from tinygrad.codegen.kernel import verify_ast
 from tinygrad.engine.schedule import ScheduleItem, create_schedule_with_vars, view_right, view_left, remove_movement_ops
@@ -1630,7 +1630,8 @@ class TestIndexing(unittest.TestCase):
    a[0] = 6
    np.testing.assert_equal(a.numpy(), [6., 2., 3., 4.])

-  @unittest.skipUnless(Device.DEFAULT in view_supported_devices, "need view")
+  #@unittest.skipUnless(Device.DEFAULT in view_supported_devices, "need view")
+  @unittest.skip("BUFFER_VIEW no longer supported on non-disk devices")
  def test_arange_view_op(self):
    a = Tensor.arange(12).reshape(4, 3).shrink(((1, 2), (1, 3))).contiguous()
    sched = self.check_schedule(a, 1)
--- a/test/test_setitem.py
+++ b/test/test_setitem.py
@@ -1,6 +1,5 @@
 import unittest
-from tinygrad import Device, Tensor, TinyJit, Variable, dtypes
-from tinygrad.helpers import CI
+from tinygrad import Tensor, TinyJit, Variable, dtypes
 import numpy as np

 class TestSetitem(unittest.TestCase):
@@ -139,12 +138,7 @@ class TestSetitem(unittest.TestCase):
  def test_setitem_overlapping_inplace1(self):
    t = Tensor([[3.0], [2.0], [1.0]]).contiguous()
    t[1:] = t[:-1]
-    if (Device.DEFAULT == "LLVM") or (CI and Device.DEFAULT == "AMD"):
-      # TODO: FIXME
-      with self.assertRaises(AssertionError):
-        self.assertEqual(t.tolist(), [[3.0], [3.0], [2.0]])
-    else:
-      self.assertEqual(t.tolist(), [[3.0], [3.0], [2.0]])
+    self.assertEqual(t.tolist(), [[3.0], [3.0], [2.0]])

  def test_setitem_overlapping_inplace2(self):
    t = Tensor([[3.0], [2.0], [1.0]]).contiguous()
--- a/test/test_tensor_uop.py
+++ b/test/test_tensor_uop.py
@@ -3,7 +3,7 @@ import numpy as np
 import unittest
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.engine.realize import run_schedule
-from tinygrad.ops import Ops, UOp, UPat, view_supported_devices
+from tinygrad.ops import Ops, UOp, UPat

 class TestTensorUOp(unittest.TestCase):
  def test_fromcpu_shape_tracker(self):
@@ -84,7 +84,6 @@ class TestTensorUOp(unittest.TestCase):
    sched = empty.schedule()
    self.assertEqual(len(sched), 0)

-  @unittest.skipIf(Device.DEFAULT in view_supported_devices, "BUFFER_VIEW cannot exist on a CONST")
  def test_contiguous_folded_alu(self):
    a = Tensor.empty(8, 8)
    # NOTE: the buffer for mul_0 late folds to just a CONST
--- a/tinygrad/ops.py
+++ b/tinygrad/ops.py
@@ -367,7 +367,7 @@ class UOp(MathTrait, metaclass=UOpMetaClass):
      raise RuntimeError(f"unsupported size in bitcast {dtype}")
    # shape changing bitcast can use a subbuffer on DISK
    # TODO: this should be moved to realize.py
-    if self.can_view() and self.device.startswith("DISK"): return UOp(Ops.BUFFER_VIEW, dtype, (self,))
+    if self._device is not None and self.device.startswith("DISK"): return UOp(Ops.BUFFER_VIEW, dtype, (self,))
    return UOp(Ops.BITCAST, dtype, (self,))
  def gep(self, i:Union[tuple[int, ...], int]):
    if isinstance(i, int):
@@ -420,9 +420,13 @@ class UOp(MathTrait, metaclass=UOpMetaClass):
    if DEBUG >= 3: print(f"split {divisor}: {self.shape} -> {splitted.shape} -> {new_shape}")
    return splitted._reduce_op(op, axis)._reduce_op(op, (len(new_shape),)).reshape(new_shape)  # reduce original axes, then split
  def assign(self, x:UOp): return UOp(Ops.ASSIGN, self.dtype, (self,x))
-  def contiguous(self, allow_buffer_view=True):
+  def contiguous(self):
+    # TODO: BUFFER_VIEW op should be deleted and subbuffer should be moved to realize.py
+    # NOTE: DISK uses subbuffer because DISK does not render kernels
+    if self.device.startswith("DISK"): return self.alu(Ops.BUFFER_VIEW)
+    # otherwise it's normal CONTIGUOUS
    if not unwrap(self.st).contiguous or self.size != self.base.size or self.base.op is Ops.CONST:
-      return self.alu(Ops.BUFFER_VIEW if allow_buffer_view and self.can_view() else Ops.CONTIGUOUS)
+      return self.alu(Ops.CONTIGUOUS)
    forced_realize.add(self.base)
    return self

@@ -451,9 +455,6 @@ class UOp(MathTrait, metaclass=UOpMetaClass):
    return UOp(Ops.COPY, self.base.dtype, (UOp(Ops.DEVICE, arg=device), self.base), clone).view(unwrap(self.st))
  def clone(self) -> UOp: return self.copy_to_device(self.device, clone=True)
  def is_unrealized_unmasked_const(self): return self.base.op is Ops.CONST and all(v.mask is None for v in unwrap(self.st).views)
-  def can_view(self):
-    return (self.st is not None and self._device is not None and self.st.consecutive and self.base.op is not Ops.CONST and
-            not isinstance(self.dtype, ImageDType) and self.device.split(":")[0] in view_supported_devices)
  @property
  def lbs(self): return [self]
  @property
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -413,7 +413,7 @@ class Tensor(SimpleMathTrait):
      lbs = [cast(UOp, t.lazydata) for t in self.split(sizes, axis)]
    sharded_lbs = [lb.copy_to_device(d) for lb,d in zip(lbs, devices)]
    # NOTE: this contiguous is making it impossible for the scheduler to do late const folding
-    mlb = MultiLazyBuffer([lb.contiguous(allow_buffer_view=False) for lb in sharded_lbs], axis)
+    mlb = MultiLazyBuffer([lb.contiguous() for lb in sharded_lbs], axis)
    return Tensor(mlb, device=devices, requires_grad=self.requires_grad)

  def shard_(self, devices:tuple[str, ...], axis:Optional[int]=None):