Files
tinygrad/test/unit/test_hcq_graph.py
George Hotz 8294d105a7 Update the spec in spec.py to match the current state (#16132)
* start work on specv2

* more spec

* more spec

* fix amd emulator

* more spec

* more

* fix test_uop_graph

* move those

* spec=2

* skip those questionable tests

* ptx fix

* more spec=2

* store

* allow custom function in tensor

* spec 2

* fix beam search for tensor cores

* delete the old specs

* fix import
2026-05-11 20:07:47 -07:00

45 lines
2.1 KiB
Python

import unittest
from tinygrad import Device, Tensor
from tinygrad.engine.jit import TinyJit
from tinygrad.uop.ops import UOp, Ops
from tinygrad.dtype import dtypes
from tinygrad.runtime.graph.hcq import HCQGraph
from tinygrad.runtime.support.hcq import HCQCompiled
from tinygrad.runtime.support.usb import USBMMIOInterface
from test.mockgpu.usb import MockUSB
@unittest.skipUnless(isinstance(Device[Device.DEFAULT], HCQCompiled), "HCQ device required to run")
class TestHCQUnit(unittest.TestCase):
  """Unit checks for ``HCQGraph.supports_uop``; skipped unless the default device is an ``HCQCompiled``."""

  @unittest.skipIf(Device.DEFAULT == "CPU", "requires non-CPU HCQ device")
  def test_supports_uop(self):
    """Check ``HCQGraph.supports_uop`` for GPU and CPU PROGRAM calls.

    The checks are run twice: first with the default device's own
    timeline-signal view, then with that view temporarily replaced by a
    ``USBMMIOInterface`` backed by a ``MockUSB``. The original view is
    always restored afterwards.
    """
    d0, cpu_dev = Device[Device.DEFAULT], Device["CPU"]

    @TinyJit
    def f(inp, inp_cpu):
      return (inp + 1.0).contiguous().realize(), (inp_cpu + 1.0).contiguous().realize()

    inp, inp_cpu = Tensor.randn(10, 10, device=Device.DEFAULT).realize(), Tensor.randn(10, 10, device="CPU").realize()
    # NOTE(review): the results are unused; presumably this exercises both devices
    # through the JIT before supports_uop is queried -- confirm before removing.
    for _ in range(5):
      f(inp, inp_cpu)

    # construct minimal CALL UOps for supports_uop (graphs only see PROGRAMs after compile_linear)
    gpu_call = UOp(Ops.PROGRAM, src=(UOp.sink(), UOp(Ops.DEVICE, arg=Device.DEFAULT))).call(UOp.new_buffer(Device.DEFAULT, 1, dtypes.float))
    cpu_call = UOp(Ops.PROGRAM, src=(UOp.sink(), UOp(Ops.DEVICE, arg="CPU"))).call(UOp.new_buffer("CPU", 1, dtypes.float))
    gpu_devs = [d0]

    # unittest assertions are used instead of bare `assert` so the checks still run
    # under `python -O` and report both operands on failure.

    # local MMIO: GPU works alone and with CPU in batch (cpu_support=True)
    self.assertIs(HCQGraph.supports_uop(gpu_devs, gpu_call), True)
    self.assertIs(HCQGraph.supports_uop(gpu_devs, cpu_call), True)
    self.assertIs(HCQGraph.supports_uop(gpu_devs + [cpu_dev], gpu_call), True)

    # USB MMIO: GPU-only still works, but CPU batching must be rejected (cpu_support=False)
    orig_view = d0.timeline_signal.base_buf.view
    try:
      d0.timeline_signal.base_buf.view = USBMMIOInterface(MockUSB(bytearray(256)), 0, 16, fmt='B')
      self.assertIs(HCQGraph.supports_uop(gpu_devs, gpu_call), True)
      self.assertIs(HCQGraph.supports_uop(gpu_devs, cpu_call), False)
      self.assertIs(HCQGraph.supports_uop(gpu_devs + [cpu_dev], gpu_call), False)
    finally:
      # restore the real view even if an assertion above fails, so later tests see the original device state
      d0.timeline_signal.base_buf.view = orig_view
# Hand control to the unittest command-line runner only when executed as a script.
if __name__ == "__main__":
  unittest.main()