Files
tinygrad/tinygrad/runtime/ops_null.py
Christopher Milan 7810be8d3c compile QCOM without opening device (#15165)
Co-authored-by: Comma Device <device@comma.ai>
2026-03-06 06:24:27 -05:00

46 lines
2.5 KiB
Python

import functools
from tinygrad.device import Compiled, Allocator, CompilerSet
from tinygrad.engine.jit import MultiGraphRunner
from tinygrad.renderer.cstyle import Renderer, CStyleLanguage, AMDHIPRenderer, QCOMCLRenderer
from tinygrad.uop.ops import Ops
from tinygrad.helpers import cpu_profile, EMULATE, NULL_QCOMCL, NULL_IR3, NULL_NAK, NULL_ALLOW_COPYOUT
from tinygrad.renderer.nir import IR3Renderer, NAKRenderer
class NullRenderer(CStyleLanguage):
  """C-style renderer for the NULL device; picked by NullDevice when EMULATE is unset."""
  device = "NULL"
  has_local = False
  float4 = "float4"
  barrier = "// BARRIER"
  # start from the base C-style op table, then add textual forms for THREEFRY and MAX
  code_for_op = dict(CStyleLanguage.code_for_op)
  code_for_op[Ops.THREEFRY] = lambda a,b,dtype: f"threefry({a},{b})"
  code_for_op[Ops.MAX] = lambda a,b,dtype: f"max({a},{b})"
class NullProgram:
  """Program stand-in for the NULL device: remembers its device and name, and runs nothing."""
  def __init__(self, device:str, name:str, lib:bytes, *args, **kwargs):
    # lib and any extra arguments are accepted but not stored
    self.device = device
    self.name = name
  def __call__(self, *bufs, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1), vals:tuple[int, ...]=(), wait=False, **kw):
    """Enter a cpu_profile context tagged with this program's name and device, then return a fixed 1e-3."""
    with cpu_profile(self.name, self.device):
      return 1e-3
class NullAllocator(Allocator['NullDevice']):
  """Allocator for the NULL device: every operation is a no-op, except copyout which may raise."""
  def _alloc(self, size, options):
    return None
  def _copyin(self, dest, src:memoryview):
    return None
  def _copyout(self, dest:memoryview, src):
    # copyout is only tolerated when NULL_ALLOW_COPYOUT is truthy; nothing is written to dest either way
    if NULL_ALLOW_COPYOUT: return None
    raise RuntimeError("no copyout on NULL")
  def _transfer(self, dest, src, sz:int, src_dev, dest_dev):
    # no data moves; only a cpu_profile context is entered for the would-be copy
    route = f"{src_dev.device} -> {dest_dev.device}"
    lane = f"{self.dev.device}:COPY"
    with cpu_profile(route, lane):
      pass
  def _offset(self, buf, offset:int, size:int):
    return None
class NullGraph(MultiGraphRunner):
  """Graph runner for the NULL device: calling it does no work and returns a constant."""
  def __call__(self, input_buffers, var_vals, wait=False) -> float|None:
    # inputs are ignored; the return value is always 1e-1
    return 1e-1
class NullDevice(Compiled):
  """NULL device: wires the null allocator, program and graph together with a set of renderers."""
  def __init__(self, device:str):
    """Choose the primary renderer from EMULATE, then initialise the Compiled base.

    Raises RuntimeError when EMULATE names a target not handled here.
    """
    renderer:functools.partial|type[Renderer]
    emulated = str(EMULATE.value)
    # EMULATE value -> arch string passed to AMDHIPRenderer
    amd_arch = {"AMD": "gfx1100", "AMD_RDNA4": "gfx1201", "AMD_CDNA4": "gfx950"}
    if emulated in amd_arch:
      renderer = functools.partial(AMDHIPRenderer, amd_arch[emulated])
    elif emulated == "":
      renderer = NullRenderer
    else:
      raise RuntimeError(f"can't EMULATE device: {EMULATE.value}")
    adreno_630 = 0x6030001
    compiler_entries = [
      (renderer, None),
      (functools.partial(QCOMCLRenderer, adreno_630), NULL_QCOMCL),  # adreno 630
      (functools.partial(IR3Renderer, adreno_630), NULL_IR3),  # adreno 630
      (functools.partial(NAKRenderer, "sm_120", 48), NULL_NAK),  # 5090
    ]
    compilers = CompilerSet(compiler_entries)
    super().__init__(device, NullAllocator(self), compilers, functools.partial(NullProgram, device), NullGraph)