From bd731a86247e6fe7da998ea2829b609895feb648 Mon Sep 17 00:00:00 2001 From: b1tg <33436708+b1tg@users.noreply.github.com> Date: Thu, 20 Mar 2025 09:44:07 +0800 Subject: [PATCH] AMDCompiler refactor (no_comgr prereq) (#9497) * add amdgpu_disassemble to helpers * refactor hip compiler --------- Co-authored-by: b1tg --- tinygrad/runtime/ops_amd.py | 4 ++-- tinygrad/runtime/ops_hip.py | 4 ++-- .../support/{compiler_hip.py => compiler_amd.py} | 10 ++++++---- 3 files changed, 10 insertions(+), 8 deletions(-) rename tinygrad/runtime/support/{compiler_hip.py => compiler_amd.py} (93%) diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index 989da75a1a..2f04450b59 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -10,7 +10,7 @@ from tinygrad.helpers import getenv, to_mv, round_up, data64_le, mv_address, DEB from tinygrad.renderer.cstyle import AMDRenderer from tinygrad.runtime.autogen import kfd, hsa, amd_gpu, libc, pci, vfio, sqtt from tinygrad.runtime.autogen.am import am, gc_11_0_0 -from tinygrad.runtime.support.compiler_hip import AMDCompiler +from tinygrad.runtime.support.compiler_amd import HIPCompiler from tinygrad.runtime.support.elf import elf_loader from tinygrad.runtime.support.am.amdev import AMDev, AMMapping if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import @@ -706,7 +706,7 @@ class AMDDevice(HCQCompiled): self.sdma_queue = self.create_queue(kfd.KFD_IOC_QUEUE_TYPE_SDMA, 0x800000) - super().__init__(device, AMDAllocator(self), AMDRenderer(self.arch), AMDCompiler(self.arch), functools.partial(AMDProgram, self), + super().__init__(device, AMDAllocator(self), AMDRenderer(self.arch), HIPCompiler(self.arch), functools.partial(AMDProgram, self), AMDSignal, AMDComputeQueue, AMDCopyQueue) # Scratch setup diff --git a/tinygrad/runtime/ops_hip.py b/tinygrad/runtime/ops_hip.py index 412ddc0925..3fdfccdb53 100644 --- a/tinygrad/runtime/ops_hip.py +++ b/tinygrad/runtime/ops_hip.py @@ -2,7 +2,7 @@ import ctypes, functools from tinygrad.helpers import init_c_var, from_mv, init_c_struct_t, getenv from tinygrad.device import Compiled, LRUAllocator, BufferSpec from tinygrad.runtime.autogen import hip -from tinygrad.runtime.support.compiler_hip import AMDCompiler +from tinygrad.runtime.support.compiler_amd import HIPCompiler from tinygrad.renderer.cstyle import HIPRenderer if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import @@ -14,7 +14,7 @@ class HIPDevice(Compiled): self.device_id = int(device.split(":")[1]) if ":" in device else 0 self.arch = init_c_var(hip.hipDeviceProp_t(), lambda x: check(hip.hipGetDeviceProperties(x, self.device_id))).gcnArchName.decode() self.time_event_st, self.time_event_en = [init_c_var(hip.hipEvent_t(), lambda x: hip.hipEventCreate(ctypes.byref(x), 0)) for _ in range(2)] - super().__init__(device, HIPAllocator(self), HIPRenderer(self.arch), AMDCompiler(self.arch), functools.partial(HIPProgram, self)) + super().__init__(device, HIPAllocator(self), HIPRenderer(self.arch), HIPCompiler(self.arch), functools.partial(HIPProgram, self)) def synchronize(self): check(hip.hipSetDevice(self.device_id)) check(hip.hipDeviceSynchronize()) diff --git a/tinygrad/runtime/support/compiler_hip.py b/tinygrad/runtime/support/compiler_amd.py similarity index 93% rename from tinygrad/runtime/support/compiler_hip.py rename to tinygrad/runtime/support/compiler_amd.py index b7f077bd82..2f527ce775 100644 --- a/tinygrad/runtime/support/compiler_hip.py +++ b/tinygrad/runtime/support/compiler_amd.py @@ -2,6 +2,10 @@ import ctypes, subprocess import tinygrad.runtime.autogen.comgr as comgr from tinygrad.device import Compiler, CompileError +def amdgpu_disassemble(lib:bytes): + asm = subprocess.check_output(["/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], input=lib) + print('\n'.join([x for x in asm.decode('utf-8').split("\n") if 's_code_end' not in x])) + def check(status): if status != 0: comgr.amd_comgr_status_string(status, ctypes.byref(status_str := ctypes.POINTER(ctypes.c_char)())) @@ -56,13 +60,11 @@ def compile_hip(prg:str, arch="gfx1100", asm=False) -> bytes: check(comgr.amd_comgr_destroy_action_info(action_info)) return ret -class AMDCompiler(Compiler): +class HIPCompiler(Compiler): def __init__(self, arch:str): self.arch = arch super().__init__(f"compile_hip_{self.arch}") def compile(self, src:str) -> bytes: try: return compile_hip(src, self.arch, src.split('\n', 1)[0].strip() == '.text') except RuntimeError as e: raise CompileError(e) from e - def disassemble(self, lib:bytes): - asm = subprocess.check_output(["/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], input=lib) - print('\n'.join([x for x in asm.decode('utf-8').split("\n") if 's_code_end' not in x])) + def disassemble(self, lib:bytes): amdgpu_disassemble(lib)