mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-13 00:15:35 +08:00
compile QCOM without opening device (#15165)
Co-authored-by: Comma Device <device@comma.ai>
This commit is contained in:
committed by
GitHub
parent
6fd18ef875
commit
7810be8d3c
10
.github/actions/setup-tinygrad/action.yml
vendored
10
.github/actions/setup-tinygrad/action.yml
vendored
@@ -45,6 +45,10 @@ inputs:
|
||||
description: "Install mesa"
|
||||
required: false
|
||||
default: 'false'
|
||||
tinydreno:
|
||||
description: "Install tinydreno"
|
||||
required: false
|
||||
default: 'false'
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
@@ -326,3 +330,9 @@ runs:
|
||||
if: inputs.mesa == 'true' && runner.os == 'macOS'
|
||||
shell: bash
|
||||
run: brew install sirhcm/tinymesa/tinymesa_cpu
|
||||
|
||||
# *** tinydreno ***
|
||||
- name: Install tinydreno (linux)
|
||||
if: inputs.tinydreno == 'true' && runner.os == 'Linux'
|
||||
shell: bash
|
||||
run: sudo curl -fL https://github.com/sirhcm/tinydreno/raw/refs/heads/master/libllvm-qcom.so -o /usr/lib/libllvm-qcom.so
|
||||
|
||||
23
.github/workflows/test.yml
vendored
23
.github/workflows/test.yml
vendored
@@ -1011,3 +1011,26 @@ jobs:
|
||||
python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'"
|
||||
DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add
|
||||
python -m pytest -n=auto test/backend/test_ops.py --durations=20
|
||||
qcomclcompiletests:
|
||||
name: Compile-only (QCOM CL)
|
||||
runs-on: ubuntu-24.04-arm
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
- name: Checkout Code
|
||||
uses: actions/checkout@v4
|
||||
- name: Setup Environment
|
||||
uses: ./.github/actions/setup-tinygrad
|
||||
with:
|
||||
key: compile-qcomcl
|
||||
deps: testing_unit
|
||||
tinydreno: 'true'
|
||||
python-version: '3.12'
|
||||
- name: Set env
|
||||
shell: bash
|
||||
run: printf "NULL=1\nNULL_ALLOW_COPYOUT=1\nNULL_QCOMCL=1" >> $GITHUB_ENV
|
||||
- name: Run test_ops
|
||||
shell: bash
|
||||
run: |
|
||||
python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'"
|
||||
DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add
|
||||
python -m pytest -n=auto test/backend/test_ops.py --durations=20
|
||||
|
||||
@@ -6,6 +6,7 @@ from tinygrad.helpers import getenv, IMAGE, DEBUG, CI, Context, CPU_LLVM, AMD_LL
|
||||
from tinygrad import Tensor, Device, dtypes
|
||||
from tinygrad.tensor import _to_np_dtype
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.renderer.cstyle import QCOMCLRenderer
|
||||
from tinygrad.renderer.nir import NIRRenderer
|
||||
|
||||
TINY_BACKEND = getenv("TINY_BACKEND")
|
||||
@@ -436,7 +437,7 @@ class TestOps(unittest.TestCase):
|
||||
helper_test_op([(45,35), (45,35), (45,35)], lambda x,y,z: x.lerp(y,z))
|
||||
helper_test_op(None, lambda x,y,z: x.lerp(y,z), vals=[[1.,2.,3.], [4.,5.,6.], 0.5])
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_tril(self):
|
||||
helper_test_op([(3,3)], lambda x: x.tril())
|
||||
helper_test_op([(3,3)], lambda x: x.tril(1))
|
||||
@@ -454,7 +455,7 @@ class TestOps(unittest.TestCase):
|
||||
helper_test_op([(5,3,3)], lambda x: x.tril(1))
|
||||
helper_test_op(None, lambda x: x.tril(), vals=[[[True] * 3] * 3], forward_only=True)
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_triu(self):
|
||||
helper_test_op([(3,3)], lambda x: x.triu())
|
||||
helper_test_op([(3,3)], lambda x: x.triu(1))
|
||||
@@ -765,6 +766,7 @@ class TestOps(unittest.TestCase):
|
||||
|
||||
self.helper_test_exception([(4), (4)], lambda x,y: x.bitwise_xor(y), expected=RuntimeError)
|
||||
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_and(self):
|
||||
data = [[1,-8,1],[32,1,6]]
|
||||
tor = torch.tensor(data, dtype=torch.int)
|
||||
@@ -782,6 +784,7 @@ class TestOps(unittest.TestCase):
|
||||
|
||||
self.helper_test_exception([(4), (4)], lambda x,y: x.bitwise_and(y), expected=RuntimeError)
|
||||
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_or(self):
|
||||
data = [[1,-8,1],[32,1,6]]
|
||||
tor = torch.tensor(data, dtype=torch.int)
|
||||
@@ -1170,6 +1173,7 @@ class TestOps(unittest.TestCase):
|
||||
helper_test_op(None, lambda x: x.type(torch.int32).argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[False, True]])
|
||||
helper_test_op(None, lambda x: x.type(torch.int32).argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[True, False]])
|
||||
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_argmin(self):
|
||||
# check if it returns the first index for multiple occurrences
|
||||
helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[2, 2]])
|
||||
@@ -1475,6 +1479,7 @@ class TestOps(unittest.TestCase):
|
||||
def test_prod_dtype_arg(self):
|
||||
with self.assertRaises(AttributeError): Tensor([1.0, 2.0]).prod(dtype="")
|
||||
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_min(self):
|
||||
helper_test_op([(3,3)], lambda x: x.min())
|
||||
helper_test_op([(45,3)], lambda x: x.min())
|
||||
@@ -1503,7 +1508,6 @@ class TestOps(unittest.TestCase):
|
||||
helper_test_op([(3,3)], lambda x: torch.full_like(x, 2).prod(), lambda x: (x.full_like(2)).prod(), forward_only=True)
|
||||
helper_test_op([(3,3)], lambda x: torch.full_like(x, 2).max(), lambda x: (x.full_like(2)).max(), forward_only=True)
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)")
|
||||
def test_any(self):
|
||||
helper_test_op([(3,4,5,6)], lambda x: x.any(), forward_only=True)
|
||||
helper_test_op(None, lambda x: x.any(), vals=[[True, True]], forward_only=True)
|
||||
@@ -1515,7 +1519,7 @@ class TestOps(unittest.TestCase):
|
||||
def test_any_zero_axis(self):
|
||||
helper_test_op([(1,0,3,0,5)], lambda x: x.any(axis=(1,3)), forward_only=True)
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)")
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_all(self):
|
||||
helper_test_op([(3,4,5,6)], lambda x: x.all(), forward_only=True)
|
||||
helper_test_op(None, lambda x: x.all(), vals=[[True, True]], forward_only=True)
|
||||
@@ -2889,6 +2893,7 @@ class TestOps(unittest.TestCase):
|
||||
helper_test_op([(2,5,6,5,3,4)], lambda x: x[...,c,:,e], lambda x: x[...,k,:,p])
|
||||
|
||||
@slow_test
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_slice_fancy_indexing_dim_collapse_int(self):
|
||||
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
|
||||
# dim collapse from int
|
||||
@@ -2899,6 +2904,7 @@ class TestOps(unittest.TestCase):
|
||||
helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,:,3:11:2,d,0:2], lambda x: x[1,:,3:11:2,o,0:2])
|
||||
|
||||
@slow_test
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_slice_fancy_indexing_dim_inject_none(self):
|
||||
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
|
||||
# dim injection from None
|
||||
@@ -2933,6 +2939,7 @@ class TestOps(unittest.TestCase):
|
||||
lambda x: x[Tensor([[0,1,-1],[-1,-2,0]]), Tensor([2,1,-1])])
|
||||
|
||||
@slow_test
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_slice_fancy_indexing_list_indices(self):
|
||||
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
|
||||
helper_test_op([(2,5,6,5,3,4)], lambda x: x[((0,),)])
|
||||
@@ -2944,6 +2951,7 @@ class TestOps(unittest.TestCase):
|
||||
helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,(2,1,0),c,(-2,1,0),e], lambda x: x[i,(2,1,0),k,(-2,1,0),p])
|
||||
|
||||
@slow_test
|
||||
@unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug")
|
||||
def test_slice_fancy_indexing_tuple_indices(self):
|
||||
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
|
||||
helper_test_op([(2,5,6,5,3,4)], lambda x: x[(((0,),),)], lambda x: x[(((0,),),)])
|
||||
@@ -3285,7 +3293,6 @@ class TestOps(unittest.TestCase):
|
||||
helper_test_op([(20,)], lambda x: (x>0.5).nonzero().int(), lambda x: (x>0.5).nonzero(), forward_only=True)
|
||||
helper_test_op([(10, 5, 3)], lambda x: (x>0.5).nonzero().int(), lambda x: (x>0.5).nonzero(), forward_only=True)
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)")
|
||||
def test_cast(self):
|
||||
helper_test_op([(3, 3)], lambda x: x.float())
|
||||
helper_test_op(None, lambda x: x.float(), vals=[[0, 1, 2, 3]], forward_only=True)
|
||||
|
||||
@@ -6,7 +6,7 @@ import importlib, inspect, functools, pathlib, os, platform, contextlib, sys, re
|
||||
from tinygrad.helpers import CI, OSX, LRU, getenv, diskcache_get, diskcache_put, DEBUG, GlobalCounters, flat_mv, PROFILE, temp, colored
|
||||
from tinygrad.helpers import Context, CCACHE, ALLOW_DEVICE_USAGE, MAX_BUFFER_SIZE, cpu_events, ProfileEvent, ProfilePointEvent, dedup, ContextVar
|
||||
from tinygrad.helpers import unwrap_class_type, suppress_finalizing, select_first_inited, VIZ, CPU_LLVM, CPU_LVP, NV_PTX, CUDA_PTX, NV_NAK
|
||||
from tinygrad.helpers import EMULATED_DTYPES, TracingKey
|
||||
from tinygrad.helpers import EMULATED_DTYPES, NULL_IR3, NULL_QCOMCL, TracingKey
|
||||
from tinygrad.dtype import DType, ImageDType, PtrDType, dtypes, _to_np_dtype
|
||||
if TYPE_CHECKING: from tinygrad.renderer import Renderer
|
||||
|
||||
@@ -371,7 +371,7 @@ def is_dtype_supported(dtype:DType, device:str|None=None) -> bool:
|
||||
if device in ["CUDA", "NV"]: return not CI
|
||||
if device == "CPU" and CPU_LLVM: return OSX
|
||||
if device == "PYTHON": return sys.version_info >= (3, 12)
|
||||
if dtype == dtypes.float64: return (device not in {"METAL", "QCOM"} and not (OSX and device == "CL") and not getenv("NULL_IR3")
|
||||
if dtype == dtypes.float64: return (device not in {"METAL", "QCOM"} and not (OSX and device == "CL") and not NULL_IR3 and not NULL_QCOMCL
|
||||
and dtypes.long not in EMULATED_DTYPES.tolist(dtypes))
|
||||
return True
|
||||
|
||||
|
||||
@@ -195,7 +195,8 @@ CPU_COUNT = ContextVar("CPU_COUNT", max(1, len(os.sched_getaffinity(0)) if hasat
|
||||
CPU_CC, CPU_LLVM, CPU_LVP = ContextVar("CPU_CC", ""), ContextVar("CPU_LLVM", 0), ContextVar("CPU_LVP", 0)
|
||||
NV_CC, NV_PTX, NV_NAK, NV_NVCC = ContextVar("NV_CC", ""), ContextVar("NV_PTX", 0), ContextVar("NV_NAK", 0), ContextVar("NV_NVCC", 0)
|
||||
CUDA_CC, CUDA_PTX, CUDA_NVCC = ContextVar("CUDA_CC", ""), ContextVar("CUDA_PTX", 0), ContextVar("CUDA_NVCC", 0)
|
||||
NULL_IR3, NULL_NAK, NULL_ALLOW_COPYOUT = ContextVar("NULL_IR3", 0), ContextVar("NULL_NAK", 0), ContextVar("NULL_ALLOW_COPYOUT", 0)
|
||||
NULL_QCOMCL, NULL_IR3, NULL_NAK = ContextVar("NULL_QCOMCL", 0), ContextVar("NULL_IR3", 0), ContextVar("NULL_NAK", 0)
|
||||
NULL_ALLOW_COPYOUT = ContextVar("NULL_ALLOW_COPYOUT", 0)
|
||||
AMD_CC, AMD_LLVM, AMD_HIPCC = ContextVar("AMD_CC", ""), ContextVar("AMD_LLVM", 0), ContextVar("AMD_HIPCC", 0)
|
||||
QCOM_CC, QCOM_IR3 = ContextVar("QCOM_CC", ""), ContextVar("QCOM_IR3", 0)
|
||||
# VIZ implies PROFILE, but you can run PROFILE without VIZ
|
||||
|
||||
@@ -566,4 +566,9 @@ class AMDHIPCCRenderer(AMDHIPRenderer):
|
||||
super().__init__(arch)
|
||||
self.compiler = HIPCCCompiler(arch)
|
||||
|
||||
class QCOMRenderer(OpenCLRenderer): device = "QCOM"
|
||||
class QCOMCLRenderer(OpenCLRenderer):
  """OpenCL renderer for the QCOM device whose compiler is a `QCOMCompiler` built for `chip_id`."""
  device = "QCOM"

  def __init__(self, chip_id):
    # function-scope import: compiler_qcom is only imported once this renderer is actually instantiated
    import tinygrad.runtime.support.compiler_qcom as compiler_qcom
    self.compiler = compiler_qcom.QCOMCompiler(chip_id)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import functools
|
||||
from tinygrad.device import Compiled, Allocator, CompilerSet
|
||||
from tinygrad.engine.jit import MultiGraphRunner
|
||||
from tinygrad.renderer.cstyle import Renderer, CStyleLanguage, AMDHIPRenderer
|
||||
from tinygrad.renderer.cstyle import Renderer, CStyleLanguage, AMDHIPRenderer, QCOMCLRenderer
|
||||
from tinygrad.uop.ops import Ops
|
||||
from tinygrad.helpers import cpu_profile, EMULATE, NULL_IR3, NULL_NAK, NULL_ALLOW_COPYOUT
|
||||
from tinygrad.helpers import cpu_profile, EMULATE, NULL_QCOMCL, NULL_IR3, NULL_NAK, NULL_ALLOW_COPYOUT
|
||||
from tinygrad.renderer.nir import IR3Renderer, NAKRenderer
|
||||
|
||||
class NullRenderer(CStyleLanguage):
|
||||
@@ -39,6 +39,7 @@ class NullDevice(Compiled):
|
||||
case "AMD_CDNA4": renderer = functools.partial(AMDHIPRenderer, "gfx950")
|
||||
case "": renderer = NullRenderer
|
||||
case _: raise RuntimeError(f"can't EMULATE device: {EMULATE.value}")
|
||||
compilers = CompilerSet([(renderer, None), (functools.partial(IR3Renderer, 0x6030001), NULL_IR3), # adreno 630
|
||||
compilers = CompilerSet([(renderer, None), (functools.partial(QCOMCLRenderer, 0x6030001), NULL_QCOMCL), # adreno 630
|
||||
(functools.partial(IR3Renderer, 0x6030001), NULL_IR3), # adreno 630
|
||||
(functools.partial(NAKRenderer, "sm_120", 48), NULL_NAK)]) # 5090
|
||||
super().__init__(device, NullAllocator(self), compilers, functools.partial(NullProgram, device), NullGraph)
|
||||
|
||||
@@ -6,11 +6,10 @@ from tinygrad.device import BufferSpec, CompilerSet, Device
|
||||
from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator
|
||||
from tinygrad.runtime.support.hcq import FileIOInterface, MMIOInterface
|
||||
from tinygrad.runtime.autogen import kgsl, mesa
|
||||
from tinygrad.runtime.ops_cl import CLDevice
|
||||
from tinygrad.renderer.cstyle import QCOMRenderer
|
||||
from tinygrad.renderer.cstyle import QCOMCLRenderer
|
||||
from tinygrad.renderer.nir import IR3Renderer
|
||||
from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, ceildiv, prod, fromimport, cpu_profile, lo32, suppress_finalizing
|
||||
from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC, PROFILE, DEBUG
|
||||
from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, ceildiv, prod, cpu_profile, lo32, suppress_finalizing
|
||||
from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC, PROFILE
|
||||
from tinygrad.dtype import ImageDType, dtypes
|
||||
from tinygrad.runtime.support.system import System
|
||||
if getenv("IOCTL"): import extra.qcom_gpu_driver.opencl_ioctl # noqa: F401 # pylint: disable=unused-import
|
||||
@@ -248,9 +247,7 @@ class QCOMProgram(HCQProgram):
|
||||
self.tex_off, self.ibo_off, self.samp_off = 2048, 2048 + 0x40 * self.tex_cnt, 2048 + 0x40 * (self.tex_cnt + self.ibo_cnt)
|
||||
self.fregs, self.hregs = v.info.max_reg + 1, v.info.max_half_reg + 1
|
||||
self.consts_info:list[tuple] = []
|
||||
else:
|
||||
self._parse_lib(lib:=self.dev.cl_dev.cl_compiler.compile_cached(lib.decode()))
|
||||
if DEBUG >= 7: fromimport('tinygrad.runtime.support.compiler_mesa', 'disas_adreno')(lib[(ofs:=_read_lib(lib, 0xc0)):ofs+_read_lib(lib, 0x100)])
|
||||
else: self._parse_lib(lib)
|
||||
|
||||
self.lib_gpu: HCQBuffer = self.dev.allocator.alloc(self.image_size, buf_spec:=BufferSpec(cpu_access=True, nolru=True))
|
||||
to_mv(self.lib_gpu.va_addr, self.image_size)[:] = self.image
|
||||
@@ -384,8 +381,8 @@ class QCOMDevice(HCQCompiled):
|
||||
if PROFILE and self.gpu_id[:2] < (7, 3):
|
||||
System.write_sysfs("/sys/class/kgsl/kgsl-3d0/idle_timer", value="4000000000", msg="Failed to disable suspend mode", expected="4294967276")
|
||||
|
||||
self.cl_dev = CLDevice(device)
|
||||
compilers = CompilerSet(ctrl_var=QCOM_CC, cset=[(QCOMRenderer, None), (functools.partial(IR3Renderer, info.chip_id), QCOM_IR3)])
|
||||
compilers = CompilerSet(ctrl_var=QCOM_CC, cset=[(functools.partial(QCOMCLRenderer, info.chip_id), None),
|
||||
(functools.partial(IR3Renderer, info.chip_id), QCOM_IR3)])
|
||||
super().__init__(device, QCOMAllocator(self), compilers, functools.partial(QCOMProgram, self), QCOMSignal,
|
||||
functools.partial(QCOMComputeQueue, self), None)
|
||||
|
||||
|
||||
57
tinygrad/runtime/support/compiler_qcom.py
Normal file
57
tinygrad/runtime/support/compiler_qcom.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import ctypes, struct
|
||||
from tinygrad.device import Compiler
|
||||
from tinygrad.runtime.support.c import DLL
|
||||
from tinygrad.runtime.support.compiler_mesa import disas_adreno
|
||||
|
||||
# see https://github.com/sirhcm/tinydreno
dll = DLL("llvm-qcom", ["llvm-qcom"])

def _bind(symbol, restype, argtypes):
  """Look up `symbol` on the llvm-qcom library object and attach its ctypes return/argument types."""
  fn = getattr(dll, symbol)
  fn.restype = restype
  fn.argtypes = argtypes
  return fn

# instance lifetime
create_llvm_instance = _bind("cl_compiler_create_llvm_instance", ctypes.c_void_p, [])

# compile / link: both return an opaque handle (None when the library returns NULL)
compile_source = _bind("cl_compiler_compile_source", ctypes.c_void_p,
                       [ctypes.c_void_p, ctypes.c_uint64, ctypes.c_int, ctypes.c_char_p, ctypes.c_int, ctypes.c_uint64, ctypes.c_uint64,
                        ctypes.c_char_p, ctypes.c_uint64, ctypes.c_uint64, ctypes.c_void_p])
link_program = _bind("cl_compiler_link_program", ctypes.c_void_p,
                     [ctypes.c_void_p, ctypes.c_uint64, ctypes.c_int, ctypes.c_char_p, ctypes.c_int, ctypes.c_void_p])

# handle inspection
get_error_code = _bind("cl_compiler_get_error_code", ctypes.c_int, [ctypes.c_void_p])
get_build_log = _bind("cl_compiler_get_build_log", ctypes.c_char_p, [ctypes.c_void_p])

# binary extraction: writes a pointer and a size through the two out-parameters
handle_create_binary = _bind("cl_compiler_handle_create_binary", None,
                             [ctypes.c_void_p, ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_size_t)])

# cleanup
free_handle = _bind("cl_compiler_free_handle", None, [ctypes.c_void_p])
free_assembly = _bind("cl_compiler_free_assembly", None, [ctypes.c_void_p])
destroy_llvm_instance = _bind("cl_compiler_destroy_llvm_instance", None, [ctypes.c_void_p])

# values for the third argument of compile_source / link_program
MODE_32BIT = 0
MODE_64BIT = 1
# values for the tenth argument of compile_source
SRC_STR = 0
SRC_BLOB = 1
|
||||
|
||||
def _read_lib(lib, off) -> int:
  """Return the native-byte-order unsigned 32-bit integer stored in `lib` at byte offset `off`."""
  (value,) = struct.unpack("I", lib[off:off+4])
  return value
|
||||
|
||||
class QCOMCompiler(Compiler):
  """OpenCL compiler driven through the llvm-qcom shared library (see https://github.com/sirhcm/tinydreno).

  Each object owns one native LLVM instance, created in `__init__` and destroyed in `__del__`.
  """
  def __init__(self, chip_id):
    self.chip_id, self.llvm_inst = chip_id, create_llvm_instance()
    super().__init__(f"compile_qcomcl_{chip_id}")

  def __del__(self):
    # __init__ can fail before llvm_inst is assigned, and create_llvm_instance yields None for a NULL pointer
    if getattr(self, "llvm_inst", None) is not None: destroy_llvm_instance(self.llvm_inst)

  # pickling recreates the compiler from chip_id instead of serializing the native instance pointer
  def __reduce__(self): return QCOMCompiler, (self.chip_id,)

  def checked(self, handle):
    """Return `handle` if it reports no error; otherwise replace the LLVM instance and raise.

    Raises:
      RuntimeError: if `handle` is None or its error code is non-zero. The build log is appended when available.
    """
    if handle is not None and get_error_code(handle) == 0: return handle
    # fetch the log while the instance that produced the handle still exists
    log = None if handle is None else get_build_log(handle)
    destroy_llvm_instance(self.llvm_inst)
    self.llvm_inst = create_llvm_instance()
    # get_build_log returns bytes (or None): decode it so the message is readable text rather than a bytes repr
    raise RuntimeError("QCOM Compilation Error" + (f": {log.decode(errors='replace')}" if log else ""))

  def compile(self, src) -> bytes:
    """Compile and link the OpenCL source string `src`, returning the binary produced by the library.

    Raises:
      RuntimeError: if either the compile or the link step reports an error (see `checked`).
    """
    ch = self.checked(compile_source(self.llvm_inst, self.chip_id, MODE_64BIT, b"", 0, 0, 0, src.encode(), 0, SRC_STR, None))
    # NOTE(review): if linking fails, `checked` raises before `ch` is freed -- confirm whether destroying the instance releases it
    lh = self.checked(link_program(self.llvm_inst, self.chip_id, MODE_64BIT, None, 1, ctypes.pointer(ctypes.c_void_p(ch))))
    handle_create_binary(lh, ctypes.byref(ptr:=ctypes.c_void_p()), ctypes.byref(sz:=ctypes.c_size_t()))
    for h in [ch, lh]: free_handle(h)
    # copy the binary into Python-owned bytes before releasing the native buffer
    ret = ctypes.string_at(ptr, sz.value)
    free_assembly(ptr)
    return ret

  def disassemble(self, lib: bytes):
    """Disassemble the slice of `lib` whose offset is stored at 0xc0 and whose length is stored at 0x100."""
    disas_adreno(lib[(ofs:=_read_lib(lib, 0xc0)):ofs+_read_lib(lib, 0x100)], self.chip_id)
|
||||
Reference in New Issue
Block a user