diff --git a/extra/nv_gpu_driver/fw.h b/extra/nv_gpu_driver/fw.h new file mode 100644 index 0000000000..819c485a77 --- /dev/null +++ b/extra/nv_gpu_driver/fw.h @@ -0,0 +1,25 @@ +/* adapted from linux/drivers/gpu/drm/nouveau/include/nvfw/fw.h */ +/* SPDX-License-Identifier: MIT */ +#ifndef __NVFW_FW_H__ +#define __NVFW_FW_H__ +typedef unsigned int u32; + +struct nvfw_bin_hdr { + u32 bin_magic; + u32 bin_ver; + u32 bin_size; + u32 header_offset; + u32 data_offset; + u32 data_size; +}; + +struct nvfw_bl_desc { + u32 start_tag; + u32 dmem_load_off; + u32 code_off; + u32 code_size; + u32 data_off; + u32 data_size; +}; + +#endif diff --git a/extra/nv_gpu_driver/hs.h b/extra/nv_gpu_driver/hs.h new file mode 100644 index 0000000000..feeb7a0e43 --- /dev/null +++ b/extra/nv_gpu_driver/hs.h @@ -0,0 +1,52 @@ +/* adapted from linux/drivers/gpu/drm/nouveau/include/nvfw/hs.h */ +/* SPDX-License-Identifier: MIT */ +#ifndef __NVFW_HS_H__ +#define __NVFW_HS_H__ +typedef unsigned int u32; + +struct nvfw_hs_header { + u32 sig_dbg_offset; + u32 sig_dbg_size; + u32 sig_prod_offset; + u32 sig_prod_size; + u32 patch_loc; + u32 patch_sig; + u32 hdr_offset; + u32 hdr_size; +}; + +struct nvfw_hs_header_v2 { + u32 sig_prod_offset; + u32 sig_prod_size; + u32 patch_loc; + u32 patch_sig; + u32 meta_data_offset; + u32 meta_data_size; + u32 num_sig; + u32 header_offset; + u32 header_size; +}; + +struct nvfw_hs_load_header { + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 data_dma_base; + u32 data_size; + u32 num_apps; + u32 apps[]; +}; + +struct nvfw_hs_load_header_v2 { + u32 os_code_offset; + u32 os_code_size; + u32 os_data_offset; + u32 os_data_size; + u32 num_apps; + struct { + u32 offset; + u32 size; + u32 data_offset; + u32 data_size; + } app[]; +}; +#endif diff --git a/tinygrad/runtime/autogen/__init__.py b/tinygrad/runtime/autogen/__init__.py index 044bc37fdb..94b500affe 100644 --- a/tinygrad/runtime/autogen/__init__.py +++ b/tinygrad/runtime/autogen/__init__.py @@ -81,7 +81,7 @@ def __getattr__(nm): *[f"{{}}/src/nvidia/inc/kernel/vgpu/{s}.h" for s in ["rpc_headers", "rpc_global_enums"]], "{}/src/common/uproc/os/common/include/libos_init_args.h", "{}/src/common/shared/msgq/inc/msgq/msgq_priv.h", "{}/src/nvidia/generated/g_rpc-structures.h", root/"extra/nv_gpu_driver/g_rpc-message-header.h", root/"extra/nv_gpu_driver/gsp_static_config.h", - root/"extra/nv_gpu_driver/vbios.h", root/"extra/nv_gpu_driver/pci_exp_table.h" + *[root/f"extra/nv_gpu_driver/{s}.h" for s in ["vbios", "pci_exp_table", "fw", "hs"]] ], args=[ "-DRPC_MESSAGE_STRUCTURES", "-DRPC_STRUCTURES", "-include", "{}/src/common/sdk/nvidia/inc/nvtypes.h", "-I{}/src/nvidia/generated", "-I{}/src/common/inc", "-I{}/src/nvidia/inc", "-I{}/src/nvidia/interface/", "-I{}/src/nvidia/inc/kernel", "-I{}/src/nvidia/inc/libraries", diff --git a/tinygrad/runtime/autogen/nv.py b/tinygrad/runtime/autogen/nv.py index 2c60cb4734..6fb82400de 100644 --- a/tinygrad/runtime/autogen/nv.py +++ b/tinygrad/runtime/autogen/nv.py @@ -4683,6 +4683,80 @@ class struct__NV_PCI_DATA_EXT_STRUCT(c.Struct): struct__NV_PCI_DATA_EXT_STRUCT.register_fields([('signature', NvU32, 0), ('nvPciDataExtRev', NvU16, 4), ('nvPciDataExtLen', NvU16, 6), ('subimageLen', NvU16, 8), ('privLastImage', NvU8, 10), ('flags', NvU8, 11)]) NV_PCI_DATA_EXT_STRUCT: TypeAlias = struct__NV_PCI_DATA_EXT_STRUCT PNV_PCI_DATA_EXT_STRUCT: TypeAlias = c.POINTER[struct__NV_PCI_DATA_EXT_STRUCT] +u32: TypeAlias = ctypes.c_uint32 +@c.record +class struct_nvfw_bin_hdr(c.Struct): + SIZE = 24 + bin_magic: int + bin_ver: int + bin_size: int + header_offset: int + data_offset: int + data_size: int +struct_nvfw_bin_hdr.register_fields([('bin_magic', u32, 0), ('bin_ver', u32, 4), ('bin_size', u32, 8), ('header_offset', u32, 12), ('data_offset', u32, 16), ('data_size', u32, 20)]) +@c.record +class struct_nvfw_bl_desc(c.Struct): + SIZE = 24 + start_tag: int + dmem_load_off: int + code_off: int + code_size: int + data_off: int + data_size: int +struct_nvfw_bl_desc.register_fields([('start_tag', u32, 0), ('dmem_load_off', u32, 4), ('code_off', u32, 8), ('code_size', u32, 12), ('data_off', u32, 16), ('data_size', u32, 20)]) +@c.record +class struct_nvfw_hs_header(c.Struct): + SIZE = 32 + sig_dbg_offset: int + sig_dbg_size: int + sig_prod_offset: int + sig_prod_size: int + patch_loc: int + patch_sig: int + hdr_offset: int + hdr_size: int +struct_nvfw_hs_header.register_fields([('sig_dbg_offset', u32, 0), ('sig_dbg_size', u32, 4), ('sig_prod_offset', u32, 8), ('sig_prod_size', u32, 12), ('patch_loc', u32, 16), ('patch_sig', u32, 20), ('hdr_offset', u32, 24), ('hdr_size', u32, 28)]) +@c.record +class struct_nvfw_hs_header_v2(c.Struct): + SIZE = 36 + sig_prod_offset: int + sig_prod_size: int + patch_loc: int + patch_sig: int + meta_data_offset: int + meta_data_size: int + num_sig: int + header_offset: int + header_size: int +struct_nvfw_hs_header_v2.register_fields([('sig_prod_offset', u32, 0), ('sig_prod_size', u32, 4), ('patch_loc', u32, 8), ('patch_sig', u32, 12), ('meta_data_offset', u32, 16), ('meta_data_size', u32, 20), ('num_sig', u32, 24), ('header_offset', u32, 28), ('header_size', u32, 32)]) +@c.record +class struct_nvfw_hs_load_header(c.Struct): + SIZE = 20 + non_sec_code_off: int + non_sec_code_size: int + data_dma_base: int + data_size: int + num_apps: int + apps: c.Array[ctypes.c_uint32, Literal[0]] +struct_nvfw_hs_load_header.register_fields([('non_sec_code_off', u32, 0), ('non_sec_code_size', u32, 4), ('data_dma_base', u32, 8), ('data_size', u32, 12), ('num_apps', u32, 16), ('apps', c.Array[u32, Literal[0]], 20)]) +@c.record +class struct_nvfw_hs_load_header_v2(c.Struct): + SIZE = 20 + os_code_offset: int + os_code_size: int + os_data_offset: int + os_data_size: int + num_apps: int + app: c.Array[struct_nvfw_hs_load_header_v2_app, Literal[0]] +@c.record +class struct_nvfw_hs_load_header_v2_app(c.Struct): + SIZE = 16 + offset: int + size: int + data_offset: int + data_size: int +struct_nvfw_hs_load_header_v2_app.register_fields([('offset', u32, 0), ('size', u32, 4), ('data_offset', u32, 8), ('data_size', u32, 12)]) +struct_nvfw_hs_load_header_v2.register_fields([('os_code_offset', u32, 0), ('os_code_size', u32, 4), ('os_data_offset', u32, 8), ('os_data_size', u32, 12), ('num_apps', u32, 16), ('app', c.Array[struct_nvfw_hs_load_header_v2_app, Literal[0]], 20)]) GSP_FW_WPR_META_VERIFIED = 0xa0a0a0a0a0a0a0a0 GSP_FW_WPR_META_REVISION = 1 GSP_FW_WPR_META_MAGIC = 0xdc3aae21371a60b3 diff --git a/tinygrad/runtime/support/nv/ip.py b/tinygrad/runtime/support/nv/ip.py index 48ae47646e..ae47a8aaac 100644 --- a/tinygrad/runtime/support/nv/ip.py +++ b/tinygrad/runtime/support/nv/ip.py @@ -169,16 +169,19 @@ class NV_FLCN(NV_IP): _, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd)) def prep_booter(self): - image = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "image_prod_data") - sig = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "sig_prod_data") - header = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "header_prod_data") - patch_loc = int.from_bytes(self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "patch_loc_data"), 'little') - sig_len = len(sig) // int.from_bytes(self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "num_sigs_data"), 'little') + sha = {"ga102":"4497e3eff7e95c774b8a569d17b27c08c9650158d10b229d2be81cdcad9a085b", + "ad102":"8b293e19b637c5e22c87a2428d1c71bb13e0904e8a88ac6b3c6c1f2679c6e37a"}[self.nvdev.fw_name] + h = nv.struct_nvfw_bin_hdr.from_buffer_copy(b:=fetch_fw(f"nvidia/{self.nvdev.fw_name}/gsp", "booter_load-570.144.bin", sha)) + lh = nv.struct_nvfw_hs_load_header_v2.from_buffer_copy(b, (hs:=nv.struct_nvfw_hs_header_v2.from_buffer_copy(b, h.header_offset)).header_offset) + app = nv.struct_nvfw_hs_load_header_v2_app.from_buffer_copy(b, hs.header_offset + ctypes.sizeof(nv.struct_nvfw_hs_load_header_v2)) + + patch_loc, patch_sig = struct.unpack_from("= 0x1a else (2, False) self.flcn:NV_FLCN|NV_FLCN_COT = NV_FLCN_COT(self) if self.fmc_boot else NV_FLCN(self) @@ -161,14 +161,6 @@ class NVDev: url = f"https://raw.githubusercontent.com/NVIDIA/open-gpu-kernel-modules/8ec351aeb96a93a4bb69ccc12a542bf8a8df2b6f/{file}" return fetch(url, subdir="defines").read_text() - def extract_fw(self, file:str, dname:str) -> bytes: - # Extracts the firmware binary from the given header - tname = file.replace("kgsp", "kgspGet") - text = self._download(f"src/nvidia/generated/g_bindata_{tname}_{self.fw_name}.c") - info, sl = text[text[:text.index(dnm:=f'{file}_{self.fw_name}_{dname}')].rindex("COMPRESSION:"):][:16], text[text.index(dnm) + len(dnm) + 7:] - image = bytes.fromhex(sl[:sl.find("};")].strip().replace("0x", "").replace(",", "").replace(" ", "").replace("\n", "")) - return gzip.decompress(struct.pack("<4BL2B", 0x1f, 0x8b, 8, 0, 0, 0, 3) + image) if "COMPRESSION: YES" in info else image - def include(self, file:str): def _do_eval(s:str): return eval(s) # pylint: disable=eval-used