am: large allocs aligned to 2mb to use 2mb pages (#15609)

This commit is contained in:
nimlgen
2026-04-05 18:01:31 +03:00
committed by GitHub
parent b2d5b29f45
commit 604cdbf2f7
4 changed files with 7 additions and 9 deletions

View File

@@ -27,7 +27,7 @@ class FakeAM:
self.gmc = FakeGMC(self)
self.mm = AMMemoryManager(self, self.vram_size, boot_size=(32 << 20), pt_t=AMPageTableEntry, va_shifts=[12, 21, 30, 39], va_bits=48,
first_lv=am.AMDGPU_VM_PDB2, va_base=AMMemoryManager.va_allocator.base,
palloc_ranges=[(1 << (i + 12), 0x1000) for i in range(9 * (3 - am.AMDGPU_VM_PDB2), -1, -1)])
palloc_ranges=[(1 << (i + 12), (2 << 20) if i >= 9 else 0x1000) for i in range(9 * (3 - am.AMDGPU_VM_PDB2), -1, -1)])
self.is_booting = False
self.ip_ver = {am.GC_HWIP: (11, 0, 0)}
def paddr2cpu(self, paddr:int) -> int:
  """Return `paddr` offset by the address `mv_address` reports for `self.vram`.

  NOTE(review): presumably this maps an offset inside the backing VRAM buffer to a
  CPU-side address; confirm against `mv_address` and the callers.
  """
  vram_base = mv_address(self.vram)
  return paddr + vram_base

View File

@@ -548,12 +548,6 @@ class PCIIface(PCIIfaceBase):
self.gpfifo_class, self.compute_class, self.dma_class = (gsp:=self.dev_impl.gsp).gpfifo_class, gsp.compute_class, gsp.dma_class
self.viddec_class = None
def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, force_devmem=False, **kwargs) -> HCQBuffer:
  """Round `size` up to a suitable granule, then delegate to the parent `alloc`.

  Granule selection:
    * `uncached` or `host` requests -> `mmap.PAGESIZE`
    * otherwise, sizes of at least 8MB -> 2MB
    * otherwise -> 4KB

  All other arguments (and any extra `kwargs`) are forwarded unchanged.
  """
  # Large allocations are padded to the huge-page size on purpose: huge pages are attempted
  # regardless, but an unaligned tail would be backed by 4KB pages and add TLB pressure.
  if uncached or host: granule = mmap.PAGESIZE
  elif size >= (8 << 20): granule = 2 << 20
  else: granule = 4 << 10
  aligned_size = round_up(size, granule)
  return super().alloc(aligned_size, host=host, uncached=uncached, cpu_access=cpu_access, contiguous=contiguous,
                       force_devmem=force_devmem, **kwargs)
def setup_usermode(self):
  """Return the pair `(0xce000000, view)`.

  `view` is whatever `self.pci_dev.map_bar` returns for BAR 0 with format 'I',
  offset 0xbb0000 and size 0x10000.
  NOTE(review): the meaning of the constant 0xce000000 is not visible here; confirm with callers.
  """
  bar_view = self.pci_dev.map_bar(bar=0, fmt='I', off=0xbb0000, size=0x10000)
  return 0xce000000, bar_view
def setup_vm(self, vaspace):
  """No-op: `vaspace` is accepted and ignored; returns None."""
  return None
def setup_gpfifo_vm(self, gpfifo):
  """No-op: `gpfifo` is accepted and ignored; returns None."""
  return None

View File

@@ -198,8 +198,8 @@ class AMDev:
# Memory manager & firmware
self.mm = AMMemoryManager(self, self.vram_size - self.reserved_vram_size, boot_size=(32 << 20), pt_t=AMPageTableEntry, va_shifts=[12, 21, 30, 39],
va_bits=48, first_lv=am.AMDGPU_VM_PDB2, va_base=AMMemoryManager.va_allocator.base,
palloc_ranges=[(1 << (i + 12), 0x1000) for i in range(9 * (3 - am.AMDGPU_VM_PDB2), -1, -1)], reserve_ptable=not self.large_bar)
va_bits=48, first_lv=am.AMDGPU_VM_PDB2, va_base=AMMemoryManager.va_allocator.base, reserve_ptable=not self.large_bar,
palloc_ranges=[(1 << (i + 12), (2 << 20) if i >= 9 else 0x1000) for i in range(9 * (3 - am.AMDGPU_VM_PDB2), -1, -1)])
self.fw = AMFirmware(self)
# Initialize IP blocks

View File

@@ -248,6 +248,10 @@ class PCIIfaceBase:
def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, force_devmem=False, **kwargs) -> HCQBuffer:
should_use_sysmem = host or ((cpu_access if self.is_bar_small() else (uncached and cpu_access)) and not force_devmem)
# Align size to huge pages for large allocations, otherwise the unaligned tail falls back to 4KB pages, increasing TLB pressure.
size = round_up(size, mmap.PAGESIZE if should_use_sysmem else ((2 << 20) if size >= (8 << 20) else (4 << 10)))
if should_use_sysmem:
vaddr = self.dev_impl.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
memview, paddrs = self.pci_dev.alloc_sysmem(size, vaddr=vaddr, contiguous=contiguous)