mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-13 00:15:35 +08:00
adaptive am_smi (#9319)
This commit is contained in:
@@ -8,9 +8,14 @@ from tinygrad.runtime.support.am.amdev import AMDev, AMMemoryManager
|
||||
from tinygrad.runtime.support.am.ip import AM_SOC21, AM_GMC, AM_IH, AM_PSP, AM_SMU, AM_GFX, AM_SDMA
|
||||
|
||||
AM_VERSION = 0xA0000002
|
||||
# Activity cutoff used by draw(): when the average GFX/UCLK activity is at or below this value the
# "PostDs" clock reading is displayed, otherwise the "PreDs" reading is used.
SMU_11_0_GFX_BUSY_THRESHOLD = 15
|
||||
|
||||
def bold(s):
  """Wrap ``s`` in the ANSI bold-on / attribute-reset escape sequences."""
  highlighted = f"\033[1m{s}\033[0m"
  return highlighted
|
||||
|
||||
def trim(s:str, length:int) -> str:
  """Shorten ``s`` so that it occupies at most ``length`` characters.

  Strings that already fit are returned unchanged. Longer strings are cut and terminated with
  "..." so the result is exactly ``length`` characters wide. When ``length`` is smaller than the
  ellipsis itself (including zero or negative values, which narrow terminals can produce), only
  as many dots as fit are returned, so the result never exceeds the requested width.
  """
  if len(s) <= length: return s
  # With fewer than 3 columns, `s[:length-3]` would use a negative bound and slice from the wrong
  # end, yielding a result longer than `length`; clamp to a (possibly empty) run of dots instead.
  if length < 3: return "..."[:max(length, 0)]
  return s[:length-3] + "..."
|
||||
|
||||
def color_temp(temp):
|
||||
if temp >= 87: return colored(f"{temp:>3}", "red")
|
||||
elif temp >= 80: return colored(f"{temp:>3}", "yellow")
|
||||
@@ -23,7 +28,10 @@ def draw_bar(percentage, width=40, fill='█', empty='░'):
|
||||
bar = fill * filled_width + empty * (width - filled_width)
|
||||
return f'[{bar}] {percentage*100:5.1f}%'
|
||||
|
||||
def same_line(strs:list[list[str]], split=8) -> list[str]:
|
||||
def same_line(strs:list[list[str]|None], split=8) -> list[str]:
|
||||
strs = [s for s in strs if s is not None]
|
||||
if len(strs) == 0: return []
|
||||
|
||||
ret = []
|
||||
max_width_in_block = [max(ansilen(line) for line in block) for block in strs]
|
||||
max_height = max(len(block) for block in strs)
|
||||
@@ -80,6 +88,7 @@ class SMICtx:
|
||||
self.opened_pcidevs = []
|
||||
self.opened_pci_resources = {}
|
||||
self.prev_lines_cnt = 0
|
||||
self.prev_terminal_width = 0
|
||||
|
||||
remove_parts = ["Advanced Micro Devices, Inc. [AMD/ATI]", "VGA compatible controller:"]
|
||||
lspci = subprocess.check_output(["lspci"]).decode("utf-8").splitlines()
|
||||
@@ -126,7 +135,16 @@ class SMICtx:
|
||||
def collect(self):
  """Return a dict mapping every device in ``self.devs`` to its SMU metrics, or to None when its PCI state is not "D0"."""
  metrics_by_dev = {}
  for dev in self.devs:
    # The SMU is only queried for devices whose pci_state is "D0".
    if dev.pci_state == "D0": metrics_by_dev[dev] = dev.smu.read_metrics()
    else: metrics_by_dev[dev] = None
  return metrics_by_dev
|
||||
|
||||
def draw(self):
|
||||
terminal_width, _ = shutil.get_terminal_size()
|
||||
terminal_width, terminal_height = shutil.get_terminal_size()
|
||||
if self.prev_terminal_width != terminal_width or self.prev_terminal_height != terminal_height:
|
||||
os.system('clear')
|
||||
self.prev_terminal_width, self.prev_terminal_height = terminal_width, terminal_height
|
||||
|
||||
activity_line_width = 50 if self.prev_terminal_width > 170 else \
|
||||
(30 if self.prev_terminal_width > 130 else \
|
||||
(16 if self.prev_terminal_width > 92 else \
|
||||
max(0, self.prev_terminal_width - 77)))
|
||||
max_col_size = terminal_width // 2
|
||||
|
||||
dev_metrics = self.collect()
|
||||
dev_content = []
|
||||
@@ -136,22 +154,34 @@ class SMICtx:
|
||||
[f"PCI State: {dev.pci_state}"] + [" "*107])
|
||||
continue
|
||||
|
||||
device_line = [f"{bold(dev.pcibus)}: {self.lspci[dev.pcibus[5:]]}"] + [""]
|
||||
activity_line = [f"GFX Activity {draw_bar(metrics.SmuMetrics.AverageGfxActivity / 100, 50)}"] \
|
||||
+ [f"MEM Activity {draw_bar(metrics.SmuMetrics.AverageUclkActivity / 100, 50)}"] + [""]
|
||||
device_line = [f"{bold(dev.pcibus)} {trim(self.lspci[dev.pcibus[5:]], max_col_size - 24)}"] + [""]
|
||||
activity_line = [f"GFX Activity {draw_bar(metrics.SmuMetrics.AverageGfxActivity / 100, activity_line_width)}"] \
|
||||
+ [f"MEM Activity {draw_bar(metrics.SmuMetrics.AverageUclkActivity / 100, activity_line_width)}"]
|
||||
|
||||
# draw_metrics_table(metrics, dev)
|
||||
temps_keys = [(k, name) for k, name in smu_v13_0_0.c__EA_TEMP_e__enumvalues.items()
|
||||
if k < smu_v13_0_0.TEMP_COUNT and metrics.SmuMetrics.AvgTemperature[k] != 0]
|
||||
temps_table = ["=== Temps (C) ==="] + [f"{name:<15}: {color_temp(metrics.SmuMetrics.AvgTemperature[k])}" for k, name in temps_keys]
|
||||
temps_table = ["=== Temps (°C) ==="] + [f"{name:<15}: {color_temp(metrics.SmuMetrics.AvgTemperature[k])}" for k, name in temps_keys]
|
||||
temps_table_compact = [f"Temps (°C): {color_temp(metrics.SmuMetrics.AvgTemperature[smu_v13_0_0.TEMP_HOTSPOT])} hotspot /" \
|
||||
+ f"{color_temp(metrics.SmuMetrics.AvgTemperature[smu_v13_0_0.TEMP_MEM])} mem"]
|
||||
|
||||
voltage_keys = [(k, name) for k, name in smu_v13_0_0.c__EA_SVI_PLANE_e__enumvalues.items() if k < smu_v13_0_0.SVI_PLANE_COUNT]
|
||||
power_table = ["=== Power ==="] \
|
||||
+ [f"Fan Speed: {metrics.SmuMetrics.AvgFanRpm} RPM"] \
|
||||
+ [f"Fan Power: {metrics.SmuMetrics.AvgFanPwm}%"] \
|
||||
+ [f"Power: {metrics.SmuMetrics.AverageSocketPower:>3}W " +
|
||||
draw_bar(metrics.SmuMetrics.AverageSocketPower / metrics.SmuMetrics.dGPU_W_MAX, 16)] \
|
||||
+ ["", "=== Voltages ==="] + [f"{name:<20}: {color_voltage(metrics.SmuMetrics.AvgVoltage[k])}" for k, name in voltage_keys]
|
||||
+ [f"Fan Power: {metrics.SmuMetrics.AvgFanPwm}%"]
|
||||
power_line = [f"Power: {metrics.SmuMetrics.AverageSocketPower:>3}W " +
|
||||
draw_bar(metrics.SmuMetrics.AverageSocketPower / metrics.SmuMetrics.dGPU_W_MAX, 16)]
|
||||
power_line_compact = [f"Power: {metrics.SmuMetrics.AverageSocketPower:>3}W " +
|
||||
draw_bar(metrics.SmuMetrics.AverageSocketPower / metrics.SmuMetrics.dGPU_W_MAX, activity_line_width)]
|
||||
|
||||
voltage_table = ["=== Voltages ==="] + [f"{name:<20}: {color_voltage(metrics.SmuMetrics.AvgVoltage[k])}" for k, name in voltage_keys]
|
||||
|
||||
gfx_freq = (metrics.SmuMetrics.AverageGfxclkFrequencyPostDs if metrics.SmuMetrics.AverageGfxActivity <= SMU_11_0_GFX_BUSY_THRESHOLD else \
|
||||
metrics.SmuMetrics.AverageGfxclkFrequencyPreDs)
|
||||
fclk_freq = (metrics.SmuMetrics.AverageFclkFrequencyPostDs if metrics.SmuMetrics.AverageUclkActivity <= SMU_11_0_GFX_BUSY_THRESHOLD else \
|
||||
metrics.SmuMetrics.AverageFclkFrequencyPreDs)
|
||||
mclk_freq = (metrics.SmuMetrics.AverageMemclkFrequencyPostDs if metrics.SmuMetrics.AverageUclkActivity <= SMU_11_0_GFX_BUSY_THRESHOLD else \
|
||||
metrics.SmuMetrics.AverageMemclkFrequencyPreDs)
|
||||
|
||||
frequency_table = ["=== Frequencies ===",
|
||||
f"GFXCLK Target : {metrics.SmuMetrics.AverageGfxclkFrequencyTarget:>4} MHz",
|
||||
@@ -166,6 +196,28 @@ class SMICtx:
|
||||
f"VCLK1 : {metrics.SmuMetrics.AverageVclk1Frequency:>4} MHz",
|
||||
f"DCLK1 : {metrics.SmuMetrics.AverageDclk1Frequency:>4} MHz"]
|
||||
|
||||
frequency_table_compact = ["=== Frequencies ===",
|
||||
f"GFXCLK: {gfx_freq:>4} MHz",
|
||||
f"FCLK : {fclk_freq:>4} MHz",
|
||||
f"MCLK : {mclk_freq:>4} MHz"]
|
||||
|
||||
if self.prev_terminal_width >= 231:
|
||||
power_table += power_line + [""] + voltage_table
|
||||
activity_line += [""]
|
||||
elif self.prev_terminal_width >= 171:
|
||||
power_table += power_line + [""] + frequency_table_compact
|
||||
activity_line += [""]
|
||||
frequency_table = None
|
||||
elif self.prev_terminal_width >= 121:
|
||||
temps_table = None
|
||||
frequency_table = frequency_table_compact
|
||||
activity_line += power_line_compact
|
||||
else:
|
||||
temps_table = None
|
||||
power_table = None
|
||||
frequency_table = None
|
||||
activity_line += power_line_compact
|
||||
|
||||
dev_content.append(device_line + activity_line + same_line([temps_table, power_table, frequency_table]))
|
||||
|
||||
raw_text = 'AM Monitor'.center(terminal_width) + "\n" + "=" * terminal_width + "\n\n"
|
||||
|
||||
Reference in New Issue
Block a user