From ed1d784077cf46bbc981ce32e96b2cffb4cb4da5 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Sat, 27 Jul 2024 16:47:37 +0300 Subject: [PATCH] test profiler timer sync across devs (#5751) * test profiler timer sync across devs * more correct * typo --- test/test_hcq.py | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/test/test_hcq.py b/test/test_hcq.py index 7477636478..973b6dc89b 100644 --- a/test/test_hcq.py +++ b/test/test_hcq.py @@ -1,4 +1,4 @@ -import unittest, ctypes, struct, contextlib, tempfile, pathlib, json, time, atexit +import unittest, ctypes, struct, contextlib, tempfile, pathlib, json, time, atexit, random from tinygrad import Device, Tensor, dtypes from tinygrad.helpers import CI, getenv, Context from tinygrad.device import Buffer, BufferOptions, HCQCompiled @@ -370,8 +370,16 @@ class TestHCQ(unittest.TestCase): assert buf2.as_buffer()[0] == i @contextlib.contextmanager -def helper_collect_profile(*devs): - for dev in devs: dev._prof_setup() +def helper_collect_profile(*devs, random_setup_delay=False): + if random_setup_delay: + devs = list(devs) + for dev in devs: dev.synchronize() + random.shuffle(devs) + for dev in devs: + dev._prof_setup() + time.sleep(random.randint(1, 1000) / 1000) + else: + for dev in devs: dev._prof_setup() profile_dict = {} _, tmp = tempfile.mkstemp() @@ -475,5 +483,31 @@ class TestProfiler(unittest.TestCase): copyin_node_2 = helper_profile_filter_node(profile, name=f"CPU -> {Device.DEFAULT}:1")[0] helper_validate_node(copyin_node_2, profile=profile, pid_name=f"{Device.DEFAULT}:1", tid_name="DMA") + @unittest.skipIf(CI, "skip CI") + def test_profile_sync(self): + mv = memoryview(bytearray(struct.pack("ff", 0, 1))) + expected_diff = 100000 # sleep in us + + devs = [Device[f"{Device.DEFAULT}:{i}"] for i in range(6)] + bufs = [Buffer(f"{Device.DEFAULT}:{i}", 2, dtypes.float, options=BufferOptions(nolru=True)).ensure_allocated() for i in range(6)] + + # enqueue ops on different queues to check the timer sync + cpu_time = [] + with helper_collect_profile(*devs, random_setup_delay=True) as profile: + for i in range(6): + x = time.perf_counter_ns() + time.sleep(expected_diff / 1e6) + bufs[i].copyin(mv) + cpu_time.append(((time.perf_counter_ns() - x) / 1000) - expected_diff) + + nodes = [helper_profile_filter_node(profile, name=f"CPU -> {Device.canonicalize(f'{Device.DEFAULT}:{i}')}")[-1] for i in range(6)] + avg_diff = [] + for i in range(1, 6): + diff = nodes[i]['ts'] - nodes[i-1]['ts'] - cpu_time[i] + avg_diff.append(diff - expected_diff) + assert expected_diff * 0.998 < diff < expected_diff * 1.002, "more that 0.2% diff" + + print(f"total avg delay is {sum(avg_diff) / len(avg_diff)} us") + if __name__ == "__main__": unittest.main()