Files
dragonpilot/tinygrad_repo/test/external/external_benchmark_kernel_launch.py
T
Adeeb Shihadeh 100f89a161 openpilot v0.9.9 release (#35334)
* openpilot v0.9.9 release

date: 2025-06-05T19:54:08
master commit: 8aadf02b2fd91f4e1285e18c2c7feb32d93b66f5

* AGNOS 12.4 (#35558)

agnos12.4

---------

Co-authored-by: Vehicle Researcher <user@comma.ai>
Co-authored-by: Maxime Desroches <desroches.maxime@gmail.com>
2025-06-17 16:32:08 -07:00

39 lines
1.1 KiB
Python

import time
from tinygrad import Tensor, TinyJit, Device, Context
from tinygrad.helpers import Profiling, Timing, GlobalCounters
# python3 test/test_speed_v_torch.py TestSpeed.test_add_a
@TinyJit
def plus(a:Tensor, b:Tensor):
  """Return `a + b`.

  The function is wrapped in `TinyJit`, so the benchmark in this file times
  the decorated call path rather than a bare tensor addition.
  """
  total = a + b
  return total
if __name__ == "__main__":
  # Two realized one-element operands; plus(lhs, rhs) is expected to yield 2.
  lhs = Tensor([1]).realize()
  rhs = Tensor([1]).realize()

  # Phase 1: five launches, each timed by the Timing context manager with the
  # device synchronize inside the timed region.
  for i in range(5):
    with Timing(prefix=f"{i}:"):
      out = plus(lhs, rhs)
      Device[out.device].synchronize()
  # Sanity-check the result of the last launch.
  assert out.item() == 2

  # Phase 2 ("nosync"): the clock is stopped right after the call returns and
  # the device is only synchronized afterwards, so the wait is not measured.
  for i in range(5):
    launch_start = time.perf_counter()
    out = plus(lhs, rhs)
    et = time.perf_counter() - launch_start
    Device[out.device].synchronize()
    print(f"nosync {i}: {et*1e6:.2f} us")

  # Phase 3 ("precise"): the synchronize happens before the clock is stopped,
  # so the measurement covers the call plus the wait for the device.
  for i in range(5):
    launch_start = time.perf_counter()
    out = plus(lhs, rhs)
    Device[out.device].synchronize()
    et = time.perf_counter() - launch_start
    print(f"precise {i}: {et*1e6:.2f} us")

  # Nothing is expected to have been added to the kernel-time counter so far.
  assert GlobalCounters.time_sum_s == 0

  # Phase 4: one synchronized launch under DEBUG=2, then compare wall time with
  # GlobalCounters.time_sum_s.
  # NOTE(review): presumably DEBUG=2 is what makes time_sum_s accumulate
  # per-kernel time -- confirm against tinygrad's Context/DEBUG docs.
  with Context(DEBUG=2):
    launch_start = time.perf_counter()
    out = plus(lhs, rhs)
    Device[out.device].synchronize()
    et = time.perf_counter() - launch_start
  # The 1e-12 term keeps the ratio finite if the counter is still zero.
  print(f"kernel {GlobalCounters.time_sum_s*1e3:.2f} ms / full {et*1e3:.2f} ms -- {et/(GlobalCounters.time_sum_s+1e-12):.2f} x")

  # Phase 5: one more launch under the Profiling context manager.
  with Profiling():
    out = plus(lhs, rhs)