mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-13 16:37:04 +08:00
136 lines
4.0 KiB
Python
136 lines
4.0 KiB
Python
import pickle, os, sys, functools, numpy as np
|
|
from pathlib import Path
|
|
|
|
# DEV is set before tinygrad is imported; the assert at the end of this
# section expects Device.DEFAULT to come out as "CUDA".
os.environ["DEV"] = "CUDA"
# Keep a caller-provided PROFILE value, otherwise fall back to "2".
os.environ.setdefault("PROFILE", "2")
from extra.nv_pma.cupti import cu_prof_ext
cu_prof_ext.enable_auto()

from tinygrad import Tensor, Device

# Both IOCTL and GRAB_PMA must be set to a non-empty value for a collection run.
if not (os.environ.get("IOCTL") and os.environ.get("GRAB_PMA")):
  print("Usage: GRAB_PMA=1 IOCTL=1 IOCTL_PRINT=0 python3 extra/nv_pma/collect.py")
  sys.exit(1)

assert Device.DEFAULT == "CUDA", "only works with CUDA"
|
|
|
|
from collections.abc import Callable

# Directory next to this script that receives one pickle per collected test.
EXAMPLES_DIR = Path(__file__).parent / "examples"
# Registry of (test name, zero-argument collector) pairs; each collector returns
# a dict of captured data. Entries are appended by the pcsampling_test decorator.
_collectors: list[tuple[str, Callable[[], dict]]] = []
|
|
|
|
def pcsampling_test(name: str):
  """Register a workload as a named PC-sampling collector.

  The decorated function is replaced by a wrapper that clears the PMA raw dumps
  and CUPTI PC samples held by ``cu_prof_ext``, runs the workload, synchronizes
  the CUDA device and returns what was captured. The wrapper is also appended
  to ``_collectors`` under ``name``.

  Args:
    name: Identifier stored in the registry and in the result's ``test_name``.

  Returns:
    A decorator for a zero-argument workload function. Calling the decorated
    function returns a dict with the keys ``test_name``, ``pma_raw_dumps`` and
    ``cupti_pc_samples``.
  """
  def decorator(fn):
    @functools.wraps(fn)
    def wrapper():
      # Reset both capture buffers before the workload runs.
      cu_prof_ext.clear_pma_raw_dumps()
      cu_prof_ext.clear_cupti_pc_samples()

      fn()
      # Synchronize the device before reading the captured data back.
      Device["CUDA"].synchronize()

      # list() copies the results; presumably the getters hand out containers
      # that the clear_* calls above reuse -- TODO confirm in cu_prof_ext.
      return {
        "test_name": name,
        "pma_raw_dumps": list(cu_prof_ext.get_pma_raw_dumps()),
        "cupti_pc_samples": list(cu_prof_ext.get_cupti_pc_samples()),
      }
    _collectors.append((name, wrapper))
    return wrapper
  return decorator
|
|
|
|
# Refs
|
|
|
|
@pcsampling_test("test_plus")
def test_plus():
  """Add two four-element tensors built from Python lists."""
  lhs, rhs = Tensor([1, 2, 3, 4]), Tensor([5, 6, 7, 8])
  total = lhs + rhs
  total.realize()
|
|
|
|
@pcsampling_test("test_matmul")
def test_matmul():
  """Multiply two random 12x12 float32 matrices."""
  shape = (12, 12)
  lhs = Tensor(np.random.rand(*shape).astype(np.float32))
  rhs = Tensor(np.random.rand(*shape).astype(np.float32))
  product = lhs @ rhs
  product.realize()
|
|
|
|
@pcsampling_test("test_reduce_sum")
def test_reduce_sum():
  """Sum a random float32 vector of 1024 elements."""
  values = np.random.rand(1024).astype(np.float32)
  Tensor(values).sum().realize()
|
|
|
|
@pcsampling_test("test_reduce_max")
def test_reduce_max():
  """Take the maximum of a random float32 vector of 1024 elements."""
  values = np.random.rand(1024).astype(np.float32)
  Tensor(values).max().realize()
|
|
|
|
@pcsampling_test("test_exp")
def test_exp():
  """Apply exp() to a random float32 vector of 256 elements."""
  values = np.random.rand(256).astype(np.float32)
  Tensor(values).exp().realize()
|
|
|
|
@pcsampling_test("test_softmax")
def test_softmax():
  """Run softmax() with default arguments on a random 64x64 float32 matrix."""
  values = np.random.rand(64, 64).astype(np.float32)
  Tensor(values).softmax().realize()
|
|
|
|
@pcsampling_test("test_conv2d")
def test_conv2d():
  """Run conv2d() on a random (1, 3, 32, 32) input with a random (8, 3, 3, 3) weight."""
  image = Tensor(np.random.rand(1, 3, 32, 32).astype(np.float32))
  kernel = Tensor(np.random.rand(8, 3, 3, 3).astype(np.float32))
  convolved = image.conv2d(kernel)
  convolved.realize()
|
|
|
|
@pcsampling_test("test_large_matmul")
def test_large_matmul():
  """Multiply two random 128x128 float32 matrices."""
  shape = (128, 128)
  lhs = Tensor(np.random.rand(*shape).astype(np.float32))
  rhs = Tensor(np.random.rand(*shape).astype(np.float32))
  product = lhs @ rhs
  product.realize()
|
|
|
|
@pcsampling_test("test_elementwise_chain")
def test_elementwise_chain():
  """Chain add, multiply, subtract and relu over a random 512-element float32 vector."""
  base = Tensor(np.random.rand(512).astype(np.float32))
  shifted = base + 1
  scaled = shifted * 2
  offset = scaled - 0.5
  offset.relu().realize()
|
|
|
|
@pcsampling_test("test_broadcast")
def test_broadcast():
  """Add a random (64, 1) tensor to a random (1, 64) tensor."""
  column = Tensor(np.random.rand(64, 1).astype(np.float32))
  row = Tensor(np.random.rand(1, 64).astype(np.float32))
  combined = column + row
  combined.realize()
|
|
|
|
@pcsampling_test("test_plus_big")
def test_plus_big():
  """Add two random 64x32 float32 matrices."""
  shape = (64, 32)
  lhs = Tensor(np.random.rand(*shape).astype(np.float32))
  rhs = Tensor(np.random.rand(*shape).astype(np.float32))
  total = lhs + rhs
  total.realize()
|
|
|
|
def save_example(name: str, data: dict):
  """Print a short summary of ``data`` and pickle it to ``EXAMPLES_DIR/<name>.pkl``.

  Args:
    name: Stem of the output file.
    data: Dict with a ``pma_raw_dumps`` list of sized buffers and a
      ``cupti_pc_samples`` list of records that each carry a ``samples`` count.
  """
  raw_dumps = data['pma_raw_dumps']
  pc_records = data['cupti_pc_samples']

  total_bytes = sum(map(len, raw_dumps))
  total_samples = sum(record['samples'] for record in pc_records)
  print(f" PMA: {len(raw_dumps)} buffers, {total_bytes} bytes")
  print(f" CUPTI: {len(pc_records)} records, {total_samples} samples")

  outfile = EXAMPLES_DIR / f"{name}.pkl"
  with outfile.open("wb") as sink:
    pickle.dump(data, sink)
  print(f" Saved to {outfile}")
|
|
|
|
if __name__ == "__main__":
  import traceback

  EXAMPLES_DIR.mkdir(exist_ok=True)

  # Run specific tests if provided as arguments, otherwise run all
  test_names = sys.argv[1:]
  if test_names:
    available = [name for name, _ in _collectors]
    collectors = [(name, fn) for name, fn in _collectors if name in test_names]
    # Report every requested name that matched nothing, even when other names
    # did match, so a typo is not silently skipped.
    unknown = [name for name in test_names if name not in available]
    if unknown:
      print(f"Unknown tests: {unknown}")
      print(f"Available: {available}")
    if not collectors:
      sys.exit(1)
  else:
    collectors = _collectors

  failed = []
  for name, collect_fn in collectors:
    print(f"\nCollecting {name}...")
    try:
      data = collect_fn()
      save_example(name, data)
    except Exception as e:
      # Best effort: report the failure and keep collecting the remaining tests.
      print(f" ERROR: {e}")
      traceback.print_exc()
      failed.append(name)

  # Make failures visible to the caller through the exit status.
  if failed:
    print(f"\nFailed: {failed}")
    sys.exit(1)
|