remove getenv(CI) (#16365)

The CI check is gone everywhere except test_interop, because torch MPS does not work in GitHub Actions.
This commit is contained in:
Christopher Milan
2026-05-25 17:23:33 -07:00
committed by GitHub
parent 695a0069ed
commit 8ddd1328df
24 changed files with 60 additions and 90 deletions

View File

@@ -1,7 +1,6 @@
import unittest
import numpy as np
from test.helpers import CI
from tinygrad.helpers import BEAM, Timing, prod
from tinygrad import Variable, Device, Tensor
from tinygrad.nn import Conv2d
@@ -65,7 +64,7 @@ class TestBeamSearch(unittest.TestCase):
actual = a.numpy()
np.testing.assert_allclose(actual, desired)
@unittest.skipIf(CI, "flaky. CL_OUT_OF_RESOURCES")
@unittest.skip("flaky. CL_OUT_OF_RESOURCES")
def test_conv_beam(self):
c = Conv2d(3, 16, (3,3))
x = rand(1,3,32,32)

View File

@@ -2,13 +2,13 @@ import contextlib, unittest, math
import numpy as np
import torch
from typing import Any, List
from tinygrad.helpers import getenv, DEBUG, EMULATED_DTYPES
from tinygrad.helpers import getenv, DEBUG, EMULATED_DTYPES, DEV
from tinygrad.dtype import DType, DTYPES_DICT, least_upper_dtype, fp8_to_float, float_to_fp8, _to_np_dtype, _to_torch_dtype, truncate
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.renderer.nir import NIRRenderer
from tinygrad import Context, Device, Tensor, dtypes
from hypothesis import given, settings, strategies as strat
from test.helpers import rand_for_dtype, CI
from test.helpers import rand_for_dtype
from test.unit.test_dtype_spec import _assert_eq, core_dtypes, dtype_ints, dtype_floats, FP8E4M3_MAX, FP8E5M2_MAX, FP8E4M3FNUZ_MAX, FP8E5M2FNUZ_MAX
import pytest
pytestmark = pytest.mark.filterwarnings("ignore")
@@ -225,7 +225,7 @@ class TestFloatDType(TestDType):
@unittest.skipUnless(dtypes.double in supported_dtypes, f"no double on {Device.DEFAULT}")
class TestDoubleDType(TestDType):
DTYPE = dtypes.double
@unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or \
@unittest.skipIf((DEV.interface.startswith("MOCK") and Device.DEFAULT in {"CUDA", "NV"}) or \
isinstance(Device[Device.DEFAULT].renderer, (PTXRenderer, NIRRenderer)), "conversion not supported on CI CUDA, PTX, and NIR") # TODO: why not?
def test_float64_increased_precision(self):
for func in [

View File

@@ -7,7 +7,6 @@ from tinygrad.runtime.ops_python import from_storage_scalar
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.renderer.nir import NIRRenderer
from tinygrad.uop import Ops
from test.helpers import CI
import numpy as np
import pytest
from hypothesis import assume, given, strategies as strat, settings
@@ -331,12 +330,12 @@ class TestDTypeALU(unittest.TestCase):
@given(ht.bool, ht.bool, strat.sampled_from(((operator.add, operator.add), (operator.mul, operator.mul))))
def test_bool(self, a, b, op): universal_test(a, b, dtypes.bool, op)
@unittest.skipIf(not CI and Device.DEFAULT == "METAL", "broken on local M3")
@given(ht.int32, ht.int32, ht.float32, strat.sampled_from(integer_binary_operations), strat.sampled_from(binary_operations))
def test_int32_midcast_float(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32)
# Metal and CUDA and HIP and NIR behave differently than numpy in CI for overflows
skip_overflow = (CI and Device.DEFAULT in {"AMD", "NV", "CUDA"}) or isinstance(Device[Device.DEFAULT].renderer, NIRRenderer)
# Metal and (MOCK)CUDA and HIP and NIR behave differently than numpy for overflows
skip_overflow = ((DEV.interface.startswith("MOCK") and Device.DEFAULT in {"AMD", "NV", "CUDA"})
or isinstance(Device[Device.DEFAULT].renderer, NIRRenderer))
@given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations))

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python
import unittest
import unittest, os
import torch
import numpy as np
@@ -7,7 +7,6 @@ from tinygrad.helpers import DEV
from tinygrad.tensor import Tensor
from tinygrad.device import Device
from tinygrad.dtype import _from_torch_dtype, _to_torch_dtype
from test.helpers import CI
MOCKGPU = DEV.interface.startswith("MOCK")
@@ -28,7 +27,7 @@ class TestInterop(unittest.TestCase):
tg_out = tg_data[:, :, 0] * 0.2989 + tg_data[:, :, 1] * 0.5870 + tg_data[:, :, 2] * 0.1140
tg_res = tg_out.numpy()
if self.torch_device == "mps" and CI:
if self.torch_device == "mps" and os.getenv("CI", "") != "":
# MPS backend out of memory: https://discuss.pytorch.org/t/mps-back-end-out-of-memory-on-github-action/189773
# Calculate expected value on cpu.
inp = inp.cpu()

View File

@@ -1,4 +1,4 @@
import time, math, unittest, functools, platform, warnings
import time, math, unittest, functools, platform, warnings, sys
import numpy as np
from typing import List, Callable
import torch
@@ -7,7 +7,6 @@ from tinygrad import Tensor, Device, dtypes
from tinygrad.tensor import _to_np_dtype
from tinygrad.renderer.cstyle import QCOMCLRenderer
from tinygrad.renderer.nir import NIRRenderer
from test.helpers import CI
TINY_BACKEND = getenv("TINY_BACKEND")
if TINY_BACKEND:
@@ -74,7 +73,7 @@ def helper_test_op(shps, torch_fxn, tinygrad_fxn=None, atol=1e-6, rtol=1e-3, gra
for i, (t, torch_grad) in enumerate(zip(tiny_grads, torch_grads)):
compare(f"backward pass tensor {i}", t.numpy(), torch_grad.detach().cpu().numpy(), atol=grad_atol, rtol=grad_rtol)
if not CI:
if sys.stdout.isatty():
print("\ntesting %40r torch/tinygrad fp: %.2f / %.2f ms bp: %.2f / %.2f ms " % \
(shps, torch_fp*1000, tinygrad_fp*1000, torch_fbp*1000, tinygrad_fbp*1000), end="")
@@ -103,7 +102,7 @@ class TestOps(unittest.TestCase):
with self.assertRaises(expected) as tinygrad_cm:
tinygrad_fxn(*tst)
if exact: self.assertEqual(str(torch_cm.exception), str(tinygrad_cm.exception))
if not CI: print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="")
if sys.stdout.isatty(): print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="")
def test_full_like(self):
a = Tensor([[1,2,3],[4,5,6]], dtype=dtypes.float32)

View File

@@ -5,7 +5,6 @@ from tinygrad.device import Buffer, BufferSpec, Compiled, ProfileDeviceEvent, Pr
from tinygrad.runtime.support.hcq import HCQCompiled
from tinygrad.engine.realize import get_runtime
from tinygrad.codegen import to_program
from test.helpers import CI
MOCKGPU = DEV.interface.startswith("MOCK")
def _dev_base(d):
@@ -145,7 +144,8 @@ class TestProfiler(unittest.TestCase):
assert len(graph_evs) == 2, "2 graph events are expected"
assert len(graph_evs[0].ents) == 2, "two entities are expected"
@unittest.skipIf(CI or not issubclass(type(Device[Device.DEFAULT]), HCQCompiled), "skip CI")
@unittest.skipIf(MOCKGPU, "skip MOCKGPU")
@unittest.skipUnless(issubclass(type(Device[Device.DEFAULT]), HCQCompiled), "must be HCQ")
def test_dev_jitter_matrix(self):
dev_cnt = 6
try: devs = [Device[f"{Device.DEFAULT}:{i}"] for i in range(dev_cnt)]

View File

@@ -1,14 +1,14 @@
import unittest, math
from tinygrad import dtypes, Tensor, Device
from tinygrad.helpers import getenv
from tinygrad.helpers import getenv, DEV
from tinygrad.codegen import to_program
from tinygrad.uop.ops import Ops
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.renderer.nir import NIRRenderer
from tinygrad.renderer.isa.x86 import X86Renderer
from test.helpers import not_support_multi_device, needs_second_gpu, CI
from test.helpers import not_support_multi_device, needs_second_gpu
from test.unit.test_randomness import equal_distribution, normal_test
import numpy as np
@@ -48,7 +48,7 @@ class TestRandomness(unittest.TestCase):
assert nx[nx == 0].size > 0
equal_distribution(lambda *x: Tensor.rand(*x, dtype=dtypes.float16), torch.rand, lambda x: np.random.rand(*x), shape=(2, N, N))
@unittest.skipIf(CI and Device.DEFAULT in {"NV", "CUDA"}, "gpuocelot doesn't support certain ops needed for threefry")
@unittest.skipIf(DEV.interface.startswith("MOCK") and Device.DEFAULT in {"NV", "CUDA"}, "gpuocelot doesn't support certain ops needed for threefry")
def test_threefry_against_reference(self):
Tensor.manual_seed(1337)

View File

@@ -10,9 +10,8 @@ from hypothesis import assume, given, strategies as strat
from tinygrad import nn, dtypes, Device, Tensor, Variable
from tinygrad.dtype import DType
from tinygrad.uop.ops import UOp, Ops, UPat
from tinygrad.helpers import DEBUG, OSX, GlobalCounters, Context, getenv, all_same, temp
from tinygrad.helpers import DEBUG, DEV, OSX, GlobalCounters, Context, getenv, all_same, temp
from tinygrad.engine.realize import compile_linear, run_linear
from test.helpers import CI
supported_dtypes = Device[Device.DEFAULT].renderer.supported_dtypes()
@@ -115,7 +114,6 @@ class TestSchedule(unittest.TestCase):
run_linear(*check_schedule(b, 1))
np.testing.assert_allclose(b.numpy(), np.broadcast_to(a.numpy().astype(np.float16), (2, 4, 4))+2, rtol=1e-3)
@unittest.skipIf(CI and Device.DEFAULT == "NV", "crashes on NV CI")
def test_add_chain_buffers(self):
N = 31
with Context(TRACK_MATCH_STATS=0, DEBUG=0):
@@ -1114,7 +1112,7 @@ class TestSchedule(unittest.TestCase):
self.assertListEqual(a.tolist(), [[1.]*shape[1]]*shape[0])
class TestLimitBufs(unittest.TestCase):
@unittest.skipIf(CI and Device.DEFAULT == "NV", "crashes on NV CI")
@unittest.skipIf(DEV.interface.startswith("MOCK") and Device.DEFAULT == "NV", "crashes in ocelot")
def test_limit_bufs_with_var(self):
N = 31
with Context(TRACK_MATCH_STATS=0, DEBUG=0):

View File

@@ -2,7 +2,6 @@ import unittest
from tinygrad import Tensor, Device, dtypes
from tinygrad.tensor import _to_np_dtype
from tinygrad.helpers import Context, getenv, DEV, OSX
from test.helpers import CI
from test.backend.test_schedule import check_schedule
from test.backend.test_dtype_alu import ht, dtypes_float
import numpy as np
@@ -32,7 +31,7 @@ class TestTranscendentalMath(unittest.TestCase):
([(Tensor.sin, np.sin)] if dtypes.ulong in supported_dtypes else [])))
def test_float32(self, x, op):
# wrong nan behavior on Vulkan
if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and CI and Device.DEFAULT == "WEBGPU" and not OSX: return
if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and Device.DEFAULT == "WEBGPU" and not OSX: return
with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'):
np.testing.assert_allclose(op[0](Tensor([x], dtype=dtypes.float32)).numpy(),
op[1](np.array([x], dtype=_to_np_dtype(dtypes.float32))),
@@ -43,7 +42,7 @@ class TestTranscendentalMath(unittest.TestCase):
([(Tensor.sin, np.sin)] if dtypes.ulong in supported_dtypes else [])))
def test_float16(self, x, op):
# wrong nan behavior on Vulkan
if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and CI and Device.DEFAULT == "WEBGPU" and not OSX: return
if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and Device.DEFAULT == "WEBGPU" and not OSX: return
with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'):
np.testing.assert_allclose(op[0](Tensor([x], dtype=dtypes.float16)).numpy(),
op[1](np.array([x], dtype=_to_np_dtype(dtypes.float16))),
@@ -117,7 +116,7 @@ class TestFloat16Log2(unittest.TestCase):
np.testing.assert_allclose(result, expected, rtol=1e-3, err_msg=f"log2({val})")
@unittest.skipUnless(dtypes.float16 in supported_dtypes, f"no float16 on {Device.DEFAULT}")
@unittest.skipIf(Device.DEFAULT == "WEBGPU" and CI, "Nan handling differs on Vulkan")
@unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "Nan handling differs on Vulkan")
def test_float16_log2_special(self):
# special values: inf, -inf, nan, 0, negative
with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'):

View File

@@ -11,7 +11,7 @@ from tinygrad.engine.realize import run_linear
from tinygrad.codegen import to_program
from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.renderer.ptx import PTXRenderer
from test.helpers import to_uops_list, CI
from test.helpers import to_uops_list
def run_uops(uops_list:list[UOp], bufs:list[Buffer]):
buf_uops = [UOp.new_buffer(b.device, b.size, b.dtype) for b in bufs]
@@ -173,8 +173,6 @@ class TestBoolUOps(TestUOps):
def test_where_bool(self): self._test_top_bool_fxn(Ops.WHERE, lambda a,b,c: b if a else c)
class TestLocalAccess(unittest.TestCase):
# NOTE: this is failing on METAL CI, no idea why. Works locally.
@unittest.skipIf(Device.DEFAULT == "METAL" and CI, "failing only in CI")
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared memory")
def test_local_basic(self):
uops = []

View File

@@ -1,8 +1,7 @@
import unittest
import unittest, sys
from tinygrad import Device
from tinygrad.tensor import Tensor
from tinygrad.helpers import getenv, OSX
from test.helpers import CI
def multidevice_test(fxn):
exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",")
@@ -10,15 +9,15 @@ def multidevice_test(fxn):
for device in Device._devices:
# broken on OSX USB AMD, why?
if device in ["DISK", "NPY", "FAKE", "DSP", "NULL"] or (OSX and device in ["AMD"]): continue
if not CI: print(device)
if sys.stdout.isatty(): print(device)
if device in exclude_devices:
if not CI: print(f"WARNING: {device} test is excluded")
if sys.stdout.isatty(): print(f"WARNING: {device} test is excluded")
continue
with self.subTest(device=device):
try:
Device[device]
except Exception:
if not CI: print(f"WARNING: {device} test isn't running")
if sys.stdout.isatty(): print(f"WARNING: {device} test isn't running")
continue
fxn(self, device)
return ret

View File

@@ -1,10 +1,9 @@
import unittest, ctypes, struct, time, array
from tinygrad import Device, Tensor, dtypes
from tinygrad.helpers import to_mv
from tinygrad.helpers import to_mv, DEV
from tinygrad.device import Buffer, BufferSpec
from tinygrad.engine.realize import get_runtime
from tinygrad.codegen import to_program
from test.helpers import CI
def _time_queue(q, d):
st = time.perf_counter()
@@ -149,7 +148,7 @@ class TestHCQ(unittest.TestCase):
val = TestHCQ.b.uop.buffer.as_memoryview().cast("f")[1]
assert val == 0.0, f"got val {val}, should not be updated"
@unittest.skipIf(CI, "Can't handle async update on CPU")
@unittest.skipIf(DEV.interface.startswith("MOCK"), "Can't handle async update on CPU")
def test_wait_signal(self):
temp_signal = TestHCQ.d0._alloc_signal(value=0)
TestHCQ.compute_queue().wait(temp_signal, value=1).signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
@@ -160,7 +159,7 @@ class TestHCQ(unittest.TestCase):
TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value, timeout=100)
TestHCQ.d0.timeline_value += 1
@unittest.skipIf(CI, "Can't handle async update on CPU")
@unittest.skipIf(DEV.interface.startswith("MOCK"), "Can't handle async update on CPU")
def test_wait_copy_signal(self):
temp_signal = TestHCQ.d0._alloc_signal(value=0)
TestHCQ.copy_queue().wait(temp_signal, value=1).signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)

View File

@@ -3,7 +3,7 @@ import unittest
import numpy as np
from tinygrad import Tensor, dtypes
from tinygrad.engine.jit import TinyJit
from test.helpers import derandomize_model, CI
from test.helpers import derandomize_model
from examples.llama import Transformer
@@ -27,7 +27,6 @@ class TestJittedModels(unittest.TestCase):
helper_test_jitted_correctness(lambda: (Tensor([[1,]]),), test, test_jit)
dtypes.default_float = old_float
@unittest.skipUnless(not CI, "huge for CI")
def test_jitted_stable_diffusion(self):
from examples.stable_diffusion import UNetModel, unet_params
model = UNetModel(**unet_params)

View File

@@ -8,14 +8,11 @@ from tinygrad.tensor import _to_np_dtype
from tinygrad.codegen import to_program
from tinygrad.dtype import DType
from tinygrad.nn.state import get_parameters
from tinygrad.helpers import T, Target
from tinygrad.helpers import T, Target, DEV
from tinygrad.renderer import Renderer
from tinygrad.codegen import full_rewrite_to_sink, line_rewrite, pm_linearize_cleanups
from tinygrad.codegen.late.linearizer import linearize
# TODO: remove this everywhere!
CI = os.getenv("CI", "") != ""
# decorator to skip slow tests by default, run with RUN_SLOW=1 to include them
slow = unittest.skipUnless(os.getenv("RUN_SLOW"), "slow test, set RUN_SLOW=1 to run")
from tinygrad.runtime.ops_python import PythonProgram, PythonRenderer, PythonCompiler
@@ -100,7 +97,7 @@ def to_uops_list(u:list[UOp], ren=None) -> list[UOp]:
def not_support_multi_device():
# CL with fewer than two devices and CUDA on a MOCK interface don't support multi device
return CI and Device.DEFAULT in ("CL", "CUDA")
return (Device.DEFAULT == "CL" and Device[Device.DEFAULT].count() < 2) or (Device.DEFAULT == "CUDA" and DEV.interface.startswith("MOCK"))
def needs_second_gpu(fn):
@functools.wraps(fn)

View File

@@ -1,19 +1,18 @@
import torch
from torch import nn
import unittest
import unittest, sys
import numpy as np
from tinygrad.nn.state import get_parameters, get_state_dict
from tinygrad.nn import optim, Linear, Conv2d, BatchNorm2d
from tinygrad.tensor import Tensor
from extra.datasets import fetch_mnist
from test.helpers import CI
def compare_tiny_torch(model, model_torch, X, Y):
with Tensor.train():
model_torch.train()
model_state_dict = get_state_dict(model)
for k,v in model_torch.named_parameters():
if not CI: print(f"initting {k} from torch")
if sys.stdout.isatty(): print(f"initting {k} from torch")
model_state_dict[k].assign(Tensor(v.detach().numpy())).realize()
optimizer = optim.SGD(get_parameters(model), lr=0.001)
@@ -35,14 +34,14 @@ def compare_tiny_torch(model, model_torch, X, Y):
loss_torch.backward()
# assert losses match
if not CI: print(loss.realize().numpy())
if not CI: print(loss_torch.detach().numpy())
if sys.stdout.isatty(): print(loss.realize().numpy())
if sys.stdout.isatty(): print(loss_torch.detach().numpy())
np.testing.assert_allclose(loss.realize().numpy(), loss_torch.detach().numpy(), atol=1e-4)
for k,v in list(model_torch.named_parameters())[::-1]:
g = model_state_dict[k].grad.numpy()
gt = v.grad.detach().numpy()
if not CI: print("testing grads", k, model_state_dict[k].grad.dtype)
if sys.stdout.isatty(): print("testing grads", k, model_state_dict[k].grad.dtype)
np.testing.assert_allclose(g, gt, atol=1e-3, err_msg=f'grad mismatch {k}')
# take the steps
@@ -51,7 +50,7 @@ def compare_tiny_torch(model, model_torch, X, Y):
# assert weights match
for k,v in model_torch.named_parameters():
if not CI: print("testing weight", k, model_state_dict[k].dtype)
if sys.stdout.isatty(): print("testing weight", k, model_state_dict[k].dtype)
np.testing.assert_allclose(model_state_dict[k].numpy(), v.detach().numpy(), atol=1e-3, err_msg=f'weight mismatch {k}')
def get_mnist_data():

View File

@@ -5,7 +5,6 @@ from tinygrad import Tensor
from tinygrad.device import Device, Compiler, enumerate_devices_str
from tinygrad.helpers import diskcache_get, diskcache_put, getenv, Context, Target, WIN, OSX, DEV
from tinygrad.runtime.support.c import DLL
from test.helpers import CI
class TestDevice(unittest.TestCase):
def test_canonicalize(self):
@@ -67,7 +66,7 @@ class TestDevice(unittest.TestCase):
self.assertNotEqual(result.returncode, 0)
self.assertIn(b"deprecated", result.stderr)
@unittest.skipIf(WIN and CI, "skipping windows test") # TODO: subprocess causes memory violation?
@unittest.skipIf(WIN, "skipping windows test") # TODO: subprocess causes memory violation?
def test_env_overwrite_default_compiler(self):
if Device.DEFAULT == "CPU":
from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler
@@ -95,7 +94,7 @@ class TestDevice(unittest.TestCase):
shell=True, check=True, env={**os.environ, "DEV": "AMD:HIP"})
else: self.skipTest("only run on CPU/AMD")
@unittest.skipIf(WIN and CI, "skipping windows test")
@unittest.skipIf(WIN, "skipping windows test")
def test_env_online(self):
from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler
try: _, _ = CPULLVMCompiler(), ClangJITCompiler()

View File

@@ -1,7 +1,6 @@
import unittest, sys
from tinygrad import Tensor, GlobalCounters, dtypes, Context
from tinygrad.helpers import Profiling, WINO
from test.helpers import CI
from tinygrad.helpers import WINO
@unittest.skipIf(sys.platform.startswith("win"), "flaky on Windows")
class TestWinograd(unittest.TestCase):
@@ -11,11 +10,6 @@ class TestWinograd(unittest.TestCase):
def tearDown(self):
WINO.value = self.old
def test_profile(self):
x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize()
with Profiling(enabled=not CI, sort='time'):
Tensor.conv2d(x,w).realize()
def test_forward_kernels(self):
x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize()
out = Tensor.conv2d(x,w)

View File

@@ -1,7 +1,6 @@
import unittest, numpy as np
import unittest, numpy as np, os
from tinygrad import Tensor, Device, TinyJit
from tinygrad.helpers import Timing, OSX, getenv
from test.helpers import CI
from tinygrad.helpers import Timing, getenv
import multiprocessing.shared_memory as shared_memory
N = getenv("NSZ", 256)
@@ -12,7 +11,7 @@ class TestCopySpeed(unittest.TestCase):
def testCopySHMtoDefault(self):
s = shared_memory.SharedMemory(name="test_X", create=True, size=N*N*4)
s.close()
if CI and not OSX:
if os.path.exists("/dev/shm"):
t = Tensor.empty(N, N, device="disk:/dev/shm/test_X").realize()
else:
t = Tensor.empty(N, N, device="disk:shm:test_X").realize()
@@ -77,11 +76,8 @@ class TestCopySpeed(unittest.TestCase):
Device[Device.DEFAULT].synchronize()
np.testing.assert_equal(t.numpy(), x.numpy())
@unittest.skipIf(CI, "CI doesn't have 6 GPUs")
@unittest.skipIf(Device.DEFAULT != "CL", "only test this on CL")
@unittest.skipIf(Device.DEFAULT != "CL" or Device[Device.DEFAULT].count() != 6, "only test this on CL, with 6 gpus")
def testCopyCPUto6GPUs(self):
from tinygrad.runtime.ops_cl import CLDevice
if len(CLDevice.device_ids) != 6: raise unittest.SkipTest("computer doesn't have 6 GPUs")
t = Tensor.ones(N, N, device="CPU").contiguous().realize()
print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
for _ in range(3):

View File

@@ -1,9 +1,9 @@
import unittest
from tinygrad import Tensor, Device, dtypes
from test.helpers import CI
from tinygrad.helpers import DEV
# similar to test/external/external_test_gpu_ast.py, but universal
@unittest.skipIf(Device.DEFAULT in {"CUDA", "NV"} and CI, "slow on CUDA CI")
@unittest.skipIf(Device.DEFAULT in {"CUDA", "NV"} and DEV.interface.startswith("MOCK"), "slow on ocelot")
class TestSpecific(unittest.TestCase):
# from openpilot

View File

@@ -9,11 +9,11 @@ import torch
torch.set_num_threads(1)
import time
import numpy as np
import sys
np.set_printoptions(linewidth=160)
from tinygrad import Tensor, Device, GlobalCounters, TinyJit
from tinygrad.nn import Conv2d
from tinygrad.helpers import colorize_float, getenv, DEV
from test.helpers import CI
IN_CHANS = [int(x) for x in getenv("IN_CHANS", "4,16,64").split(",")]
@@ -96,7 +96,7 @@ def helper_test_generic(name, f1, f1_args, f2, f2_args):
desc = "faster" if et_torch > et_tinygrad else "slower"
flops = save_ops*1e-6
mem = save_mem*1e-6
print(("\r" if not CI else "")+f"{name:42s} {et_torch:7.2f} ms ({flops/et_torch:9.2f} GFLOPS {mem/et_torch:7.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:9.2f} GFLOPS {mem/et_tinygrad:7.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") # noqa: E501
print(("\r" if sys.stdout.isatty() else "")+f"{name:42s} {et_torch:7.2f} ms ({flops/et_torch:9.2f} GFLOPS {mem/et_torch:7.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:9.2f} GFLOPS {mem/et_tinygrad:7.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") # noqa: E501
atol, rtol = (1e-2, 1e-2) if torch_dt == torch.float16 else (1e-3, 1e-3)
np.testing.assert_allclose(val_tinygrad, val_torch, atol=atol, rtol=rtol)

View File

@@ -2,7 +2,6 @@
import unittest, random
from tinygrad import Tensor, Context, Variable, TinyJit, dtypes, Device, nn
from tinygrad.helpers import getenv
from test.helpers import CI
class TestTiny(unittest.TestCase):
@@ -112,7 +111,7 @@ class TestTiny(unittest.TestCase):
# *** a model ***
# TODO: this is failing because of how swizzling rewrites the ShapeTracker of the final STORE
@unittest.skipIf(CI and Device.DEFAULT == "DSP", "failing because of make things that can't be images not images")
@unittest.skipIf(Device.DEFAULT == "DSP", "failing because of make things that can't be images not images")
def test_mnist(self):
layers = [
nn.Conv2d(1, 32, 5), Tensor.relu,
@@ -131,7 +130,7 @@ class TestTiny(unittest.TestCase):
self.assertEqual(len(probs[0]), 10)
# TODO: this is failing because of how swizzling rewrites the ShapeTracker of the final STORE
@unittest.skipIf(CI and Device.DEFAULT == "DSP", "failing because of make things that can't be images not images")
@unittest.skipIf(Device.DEFAULT == "DSP", "failing because of make things that can't be images not images")
def test_mnist_backward(self):
# NOTE: we don't have the whole model here for speed
layers = [

View File

@@ -2,12 +2,12 @@ import unittest, time
from unittest.case import skipIf
from extra.bench_log import BenchEvent, InstantBenchEvent, WallTimeEvent, KernelTimeEvent, log_event_instant, _events, clear_events
from tinygrad.helpers import Context
from tinygrad.helpers import Context, DEV
from tinygrad.tensor import Tensor
from tinygrad.device import Device
from test.helpers import CI
_SKIP_KERNEL_TIMING = Device.DEFAULT == "WEBGPU" # WEBGPU kernel timing not supported
# WEBGPU kernel timing not supported, ocelot CUDA is inaccurate
_SKIP_KERNEL_TIMING = Device.DEFAULT == "WEBGPU" or (Device.DEFAULT == "CUDA" and DEV.interface.startswith("MOCK"))
class TestBenchLog(unittest.TestCase):
def setUp(self):
@@ -38,7 +38,7 @@ class TestBenchLog(unittest.TestCase):
self.assertGreater(_events[event]["wall"][0], 0)
self.assertGreater(_events[event]["wall"][1], 0)
@skipIf(CI or _SKIP_KERNEL_TIMING, "ci timing is not accurate")
@skipIf(_SKIP_KERNEL_TIMING, "ci timing is not accurate")
def test_log_single_kernel_time(self):
wall_times = []
@@ -55,7 +55,7 @@ class TestBenchLog(unittest.TestCase):
self.assertLess(_events[event]["kernel"][0], wall_times[0])
self.assertGreater(_events[event]["kernel"][0], 0)
@skipIf((CI and Device.DEFAULT == "CUDA") or _SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
@skipIf(_SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
def test_interleaved_wall_kernel_time(self):
wall_times = []
with Context(DEBUG=2):
@@ -77,7 +77,7 @@ class TestBenchLog(unittest.TestCase):
self.assertLess(_events[event]["kernel"][0], wall_times[0])
self.assertGreater(_events[event]["kernel"][0], 0)
@skipIf((CI and Device.DEFAULT == "CUDA") or _SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
@skipIf(_SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
def test_stacked_wall_kernel_time(self):
with Context(DEBUG=2):
for event in BenchEvent:

View File

@@ -4,7 +4,6 @@ import numpy as np
from tinygrad import dtypes, Tensor, TinyJit, GlobalCounters, Variable
from tinygrad.uop.ops import Ops, UOp
from tinygrad.helpers import temp, DEV, Context
from test.helpers import CI
N = 200 # has to be bigger than the cache to fail
@@ -189,7 +188,7 @@ class TestAssign(unittest.TestCase):
new = a + times_a
np.testing.assert_allclose(new.numpy(), 8)
@unittest.skipIf(CI and DEV.renderer == "LVP", "flaky in CI")
@unittest.skipIf(DEV.renderer == "LVP", "flaky in CI")
def test_double_assign(self):
a = Tensor.ones(4).contiguous().realize()
a += 1

View File

@@ -2,11 +2,10 @@ import unittest
import multiprocessing.shared_memory as shared_memory
from tinygrad.helpers import WIN
from tinygrad import Tensor, Device
from test.helpers import CI
import numpy as np
class TestRawShmBuffer(unittest.TestCase):
@unittest.skipIf(WIN and CI, "only fails on CI windows instance")
@unittest.skipIf(WIN, "only fails on CI windows instance")
def test_e2e(self):
t = Tensor.randn(2, 2, 2).realize()