From 8ddd1328df26ef970ee6dbf3379b285181f7e242 Mon Sep 17 00:00:00 2001 From: Christopher Milan Date: Mon, 25 May 2026 17:23:33 -0700 Subject: [PATCH] remove getenv(CI) (#16365) gone everywhere except test_interop, because torch MPS does not work in actions --- extra/optimization/test_beam_search.py | 3 +-- test/backend/test_dtype.py | 6 +++--- test/backend/test_dtype_alu.py | 7 +++---- test/backend/test_interop.py | 5 ++--- test/backend/test_ops.py | 7 +++---- test/backend/test_profiler.py | 4 ++-- test/backend/test_randomness.py | 6 +++--- test/backend/test_schedule.py | 6 ++---- test/backend/test_transcendental.py | 7 +++---- test/backend/test_uops.py | 4 +--- test/external/external_test_example.py | 9 ++++----- test/external/external_test_hcq.py | 7 +++---- test/external/external_test_jit_on_models.py | 3 +-- test/helpers.py | 7 ++----- test/models/test_end2end.py | 13 ++++++------- test/null/test_device.py | 5 ++--- test/null/test_winograd.py | 8 +------- test/speed/external_test_copy_speed.py | 12 ++++-------- test/speed/external_test_specific_conv.py | 4 ++-- test/speed/external_test_speed_v_torch.py | 4 ++-- test/test_tiny.py | 5 ++--- test/testextra/test_bench_log.py | 12 ++++++------ test/unit/test_assign.py | 3 +-- test/unit/test_shm_tensor.py | 3 +-- 24 files changed, 60 insertions(+), 90 deletions(-) diff --git a/extra/optimization/test_beam_search.py b/extra/optimization/test_beam_search.py index a81b1dde55..133c779960 100644 --- a/extra/optimization/test_beam_search.py +++ b/extra/optimization/test_beam_search.py @@ -1,7 +1,6 @@ import unittest import numpy as np -from test.helpers import CI from tinygrad.helpers import BEAM, Timing, prod from tinygrad import Variable, Device, Tensor from tinygrad.nn import Conv2d @@ -65,7 +64,7 @@ class TestBeamSearch(unittest.TestCase): actual = a.numpy() np.testing.assert_allclose(actual, desired) - @unittest.skipIf(CI, "flaky. CL_OUT_OF_RESOURCES") + @unittest.skip("flaky. CL_OUT_OF_RESOURCES") def test_conv_beam(self): c = Conv2d(3, 16, (3,3)) x = rand(1,3,32,32) diff --git a/test/backend/test_dtype.py b/test/backend/test_dtype.py index 7baf8c1559..1494c44f01 100644 --- a/test/backend/test_dtype.py +++ b/test/backend/test_dtype.py @@ -2,13 +2,13 @@ import contextlib, unittest, math import numpy as np import torch from typing import Any, List -from tinygrad.helpers import getenv, DEBUG, EMULATED_DTYPES +from tinygrad.helpers import getenv, DEBUG, EMULATED_DTYPES, DEV from tinygrad.dtype import DType, DTYPES_DICT, least_upper_dtype, fp8_to_float, float_to_fp8, _to_np_dtype, _to_torch_dtype, truncate from tinygrad.renderer.ptx import PTXRenderer from tinygrad.renderer.nir import NIRRenderer from tinygrad import Context, Device, Tensor, dtypes from hypothesis import given, settings, strategies as strat -from test.helpers import rand_for_dtype, CI +from test.helpers import rand_for_dtype from test.unit.test_dtype_spec import _assert_eq, core_dtypes, dtype_ints, dtype_floats, FP8E4M3_MAX, FP8E5M2_MAX, FP8E4M3FNUZ_MAX, FP8E5M2FNUZ_MAX import pytest pytestmark = pytest.mark.filterwarnings("ignore") @@ -225,7 +225,7 @@ class TestFloatDType(TestDType): @unittest.skipUnless(dtypes.double in supported_dtypes, f"no double on {Device.DEFAULT}") class TestDoubleDType(TestDType): DTYPE = dtypes.double - @unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or \ + @unittest.skipIf((DEV.interface.startswith("MOCK") and Device.DEFAULT in {"CUDA", "NV"}) or \ isinstance(Device[Device.DEFAULT].renderer, (PTXRenderer, NIRRenderer)), "conversion not supported on CI CUDA, PTX, and NIR") # TODO: why not? def test_float64_increased_precision(self): for func in [ diff --git a/test/backend/test_dtype_alu.py b/test/backend/test_dtype_alu.py index ae12b8810c..05c3074bf4 100644 --- a/test/backend/test_dtype_alu.py +++ b/test/backend/test_dtype_alu.py @@ -7,7 +7,6 @@ from tinygrad.runtime.ops_python import from_storage_scalar from tinygrad.renderer.ptx import PTXRenderer from tinygrad.renderer.nir import NIRRenderer from tinygrad.uop import Ops -from test.helpers import CI import numpy as np import pytest from hypothesis import assume, given, strategies as strat, settings @@ -331,12 +330,12 @@ class TestDTypeALU(unittest.TestCase): @given(ht.bool, ht.bool, strat.sampled_from(((operator.add, operator.add), (operator.mul, operator.mul)))) def test_bool(self, a, b, op): universal_test(a, b, dtypes.bool, op) - @unittest.skipIf(not CI and Device.DEFAULT == "METAL", "broken on local M3") @given(ht.int32, ht.int32, ht.float32, strat.sampled_from(integer_binary_operations), strat.sampled_from(binary_operations)) def test_int32_midcast_float(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32) - # Metal and CUDA and HIP and NIR behave differently than numpy in CI for overflows - skip_overflow = (CI and Device.DEFAULT in {"AMD", "NV", "CUDA"}) or isinstance(Device[Device.DEFAULT].renderer, NIRRenderer) + # Metal and (MOCK)CUDA and HIP and NIR behave differently than numpy for overflows + skip_overflow = ((DEV.interface.startswith("MOCK") and Device.DEFAULT in {"AMD", "NV", "CUDA"}) + or isinstance(Device[Device.DEFAULT].renderer, NIRRenderer)) @given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32, strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32, ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations)) diff --git a/test/backend/test_interop.py b/test/backend/test_interop.py index 7593837d7f..18346212cf 100644 --- a/test/backend/test_interop.py +++ b/test/backend/test_interop.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -import unittest +import unittest, os import torch import numpy as np @@ -7,7 +7,6 @@ from tinygrad.helpers import DEV from tinygrad.tensor import Tensor from tinygrad.device import Device from tinygrad.dtype import _from_torch_dtype, _to_torch_dtype -from test.helpers import CI MOCKGPU = DEV.interface.startswith("MOCK") @@ -28,7 +27,7 @@ class TestInterop(unittest.TestCase): tg_out = tg_data[:, :, 0] * 0.2989 + tg_data[:, :, 1] * 0.5870 + tg_data[:, :, 2] * 0.1140 tg_res = tg_out.numpy() - if self.torch_device == "mps" and CI: + if self.torch_device == "mps" and os.getenv("CI", "") != "": # MPS backend out of memory: https://discuss.pytorch.org/t/mps-back-end-out-of-memory-on-github-action/189773 # Calculate expected value on cpu. inp = inp.cpu() diff --git a/test/backend/test_ops.py b/test/backend/test_ops.py index 0f21519637..67d3833700 100644 --- a/test/backend/test_ops.py +++ b/test/backend/test_ops.py @@ -1,4 +1,4 @@ -import time, math, unittest, functools, platform, warnings +import time, math, unittest, functools, platform, warnings, sys import numpy as np from typing import List, Callable import torch @@ -7,7 +7,6 @@ from tinygrad import Tensor, Device, dtypes from tinygrad.tensor import _to_np_dtype from tinygrad.renderer.cstyle import QCOMCLRenderer from tinygrad.renderer.nir import NIRRenderer -from test.helpers import CI TINY_BACKEND = getenv("TINY_BACKEND") if TINY_BACKEND: @@ -74,7 +73,7 @@ def helper_test_op(shps, torch_fxn, tinygrad_fxn=None, atol=1e-6, rtol=1e-3, gra for i, (t, torch_grad) in enumerate(zip(tiny_grads, torch_grads)): compare(f"backward pass tensor {i}", t.numpy(), torch_grad.detach().cpu().numpy(), atol=grad_atol, rtol=grad_rtol) - if not CI: + if sys.stdout.isatty(): print("\ntesting %40r torch/tinygrad fp: %.2f / %.2f ms bp: %.2f / %.2f ms " % \ (shps, torch_fp*1000, tinygrad_fp*1000, torch_fbp*1000, tinygrad_fbp*1000), end="") @@ -103,7 +102,7 @@ class TestOps(unittest.TestCase): with self.assertRaises(expected) as tinygrad_cm: tinygrad_fxn(*tst) if exact: self.assertEqual(str(torch_cm.exception), str(tinygrad_cm.exception)) - if not CI: print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="") + if sys.stdout.isatty(): print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="") def test_full_like(self): a = Tensor([[1,2,3],[4,5,6]], dtype=dtypes.float32) diff --git a/test/backend/test_profiler.py b/test/backend/test_profiler.py index 5016bdd5e7..2091f85ac4 100644 --- a/test/backend/test_profiler.py +++ b/test/backend/test_profiler.py @@ -5,7 +5,6 @@ from tinygrad.device import Buffer, BufferSpec, Compiled, ProfileDeviceEvent, Pr from tinygrad.runtime.support.hcq import HCQCompiled from tinygrad.engine.realize import get_runtime from tinygrad.codegen import to_program -from test.helpers import CI MOCKGPU = DEV.interface.startswith("MOCK") def _dev_base(d): @@ -145,7 +144,8 @@ class TestProfiler(unittest.TestCase): assert len(graph_evs) == 2, "2 graph events are expected" assert len(graph_evs[0].ents) == 2, "two entities are expected" - @unittest.skipIf(CI or not issubclass(type(Device[Device.DEFAULT]), HCQCompiled), "skip CI") + @unittest.skipIf(MOCKGPU, "skip MOCKGPU") + @unittest.skipUnless(issubclass(type(Device[Device.DEFAULT]), HCQCompiled), "must be HCQ") def test_dev_jitter_matrix(self): dev_cnt = 6 try: devs = [Device[f"{Device.DEFAULT}:{i}"] for i in range(dev_cnt)] diff --git a/test/backend/test_randomness.py b/test/backend/test_randomness.py index a4aad9a826..834eb9ddca 100644 --- a/test/backend/test_randomness.py +++ b/test/backend/test_randomness.py @@ -1,14 +1,14 @@ import unittest, math from tinygrad import dtypes, Tensor, Device -from tinygrad.helpers import getenv +from tinygrad.helpers import getenv, DEV from tinygrad.codegen import to_program from tinygrad.uop.ops import Ops from tinygrad.renderer.ptx import PTXRenderer from tinygrad.renderer.nir import NIRRenderer from tinygrad.renderer.isa.x86 import X86Renderer -from test.helpers import not_support_multi_device, needs_second_gpu, CI +from test.helpers import not_support_multi_device, needs_second_gpu from test.unit.test_randomness import equal_distribution, normal_test import numpy as np @@ -48,7 +48,7 @@ class TestRandomness(unittest.TestCase): assert nx[nx == 0].size > 0 equal_distribution(lambda *x: Tensor.rand(*x, dtype=dtypes.float16), torch.rand, lambda x: np.random.rand(*x), shape=(2, N, N)) - @unittest.skipIf(CI and Device.DEFAULT in {"NV", "CUDA"}, "gpuocelot doesn't support certain ops needed for threefry") + @unittest.skipIf(DEV.interface.startswith("MOCK") and Device.DEFAULT in {"NV", "CUDA"}, "gpuocelot doesn't support certain ops needed for threefry") def test_threefry_against_reference(self): Tensor.manual_seed(1337) diff --git a/test/backend/test_schedule.py b/test/backend/test_schedule.py index 39bdb21fd3..a754cbdd07 100644 --- a/test/backend/test_schedule.py +++ b/test/backend/test_schedule.py @@ -10,9 +10,8 @@ from hypothesis import assume, given, strategies as strat from tinygrad import nn, dtypes, Device, Tensor, Variable from tinygrad.dtype import DType from tinygrad.uop.ops import UOp, Ops, UPat -from tinygrad.helpers import DEBUG, OSX, GlobalCounters, Context, getenv, all_same, temp +from tinygrad.helpers import DEBUG, DEV, OSX, GlobalCounters, Context, getenv, all_same, temp from tinygrad.engine.realize import compile_linear, run_linear -from test.helpers import CI supported_dtypes = Device[Device.DEFAULT].renderer.supported_dtypes() @@ -115,7 +114,6 @@ class TestSchedule(unittest.TestCase): run_linear(*check_schedule(b, 1)) np.testing.assert_allclose(b.numpy(), np.broadcast_to(a.numpy().astype(np.float16), (2, 4, 4))+2, rtol=1e-3) - @unittest.skipIf(CI and Device.DEFAULT == "NV", "crashes on NV CI") def test_add_chain_buffers(self): N = 31 with Context(TRACK_MATCH_STATS=0, DEBUG=0): @@ -1114,7 +1112,7 @@ class TestSchedule(unittest.TestCase): self.assertListEqual(a.tolist(), [[1.]*shape[1]]*shape[0]) class TestLimitBufs(unittest.TestCase): - @unittest.skipIf(CI and Device.DEFAULT == "NV", "crashes on NV CI") + @unittest.skipIf(DEV.interface.startswith("MOCK") and Device.DEFAULT == "NV", "crashes in ocelot") def test_limit_bufs_with_var(self): N = 31 with Context(TRACK_MATCH_STATS=0, DEBUG=0): diff --git a/test/backend/test_transcendental.py b/test/backend/test_transcendental.py index 0106ecef19..8602e9f73a 100644 --- a/test/backend/test_transcendental.py +++ b/test/backend/test_transcendental.py @@ -2,7 +2,6 @@ import unittest from tinygrad import Tensor, Device, dtypes from tinygrad.tensor import _to_np_dtype from tinygrad.helpers import Context, getenv, DEV, OSX -from test.helpers import CI from test.backend.test_schedule import check_schedule from test.backend.test_dtype_alu import ht, dtypes_float import numpy as np @@ -32,7 +31,7 @@ class TestTranscendentalMath(unittest.TestCase): ([(Tensor.sin, np.sin)] if dtypes.ulong in supported_dtypes else []))) def test_float32(self, x, op): # wrong nan behavior on Vulkan - if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and CI and Device.DEFAULT == "WEBGPU" and not OSX: return + if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and Device.DEFAULT == "WEBGPU" and not OSX: return with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'): np.testing.assert_allclose(op[0](Tensor([x], dtype=dtypes.float32)).numpy(), op[1](np.array([x], dtype=_to_np_dtype(dtypes.float32))), @@ -43,7 +42,7 @@ class TestTranscendentalMath(unittest.TestCase): ([(Tensor.sin, np.sin)] if dtypes.ulong in supported_dtypes else []))) def test_float16(self, x, op): # wrong nan behavior on Vulkan - if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and CI and Device.DEFAULT == "WEBGPU" and not OSX: return + if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and Device.DEFAULT == "WEBGPU" and not OSX: return with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'): np.testing.assert_allclose(op[0](Tensor([x], dtype=dtypes.float16)).numpy(), op[1](np.array([x], dtype=_to_np_dtype(dtypes.float16))), @@ -117,7 +116,7 @@ class TestFloat16Log2(unittest.TestCase): np.testing.assert_allclose(result, expected, rtol=1e-3, err_msg=f"log2({val})") @unittest.skipUnless(dtypes.float16 in supported_dtypes, f"no float16 on {Device.DEFAULT}") - @unittest.skipIf(Device.DEFAULT == "WEBGPU" and CI, "Nan handling differs on Vulkan") + @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "Nan handling differs on Vulkan") def test_float16_log2_special(self): # special values: inf, -inf, nan, 0, negative with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'): diff --git a/test/backend/test_uops.py b/test/backend/test_uops.py index 27ab4c3a8e..cda690e26a 100644 --- a/test/backend/test_uops.py +++ b/test/backend/test_uops.py @@ -11,7 +11,7 @@ from tinygrad.engine.realize import run_linear from tinygrad.codegen import to_program from tinygrad.codegen.opt import Opt, OptOps from tinygrad.renderer.ptx import PTXRenderer -from test.helpers import to_uops_list, CI +from test.helpers import to_uops_list def run_uops(uops_list:list[UOp], bufs:list[Buffer]): buf_uops = [UOp.new_buffer(b.device, b.size, b.dtype) for b in bufs] @@ -173,8 +173,6 @@ class TestBoolUOps(TestUOps): def test_where_bool(self): self._test_top_bool_fxn(Ops.WHERE, lambda a,b,c: b if a else c) class TestLocalAccess(unittest.TestCase): - # NOTE: this is failing on METAL CI, no idea why. Works locally. - @unittest.skipIf(Device.DEFAULT == "METAL" and CI, "failing only in CI") @unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared memory") def test_local_basic(self): uops = [] diff --git a/test/external/external_test_example.py b/test/external/external_test_example.py index de51efb1be..37554fc22c 100644 --- a/test/external/external_test_example.py +++ b/test/external/external_test_example.py @@ -1,8 +1,7 @@ -import unittest +import unittest, sys from tinygrad import Device from tinygrad.tensor import Tensor from tinygrad.helpers import getenv, OSX -from test.helpers import CI def multidevice_test(fxn): exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",") @@ -10,15 +9,15 @@ def multidevice_test(fxn): for device in Device._devices: # broken on OSX USB AMD, why? if device in ["DISK", "NPY", "FAKE", "DSP", "NULL"] or (OSX and device in ["AMD"]): continue - if not CI: print(device) + if sys.stdout.isatty(): print(device) if device in exclude_devices: - if not CI: print(f"WARNING: {device} test is excluded") + if sys.stdout.isatty(): print(f"WARNING: {device} test is excluded") continue with self.subTest(device=device): try: Device[device] except Exception: - if not CI: print(f"WARNING: {device} test isn't running") + if sys.stdout.isatty(): print(f"WARNING: {device} test isn't running") continue fxn(self, device) return ret diff --git a/test/external/external_test_hcq.py b/test/external/external_test_hcq.py index 044928fe33..cb21563ae1 100644 --- a/test/external/external_test_hcq.py +++ b/test/external/external_test_hcq.py @@ -1,10 +1,9 @@ import unittest, ctypes, struct, time, array from tinygrad import Device, Tensor, dtypes -from tinygrad.helpers import to_mv +from tinygrad.helpers import to_mv, DEV from tinygrad.device import Buffer, BufferSpec from tinygrad.engine.realize import get_runtime from tinygrad.codegen import to_program -from test.helpers import CI def _time_queue(q, d): st = time.perf_counter() @@ -149,7 +148,7 @@ class TestHCQ(unittest.TestCase): val = TestHCQ.b.uop.buffer.as_memoryview().cast("f")[1] assert val == 0.0, f"got val {val}, should not be updated" - @unittest.skipIf(CI, "Can't handle async update on CPU") + @unittest.skipIf(DEV.interface.startswith("MOCK"), "Can't handle async update on CPU") def test_wait_signal(self): temp_signal = TestHCQ.d0._alloc_signal(value=0) TestHCQ.compute_queue().wait(temp_signal, value=1).signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0) @@ -160,7 +159,7 @@ class TestHCQ(unittest.TestCase): TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value, timeout=100) TestHCQ.d0.timeline_value += 1 - @unittest.skipIf(CI, "Can't handle async update on CPU") + @unittest.skipIf(DEV.interface.startswith("MOCK"), "Can't handle async update on CPU") def test_wait_copy_signal(self): temp_signal = TestHCQ.d0._alloc_signal(value=0) TestHCQ.copy_queue().wait(temp_signal, value=1).signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0) diff --git a/test/external/external_test_jit_on_models.py b/test/external/external_test_jit_on_models.py index 9b1ef6217e..e2d14d9768 100644 --- a/test/external/external_test_jit_on_models.py +++ b/test/external/external_test_jit_on_models.py @@ -3,7 +3,7 @@ import unittest import numpy as np from tinygrad import Tensor, dtypes from tinygrad.engine.jit import TinyJit -from test.helpers import derandomize_model, CI +from test.helpers import derandomize_model from examples.llama import Transformer @@ -27,7 +27,6 @@ class TestJittedModels(unittest.TestCase): helper_test_jitted_correctness(lambda: (Tensor([[1,]]),), test, test_jit) dtypes.default_float = old_float - @unittest.skipUnless(not CI, "huge for CI") def test_jitted_stable_diffusion(self): from examples.stable_diffusion import UNetModel, unet_params model = UNetModel(**unet_params) diff --git a/test/helpers.py b/test/helpers.py index fa26cac0e6..3ce7c1c8a1 100644 --- a/test/helpers.py +++ b/test/helpers.py @@ -8,14 +8,11 @@ from tinygrad.tensor import _to_np_dtype from tinygrad.codegen import to_program from tinygrad.dtype import DType from tinygrad.nn.state import get_parameters -from tinygrad.helpers import T, Target +from tinygrad.helpers import T, Target, DEV from tinygrad.renderer import Renderer from tinygrad.codegen import full_rewrite_to_sink, line_rewrite, pm_linearize_cleanups from tinygrad.codegen.late.linearizer import linearize -# TODO: remove this everywhere! -CI = os.getenv("CI", "") != "" - # decorator to skip slow tests by default, run with RUN_SLOW=1 to include them slow = unittest.skipUnless(os.getenv("RUN_SLOW"), "slow test, set RUN_SLOW=1 to run") from tinygrad.runtime.ops_python import PythonProgram, PythonRenderer, PythonCompiler @@ -100,7 +97,7 @@ def to_uops_list(u:list[UOp], ren=None) -> list[UOp]: def not_support_multi_device(): # CL and CUDA don't support multi device if in CI - return CI and Device.DEFAULT in ("CL", "CUDA") + return (Device.DEFAULT == "CL" and Device[Device.DEFAULT].count() < 2) or (Device.DEFAULT == "CUDA" and DEV.interface.startswith("MOCK")) def needs_second_gpu(fn): @functools.wraps(fn) diff --git a/test/models/test_end2end.py b/test/models/test_end2end.py index b7742dde38..452e0d0704 100644 --- a/test/models/test_end2end.py +++ b/test/models/test_end2end.py @@ -1,19 +1,18 @@ import torch from torch import nn -import unittest +import unittest, sys import numpy as np from tinygrad.nn.state import get_parameters, get_state_dict from tinygrad.nn import optim, Linear, Conv2d, BatchNorm2d from tinygrad.tensor import Tensor from extra.datasets import fetch_mnist -from test.helpers import CI def compare_tiny_torch(model, model_torch, X, Y): with Tensor.train(): model_torch.train() model_state_dict = get_state_dict(model) for k,v in model_torch.named_parameters(): - if not CI: print(f"initting {k} from torch") + if sys.stdout.isatty(): print(f"initting {k} from torch") model_state_dict[k].assign(Tensor(v.detach().numpy())).realize() optimizer = optim.SGD(get_parameters(model), lr=0.001) @@ -35,14 +34,14 @@ def compare_tiny_torch(model, model_torch, X, Y): loss_torch.backward() # assert losses match - if not CI: print(loss.realize().numpy()) - if not CI: print(loss_torch.detach().numpy()) + if sys.stdout.isatty(): print(loss.realize().numpy()) + if sys.stdout.isatty(): print(loss_torch.detach().numpy()) np.testing.assert_allclose(loss.realize().numpy(), loss_torch.detach().numpy(), atol=1e-4) for k,v in list(model_torch.named_parameters())[::-1]: g = model_state_dict[k].grad.numpy() gt = v.grad.detach().numpy() - if not CI: print("testing grads", k, model_state_dict[k].grad.dtype) + if sys.stdout.isatty(): print("testing grads", k, model_state_dict[k].grad.dtype) np.testing.assert_allclose(g, gt, atol=1e-3, err_msg=f'grad mismatch {k}') # take the steps @@ -51,7 +50,7 @@ def compare_tiny_torch(model, model_torch, X, Y): # assert weights match for k,v in model_torch.named_parameters(): - if not CI: print("testing weight", k, model_state_dict[k].dtype) + if sys.stdout.isatty(): print("testing weight", k, model_state_dict[k].dtype) np.testing.assert_allclose(model_state_dict[k].numpy(), v.detach().numpy(), atol=1e-3, err_msg=f'weight mismatch {k}') def get_mnist_data(): diff --git a/test/null/test_device.py b/test/null/test_device.py index e08b37866e..b31be94173 100644 --- a/test/null/test_device.py +++ b/test/null/test_device.py @@ -5,7 +5,6 @@ from tinygrad import Tensor from tinygrad.device import Device, Compiler, enumerate_devices_str from tinygrad.helpers import diskcache_get, diskcache_put, getenv, Context, Target, WIN, OSX, DEV from tinygrad.runtime.support.c import DLL -from test.helpers import CI class TestDevice(unittest.TestCase): def test_canonicalize(self): @@ -67,7 +66,7 @@ class TestDevice(unittest.TestCase): self.assertNotEqual(result.returncode, 0) self.assertIn(b"deprecated", result.stderr) - @unittest.skipIf(WIN and CI, "skipping windows test") # TODO: subprocess causes memory violation? + @unittest.skipIf(WIN, "skipping windows test") # TODO: subprocess causes memory violation? def test_env_overwrite_default_compiler(self): if Device.DEFAULT == "CPU": from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler @@ -95,7 +94,7 @@ class TestDevice(unittest.TestCase): shell=True, check=True, env={**os.environ, "DEV": "AMD:HIP"}) else: self.skipTest("only run on CPU/AMD") - @unittest.skipIf(WIN and CI, "skipping windows test") + @unittest.skipIf(WIN, "skipping windows test") def test_env_online(self): from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler try: _, _ = CPULLVMCompiler(), ClangJITCompiler() diff --git a/test/null/test_winograd.py b/test/null/test_winograd.py index 98d137f733..e9ac04b8be 100644 --- a/test/null/test_winograd.py +++ b/test/null/test_winograd.py @@ -1,7 +1,6 @@ import unittest, sys from tinygrad import Tensor, GlobalCounters, dtypes, Context -from tinygrad.helpers import Profiling, WINO -from test.helpers import CI +from tinygrad.helpers import WINO @unittest.skipIf(sys.platform.startswith("win"), "flaky on Windows") class TestWinograd(unittest.TestCase): @@ -11,11 +10,6 @@ class TestWinograd(unittest.TestCase): def tearDown(self): WINO.value = self.old - def test_profile(self): - x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize() - with Profiling(enabled=not CI, sort='time'): - Tensor.conv2d(x,w).realize() - def test_forward_kernels(self): x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize() out = Tensor.conv2d(x,w) diff --git a/test/speed/external_test_copy_speed.py b/test/speed/external_test_copy_speed.py index 6ab3b1df50..e34b0fc1fe 100644 --- a/test/speed/external_test_copy_speed.py +++ b/test/speed/external_test_copy_speed.py @@ -1,7 +1,6 @@ -import unittest, numpy as np +import unittest, numpy as np, os from tinygrad import Tensor, Device, TinyJit -from tinygrad.helpers import Timing, OSX, getenv -from test.helpers import CI +from tinygrad.helpers import Timing, getenv import multiprocessing.shared_memory as shared_memory N = getenv("NSZ", 256) @@ -12,7 +11,7 @@ class TestCopySpeed(unittest.TestCase): def testCopySHMtoDefault(self): s = shared_memory.SharedMemory(name="test_X", create=True, size=N*N*4) s.close() - if CI and not OSX: + if os.path.exists("/dev/shm"): t = Tensor.empty(N, N, device="disk:/dev/shm/test_X").realize() else: t = Tensor.empty(N, N, device="disk:shm:test_X").realize() @@ -77,11 +76,8 @@ class TestCopySpeed(unittest.TestCase): Device[Device.DEFAULT].synchronize() np.testing.assert_equal(t.numpy(), x.numpy()) - @unittest.skipIf(CI, "CI doesn't have 6 GPUs") - @unittest.skipIf(Device.DEFAULT != "CL", "only test this on CL") + @unittest.skipIf(Device.DEFAULT != "CL" or Device[Device.DEFAULT].count() != 6, "only test this on CL, with 6 gpus") def testCopyCPUto6GPUs(self): - from tinygrad.runtime.ops_cl import CLDevice - if len(CLDevice.device_ids) != 6: raise unittest.SkipTest("computer doesn't have 6 GPUs") t = Tensor.ones(N, N, device="CPU").contiguous().realize() print(f"buffer: {t.nbytes()*1e-9:.2f} GB") for _ in range(3): diff --git a/test/speed/external_test_specific_conv.py b/test/speed/external_test_specific_conv.py index 6533279cf5..190a1ccee9 100644 --- a/test/speed/external_test_specific_conv.py +++ b/test/speed/external_test_specific_conv.py @@ -1,9 +1,9 @@ import unittest from tinygrad import Tensor, Device, dtypes -from test.helpers import CI +from tinygrad.helpers import DEV # similar to test/external/external_test_gpu_ast.py, but universal -@unittest.skipIf(Device.DEFAULT in {"CUDA", "NV"} and CI, "slow on CUDA CI") +@unittest.skipIf(Device.DEFAULT in {"CUDA", "NV"} and DEV.interface.startswith("MOCK"), "slow on ocelot") class TestSpecific(unittest.TestCase): # from openpilot diff --git a/test/speed/external_test_speed_v_torch.py b/test/speed/external_test_speed_v_torch.py index 1c72ba8900..bd3520c056 100644 --- a/test/speed/external_test_speed_v_torch.py +++ b/test/speed/external_test_speed_v_torch.py @@ -9,11 +9,11 @@ import torch torch.set_num_threads(1) import time import numpy as np +import sys np.set_printoptions(linewidth=160) from tinygrad import Tensor, Device, GlobalCounters, TinyJit from tinygrad.nn import Conv2d from tinygrad.helpers import colorize_float, getenv, DEV -from test.helpers import CI IN_CHANS = [int(x) for x in getenv("IN_CHANS", "4,16,64").split(",")] @@ -96,7 +96,7 @@ def helper_test_generic(name, f1, f1_args, f2, f2_args): desc = "faster" if et_torch > et_tinygrad else "slower" flops = save_ops*1e-6 mem = save_mem*1e-6 - print(("\r" if not CI else "")+f"{name:42s} {et_torch:7.2f} ms ({flops/et_torch:9.2f} GFLOPS {mem/et_torch:7.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:9.2f} GFLOPS {mem/et_tinygrad:7.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") # noqa: E501 + print(("\r" if sys.stdout.isatty() else "")+f"{name:42s} {et_torch:7.2f} ms ({flops/et_torch:9.2f} GFLOPS {mem/et_torch:7.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:9.2f} GFLOPS {mem/et_tinygrad:7.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") # noqa: E501 atol, rtol = (1e-2, 1e-2) if torch_dt == torch.float16 else (1e-3, 1e-3) np.testing.assert_allclose(val_tinygrad, val_torch, atol=atol, rtol=rtol) diff --git a/test/test_tiny.py b/test/test_tiny.py index 71e2dfdb96..10ad7428df 100644 --- a/test/test_tiny.py +++ b/test/test_tiny.py @@ -2,7 +2,6 @@ import unittest, random from tinygrad import Tensor, Context, Variable, TinyJit, dtypes, Device, nn from tinygrad.helpers import getenv -from test.helpers import CI class TestTiny(unittest.TestCase): @@ -112,7 +111,7 @@ class TestTiny(unittest.TestCase): # *** a model *** # TODO: this is failing because of how swizzling rewrites the ShapeTracker of the final STORE - @unittest.skipIf(CI and Device.DEFAULT == "DSP", "failing because of make things that can't be images not images") + @unittest.skipIf(Device.DEFAULT == "DSP", "failing because of make things that can't be images not images") def test_mnist(self): layers = [ nn.Conv2d(1, 32, 5), Tensor.relu, @@ -131,7 +130,7 @@ class TestTiny(unittest.TestCase): self.assertEqual(len(probs[0]), 10) # TODO: this is failing because of how swizzling rewrites the ShapeTracker of the final STORE - @unittest.skipIf(CI and Device.DEFAULT == "DSP", "failing because of make things that can't be images not images") + @unittest.skipIf(Device.DEFAULT == "DSP", "failing because of make things that can't be images not images") def test_mnist_backward(self): # NOTE: we don't have the whole model here for speed layers = [ diff --git a/test/testextra/test_bench_log.py b/test/testextra/test_bench_log.py index fede0e028b..975bef2fe1 100644 --- a/test/testextra/test_bench_log.py +++ b/test/testextra/test_bench_log.py @@ -2,12 +2,12 @@ import unittest, time from unittest.case import skipIf from extra.bench_log import BenchEvent, InstantBenchEvent, WallTimeEvent, KernelTimeEvent, log_event_instant, _events, clear_events -from tinygrad.helpers import Context +from tinygrad.helpers import Context, DEV from tinygrad.tensor import Tensor from tinygrad.device import Device -from test.helpers import CI -_SKIP_KERNEL_TIMING = Device.DEFAULT == "WEBGPU" # WEBGPU kernel timing not supported +# WEBGPU kernel timing not supported, ocelot CUDA is inaccurate +_SKIP_KERNEL_TIMING = Device.DEFAULT == "WEBGPU" or (Device.DEFAULT == "CUDA" and DEV.interface.startswith("MOCK")) class TestBenchLog(unittest.TestCase): def setUp(self): @@ -38,7 +38,7 @@ class TestBenchLog(unittest.TestCase): self.assertGreater(_events[event]["wall"][0], 0) self.assertGreater(_events[event]["wall"][1], 0) - @skipIf(CI or _SKIP_KERNEL_TIMING, "ci timing is not accurate") + @skipIf(_SKIP_KERNEL_TIMING, "ci timing is not accurate") def test_log_single_kernel_time(self): wall_times = [] @@ -55,7 +55,7 @@ class TestBenchLog(unittest.TestCase): self.assertLess(_events[event]["kernel"][0], wall_times[0]) self.assertGreater(_events[event]["kernel"][0], 0) - @skipIf((CI and Device.DEFAULT == "CUDA") or _SKIP_KERNEL_TIMING, "ci cuda timing is not accurate") + @skipIf(_SKIP_KERNEL_TIMING, "ci cuda timing is not accurate") def test_interleaved_wall_kernel_time(self): wall_times = [] with Context(DEBUG=2): @@ -77,7 +77,7 @@ class TestBenchLog(unittest.TestCase): self.assertLess(_events[event]["kernel"][0], wall_times[0]) self.assertGreater(_events[event]["kernel"][0], 0) - @skipIf((CI and Device.DEFAULT == "CUDA") or _SKIP_KERNEL_TIMING, "ci cuda timing is not accurate") + @skipIf(_SKIP_KERNEL_TIMING, "ci cuda timing is not accurate") def test_stacked_wall_kernel_time(self): with Context(DEBUG=2): for event in BenchEvent: diff --git a/test/unit/test_assign.py b/test/unit/test_assign.py index 4aec79b4aa..5e5b1ff16a 100644 --- a/test/unit/test_assign.py +++ b/test/unit/test_assign.py @@ -4,7 +4,6 @@ import numpy as np from tinygrad import dtypes, Tensor, TinyJit, GlobalCounters, Variable from tinygrad.uop.ops import Ops, UOp from tinygrad.helpers import temp, DEV, Context -from test.helpers import CI N = 200 # has to be bigger than the cache to fail @@ -189,7 +188,7 @@ class TestAssign(unittest.TestCase): new = a + times_a np.testing.assert_allclose(new.numpy(), 8) - @unittest.skipIf(CI and DEV.renderer == "LVP", "flaky in CI") + @unittest.skipIf(DEV.renderer == "LVP", "flaky in CI") def test_double_assign(self): a = Tensor.ones(4).contiguous().realize() a += 1 diff --git a/test/unit/test_shm_tensor.py b/test/unit/test_shm_tensor.py index 69a6746e28..19b6c9ff9d 100644 --- a/test/unit/test_shm_tensor.py +++ b/test/unit/test_shm_tensor.py @@ -2,11 +2,10 @@ import unittest import multiprocessing.shared_memory as shared_memory from tinygrad.helpers import WIN from tinygrad import Tensor, Device -from test.helpers import CI import numpy as np class TestRawShmBuffer(unittest.TestCase): - @unittest.skipIf(WIN and CI, "only fails on CI windows instance") + @unittest.skipIf(WIN, "only fails on CI windows instance") def test_e2e(self): t = Tensor.randn(2, 2, 2).realize()