mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-08 05:54:59 +08:00
remove getenv(CI) (#16365)
The CI check is gone everywhere except test_interop, because torch MPS does not work in GitHub Actions
This commit is contained in:
committed by
GitHub
parent
695a0069ed
commit
8ddd1328df
@@ -1,7 +1,6 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from test.helpers import CI
|
||||
from tinygrad.helpers import BEAM, Timing, prod
|
||||
from tinygrad import Variable, Device, Tensor
|
||||
from tinygrad.nn import Conv2d
|
||||
@@ -65,7 +64,7 @@ class TestBeamSearch(unittest.TestCase):
|
||||
actual = a.numpy()
|
||||
np.testing.assert_allclose(actual, desired)
|
||||
|
||||
@unittest.skipIf(CI, "flaky. CL_OUT_OF_RESOURCES")
|
||||
@unittest.skip("flaky. CL_OUT_OF_RESOURCES")
|
||||
def test_conv_beam(self):
|
||||
c = Conv2d(3, 16, (3,3))
|
||||
x = rand(1,3,32,32)
|
||||
|
||||
@@ -2,13 +2,13 @@ import contextlib, unittest, math
|
||||
import numpy as np
|
||||
import torch
|
||||
from typing import Any, List
|
||||
from tinygrad.helpers import getenv, DEBUG, EMULATED_DTYPES
|
||||
from tinygrad.helpers import getenv, DEBUG, EMULATED_DTYPES, DEV
|
||||
from tinygrad.dtype import DType, DTYPES_DICT, least_upper_dtype, fp8_to_float, float_to_fp8, _to_np_dtype, _to_torch_dtype, truncate
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
from tinygrad.renderer.nir import NIRRenderer
|
||||
from tinygrad import Context, Device, Tensor, dtypes
|
||||
from hypothesis import given, settings, strategies as strat
|
||||
from test.helpers import rand_for_dtype, CI
|
||||
from test.helpers import rand_for_dtype
|
||||
from test.unit.test_dtype_spec import _assert_eq, core_dtypes, dtype_ints, dtype_floats, FP8E4M3_MAX, FP8E5M2_MAX, FP8E4M3FNUZ_MAX, FP8E5M2FNUZ_MAX
|
||||
import pytest
|
||||
pytestmark = pytest.mark.filterwarnings("ignore")
|
||||
@@ -225,7 +225,7 @@ class TestFloatDType(TestDType):
|
||||
@unittest.skipUnless(dtypes.double in supported_dtypes, f"no double on {Device.DEFAULT}")
|
||||
class TestDoubleDType(TestDType):
|
||||
DTYPE = dtypes.double
|
||||
@unittest.skipIf((CI and Device.DEFAULT in {"CUDA", "NV"}) or \
|
||||
@unittest.skipIf((DEV.interface.startswith("MOCK") and Device.DEFAULT in {"CUDA", "NV"}) or \
|
||||
isinstance(Device[Device.DEFAULT].renderer, (PTXRenderer, NIRRenderer)), "conversion not supported on CI CUDA, PTX, and NIR") # TODO: why not?
|
||||
def test_float64_increased_precision(self):
|
||||
for func in [
|
||||
|
||||
@@ -7,7 +7,6 @@ from tinygrad.runtime.ops_python import from_storage_scalar
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
from tinygrad.renderer.nir import NIRRenderer
|
||||
from tinygrad.uop import Ops
|
||||
from test.helpers import CI
|
||||
import numpy as np
|
||||
import pytest
|
||||
from hypothesis import assume, given, strategies as strat, settings
|
||||
@@ -331,12 +330,12 @@ class TestDTypeALU(unittest.TestCase):
|
||||
@given(ht.bool, ht.bool, strat.sampled_from(((operator.add, operator.add), (operator.mul, operator.mul))))
|
||||
def test_bool(self, a, b, op): universal_test(a, b, dtypes.bool, op)
|
||||
|
||||
@unittest.skipIf(not CI and Device.DEFAULT == "METAL", "broken on local M3")
|
||||
@given(ht.int32, ht.int32, ht.float32, strat.sampled_from(integer_binary_operations), strat.sampled_from(binary_operations))
|
||||
def test_int32_midcast_float(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32)
|
||||
|
||||
# Metal and CUDA and HIP and NIR behave differently than numpy in CI for overflows
|
||||
skip_overflow = (CI and Device.DEFAULT in {"AMD", "NV", "CUDA"}) or isinstance(Device[Device.DEFAULT].renderer, NIRRenderer)
|
||||
# Metal and (MOCK)CUDA and HIP and NIR behave differently than numpy for overflows
|
||||
skip_overflow = ((DEV.interface.startswith("MOCK") and Device.DEFAULT in {"AMD", "NV", "CUDA"})
|
||||
or isinstance(Device[Device.DEFAULT].renderer, NIRRenderer))
|
||||
@given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
|
||||
strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
|
||||
ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
import unittest
|
||||
import unittest, os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
@@ -7,7 +7,6 @@ from tinygrad.helpers import DEV
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.dtype import _from_torch_dtype, _to_torch_dtype
|
||||
from test.helpers import CI
|
||||
|
||||
MOCKGPU = DEV.interface.startswith("MOCK")
|
||||
|
||||
@@ -28,7 +27,7 @@ class TestInterop(unittest.TestCase):
|
||||
tg_out = tg_data[:, :, 0] * 0.2989 + tg_data[:, :, 1] * 0.5870 + tg_data[:, :, 2] * 0.1140
|
||||
tg_res = tg_out.numpy()
|
||||
|
||||
if self.torch_device == "mps" and CI:
|
||||
if self.torch_device == "mps" and os.getenv("CI", "") != "":
|
||||
# MPS backend out of memory: https://discuss.pytorch.org/t/mps-back-end-out-of-memory-on-github-action/189773
|
||||
# Calculate expected value on cpu.
|
||||
inp = inp.cpu()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import time, math, unittest, functools, platform, warnings
|
||||
import time, math, unittest, functools, platform, warnings, sys
|
||||
import numpy as np
|
||||
from typing import List, Callable
|
||||
import torch
|
||||
@@ -7,7 +7,6 @@ from tinygrad import Tensor, Device, dtypes
|
||||
from tinygrad.tensor import _to_np_dtype
|
||||
from tinygrad.renderer.cstyle import QCOMCLRenderer
|
||||
from tinygrad.renderer.nir import NIRRenderer
|
||||
from test.helpers import CI
|
||||
|
||||
TINY_BACKEND = getenv("TINY_BACKEND")
|
||||
if TINY_BACKEND:
|
||||
@@ -74,7 +73,7 @@ def helper_test_op(shps, torch_fxn, tinygrad_fxn=None, atol=1e-6, rtol=1e-3, gra
|
||||
for i, (t, torch_grad) in enumerate(zip(tiny_grads, torch_grads)):
|
||||
compare(f"backward pass tensor {i}", t.numpy(), torch_grad.detach().cpu().numpy(), atol=grad_atol, rtol=grad_rtol)
|
||||
|
||||
if not CI:
|
||||
if sys.stdout.isatty():
|
||||
print("\ntesting %40r torch/tinygrad fp: %.2f / %.2f ms bp: %.2f / %.2f ms " % \
|
||||
(shps, torch_fp*1000, tinygrad_fp*1000, torch_fbp*1000, tinygrad_fbp*1000), end="")
|
||||
|
||||
@@ -103,7 +102,7 @@ class TestOps(unittest.TestCase):
|
||||
with self.assertRaises(expected) as tinygrad_cm:
|
||||
tinygrad_fxn(*tst)
|
||||
if exact: self.assertEqual(str(torch_cm.exception), str(tinygrad_cm.exception))
|
||||
if not CI: print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="")
|
||||
if sys.stdout.isatty(): print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="")
|
||||
|
||||
def test_full_like(self):
|
||||
a = Tensor([[1,2,3],[4,5,6]], dtype=dtypes.float32)
|
||||
|
||||
@@ -5,7 +5,6 @@ from tinygrad.device import Buffer, BufferSpec, Compiled, ProfileDeviceEvent, Pr
|
||||
from tinygrad.runtime.support.hcq import HCQCompiled
|
||||
from tinygrad.engine.realize import get_runtime
|
||||
from tinygrad.codegen import to_program
|
||||
from test.helpers import CI
|
||||
|
||||
MOCKGPU = DEV.interface.startswith("MOCK")
|
||||
def _dev_base(d):
|
||||
@@ -145,7 +144,8 @@ class TestProfiler(unittest.TestCase):
|
||||
assert len(graph_evs) == 2, "2 graph events are expected"
|
||||
assert len(graph_evs[0].ents) == 2, "two entities are expected"
|
||||
|
||||
@unittest.skipIf(CI or not issubclass(type(Device[Device.DEFAULT]), HCQCompiled), "skip CI")
|
||||
@unittest.skipIf(MOCKGPU, "skip MOCKGPU")
|
||||
@unittest.skipUnless(issubclass(type(Device[Device.DEFAULT]), HCQCompiled), "must be HCQ")
|
||||
def test_dev_jitter_matrix(self):
|
||||
dev_cnt = 6
|
||||
try: devs = [Device[f"{Device.DEFAULT}:{i}"] for i in range(dev_cnt)]
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
import unittest, math
|
||||
|
||||
from tinygrad import dtypes, Tensor, Device
|
||||
from tinygrad.helpers import getenv
|
||||
from tinygrad.helpers import getenv, DEV
|
||||
from tinygrad.codegen import to_program
|
||||
|
||||
from tinygrad.uop.ops import Ops
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
from tinygrad.renderer.nir import NIRRenderer
|
||||
from tinygrad.renderer.isa.x86 import X86Renderer
|
||||
from test.helpers import not_support_multi_device, needs_second_gpu, CI
|
||||
from test.helpers import not_support_multi_device, needs_second_gpu
|
||||
from test.unit.test_randomness import equal_distribution, normal_test
|
||||
|
||||
import numpy as np
|
||||
@@ -48,7 +48,7 @@ class TestRandomness(unittest.TestCase):
|
||||
assert nx[nx == 0].size > 0
|
||||
equal_distribution(lambda *x: Tensor.rand(*x, dtype=dtypes.float16), torch.rand, lambda x: np.random.rand(*x), shape=(2, N, N))
|
||||
|
||||
@unittest.skipIf(CI and Device.DEFAULT in {"NV", "CUDA"}, "gpuocelot doesn't support certain ops needed for threefry")
|
||||
@unittest.skipIf(DEV.interface.startswith("MOCK") and Device.DEFAULT in {"NV", "CUDA"}, "gpuocelot doesn't support certain ops needed for threefry")
|
||||
def test_threefry_against_reference(self):
|
||||
Tensor.manual_seed(1337)
|
||||
|
||||
|
||||
@@ -10,9 +10,8 @@ from hypothesis import assume, given, strategies as strat
|
||||
from tinygrad import nn, dtypes, Device, Tensor, Variable
|
||||
from tinygrad.dtype import DType
|
||||
from tinygrad.uop.ops import UOp, Ops, UPat
|
||||
from tinygrad.helpers import DEBUG, OSX, GlobalCounters, Context, getenv, all_same, temp
|
||||
from tinygrad.helpers import DEBUG, DEV, OSX, GlobalCounters, Context, getenv, all_same, temp
|
||||
from tinygrad.engine.realize import compile_linear, run_linear
|
||||
from test.helpers import CI
|
||||
|
||||
supported_dtypes = Device[Device.DEFAULT].renderer.supported_dtypes()
|
||||
|
||||
@@ -115,7 +114,6 @@ class TestSchedule(unittest.TestCase):
|
||||
run_linear(*check_schedule(b, 1))
|
||||
np.testing.assert_allclose(b.numpy(), np.broadcast_to(a.numpy().astype(np.float16), (2, 4, 4))+2, rtol=1e-3)
|
||||
|
||||
@unittest.skipIf(CI and Device.DEFAULT == "NV", "crashes on NV CI")
|
||||
def test_add_chain_buffers(self):
|
||||
N = 31
|
||||
with Context(TRACK_MATCH_STATS=0, DEBUG=0):
|
||||
@@ -1114,7 +1112,7 @@ class TestSchedule(unittest.TestCase):
|
||||
self.assertListEqual(a.tolist(), [[1.]*shape[1]]*shape[0])
|
||||
|
||||
class TestLimitBufs(unittest.TestCase):
|
||||
@unittest.skipIf(CI and Device.DEFAULT == "NV", "crashes on NV CI")
|
||||
@unittest.skipIf(DEV.interface.startswith("MOCK") and Device.DEFAULT == "NV", "crashes in ocelot")
|
||||
def test_limit_bufs_with_var(self):
|
||||
N = 31
|
||||
with Context(TRACK_MATCH_STATS=0, DEBUG=0):
|
||||
|
||||
@@ -2,7 +2,6 @@ import unittest
|
||||
from tinygrad import Tensor, Device, dtypes
|
||||
from tinygrad.tensor import _to_np_dtype
|
||||
from tinygrad.helpers import Context, getenv, DEV, OSX
|
||||
from test.helpers import CI
|
||||
from test.backend.test_schedule import check_schedule
|
||||
from test.backend.test_dtype_alu import ht, dtypes_float
|
||||
import numpy as np
|
||||
@@ -32,7 +31,7 @@ class TestTranscendentalMath(unittest.TestCase):
|
||||
([(Tensor.sin, np.sin)] if dtypes.ulong in supported_dtypes else [])))
|
||||
def test_float32(self, x, op):
|
||||
# wrong nan behavior on Vulkan
|
||||
if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and CI and Device.DEFAULT == "WEBGPU" and not OSX: return
|
||||
if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and Device.DEFAULT == "WEBGPU" and not OSX: return
|
||||
with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'):
|
||||
np.testing.assert_allclose(op[0](Tensor([x], dtype=dtypes.float32)).numpy(),
|
||||
op[1](np.array([x], dtype=_to_np_dtype(dtypes.float32))),
|
||||
@@ -43,7 +42,7 @@ class TestTranscendentalMath(unittest.TestCase):
|
||||
([(Tensor.sin, np.sin)] if dtypes.ulong in supported_dtypes else [])))
|
||||
def test_float16(self, x, op):
|
||||
# wrong nan behavior on Vulkan
|
||||
if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and CI and Device.DEFAULT == "WEBGPU" and not OSX: return
|
||||
if (math.isnan(x) or (x < 0 and op[0] == Tensor.log)) and Device.DEFAULT == "WEBGPU" and not OSX: return
|
||||
with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'):
|
||||
np.testing.assert_allclose(op[0](Tensor([x], dtype=dtypes.float16)).numpy(),
|
||||
op[1](np.array([x], dtype=_to_np_dtype(dtypes.float16))),
|
||||
@@ -117,7 +116,7 @@ class TestFloat16Log2(unittest.TestCase):
|
||||
np.testing.assert_allclose(result, expected, rtol=1e-3, err_msg=f"log2({val})")
|
||||
|
||||
@unittest.skipUnless(dtypes.float16 in supported_dtypes, f"no float16 on {Device.DEFAULT}")
|
||||
@unittest.skipIf(Device.DEFAULT == "WEBGPU" and CI, "Nan handling differs on Vulkan")
|
||||
@unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "Nan handling differs on Vulkan")
|
||||
def test_float16_log2_special(self):
|
||||
# special values: inf, -inf, nan, 0, negative
|
||||
with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'):
|
||||
|
||||
@@ -11,7 +11,7 @@ from tinygrad.engine.realize import run_linear
|
||||
from tinygrad.codegen import to_program
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
from tinygrad.renderer.ptx import PTXRenderer
|
||||
from test.helpers import to_uops_list, CI
|
||||
from test.helpers import to_uops_list
|
||||
|
||||
def run_uops(uops_list:list[UOp], bufs:list[Buffer]):
|
||||
buf_uops = [UOp.new_buffer(b.device, b.size, b.dtype) for b in bufs]
|
||||
@@ -173,8 +173,6 @@ class TestBoolUOps(TestUOps):
|
||||
def test_where_bool(self): self._test_top_bool_fxn(Ops.WHERE, lambda a,b,c: b if a else c)
|
||||
|
||||
class TestLocalAccess(unittest.TestCase):
|
||||
# NOTE: this is failing on METAL CI, no idea why. Works locally.
|
||||
@unittest.skipIf(Device.DEFAULT == "METAL" and CI, "failing only in CI")
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared memory")
|
||||
def test_local_basic(self):
|
||||
uops = []
|
||||
|
||||
9
test/external/external_test_example.py
vendored
9
test/external/external_test_example.py
vendored
@@ -1,8 +1,7 @@
|
||||
import unittest
|
||||
import unittest, sys
|
||||
from tinygrad import Device
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.helpers import getenv, OSX
|
||||
from test.helpers import CI
|
||||
|
||||
def multidevice_test(fxn):
|
||||
exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",")
|
||||
@@ -10,15 +9,15 @@ def multidevice_test(fxn):
|
||||
for device in Device._devices:
|
||||
# broken on OSX USB AMD, why?
|
||||
if device in ["DISK", "NPY", "FAKE", "DSP", "NULL"] or (OSX and device in ["AMD"]): continue
|
||||
if not CI: print(device)
|
||||
if sys.stdout.isatty(): print(device)
|
||||
if device in exclude_devices:
|
||||
if not CI: print(f"WARNING: {device} test is excluded")
|
||||
if sys.stdout.isatty(): print(f"WARNING: {device} test is excluded")
|
||||
continue
|
||||
with self.subTest(device=device):
|
||||
try:
|
||||
Device[device]
|
||||
except Exception:
|
||||
if not CI: print(f"WARNING: {device} test isn't running")
|
||||
if sys.stdout.isatty(): print(f"WARNING: {device} test isn't running")
|
||||
continue
|
||||
fxn(self, device)
|
||||
return ret
|
||||
|
||||
7
test/external/external_test_hcq.py
vendored
7
test/external/external_test_hcq.py
vendored
@@ -1,10 +1,9 @@
|
||||
import unittest, ctypes, struct, time, array
|
||||
from tinygrad import Device, Tensor, dtypes
|
||||
from tinygrad.helpers import to_mv
|
||||
from tinygrad.helpers import to_mv, DEV
|
||||
from tinygrad.device import Buffer, BufferSpec
|
||||
from tinygrad.engine.realize import get_runtime
|
||||
from tinygrad.codegen import to_program
|
||||
from test.helpers import CI
|
||||
|
||||
def _time_queue(q, d):
|
||||
st = time.perf_counter()
|
||||
@@ -149,7 +148,7 @@ class TestHCQ(unittest.TestCase):
|
||||
val = TestHCQ.b.uop.buffer.as_memoryview().cast("f")[1]
|
||||
assert val == 0.0, f"got val {val}, should not be updated"
|
||||
|
||||
@unittest.skipIf(CI, "Can't handle async update on CPU")
|
||||
@unittest.skipIf(DEV.interface.startswith("MOCK"), "Can't handle async update on CPU")
|
||||
def test_wait_signal(self):
|
||||
temp_signal = TestHCQ.d0._alloc_signal(value=0)
|
||||
TestHCQ.compute_queue().wait(temp_signal, value=1).signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
|
||||
@@ -160,7 +159,7 @@ class TestHCQ(unittest.TestCase):
|
||||
TestHCQ.d0._wait_signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value, timeout=100)
|
||||
TestHCQ.d0.timeline_value += 1
|
||||
|
||||
@unittest.skipIf(CI, "Can't handle async update on CPU")
|
||||
@unittest.skipIf(DEV.interface.startswith("MOCK"), "Can't handle async update on CPU")
|
||||
def test_wait_copy_signal(self):
|
||||
temp_signal = TestHCQ.d0._alloc_signal(value=0)
|
||||
TestHCQ.copy_queue().wait(temp_signal, value=1).signal(TestHCQ.d0.timeline_signal, TestHCQ.d0.timeline_value).submit(TestHCQ.d0)
|
||||
|
||||
3
test/external/external_test_jit_on_models.py
vendored
3
test/external/external_test_jit_on_models.py
vendored
@@ -3,7 +3,7 @@ import unittest
|
||||
import numpy as np
|
||||
from tinygrad import Tensor, dtypes
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
from test.helpers import derandomize_model, CI
|
||||
from test.helpers import derandomize_model
|
||||
|
||||
from examples.llama import Transformer
|
||||
|
||||
@@ -27,7 +27,6 @@ class TestJittedModels(unittest.TestCase):
|
||||
helper_test_jitted_correctness(lambda: (Tensor([[1,]]),), test, test_jit)
|
||||
dtypes.default_float = old_float
|
||||
|
||||
@unittest.skipUnless(not CI, "huge for CI")
|
||||
def test_jitted_stable_diffusion(self):
|
||||
from examples.stable_diffusion import UNetModel, unet_params
|
||||
model = UNetModel(**unet_params)
|
||||
|
||||
@@ -8,14 +8,11 @@ from tinygrad.tensor import _to_np_dtype
|
||||
from tinygrad.codegen import to_program
|
||||
from tinygrad.dtype import DType
|
||||
from tinygrad.nn.state import get_parameters
|
||||
from tinygrad.helpers import T, Target
|
||||
from tinygrad.helpers import T, Target, DEV
|
||||
from tinygrad.renderer import Renderer
|
||||
from tinygrad.codegen import full_rewrite_to_sink, line_rewrite, pm_linearize_cleanups
|
||||
from tinygrad.codegen.late.linearizer import linearize
|
||||
|
||||
# TODO: remove this everywhere!
|
||||
CI = os.getenv("CI", "") != ""
|
||||
|
||||
# decorator to skip slow tests by default, run with RUN_SLOW=1 to include them
|
||||
slow = unittest.skipUnless(os.getenv("RUN_SLOW"), "slow test, set RUN_SLOW=1 to run")
|
||||
from tinygrad.runtime.ops_python import PythonProgram, PythonRenderer, PythonCompiler
|
||||
@@ -100,7 +97,7 @@ def to_uops_list(u:list[UOp], ren=None) -> list[UOp]:
|
||||
|
||||
def not_support_multi_device():
|
||||
# CL and CUDA don't support multi device if in CI
|
||||
return CI and Device.DEFAULT in ("CL", "CUDA")
|
||||
return (Device.DEFAULT == "CL" and Device[Device.DEFAULT].count() < 2) or (Device.DEFAULT == "CUDA" and DEV.interface.startswith("MOCK"))
|
||||
|
||||
def needs_second_gpu(fn):
|
||||
@functools.wraps(fn)
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
import unittest
|
||||
import unittest, sys
|
||||
import numpy as np
|
||||
from tinygrad.nn.state import get_parameters, get_state_dict
|
||||
from tinygrad.nn import optim, Linear, Conv2d, BatchNorm2d
|
||||
from tinygrad.tensor import Tensor
|
||||
from extra.datasets import fetch_mnist
|
||||
from test.helpers import CI
|
||||
|
||||
def compare_tiny_torch(model, model_torch, X, Y):
|
||||
with Tensor.train():
|
||||
model_torch.train()
|
||||
model_state_dict = get_state_dict(model)
|
||||
for k,v in model_torch.named_parameters():
|
||||
if not CI: print(f"initting {k} from torch")
|
||||
if sys.stdout.isatty(): print(f"initting {k} from torch")
|
||||
model_state_dict[k].assign(Tensor(v.detach().numpy())).realize()
|
||||
|
||||
optimizer = optim.SGD(get_parameters(model), lr=0.001)
|
||||
@@ -35,14 +34,14 @@ def compare_tiny_torch(model, model_torch, X, Y):
|
||||
loss_torch.backward()
|
||||
|
||||
# assert losses match
|
||||
if not CI: print(loss.realize().numpy())
|
||||
if not CI: print(loss_torch.detach().numpy())
|
||||
if sys.stdout.isatty(): print(loss.realize().numpy())
|
||||
if sys.stdout.isatty(): print(loss_torch.detach().numpy())
|
||||
np.testing.assert_allclose(loss.realize().numpy(), loss_torch.detach().numpy(), atol=1e-4)
|
||||
|
||||
for k,v in list(model_torch.named_parameters())[::-1]:
|
||||
g = model_state_dict[k].grad.numpy()
|
||||
gt = v.grad.detach().numpy()
|
||||
if not CI: print("testing grads", k, model_state_dict[k].grad.dtype)
|
||||
if sys.stdout.isatty(): print("testing grads", k, model_state_dict[k].grad.dtype)
|
||||
np.testing.assert_allclose(g, gt, atol=1e-3, err_msg=f'grad mismatch {k}')
|
||||
|
||||
# take the steps
|
||||
@@ -51,7 +50,7 @@ def compare_tiny_torch(model, model_torch, X, Y):
|
||||
|
||||
# assert weights match
|
||||
for k,v in model_torch.named_parameters():
|
||||
if not CI: print("testing weight", k, model_state_dict[k].dtype)
|
||||
if sys.stdout.isatty(): print("testing weight", k, model_state_dict[k].dtype)
|
||||
np.testing.assert_allclose(model_state_dict[k].numpy(), v.detach().numpy(), atol=1e-3, err_msg=f'weight mismatch {k}')
|
||||
|
||||
def get_mnist_data():
|
||||
|
||||
@@ -5,7 +5,6 @@ from tinygrad import Tensor
|
||||
from tinygrad.device import Device, Compiler, enumerate_devices_str
|
||||
from tinygrad.helpers import diskcache_get, diskcache_put, getenv, Context, Target, WIN, OSX, DEV
|
||||
from tinygrad.runtime.support.c import DLL
|
||||
from test.helpers import CI
|
||||
|
||||
class TestDevice(unittest.TestCase):
|
||||
def test_canonicalize(self):
|
||||
@@ -67,7 +66,7 @@ class TestDevice(unittest.TestCase):
|
||||
self.assertNotEqual(result.returncode, 0)
|
||||
self.assertIn(b"deprecated", result.stderr)
|
||||
|
||||
@unittest.skipIf(WIN and CI, "skipping windows test") # TODO: subprocess causes memory violation?
|
||||
@unittest.skipIf(WIN, "skipping windows test") # TODO: subprocess causes memory violation?
|
||||
def test_env_overwrite_default_compiler(self):
|
||||
if Device.DEFAULT == "CPU":
|
||||
from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler
|
||||
@@ -95,7 +94,7 @@ class TestDevice(unittest.TestCase):
|
||||
shell=True, check=True, env={**os.environ, "DEV": "AMD:HIP"})
|
||||
else: self.skipTest("only run on CPU/AMD")
|
||||
|
||||
@unittest.skipIf(WIN and CI, "skipping windows test")
|
||||
@unittest.skipIf(WIN, "skipping windows test")
|
||||
def test_env_online(self):
|
||||
from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, ClangJITCompiler
|
||||
try: _, _ = CPULLVMCompiler(), ClangJITCompiler()
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import unittest, sys
|
||||
from tinygrad import Tensor, GlobalCounters, dtypes, Context
|
||||
from tinygrad.helpers import Profiling, WINO
|
||||
from test.helpers import CI
|
||||
from tinygrad.helpers import WINO
|
||||
|
||||
@unittest.skipIf(sys.platform.startswith("win"), "flaky on Windows")
|
||||
class TestWinograd(unittest.TestCase):
|
||||
@@ -11,11 +10,6 @@ class TestWinograd(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
WINO.value = self.old
|
||||
|
||||
def test_profile(self):
|
||||
x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize()
|
||||
with Profiling(enabled=not CI, sort='time'):
|
||||
Tensor.conv2d(x,w).realize()
|
||||
|
||||
def test_forward_kernels(self):
|
||||
x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize()
|
||||
out = Tensor.conv2d(x,w)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import unittest, numpy as np
|
||||
import unittest, numpy as np, os
|
||||
from tinygrad import Tensor, Device, TinyJit
|
||||
from tinygrad.helpers import Timing, OSX, getenv
|
||||
from test.helpers import CI
|
||||
from tinygrad.helpers import Timing, getenv
|
||||
import multiprocessing.shared_memory as shared_memory
|
||||
|
||||
N = getenv("NSZ", 256)
|
||||
@@ -12,7 +11,7 @@ class TestCopySpeed(unittest.TestCase):
|
||||
def testCopySHMtoDefault(self):
|
||||
s = shared_memory.SharedMemory(name="test_X", create=True, size=N*N*4)
|
||||
s.close()
|
||||
if CI and not OSX:
|
||||
if os.path.exists("/dev/shm"):
|
||||
t = Tensor.empty(N, N, device="disk:/dev/shm/test_X").realize()
|
||||
else:
|
||||
t = Tensor.empty(N, N, device="disk:shm:test_X").realize()
|
||||
@@ -77,11 +76,8 @@ class TestCopySpeed(unittest.TestCase):
|
||||
Device[Device.DEFAULT].synchronize()
|
||||
np.testing.assert_equal(t.numpy(), x.numpy())
|
||||
|
||||
@unittest.skipIf(CI, "CI doesn't have 6 GPUs")
|
||||
@unittest.skipIf(Device.DEFAULT != "CL", "only test this on CL")
|
||||
@unittest.skipIf(Device.DEFAULT != "CL" or Device[Device.DEFAULT].count() != 6, "only test this on CL, with 6 gpus")
|
||||
def testCopyCPUto6GPUs(self):
|
||||
from tinygrad.runtime.ops_cl import CLDevice
|
||||
if len(CLDevice.device_ids) != 6: raise unittest.SkipTest("computer doesn't have 6 GPUs")
|
||||
t = Tensor.ones(N, N, device="CPU").contiguous().realize()
|
||||
print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
|
||||
for _ in range(3):
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import unittest
|
||||
from tinygrad import Tensor, Device, dtypes
|
||||
from test.helpers import CI
|
||||
from tinygrad.helpers import DEV
|
||||
# similar to test/external/external_test_gpu_ast.py, but universal
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT in {"CUDA", "NV"} and CI, "slow on CUDA CI")
|
||||
@unittest.skipIf(Device.DEFAULT in {"CUDA", "NV"} and DEV.interface.startswith("MOCK"), "slow on ocelot")
|
||||
class TestSpecific(unittest.TestCase):
|
||||
# from openpilot
|
||||
|
||||
|
||||
@@ -9,11 +9,11 @@ import torch
|
||||
torch.set_num_threads(1)
|
||||
import time
|
||||
import numpy as np
|
||||
import sys
|
||||
np.set_printoptions(linewidth=160)
|
||||
from tinygrad import Tensor, Device, GlobalCounters, TinyJit
|
||||
from tinygrad.nn import Conv2d
|
||||
from tinygrad.helpers import colorize_float, getenv, DEV
|
||||
from test.helpers import CI
|
||||
|
||||
IN_CHANS = [int(x) for x in getenv("IN_CHANS", "4,16,64").split(",")]
|
||||
|
||||
@@ -96,7 +96,7 @@ def helper_test_generic(name, f1, f1_args, f2, f2_args):
|
||||
desc = "faster" if et_torch > et_tinygrad else "slower"
|
||||
flops = save_ops*1e-6
|
||||
mem = save_mem*1e-6
|
||||
print(("\r" if not CI else "")+f"{name:42s} {et_torch:7.2f} ms ({flops/et_torch:9.2f} GFLOPS {mem/et_torch:7.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:9.2f} GFLOPS {mem/et_tinygrad:7.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") # noqa: E501
|
||||
print(("\r" if sys.stdout.isatty() else "")+f"{name:42s} {et_torch:7.2f} ms ({flops/et_torch:9.2f} GFLOPS {mem/et_torch:7.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:9.2f} GFLOPS {mem/et_tinygrad:7.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") # noqa: E501
|
||||
atol, rtol = (1e-2, 1e-2) if torch_dt == torch.float16 else (1e-3, 1e-3)
|
||||
np.testing.assert_allclose(val_tinygrad, val_torch, atol=atol, rtol=rtol)
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
import unittest, random
|
||||
from tinygrad import Tensor, Context, Variable, TinyJit, dtypes, Device, nn
|
||||
from tinygrad.helpers import getenv
|
||||
from test.helpers import CI
|
||||
|
||||
class TestTiny(unittest.TestCase):
|
||||
|
||||
@@ -112,7 +111,7 @@ class TestTiny(unittest.TestCase):
|
||||
# *** a model ***
|
||||
|
||||
# TODO: this is failing because of how swizzling rewrites the ShapeTracker of the final STORE
|
||||
@unittest.skipIf(CI and Device.DEFAULT == "DSP", "failing because of make things that can't be images not images")
|
||||
@unittest.skipIf(Device.DEFAULT == "DSP", "failing because of make things that can't be images not images")
|
||||
def test_mnist(self):
|
||||
layers = [
|
||||
nn.Conv2d(1, 32, 5), Tensor.relu,
|
||||
@@ -131,7 +130,7 @@ class TestTiny(unittest.TestCase):
|
||||
self.assertEqual(len(probs[0]), 10)
|
||||
|
||||
# TODO: this is failing because of how swizzling rewrites the ShapeTracker of the final STORE
|
||||
@unittest.skipIf(CI and Device.DEFAULT == "DSP", "failing because of make things that can't be images not images")
|
||||
@unittest.skipIf(Device.DEFAULT == "DSP", "failing because of make things that can't be images not images")
|
||||
def test_mnist_backward(self):
|
||||
# NOTE: we don't have the whole model here for speed
|
||||
layers = [
|
||||
|
||||
@@ -2,12 +2,12 @@ import unittest, time
|
||||
from unittest.case import skipIf
|
||||
|
||||
from extra.bench_log import BenchEvent, InstantBenchEvent, WallTimeEvent, KernelTimeEvent, log_event_instant, _events, clear_events
|
||||
from tinygrad.helpers import Context
|
||||
from tinygrad.helpers import Context, DEV
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.device import Device
|
||||
from test.helpers import CI
|
||||
|
||||
_SKIP_KERNEL_TIMING = Device.DEFAULT == "WEBGPU" # WEBGPU kernel timing not supported
|
||||
# WEBGPU kernel timing not supported, ocelot CUDA is inaccurate
|
||||
_SKIP_KERNEL_TIMING = Device.DEFAULT == "WEBGPU" or (Device.DEFAULT == "CUDA" and DEV.interface.startswith("MOCK"))
|
||||
|
||||
class TestBenchLog(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@@ -38,7 +38,7 @@ class TestBenchLog(unittest.TestCase):
|
||||
self.assertGreater(_events[event]["wall"][0], 0)
|
||||
self.assertGreater(_events[event]["wall"][1], 0)
|
||||
|
||||
@skipIf(CI or _SKIP_KERNEL_TIMING, "ci timing is not accurate")
|
||||
@skipIf(_SKIP_KERNEL_TIMING, "ci timing is not accurate")
|
||||
def test_log_single_kernel_time(self):
|
||||
wall_times = []
|
||||
|
||||
@@ -55,7 +55,7 @@ class TestBenchLog(unittest.TestCase):
|
||||
self.assertLess(_events[event]["kernel"][0], wall_times[0])
|
||||
self.assertGreater(_events[event]["kernel"][0], 0)
|
||||
|
||||
@skipIf((CI and Device.DEFAULT == "CUDA") or _SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
|
||||
@skipIf(_SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
|
||||
def test_interleaved_wall_kernel_time(self):
|
||||
wall_times = []
|
||||
with Context(DEBUG=2):
|
||||
@@ -77,7 +77,7 @@ class TestBenchLog(unittest.TestCase):
|
||||
self.assertLess(_events[event]["kernel"][0], wall_times[0])
|
||||
self.assertGreater(_events[event]["kernel"][0], 0)
|
||||
|
||||
@skipIf((CI and Device.DEFAULT == "CUDA") or _SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
|
||||
@skipIf(_SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
|
||||
def test_stacked_wall_kernel_time(self):
|
||||
with Context(DEBUG=2):
|
||||
for event in BenchEvent:
|
||||
|
||||
@@ -4,7 +4,6 @@ import numpy as np
|
||||
from tinygrad import dtypes, Tensor, TinyJit, GlobalCounters, Variable
|
||||
from tinygrad.uop.ops import Ops, UOp
|
||||
from tinygrad.helpers import temp, DEV, Context
|
||||
from test.helpers import CI
|
||||
|
||||
N = 200 # has to be bigger than the cache to fail
|
||||
|
||||
@@ -189,7 +188,7 @@ class TestAssign(unittest.TestCase):
|
||||
new = a + times_a
|
||||
np.testing.assert_allclose(new.numpy(), 8)
|
||||
|
||||
@unittest.skipIf(CI and DEV.renderer == "LVP", "flaky in CI")
|
||||
@unittest.skipIf(DEV.renderer == "LVP", "flaky in CI")
|
||||
def test_double_assign(self):
|
||||
a = Tensor.ones(4).contiguous().realize()
|
||||
a += 1
|
||||
|
||||
@@ -2,11 +2,10 @@ import unittest
|
||||
import multiprocessing.shared_memory as shared_memory
|
||||
from tinygrad.helpers import WIN
|
||||
from tinygrad import Tensor, Device
|
||||
from test.helpers import CI
|
||||
import numpy as np
|
||||
|
||||
class TestRawShmBuffer(unittest.TestCase):
|
||||
@unittest.skipIf(WIN and CI, "only fails on CI windows instance")
|
||||
@unittest.skipIf(WIN, "only fails on CI windows instance")
|
||||
def test_e2e(self):
|
||||
t = Tensor.randn(2, 2, 2).realize()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user