mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-15 01:15:49 +08:00
* preallocate all realized buffers
* contiguous
* work
* comment that out
* move to schedule
* better
* correct fix
* just buffer
* disk bufs
* fixes disk tensor stuff
* fix symbolic stuff
* fix multi
* 162 failures
* bugfixes
* don't check that anymore
* fix schedule tests
* mnist should be contiguous
* type and buffer
* fix tests
* shrink axis correction
* mypy fixes
* tests skips
* same 37 failures
* dedup
* no shrink in the graph
* 29 failures
* skips
* fix custom kernel
* fix training
* those optimizations aren't supported currently
* simpler
* more correct
* tests
* 14 failures
* works
* fix that test
* broken
* 11 failures
* only kernel counts left
* fixes
* all tests pass
* remove tensor_map
* op test
* 200 -> 230
* test fixes
* fixes
* revert test_tiny thing
* guard
* revert that
* test tiny passes
* no contigs there
* base realize back
* Revert "no contigs there"
This reverts commit c45bb9fcfd.
* revert that
* chop many assigns
* 12 failures
* fix tests
* tests
* apply after
* pre-commit
* remove old code
* delete that
* fix types
* remove extra contig
* fix dataloader
* torch fix
* disk fix
* update kernel fusion numbers
* runs on amd
* restore kernel count
* add that rule back
* that
* disable that
* wrong
* add the correct rule for that folding
* more tests
* guard c1.arg
* no newlines
* realize those
* split into a different file
* remove detach/contig back
* skip 2
* update that
60 lines
2.2 KiB
Python
60 lines
2.2 KiB
Python
import unittest, sys
|
|
from tinygrad import Tensor, GlobalCounters, dtypes, Context
|
|
from tinygrad.helpers import CI, Profiling, WINO
|
|
|
|
@unittest.skipIf(sys.platform.startswith("win"), "flaky on Windows")
class TestWinograd(unittest.TestCase):
  """conv2d tests that run with WINO switched to 1 in setUp and restored in tearDown."""

  def setUp(self):
    # remember the current WINO value, then turn it on for the duration of the test
    self.old, WINO.value = WINO.value, 1

  def tearDown(self):
    # put WINO back to whatever it was before setUp ran
    WINO.value = self.old

  def _measure_conv(self, inp, weight, wino):
    # reset the global counters, realize one conv2d under Context(WINO=wino),
    # and return the resulting (global_ops, global_mem) pair
    GlobalCounters.reset()
    with Context(WINO=wino):
      Tensor.conv2d(inp, weight).realize()
    return GlobalCounters.global_ops, GlobalCounters.global_mem

  def test_profile(self):
    # realize a conv2d inside Profiling (only enabled when CI is falsy); there are no assertions, it just has to run
    inp = Tensor.rand(1,4,9,9).realize()
    weight = Tensor.rand(4,4,3,3).realize()
    with Profiling(enabled=not CI, sort='time'):
      Tensor.conv2d(inp, weight).realize()

  def test_forward_kernels(self):
    # the schedule of an unrealized conv2d on realized inputs is expected to have exactly 2 items
    inp = Tensor.rand(1,4,9,9).realize()
    weight = Tensor.rand(4,4,3,3).realize()
    conv = Tensor.conv2d(inp, weight)
    self.assertEqual(len(conv.schedule()), 2)

  def test_backward_kernels(self):
    # scheduling both gradients of mean(conv2d(..., padding=1)) together is expected to give exactly 4 items
    inp = Tensor.empty(1,4,9,9,requires_grad=True).realize()
    weight = Tensor.empty(4,4,3,3,requires_grad=True).realize()
    conv = Tensor.conv2d(inp, weight, padding=1)
    conv.mean().backward()
    grad_schedule = Tensor.schedule(inp.grad, weight.grad)
    self.assertEqual(len(grad_schedule), 4)

  def test_counters(self):
    # realize the same conv2d with WINO=1 and then WINO=0 and bound the ratio of the global op/mem counters
    IC, OC, X, Y = 4,4,9,9
    inp = Tensor.rand(1,IC,Y,X).realize()
    weight = Tensor.rand(OC,IC,3,3).realize()
    ops_wino, mem_wino = self._measure_conv(inp, weight, 1)
    ops_normal, mem_normal = self._measure_conv(inp, weight, 0)

    ops_ratio = ops_wino/ops_normal
    mem_ratio = mem_wino/mem_normal
    print(f"ops: normal {ops_normal:9d} wino {ops_wino:9d} ratio {ops_ratio:.2f}")
    print(f"mem: normal {mem_normal:9d} wino {mem_wino:9d} ratio {mem_ratio:.2f}")

    # TODO: what's optimal on this?
    self.assertLess(ops_ratio, 4.3)
    self.assertLess(mem_ratio, 4)

  def test_dtype(self):
    # the conv2d output dtype should be default_float for default-dtype inputs and half for half inputs
    IC, OC, X, Y = 4,4,9,9
    cases = (({}, dtypes.default_float), ({"dtype": dtypes.half}, dtypes.half))
    for empty_kwargs, expected in cases:
      inp, weight = Tensor.empty(1,IC,Y,X,**empty_kwargs), Tensor.empty(OC,IC,3,3,**empty_kwargs)
      self.assertEqual(Tensor.conv2d(inp, weight).dtype, expected)
|
|
if __name__ == "__main__":
  # executed as a script: hand control to the unittest runner at verbosity level 2
  unittest.main(verbosity=2)
|