diff --git a/tinygrad/nn/optim.py b/tinygrad/nn/optim.py index 74f3d1297a..aa1d3f40a9 100644 --- a/tinygrad/nn/optim.py +++ b/tinygrad/nn/optim.py @@ -34,10 +34,9 @@ class SGD(Optimizer): def step(self) -> None: for i, t in enumerate(self.params): assert t.grad is not None - # this is needed since the grads can form a "diamond" + # contiguous is needed since the grads can allegedly form a "diamond" # TODO: fix this in lazy.py - t.grad.realize() - g = t.grad + self.wd * t.detach() + g = t.grad.contiguous() + self.wd * t.detach() if self.momentum: self.b[i].assign(self.momentum * self.b[i] + g) # NOTE: self.b[i] is zero on the first run, no if required g = (g + self.momentum * self.b[i]) if self.nesterov else self.b[i]