mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-13 00:15:35 +08:00
Profile GPU and CPU copying. (#182)
Moving memory is slow; therefore, monitoring the time spent converting and limiting the number of copy operations can improve performance.
This commit is contained in:
@@ -157,11 +157,12 @@ class Tensor:
|
||||
|
||||
def cpu(self):
|
||||
if self.gpu:
|
||||
ret = Tensor(np.empty(self.shape, dtype=np.float32), gpu=False)
|
||||
cl.enqueue_copy(cl_queue, ret.data, self.data.cl, is_blocking=True)
|
||||
if self.grad:
|
||||
ret.grad = self.grad.cpu()
|
||||
return ret
|
||||
with ProfileOp("toCPU", [self]):
|
||||
ret = Tensor(np.empty(self.shape, dtype=np.float32), gpu=False)
|
||||
cl.enqueue_copy(cl_queue, ret.data, self.data.cl, is_blocking=True)
|
||||
if self.grad:
|
||||
ret.grad = self.grad.cpu()
|
||||
return ret
|
||||
else:
|
||||
return self
|
||||
|
||||
@@ -173,11 +174,12 @@ class Tensor:
|
||||
if not GPU:
|
||||
raise Exception("No GPU Support, install pyopencl")
|
||||
if not self.gpu:
|
||||
require_init_gpu()
|
||||
ret = Tensor(GPUBuffer(self.shape, self.data))
|
||||
if self.grad:
|
||||
ret.grad = self.grad.cuda()
|
||||
return ret
|
||||
with ProfileOp("toGPU", [self]):
|
||||
require_init_gpu()
|
||||
ret = Tensor(GPUBuffer(self.shape, self.data))
|
||||
if self.grad:
|
||||
ret.grad = self.grad.cuda()
|
||||
return ret
|
||||
else:
|
||||
return self
|
||||
|
||||
|
||||
Reference in New Issue
Block a user