Profile GPU and CPU copying. (#182)

Moving memory is slow; therefore, monitoring the time spent on conversions
and limiting the number of copy operations can improve performance.
This commit is contained in:
Liam
2020-12-12 21:15:47 +01:00
committed by GitHub
parent 8e8cbc74b3
commit bf9ba8718a

View File

@@ -157,11 +157,12 @@ class Tensor:
def cpu(self):
if self.gpu:
ret = Tensor(np.empty(self.shape, dtype=np.float32), gpu=False)
cl.enqueue_copy(cl_queue, ret.data, self.data.cl, is_blocking=True)
if self.grad:
ret.grad = self.grad.cpu()
return ret
with ProfileOp("toCPU", [self]):
ret = Tensor(np.empty(self.shape, dtype=np.float32), gpu=False)
cl.enqueue_copy(cl_queue, ret.data, self.data.cl, is_blocking=True)
if self.grad:
ret.grad = self.grad.cpu()
return ret
else:
return self
@@ -173,11 +174,12 @@ class Tensor:
if not GPU:
raise Exception("No GPU Support, install pyopencl")
if not self.gpu:
require_init_gpu()
ret = Tensor(GPUBuffer(self.shape, self.data))
if self.grad:
ret.grad = self.grad.cuda()
return ret
with ProfileOp("toGPU", [self]):
require_init_gpu()
ret = Tensor(GPUBuffer(self.shape, self.data))
if self.grad:
ret.grad = self.grad.cuda()
return ret
else:
return self