diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index 7a82ff88b8..bad1cdd500 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -18,7 +18,7 @@ def estimate_pickle_max_size(onnx_size): tg_flags = { 'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0', 'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}', # tinygrad calls brew which needs a $HOME in the env -}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0') +}.get(arch, 'DEV=CPU:LLVM THREADS=0') # Get model metadata for model_name in ['driving_vision', 'driving_off_policy', 'driving_on_policy', 'dmonitoring_model']: diff --git a/selfdrive/modeld/compile_warp.py b/selfdrive/modeld/compile_warp.py index 75cc65f84c..47511f2a2b 100755 --- a/selfdrive/modeld/compile_warp.py +++ b/selfdrive/modeld/compile_warp.py @@ -94,11 +94,11 @@ def make_frame_prepare(cam_w, cam_h, model_w, model_h): def make_update_img_input(frame_prepare, model_w, model_h): - def update_img_input_tinygrad(tensor, frame, M_inv): + def update_img_input_tinygrad(frame_buffer, frame, M_inv): M_inv = M_inv.to(Device.DEFAULT) new_img = frame_prepare(frame, M_inv) - full_buffer = tensor[6:].cat(new_img, dim=0).contiguous() - return full_buffer, Tensor.cat(full_buffer[:6], full_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2) + frame_buffer.assign(frame_buffer[6:].cat(new_img, dim=0).contiguous()) + return Tensor.cat(frame_buffer[:6], frame_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2) return update_img_input_tinygrad @@ -107,9 +107,9 @@ def make_update_both_imgs(frame_prepare, model_w, model_h): def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv, calib_big_img_buffer, new_big_img, M_inv_big): - calib_img_buffer, calib_img_pair = update_img(calib_img_buffer, new_img, M_inv) - calib_big_img_buffer, calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big) - return calib_img_buffer, calib_img_pair, calib_big_img_buffer, calib_big_img_pair + calib_img_pair = update_img(calib_img_buffer, new_img, M_inv) + calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big) + return calib_img_pair, calib_big_img_pair return update_both_imgs_tinygrad @@ -136,29 +136,18 @@ def compile_modeld_warp(cam_w, cam_h): full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize() big_full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize() - full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8) - big_full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8) - for i in range(10): - new_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8) img_inputs = [full_buffer, - Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(), + Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(), Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')] - new_big_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8) big_img_inputs = [big_full_buffer, - Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(), + Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(), Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')] inputs = img_inputs + big_img_inputs Device.default.synchronize() - inputs_np = [x.numpy() for x in inputs] - inputs_np[0] = full_buffer_np - inputs_np[3] = big_full_buffer_np - st = time.perf_counter() - out = update_img_jit(*inputs) - full_buffer = out[0].contiguous().realize().clone() - big_full_buffer = out[2].contiguous().realize().clone() + _ = update_img_jit(*inputs) mt = time.perf_counter() Device.default.synchronize() et = time.perf_counter() @@ -183,7 +172,7 @@ def compile_dm_warp(cam_w, cam_h): warp_dm_jit = TinyJit(warp_dm, prune=True) for i in range(10): - inputs = [Tensor.from_blob((32 * Tensor.randn(yuv_size,) + 128).cast(dtype='uint8').realize().numpy().ctypes.data, (yuv_size,), dtype='uint8'), + inputs = [Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(), Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')] Device.default.synchronize() st = time.perf_counter() diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py index 6421ecfd21..07c3af4b7e 100755 --- a/selfdrive/modeld/modeld.py +++ b/selfdrive/modeld/modeld.py @@ -222,8 +222,7 @@ class ModelState: out = self.update_imgs(self.img_queues['img'], self.full_frames['img'], self.transforms['img'], self.img_queues['big_img'], self.full_frames['big_img'], self.transforms['big_img']) - self.img_queues['img'], self.img_queues['big_img'] = out[0].realize(), out[2].realize() - vision_inputs = {'img': out[1], 'big_img': out[3]} + vision_inputs = {'img': out[0], 'big_img': out[1]} if prepare_only: return None diff --git a/tinygrad_repo b/tinygrad_repo index 2f55005ad9..1aa04eab08 160000 --- a/tinygrad_repo +++ b/tinygrad_repo @@ -1 +1 @@ -Subproject commit 2f55005ad93c777cca69b20dddc28c7f02f0eb01 +Subproject commit 1aa04eab086d9c22855cfe50f746235200d28867