diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript
index 7a82ff88b8..bad1cdd500 100644
--- a/selfdrive/modeld/SConscript
+++ b/selfdrive/modeld/SConscript
@@ -18,7 +18,7 @@ def estimate_pickle_max_size(onnx_size):
 tg_flags = {
     'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0',
     'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}', # tinygrad calls brew which needs a $HOME in the env
-}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0')
+}.get(arch, 'DEV=CPU:LLVM THREADS=0')
 
 # Get model metadata
 for model_name in ['driving_vision', 'driving_off_policy', 'driving_on_policy', 'dmonitoring_model']:
diff --git a/selfdrive/modeld/compile_warp.py b/selfdrive/modeld/compile_warp.py
index 75cc65f84c..47511f2a2b 100755
--- a/selfdrive/modeld/compile_warp.py
+++ b/selfdrive/modeld/compile_warp.py
@@ -94,11 +94,11 @@ def make_frame_prepare(cam_w, cam_h, model_w, model_h):
 
 
 def make_update_img_input(frame_prepare, model_w, model_h):
-  def update_img_input_tinygrad(tensor, frame, M_inv):
+  def update_img_input_tinygrad(frame_buffer, frame, M_inv):
     M_inv = M_inv.to(Device.DEFAULT)
     new_img = frame_prepare(frame, M_inv)
-    full_buffer = tensor[6:].cat(new_img, dim=0).contiguous()
-    return full_buffer, Tensor.cat(full_buffer[:6], full_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
+    frame_buffer.assign(frame_buffer[6:].cat(new_img, dim=0).contiguous())
+    return Tensor.cat(frame_buffer[:6], frame_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
   return update_img_input_tinygrad
 
 
@@ -107,9 +107,9 @@ def make_update_both_imgs(frame_prepare, model_w, model_h):
 
   def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
                                 calib_big_img_buffer, new_big_img, M_inv_big):
-    calib_img_buffer, calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
-    calib_big_img_buffer, calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
-    return calib_img_buffer, calib_img_pair, calib_big_img_buffer, calib_big_img_pair
+    calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
+    calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
+    return calib_img_pair, calib_big_img_pair
   return update_both_imgs_tinygrad
 
 
@@ -136,29 +136,18 @@ def compile_modeld_warp(cam_w, cam_h):
 
   full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
   big_full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
-  full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
-  big_full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
-
   for i in range(10):
-    new_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
     img_inputs = [full_buffer,
-                  Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
+                  Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
                   Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
-    new_big_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
     big_img_inputs = [big_full_buffer,
-                      Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
+                      Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
                       Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
     inputs = img_inputs + big_img_inputs
     Device.default.synchronize()
 
-    inputs_np = [x.numpy() for x in inputs]
-    inputs_np[0] = full_buffer_np
-    inputs_np[3] = big_full_buffer_np
-
     st = time.perf_counter()
-    out = update_img_jit(*inputs)
-    full_buffer = out[0].contiguous().realize().clone()
-    big_full_buffer = out[2].contiguous().realize().clone()
+    _ = update_img_jit(*inputs)
     mt = time.perf_counter()
     Device.default.synchronize()
     et = time.perf_counter()
@@ -183,7 +172,7 @@ def compile_dm_warp(cam_w, cam_h):
   warp_dm_jit = TinyJit(warp_dm, prune=True)
 
   for i in range(10):
-    inputs = [Tensor.from_blob((32 * Tensor.randn(yuv_size,) + 128).cast(dtype='uint8').realize().numpy().ctypes.data, (yuv_size,), dtype='uint8'),
+    inputs = [Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
               Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
     Device.default.synchronize()
     st = time.perf_counter()
diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py
index 6421ecfd21..07c3af4b7e 100755
--- a/selfdrive/modeld/modeld.py
+++ b/selfdrive/modeld/modeld.py
@@ -222,8 +222,7 @@ class ModelState:
 
     out = self.update_imgs(self.img_queues['img'], self.full_frames['img'], self.transforms['img'],
                            self.img_queues['big_img'], self.full_frames['big_img'], self.transforms['big_img'])
-    self.img_queues['img'], self.img_queues['big_img'] = out[0].realize(), out[2].realize()
-    vision_inputs = {'img': out[1], 'big_img': out[3]}
+    vision_inputs = {'img': out[0], 'big_img': out[1]}
 
     if prepare_only:
       return None
diff --git a/tinygrad_repo b/tinygrad_repo
index 2f55005ad9..1aa04eab08 160000
--- a/tinygrad_repo
+++ b/tinygrad_repo
@@ -1 +1 @@
-Subproject commit 2f55005ad93c777cca69b20dddc28c7f02f0eb01
+Subproject commit 1aa04eab086d9c22855cfe50f746235200d28867