bump tg (#37700)

* bump tg
* bump tg
* assign
* bump
* cpu llvm
* frame buffer updated in place, no need to return
* don't bake in stale pointers
* fix update image output indices
* lint
* bump
2026-06-10 08:44:14 +08:00 · 2026-04-02 09:16:11 -07:00
parent cb32793300
commit 55c3885742
4 changed files with 13 additions and 25 deletions
--- a/selfdrive/modeld/SConscript
+++ b/selfdrive/modeld/SConscript
@@ -18,7 +18,7 @@ def estimate_pickle_max_size(onnx_size):
 tg_flags = {
    'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0',
    'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}', # tinygrad calls brew which needs a $HOME in the env
-}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0')
+}.get(arch, 'DEV=CPU:LLVM THREADS=0')

 # Get model metadata
 for model_name in ['driving_vision', 'driving_off_policy', 'driving_on_policy', 'dmonitoring_model']:
--- a/selfdrive/modeld/compile_warp.py
+++ b/selfdrive/modeld/compile_warp.py
@@ -94,11 +94,11 @@ def make_frame_prepare(cam_w, cam_h, model_w, model_h):


 def make_update_img_input(frame_prepare, model_w, model_h):
-  def update_img_input_tinygrad(tensor, frame, M_inv):
+  def update_img_input_tinygrad(frame_buffer, frame, M_inv):
    M_inv = M_inv.to(Device.DEFAULT)
    new_img = frame_prepare(frame, M_inv)
-    full_buffer = tensor[6:].cat(new_img, dim=0).contiguous()
-    return full_buffer, Tensor.cat(full_buffer[:6], full_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
+    frame_buffer.assign(frame_buffer[6:].cat(new_img, dim=0).contiguous())
+    return Tensor.cat(frame_buffer[:6], frame_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
  return update_img_input_tinygrad


@@ -107,9 +107,9 @@ def make_update_both_imgs(frame_prepare, model_w, model_h):

  def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
                                calib_big_img_buffer, new_big_img, M_inv_big):
-    calib_img_buffer, calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
-    calib_big_img_buffer, calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
-    return calib_img_buffer, calib_img_pair, calib_big_img_buffer, calib_big_img_pair
+    calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
+    calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
+    return calib_img_pair, calib_big_img_pair
  return update_both_imgs_tinygrad


@@ -136,29 +136,18 @@ def compile_modeld_warp(cam_w, cam_h):

  full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
  big_full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
-  full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
-  big_full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
-
  for i in range(10):
-    new_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
    img_inputs = [full_buffer,
-                  Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
+                  Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
                  Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
-    new_big_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
    big_img_inputs = [big_full_buffer,
-                      Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
+                      Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
                      Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
    inputs = img_inputs + big_img_inputs
    Device.default.synchronize()

-    inputs_np = [x.numpy() for x in inputs]
-    inputs_np[0] = full_buffer_np
-    inputs_np[3] = big_full_buffer_np
-
    st = time.perf_counter()
-    out = update_img_jit(*inputs)
-    full_buffer = out[0].contiguous().realize().clone()
-    big_full_buffer = out[2].contiguous().realize().clone()
+    _ = update_img_jit(*inputs)
    mt = time.perf_counter()
    Device.default.synchronize()
    et = time.perf_counter()
@@ -183,7 +172,7 @@ def compile_dm_warp(cam_w, cam_h):
  warp_dm_jit = TinyJit(warp_dm, prune=True)

  for i in range(10):
-    inputs = [Tensor.from_blob((32 * Tensor.randn(yuv_size,) + 128).cast(dtype='uint8').realize().numpy().ctypes.data, (yuv_size,), dtype='uint8'),
+    inputs = [Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
              Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
    Device.default.synchronize()
    st = time.perf_counter()
--- a/selfdrive/modeld/modeld.py
+++ b/selfdrive/modeld/modeld.py
@@ -222,8 +222,7 @@ class ModelState:

    out = self.update_imgs(self.img_queues['img'], self.full_frames['img'], self.transforms['img'],
                           self.img_queues['big_img'], self.full_frames['big_img'], self.transforms['big_img'])
-    self.img_queues['img'], self.img_queues['big_img'] = out[0].realize(), out[2].realize()
-    vision_inputs = {'img': out[1], 'big_img': out[3]}
+    vision_inputs = {'img': out[0], 'big_img': out[1]}

    if prepare_only:
      return None
--- a/2
+++ b/2