mirror of
https://github.com/dzid26/sunnypilot.git
synced 2026-06-10 08:44:14 +08:00
bump tg (#37700)
* bump tg * bump tg * assign * bump * cpu llvm * frame buffer updated in place, no need to return * don't bake in stale pointers * fix update image output indices * lint * bump
This commit is contained in:
committed by
GitHub
parent
cb32793300
commit
55c3885742
@@ -18,7 +18,7 @@ def estimate_pickle_max_size(onnx_size):
|
||||
tg_flags = {
|
||||
'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0',
|
||||
'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}', # tinygrad calls brew which needs a $HOME in the env
|
||||
}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0')
|
||||
}.get(arch, 'DEV=CPU:LLVM THREADS=0')
|
||||
|
||||
# Get model metadata
|
||||
for model_name in ['driving_vision', 'driving_off_policy', 'driving_on_policy', 'dmonitoring_model']:
|
||||
|
||||
@@ -94,11 +94,11 @@ def make_frame_prepare(cam_w, cam_h, model_w, model_h):
|
||||
|
||||
|
||||
def make_update_img_input(frame_prepare, model_w, model_h):
|
||||
def update_img_input_tinygrad(tensor, frame, M_inv):
|
||||
def update_img_input_tinygrad(frame_buffer, frame, M_inv):
|
||||
M_inv = M_inv.to(Device.DEFAULT)
|
||||
new_img = frame_prepare(frame, M_inv)
|
||||
full_buffer = tensor[6:].cat(new_img, dim=0).contiguous()
|
||||
return full_buffer, Tensor.cat(full_buffer[:6], full_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
|
||||
frame_buffer.assign(frame_buffer[6:].cat(new_img, dim=0).contiguous())
|
||||
return Tensor.cat(frame_buffer[:6], frame_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
|
||||
return update_img_input_tinygrad
|
||||
|
||||
|
||||
@@ -107,9 +107,9 @@ def make_update_both_imgs(frame_prepare, model_w, model_h):
|
||||
|
||||
def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
|
||||
calib_big_img_buffer, new_big_img, M_inv_big):
|
||||
calib_img_buffer, calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
|
||||
calib_big_img_buffer, calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
|
||||
return calib_img_buffer, calib_img_pair, calib_big_img_buffer, calib_big_img_pair
|
||||
calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
|
||||
calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
|
||||
return calib_img_pair, calib_big_img_pair
|
||||
return update_both_imgs_tinygrad
|
||||
|
||||
|
||||
@@ -136,29 +136,18 @@ def compile_modeld_warp(cam_w, cam_h):
|
||||
|
||||
full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
|
||||
big_full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
|
||||
full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
|
||||
big_full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
|
||||
|
||||
for i in range(10):
|
||||
new_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
|
||||
img_inputs = [full_buffer,
|
||||
Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
||||
Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
new_big_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
|
||||
big_img_inputs = [big_full_buffer,
|
||||
Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
||||
Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
inputs = img_inputs + big_img_inputs
|
||||
Device.default.synchronize()
|
||||
|
||||
inputs_np = [x.numpy() for x in inputs]
|
||||
inputs_np[0] = full_buffer_np
|
||||
inputs_np[3] = big_full_buffer_np
|
||||
|
||||
st = time.perf_counter()
|
||||
out = update_img_jit(*inputs)
|
||||
full_buffer = out[0].contiguous().realize().clone()
|
||||
big_full_buffer = out[2].contiguous().realize().clone()
|
||||
_ = update_img_jit(*inputs)
|
||||
mt = time.perf_counter()
|
||||
Device.default.synchronize()
|
||||
et = time.perf_counter()
|
||||
@@ -183,7 +172,7 @@ def compile_dm_warp(cam_w, cam_h):
|
||||
warp_dm_jit = TinyJit(warp_dm, prune=True)
|
||||
|
||||
for i in range(10):
|
||||
inputs = [Tensor.from_blob((32 * Tensor.randn(yuv_size,) + 128).cast(dtype='uint8').realize().numpy().ctypes.data, (yuv_size,), dtype='uint8'),
|
||||
inputs = [Tensor(np.random.randint(0, 256, yuv_size, dtype=np.uint8)).realize(),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
Device.default.synchronize()
|
||||
st = time.perf_counter()
|
||||
|
||||
@@ -222,8 +222,7 @@ class ModelState:
|
||||
|
||||
out = self.update_imgs(self.img_queues['img'], self.full_frames['img'], self.transforms['img'],
|
||||
self.img_queues['big_img'], self.full_frames['big_img'], self.transforms['big_img'])
|
||||
self.img_queues['img'], self.img_queues['big_img'] = out[0].realize(), out[2].realize()
|
||||
vision_inputs = {'img': out[1], 'big_img': out[3]}
|
||||
vision_inputs = {'img': out[0], 'big_img': out[1]}
|
||||
|
||||
if prepare_only:
|
||||
return None
|
||||
|
||||
Submodule tinygrad_repo updated: 2f55005ad9...1aa04eab08
Reference in New Issue
Block a user