mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-06-22 21:02:06 +08:00
Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6654e9cdf9 | |||
| 6523084bfc | |||
| 94737c523d | |||
| 46fd88376e | |||
| 3ac95a7475 | |||
| 4cc84c5680 | |||
| 0768b2408c | |||
| 402f3c8966 | |||
| ae44e4d998 | |||
| ccf40652b6 | |||
| 271ed5e091 | |||
| 41dea5d48d | |||
| dc11e5fd84 | |||
| ced4a664cc | |||
| 03db277c22 | |||
| 11ed3800bf | |||
| 92526b878c | |||
| 66ff8ae52c | |||
| d85cb76304 | |||
| b4c613680e | |||
| f7511491f7 | |||
| 88b30e199b | |||
| 2898f394dd | |||
| 554cf9ca4a |
@@ -172,7 +172,7 @@ jobs:
|
|||||||
output_file="${{ env.MODELS_DIR }}/${base_name}_tinygrad.pkl"
|
output_file="${{ env.MODELS_DIR }}/${base_name}_tinygrad.pkl"
|
||||||
|
|
||||||
echo "Compiling: $onnx_file -> $output_file"
|
echo "Compiling: $onnx_file -> $output_file"
|
||||||
QCOM=1 python3 "${{ env.TINYGRAD_PATH }}/examples/openpilot/compile3.py" "$onnx_file" "$output_file"
|
DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 IMAGE=2 python3 "${{ env.TINYGRAD_PATH }}/examples/openpilot/compile3.py" "$onnx_file" "$output_file"
|
||||||
DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 python3 "${{ env.MODELS_DIR }}/../get_model_metadata.py" "$onnx_file" || true
|
DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 python3 "${{ env.MODELS_DIR }}/../get_model_metadata.py" "$onnx_file" || true
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|||||||
+47
-17
@@ -1,10 +1,23 @@
|
|||||||
import glob
|
import glob
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
from itertools import product
|
||||||
from SCons.Script import Value
|
from SCons.Script import Value
|
||||||
from openpilot.common.file_chunker import chunk_file, get_chunk_paths
|
from openpilot.common.file_chunker import chunk_file, get_chunk_paths
|
||||||
|
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||||
|
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE, DM_INPUT_SIZE
|
||||||
|
from openpilot.selfdrive.modeld.constants import ModelConstants
|
||||||
|
from openpilot.selfdrive.modeld.helpers import CompileConfig
|
||||||
from tinygrad import Device
|
from tinygrad import Device
|
||||||
|
|
||||||
|
CAMERA_CONFIGS = [
|
||||||
|
(_ar_ox_fisheye.width, _ar_ox_fisheye.height), # tici: 1928x1208
|
||||||
|
(_os_fisheye.width, _os_fisheye.height), # mici: 1344x760
|
||||||
|
]
|
||||||
|
MODELD_CONFIGS = [CompileConfig(cam_w, cam_h, prepare_only, 'driving_')
|
||||||
|
for (cam_w, cam_h), prepare_only in product(CAMERA_CONFIGS, [True, False])]
|
||||||
|
DM_WARP_CONFIGS = [CompileConfig(cam_w, cam_h, True, 'dm_') for cam_w, cam_h in CAMERA_CONFIGS]
|
||||||
|
|
||||||
Import('env', 'arch')
|
Import('env', 'arch')
|
||||||
chunker_file = File("#common/file_chunker.py")
|
chunker_file = File("#common/file_chunker.py")
|
||||||
lenv = env.Clone()
|
lenv = env.Clone()
|
||||||
@@ -16,18 +29,17 @@ tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "
|
|||||||
def estimate_pickle_max_size(onnx_size):
|
def estimate_pickle_max_size(onnx_size):
|
||||||
return 1.2 * onnx_size + 10 * 1024 * 1024 # 20% + 10MB is plenty
|
return 1.2 * onnx_size + 10 * 1024 * 1024 # 20% + 10MB is plenty
|
||||||
|
|
||||||
# THREADS=0 is needed to prevent a bug: https://github.com/tinygrad/tinygrad/issues/14689
|
|
||||||
# get fastest TG config
|
# get fastest TG config
|
||||||
available = set(Device.get_available_devices())
|
available = set(Device.get_available_devices())
|
||||||
# FIXME-SP: reset when we bump tg
|
if 'CUDA' in available:
|
||||||
if False: # 'CUDA' in available:
|
|
||||||
tg_backend = 'CUDA'
|
tg_backend = 'CUDA'
|
||||||
tg_flags = f'DEV={tg_backend}'
|
tg_flags = f'DEV={tg_backend}'
|
||||||
elif 'QCOM' in available:
|
elif 'QCOM' in available:
|
||||||
tg_backend = 'QCOM'
|
tg_backend = 'QCOM'
|
||||||
tg_flags = f'DEV={tg_backend} FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0'
|
tg_flags = f'DEV={tg_backend} FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1'
|
||||||
else:
|
else:
|
||||||
tg_backend = 'CPU' if arch == 'Darwin' else 'CPU CPU_LLVM=1' # FIXME-SP: reset when we bump tg
|
tg_backend = 'CPU' if arch == 'Darwin' else 'CPU:LLVM'
|
||||||
|
# THREADS=0 is needed to prevent a bug: https://github.com/tinygrad/tinygrad/issues/14689
|
||||||
tg_flags = f'DEV={tg_backend} THREADS=0'
|
tg_flags = f'DEV={tg_backend} THREADS=0'
|
||||||
|
|
||||||
def write_tg_compiled_flags(target, source, env):
|
def write_tg_compiled_flags(target, source, env):
|
||||||
@@ -54,14 +66,35 @@ for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
|
|||||||
image_flag = {
|
image_flag = {
|
||||||
'larch64': 'IMAGE=2',
|
'larch64': 'IMAGE=2',
|
||||||
}.get(arch, 'IMAGE=0')
|
}.get(arch, 'IMAGE=0')
|
||||||
script_files = [File(Dir("#selfdrive/modeld").File("compile_warp.py").abspath)]
|
modeld_dir = Dir("#selfdrive/modeld").abspath
|
||||||
compile_warp_cmd = f'{tg_flags} {mac_brew_string} python3 {Dir("#selfdrive/modeld").abspath}/compile_warp.py '
|
compile_modeld_script = [File(f"{modeld_dir}/compile_modeld.py")]
|
||||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
compile_dm_warp_script = [File(f"{modeld_dir}/compile_dm_warp.py")]
|
||||||
warp_targets = []
|
driving_onnx_deps = [File(f"models/{m}.onnx").abspath for m in ['driving_vision', 'driving_policy']]
|
||||||
for cam in [_ar_ox_fisheye, _os_fisheye]:
|
driving_metadata_deps = [File(f"models/{m}_metadata.pkl").abspath for m in ['driving_vision', 'driving_policy']]
|
||||||
w, h = cam.width, cam.height
|
|
||||||
warp_targets += [File(f"models/warp_{w}x{h}_tinygrad.pkl").abspath, File(f"models/dm_warp_{w}x{h}_tinygrad.pkl").abspath]
|
model_w, model_h = MEDMODEL_INPUT_SIZE
|
||||||
lenv.Command(warp_targets, tinygrad_files + script_files + [compiled_flags_node], compile_warp_cmd)
|
frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ
|
||||||
|
for cfg in MODELD_CONFIGS:
|
||||||
|
cmd = (f'{tg_flags} {mac_brew_string} {image_flag} python3 {modeld_dir}/compile_modeld.py '
|
||||||
|
f'--model-size {model_w}x{model_h} '
|
||||||
|
f'--nv12 {",".join(str(x) for x in cfg.nv12)} '
|
||||||
|
f'--vision-onnx {File("models/driving_vision.onnx").abspath} '
|
||||||
|
f'--policy-onnx {File("models/driving_policy.onnx").abspath} '
|
||||||
|
f'--output {cfg.pkl_path} --frame-skip {frame_skip}'
|
||||||
|
+ (' --prepare-only' if cfg.prepare_only else ''))
|
||||||
|
node = lenv.Command(cfg.pkl_path, tinygrad_files + compile_modeld_script + driving_onnx_deps + driving_metadata_deps + [chunker_file, compiled_flags_node], cmd)
|
||||||
|
onnx_sizes_sum = sum(os.path.getsize(f) for f in driving_onnx_deps)
|
||||||
|
chunk_targets = get_chunk_paths(cfg.pkl_path, estimate_pickle_max_size(onnx_sizes_sum))
|
||||||
|
def do_chunk(target, source, env, pkl=cfg.pkl_path, chunks=chunk_targets):
|
||||||
|
chunk_file(pkl, chunks)
|
||||||
|
lenv.Command(chunk_targets, node, do_chunk)
|
||||||
|
|
||||||
|
dm_w, dm_h = DM_INPUT_SIZE
|
||||||
|
for cfg in DM_WARP_CONFIGS:
|
||||||
|
cmd = (f'{tg_flags} {mac_brew_string} {image_flag} python3 {modeld_dir}/compile_dm_warp.py '
|
||||||
|
f'--nv12 {",".join(str(x) for x in cfg.nv12)} --warp-to {dm_w}x{dm_h} '
|
||||||
|
f'--output {cfg.pkl_path}')
|
||||||
|
lenv.Command(cfg.pkl_path, tinygrad_files + compile_dm_warp_script + compile_modeld_script + [compiled_flags_node], cmd)
|
||||||
|
|
||||||
def tg_compile(flags, model_name):
|
def tg_compile(flags, model_name):
|
||||||
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
|
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
|
||||||
@@ -82,7 +115,4 @@ def tg_compile(flags, model_name):
|
|||||||
do_chunk,
|
do_chunk,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Compile small models
|
tg_compile(tg_flags, 'dmonitoring_model')
|
||||||
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
|
|
||||||
tg_compile(tg_flags, model_name)
|
|
||||||
|
|
||||||
|
|||||||
Executable
+54
@@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import pickle
|
||||||
|
import time
|
||||||
|
|
||||||
|
from tinygrad.tensor import Tensor
|
||||||
|
from tinygrad.device import Device
|
||||||
|
from tinygrad.engine.jit import TinyJit
|
||||||
|
|
||||||
|
from openpilot.selfdrive.modeld.compile_modeld import NV12Frame, warp_perspective_tinygrad, _parse_size, _parse_nv12
|
||||||
|
|
||||||
|
|
||||||
|
def make_warp_dm(nv12: NV12Frame, dm_w, dm_h):
  """Build the driver-monitoring warp function for one NV12 frame layout.

  Args:
    nv12: frame layout; only width, height and stride are used here.
    dm_w: output width.
    dm_h: output height.

  Returns:
    A function ``warp_dm(input_frame, M_inv)`` that warps the first
    ``height * stride`` elements of ``input_frame`` with
    ``warp_perspective_tinygrad`` and reshapes the result to
    ``(-1, dm_h * dm_w)``.
  """
  cam_w, cam_h, stride, _y_height, _uv_height, _size = nv12
  plane_len = cam_h * stride
  row_padding = stride - cam_w  # extra elements at the end of every source row
  out_size = (dm_w, dm_h)
  src_size = (cam_h, cam_w)

  def warp_dm(input_frame, M_inv):
    # the matrix is moved to the default device and realized before it is used
    transform = M_inv.to(Device.DEFAULT).realize()
    warped = warp_perspective_tinygrad(input_frame[:plane_len], transform,
                                       out_size, src_size, row_padding)
    return warped.reshape(-1, dm_h * dm_w)

  return warp_dm
|
||||||
|
|
||||||
|
|
||||||
|
def compile_dm_warp(nv12: NV12Frame, dm_w, dm_h, pkl_path):
  """JIT the driver-monitoring warp for one camera layout and pickle it.

  The warp is wrapped in a TinyJit, called 10 times on a random uint8 frame of
  ``nv12.size`` elements and a random 3x3 matrix (printing enqueue and total
  time for each call), and the TinyJit object is then written to ``pkl_path``.

  Args:
    nv12: frame layout of the camera buffer.
    dm_w: output width.
    dm_h: output height.
    pkl_path: destination file for the pickled TinyJit.
  """
  print(f"Compiling DM warp for {nv12.width}x{nv12.height} -> {dm_w}x{dm_h}...")

  warp_fn = make_warp_dm(nv12, dm_w, dm_h)
  warp_dm_jit = TinyJit(warp_fn, prune=True)

  for i in range(10):
    frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8').realize()
    # the matrix is generated on the default device, then handed over as an NPY-device tensor
    random_matrix = Tensor.randn(3, 3).mul(8).realize().numpy()
    M_inv = Tensor(random_matrix, device='NPY')

    Device.default.synchronize()
    st = time.perf_counter()
    warp_dm_jit(frame, M_inv).realize()
    mt = time.perf_counter()  # the call has returned
    Device.default.synchronize()
    et = time.perf_counter()  # the device has finished
    print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")

  with open(pkl_path, "wb") as f:
    pickle.dump(warp_dm_jit, f)
  print(f" Saved to {pkl_path}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
  # CLI entry point: parse the frame layout and target size, then compile and save the warp.
  parser = argparse.ArgumentParser()
  parser.add_argument('--nv12', type=_parse_nv12, required=True,
                      help=f'NV12 frame layout: {",".join(NV12Frame._fields)}')
  parser.add_argument('--warp-to', type=_parse_size, required=True, help='DM input WxH')
  parser.add_argument('--output', required=True)
  cli = parser.parse_args()

  target_w, target_h = cli.warp_to
  compile_dm_warp(nv12=cli.nv12, dm_w=target_w, dm_h=target_h, pkl_path=cli.output)
|
||||||
Executable
+253
@@ -0,0 +1,253 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import pickle
|
||||||
|
import time
|
||||||
|
from functools import partial
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from tinygrad.tensor import Tensor
|
||||||
|
from tinygrad.helpers import Context
|
||||||
|
from tinygrad.device import Device
|
||||||
|
from tinygrad.engine.jit import TinyJit
|
||||||
|
from tinygrad.nn.onnx import OnnxRunner
|
||||||
|
|
||||||
|
# Workaround for https://github.com/tinygrad/tinygrad/issues/15682
# UOps whose op is Ops.UNIQUE are pickled as a bare call to UOp.unique(), so that
# call is what runs when they are unpickled; every other UOp keeps the stock
# __reduce__ behaviour.
from tinygrad.uop.ops import UOp, Ops
_orig = UOp.__reduce__


def _reduce_uop(self):
  if self.op is Ops.UNIQUE:
    return (UOp.unique, ())
  return _orig(self)


UOp.__reduce__ = _reduce_uop
|
||||||
|
|
||||||
|
|
||||||
|
# Layout of one NV12 camera buffer, in the order accepted by the --nv12 CLI option.
_NV12_FIELDS = ('width', 'height', 'stride', 'y_height', 'uv_height', 'size')
NV12Frame = namedtuple("NV12Frame", _NV12_FIELDS)

# Scales x and y by 0.5 in homogeneous coordinates, plus its inverse.
UV_SCALE_MATRIX = np.diag(np.array([0.5, 0.5, 1.0], dtype=np.float32))
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)
|
||||||
|
|
||||||
|
|
||||||
|
def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad):
  """Sample a flat source image through a 3x3 projective transform.

  For every destination pixel (x, y) the source coordinate is
  ``M_inv @ (x, y, 1)`` divided by its third component, rounded to the nearest
  integer and clipped to the source bounds.

  Args:
    src_flat: flattened source image with rows of ``src width + stride_pad`` elements.
    M_inv: 3x3 tensor mapping destination coordinates to source coordinates.
    dst_shape: ``(width, height)`` of the output.
    src_shape: ``(height, width)`` of the source (note the reversed order).
    stride_pad: extra elements at the end of each source row.

  Returns:
    ``src_flat`` gathered at the computed indices, one value per destination
    pixel in row-major order.
  """
  out_w, out_h = dst_shape
  in_h, in_w = src_shape
  row_stride = in_w + stride_pad

  # destination pixel grid, flattened row-major
  cols = Tensor.arange(out_w).reshape(1, out_w).expand(out_h, out_w).reshape(-1)
  rows = Tensor.arange(out_h).reshape(out_h, 1).expand(out_h, out_w).reshape(-1)

  # inline 3x3 matmul as elementwise to avoid reduce op (enables fusion with gather)
  def project(r):
    return M_inv[r, 0] * cols + M_inv[r, 1] * rows + M_inv[r, 2]

  proj_x = project(0)
  proj_y = project(1)
  proj_w = project(2)

  # perspective divide
  proj_x = proj_x / proj_w
  proj_y = proj_y / proj_w

  nearest_col = Tensor.round(proj_x).clip(0, in_w - 1).cast('int')
  nearest_row = Tensor.round(proj_y).clip(0, in_h - 1).cast('int')
  flat_index = nearest_row * row_stride + nearest_col

  return src_flat[flat_index]
|
||||||
|
|
||||||
|
|
||||||
|
def frames_to_tensor(frames):
  """Rearrange a 2D frame of shape (3H/2, W) into six half-resolution planes.

  The first H rows are split into four planes by row and column parity; the
  next H/4 rows and the H/4 rows after them are each reshaped to (H/2, W/2).

  Returns:
    A tensor of shape ``(6, H//2, W//2)``.
  """
  luma_h = (frames.shape[0] * 2) // 3
  width = frames.shape[1]
  half = (luma_h // 2, width // 2)
  first_tail_end = luma_h + luma_h // 4
  second_tail_end = luma_h + luma_h // 2

  # order: (even rows, even cols), (odd rows, even cols), (even rows, odd cols), (odd rows, odd cols)
  planes = [frames[row:luma_h:2, col::2] for col in (0, 1) for row in (0, 1)]
  planes.append(frames[luma_h:first_tail_end].reshape(half))
  planes.append(frames[first_tail_end:second_tail_end].reshape(half))

  return Tensor.cat(*planes, dim=0).reshape((6, luma_h // 2, width // 2))
|
||||||
|
|
||||||
|
|
||||||
|
def make_frame_prepare(nv12: NV12Frame, model_w, model_h):
  """Build the function that turns one NV12 camera buffer into model input planes.

  Args:
    nv12: frame layout (width, height, stride, y_height, uv_height, size).
    model_w: width of the warped image.
    model_h: height of the warped image.

  Returns:
    ``frame_prepare_tinygrad(input_frame, M_inv)``, which warps the Y plane at
    full size and the de-interleaved U and V planes at half size, stacks them
    into a ``(model_h * 3 // 2, model_w)`` image and passes that through
    ``frames_to_tensor``.
  """
  cam_w, cam_h, stride, y_height, uv_height, _size = nv12
  uv_start = stride * y_height
  uv_stop = uv_start + uv_height * stride
  row_padding = stride - cam_w
  chroma_src = (cam_h // 2, cam_w // 2)
  chroma_dst = (model_w // 2, model_h // 2)

  def frame_prepare_tinygrad(input_frame, M_inv):
    # UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
    M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]])
    # deinterleave NV12 UV plane (UVUV... -> separate U, V)
    uv = input_frame[uv_start:uv_stop].reshape(uv_height, stride)

    def warp_chroma(first_col):
      samples = uv[:cam_h // 2, first_col:cam_w:2].flatten()
      return warp_perspective_tinygrad(samples, M_inv_uv, chroma_dst, chroma_src, 0).realize()

    with Context(SPLIT_REDUCEOP=0):
      y = warp_perspective_tinygrad(input_frame[:cam_h * stride], M_inv,
                                    (model_w, model_h), (cam_h, cam_w), row_padding).realize()
      u = warp_chroma(0)
      v = warp_chroma(1)

    yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
    return frames_to_tensor(yuv)

  return frame_prepare_tinygrad
|
||||||
|
|
||||||
|
|
||||||
|
def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip):
  """Allocate the zero-initialised state and input buffers for the combined model.

  Args:
    vision_input_shapes: mapping with an ``'img'`` shape.
    policy_input_shapes: mapping with ``'features_buffer'``, ``'desire_pulse'``
      and ``'traffic_convention'`` shapes.
    frame_skip: spacing between sampled queue entries.

  Returns:
    ``(input_queues, npy)``. ``npy`` holds float32 numpy arrays for ``desire``,
    ``traffic_convention``, ``tfm`` and ``big_tfm``. ``input_queues`` holds the
    four realized queue tensors plus one NPY-device tensor built from each
    ``npy`` array.
  """
  img_shape = vision_input_shapes['img']  # e.g. (1, 12, 128, 256)
  feat_shape = policy_input_shapes['features_buffer']  # e.g. (1, 25, 512)
  desire_shape = policy_input_shapes['desire_pulse']  # e.g. (1, 25, 8)
  tc_shape = policy_input_shapes['traffic_convention']  # e.g. (1, 2)

  def zero_queue(shape, **kwargs):
    return Tensor.zeros(shape, **kwargs).contiguous().realize()

  # the image input stacks several 6-plane frames along its second axis
  stacked_frames = img_shape[1] // 6
  img_q_shape = (frame_skip * (stacked_frames - 1) + 1, 6, img_shape[2], img_shape[3])
  feat_q_shape = (frame_skip * (feat_shape[1] - 1) + 1, feat_shape[0], feat_shape[2])
  desire_q_shape = (frame_skip * desire_shape[1], desire_shape[0], desire_shape[2])

  npy_shapes = (
    ('desire', desire_shape[2]),
    ('traffic_convention', tc_shape),
    ('tfm', (3, 3)),
    ('big_tfm', (3, 3)),
  )
  npy = {name: np.zeros(shape, dtype=np.float32) for name, shape in npy_shapes}

  input_queues = {
    'img_q': zero_queue(img_q_shape, dtype='uint8'),
    'big_img_q': zero_queue(img_q_shape, dtype='uint8'),
    'feat_q': zero_queue(feat_q_shape),
    'desire_q': zero_queue(desire_q_shape),
  }
  # tensors on the NPY device are constructed directly from the numpy arrays above
  input_queues.update((name, Tensor(arr, device='NPY').realize()) for name, arr in npy.items())
  return input_queues, npy
|
||||||
|
|
||||||
|
|
||||||
|
def shift_and_sample(buf, new_val, sample_fn):
  """Drop the oldest entry of ``buf``, append ``new_val``, and sample the result.

  ``buf`` is updated in place via ``assign``; the return value is
  ``sample_fn(buf)``.
  """
  shifted = buf[1:].cat(new_val, dim=0).contiguous()
  buf.assign(shifted)
  return sample_fn(buf)
|
||||||
|
|
||||||
|
|
||||||
|
def sample_skip(buf, frame_skip):
  """Take every ``frame_skip``-th entry of ``buf``, merge the first two axes, add a batch axis."""
  strided = buf[::frame_skip].contiguous()
  merged = strided.flatten(0, 1)
  return merged.unsqueeze(0)
|
||||||
|
|
||||||
|
|
||||||
|
def sample_desire(buf, frame_skip):
  """Max-pool ``buf`` over groups of ``frame_skip`` consecutive entries.

  The leading axis is split into groups of ``frame_skip``, the maximum is taken
  within each group, the first two remaining axes are merged and a batch axis
  is added in front.
  """
  grouped = buf.reshape(-1, frame_skip, *buf.shape[1:])
  pooled = grouped.max(1)
  return pooled.flatten(0, 1).unsqueeze(0)
|
||||||
|
|
||||||
|
|
||||||
|
def make_run_policy(vision_runner, policy_runner, nv12: NV12Frame, model_w, model_h,
                    vision_features_slice, frame_skip, prepare_only=False):
  """Build the single function that prepares frames and runs both models.

  Args:
    vision_runner: callable taking ``{'img', 'big_img'}`` and returning a mapping of outputs.
    policy_runner: callable taking the policy inputs and returning a mapping of outputs.
    nv12: camera frame layout.
    model_w: width of the warped model input.
    model_h: height of the warped model input.
    vision_features_slice: slice of the vision output pushed into the feature queue.
    frame_skip: sampling stride used for every queue.
    prepare_only: when true, stop after the image queues are updated.

  Returns:
    ``run_policy(...)``, which returns ``(img, big_img)`` when ``prepare_only``
    is set and ``(vision_out, policy_out)`` otherwise. The queue arguments are
    updated in place.
  """
  frame_prepare = make_frame_prepare(nv12, model_w, model_h)
  sample_skip_fn = partial(sample_skip, frame_skip=frame_skip)
  sample_desire_fn = partial(sample_desire, frame_skip=frame_skip)

  def push_frame(queue, raw_frame, transform):
    prepared = frame_prepare(raw_frame, transform).unsqueeze(0)
    return shift_and_sample(queue, prepared, sample_skip_fn)

  def first_output(runner, feeds):
    # only the first output of each runner is used, cast to float32
    return next(iter(runner(feeds).values())).cast('float32')

  def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm, frame, big_frame):
    # move the small inputs onto the default device and realize them together
    tfm, big_tfm, desire, traffic_convention = (
      t.to(Device.DEFAULT) for t in (tfm, big_tfm, desire, traffic_convention)
    )
    Tensor.realize(tfm, big_tfm, desire, traffic_convention)

    img = push_frame(img_q, frame, tfm)
    big_img = push_frame(big_img_q, big_frame, big_tfm)

    if prepare_only:
      return img, big_img

    vision_out = first_output(vision_runner, {'img': img, 'big_img': big_img})

    new_feat = vision_out[:, vision_features_slice].reshape(1, -1).unsqueeze(0)
    feat_buf = shift_and_sample(feat_q, new_feat, sample_skip_fn)
    desire_buf = shift_and_sample(desire_q, desire.reshape(1, 1, -1), sample_desire_fn)

    policy_inputs = {
      'features_buffer': feat_buf,
      'desire_pulse': desire_buf,
      'traffic_convention': traffic_convention,
    }
    policy_out = first_output(policy_runner, policy_inputs)

    return vision_out, policy_out

  return run_policy
|
||||||
|
|
||||||
|
|
||||||
|
def compile_modeld(nv12: NV12Frame, model_w, model_h, prepare_only, frame_skip,
                   vision_onnx, policy_onnx, pkl_path):
  """Compile the combined frame-prepare + vision + policy function and pickle it.

  Steps:
    1. Load both ONNX models and read their metadata pickles.
    2. Run the function unjitted on seeded random inputs to get a baseline.
    3. Run it through TinyJit with the same seed and require identical
       outputs and queue contents.
    4. Pickle the TinyJit to ``pkl_path``, load it back, and require that it
       matches the baseline for the same seed and differs for another seed.

  Args:
    nv12: camera frame layout.
    model_w: width of the warped model input.
    model_h: height of the warped model input.
    prepare_only: compile only the frame-preparation part.
    frame_skip: sampling stride for the input queues.
    vision_onnx: path of the vision ONNX model.
    policy_onnx: path of the policy ONNX model.
    pkl_path: destination of the pickled TinyJit.

  Raises:
    AssertionError: if any of the baseline comparisons has the wrong outcome.
  """
  # imported lazily; resolved relative to how this script is launched
  from get_model_metadata import metadata_path_for

  print(f"Compiling combined policy JIT for {nv12.width}x{nv12.height} (prepare_only={prepare_only})...")

  vision_runner = OnnxRunner(vision_onnx)
  policy_runner = OnnxRunner(policy_onnx)

  with open(metadata_path_for(vision_onnx), 'rb') as f:
    vision_metadata = pickle.load(f)
  vision_features_slice = vision_metadata['output_slices']['hidden_state']
  vision_input_shapes = vision_metadata['input_shapes']
  with open(metadata_path_for(policy_onnx), 'rb') as f:
    policy_input_shapes = pickle.load(f)['input_shapes']

  _run = make_run_policy(vision_runner, policy_runner, nv12, model_w, model_h,
                         vision_features_slice, frame_skip, prepare_only)
  run_policy_jit = TinyJit(_run, prune=True)

  N_RUNS = 3
  SEED = 42

  def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_match=True):
    """Run ``fn`` N_RUNS times on seeded random inputs and compare against a baseline."""
    input_queues, npy = make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip)
    np.random.seed(seed)
    Tensor.manual_seed(seed)

    def check_against_baseline(label, got, baseline):
      # Raised explicitly rather than via `assert` so the check still runs
      # when Python is started with -O / PYTHONOPTIMIZE.
      if baseline is None:
        return
      match = all(np.array_equal(a, b) for a, b in zip(got, baseline, strict=True))
      if match != expect_match:
        raise AssertionError(f"{label} {'differ from' if expect_match else 'match'} baseline (seed={seed})")

    for i in range(N_RUNS):
      frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8').realize()
      big_frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8').realize()
      # refill the numpy-backed inputs in place
      for v in npy.values():
        v[:] = np.random.randn(*v.shape).astype(v.dtype)
      Device.default.synchronize()
      st = time.perf_counter()
      outs = fn(**input_queues, frame=frame, big_frame=big_frame)
      mt = time.perf_counter()
      for o in outs:
        # .realize() not needed once jitted, but needed for unjitted fn
        o.realize()
      Device.default.synchronize()
      et = time.perf_counter()
      print(f" [{i+1}/{N_RUNS}] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")

    # snapshot the results of the last run; a single copy is enough to detach them
    val = [np.copy(v.numpy()) for v in outs]
    buffers = [np.copy(v.numpy()) for v in input_queues.values()]

    check_against_baseline("outputs", val, test_val)
    check_against_baseline("buffers", buffers, test_buffers)
    return fn, val, buffers

  print('run unjitted')
  _, test_val, test_buffers = random_inputs_run_fn(_run, seed=SEED)
  print('capture + replay')
  run_policy_jit, _, _ = random_inputs_run_fn(run_policy_jit, SEED, test_val, test_buffers)

  print('pickle round trip')
  with open(pkl_path, "wb") as f:
    pickle.dump(run_policy_jit, f)
  print(f" Saved to {pkl_path}")
  with open(pkl_path, "rb") as f:
    run_policy_jit = pickle.load(f)
  random_inputs_run_fn(run_policy_jit, SEED, test_val, test_buffers, expect_match=True)
  # a different seed must NOT reproduce the baseline
  random_inputs_run_fn(run_policy_jit, SEED+1, test_val, test_buffers, expect_match=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_size(s):
|
||||||
|
w, h = s.lower().split('x')
|
||||||
|
return int(w), int(h)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_nv12(s):
  """Parse a comma-separated list of integers into an ``NV12Frame``.

  Intended as an argparse ``type=`` callable: a wrong field count raises
  ``argparse.ArgumentTypeError`` so argparse reports a usage error instead of
  a traceback, and the check is not stripped under ``python -O``.

  Raises:
    argparse.ArgumentTypeError: if the number of fields is wrong.
    ValueError: if a field is not an integer.
  """
  parts = s.split(',')
  if len(parts) != len(NV12Frame._fields):
    raise argparse.ArgumentTypeError(f"--nv12 expects {','.join(NV12Frame._fields)} (got {s!r})")
  return NV12Frame(*(int(x) for x in parts))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
  # CLI entry point: every option is forwarded to compile_modeld.
  parser = argparse.ArgumentParser()
  parser.add_argument('--model-size', type=_parse_size, required=True, help='model input WxH')
  parser.add_argument('--nv12', type=_parse_nv12, required=True,
                      help=f'NV12 frame layout: {",".join(NV12Frame._fields)}')
  parser.add_argument('--vision-onnx', required=True)
  parser.add_argument('--policy-onnx', required=True)
  parser.add_argument('--output', required=True)
  parser.add_argument('--prepare-only', action='store_true')
  parser.add_argument('--frame-skip', type=int, required=True)
  cli = parser.parse_args()

  width, height = cli.model_size
  compile_modeld(
    nv12=cli.nv12,
    model_w=width,
    model_h=height,
    prepare_only=cli.prepare_only,
    frame_skip=cli.frame_skip,
    vision_onnx=cli.vision_onnx,
    policy_onnx=cli.policy_onnx,
    pkl_path=cli.output,
  )
|
||||||
@@ -1,201 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import time
|
|
||||||
import pickle
|
|
||||||
import numpy as np
|
|
||||||
from pathlib import Path
|
|
||||||
from tinygrad.tensor import Tensor
|
|
||||||
from tinygrad.helpers import Context
|
|
||||||
from tinygrad.device import Device
|
|
||||||
from tinygrad.engine.jit import TinyJit
|
|
||||||
|
|
||||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
|
||||||
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE, DM_INPUT_SIZE
|
|
||||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
|
||||||
|
|
||||||
MODELS_DIR = Path(__file__).parent / 'models'
|
|
||||||
|
|
||||||
CAMERA_CONFIGS = [
|
|
||||||
(_ar_ox_fisheye.width, _ar_ox_fisheye.height), # tici: 1928x1208
|
|
||||||
(_os_fisheye.width, _os_fisheye.height), # mici: 1344x760
|
|
||||||
]
|
|
||||||
|
|
||||||
UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
|
|
||||||
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)
|
|
||||||
|
|
||||||
IMG_BUFFER_SHAPE = (30, MEDMODEL_INPUT_SIZE[1] // 2, MEDMODEL_INPUT_SIZE[0] // 2)
|
|
||||||
|
|
||||||
|
|
||||||
def warp_pkl_path(w, h):
|
|
||||||
return MODELS_DIR / f'warp_{w}x{h}_tinygrad.pkl'
|
|
||||||
|
|
||||||
|
|
||||||
def dm_warp_pkl_path(w, h):
|
|
||||||
return MODELS_DIR / f'dm_warp_{w}x{h}_tinygrad.pkl'
|
|
||||||
|
|
||||||
|
|
||||||
def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad):
|
|
||||||
w_dst, h_dst = dst_shape
|
|
||||||
h_src, w_src = src_shape
|
|
||||||
|
|
||||||
x = Tensor.arange(w_dst).reshape(1, w_dst).expand(h_dst, w_dst).reshape(-1)
|
|
||||||
y = Tensor.arange(h_dst).reshape(h_dst, 1).expand(h_dst, w_dst).reshape(-1)
|
|
||||||
|
|
||||||
# inline 3x3 matmul as elementwise to avoid reduce op (enables fusion with gather)
|
|
||||||
src_x = M_inv[0, 0] * x + M_inv[0, 1] * y + M_inv[0, 2]
|
|
||||||
src_y = M_inv[1, 0] * x + M_inv[1, 1] * y + M_inv[1, 2]
|
|
||||||
src_w = M_inv[2, 0] * x + M_inv[2, 1] * y + M_inv[2, 2]
|
|
||||||
|
|
||||||
src_x = src_x / src_w
|
|
||||||
src_y = src_y / src_w
|
|
||||||
|
|
||||||
x_nn_clipped = Tensor.round(src_x).clip(0, w_src - 1).cast('int')
|
|
||||||
y_nn_clipped = Tensor.round(src_y).clip(0, h_src - 1).cast('int')
|
|
||||||
idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped
|
|
||||||
|
|
||||||
return src_flat[idx]
|
|
||||||
|
|
||||||
|
|
||||||
def frames_to_tensor(frames, model_w, model_h):
|
|
||||||
H = (frames.shape[0] * 2) // 3
|
|
||||||
W = frames.shape[1]
|
|
||||||
in_img1 = Tensor.cat(frames[0:H:2, 0::2],
|
|
||||||
frames[1:H:2, 0::2],
|
|
||||||
frames[0:H:2, 1::2],
|
|
||||||
frames[1:H:2, 1::2],
|
|
||||||
frames[H:H+H//4].reshape((H//2, W//2)),
|
|
||||||
frames[H+H//4:H+H//2].reshape((H//2, W//2)), dim=0).reshape((6, H//2, W//2))
|
|
||||||
return in_img1
|
|
||||||
|
|
||||||
|
|
||||||
def make_frame_prepare(cam_w, cam_h, model_w, model_h):
|
|
||||||
stride, y_height, uv_height, _ = get_nv12_info(cam_w, cam_h)
|
|
||||||
uv_offset = stride * y_height
|
|
||||||
stride_pad = stride - cam_w
|
|
||||||
|
|
||||||
def frame_prepare_tinygrad(input_frame, M_inv):
|
|
||||||
# UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
|
|
||||||
M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]])
|
|
||||||
# deinterleave NV12 UV plane (UVUV... -> separate U, V)
|
|
||||||
uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
|
|
||||||
with Context(SPLIT_REDUCEOP=0):
|
|
||||||
y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
|
|
||||||
M_inv, (model_w, model_h),
|
|
||||||
(cam_h, cam_w), stride_pad).realize()
|
|
||||||
u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
|
|
||||||
M_inv_uv, (model_w//2, model_h//2),
|
|
||||||
(cam_h//2, cam_w//2), 0).realize()
|
|
||||||
v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
|
|
||||||
M_inv_uv, (model_w//2, model_h//2),
|
|
||||||
(cam_h//2, cam_w//2), 0).realize()
|
|
||||||
yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
|
|
||||||
tensor = frames_to_tensor(yuv, model_w, model_h)
|
|
||||||
return tensor
|
|
||||||
return frame_prepare_tinygrad
|
|
||||||
|
|
||||||
|
|
||||||
def make_update_img_input(frame_prepare, model_w, model_h):
|
|
||||||
def update_img_input_tinygrad(tensor, frame, M_inv):
|
|
||||||
M_inv = M_inv.to(Device.DEFAULT)
|
|
||||||
new_img = frame_prepare(frame, M_inv)
|
|
||||||
tensor.assign(tensor[6:].cat(new_img, dim=0).contiguous())
|
|
||||||
return Tensor.cat(tensor[:6], tensor[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
|
|
||||||
return update_img_input_tinygrad
|
|
||||||
|
|
||||||
|
|
||||||
def make_update_both_imgs(frame_prepare, model_w, model_h):
|
|
||||||
update_img = make_update_img_input(frame_prepare, model_w, model_h)
|
|
||||||
|
|
||||||
def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
|
|
||||||
calib_big_img_buffer, new_big_img, M_inv_big):
|
|
||||||
calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
|
|
||||||
calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
|
|
||||||
return calib_img_pair, calib_big_img_pair
|
|
||||||
return update_both_imgs_tinygrad
|
|
||||||
|
|
||||||
|
|
||||||
def make_warp_dm(cam_w, cam_h, dm_w, dm_h):
|
|
||||||
stride, y_height, _, _ = get_nv12_info(cam_w, cam_h)
|
|
||||||
stride_pad = stride - cam_w
|
|
||||||
|
|
||||||
def warp_dm(input_frame, M_inv):
|
|
||||||
M_inv = M_inv.to(Device.DEFAULT)
|
|
||||||
result = warp_perspective_tinygrad(input_frame[:cam_h*stride], M_inv, (dm_w, dm_h), (cam_h, cam_w), stride_pad).reshape(-1, dm_h * dm_w)
|
|
||||||
return result
|
|
||||||
return warp_dm
|
|
||||||
|
|
||||||
|
|
||||||
def compile_modeld_warp(cam_w, cam_h):
|
|
||||||
model_w, model_h = MEDMODEL_INPUT_SIZE
|
|
||||||
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
|
||||||
|
|
||||||
print(f"Compiling modeld warp for {cam_w}x{cam_h}...")
|
|
||||||
|
|
||||||
frame_prepare = make_frame_prepare(cam_w, cam_h, model_w, model_h)
|
|
||||||
update_both_imgs = make_update_both_imgs(frame_prepare, model_w, model_h)
|
|
||||||
update_img_jit = TinyJit(update_both_imgs, prune=True)
|
|
||||||
|
|
||||||
full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
|
|
||||||
big_full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
|
|
||||||
new_frame_np = np.random.randint(0, 256, yuv_size, dtype=np.uint8)
|
|
||||||
new_big_frame_np = np.random.randint(0, 256, yuv_size, dtype=np.uint8)
|
|
||||||
for i in range(10):
|
|
||||||
img_inputs = [full_buffer,
|
|
||||||
Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
|
||||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
|
||||||
big_img_inputs = [big_full_buffer,
|
|
||||||
Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
|
||||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
|
||||||
inputs = img_inputs + big_img_inputs
|
|
||||||
Device.default.synchronize()
|
|
||||||
|
|
||||||
st = time.perf_counter()
|
|
||||||
_ = update_img_jit(*inputs)
|
|
||||||
mt = time.perf_counter()
|
|
||||||
Device.default.synchronize()
|
|
||||||
et = time.perf_counter()
|
|
||||||
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
|
||||||
|
|
||||||
pkl_path = warp_pkl_path(cam_w, cam_h)
|
|
||||||
with open(pkl_path, "wb") as f:
|
|
||||||
pickle.dump(update_img_jit, f)
|
|
||||||
print(f" Saved to {pkl_path}")
|
|
||||||
|
|
||||||
jit = pickle.load(open(pkl_path, "rb"))
|
|
||||||
jit(*inputs)
|
|
||||||
|
|
||||||
|
|
||||||
def compile_dm_warp(cam_w, cam_h):
|
|
||||||
dm_w, dm_h = DM_INPUT_SIZE
|
|
||||||
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
|
||||||
|
|
||||||
print(f"Compiling DM warp for {cam_w}x{cam_h}...")
|
|
||||||
|
|
||||||
warp_dm = make_warp_dm(cam_w, cam_h, dm_w, dm_h)
|
|
||||||
warp_dm_jit = TinyJit(warp_dm, prune=True)
|
|
||||||
|
|
||||||
new_frame_np = np.random.randint(0, 256, yuv_size, dtype=np.uint8)
|
|
||||||
for i in range(10):
|
|
||||||
inputs = [Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
|
||||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
|
||||||
Device.default.synchronize()
|
|
||||||
st = time.perf_counter()
|
|
||||||
warp_dm_jit(*inputs)
|
|
||||||
mt = time.perf_counter()
|
|
||||||
Device.default.synchronize()
|
|
||||||
et = time.perf_counter()
|
|
||||||
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
|
||||||
|
|
||||||
pkl_path = dm_warp_pkl_path(cam_w, cam_h)
|
|
||||||
with open(pkl_path, "wb") as f:
|
|
||||||
pickle.dump(warp_dm_jit, f)
|
|
||||||
print(f" Saved to {pkl_path}")
|
|
||||||
|
|
||||||
|
|
||||||
def run_and_save_pickle():
  """Compile and pickle the modeld warp, then the DM warp, for every entry of CAMERA_CONFIGS."""
  for resolution in CAMERA_CONFIGS:
    cam_w, cam_h = resolution
    compile_modeld_warp(cam_w, cam_h)
    compile_dm_warp(cam_w, cam_h)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
run_and_save_pickle()
|
|
||||||
@@ -1,12 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import os
|
import os
|
||||||
from openpilot.selfdrive.modeld.tinygrad_helpers import MODELS_DIR, set_tinygrad_backend_from_compiled_flags
|
from openpilot.selfdrive.modeld.helpers import MODELS_DIR, CompileConfig, set_tinygrad_backend_from_compiled_flags
|
||||||
set_tinygrad_backend_from_compiled_flags()
|
set_tinygrad_backend_from_compiled_flags()
|
||||||
|
|
||||||
# FIXME-SP: remove once we bump tg
|
|
||||||
from openpilot.system.hardware import TICI
|
|
||||||
os.environ['DEV'] = 'QCOM' if TICI else 'CPU'
|
|
||||||
|
|
||||||
from tinygrad.tensor import Tensor
|
from tinygrad.tensor import Tensor
|
||||||
import time
|
import time
|
||||||
import pickle
|
import pickle
|
||||||
@@ -32,7 +28,7 @@ class ModelState:
|
|||||||
inputs: dict[str, np.ndarray]
|
inputs: dict[str, np.ndarray]
|
||||||
output: np.ndarray
|
output: np.ndarray
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, cam_w: int, cam_h: int):
|
||||||
with open(METADATA_PATH, 'rb') as f:
|
with open(METADATA_PATH, 'rb') as f:
|
||||||
model_metadata = pickle.load(f)
|
model_metadata = pickle.load(f)
|
||||||
self.input_shapes = model_metadata['input_shapes']
|
self.input_shapes = model_metadata['input_shapes']
|
||||||
@@ -44,22 +40,18 @@ class ModelState:
|
|||||||
|
|
||||||
self.warp_inputs_np = {'transform': np.zeros((3,3), dtype=np.float32)}
|
self.warp_inputs_np = {'transform': np.zeros((3,3), dtype=np.float32)}
|
||||||
self.warp_inputs = {k: Tensor(v, device='NPY') for k,v in self.warp_inputs_np.items()}
|
self.warp_inputs = {k: Tensor(v, device='NPY') for k,v in self.warp_inputs_np.items()}
|
||||||
self.frame_buf_params = None
|
self.frame_buf_params = get_nv12_info(cam_w, cam_h)
|
||||||
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
|
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
|
||||||
self._blob_cache : dict[int, Tensor] = {}
|
self._blob_cache : dict[int, Tensor] = {}
|
||||||
self.image_warp = None
|
|
||||||
self.model_run = pickle.loads(read_file_chunked(str(MODEL_PKL_PATH)))
|
self.model_run = pickle.loads(read_file_chunked(str(MODEL_PKL_PATH)))
|
||||||
|
with open(CompileConfig(cam_w, cam_h, prefix='dm_', prepare_only=True).pkl_path, "rb") as f:
|
||||||
|
self.image_warp = pickle.load(f)
|
||||||
|
|
||||||
def run(self, buf: VisionBuf, calib: np.ndarray, transform: np.ndarray) -> tuple[np.ndarray, float]:
|
def run(self, buf: VisionBuf, calib: np.ndarray, transform: np.ndarray) -> tuple[np.ndarray, float]:
|
||||||
self.numpy_inputs['calib'][0,:] = calib
|
self.numpy_inputs['calib'][0,:] = calib
|
||||||
|
|
||||||
t1 = time.perf_counter()
|
t1 = time.perf_counter()
|
||||||
|
|
||||||
if self.image_warp is None:
|
|
||||||
self.frame_buf_params = get_nv12_info(buf.width, buf.height)
|
|
||||||
warp_path = MODELS_DIR / f'dm_warp_{buf.width}x{buf.height}_tinygrad.pkl'
|
|
||||||
with open(warp_path, "rb") as f:
|
|
||||||
self.image_warp = pickle.load(f)
|
|
||||||
ptr = buf.data.ctypes.data
|
ptr = buf.data.ctypes.data
|
||||||
# There is a ringbuffer of imgs, just cache tensors pointing to all of them
|
# There is a ringbuffer of imgs, just cache tensors pointing to all of them
|
||||||
if ptr not in self._blob_cache:
|
if ptr not in self._blob_cache:
|
||||||
@@ -113,9 +105,6 @@ def get_driverstate_packet(model_output, frame_id: int, location_ts: int, exec_t
|
|||||||
def main():
|
def main():
|
||||||
config_realtime_process(7, 5)
|
config_realtime_process(7, 5)
|
||||||
|
|
||||||
model = ModelState()
|
|
||||||
cloudlog.warning("models loaded, dmonitoringmodeld starting")
|
|
||||||
|
|
||||||
cloudlog.warning("connecting to driver stream")
|
cloudlog.warning("connecting to driver stream")
|
||||||
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True)
|
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True)
|
||||||
while not vipc_client.connect(False):
|
while not vipc_client.connect(False):
|
||||||
@@ -123,6 +112,9 @@ def main():
|
|||||||
assert vipc_client.is_connected()
|
assert vipc_client.is_connected()
|
||||||
cloudlog.warning(f"connected with buffer size: {vipc_client.buffer_len}")
|
cloudlog.warning(f"connected with buffer size: {vipc_client.buffer_len}")
|
||||||
|
|
||||||
|
model = ModelState(vipc_client.width, vipc_client.height)
|
||||||
|
cloudlog.warning("models loaded, dmonitoringmodeld starting")
|
||||||
|
|
||||||
sm = SubMaster(["liveCalibration"])
|
sm = SubMaster(["liveCalibration"])
|
||||||
pm = PubMaster(["driverStateV2"])
|
pm = PubMaster(["driverStateV2"])
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,10 @@ from typing import Any
|
|||||||
|
|
||||||
from tinygrad.nn.onnx import OnnxPBParser
|
from tinygrad.nn.onnx import OnnxPBParser
|
||||||
|
|
||||||
|
def metadata_path_for(onnx_path) -> pathlib.Path:
  """Return the metadata pickle path that sits next to an ONNX model file.

  The result is in the same directory as ``onnx_path`` and is named
  ``<stem>_metadata.pkl``, where ``<stem>`` is the file name without its
  final suffix.
  """
  model_file = pathlib.Path(onnx_path)
  metadata_name = model_file.stem + '_metadata.pkl'
  return model_file.parent / metadata_name
|
||||||
|
|
||||||
|
|
||||||
class MetadataOnnxPBParser(OnnxPBParser):
|
class MetadataOnnxPBParser(OnnxPBParser):
|
||||||
def _parse_ModelProto(self) -> dict:
|
def _parse_ModelProto(self) -> dict:
|
||||||
@@ -48,7 +52,7 @@ if __name__ == "__main__":
|
|||||||
'output_shapes': dict(get_name_and_shape(x) for x in model["graph"]["output"]),
|
'output_shapes': dict(get_name_and_shape(x) for x in model["graph"]["output"]),
|
||||||
}
|
}
|
||||||
|
|
||||||
metadata_path = model_path.parent / (model_path.stem + '_metadata.pkl')
|
metadata_path = metadata_path_for(model_path)
|
||||||
with open(metadata_path, 'wb') as f:
|
with open(metadata_path, 'wb') as f:
|
||||||
pickle.dump(metadata, f)
|
pickle.dump(metadata, f)
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,31 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||||
|
|
||||||
|
MODELS_DIR = Path(__file__).resolve().parent / 'models'
|
||||||
|
COMPILED_FLAGS_PATH = MODELS_DIR / 'tg_compiled_flags.json'
|
||||||
|
|
||||||
|
|
||||||
|
def set_tinygrad_backend_from_compiled_flags() -> None:
  """Set the ``DEV`` environment variable from the compiled-flags JSON file.

  Does nothing when ``COMPILED_FLAGS_PATH`` is not an existing regular file.
  Otherwise the file's ``'DEV'`` entry is stringified and stored in
  ``os.environ['DEV']``.
  """
  if not os.path.isfile(COMPILED_FLAGS_PATH):
    return
  with open(COMPILED_FLAGS_PATH) as f:
    compiled_flags = json.load(f)
  os.environ['DEV'] = str(compiled_flags['DEV'])
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CompileConfig:
  """Describes one compiled tinygrad pickle for a given camera resolution."""

  # camera frame width and height in pixels
  cam_w: int
  cam_h: int
  # when True, "warp_" is inserted into the pickle file name
  prepare_only: bool
  # file-name prefix, e.g. 'dm_' or 'driving_'
  prefix: str

  @property
  def pkl_path(self):
    """Path (as ``str``) of this configuration's pickle inside ``MODELS_DIR``."""
    infix = "warp_" if self.prepare_only else ""
    filename = f'{self.prefix}{infix}{self.cam_w}x{self.cam_h}_tinygrad.pkl'
    return str(MODELS_DIR / filename)

  @property
  def nv12(self):
    """Tuple of ``(cam_w, cam_h)`` followed by the values of ``get_nv12_info(cam_w, cam_h)``."""
    resolution = (self.cam_w, self.cam_h)
    return resolution + tuple(get_nv12_info(self.cam_w, self.cam_h))
|
||||||
+35
-122
@@ -1,12 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import os
|
import os
|
||||||
from openpilot.selfdrive.modeld.tinygrad_helpers import MODELS_DIR, set_tinygrad_backend_from_compiled_flags
|
from openpilot.selfdrive.modeld.helpers import MODELS_DIR, CompileConfig, set_tinygrad_backend_from_compiled_flags
|
||||||
set_tinygrad_backend_from_compiled_flags()
|
set_tinygrad_backend_from_compiled_flags()
|
||||||
|
|
||||||
# FIXME-SP: remove once we bump tg
|
|
||||||
from openpilot.system.hardware import TICI
|
|
||||||
os.environ['DEV'] = 'QCOM' if TICI else 'CPU'
|
|
||||||
|
|
||||||
USBGPU = "USBGPU" in os.environ
|
USBGPU = "USBGPU" in os.environ
|
||||||
if USBGPU:
|
if USBGPU:
|
||||||
os.environ['DEV'] = 'AMD'
|
os.environ['DEV'] = 'AMD'
|
||||||
@@ -30,6 +26,7 @@ from openpilot.common.transformations.model import get_warp_matrix
|
|||||||
from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
|
from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
|
||||||
from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, smooth_value, get_curvature_from_plan
|
from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, smooth_value, get_curvature_from_plan
|
||||||
from openpilot.selfdrive.modeld.parse_model_outputs import Parser
|
from openpilot.selfdrive.modeld.parse_model_outputs import Parser
|
||||||
|
from openpilot.selfdrive.modeld.compile_modeld import make_input_queues
|
||||||
from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
|
from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
|
||||||
from openpilot.common.file_chunker import read_file_chunked
|
from openpilot.common.file_chunker import read_file_chunked
|
||||||
from openpilot.selfdrive.modeld.constants import ModelConstants, Plan
|
from openpilot.selfdrive.modeld.constants import ModelConstants, Plan
|
||||||
@@ -41,17 +38,13 @@ from openpilot.sunnypilot.modeld_v2.modeld_base import ModelStateBase
|
|||||||
PROCESS_NAME = "selfdrive.modeld.modeld"
|
PROCESS_NAME = "selfdrive.modeld.modeld"
|
||||||
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
|
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
|
||||||
|
|
||||||
VISION_PKL_PATH = MODELS_DIR / 'driving_vision_tinygrad.pkl'
|
|
||||||
VISION_METADATA_PATH = MODELS_DIR / 'driving_vision_metadata.pkl'
|
VISION_METADATA_PATH = MODELS_DIR / 'driving_vision_metadata.pkl'
|
||||||
POLICY_PKL_PATH = MODELS_DIR / 'driving_policy_tinygrad.pkl'
|
|
||||||
POLICY_METADATA_PATH = MODELS_DIR / 'driving_policy_metadata.pkl'
|
POLICY_METADATA_PATH = MODELS_DIR / 'driving_policy_metadata.pkl'
|
||||||
|
|
||||||
LAT_SMOOTH_SECONDS = 0.0
|
LAT_SMOOTH_SECONDS = 0.0
|
||||||
LONG_SMOOTH_SECONDS = 0.3
|
LONG_SMOOTH_SECONDS = 0.3
|
||||||
MIN_LAT_CONTROL_SPEED = 0.3
|
MIN_LAT_CONTROL_SPEED = 0.3
|
||||||
|
|
||||||
IMG_QUEUE_SHAPE = (6*(ModelConstants.MODEL_RUN_FREQ//ModelConstants.MODEL_CONTEXT_FREQ + 1), 128, 256)
|
|
||||||
assert IMG_QUEUE_SHAPE[0] == 30
|
|
||||||
|
|
||||||
|
|
||||||
def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action,
|
def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action,
|
||||||
@@ -86,108 +79,39 @@ class FrameMeta:
|
|||||||
if vipc is not None:
|
if vipc is not None:
|
||||||
self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof
|
self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof
|
||||||
|
|
||||||
class InputQueues:
  """Rolling history buffers filled at ``env_fps`` and read back at ``model_fps``.

  Each named input owns one numpy array whose axis 1 is the history axis.
  ``enqueue`` shifts that axis left and appends the newest sample at the end;
  ``get`` returns a view of the history shaped the way it was registered.
  """

  def __init__(self, model_fps, env_fps, n_frames_input):
    # env_fps must be an integer multiple of model_fps
    assert env_fps % model_fps == 0
    assert env_fps >= model_fps
    self.model_fps, self.env_fps, self.n_frames_input = model_fps, env_fps, n_frames_input

    self.dtypes = {}
    self.shapes = {}
    self.q = {}

  def update_dtypes_and_shapes(self, input_dtypes, input_shapes) -> None:
    """Register dtypes and model-rate shapes; the stored shapes are widened on axis 1 to hold env-rate history."""
    self.dtypes.update(input_dtypes)
    if self.env_fps == self.model_fps:
      self.shapes.update(input_shapes)
      return

    for name, model_shape in input_shapes.items():
      stored = list(model_shape)
      if 'img' in name:
        # image inputs stack n_frames_input frames on axis 1
        n_channels = stored[1] // self.n_frames_input
        stored[1] = (self.env_fps // self.model_fps + (self.n_frames_input - 1)) * n_channels
      else:
        stored[1] = (self.env_fps // self.model_fps) * stored[1]
      self.shapes[name] = tuple(stored)

  def reset(self) -> None:
    """Replace every queue with zeros of its registered shape and dtype."""
    self.q = {k: np.zeros(self.shapes[k], dtype=dt) for k, dt in self.dtypes.items()}

  def enqueue(self, inputs:dict[str, np.ndarray]) -> None:
    """Append each supplied sample to the end of its queue, dropping the oldest entries.

    Raises ValueError when a sample's dtype differs from the registered one.
    """
    for k in inputs:
      if inputs[k].dtype != self.dtypes[k]:
        raise ValueError(f'supplied input <{k}({inputs[k].dtype})> has wrong dtype, expected {self.dtypes[k]}')
      # reshape to the queue's layout, letting axis 1 absorb the sample length
      sample_shape = list(self.shapes[k])
      sample_shape[1] = -1
      sample = inputs[k].reshape(tuple(sample_shape))
      n_new = sample.shape[1]
      history = self.q[k]
      history[:, :-n_new] = history[:, n_new:]
      history[:, -n_new:] = sample

  def get(self, *names) -> dict[str, np.ndarray]:
    """Return the requested queues, subsampled from env rate to model rate when the two differ."""
    if self.env_fps == self.model_fps:
      return {k: self.q[k] for k in names}

    out = {}
    for k in names:
      shape = self.shapes[k]
      history = self.q[k]
      if 'img' in k:
        n_channels = shape[1] // (self.env_fps // self.model_fps + (self.n_frames_input - 1))
        # evenly spaced frame start offsets, from the oldest to the newest frame
        starts = np.linspace(0, shape[1] - n_channels, self.n_frames_input, dtype=int)
        out[k] = np.concatenate([history[:, s:s+n_channels] for s in starts], axis=1)
      elif 'pulse' in k:
        # any pulse within interval counts
        grouped = history.reshape((shape[0], shape[1] * self.model_fps // self.env_fps, self.env_fps // self.model_fps, -1))
        out[k] = grouped.max(axis=2)
      else:
        # pick the last entry of each interval, oldest first
        idxs = np.arange(-1, -shape[1], -self.env_fps // self.model_fps)[::-1]
        out[k] = history[:, idxs]
    return out
|
|
||||||
|
|
||||||
class ModelState(ModelStateBase):
|
class ModelState(ModelStateBase):
|
||||||
inputs: dict[str, np.ndarray]
|
|
||||||
output: np.ndarray
|
|
||||||
prev_desire: np.ndarray # for tracking the rising edge of the pulse
|
prev_desire: np.ndarray # for tracking the rising edge of the pulse
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, cam_w: int, cam_h: int):
|
||||||
ModelStateBase.__init__(self)
|
ModelStateBase.__init__(self)
|
||||||
self.LAT_SMOOTH_SECONDS = LAT_SMOOTH_SECONDS
|
self.LAT_SMOOTH_SECONDS = LAT_SMOOTH_SECONDS
|
||||||
|
|
||||||
with open(VISION_METADATA_PATH, 'rb') as f:
|
with open(VISION_METADATA_PATH, 'rb') as f:
|
||||||
vision_metadata = pickle.load(f)
|
vision_metadata = pickle.load(f)
|
||||||
self.vision_input_shapes = vision_metadata['input_shapes']
|
self.vision_input_shapes = vision_metadata['input_shapes']
|
||||||
self.vision_input_names = list(self.vision_input_shapes.keys())
|
self.vision_input_names = list(self.vision_input_shapes.keys())
|
||||||
self.vision_output_slices = vision_metadata['output_slices']
|
self.vision_output_slices = vision_metadata['output_slices']
|
||||||
vision_output_size = vision_metadata['output_shapes']['outputs'][1]
|
|
||||||
|
|
||||||
with open(POLICY_METADATA_PATH, 'rb') as f:
|
with open(POLICY_METADATA_PATH, 'rb') as f:
|
||||||
policy_metadata = pickle.load(f)
|
policy_metadata = pickle.load(f)
|
||||||
self.policy_input_shapes = policy_metadata['input_shapes']
|
self.policy_input_shapes = policy_metadata['input_shapes']
|
||||||
self.policy_output_slices = policy_metadata['output_slices']
|
self.policy_output_slices = policy_metadata['output_slices']
|
||||||
policy_output_size = policy_metadata['output_shapes']['outputs'][1]
|
|
||||||
|
|
||||||
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
|
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
|
||||||
|
|
||||||
# policy inputs
|
self.frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ
|
||||||
self.numpy_inputs = {k: np.zeros(self.policy_input_shapes[k], dtype=np.float32) for k in self.policy_input_shapes}
|
self.input_queues, self.npy = make_input_queues(self.vision_input_shapes, self.policy_input_shapes, self.frame_skip)
|
||||||
self.full_input_queues = InputQueues(ModelConstants.MODEL_CONTEXT_FREQ, ModelConstants.MODEL_RUN_FREQ, ModelConstants.N_FRAMES)
|
|
||||||
for k in ['desire_pulse', 'features_buffer']:
|
|
||||||
self.full_input_queues.update_dtypes_and_shapes({k: self.numpy_inputs[k].dtype}, {k: self.numpy_inputs[k].shape})
|
|
||||||
self.full_input_queues.reset()
|
|
||||||
|
|
||||||
self.img_queues = {'img': Tensor.zeros(IMG_QUEUE_SHAPE, dtype='uint8').contiguous().realize(),
|
|
||||||
'big_img': Tensor.zeros(IMG_QUEUE_SHAPE, dtype='uint8').contiguous().realize()}
|
|
||||||
self.full_frames : dict[str, Tensor] = {}
|
self.full_frames : dict[str, Tensor] = {}
|
||||||
self._blob_cache : dict[int, Tensor] = {}
|
self._blob_cache : dict[int, Tensor] = {}
|
||||||
self.transforms_np = {k: np.zeros((3,3), dtype=np.float32) for k in self.img_queues}
|
|
||||||
self.transforms = {k: Tensor(v, device='NPY').realize() for k, v in self.transforms_np.items()}
|
|
||||||
self.vision_output = np.zeros(vision_output_size, dtype=np.float32)
|
|
||||||
self.policy_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
|
|
||||||
self.policy_output = np.zeros(policy_output_size, dtype=np.float32)
|
|
||||||
self.parser = Parser()
|
self.parser = Parser()
|
||||||
self.frame_buf_params : dict[str, tuple[int, int, int, int]] = {}
|
self.frame_buf_params = {k: get_nv12_info(cam_w, cam_h) for k in ('img', 'big_img')}
|
||||||
self.update_imgs = None
|
self.run_policy = pickle.loads(read_file_chunked(CompileConfig(cam_w, cam_h, prefix='driving_', prepare_only=False).pkl_path))
|
||||||
self.vision_run = pickle.loads(read_file_chunked(str(VISION_PKL_PATH)))
|
self.warp_enqueue = pickle.loads(read_file_chunked(CompileConfig(cam_w, cam_h, prefix='driving_', prepare_only=True).pkl_path))
|
||||||
self.policy_run = pickle.loads(read_file_chunked(str(POLICY_PKL_PATH)))
|
self.warp_enqueue(
|
||||||
|
**self.input_queues,
|
||||||
|
frame=Tensor.zeros(self.frame_buf_params['img'][3], dtype='uint8').contiguous().realize(),
|
||||||
|
big_frame=Tensor.zeros(self.frame_buf_params['big_img'][3], dtype='uint8').contiguous().realize())
|
||||||
|
|
||||||
def slice_outputs(self, model_outputs: np.ndarray, output_slices: dict[str, slice]) -> dict[str, np.ndarray]:
|
def slice_outputs(self, model_outputs: np.ndarray, output_slices: dict[str, slice]) -> dict[str, np.ndarray]:
|
||||||
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in output_slices.items()}
|
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in output_slices.items()}
|
||||||
@@ -195,18 +119,6 @@ class ModelState(ModelStateBase):
|
|||||||
|
|
||||||
def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
|
def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
|
||||||
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
|
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
|
||||||
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
|
|
||||||
inputs['desire_pulse'][0] = 0
|
|
||||||
new_desire = np.where(inputs['desire_pulse'] - self.prev_desire > .99, inputs['desire_pulse'], 0)
|
|
||||||
self.prev_desire[:] = inputs['desire_pulse']
|
|
||||||
if self.update_imgs is None:
|
|
||||||
for key in bufs.keys():
|
|
||||||
w, h = bufs[key].width, bufs[key].height
|
|
||||||
self.frame_buf_params[key] = get_nv12_info(w, h)
|
|
||||||
warp_path = MODELS_DIR / f'warp_{w}x{h}_tinygrad.pkl'
|
|
||||||
with open(warp_path, "rb") as f:
|
|
||||||
self.update_imgs = pickle.load(f)
|
|
||||||
|
|
||||||
for key in bufs.keys():
|
for key in bufs.keys():
|
||||||
ptr = bufs[key].data.ctypes.data
|
ptr = bufs[key].data.ctypes.data
|
||||||
yuv_size = self.frame_buf_params[key][3]
|
yuv_size = self.frame_buf_params[key][3]
|
||||||
@@ -215,30 +127,31 @@ class ModelState(ModelStateBase):
|
|||||||
if cache_key not in self._blob_cache:
|
if cache_key not in self._blob_cache:
|
||||||
self._blob_cache[cache_key] = Tensor.from_blob(ptr, (yuv_size,), dtype='uint8')
|
self._blob_cache[cache_key] = Tensor.from_blob(ptr, (yuv_size,), dtype='uint8')
|
||||||
self.full_frames[key] = self._blob_cache[cache_key]
|
self.full_frames[key] = self._blob_cache[cache_key]
|
||||||
for key in bufs.keys():
|
|
||||||
self.transforms_np[key][:,:] = transforms[key][:,:]
|
|
||||||
|
|
||||||
out = self.update_imgs(self.img_queues['img'], self.full_frames['img'], self.transforms['img'],
|
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
|
||||||
self.img_queues['big_img'], self.full_frames['big_img'], self.transforms['big_img'])
|
inputs['desire_pulse'][0] = 0
|
||||||
vision_inputs = {'img': out[0], 'big_img': out[1]}
|
self.npy['desire'][:] = np.where(inputs['desire_pulse'] - self.prev_desire > .99, inputs['desire_pulse'], 0)
|
||||||
|
self.prev_desire[:] = inputs['desire_pulse']
|
||||||
|
self.npy['traffic_convention'][:] = inputs['traffic_convention']
|
||||||
|
self.npy['tfm'][:,:] = transforms['img'][:,:]
|
||||||
|
self.npy['big_tfm'][:,:] = transforms['big_img'][:,:]
|
||||||
|
|
||||||
if prepare_only:
|
if prepare_only:
|
||||||
|
self.warp_enqueue(**self.input_queues, frame=self.full_frames['img'], big_frame=self.full_frames['big_img'])
|
||||||
return None
|
return None
|
||||||
|
|
||||||
self.vision_output = self.vision_run(**vision_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
|
vision_output, policy_output = self.run_policy(
|
||||||
vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(self.vision_output, self.vision_output_slices))
|
**self.input_queues, frame=self.full_frames['img'], big_frame=self.full_frames['big_img']
|
||||||
|
)
|
||||||
|
|
||||||
self.full_input_queues.enqueue({'features_buffer': vision_outputs_dict['hidden_state'], 'desire_pulse': new_desire})
|
vision_output = vision_output.numpy().flatten()
|
||||||
for k in ['desire_pulse', 'features_buffer']:
|
policy_output = policy_output.numpy().flatten()
|
||||||
self.numpy_inputs[k][:] = self.full_input_queues.get(k)[k]
|
vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(vision_output, self.vision_output_slices))
|
||||||
self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
|
policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(policy_output, self.policy_output_slices))
|
||||||
|
|
||||||
self.policy_output = self.policy_run(**self.policy_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
|
|
||||||
policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(self.policy_output, self.policy_output_slices))
|
|
||||||
combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict}
|
combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict}
|
||||||
if SEND_RAW_PRED:
|
|
||||||
combined_outputs_dict['raw_pred'] = np.concatenate([self.vision_output.copy(), self.policy_output.copy()])
|
|
||||||
|
|
||||||
|
if SEND_RAW_PRED:
|
||||||
|
combined_outputs_dict['raw_pred'] = np.concatenate([vision_output.copy(), policy_output.copy()])
|
||||||
return combined_outputs_dict
|
return combined_outputs_dict
|
||||||
|
|
||||||
|
|
||||||
@@ -250,11 +163,6 @@ def main(demo=False):
|
|||||||
# also need to move the aux USB interrupts for good timings
|
# also need to move the aux USB interrupts for good timings
|
||||||
config_realtime_process(7, 54)
|
config_realtime_process(7, 54)
|
||||||
|
|
||||||
st = time.monotonic()
|
|
||||||
cloudlog.warning("loading model")
|
|
||||||
model = ModelState()
|
|
||||||
cloudlog.warning(f"models loaded in {time.monotonic() - st:.1f}s, modeld starting")
|
|
||||||
|
|
||||||
# visionipc clients
|
# visionipc clients
|
||||||
while True:
|
while True:
|
||||||
available_streams = VisionIpcClient.available_streams("camerad", block=False)
|
available_streams = VisionIpcClient.available_streams("camerad", block=False)
|
||||||
@@ -278,6 +186,11 @@ def main(demo=False):
|
|||||||
if use_extra_client:
|
if use_extra_client:
|
||||||
cloudlog.warning(f"connected extra cam with buffer size: {vipc_client_extra.buffer_len} ({vipc_client_extra.width} x {vipc_client_extra.height})")
|
cloudlog.warning(f"connected extra cam with buffer size: {vipc_client_extra.buffer_len} ({vipc_client_extra.width} x {vipc_client_extra.height})")
|
||||||
|
|
||||||
|
st = time.monotonic()
|
||||||
|
cloudlog.warning("loading model")
|
||||||
|
model = ModelState(vipc_client_main.width, vipc_client_main.height)
|
||||||
|
cloudlog.warning(f"models loaded in {time.monotonic() - st:.1f}s, modeld starting")
|
||||||
|
|
||||||
# messaging
|
# messaging
|
||||||
pm = PubMaster(["modelV2", "drivingModelData", "cameraOdometry", "modelDataV2SP"])
|
pm = PubMaster(["modelV2", "drivingModelData", "cameraOdometry", "modelDataV2SP"])
|
||||||
sm = SubMaster(["deviceState", "carState", "roadCameraState", "liveCalibration", "driverMonitoringState", "carControl", "liveDelay"])
|
sm = SubMaster(["deviceState", "carState", "roadCameraState", "liveCalibration", "driverMonitoringState", "carControl", "liveDelay"])
|
||||||
|
|||||||
@@ -1,12 +0,0 @@
|
|||||||
import json
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
MODELS_DIR = Path(__file__).parent / 'models'
|
|
||||||
COMPILED_FLAGS_PATH = MODELS_DIR / 'tg_compiled_flags.json'
|
|
||||||
|
|
||||||
|
|
||||||
def set_tinygrad_backend_from_compiled_flags() -> None:
  """Set the ``DEV`` environment variable from the compiled-flags JSON file.

  Does nothing when ``COMPILED_FLAGS_PATH`` is not an existing regular file.
  Otherwise the file's ``'DEV'`` entry is stringified and stored in
  ``os.environ['DEV']``.
  """
  if not os.path.isfile(COMPILED_FLAGS_PATH):
    return
  with open(COMPILED_FLAGS_PATH) as f:
    compiled_flags = json.load(f)
  os.environ['DEV'] = str(compiled_flags['DEV'])
|
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import glob
|
import glob
|
||||||
|
from tinygrad import Device
|
||||||
|
|
||||||
Import('env', 'arch')
|
Import('env', 'arch')
|
||||||
lenv = env.Clone()
|
lenv = env.Clone()
|
||||||
@@ -21,10 +22,19 @@ if PC:
|
|||||||
if outputs:
|
if outputs:
|
||||||
lenv.Command(outputs, inputs, cmd)
|
lenv.Command(outputs, inputs, cmd)
|
||||||
|
|
||||||
tg_flags = {
|
available = set(Device.get_available_devices())
|
||||||
'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0',
|
if 'CUDA' in available:
|
||||||
'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}',
|
tg_backend = 'CUDA'
|
||||||
}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0')
|
tg_flags = f'DEV={tg_backend}'
|
||||||
|
elif 'QCOM' in available:
|
||||||
|
tg_backend = 'QCOM'
|
||||||
|
tg_flags = f'DEV={tg_backend} FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0'
|
||||||
|
else:
|
||||||
|
tg_backend = 'CPU' if arch == 'Darwin' else 'CPU:LLVM'
|
||||||
|
# THREADS=0 is needed to prevent this bug: https://github.com/tinygrad/tinygrad/issues/14689
|
||||||
|
tg_flags = f'DEV={tg_backend} THREADS=0'
|
||||||
|
|
||||||
|
mac_brew_string = f'HOME={os.path.expanduser("~")}' if arch == 'Darwin' else ''
|
||||||
|
|
||||||
image_flag = {
|
image_flag = {
|
||||||
'larch64': 'IMAGE=2',
|
'larch64': 'IMAGE=2',
|
||||||
@@ -38,7 +48,7 @@ def tg_compile(flags, model_name):
|
|||||||
return lenv.Command(
|
return lenv.Command(
|
||||||
out,
|
out,
|
||||||
[fn + ".onnx"] + tinygrad_files,
|
[fn + ".onnx"] + tinygrad_files,
|
||||||
f'{pythonpath_string} {flags} {image_flag} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {out}'
|
f'{pythonpath_string} {tg_flags} {mac_brew_string} {image_flag} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {out}'
|
||||||
)
|
)
|
||||||
|
|
||||||
# Compile models
|
# Compile models
|
||||||
@@ -46,9 +56,9 @@ for model_name in ['supercombo', 'driving_vision', 'driving_off_policy', 'drivin
|
|||||||
if File(f"models/{model_name}.onnx").exists():
|
if File(f"models/{model_name}.onnx").exists():
|
||||||
tg_compile(tg_flags, model_name)
|
tg_compile(tg_flags, model_name)
|
||||||
|
|
||||||
script_files = [File("warp.py"), File(Dir("#selfdrive/modeld").File("compile_warp.py").abspath)]
|
script_files = [File("warp.py")]
|
||||||
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + ':' + env.Dir("#").abspath + '"'
|
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + ':' + env.Dir("#").abspath + '"'
|
||||||
compile_warp_cmd = f'{pythonpath_string} {tg_flags} python3 -m sunnypilot.modeld_v2.warp'
|
compile_warp_cmd = f'{pythonpath_string} {tg_flags} {mac_brew_string} {image_flag} python3 -m sunnypilot.modeld_v2.warp'
|
||||||
|
|
||||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||||
warp_targets = []
|
warp_targets = []
|
||||||
|
|||||||
@@ -129,8 +129,7 @@ class ModelState(ModelStateBase):
|
|||||||
self.numpy_inputs[key][:] = inputs[key]
|
self.numpy_inputs[key][:] = inputs[key]
|
||||||
|
|
||||||
imgs_tensors = self.warp.process(bufs, transforms)
|
imgs_tensors = self.warp.process(bufs, transforms)
|
||||||
for name, tensor in imgs_tensors.items():
|
self.model_runner.update_vision_inputs(imgs_tensors)
|
||||||
self.model_runner.inputs[name] = tensor
|
|
||||||
self.model_runner.prepare_inputs(self.numpy_inputs)
|
self.model_runner.prepare_inputs(self.numpy_inputs)
|
||||||
|
|
||||||
if prepare_only:
|
if prepare_only:
|
||||||
|
|||||||
@@ -2,8 +2,11 @@ import os
|
|||||||
os.environ['DEV'] = 'CPU'
|
os.environ['DEV'] = 'CPU'
|
||||||
import pytest
|
import pytest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from openpilot.selfdrive.modeld.compile_warp import get_nv12_info, CAMERA_CONFIGS
|
from openpilot.sunnypilot.modeld_v2.warp import CAMERA_CONFIGS
|
||||||
from openpilot.sunnypilot.modeld_v2.warp import Warp, MODEL_W, MODEL_H
|
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||||
|
from openpilot.sunnypilot.modeld_v2.warp import Warp
|
||||||
|
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE
|
||||||
|
MODEL_W, MODEL_H = MEDMODEL_INPUT_SIZE
|
||||||
|
|
||||||
VISION_NAME_PAIRS = [ # needed to account for supercombos input_imgs
|
VISION_NAME_PAIRS = [ # needed to account for supercombos input_imgs
|
||||||
('img', 'big_img'),
|
('img', 'big_img'),
|
||||||
|
|||||||
+120
-23
@@ -6,29 +6,128 @@ from tinygrad.tensor import Tensor
|
|||||||
from tinygrad.engine.jit import TinyJit
|
from tinygrad.engine.jit import TinyJit
|
||||||
from tinygrad.device import Device
|
from tinygrad.device import Device
|
||||||
|
|
||||||
|
from typing import NamedTuple
|
||||||
|
# https://github.com/tinygrad/tinygrad/issues/15682
|
||||||
|
from tinygrad.uop.ops import UOp, Ops
|
||||||
|
_orig = UOp.__reduce__
|
||||||
|
UOp.__reduce__ = lambda self: (UOp.unique, ()) if self.op is Ops.UNIQUE else _orig(self)
|
||||||
|
|
||||||
|
from tinygrad.helpers import Context
|
||||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||||
from openpilot.selfdrive.modeld.compile_warp import (
|
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||||
CAMERA_CONFIGS, MEDMODEL_INPUT_SIZE, make_frame_prepare, make_update_both_imgs,
|
|
||||||
warp_pkl_path,
|
class NV12Frame(NamedTuple):
|
||||||
)
|
cam_w: int
|
||||||
|
cam_h: int
|
||||||
|
stride: int
|
||||||
|
y_height: int
|
||||||
|
uv_height: int
|
||||||
|
size: int
|
||||||
|
|
||||||
|
UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
|
||||||
|
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)
|
||||||
|
|
||||||
|
CAMERA_CONFIGS = [
|
||||||
|
(_ar_ox_fisheye.width, _ar_ox_fisheye.height), # tici: 1928x1208
|
||||||
|
(_os_fisheye.width, _os_fisheye.height), # mici: 1344x760
|
||||||
|
]
|
||||||
|
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE
|
||||||
|
|
||||||
MODELS_DIR = Path(__file__).parent / 'models'
|
MODELS_DIR = Path(__file__).parent / 'models'
|
||||||
MODEL_W, MODEL_H = MEDMODEL_INPUT_SIZE
|
|
||||||
UPSTREAM_BUFFER_LENGTH = 5
|
UPSTREAM_BUFFER_LENGTH = 5
|
||||||
|
|
||||||
|
def warp_pkl_path(cam_w, cam_h):
|
||||||
|
return MODELS_DIR / f'warp_{cam_w}x{cam_h}_tinygrad.pkl'
|
||||||
|
|
||||||
|
def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad):
|
||||||
|
w_dst, h_dst = dst_shape
|
||||||
|
h_src, w_src = src_shape
|
||||||
|
|
||||||
|
x = Tensor.arange(w_dst).reshape(1, w_dst).expand(h_dst, w_dst).reshape(-1)
|
||||||
|
y = Tensor.arange(h_dst).reshape(h_dst, 1).expand(h_dst, w_dst).reshape(-1)
|
||||||
|
|
||||||
|
# inline 3x3 matmul as elementwise to avoid reduce op (enables fusion with gather)
|
||||||
|
src_x = M_inv[0, 0] * x + M_inv[0, 1] * y + M_inv[0, 2]
|
||||||
|
src_y = M_inv[1, 0] * x + M_inv[1, 1] * y + M_inv[1, 2]
|
||||||
|
src_w = M_inv[2, 0] * x + M_inv[2, 1] * y + M_inv[2, 2]
|
||||||
|
|
||||||
|
src_x = src_x / src_w
|
||||||
|
src_y = src_y / src_w
|
||||||
|
|
||||||
|
x_nn_clipped = Tensor.round(src_x).clip(0, w_src - 1).cast('int')
|
||||||
|
y_nn_clipped = Tensor.round(src_y).clip(0, h_src - 1).cast('int')
|
||||||
|
idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped
|
||||||
|
|
||||||
|
return src_flat[idx]
|
||||||
|
|
||||||
|
def frames_to_tensor(frames, model_w, model_h):
|
||||||
|
H = (frames.shape[0] * 2) // 3
|
||||||
|
W = frames.shape[1]
|
||||||
|
in_img1 = Tensor.cat(frames[0:H:2, 0::2],
|
||||||
|
frames[1:H:2, 0::2],
|
||||||
|
frames[0:H:2, 1::2],
|
||||||
|
frames[1:H:2, 1::2],
|
||||||
|
frames[H:H+H//4].reshape((H//2, W//2)),
|
||||||
|
frames[H+H//4:H+H//2].reshape((H//2, W//2)), dim=0).reshape((6, H//2, W//2))
|
||||||
|
return in_img1
|
||||||
|
|
||||||
|
def make_frame_prepare(cam_w, cam_h, model_w, model_h):
|
||||||
|
stride, y_height, uv_height, _ = get_nv12_info(cam_w, cam_h)
|
||||||
|
uv_offset = stride * y_height
|
||||||
|
stride_pad = stride - cam_w
|
||||||
|
|
||||||
|
def frame_prepare_tinygrad(input_frame, M_inv):
|
||||||
|
# UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
|
||||||
|
M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]])
|
||||||
|
# deinterleave NV12 UV plane (UVUV... -> separate U, V)
|
||||||
|
uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
|
||||||
|
with Context(SPLIT_REDUCEOP=0):
|
||||||
|
y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
|
||||||
|
M_inv, (model_w, model_h),
|
||||||
|
(cam_h, cam_w), stride_pad).realize()
|
||||||
|
u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
|
||||||
|
M_inv_uv, (model_w//2, model_h//2),
|
||||||
|
(cam_h//2, cam_w//2), 0).realize()
|
||||||
|
v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
|
||||||
|
M_inv_uv, (model_w//2, model_h//2),
|
||||||
|
(cam_h//2, cam_w//2), 0).realize()
|
||||||
|
yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
|
||||||
|
tensor = frames_to_tensor(yuv, model_w, model_h)
|
||||||
|
return tensor
|
||||||
|
return frame_prepare_tinygrad
|
||||||
|
|
||||||
|
def make_update_img_input(frame_prepare, model_w, model_h):
|
||||||
|
def update_img_input_tinygrad(tensor, frame, M_inv):
|
||||||
|
M_inv = M_inv.to(Device.DEFAULT)
|
||||||
|
new_img = frame_prepare(frame, M_inv)
|
||||||
|
tensor.assign(tensor[6:].cat(new_img, dim=0).contiguous())
|
||||||
|
return tensor, Tensor.cat(tensor[:6], tensor[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
|
||||||
|
return update_img_input_tinygrad
|
||||||
|
|
||||||
|
def make_update_both_imgs(frame_prepare, model_w, model_h):
|
||||||
|
update_img = make_update_img_input(frame_prepare, model_w, model_h)
|
||||||
|
|
||||||
|
def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
|
||||||
|
calib_big_img_buffer, new_big_img, M_inv_big):
|
||||||
|
r1, r2 = update_img(calib_img_buffer, new_img, M_inv)
|
||||||
|
w1, w2 = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
|
||||||
|
return r1, r2, w1, w2
|
||||||
|
return update_both_imgs_tinygrad
|
||||||
|
|
||||||
|
|
||||||
def v2_warp_pkl_path(cam_w, cam_h, buffer_length):
|
def v2_warp_pkl_path(cam_w, cam_h, buffer_length):
|
||||||
return MODELS_DIR / f'warp_{cam_w}x{cam_h}_b{buffer_length}_tinygrad.pkl'
|
return MODELS_DIR / f'warp_{cam_w}x{cam_h}_b{buffer_length}_tinygrad.pkl'
|
||||||
|
|
||||||
|
|
||||||
def compile_v2_warp(cam_w, cam_h, buffer_length):
|
def compile_v2_warp(cam_w, cam_h, buffer_length, model_w=MEDMODEL_INPUT_SIZE[0], model_h=MEDMODEL_INPUT_SIZE[1], pkl_path=None):
|
||||||
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
||||||
img_buffer_shape = (buffer_length * 6, MODEL_H // 2, MODEL_W // 2)
|
img_buffer_shape = (buffer_length * 6, model_h // 2, model_w // 2)
|
||||||
|
|
||||||
print(f"Compiling v2 warp for {cam_w}x{cam_h} buffer_length={buffer_length}...")
|
print(f"Compiling v2 warp for {cam_w}x{cam_h} buffer_length={buffer_length}...")
|
||||||
|
|
||||||
frame_prepare = make_frame_prepare(cam_w, cam_h, MODEL_W, MODEL_H)
|
frame_prepare = make_frame_prepare(cam_w, cam_h, model_w, model_h)
|
||||||
update_both_imgs = make_update_both_imgs(frame_prepare, MODEL_W, MODEL_H)
|
update_both_imgs = make_update_both_imgs(frame_prepare, model_w, model_h)
|
||||||
update_img_jit = TinyJit(update_both_imgs, prune=True)
|
update_img_jit = TinyJit(update_both_imgs, prune=True)
|
||||||
|
|
||||||
full_buffer = Tensor.zeros(img_buffer_shape, dtype='uint8').contiguous().realize()
|
full_buffer = Tensor.zeros(img_buffer_shape, dtype='uint8').contiguous().realize()
|
||||||
@@ -46,25 +145,25 @@ def compile_v2_warp(cam_w, cam_h, buffer_length):
|
|||||||
Device.default.synchronize()
|
Device.default.synchronize()
|
||||||
|
|
||||||
st = time.perf_counter()
|
st = time.perf_counter()
|
||||||
_ = update_img_jit(*inputs)
|
update_img_jit(*inputs)
|
||||||
mt = time.perf_counter()
|
mt = time.perf_counter()
|
||||||
Device.default.synchronize()
|
Device.default.synchronize()
|
||||||
et = time.perf_counter()
|
et = time.perf_counter()
|
||||||
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
||||||
|
|
||||||
pkl_path = v2_warp_pkl_path(cam_w, cam_h, buffer_length)
|
if pkl_path is None:
|
||||||
|
pkl_path = v2_warp_pkl_path(cam_w, cam_h, buffer_length)
|
||||||
with open(pkl_path, "wb") as f:
|
with open(pkl_path, "wb") as f:
|
||||||
pickle.dump(update_img_jit, f)
|
pickle.dump(update_img_jit, f)
|
||||||
print(f" Saved to {pkl_path}")
|
print(f" Saved to {pkl_path}")
|
||||||
|
|
||||||
jit = pickle.load(open(pkl_path, "rb"))
|
|
||||||
jit(*inputs)
|
|
||||||
|
|
||||||
|
|
||||||
class Warp:
|
class Warp:
|
||||||
def __init__(self, buffer_length=2):
|
def __init__(self, buffer_length=2, model_w=MEDMODEL_INPUT_SIZE[0], model_h=MEDMODEL_INPUT_SIZE[1]):
|
||||||
self.buffer_length = buffer_length
|
self.buffer_length = buffer_length
|
||||||
self.img_buffer_shape = (buffer_length * 6, MODEL_H // 2, MODEL_W // 2)
|
self.model_w = model_w
|
||||||
|
self.model_h = model_h
|
||||||
|
self.img_buffer_shape = (buffer_length * 6, model_h // 2, model_w // 2)
|
||||||
|
|
||||||
self.jit_cache = {}
|
self.jit_cache = {}
|
||||||
self.full_buffers = {k: Tensor.zeros(self.img_buffer_shape, dtype='uint8').contiguous().realize() for k in ['img', 'big_img']}
|
self.full_buffers = {k: Tensor.zeros(self.img_buffer_shape, dtype='uint8').contiguous().realize() for k in ['img', 'big_img']}
|
||||||
@@ -92,8 +191,8 @@ class Warp:
|
|||||||
with open(upstream_pkl, 'rb') as f:
|
with open(upstream_pkl, 'rb') as f:
|
||||||
self.jit_cache[key] = pickle.load(f)
|
self.jit_cache[key] = pickle.load(f)
|
||||||
if key not in self.jit_cache:
|
if key not in self.jit_cache:
|
||||||
frame_prepare = make_frame_prepare(cam_w, cam_h, MODEL_W, MODEL_H)
|
frame_prepare = make_frame_prepare(cam_w, cam_h, self.model_w, self.model_h)
|
||||||
update_both_imgs = make_update_both_imgs(frame_prepare, MODEL_W, MODEL_H)
|
update_both_imgs = make_update_both_imgs(frame_prepare, self.model_w, self.model_h)
|
||||||
self.jit_cache[key] = TinyJit(update_both_imgs, prune=True)
|
self.jit_cache[key] = TinyJit(update_both_imgs, prune=True)
|
||||||
|
|
||||||
if key not in self._nv12_cache:
|
if key not in self._nv12_cache:
|
||||||
@@ -107,7 +206,7 @@ class Warp:
|
|||||||
if wide_ptr not in self._blob_cache:
|
if wide_ptr not in self._blob_cache:
|
||||||
self._blob_cache[wide_ptr] = Tensor.from_blob(wide_ptr, (yuv_size,), dtype='uint8')
|
self._blob_cache[wide_ptr] = Tensor.from_blob(wide_ptr, (yuv_size,), dtype='uint8')
|
||||||
road_blob = self._blob_cache[road_ptr]
|
road_blob = self._blob_cache[road_ptr]
|
||||||
wide_blob = self._blob_cache[wide_ptr] if wide_ptr != road_ptr else Tensor.from_blob(wide_ptr, (yuv_size,), dtype='uint8')
|
wide_blob = self._blob_cache[wide_ptr]
|
||||||
np.copyto(self.transforms_np['img'], transforms[road].reshape(3, 3))
|
np.copyto(self.transforms_np['img'], transforms[road].reshape(3, 3))
|
||||||
np.copyto(self.transforms_np['big_img'], transforms[wide].reshape(3, 3))
|
np.copyto(self.transforms_np['big_img'], transforms[wide].reshape(3, 3))
|
||||||
|
|
||||||
@@ -116,13 +215,11 @@ class Warp:
|
|||||||
self.full_buffers['img'], road_blob, self.transforms['img'],
|
self.full_buffers['img'], road_blob, self.transforms['img'],
|
||||||
self.full_buffers['big_img'], wide_blob, self.transforms['big_img'],
|
self.full_buffers['big_img'], wide_blob, self.transforms['big_img'],
|
||||||
)
|
)
|
||||||
out_road = res[0].realize()
|
return {road: res[1].realize(), wide: res[3].realize()}
|
||||||
out_wide = res[1].realize()
|
|
||||||
|
|
||||||
return {road: out_road, wide: out_wide}
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
for cam_w, cam_h in CAMERA_CONFIGS:
|
for cam_w, cam_h in CAMERA_CONFIGS:
|
||||||
|
compile_v2_warp(cam_w, cam_h, 5, pkl_path=warp_pkl_path(cam_w, cam_h))
|
||||||
for bl in [2, 5]:
|
for bl in [2, 5]:
|
||||||
compile_v2_warp(cam_w, cam_h, bl)
|
compile_v2_warp(cam_w, cam_h, bl)
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ class ModelCache:
|
|||||||
|
|
||||||
class ModelFetcher:
|
class ModelFetcher:
|
||||||
"""Handles fetching and caching of model data from remote source"""
|
"""Handles fetching and caching of model data from remote source"""
|
||||||
MODEL_URL = "https://raw.githubusercontent.com/sunnypilot/sunnypilot-models/refs/heads/gh-pages/docs/driving_models_v16.json"
|
MODEL_URL = "https://raw.githubusercontent.com/sunnypilot/sunnypilot-models/refs/heads/gh-pages/docs/driving_models_v18.json"
|
||||||
|
|
||||||
def __init__(self, params: Params):
|
def __init__(self, params: Params):
|
||||||
self.params = params
|
self.params = params
|
||||||
|
|||||||
@@ -132,6 +132,11 @@ class ModelRunner(ModularRunner):
|
|||||||
return list(self._model_data.input_shapes.keys())
|
return list(self._model_data.input_shapes.keys())
|
||||||
raise ValueError("Model data is not available. Ensure the model is loaded correctly.")
|
raise ValueError("Model data is not available. Ensure the model is loaded correctly.")
|
||||||
|
|
||||||
|
def update_vision_inputs(self, vision_inputs: dict) -> None:
|
||||||
|
"""Updates the vision inputs in the runner."""
|
||||||
|
for name, tensor in vision_inputs.items():
|
||||||
|
self.inputs[name] = tensor
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def prepare_inputs(self, numpy_inputs: NumpyDict) -> dict:
|
def prepare_inputs(self, numpy_inputs: NumpyDict) -> dict:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -46,14 +46,13 @@ class TinygradRunner(ModelRunner, SupercomboTinygrad, PolicyTinygrad, VisionTiny
|
|||||||
assert "/dev/kgsl-3d0" not in str(e), "Model was built on C3 or C3X, but is being loaded on PC"
|
assert "/dev/kgsl-3d0" not in str(e), "Model was built on C3 or C3X, but is being loaded on PC"
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# Map input names to their required dtype and device from the loaded model
|
|
||||||
self.input_to_dtype = {}
|
self.input_to_dtype = {}
|
||||||
self.input_to_device = {}
|
self.input_to_device = {}
|
||||||
for idx, name in enumerate(self.model_run.captured.expected_names):
|
for idx, name in enumerate(self.model_run.captured.expected_names):
|
||||||
info = self.model_run.captured.expected_input_info[idx]
|
info = self.model_run.captured.expected_input_info[idx]
|
||||||
self.input_to_dtype[name] = info[2] # dtype
|
self.input_to_dtype[name] = info[2]
|
||||||
self.input_to_device[name] = info[3] # device
|
self.input_to_device[name] = info[3]
|
||||||
self._policy_cached = False
|
self.inputs[name] = Tensor.zeros(*self.input_shapes[name], dtype=info[2], device=info[3]).realize()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def vision_input_names(self) -> list[str]:
|
def vision_input_names(self) -> list[str]:
|
||||||
@@ -62,22 +61,23 @@ class TinygradRunner(ModelRunner, SupercomboTinygrad, PolicyTinygrad, VisionTiny
|
|||||||
|
|
||||||
|
|
||||||
def prepare_policy_inputs(self, numpy_inputs: NumpyDict):
|
def prepare_policy_inputs(self, numpy_inputs: NumpyDict):
|
||||||
if not self._policy_cached:
|
for key, value in numpy_inputs.items():
|
||||||
for key, value in numpy_inputs.items():
|
if key in self.inputs:
|
||||||
self.inputs[key] = Tensor(value, device='NPY').realize()
|
self.inputs[key].assign(Tensor(value, device=self.inputs[key].device))
|
||||||
self._policy_cached = True
|
|
||||||
|
|
||||||
def prepare_inputs(self, numpy_inputs: NumpyDict) -> dict:
|
def prepare_inputs(self, numpy_inputs: NumpyDict) -> dict:
|
||||||
"""Prepares all vision and policy inputs for the model."""
|
"""Prepares all vision and policy inputs for the model."""
|
||||||
self.prepare_policy_inputs(numpy_inputs)
|
self.prepare_policy_inputs(numpy_inputs)
|
||||||
for key in self.vision_input_names:
|
|
||||||
if key in self.inputs:
|
|
||||||
self.inputs[key] = self.inputs[key].cast(self.input_to_dtype[key])
|
|
||||||
return self.inputs
|
return self.inputs
|
||||||
|
|
||||||
|
def update_vision_inputs(self, vision_inputs: dict[str, Tensor]):
|
||||||
|
for name, tensor in vision_inputs.items():
|
||||||
|
if name in self.inputs:
|
||||||
|
self.inputs[name].assign(tensor)
|
||||||
|
|
||||||
def _run_model(self) -> NumpyDict:
|
def _run_model(self) -> NumpyDict:
|
||||||
"""Runs the Tinygrad model inference and parses the outputs."""
|
"""Runs the Tinygrad model inference and parses the outputs."""
|
||||||
outputs = self.model_run(**self.inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
|
outputs = self.model_run(**self.inputs).numpy().flatten()
|
||||||
return self._parse_outputs(outputs)
|
return self._parse_outputs(outputs)
|
||||||
|
|
||||||
def _parse_outputs(self, model_outputs: np.ndarray) -> NumpyDict:
|
def _parse_outputs(self, model_outputs: np.ndarray) -> NumpyDict:
|
||||||
|
|||||||
+1
-1
Submodule tinygrad_repo updated: 3501a71478...4ad60723e9
Reference in New Issue
Block a user