mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-06-08 17:14:36 +08:00
Compare commits
12 Commits
master-dev
...
new-chubbs
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
85c1a79a14 | ||
|
|
ced4a664cc | ||
|
|
03db277c22 | ||
|
|
11ed3800bf | ||
|
|
92526b878c | ||
|
|
66ff8ae52c | ||
|
|
d85cb76304 | ||
|
|
b4c613680e | ||
|
|
f7511491f7 | ||
|
|
88b30e199b | ||
|
|
2898f394dd | ||
|
|
554cf9ca4a |
@@ -172,8 +172,8 @@ jobs:
|
||||
output_file="${{ env.MODELS_DIR }}/${base_name}_tinygrad.pkl"
|
||||
|
||||
echo "Compiling: $onnx_file -> $output_file"
|
||||
QCOM=1 python3 "${{ env.TINYGRAD_PATH }}/examples/openpilot/compile3.py" "$onnx_file" "$output_file"
|
||||
DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 python3 "${{ env.MODELS_DIR }}/../get_model_metadata.py" "$onnx_file" || true
|
||||
DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1 IMAGE=2 python3 "${{ env.TINYGRAD_PATH }}/examples/openpilot/compile3.py" "$onnx_file" "$output_file"
|
||||
DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1 python3 "${{ env.MODELS_DIR }}/../get_model_metadata.py" "$onnx_file" || true
|
||||
done
|
||||
|
||||
- name: Validate Model Outputs
|
||||
|
||||
@@ -22,15 +22,19 @@ from openpilot.sunnypilot.selfdrive.controls.lib.latcontrol_torque_ext import La
|
||||
# Additionally, there is friction in the steering wheel that needs
|
||||
# to be overcome to move it at all, this is compensated for too.
|
||||
|
||||
KP = 0.8
|
||||
KI = 0.15
|
||||
|
||||
INTERP_SPEEDS = [1, 1.5, 2.0, 3.0, 5, 7.5, 10, 15, 30]
|
||||
KP_INTERP = [250, 120, 65, 30, 11.5, 5.5, 3.5, 2.0, KP]
|
||||
KP_INTERP = [150.0, 100.0, 30.0, 6.0, 2.0, 1.5, 1.2, 0.8, 0.5]
|
||||
KI_INTERP = [0.06, 0.06, 0.06, 0.08, 0.08, 0.10, 0.12, 0.14, 0.16]
|
||||
JERK_INTERP = [0.25, 0.25, 0.25, 0.23, 0.20, 0.18, 0.16, 0.15, 0.15]
|
||||
MEAS_FILTER_TAU_INTERP = [0.07, 0.07, 0.07, 0.07, 0.07, 0.06, 0.06, 0.05, 0.01]
|
||||
INTEGRATOR_DECAY_INTERP = [0.990, 0.990, 0.990, 0.992, 0.993, 0.995, 0.996, 0.998, 0.999]
|
||||
INTEGRATOR_DECAY_FRAMES = 20
|
||||
STRAIGHT_DAMP_THRESHOLD_INTERP = [0.03, 0.03, 0.03, 0.03, 0.03, 0.05, 0.05, 0.2, 0.2]
|
||||
STRAIGHT_DAMP_MIN_INTERP = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
|
||||
STRAIGHT_DAMP_TAU = 0.5
|
||||
|
||||
LP_FILTER_CUTOFF_HZ = 1.2
|
||||
JERK_LOOKAHEAD_SECONDS = 0.19
|
||||
JERK_GAIN = 0.3
|
||||
JERK_LOOKAHEAD_SECONDS = 0.30
|
||||
LAT_ACCEL_REQUEST_BUFFER_SECONDS = 1.0
|
||||
VERSION = 1
|
||||
|
||||
@@ -40,13 +44,16 @@ class LatControlTorque(LatControl):
|
||||
self.torque_params = CP.lateralTuning.torque.as_builder()
|
||||
self.torque_from_lateral_accel = CI.torque_from_lateral_accel()
|
||||
self.lateral_accel_from_torque = CI.lateral_accel_from_torque()
|
||||
self.pid = PIDController([INTERP_SPEEDS, KP_INTERP], KI, rate=1/self.dt)
|
||||
self.pid = PIDController([INTERP_SPEEDS, KP_INTERP], [INTERP_SPEEDS, KI_INTERP], rate=1/self.dt)
|
||||
self.update_limits()
|
||||
self.steering_angle_deadzone_deg = self.torque_params.steeringAngleDeadzoneDeg
|
||||
self.lat_accel_request_buffer_len = int(LAT_ACCEL_REQUEST_BUFFER_SECONDS / self.dt)
|
||||
self.lat_accel_request_buffer = deque([0.] * self.lat_accel_request_buffer_len , maxlen=self.lat_accel_request_buffer_len)
|
||||
self.lookahead_frames = int(JERK_LOOKAHEAD_SECONDS / self.dt)
|
||||
self.jerk_filter = FirstOrderFilter(0.0, 1 / (2 * np.pi * LP_FILTER_CUTOFF_HZ), self.dt)
|
||||
self.measurement_filter = FirstOrderFilter(0.0, 0.04, self.dt)
|
||||
self.integrator_decay_counter = 0
|
||||
self.straight_damp_filter = FirstOrderFilter(1.0, STRAIGHT_DAMP_TAU, self.dt)
|
||||
|
||||
self.extension = LatControlTorqueExt(self, CP, CP_SP, CI)
|
||||
|
||||
@@ -68,7 +75,9 @@ class LatControlTorque(LatControl):
|
||||
pid_log = log.ControlsState.LateralTorqueState.new_message()
|
||||
pid_log.version = VERSION
|
||||
measured_curvature = -VM.calc_curvature(math.radians(CS.steeringAngleDeg - params.angleOffsetDeg), CS.vEgo, params.roll)
|
||||
measurement = measured_curvature * CS.vEgo ** 2
|
||||
meas_tau = float(np.interp(CS.vEgo, INTERP_SPEEDS, MEAS_FILTER_TAU_INTERP))
|
||||
self.measurement_filter.update_alpha(meas_tau)
|
||||
measurement = self.measurement_filter.update(measured_curvature * CS.vEgo ** 2)
|
||||
future_desired_lateral_accel = desired_curvature * CS.vEgo ** 2
|
||||
self.lat_accel_request_buffer.append(future_desired_lateral_accel)
|
||||
|
||||
@@ -79,7 +88,12 @@ class LatControlTorque(LatControl):
|
||||
delay_frames = int(np.clip(lat_delay / self.dt + 1, 1, self.lat_accel_request_buffer_len))
|
||||
expected_lateral_accel = self.lat_accel_request_buffer[-delay_frames]
|
||||
setpoint = expected_lateral_accel
|
||||
error = setpoint - measurement
|
||||
raw_error = setpoint - measurement
|
||||
damp_min = float(np.interp(CS.vEgo, INTERP_SPEEDS, STRAIGHT_DAMP_MIN_INTERP))
|
||||
damp_threshold = float(np.interp(CS.vEgo, INTERP_SPEEDS, STRAIGHT_DAMP_THRESHOLD_INTERP))
|
||||
damp_target = max(damp_min, min(1.0, abs(setpoint) / damp_threshold))
|
||||
damp_factor = self.straight_damp_filter.update(damp_target)
|
||||
error = raw_error * damp_factor
|
||||
|
||||
lookahead_idx = int(np.clip(-delay_frames + self.lookahead_frames, -self.lat_accel_request_buffer_len+1, -2))
|
||||
raw_lateral_jerk = (self.lat_accel_request_buffer[lookahead_idx+1] - self.lat_accel_request_buffer[lookahead_idx-1]) / (2 * self.dt)
|
||||
@@ -88,7 +102,8 @@ class LatControlTorque(LatControl):
|
||||
ff = gravity_adjusted_future_lateral_accel
|
||||
# latAccelOffset corrects roll compensation bias from device roll misalignment relative to car roll
|
||||
ff -= self.torque_params.latAccelOffset
|
||||
ff += get_friction(error + JERK_GAIN * desired_lateral_jerk, lateral_accel_deadzone, FRICTION_THRESHOLD, self.torque_params)
|
||||
jerk_gain = float(np.interp(CS.vEgo, INTERP_SPEEDS, JERK_INTERP))
|
||||
ff += get_friction(error + jerk_gain * desired_lateral_jerk, lateral_accel_deadzone, FRICTION_THRESHOLD, self.torque_params)
|
||||
|
||||
if not active:
|
||||
output_torque = 0.0
|
||||
@@ -99,6 +114,15 @@ class LatControlTorque(LatControl):
|
||||
|
||||
freeze_integrator = steer_limited_by_safety or CS.steeringPressed or CS.vEgo < 5
|
||||
output_lataccel = self.pid.update(pid_log.error, speed=CS.vEgo, feedforward=ff, freeze_integrator=freeze_integrator)
|
||||
|
||||
error_opposes_integrator = (self.pid.i > 0 and pid_log.error < 0) or (self.pid.i < 0 and pid_log.error > 0)
|
||||
if error_opposes_integrator:
|
||||
self.integrator_decay_counter = min(self.integrator_decay_counter + 1, INTEGRATOR_DECAY_FRAMES + 10)
|
||||
else:
|
||||
self.integrator_decay_counter = 0
|
||||
if self.integrator_decay_counter >= INTEGRATOR_DECAY_FRAMES:
|
||||
self.pid.i *= float(np.interp(CS.vEgo, INTERP_SPEEDS, INTEGRATOR_DECAY_INTERP))
|
||||
|
||||
output_torque = self.torque_from_lateral_accel(output_lataccel, self.torque_params)
|
||||
|
||||
# Lateral acceleration torque controller extension updates
|
||||
|
||||
@@ -32,12 +32,12 @@ COST_E_DIM = 5
|
||||
COST_DIM = COST_E_DIM + 1
|
||||
CONSTR_DIM = 4
|
||||
|
||||
X_EGO_OBSTACLE_COST = 3.
|
||||
X_EGO_OBSTACLE_COST = 2.
|
||||
X_EGO_COST = 0.
|
||||
V_EGO_COST = 0.
|
||||
A_EGO_COST = 0.
|
||||
J_EGO_COST = 5.
|
||||
A_CHANGE_COST = 200.
|
||||
A_CHANGE_COST = 350.
|
||||
DANGER_ZONE_COST = 100.
|
||||
CRASH_DISTANCE = .25
|
||||
LEAD_DANGER_FACTOR = 0.75
|
||||
@@ -53,7 +53,7 @@ T_IDXS_LST = [index_function(idx, max_val=MAX_T, max_idx=N) for idx in range(N+1
|
||||
T_IDXS = np.array(T_IDXS_LST)
|
||||
FCW_IDXS = T_IDXS < 5.0
|
||||
T_DIFFS = np.diff(T_IDXS, prepend=[0.])
|
||||
COMFORT_BRAKE = 2.5
|
||||
COMFORT_BRAKE = 2.
|
||||
STOP_DISTANCE = 6.0
|
||||
CRUISE_MIN_ACCEL = -1.2
|
||||
CRUISE_MAX_ACCEL = 1.6
|
||||
|
||||
@@ -1,10 +1,23 @@
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
from itertools import product
|
||||
from SCons.Script import Value
|
||||
from openpilot.common.file_chunker import chunk_file, get_chunk_paths
|
||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE, DM_INPUT_SIZE
|
||||
from openpilot.selfdrive.modeld.constants import ModelConstants
|
||||
from openpilot.selfdrive.modeld.helpers import CompileConfig
|
||||
from tinygrad import Device
|
||||
|
||||
CAMERA_CONFIGS = [
|
||||
(_ar_ox_fisheye.width, _ar_ox_fisheye.height), # tici: 1928x1208
|
||||
(_os_fisheye.width, _os_fisheye.height), # mici: 1344x760
|
||||
]
|
||||
MODELD_CONFIGS = [CompileConfig(cam_w, cam_h, prepare_only, 'driving_')
|
||||
for (cam_w, cam_h), prepare_only in product(CAMERA_CONFIGS, [True, False])]
|
||||
DM_WARP_CONFIGS = [CompileConfig(cam_w, cam_h, True, 'dm_') for cam_w, cam_h in CAMERA_CONFIGS]
|
||||
|
||||
Import('env', 'arch')
|
||||
chunker_file = File("#common/file_chunker.py")
|
||||
lenv = env.Clone()
|
||||
@@ -16,18 +29,17 @@ tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "
|
||||
def estimate_pickle_max_size(onnx_size):
|
||||
return 1.2 * onnx_size + 10 * 1024 * 1024 # 20% + 10MB is plenty
|
||||
|
||||
# THREADS=0 is need to prevent bug: https://github.com/tinygrad/tinygrad/issues/14689
|
||||
# get fastest TG config
|
||||
available = set(Device.get_available_devices())
|
||||
# FIXME-SP: reset when we bump tg
|
||||
if False: # 'CUDA' in available:
|
||||
if 'CUDA' in available:
|
||||
tg_backend = 'CUDA'
|
||||
tg_flags = f'DEV={tg_backend}'
|
||||
elif 'QCOM' in available:
|
||||
tg_backend = 'QCOM'
|
||||
tg_flags = f'DEV={tg_backend} FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0'
|
||||
tg_flags = f'DEV={tg_backend} FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1'
|
||||
else:
|
||||
tg_backend = 'CPU' if arch == 'Darwin' else 'CPU CPU_LLVM=1' # FIXME-SP: reset when we bump tg
|
||||
tg_backend = 'CPU' if arch == 'Darwin' else 'CPU:LLVM'
|
||||
# THREADS=0 is need to prevent bug: https://github.com/tinygrad/tinygrad/issues/14689
|
||||
tg_flags = f'DEV={tg_backend} THREADS=0'
|
||||
|
||||
def write_tg_compiled_flags(target, source, env):
|
||||
@@ -54,14 +66,35 @@ for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
|
||||
image_flag = {
|
||||
'larch64': 'IMAGE=2',
|
||||
}.get(arch, 'IMAGE=0')
|
||||
script_files = [File(Dir("#selfdrive/modeld").File("compile_warp.py").abspath)]
|
||||
compile_warp_cmd = f'{tg_flags} {mac_brew_string} python3 {Dir("#selfdrive/modeld").abspath}/compile_warp.py '
|
||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||
warp_targets = []
|
||||
for cam in [_ar_ox_fisheye, _os_fisheye]:
|
||||
w, h = cam.width, cam.height
|
||||
warp_targets += [File(f"models/warp_{w}x{h}_tinygrad.pkl").abspath, File(f"models/dm_warp_{w}x{h}_tinygrad.pkl").abspath]
|
||||
lenv.Command(warp_targets, tinygrad_files + script_files + [compiled_flags_node], compile_warp_cmd)
|
||||
modeld_dir = Dir("#selfdrive/modeld").abspath
|
||||
compile_modeld_script = [File(f"{modeld_dir}/compile_modeld.py")]
|
||||
compile_dm_warp_script = [File(f"{modeld_dir}/compile_dm_warp.py")]
|
||||
driving_onnx_deps = [File(f"models/{m}.onnx").abspath for m in ['driving_vision', 'driving_policy']]
|
||||
driving_metadata_deps = [File(f"models/{m}_metadata.pkl").abspath for m in ['driving_vision', 'driving_policy']]
|
||||
|
||||
model_w, model_h = MEDMODEL_INPUT_SIZE
|
||||
frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ
|
||||
for cfg in MODELD_CONFIGS:
|
||||
cmd = (f'{tg_flags} {mac_brew_string} {image_flag} python3 {modeld_dir}/compile_modeld.py '
|
||||
f'--model-size {model_w}x{model_h} '
|
||||
f'--nv12 {",".join(str(x) for x in cfg.nv12)} '
|
||||
f'--vision-onnx {File("models/driving_vision.onnx").abspath} '
|
||||
f'--policy-onnx {File("models/driving_policy.onnx").abspath} '
|
||||
f'--output {cfg.pkl_path} --frame-skip {frame_skip}'
|
||||
+ (' --prepare-only' if cfg.prepare_only else ''))
|
||||
node = lenv.Command(cfg.pkl_path, tinygrad_files + compile_modeld_script + driving_onnx_deps + driving_metadata_deps + [chunker_file, compiled_flags_node], cmd)
|
||||
onnx_sizes_sum = sum(os.path.getsize(f) for f in driving_onnx_deps)
|
||||
chunk_targets = get_chunk_paths(cfg.pkl_path, estimate_pickle_max_size(onnx_sizes_sum))
|
||||
def do_chunk(target, source, env, pkl=cfg.pkl_path, chunks=chunk_targets):
|
||||
chunk_file(pkl, chunks)
|
||||
lenv.Command(chunk_targets, node, do_chunk)
|
||||
|
||||
dm_w, dm_h = DM_INPUT_SIZE
|
||||
for cfg in DM_WARP_CONFIGS:
|
||||
cmd = (f'{tg_flags} {mac_brew_string} {image_flag} python3 {modeld_dir}/compile_dm_warp.py '
|
||||
f'--nv12 {",".join(str(x) for x in cfg.nv12)} --warp-to {dm_w}x{dm_h} '
|
||||
f'--output {cfg.pkl_path}')
|
||||
lenv.Command(cfg.pkl_path, tinygrad_files + compile_dm_warp_script + compile_modeld_script + [compiled_flags_node], cmd)
|
||||
|
||||
def tg_compile(flags, model_name):
|
||||
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
|
||||
@@ -82,7 +115,4 @@ def tg_compile(flags, model_name):
|
||||
do_chunk,
|
||||
)
|
||||
|
||||
# Compile small models
|
||||
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
|
||||
tg_compile(tg_flags, model_name)
|
||||
|
||||
tg_compile(tg_flags, 'dmonitoring_model')
|
||||
|
||||
54
selfdrive/modeld/compile_dm_warp.py
Executable file
54
selfdrive/modeld/compile_dm_warp.py
Executable file
@@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import pickle
|
||||
import time
|
||||
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
|
||||
from openpilot.selfdrive.modeld.compile_modeld import NV12Frame, warp_perspective_tinygrad, _parse_size, _parse_nv12
|
||||
|
||||
|
||||
def make_warp_dm(nv12: NV12Frame, dm_w, dm_h):
|
||||
cam_w, cam_h, stride, _, _, _ = nv12
|
||||
stride_pad = stride - cam_w
|
||||
|
||||
def warp_dm(input_frame, M_inv):
|
||||
M_inv = M_inv.to(Device.DEFAULT).realize()
|
||||
return warp_perspective_tinygrad(input_frame[:cam_h*stride], M_inv,
|
||||
(dm_w, dm_h), (cam_h, cam_w), stride_pad).reshape(-1, dm_h * dm_w)
|
||||
return warp_dm
|
||||
|
||||
|
||||
def compile_dm_warp(nv12: NV12Frame, dm_w, dm_h, pkl_path):
|
||||
print(f"Compiling DM warp for {nv12.width}x{nv12.height} -> {dm_w}x{dm_h}...")
|
||||
|
||||
warp_dm_jit = TinyJit(make_warp_dm(nv12, dm_w, dm_h), prune=True)
|
||||
|
||||
for i in range(10):
|
||||
frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8').realize()
|
||||
M_inv = Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')
|
||||
Device.default.synchronize()
|
||||
st = time.perf_counter()
|
||||
warp_dm_jit(frame, M_inv).realize()
|
||||
mt = time.perf_counter()
|
||||
Device.default.synchronize()
|
||||
et = time.perf_counter()
|
||||
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
||||
|
||||
with open(pkl_path, "wb") as f:
|
||||
pickle.dump(warp_dm_jit, f)
|
||||
print(f" Saved to {pkl_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
p = argparse.ArgumentParser()
|
||||
p.add_argument('--nv12', type=_parse_nv12, required=True,
|
||||
help=f'NV12 frame layout: {",".join(NV12Frame._fields)}')
|
||||
p.add_argument('--warp-to', type=_parse_size, required=True, help='DM input WxH')
|
||||
p.add_argument('--output', required=True)
|
||||
args = p.parse_args()
|
||||
|
||||
dm_w, dm_h = args.warp_to
|
||||
compile_dm_warp(args.nv12, dm_w, dm_h, args.output)
|
||||
253
selfdrive/modeld/compile_modeld.py
Executable file
253
selfdrive/modeld/compile_modeld.py
Executable file
@@ -0,0 +1,253 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import pickle
|
||||
import time
|
||||
from functools import partial
|
||||
from collections import namedtuple
|
||||
|
||||
import numpy as np
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.helpers import Context
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
from tinygrad.nn.onnx import OnnxRunner
|
||||
|
||||
# https://github.com/tinygrad/tinygrad/issues/15682
|
||||
from tinygrad.uop.ops import UOp, Ops
|
||||
_orig = UOp.__reduce__
|
||||
UOp.__reduce__ = lambda self: (UOp.unique, ()) if self.op is Ops.UNIQUE else _orig(self)
|
||||
|
||||
|
||||
NV12Frame = namedtuple("NV12Frame", ['width', 'height', 'stride', 'y_height', 'uv_height', 'size'])
|
||||
|
||||
UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
|
||||
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)
|
||||
|
||||
|
||||
def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad):
|
||||
w_dst, h_dst = dst_shape
|
||||
h_src, w_src = src_shape
|
||||
|
||||
x = Tensor.arange(w_dst).reshape(1, w_dst).expand(h_dst, w_dst).reshape(-1)
|
||||
y = Tensor.arange(h_dst).reshape(h_dst, 1).expand(h_dst, w_dst).reshape(-1)
|
||||
|
||||
# inline 3x3 matmul as elementwise to avoid reduce op (enables fusion with gather)
|
||||
src_x = M_inv[0, 0] * x + M_inv[0, 1] * y + M_inv[0, 2]
|
||||
src_y = M_inv[1, 0] * x + M_inv[1, 1] * y + M_inv[1, 2]
|
||||
src_w = M_inv[2, 0] * x + M_inv[2, 1] * y + M_inv[2, 2]
|
||||
|
||||
src_x = src_x / src_w
|
||||
src_y = src_y / src_w
|
||||
|
||||
x_nn_clipped = Tensor.round(src_x).clip(0, w_src - 1).cast('int')
|
||||
y_nn_clipped = Tensor.round(src_y).clip(0, h_src - 1).cast('int')
|
||||
idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped
|
||||
|
||||
return src_flat[idx]
|
||||
|
||||
|
||||
def frames_to_tensor(frames):
|
||||
H = (frames.shape[0] * 2) // 3
|
||||
W = frames.shape[1]
|
||||
in_img1 = Tensor.cat(frames[0:H:2, 0::2],
|
||||
frames[1:H:2, 0::2],
|
||||
frames[0:H:2, 1::2],
|
||||
frames[1:H:2, 1::2],
|
||||
frames[H:H+H//4].reshape((H//2, W//2)),
|
||||
frames[H+H//4:H+H//2].reshape((H//2, W//2)), dim=0).reshape((6, H//2, W//2))
|
||||
return in_img1
|
||||
|
||||
|
||||
def make_frame_prepare(nv12: NV12Frame, model_w, model_h):
|
||||
cam_w, cam_h, stride, y_height, uv_height, _ = nv12
|
||||
uv_offset = stride * y_height
|
||||
stride_pad = stride - cam_w
|
||||
|
||||
def frame_prepare_tinygrad(input_frame, M_inv):
|
||||
# UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
|
||||
M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]])
|
||||
# deinterleave NV12 UV plane (UVUV... -> separate U, V)
|
||||
uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
|
||||
with Context(SPLIT_REDUCEOP=0):
|
||||
y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
|
||||
M_inv, (model_w, model_h),
|
||||
(cam_h, cam_w), stride_pad).realize()
|
||||
u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
|
||||
M_inv_uv, (model_w//2, model_h//2),
|
||||
(cam_h//2, cam_w//2), 0).realize()
|
||||
v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
|
||||
M_inv_uv, (model_w//2, model_h//2),
|
||||
(cam_h//2, cam_w//2), 0).realize()
|
||||
yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
|
||||
tensor = frames_to_tensor(yuv)
|
||||
return tensor
|
||||
return frame_prepare_tinygrad
|
||||
|
||||
|
||||
def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip):
|
||||
img = vision_input_shapes['img'] # (1, 12, 128, 256)
|
||||
n_frames = img[1] // 6
|
||||
img_buf_shape = (frame_skip * (n_frames - 1) + 1, 6, img[2], img[3])
|
||||
|
||||
fb = policy_input_shapes['features_buffer'] # (1, 25, 512)
|
||||
dp = policy_input_shapes['desire_pulse'] # (1, 25, 8)
|
||||
tc = policy_input_shapes['traffic_convention'] # (1, 2)
|
||||
|
||||
npy = {
|
||||
'desire': np.zeros(dp[2], dtype=np.float32),
|
||||
'traffic_convention': np.zeros(tc, dtype=np.float32),
|
||||
'tfm': np.zeros((3, 3), dtype=np.float32),
|
||||
'big_tfm': np.zeros((3, 3), dtype=np.float32),
|
||||
}
|
||||
input_queues = {
|
||||
'img_q': Tensor.zeros(img_buf_shape, dtype='uint8').contiguous().realize(),
|
||||
'big_img_q': Tensor.zeros(img_buf_shape, dtype='uint8').contiguous().realize(),
|
||||
'feat_q': Tensor.zeros(frame_skip * (fb[1] - 1) + 1, fb[0], fb[2]).contiguous().realize(),
|
||||
'desire_q': Tensor.zeros(frame_skip * dp[1], dp[0], dp[2]).contiguous().realize(),
|
||||
**{k: Tensor(v, device='NPY').realize() for k, v in npy.items()},
|
||||
}
|
||||
return input_queues, npy
|
||||
|
||||
|
||||
def shift_and_sample(buf, new_val, sample_fn):
|
||||
buf.assign(buf[1:].cat(new_val, dim=0).contiguous())
|
||||
return sample_fn(buf)
|
||||
|
||||
|
||||
def sample_skip(buf, frame_skip):
|
||||
return buf[::frame_skip].contiguous().flatten(0, 1).unsqueeze(0)
|
||||
|
||||
|
||||
def sample_desire(buf, frame_skip):
|
||||
return buf.reshape(-1, frame_skip, *buf.shape[1:]).max(1).flatten(0, 1).unsqueeze(0)
|
||||
|
||||
|
||||
def make_run_policy(vision_runner, policy_runner, nv12: NV12Frame, model_w, model_h,
|
||||
vision_features_slice, frame_skip, prepare_only=False):
|
||||
frame_prepare = make_frame_prepare(nv12, model_w, model_h)
|
||||
sample_skip_fn = partial(sample_skip, frame_skip=frame_skip)
|
||||
sample_desire_fn = partial(sample_desire, frame_skip=frame_skip)
|
||||
|
||||
def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm, frame, big_frame):
|
||||
tfm = tfm.to(Device.DEFAULT)
|
||||
big_tfm = big_tfm.to(Device.DEFAULT)
|
||||
desire = desire.to(Device.DEFAULT)
|
||||
traffic_convention = traffic_convention.to(Device.DEFAULT)
|
||||
Tensor.realize(tfm, big_tfm, desire, traffic_convention)
|
||||
|
||||
img = shift_and_sample(img_q, frame_prepare(frame, tfm).unsqueeze(0), sample_skip_fn)
|
||||
big_img = shift_and_sample(big_img_q, frame_prepare(big_frame, big_tfm).unsqueeze(0), sample_skip_fn)
|
||||
|
||||
if prepare_only:
|
||||
return img, big_img
|
||||
|
||||
vision_out = next(iter(vision_runner({'img': img, 'big_img': big_img}).values())).cast('float32')
|
||||
|
||||
new_feat = vision_out[:, vision_features_slice].reshape(1, -1).unsqueeze(0)
|
||||
feat_buf = shift_and_sample(feat_q, new_feat, sample_skip_fn)
|
||||
desire_buf = shift_and_sample(desire_q, desire.reshape(1, 1, -1), sample_desire_fn)
|
||||
|
||||
inputs = {'features_buffer': feat_buf, 'desire_pulse': desire_buf, 'traffic_convention': traffic_convention}
|
||||
policy_out = next(iter(policy_runner(inputs).values())).cast('float32')
|
||||
|
||||
return vision_out, policy_out
|
||||
return run_policy
|
||||
|
||||
|
||||
def compile_modeld(nv12: NV12Frame, model_w, model_h, prepare_only, frame_skip,
|
||||
vision_onnx, policy_onnx, pkl_path):
|
||||
from get_model_metadata import metadata_path_for
|
||||
|
||||
print(f"Compiling combined policy JIT for {nv12.width}x{nv12.height} (prepare_only={prepare_only})...")
|
||||
|
||||
vision_runner = OnnxRunner(vision_onnx)
|
||||
policy_runner = OnnxRunner(policy_onnx)
|
||||
|
||||
with open(metadata_path_for(vision_onnx), 'rb') as f:
|
||||
vision_metadata = pickle.load(f)
|
||||
vision_features_slice = vision_metadata['output_slices']['hidden_state']
|
||||
vision_input_shapes = vision_metadata['input_shapes']
|
||||
with open(metadata_path_for(policy_onnx), 'rb') as f:
|
||||
policy_input_shapes = pickle.load(f)['input_shapes']
|
||||
|
||||
_run = make_run_policy(vision_runner, policy_runner, nv12, model_w, model_h,
|
||||
vision_features_slice, frame_skip, prepare_only)
|
||||
run_policy_jit = TinyJit(_run, prune=True)
|
||||
|
||||
N_RUNS = 3
|
||||
SEED = 42
|
||||
|
||||
def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_match=True):
|
||||
input_queues, npy = make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip)
|
||||
np.random.seed(seed)
|
||||
Tensor.manual_seed(seed)
|
||||
|
||||
for i in range(N_RUNS):
|
||||
frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8').realize()
|
||||
big_frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8').realize()
|
||||
for v in npy.values():
|
||||
v[:] = np.random.randn(*v.shape).astype(v.dtype)
|
||||
Device.default.synchronize()
|
||||
st = time.perf_counter()
|
||||
outs = fn(**input_queues, frame=frame, big_frame=big_frame)
|
||||
mt = time.perf_counter()
|
||||
for o in outs:
|
||||
# .realize() not needed once jitted, but needed for unjitted fn
|
||||
o.realize()
|
||||
Device.default.synchronize()
|
||||
et = time.perf_counter()
|
||||
print(f" [{i+1}/{N_RUNS}] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
||||
|
||||
val = [np.copy(v.numpy()) for v in outs]
|
||||
buffers = [np.copy(v.numpy().copy()) for v in input_queues.values()]
|
||||
|
||||
if test_val is not None:
|
||||
match = all(np.array_equal(a, b) for a, b in zip(val, test_val, strict=True))
|
||||
assert match == expect_match, f"outputs {'differ from' if expect_match else 'match'} baseline (seed={seed})"
|
||||
if test_buffers is not None:
|
||||
match = all(np.array_equal(a, b) for a, b in zip(buffers, test_buffers, strict=True))
|
||||
assert match == expect_match, f"buffers {'differ from' if expect_match else 'match'} baseline (seed={seed})"
|
||||
return fn, val, buffers
|
||||
|
||||
print('run unjitted')
|
||||
_, test_val, test_buffers = random_inputs_run_fn(_run, seed=SEED)
|
||||
print('capture + replay')
|
||||
run_policy_jit, _, _ = random_inputs_run_fn(run_policy_jit, SEED, test_val, test_buffers)
|
||||
|
||||
print('pickle round trip')
|
||||
with open(pkl_path, "wb") as f:
|
||||
pickle.dump(run_policy_jit, f)
|
||||
print(f" Saved to {pkl_path}")
|
||||
with open(pkl_path, "rb") as f:
|
||||
run_policy_jit = pickle.load(f)
|
||||
random_inputs_run_fn(run_policy_jit, SEED, test_val, test_buffers, expect_match=True)
|
||||
random_inputs_run_fn(run_policy_jit, SEED+1, test_val, test_buffers, expect_match=False)
|
||||
|
||||
|
||||
def _parse_size(s):
|
||||
w, h = s.lower().split('x')
|
||||
return int(w), int(h)
|
||||
|
||||
|
||||
def _parse_nv12(s):
|
||||
parts = s.split(',')
|
||||
assert len(parts) == len(NV12Frame._fields), \
|
||||
f"--nv12 expects {','.join(NV12Frame._fields)} (got {s!r})"
|
||||
return NV12Frame(*(int(x) for x in parts))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
p = argparse.ArgumentParser()
|
||||
p.add_argument('--model-size', type=_parse_size, required=True, help='model input WxH')
|
||||
p.add_argument('--nv12', type=_parse_nv12, required=True,
|
||||
help=f'NV12 frame layout: {",".join(NV12Frame._fields)}')
|
||||
p.add_argument('--vision-onnx', required=True)
|
||||
p.add_argument('--policy-onnx', required=True)
|
||||
p.add_argument('--output', required=True)
|
||||
p.add_argument('--prepare-only', action='store_true')
|
||||
p.add_argument('--frame-skip', type=int, required=True)
|
||||
args = p.parse_args()
|
||||
|
||||
model_w, model_h = args.model_size
|
||||
compile_modeld(args.nv12, model_w, model_h, args.prepare_only, args.frame_skip,
|
||||
args.vision_onnx, args.policy_onnx, args.output)
|
||||
@@ -1,201 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import time
|
||||
import pickle
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.helpers import Context
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
|
||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE, DM_INPUT_SIZE
|
||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||
|
||||
MODELS_DIR = Path(__file__).parent / 'models'
|
||||
|
||||
CAMERA_CONFIGS = [
|
||||
(_ar_ox_fisheye.width, _ar_ox_fisheye.height), # tici: 1928x1208
|
||||
(_os_fisheye.width, _os_fisheye.height), # mici: 1344x760
|
||||
]
|
||||
|
||||
UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
|
||||
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)
|
||||
|
||||
IMG_BUFFER_SHAPE = (30, MEDMODEL_INPUT_SIZE[1] // 2, MEDMODEL_INPUT_SIZE[0] // 2)
|
||||
|
||||
|
||||
def warp_pkl_path(w, h):
|
||||
return MODELS_DIR / f'warp_{w}x{h}_tinygrad.pkl'
|
||||
|
||||
|
||||
def dm_warp_pkl_path(w, h):
|
||||
return MODELS_DIR / f'dm_warp_{w}x{h}_tinygrad.pkl'
|
||||
|
||||
|
||||
def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad):
|
||||
w_dst, h_dst = dst_shape
|
||||
h_src, w_src = src_shape
|
||||
|
||||
x = Tensor.arange(w_dst).reshape(1, w_dst).expand(h_dst, w_dst).reshape(-1)
|
||||
y = Tensor.arange(h_dst).reshape(h_dst, 1).expand(h_dst, w_dst).reshape(-1)
|
||||
|
||||
# inline 3x3 matmul as elementwise to avoid reduce op (enables fusion with gather)
|
||||
src_x = M_inv[0, 0] * x + M_inv[0, 1] * y + M_inv[0, 2]
|
||||
src_y = M_inv[1, 0] * x + M_inv[1, 1] * y + M_inv[1, 2]
|
||||
src_w = M_inv[2, 0] * x + M_inv[2, 1] * y + M_inv[2, 2]
|
||||
|
||||
src_x = src_x / src_w
|
||||
src_y = src_y / src_w
|
||||
|
||||
x_nn_clipped = Tensor.round(src_x).clip(0, w_src - 1).cast('int')
|
||||
y_nn_clipped = Tensor.round(src_y).clip(0, h_src - 1).cast('int')
|
||||
idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped
|
||||
|
||||
return src_flat[idx]
|
||||
|
||||
|
||||
def frames_to_tensor(frames, model_w, model_h):
|
||||
H = (frames.shape[0] * 2) // 3
|
||||
W = frames.shape[1]
|
||||
in_img1 = Tensor.cat(frames[0:H:2, 0::2],
|
||||
frames[1:H:2, 0::2],
|
||||
frames[0:H:2, 1::2],
|
||||
frames[1:H:2, 1::2],
|
||||
frames[H:H+H//4].reshape((H//2, W//2)),
|
||||
frames[H+H//4:H+H//2].reshape((H//2, W//2)), dim=0).reshape((6, H//2, W//2))
|
||||
return in_img1
|
||||
|
||||
|
||||
def make_frame_prepare(cam_w, cam_h, model_w, model_h):
|
||||
stride, y_height, uv_height, _ = get_nv12_info(cam_w, cam_h)
|
||||
uv_offset = stride * y_height
|
||||
stride_pad = stride - cam_w
|
||||
|
||||
def frame_prepare_tinygrad(input_frame, M_inv):
|
||||
# UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
|
||||
M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]])
|
||||
# deinterleave NV12 UV plane (UVUV... -> separate U, V)
|
||||
uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
|
||||
with Context(SPLIT_REDUCEOP=0):
|
||||
y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
|
||||
M_inv, (model_w, model_h),
|
||||
(cam_h, cam_w), stride_pad).realize()
|
||||
u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
|
||||
M_inv_uv, (model_w//2, model_h//2),
|
||||
(cam_h//2, cam_w//2), 0).realize()
|
||||
v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
|
||||
M_inv_uv, (model_w//2, model_h//2),
|
||||
(cam_h//2, cam_w//2), 0).realize()
|
||||
yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
|
||||
tensor = frames_to_tensor(yuv, model_w, model_h)
|
||||
return tensor
|
||||
return frame_prepare_tinygrad
|
||||
|
||||
|
||||
def make_update_img_input(frame_prepare, model_w, model_h):
|
||||
def update_img_input_tinygrad(tensor, frame, M_inv):
|
||||
M_inv = M_inv.to(Device.DEFAULT)
|
||||
new_img = frame_prepare(frame, M_inv)
|
||||
tensor.assign(tensor[6:].cat(new_img, dim=0).contiguous())
|
||||
return Tensor.cat(tensor[:6], tensor[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
|
||||
return update_img_input_tinygrad
|
||||
|
||||
|
||||
def make_update_both_imgs(frame_prepare, model_w, model_h):
|
||||
update_img = make_update_img_input(frame_prepare, model_w, model_h)
|
||||
|
||||
def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
|
||||
calib_big_img_buffer, new_big_img, M_inv_big):
|
||||
calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
|
||||
calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
|
||||
return calib_img_pair, calib_big_img_pair
|
||||
return update_both_imgs_tinygrad
|
||||
|
||||
|
||||
def make_warp_dm(cam_w, cam_h, dm_w, dm_h):
|
||||
stride, y_height, _, _ = get_nv12_info(cam_w, cam_h)
|
||||
stride_pad = stride - cam_w
|
||||
|
||||
def warp_dm(input_frame, M_inv):
|
||||
M_inv = M_inv.to(Device.DEFAULT)
|
||||
result = warp_perspective_tinygrad(input_frame[:cam_h*stride], M_inv, (dm_w, dm_h), (cam_h, cam_w), stride_pad).reshape(-1, dm_h * dm_w)
|
||||
return result
|
||||
return warp_dm
|
||||
|
||||
|
||||
def compile_modeld_warp(cam_w, cam_h):
|
||||
model_w, model_h = MEDMODEL_INPUT_SIZE
|
||||
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
||||
|
||||
print(f"Compiling modeld warp for {cam_w}x{cam_h}...")
|
||||
|
||||
frame_prepare = make_frame_prepare(cam_w, cam_h, model_w, model_h)
|
||||
update_both_imgs = make_update_both_imgs(frame_prepare, model_w, model_h)
|
||||
update_img_jit = TinyJit(update_both_imgs, prune=True)
|
||||
|
||||
full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
|
||||
big_full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
|
||||
new_frame_np = np.random.randint(0, 256, yuv_size, dtype=np.uint8)
|
||||
new_big_frame_np = np.random.randint(0, 256, yuv_size, dtype=np.uint8)
|
||||
for i in range(10):
|
||||
img_inputs = [full_buffer,
|
||||
Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
big_img_inputs = [big_full_buffer,
|
||||
Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
inputs = img_inputs + big_img_inputs
|
||||
Device.default.synchronize()
|
||||
|
||||
st = time.perf_counter()
|
||||
_ = update_img_jit(*inputs)
|
||||
mt = time.perf_counter()
|
||||
Device.default.synchronize()
|
||||
et = time.perf_counter()
|
||||
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
||||
|
||||
pkl_path = warp_pkl_path(cam_w, cam_h)
|
||||
with open(pkl_path, "wb") as f:
|
||||
pickle.dump(update_img_jit, f)
|
||||
print(f" Saved to {pkl_path}")
|
||||
|
||||
jit = pickle.load(open(pkl_path, "rb"))
|
||||
jit(*inputs)
|
||||
|
||||
|
||||
def compile_dm_warp(cam_w, cam_h):
|
||||
dm_w, dm_h = DM_INPUT_SIZE
|
||||
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
||||
|
||||
print(f"Compiling DM warp for {cam_w}x{cam_h}...")
|
||||
|
||||
warp_dm = make_warp_dm(cam_w, cam_h, dm_w, dm_h)
|
||||
warp_dm_jit = TinyJit(warp_dm, prune=True)
|
||||
|
||||
new_frame_np = np.random.randint(0, 256, yuv_size, dtype=np.uint8)
|
||||
for i in range(10):
|
||||
inputs = [Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
Device.default.synchronize()
|
||||
st = time.perf_counter()
|
||||
warp_dm_jit(*inputs)
|
||||
mt = time.perf_counter()
|
||||
Device.default.synchronize()
|
||||
et = time.perf_counter()
|
||||
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
||||
|
||||
pkl_path = dm_warp_pkl_path(cam_w, cam_h)
|
||||
with open(pkl_path, "wb") as f:
|
||||
pickle.dump(warp_dm_jit, f)
|
||||
print(f" Saved to {pkl_path}")
|
||||
|
||||
|
||||
def run_and_save_pickle():
|
||||
for cam_w, cam_h in CAMERA_CONFIGS:
|
||||
compile_modeld_warp(cam_w, cam_h)
|
||||
compile_dm_warp(cam_w, cam_h)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_and_save_pickle()
|
||||
@@ -7,6 +7,10 @@ from typing import Any
|
||||
|
||||
from tinygrad.nn.onnx import OnnxPBParser
|
||||
|
||||
def metadata_path_for(onnx_path) -> pathlib.Path:
|
||||
p = pathlib.Path(onnx_path)
|
||||
return p.parent / (p.stem + '_metadata.pkl')
|
||||
|
||||
|
||||
class MetadataOnnxPBParser(OnnxPBParser):
|
||||
def _parse_ModelProto(self) -> dict:
|
||||
@@ -48,7 +52,7 @@ if __name__ == "__main__":
|
||||
'output_shapes': dict(get_name_and_shape(x) for x in model["graph"]["output"]),
|
||||
}
|
||||
|
||||
metadata_path = model_path.parent / (model_path.stem + '_metadata.pkl')
|
||||
metadata_path = metadata_path_for(model_path)
|
||||
with open(metadata_path, 'wb') as f:
|
||||
pickle.dump(metadata, f)
|
||||
|
||||
|
||||
31
selfdrive/modeld/helpers.py
Normal file
31
selfdrive/modeld/helpers.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||
|
||||
MODELS_DIR = Path(__file__).resolve().parent / 'models'
|
||||
COMPILED_FLAGS_PATH = MODELS_DIR / 'tg_compiled_flags.json'
|
||||
|
||||
|
||||
def set_tinygrad_backend_from_compiled_flags() -> None:
|
||||
if os.path.isfile(COMPILED_FLAGS_PATH):
|
||||
with open(COMPILED_FLAGS_PATH) as f:
|
||||
os.environ['DEV'] = str(json.load(f)['DEV'])
|
||||
|
||||
|
||||
@dataclass
|
||||
class CompileConfig:
|
||||
cam_w: int
|
||||
cam_h: int
|
||||
prepare_only: bool
|
||||
prefix: str
|
||||
|
||||
@property
|
||||
def pkl_path(self):
|
||||
return str(MODELS_DIR / f'{self.prefix}{"warp_" if self.prepare_only else ""}{self.cam_w}x{self.cam_h}_tinygrad.pkl')
|
||||
|
||||
@property
|
||||
def nv12(self):
|
||||
return (self.cam_w, self.cam_h, *get_nv12_info(self.cam_w, self.cam_h))
|
||||
@@ -1,12 +1,8 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
from openpilot.selfdrive.modeld.tinygrad_helpers import MODELS_DIR, set_tinygrad_backend_from_compiled_flags
|
||||
from openpilot.selfdrive.modeld.helpers import MODELS_DIR, CompileConfig, set_tinygrad_backend_from_compiled_flags
|
||||
set_tinygrad_backend_from_compiled_flags()
|
||||
|
||||
# FIXME-SP: remove once we bump tg
|
||||
from openpilot.system.hardware import TICI
|
||||
os.environ['DEV'] = 'QCOM' if TICI else 'CPU'
|
||||
|
||||
USBGPU = "USBGPU" in os.environ
|
||||
if USBGPU:
|
||||
os.environ['DEV'] = 'AMD'
|
||||
@@ -30,6 +26,7 @@ from openpilot.common.transformations.model import get_warp_matrix
|
||||
from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
|
||||
from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, smooth_value, get_curvature_from_plan
|
||||
from openpilot.selfdrive.modeld.parse_model_outputs import Parser
|
||||
from openpilot.selfdrive.modeld.compile_modeld import make_input_queues
|
||||
from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
|
||||
from openpilot.common.file_chunker import read_file_chunked
|
||||
from openpilot.selfdrive.modeld.constants import ModelConstants, Plan
|
||||
@@ -41,17 +38,13 @@ from openpilot.sunnypilot.modeld_v2.modeld_base import ModelStateBase
|
||||
PROCESS_NAME = "selfdrive.modeld.modeld"
|
||||
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
|
||||
|
||||
VISION_PKL_PATH = MODELS_DIR / 'driving_vision_tinygrad.pkl'
|
||||
VISION_METADATA_PATH = MODELS_DIR / 'driving_vision_metadata.pkl'
|
||||
POLICY_PKL_PATH = MODELS_DIR / 'driving_policy_tinygrad.pkl'
|
||||
POLICY_METADATA_PATH = MODELS_DIR / 'driving_policy_metadata.pkl'
|
||||
|
||||
LAT_SMOOTH_SECONDS = 0.0
|
||||
LONG_SMOOTH_SECONDS = 0.3
|
||||
MIN_LAT_CONTROL_SPEED = 0.3
|
||||
|
||||
IMG_QUEUE_SHAPE = (6*(ModelConstants.MODEL_RUN_FREQ//ModelConstants.MODEL_CONTEXT_FREQ + 1), 128, 256)
|
||||
assert IMG_QUEUE_SHAPE[0] == 30
|
||||
|
||||
|
||||
def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action,
|
||||
@@ -86,108 +79,39 @@ class FrameMeta:
|
||||
if vipc is not None:
|
||||
self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof
|
||||
|
||||
class InputQueues:
|
||||
def __init__ (self, model_fps, env_fps, n_frames_input):
|
||||
assert env_fps % model_fps == 0
|
||||
assert env_fps >= model_fps
|
||||
self.model_fps = model_fps
|
||||
self.env_fps = env_fps
|
||||
self.n_frames_input = n_frames_input
|
||||
|
||||
self.dtypes = {}
|
||||
self.shapes = {}
|
||||
self.q = {}
|
||||
|
||||
def update_dtypes_and_shapes(self, input_dtypes, input_shapes) -> None:
|
||||
self.dtypes.update(input_dtypes)
|
||||
if self.env_fps == self.model_fps:
|
||||
self.shapes.update(input_shapes)
|
||||
else:
|
||||
for k in input_shapes:
|
||||
shape = list(input_shapes[k])
|
||||
if 'img' in k:
|
||||
n_channels = shape[1] // self.n_frames_input
|
||||
shape[1] = (self.env_fps // self.model_fps + (self.n_frames_input - 1)) * n_channels
|
||||
else:
|
||||
shape[1] = (self.env_fps // self.model_fps) * shape[1]
|
||||
self.shapes[k] = tuple(shape)
|
||||
|
||||
def reset(self) -> None:
|
||||
self.q = {k: np.zeros(self.shapes[k], dtype=self.dtypes[k]) for k in self.dtypes.keys()}
|
||||
|
||||
def enqueue(self, inputs:dict[str, np.ndarray]) -> None:
|
||||
for k in inputs.keys():
|
||||
if inputs[k].dtype != self.dtypes[k]:
|
||||
raise ValueError(f'supplied input <{k}({inputs[k].dtype})> has wrong dtype, expected {self.dtypes[k]}')
|
||||
input_shape = list(self.shapes[k])
|
||||
input_shape[1] = -1
|
||||
single_input = inputs[k].reshape(tuple(input_shape))
|
||||
sz = single_input.shape[1]
|
||||
self.q[k][:,:-sz] = self.q[k][:,sz:]
|
||||
self.q[k][:,-sz:] = single_input
|
||||
|
||||
def get(self, *names) -> dict[str, np.ndarray]:
|
||||
if self.env_fps == self.model_fps:
|
||||
return {k: self.q[k] for k in names}
|
||||
else:
|
||||
out = {}
|
||||
for k in names:
|
||||
shape = self.shapes[k]
|
||||
if 'img' in k:
|
||||
n_channels = shape[1] // (self.env_fps // self.model_fps + (self.n_frames_input - 1))
|
||||
out[k] = np.concatenate([self.q[k][:, s:s+n_channels] for s in np.linspace(0, shape[1] - n_channels, self.n_frames_input, dtype=int)], axis=1)
|
||||
elif 'pulse' in k:
|
||||
# any pulse within interval counts
|
||||
out[k] = self.q[k].reshape((shape[0], shape[1] * self.model_fps // self.env_fps, self.env_fps // self.model_fps, -1)).max(axis=2)
|
||||
else:
|
||||
idxs = np.arange(-1, -shape[1], -self.env_fps // self.model_fps)[::-1]
|
||||
out[k] = self.q[k][:, idxs]
|
||||
return out
|
||||
|
||||
class ModelState(ModelStateBase):
|
||||
inputs: dict[str, np.ndarray]
|
||||
output: np.ndarray
|
||||
prev_desire: np.ndarray # for tracking the rising edge of the pulse
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, cam_w: int, cam_h: int):
|
||||
ModelStateBase.__init__(self)
|
||||
self.LAT_SMOOTH_SECONDS = LAT_SMOOTH_SECONDS
|
||||
|
||||
with open(VISION_METADATA_PATH, 'rb') as f:
|
||||
vision_metadata = pickle.load(f)
|
||||
self.vision_input_shapes = vision_metadata['input_shapes']
|
||||
self.vision_input_names = list(self.vision_input_shapes.keys())
|
||||
self.vision_output_slices = vision_metadata['output_slices']
|
||||
vision_output_size = vision_metadata['output_shapes']['outputs'][1]
|
||||
|
||||
with open(POLICY_METADATA_PATH, 'rb') as f:
|
||||
policy_metadata = pickle.load(f)
|
||||
self.policy_input_shapes = policy_metadata['input_shapes']
|
||||
self.policy_output_slices = policy_metadata['output_slices']
|
||||
policy_output_size = policy_metadata['output_shapes']['outputs'][1]
|
||||
|
||||
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
|
||||
|
||||
# policy inputs
|
||||
self.numpy_inputs = {k: np.zeros(self.policy_input_shapes[k], dtype=np.float32) for k in self.policy_input_shapes}
|
||||
self.full_input_queues = InputQueues(ModelConstants.MODEL_CONTEXT_FREQ, ModelConstants.MODEL_RUN_FREQ, ModelConstants.N_FRAMES)
|
||||
for k in ['desire_pulse', 'features_buffer']:
|
||||
self.full_input_queues.update_dtypes_and_shapes({k: self.numpy_inputs[k].dtype}, {k: self.numpy_inputs[k].shape})
|
||||
self.full_input_queues.reset()
|
||||
|
||||
self.img_queues = {'img': Tensor.zeros(IMG_QUEUE_SHAPE, dtype='uint8').contiguous().realize(),
|
||||
'big_img': Tensor.zeros(IMG_QUEUE_SHAPE, dtype='uint8').contiguous().realize()}
|
||||
self.frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ
|
||||
self.input_queues, self.npy = make_input_queues(self.vision_input_shapes, self.policy_input_shapes, self.frame_skip)
|
||||
self.full_frames : dict[str, Tensor] = {}
|
||||
self._blob_cache : dict[int, Tensor] = {}
|
||||
self.transforms_np = {k: np.zeros((3,3), dtype=np.float32) for k in self.img_queues}
|
||||
self.transforms = {k: Tensor(v, device='NPY').realize() for k, v in self.transforms_np.items()}
|
||||
self.vision_output = np.zeros(vision_output_size, dtype=np.float32)
|
||||
self.policy_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
|
||||
self.policy_output = np.zeros(policy_output_size, dtype=np.float32)
|
||||
self.parser = Parser()
|
||||
self.frame_buf_params : dict[str, tuple[int, int, int, int]] = {}
|
||||
self.update_imgs = None
|
||||
self.vision_run = pickle.loads(read_file_chunked(str(VISION_PKL_PATH)))
|
||||
self.policy_run = pickle.loads(read_file_chunked(str(POLICY_PKL_PATH)))
|
||||
self.frame_buf_params = {k: get_nv12_info(cam_w, cam_h) for k in ('img', 'big_img')}
|
||||
self.run_policy = pickle.loads(read_file_chunked(CompileConfig(cam_w, cam_h, prefix='driving_', prepare_only=False).pkl_path))
|
||||
self.warp_enqueue = pickle.loads(read_file_chunked(CompileConfig(cam_w, cam_h, prefix='driving_', prepare_only=True).pkl_path))
|
||||
self.warp_enqueue(
|
||||
**self.input_queues,
|
||||
frame=Tensor.zeros(self.frame_buf_params['img'][3], dtype='uint8').contiguous().realize(),
|
||||
big_frame=Tensor.zeros(self.frame_buf_params['big_img'][3], dtype='uint8').contiguous().realize())
|
||||
|
||||
def slice_outputs(self, model_outputs: np.ndarray, output_slices: dict[str, slice]) -> dict[str, np.ndarray]:
|
||||
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in output_slices.items()}
|
||||
@@ -195,18 +119,6 @@ class ModelState(ModelStateBase):
|
||||
|
||||
def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
|
||||
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
|
||||
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
|
||||
inputs['desire_pulse'][0] = 0
|
||||
new_desire = np.where(inputs['desire_pulse'] - self.prev_desire > .99, inputs['desire_pulse'], 0)
|
||||
self.prev_desire[:] = inputs['desire_pulse']
|
||||
if self.update_imgs is None:
|
||||
for key in bufs.keys():
|
||||
w, h = bufs[key].width, bufs[key].height
|
||||
self.frame_buf_params[key] = get_nv12_info(w, h)
|
||||
warp_path = MODELS_DIR / f'warp_{w}x{h}_tinygrad.pkl'
|
||||
with open(warp_path, "rb") as f:
|
||||
self.update_imgs = pickle.load(f)
|
||||
|
||||
for key in bufs.keys():
|
||||
ptr = bufs[key].data.ctypes.data
|
||||
yuv_size = self.frame_buf_params[key][3]
|
||||
@@ -215,30 +127,31 @@ class ModelState(ModelStateBase):
|
||||
if cache_key not in self._blob_cache:
|
||||
self._blob_cache[cache_key] = Tensor.from_blob(ptr, (yuv_size,), dtype='uint8')
|
||||
self.full_frames[key] = self._blob_cache[cache_key]
|
||||
for key in bufs.keys():
|
||||
self.transforms_np[key][:,:] = transforms[key][:,:]
|
||||
|
||||
out = self.update_imgs(self.img_queues['img'], self.full_frames['img'], self.transforms['img'],
|
||||
self.img_queues['big_img'], self.full_frames['big_img'], self.transforms['big_img'])
|
||||
vision_inputs = {'img': out[0], 'big_img': out[1]}
|
||||
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
|
||||
inputs['desire_pulse'][0] = 0
|
||||
self.npy['desire'][:] = np.where(inputs['desire_pulse'] - self.prev_desire > .99, inputs['desire_pulse'], 0)
|
||||
self.prev_desire[:] = inputs['desire_pulse']
|
||||
self.npy['traffic_convention'][:] = inputs['traffic_convention']
|
||||
self.npy['tfm'][:,:] = transforms['img'][:,:]
|
||||
self.npy['big_tfm'][:,:] = transforms['big_img'][:,:]
|
||||
|
||||
if prepare_only:
|
||||
self.warp_enqueue(**self.input_queues, frame=self.full_frames['img'], big_frame=self.full_frames['big_img'])
|
||||
return None
|
||||
|
||||
self.vision_output = self.vision_run(**vision_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
|
||||
vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(self.vision_output, self.vision_output_slices))
|
||||
vision_output, policy_output = self.run_policy(
|
||||
**self.input_queues, frame=self.full_frames['img'], big_frame=self.full_frames['big_img']
|
||||
)
|
||||
|
||||
self.full_input_queues.enqueue({'features_buffer': vision_outputs_dict['hidden_state'], 'desire_pulse': new_desire})
|
||||
for k in ['desire_pulse', 'features_buffer']:
|
||||
self.numpy_inputs[k][:] = self.full_input_queues.get(k)[k]
|
||||
self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
|
||||
|
||||
self.policy_output = self.policy_run(**self.policy_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
|
||||
policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(self.policy_output, self.policy_output_slices))
|
||||
vision_output = vision_output.numpy().flatten()
|
||||
policy_output = policy_output.numpy().flatten()
|
||||
vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(vision_output, self.vision_output_slices))
|
||||
policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(policy_output, self.policy_output_slices))
|
||||
combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict}
|
||||
if SEND_RAW_PRED:
|
||||
combined_outputs_dict['raw_pred'] = np.concatenate([self.vision_output.copy(), self.policy_output.copy()])
|
||||
|
||||
if SEND_RAW_PRED:
|
||||
combined_outputs_dict['raw_pred'] = np.concatenate([vision_output.copy(), policy_output.copy()])
|
||||
return combined_outputs_dict
|
||||
|
||||
|
||||
@@ -250,11 +163,6 @@ def main(demo=False):
|
||||
# also need to move the aux USB interrupts for good timings
|
||||
config_realtime_process(7, 54)
|
||||
|
||||
st = time.monotonic()
|
||||
cloudlog.warning("loading model")
|
||||
model = ModelState()
|
||||
cloudlog.warning(f"models loaded in {time.monotonic() - st:.1f}s, modeld starting")
|
||||
|
||||
# visionipc clients
|
||||
while True:
|
||||
available_streams = VisionIpcClient.available_streams("camerad", block=False)
|
||||
@@ -278,6 +186,11 @@ def main(demo=False):
|
||||
if use_extra_client:
|
||||
cloudlog.warning(f"connected extra cam with buffer size: {vipc_client_extra.buffer_len} ({vipc_client_extra.width} x {vipc_client_extra.height})")
|
||||
|
||||
st = time.monotonic()
|
||||
cloudlog.warning("loading model")
|
||||
model = ModelState(vipc_client_main.width, vipc_client_main.height)
|
||||
cloudlog.warning(f"models loaded in {time.monotonic() - st:.1f}s, modeld starting")
|
||||
|
||||
# messaging
|
||||
pm = PubMaster(["modelV2", "drivingModelData", "cameraOdometry", "modelDataV2SP"])
|
||||
sm = SubMaster(["deviceState", "carState", "roadCameraState", "liveCalibration", "driverMonitoringState", "carControl", "liveDelay"])
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
MODELS_DIR = Path(__file__).parent / 'models'
|
||||
COMPILED_FLAGS_PATH = MODELS_DIR / 'tg_compiled_flags.json'
|
||||
|
||||
|
||||
def set_tinygrad_backend_from_compiled_flags() -> None:
|
||||
if os.path.isfile(COMPILED_FLAGS_PATH):
|
||||
with open(COMPILED_FLAGS_PATH) as f:
|
||||
os.environ['DEV'] = str(json.load(f)['DEV'])
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import glob
|
||||
from tinygrad import Device
|
||||
|
||||
Import('env', 'arch')
|
||||
lenv = env.Clone()
|
||||
@@ -21,10 +22,19 @@ if PC:
|
||||
if outputs:
|
||||
lenv.Command(outputs, inputs, cmd)
|
||||
|
||||
tg_flags = {
|
||||
'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0',
|
||||
'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}',
|
||||
}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0')
|
||||
available = set(Device.get_available_devices())
|
||||
if 'CUDA' in available:
|
||||
tg_backend = 'CUDA'
|
||||
tg_flags = f'DEV={tg_backend}'
|
||||
elif 'QCOM' in available:
|
||||
tg_backend = 'QCOM'
|
||||
tg_flags = f'DEV={tg_backend} FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1'
|
||||
else:
|
||||
tg_backend = 'CPU' if arch == 'Darwin' else 'CPU:LLVM'
|
||||
# THREADS=0 is need to prevent bug: https://github.com/tinygrad/tinygrad/issues/14689
|
||||
tg_flags = f'DEV={tg_backend} THREADS=0'
|
||||
|
||||
mac_brew_string = f'HOME={os.path.expanduser("~")}' if arch == 'Darwin' else ''
|
||||
|
||||
image_flag = {
|
||||
'larch64': 'IMAGE=2',
|
||||
@@ -38,7 +48,7 @@ def tg_compile(flags, model_name):
|
||||
return lenv.Command(
|
||||
out,
|
||||
[fn + ".onnx"] + tinygrad_files,
|
||||
f'{pythonpath_string} {flags} {image_flag} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {out}'
|
||||
f'{pythonpath_string} {tg_flags} {mac_brew_string} {image_flag} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {out}'
|
||||
)
|
||||
|
||||
# Compile models
|
||||
@@ -46,9 +56,9 @@ for model_name in ['supercombo', 'driving_vision', 'driving_off_policy', 'drivin
|
||||
if File(f"models/{model_name}.onnx").exists():
|
||||
tg_compile(tg_flags, model_name)
|
||||
|
||||
script_files = [File("warp.py"), File(Dir("#selfdrive/modeld").File("compile_warp.py").abspath)]
|
||||
script_files = [File("warp.py"), File(Dir("#selfdrive/modeld").File("compile_modeld.py").abspath)]
|
||||
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + ':' + env.Dir("#").abspath + '"'
|
||||
compile_warp_cmd = f'{pythonpath_string} {tg_flags} python3 -m sunnypilot.modeld_v2.warp'
|
||||
compile_warp_cmd = f'{pythonpath_string} {tg_flags} {mac_brew_string} {image_flag} python3 -m sunnypilot.modeld_v2.warp'
|
||||
|
||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||
warp_targets = []
|
||||
|
||||
@@ -2,8 +2,11 @@ import os
|
||||
os.environ['DEV'] = 'CPU'
|
||||
import pytest
|
||||
import numpy as np
|
||||
from openpilot.selfdrive.modeld.compile_warp import get_nv12_info, CAMERA_CONFIGS
|
||||
from openpilot.sunnypilot.modeld_v2.warp import Warp, MODEL_W, MODEL_H
|
||||
from openpilot.sunnypilot.modeld_v2.warp import CAMERA_CONFIGS
|
||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||
from openpilot.sunnypilot.modeld_v2.warp import Warp
|
||||
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE
|
||||
MODEL_W, MODEL_H = MEDMODEL_INPUT_SIZE
|
||||
|
||||
VISION_NAME_PAIRS = [ # needed to account for supercombos input_imgs
|
||||
('img', 'big_img'),
|
||||
|
||||
@@ -6,29 +6,61 @@ from tinygrad.tensor import Tensor
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
from tinygrad.device import Device
|
||||
|
||||
# https://github.com/tinygrad/tinygrad/issues/15682
|
||||
from tinygrad.uop.ops import UOp, Ops
|
||||
_orig = UOp.__reduce__
|
||||
UOp.__reduce__ = lambda self: (UOp.unique, ()) if self.op is Ops.UNIQUE else _orig(self)
|
||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||
from openpilot.selfdrive.modeld.compile_warp import (
|
||||
CAMERA_CONFIGS, MEDMODEL_INPUT_SIZE, make_frame_prepare, make_update_both_imgs,
|
||||
warp_pkl_path,
|
||||
from openpilot.selfdrive.modeld.compile_modeld import (
|
||||
NV12Frame, make_frame_prepare,
|
||||
)
|
||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||
|
||||
CAMERA_CONFIGS = [
|
||||
(_ar_ox_fisheye.width, _ar_ox_fisheye.height), # tici: 1928x1208
|
||||
(_os_fisheye.width, _os_fisheye.height), # mici: 1344x760
|
||||
]
|
||||
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE
|
||||
|
||||
MODELS_DIR = Path(__file__).parent / 'models'
|
||||
MODEL_W, MODEL_H = MEDMODEL_INPUT_SIZE
|
||||
|
||||
UPSTREAM_BUFFER_LENGTH = 5
|
||||
|
||||
def warp_pkl_path(cam_w, cam_h):
|
||||
return MODELS_DIR / f'warp_{cam_w}x{cam_h}_tinygrad.pkl'
|
||||
|
||||
def make_update_img_input(frame_prepare, model_w, model_h):
|
||||
def update_img_input_tinygrad(tensor, frame, M_inv):
|
||||
M_inv = M_inv.to(Device.DEFAULT)
|
||||
new_img = frame_prepare(frame, M_inv)
|
||||
tensor.assign(tensor[6:].cat(new_img, dim=0).contiguous())
|
||||
return Tensor.cat(tensor[:6], tensor[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
|
||||
return update_img_input_tinygrad
|
||||
|
||||
def make_update_both_imgs(frame_prepare, model_w, model_h):
|
||||
update_img = make_update_img_input(frame_prepare, model_w, model_h)
|
||||
|
||||
def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
|
||||
calib_big_img_buffer, new_big_img, M_inv_big):
|
||||
calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
|
||||
calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
|
||||
return calib_img_pair, calib_big_img_pair
|
||||
return update_both_imgs_tinygrad
|
||||
|
||||
|
||||
def v2_warp_pkl_path(cam_w, cam_h, buffer_length):
|
||||
return MODELS_DIR / f'warp_{cam_w}x{cam_h}_b{buffer_length}_tinygrad.pkl'
|
||||
|
||||
|
||||
def compile_v2_warp(cam_w, cam_h, buffer_length):
|
||||
def compile_v2_warp(cam_w, cam_h, buffer_length, model_w=MEDMODEL_INPUT_SIZE[0], model_h=MEDMODEL_INPUT_SIZE[1]):
|
||||
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
||||
img_buffer_shape = (buffer_length * 6, MODEL_H // 2, MODEL_W // 2)
|
||||
img_buffer_shape = (buffer_length * 6, model_h // 2, model_w // 2)
|
||||
|
||||
print(f"Compiling v2 warp for {cam_w}x{cam_h} buffer_length={buffer_length}...")
|
||||
|
||||
frame_prepare = make_frame_prepare(cam_w, cam_h, MODEL_W, MODEL_H)
|
||||
update_both_imgs = make_update_both_imgs(frame_prepare, MODEL_W, MODEL_H)
|
||||
nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h))
|
||||
frame_prepare = make_frame_prepare(nv12, model_w, model_h)
|
||||
update_both_imgs = make_update_both_imgs(frame_prepare, model_w, model_h)
|
||||
update_img_jit = TinyJit(update_both_imgs, prune=True)
|
||||
|
||||
full_buffer = Tensor.zeros(img_buffer_shape, dtype='uint8').contiguous().realize()
|
||||
@@ -62,9 +94,11 @@ def compile_v2_warp(cam_w, cam_h, buffer_length):
|
||||
|
||||
|
||||
class Warp:
|
||||
def __init__(self, buffer_length=2):
|
||||
def __init__(self, buffer_length=2, model_w=MEDMODEL_INPUT_SIZE[0], model_h=MEDMODEL_INPUT_SIZE[1]):
|
||||
self.buffer_length = buffer_length
|
||||
self.img_buffer_shape = (buffer_length * 6, MODEL_H // 2, MODEL_W // 2)
|
||||
self.model_w = model_w
|
||||
self.model_h = model_h
|
||||
self.img_buffer_shape = (buffer_length * 6, model_h // 2, model_w // 2)
|
||||
|
||||
self.jit_cache = {}
|
||||
self.full_buffers = {k: Tensor.zeros(self.img_buffer_shape, dtype='uint8').contiguous().realize() for k in ['img', 'big_img']}
|
||||
@@ -92,8 +126,9 @@ class Warp:
|
||||
with open(upstream_pkl, 'rb') as f:
|
||||
self.jit_cache[key] = pickle.load(f)
|
||||
if key not in self.jit_cache:
|
||||
frame_prepare = make_frame_prepare(cam_w, cam_h, MODEL_W, MODEL_H)
|
||||
update_both_imgs = make_update_both_imgs(frame_prepare, MODEL_W, MODEL_H)
|
||||
nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h))
|
||||
frame_prepare = make_frame_prepare(nv12, self.model_w, self.model_h)
|
||||
update_both_imgs = make_update_both_imgs(frame_prepare, self.model_w, self.model_h)
|
||||
self.jit_cache[key] = TinyJit(update_both_imgs, prune=True)
|
||||
|
||||
if key not in self._nv12_cache:
|
||||
|
||||
@@ -116,7 +116,7 @@ class ModelCache:
|
||||
|
||||
class ModelFetcher:
|
||||
"""Handles fetching and caching of model data from remote source"""
|
||||
MODEL_URL = "https://raw.githubusercontent.com/sunnypilot/sunnypilot-models/refs/heads/gh-pages/docs/driving_models_v16.json"
|
||||
MODEL_URL = "https://raw.githubusercontent.com/sunnypilot/sunnypilot-models/refs/heads/gh-pages/docs/driving_models_v17.json"
|
||||
|
||||
def __init__(self, params: Params):
|
||||
self.params = params
|
||||
|
||||
Submodule tinygrad_repo updated: 3501a71478...8164a9c46e
Reference in New Issue
Block a user