diff --git a/.github/labeler.yaml b/.github/labeler.yaml index db1f976da8..0b1fcac0ec 100644 --- a/.github/labeler.yaml +++ b/.github/labeler.yaml @@ -24,4 +24,4 @@ multilanguage: autonomy: - changed-files: - - any-glob-to-all-files: "{selfdrive/modeld/models/**,selfdrive/test/process_replay/model_replay_ref_commit}" + - any-glob-to-all-files: "{selfdrive/modeld/models/**,selfdrive/test/process_replay/model_replay_ref_commit,sunnypilot/modeld*/models/**}" diff --git a/.gitignore b/.gitignore index 4e393b88f0..2d9e7a374f 100644 --- a/.gitignore +++ b/.gitignore @@ -74,9 +74,9 @@ comma*.sh selfdrive/modeld/thneed/compile selfdrive/modeld/models/*.thneed selfdrive/modeld/models/*.pkl -sunnypilot/modeld/thneed/compile -sunnypilot/modeld/models/*.thneed -sunnypilot/modeld/models/*.pkl +sunnypilot/modeld*/thneed/compile +sunnypilot/modeld*/models/*.thneed +sunnypilot/modeld*/models/*.pkl *.bz2 *.zst diff --git a/cereal/custom.capnp b/cereal/custom.capnp index 3aa9c7ad18..36cc2c9498 100644 --- a/cereal/custom.capnp +++ b/cereal/custom.capnp @@ -70,6 +70,7 @@ struct ModelManagerSP @0xaedffd8f31e7b55d { enum Runner { snpe @0; tinygrad @1; + stock @2; } struct ModelBundle { diff --git a/conftest.py b/conftest.py index 7ee0c78943..29921c787a 100644 --- a/conftest.py +++ b/conftest.py @@ -19,6 +19,7 @@ collect_ignore = [ collect_ignore_glob = [ "selfdrive/debug/*.py", "selfdrive/modeld/*.py", + "sunnypilot/modeld*/*.py", ] diff --git a/release/build_release.sh b/release/build_release.sh index 8b26dc74ab..e7fbad6f71 100755 --- a/release/build_release.sh +++ b/release/build_release.sh @@ -75,6 +75,7 @@ find . -name 'moc_*' -delete find . -name '__pycache__' -delete rm -rf .sconsign.dblite Jenkinsfile release/ rm selfdrive/modeld/models/supercombo.onnx +rm sunnypilot/modeld*/models/supercombo.onnx find third_party/ -name '*x86*' -exec rm -r {} + find third_party/ -name '*Darwin*' -exec rm -r {} + diff --git a/selfdrive/debug/uiview.py b/selfdrive/debug/uiview.py index ad7b997fa5..ad3ccea036 100755 --- a/selfdrive/debug/uiview.py +++ b/selfdrive/debug/uiview.py @@ -3,19 +3,25 @@ import time from cereal import car, log, messaging from openpilot.common.params import Params -from openpilot.system.manager.process_config import managed_processes, is_snpe_model +from openpilot.system.manager.process_config import managed_processes, is_snpe_model, is_tinygrad_model, is_stock_model from openpilot.system.hardware import HARDWARE if __name__ == "__main__": CP = car.CarParams(notCar=True, wheelbase=1, steerRatio=10) params = Params() params.put("CarParams", CP.to_bytes()) + if use_snpe_modeld := is_snpe_model(False, params, CP): print("Using SNPE modeld") + if use_tinygrad_modeld := is_tinygrad_model(False, params, CP): + print("Using TinyGrad modeld") + if use_stock_modeld := is_stock_model(False, params, CP): + print("Using stock modeld") + HARDWARE.set_power_save(False) procs = ['camerad', 'ui', 'calibrationd', 'plannerd', 'dmonitoringmodeld', 'dmonitoringd'] - procs += ["modeld_snpe" if use_snpe_modeld else "modeld"] + procs += ["modeld_snpe" if use_snpe_modeld else "modeld_tinygrad" if use_tinygrad_modeld else "modeld"] for p in procs: managed_processes[p].start() diff --git a/sunnypilot/SConscript b/sunnypilot/SConscript index 8bc5b84ef5..3aa302b92b 100644 --- a/sunnypilot/SConscript +++ b/sunnypilot/SConscript @@ -1 +1,2 @@ -SConscript(['modeld/SConscript']) \ No newline at end of file +SConscript(['modeld/SConscript']) +SConscript(['modeld_v2/SConscript']) \ No newline at end of file diff --git a/sunnypilot/modeld/SConscript b/sunnypilot/modeld/SConscript index 2704974da7..90fb80975f 100644 --- a/sunnypilot/modeld/SConscript +++ b/sunnypilot/modeld/SConscript @@ -56,7 +56,7 @@ tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + " # Get model metadata fn = File("models/supercombo").abspath -cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx' +cmd = f'python3 {Dir("#sunnypilot/modeld").abspath}/get_model_metadata.py {fn}.onnx' lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files, cmd) if arch == "larch64": diff --git a/sunnypilot/modeld_v2/.gitignore b/sunnypilot/modeld_v2/.gitignore new file mode 100644 index 0000000000..742d3d1205 --- /dev/null +++ b/sunnypilot/modeld_v2/.gitignore @@ -0,0 +1 @@ +*_pyx.cpp diff --git a/sunnypilot/modeld_v2/SConscript b/sunnypilot/modeld_v2/SConscript new file mode 100644 index 0000000000..1a173dcdeb --- /dev/null +++ b/sunnypilot/modeld_v2/SConscript @@ -0,0 +1,50 @@ +import glob + +Import('env', 'envCython', 'arch', 'cereal', 'messaging', 'common', 'gpucommon', 'visionipc', 'transformations') +lenv = env.Clone() +lenvCython = envCython.Clone() + +libs = [cereal, messaging, visionipc, gpucommon, common, 'capnp', 'kj', 'pthread'] +frameworks = [] + +common_src = [ + "models/commonmodel.cc", + "transforms/loadyuv.cc", + "transforms/transform.cc", +] + + +# OpenCL is a framework on Mac +if arch == "Darwin": + frameworks += ['OpenCL'] +else: + libs += ['OpenCL'] + +# Set path definitions +for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transforms/loadyuv.cl'}.items(): + for xenv in (lenv, lenvCython): + xenv['CXXFLAGS'].append(f'-D{pathdef}_PATH=\\"{File(fn).abspath}\\"') + +# Compile cython +cython_libs = envCython["LIBS"] + libs +commonmodel_lib = lenv.Library('commonmodel', common_src) +lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks) +tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x] + +# Get model metadata +fn = File("models/supercombo").abspath +cmd = f'python3 {Dir("#sunnypilot/modeld_v2").abspath}/get_model_metadata.py {fn}.onnx' +lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files, cmd) + +# Compile tinygrad model +pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"' +if arch == 'larch64': + device_string = 'QCOM=1' +else: + device_string = 'CLANG=1 IMAGE=0' + +for model_name in ['supercombo', 'dmonitoring_model']: + fn = File(f"models/{model_name}").abspath + cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl' + lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd) + diff --git a/sunnypilot/modeld_v2/__init__.py b/sunnypilot/modeld_v2/__init__.py index e69de29bb2..639622e827 100644 --- a/sunnypilot/modeld_v2/__init__.py +++ b/sunnypilot/modeld_v2/__init__.py @@ -0,0 +1,5 @@ +from pathlib import Path + +MODEL_PATH = Path(__file__).parent / 'models/supercombo.onnx' +MODEL_PKL_PATH = Path(__file__).parent / 'models/supercombo_tinygrad.pkl' +METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl' diff --git a/sunnypilot/modeld_v2/constants.py b/sunnypilot/modeld_v2/constants.py new file mode 100644 index 0000000000..cf5157591e --- /dev/null +++ b/sunnypilot/modeld_v2/constants.py @@ -0,0 +1,84 @@ +import numpy as np + +def index_function(idx, max_val=192, max_idx=32): + return (max_val) * ((idx/max_idx)**2) + +class ModelConstants: + # time and distance indices + IDX_N = 33 + T_IDXS = [index_function(idx, max_val=10.0) for idx in range(IDX_N)] + X_IDXS = [index_function(idx, max_val=192.0) for idx in range(IDX_N)] + LEAD_T_IDXS = [0., 2., 4., 6., 8., 10.] + LEAD_T_OFFSETS = [0., 2., 4.] + META_T_IDXS = [2., 4., 6., 8., 10.] + + # model inputs constants + MODEL_FREQ = 20 + FEATURE_LEN = 512 + FULL_HISTORY_BUFFER_LEN = 99 + DESIRE_LEN = 8 + TRAFFIC_CONVENTION_LEN = 2 + LAT_PLANNER_STATE_LEN = 4 + LATERAL_CONTROL_PARAMS_LEN = 2 + PREV_DESIRED_CURV_LEN = 1 + + # model outputs constants + FCW_THRESHOLDS_5MS2 = np.array([.05, .05, .15, .15, .15], dtype=np.float32) + FCW_THRESHOLDS_3MS2 = np.array([.7, .7], dtype=np.float32) + FCW_5MS2_PROBS_WIDTH = 5 + FCW_3MS2_PROBS_WIDTH = 2 + + DISENGAGE_WIDTH = 5 + POSE_WIDTH = 6 + WIDE_FROM_DEVICE_WIDTH = 3 + LEAD_WIDTH = 4 + LANE_LINES_WIDTH = 2 + ROAD_EDGES_WIDTH = 2 + PLAN_WIDTH = 15 + DESIRE_PRED_WIDTH = 8 + LAT_PLANNER_SOLUTION_WIDTH = 4 + DESIRED_CURV_WIDTH = 1 + + NUM_LANE_LINES = 4 + NUM_ROAD_EDGES = 2 + + LEAD_TRAJ_LEN = 6 + DESIRE_PRED_LEN = 4 + + PLAN_MHP_N = 5 + LEAD_MHP_N = 2 + PLAN_MHP_SELECTION = 1 + LEAD_MHP_SELECTION = 3 + + FCW_THRESHOLD_5MS2_HIGH = 0.15 + FCW_THRESHOLD_5MS2_LOW = 0.05 + FCW_THRESHOLD_3MS2 = 0.7 + + CONFIDENCE_BUFFER_LEN = 5 + RYG_GREEN = 0.01165 + RYG_YELLOW = 0.06157 + + POLY_PATH_DEGREE = 4 + +# model outputs slices +class Plan: + POSITION = slice(0, 3) + VELOCITY = slice(3, 6) + ACCELERATION = slice(6, 9) + T_FROM_CURRENT_EULER = slice(9, 12) + ORIENTATION_RATE = slice(12, 15) + +class Meta: + ENGAGED = slice(0, 1) + # next 2, 4, 6, 8, 10 seconds + GAS_DISENGAGE = slice(1, 31, 6) + BRAKE_DISENGAGE = slice(2, 31, 6) + STEER_OVERRIDE = slice(3, 31, 6) + HARD_BRAKE_3 = slice(4, 31, 6) + HARD_BRAKE_4 = slice(5, 31, 6) + HARD_BRAKE_5 = slice(6, 31, 6) + # next 0, 2, 4, 6, 8, 10 seconds + GAS_PRESS = slice(31, 55, 4) + BRAKE_PRESS = slice(32, 55, 4) + LEFT_BLINKER = slice(33, 55, 4) + RIGHT_BLINKER = slice(34, 55, 4) diff --git a/sunnypilot/modeld_v2/dmonitoringmodeld b/sunnypilot/modeld_v2/dmonitoringmodeld new file mode 100755 index 0000000000..90b43800fe --- /dev/null +++ b/sunnypilot/modeld_v2/dmonitoringmodeld @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" +exec "$DIR/dmonitoringmodeld.py" "$@" diff --git a/sunnypilot/modeld_v2/dmonitoringmodeld.py b/sunnypilot/modeld_v2/dmonitoringmodeld.py new file mode 100755 index 0000000000..4469618c88 --- /dev/null +++ b/sunnypilot/modeld_v2/dmonitoringmodeld.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 +import os +from openpilot.system.hardware import TICI +if TICI: + from tinygrad.tensor import Tensor + from tinygrad.dtype import dtypes + from openpilot.sunnypilot.modeld_v2.runners.tinygrad_helpers import qcom_tensor_from_opencl_address + os.environ['QCOM'] = '1' +else: + from openpilot.sunnypilot.modeld_v2.runners.ort_helpers import make_onnx_cpu_runner +import math +import time +import pickle +import ctypes +import numpy as np +from pathlib import Path +from setproctitle import setproctitle + +from cereal import messaging +from cereal.messaging import PubMaster, SubMaster +from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf +from openpilot.common.swaglog import cloudlog +from openpilot.common.realtime import config_realtime_process +from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE +from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye +from openpilot.sunnypilot.modeld_v2.models.commonmodel_pyx import CLContext, MonitoringModelFrame +from openpilot.sunnypilot.modeld_v2.parse_model_outputs import sigmoid +from openpilot.system import sentry + +MODEL_WIDTH, MODEL_HEIGHT = DM_INPUT_SIZE +CALIB_LEN = 3 +FEATURE_LEN = 512 +OUTPUT_SIZE = 84 + FEATURE_LEN + +PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld" +SEND_RAW_PRED = os.getenv('SEND_RAW_PRED') +MODEL_PATH = Path(__file__).parent / 'models/dmonitoring_model.onnx' +MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl' + + +class DriverStateResult(ctypes.Structure): + _fields_ = [ + ("face_orientation", ctypes.c_float*3), + ("face_position", ctypes.c_float*3), + ("face_orientation_std", ctypes.c_float*3), + ("face_position_std", ctypes.c_float*3), + ("face_prob", ctypes.c_float), + ("_unused_a", ctypes.c_float*8), + ("left_eye_prob", ctypes.c_float), + ("_unused_b", ctypes.c_float*8), + ("right_eye_prob", ctypes.c_float), + ("left_blink_prob", ctypes.c_float), + ("right_blink_prob", ctypes.c_float), + ("sunglasses_prob", ctypes.c_float), + ("occluded_prob", ctypes.c_float), + ("ready_prob", ctypes.c_float*4), + ("not_ready_prob", ctypes.c_float*2)] + + +class DMonitoringModelResult(ctypes.Structure): + _fields_ = [ + ("driver_state_lhd", DriverStateResult), + ("driver_state_rhd", DriverStateResult), + ("poor_vision_prob", ctypes.c_float), + ("wheel_on_right_prob", ctypes.c_float), + ("features", ctypes.c_float*FEATURE_LEN)] + + +class ModelState: + inputs: dict[str, np.ndarray] + output: np.ndarray + + def __init__(self, cl_ctx): + assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float) + + self.frame = MonitoringModelFrame(cl_ctx) + self.numpy_inputs = { + 'calib': np.zeros((1, CALIB_LEN), dtype=np.float32), + } + + if TICI: + self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()} + with open(MODEL_PKL_PATH, "rb") as f: + self.model_run = pickle.load(f) + else: + self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH) + + def run(self, buf: VisionBuf, calib: np.ndarray, transform: np.ndarray) -> tuple[np.ndarray, float]: + self.numpy_inputs['calib'][0,:] = calib + + t1 = time.perf_counter() + + input_img_cl = self.frame.prepare(buf, transform.flatten()) + if TICI: + # The imgs tensors are backed by opencl memory, only need init once + if 'input_img' not in self.tensor_inputs: + self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, (1, MODEL_WIDTH*MODEL_HEIGHT), dtype=dtypes.uint8) + else: + self.numpy_inputs['input_img'] = self.frame.buffer_from_cl(input_img_cl).reshape((1, MODEL_WIDTH*MODEL_HEIGHT)) + + if TICI: + output = self.model_run(**self.tensor_inputs).numpy().flatten() + else: + output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten() + + t2 = time.perf_counter() + return output, t2 - t1 + + +def fill_driver_state(msg, ds_result: DriverStateResult): + msg.faceOrientation = list(ds_result.face_orientation) + msg.faceOrientationStd = [math.exp(x) for x in ds_result.face_orientation_std] + msg.facePosition = list(ds_result.face_position[:2]) + msg.facePositionStd = [math.exp(x) for x in ds_result.face_position_std[:2]] + msg.faceProb = float(sigmoid(ds_result.face_prob)) + msg.leftEyeProb = float(sigmoid(ds_result.left_eye_prob)) + msg.rightEyeProb = float(sigmoid(ds_result.right_eye_prob)) + msg.leftBlinkProb = float(sigmoid(ds_result.left_blink_prob)) + msg.rightBlinkProb = float(sigmoid(ds_result.right_blink_prob)) + msg.sunglassesProb = float(sigmoid(ds_result.sunglasses_prob)) + msg.occludedProb = float(sigmoid(ds_result.occluded_prob)) + msg.readyProb = [float(sigmoid(x)) for x in ds_result.ready_prob] + msg.notReadyProb = [float(sigmoid(x)) for x in ds_result.not_ready_prob] + + +def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: int, execution_time: float, gpu_execution_time: float): + model_result = ctypes.cast(model_output.ctypes.data, ctypes.POINTER(DMonitoringModelResult)).contents + msg = messaging.new_message('driverStateV2', valid=True) + ds = msg.driverStateV2 + ds.frameId = frame_id + ds.modelExecutionTime = execution_time + ds.gpuExecutionTime = gpu_execution_time + ds.poorVisionProb = float(sigmoid(model_result.poor_vision_prob)) + ds.wheelOnRightProb = float(sigmoid(model_result.wheel_on_right_prob)) + ds.rawPredictions = model_output.tobytes() if SEND_RAW_PRED else b'' + fill_driver_state(ds.leftDriverData, model_result.driver_state_lhd) + fill_driver_state(ds.rightDriverData, model_result.driver_state_rhd) + return msg + + +def main(): + setproctitle(PROCESS_NAME) + config_realtime_process([0, 1, 2, 3], 5) + + sentry.set_tag("daemon", PROCESS_NAME) + cloudlog.bind(daemon=PROCESS_NAME) + + cl_context = CLContext() + model = ModelState(cl_context) + cloudlog.warning("models loaded, dmonitoringmodeld starting") + + cloudlog.warning("connecting to driver stream") + vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True, cl_context) + while not vipc_client.connect(False): + time.sleep(0.1) + assert vipc_client.is_connected() + cloudlog.warning(f"connected with buffer size: {vipc_client.buffer_len}") + + sm = SubMaster(["liveCalibration"]) + pm = PubMaster(["driverStateV2"]) + + calib = np.zeros(CALIB_LEN, dtype=np.float32) + model_transform = None + + while True: + buf = vipc_client.recv() + if buf is None: + continue + + if model_transform is None: + cam = _os_fisheye if buf.width == _os_fisheye.width else _ar_ox_fisheye + model_transform = np.linalg.inv(np.dot(dmonitoringmodel_intrinsics, np.linalg.inv(cam.intrinsics))).astype(np.float32) + + sm.update(0) + if sm.updated["liveCalibration"]: + calib[:] = np.array(sm["liveCalibration"].rpyCalib) + + t1 = time.perf_counter() + model_output, gpu_execution_time = model.run(buf, calib, model_transform) + t2 = time.perf_counter() + + pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time)) + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + cloudlog.warning(f"child {PROCESS_NAME} got SIGINT") + except Exception: + sentry.capture_exception() + raise diff --git a/sunnypilot/modeld_v2/fill_model_msg.py b/sunnypilot/modeld_v2/fill_model_msg.py new file mode 100644 index 0000000000..c6129936ca --- /dev/null +++ b/sunnypilot/modeld_v2/fill_model_msg.py @@ -0,0 +1,245 @@ +import os +import capnp +import numpy as np +from cereal import log +from openpilot.sunnypilot.modeld_v2.constants import ModelConstants, Plan +from openpilot.selfdrive.controls.lib.drive_helpers import MIN_SPEED + +SEND_RAW_PRED = os.getenv('SEND_RAW_PRED') + +ConfidenceClass = log.ModelDataV2.ConfidenceClass + + +def curv_from_psis(psi_target, psi_rate, vego, delay): + vego = np.clip(vego, MIN_SPEED, np.inf) + curv_from_psi = psi_target / (vego * delay) # epsilon to prevent divide-by-zero + return 2 * curv_from_psi - psi_rate / vego + + +def get_curvature_from_plan(plan, vego, delay): + psi_target = np.interp(delay, ModelConstants.T_IDXS, plan[:, Plan.T_FROM_CURRENT_EULER][:, 2]) + psi_rate = plan[:, Plan.ORIENTATION_RATE][0, 2] + return curv_from_psis(psi_target, psi_rate, vego, delay) + + +def get_curvature_from_output(output, vego, delay): + if desired_curv := output.get('desired_curvature'): # If the model outputs the desired curvature, use that directly + return float(desired_curv[0, 0]) + + return float(get_curvature_from_plan(output['plan'][0], vego, delay)) + + +class PublishState: + def __init__(self): + self.disengage_buffer = np.zeros(ModelConstants.CONFIDENCE_BUFFER_LEN*ModelConstants.DISENGAGE_WIDTH, dtype=np.float32) + self.prev_brake_5ms2_probs = np.zeros(ModelConstants.FCW_5MS2_PROBS_WIDTH, dtype=np.float32) + self.prev_brake_3ms2_probs = np.zeros(ModelConstants.FCW_3MS2_PROBS_WIDTH, dtype=np.float32) + +def fill_xyzt(builder, t, x, y, z, x_std=None, y_std=None, z_std=None): + builder.t = t + builder.x = x.tolist() + builder.y = y.tolist() + builder.z = z.tolist() + if x_std is not None: + builder.xStd = x_std.tolist() + if y_std is not None: + builder.yStd = y_std.tolist() + if z_std is not None: + builder.zStd = z_std.tolist() + +def fill_xyvat(builder, t, x, y, v, a, x_std=None, y_std=None, v_std=None, a_std=None): + builder.t = t + builder.x = x.tolist() + builder.y = y.tolist() + builder.v = v.tolist() + builder.a = a.tolist() + if x_std is not None: + builder.xStd = x_std.tolist() + if y_std is not None: + builder.yStd = y_std.tolist() + if v_std is not None: + builder.vStd = v_std.tolist() + if a_std is not None: + builder.aStd = a_std.tolist() + +def fill_xyz_poly(builder, degree, x, y, z): + xyz = np.stack([x, y, z], axis=1) + coeffs = np.polynomial.polynomial.polyfit(ModelConstants.T_IDXS, xyz, deg=degree) + builder.xCoefficients = coeffs[:, 0].tolist() + builder.yCoefficients = coeffs[:, 1].tolist() + builder.zCoefficients = coeffs[:, 2].tolist() + +def fill_lane_line_meta(builder, lane_lines, lane_line_probs): + builder.leftY = lane_lines[1].y[0] + builder.leftProb = lane_line_probs[1] + builder.rightY = lane_lines[2].y[0] + builder.rightProb = lane_line_probs[2] + +def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._DynamicStructBuilder, + net_output_data: dict[str, np.ndarray], v_ego: float, delay: float, + publish_state: PublishState, vipc_frame_id: int, vipc_frame_id_extra: int, + frame_id: int, frame_drop: float, timestamp_eof: int, model_execution_time: float, + valid: bool, model_meta) -> None: + frame_age = frame_id - vipc_frame_id if frame_id > vipc_frame_id else 0 + frame_drop_perc = frame_drop * 100 + extended_msg.valid = valid + base_msg.valid = valid + + desired_curvature = float(get_curvature_from_output(net_output_data, v_ego, delay)) + + driving_model_data = base_msg.drivingModelData + + driving_model_data.frameId = vipc_frame_id + driving_model_data.frameIdExtra = vipc_frame_id_extra + driving_model_data.frameDropPerc = frame_drop_perc + driving_model_data.modelExecutionTime = model_execution_time + driving_model_data.action.desiredCurvature = desired_curvature + + modelV2 = extended_msg.modelV2 + modelV2.frameId = vipc_frame_id + modelV2.frameIdExtra = vipc_frame_id_extra + modelV2.frameAge = frame_age + modelV2.frameDropPerc = frame_drop_perc + modelV2.timestampEof = timestamp_eof + modelV2.modelExecutionTime = model_execution_time + + # plan + fill_xyzt(modelV2.position, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.POSITION].T, *net_output_data['plan_stds'][0,:,Plan.POSITION].T) + fill_xyzt(modelV2.velocity, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.VELOCITY].T) + fill_xyzt(modelV2.acceleration, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.ACCELERATION].T) + fill_xyzt(modelV2.orientation, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.T_FROM_CURRENT_EULER].T) + fill_xyzt(modelV2.orientationRate, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.ORIENTATION_RATE].T) + + # temporal pose + temporal_pose = modelV2.temporalPose + if 'sim_pose' in net_output_data: + temporal_pose.trans = net_output_data['sim_pose'][0,:ModelConstants.POSE_WIDTH//2].tolist() + temporal_pose.transStd = net_output_data['sim_pose_stds'][0,:ModelConstants.POSE_WIDTH//2].tolist() + temporal_pose.rot = net_output_data['sim_pose'][0,ModelConstants.POSE_WIDTH//2:].tolist() + temporal_pose.rotStd = net_output_data['sim_pose_stds'][0,ModelConstants.POSE_WIDTH//2:].tolist() + else: + temporal_pose.trans = net_output_data['plan'][0,0,Plan.VELOCITY].tolist() + temporal_pose.transStd = net_output_data['plan_stds'][0,0,Plan.VELOCITY].tolist() + temporal_pose.rot = net_output_data['plan'][0,0,Plan.ORIENTATION_RATE].tolist() + temporal_pose.rotStd = net_output_data['plan_stds'][0,0,Plan.ORIENTATION_RATE].tolist() + + # poly path + fill_xyz_poly(driving_model_data.path, ModelConstants.POLY_PATH_DEGREE, *net_output_data['plan'][0,:,Plan.POSITION].T) + + # lateral planning + modelV2.action.desiredCurvature = desired_curvature + + # times at X_IDXS according to model plan + PLAN_T_IDXS = [np.nan] * ModelConstants.IDX_N + PLAN_T_IDXS[0] = 0.0 + plan_x = net_output_data['plan'][0,:,Plan.POSITION][:,0].tolist() + for xidx in range(1, ModelConstants.IDX_N): + tidx = 0 + # increment tidx until we find an element that's further away than the current xidx + while tidx < ModelConstants.IDX_N - 1 and plan_x[tidx+1] < ModelConstants.X_IDXS[xidx]: + tidx += 1 + if tidx == ModelConstants.IDX_N - 1: + # if the Plan doesn't extend far enough, set plan_t to the max value (10s), then break + PLAN_T_IDXS[xidx] = ModelConstants.T_IDXS[ModelConstants.IDX_N - 1] + break + # interpolate to find `t` for the current xidx + current_x_val = plan_x[tidx] + next_x_val = plan_x[tidx+1] + p = (ModelConstants.X_IDXS[xidx] - current_x_val) / (next_x_val - current_x_val) if abs(next_x_val - current_x_val) > 1e-9 else float('nan') + PLAN_T_IDXS[xidx] = p * ModelConstants.T_IDXS[tidx+1] + (1 - p) * ModelConstants.T_IDXS[tidx] + + # lane lines + modelV2.init('laneLines', 4) + for i in range(4): + lane_line = modelV2.laneLines[i] + fill_xyzt(lane_line, PLAN_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['lane_lines'][0,i,:,0], net_output_data['lane_lines'][0,i,:,1]) + modelV2.laneLineStds = net_output_data['lane_lines_stds'][0,:,0,0].tolist() + modelV2.laneLineProbs = net_output_data['lane_lines_prob'][0,1::2].tolist() + + fill_lane_line_meta(driving_model_data.laneLineMeta, modelV2.laneLines, modelV2.laneLineProbs) + + # road edges + modelV2.init('roadEdges', 2) + for i in range(2): + road_edge = modelV2.roadEdges[i] + fill_xyzt(road_edge, PLAN_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['road_edges'][0,i,:,0], net_output_data['road_edges'][0,i,:,1]) + modelV2.roadEdgeStds = net_output_data['road_edges_stds'][0,:,0,0].tolist() + + # leads + modelV2.init('leadsV3', 3) + for i in range(3): + lead = modelV2.leadsV3[i] + fill_xyvat(lead, ModelConstants.LEAD_T_IDXS, *net_output_data['lead'][0,i].T, *net_output_data['lead_stds'][0,i].T) + lead.prob = net_output_data['lead_prob'][0,i].tolist() + lead.probTime = ModelConstants.LEAD_T_OFFSETS[i] + + # meta + meta = modelV2.meta + meta.desireState = net_output_data['desire_state'][0].reshape(-1).tolist() + meta.desirePrediction = net_output_data['desire_pred'][0].reshape(-1).tolist() + meta.engagedProb = net_output_data['meta'][0,model_meta.ENGAGED].item() + meta.init('disengagePredictions') + disengage_predictions = meta.disengagePredictions + disengage_predictions.t = ModelConstants.META_T_IDXS + disengage_predictions.brakeDisengageProbs = net_output_data['meta'][0,model_meta.BRAKE_DISENGAGE].tolist() + disengage_predictions.gasDisengageProbs = net_output_data['meta'][0,model_meta.GAS_DISENGAGE].tolist() + disengage_predictions.steerOverrideProbs = net_output_data['meta'][0,model_meta.STEER_OVERRIDE].tolist() + disengage_predictions.brake3MetersPerSecondSquaredProbs = net_output_data['meta'][0,model_meta.HARD_BRAKE_3].tolist() + disengage_predictions.brake4MetersPerSecondSquaredProbs = net_output_data['meta'][0,model_meta.HARD_BRAKE_4].tolist() + disengage_predictions.brake5MetersPerSecondSquaredProbs = net_output_data['meta'][0,model_meta.HARD_BRAKE_5].tolist() + + if hasattr(model_meta, 'GAS_PRESS') and hasattr(model_meta, 'BRAKE_PRESS'): + disengage_predictions.gasPressProbs = net_output_data['meta'][0,model_meta.GAS_PRESS].tolist() + disengage_predictions.brakePressProbs = net_output_data['meta'][0,model_meta.BRAKE_PRESS].tolist() + + publish_state.prev_brake_5ms2_probs[:-1] = publish_state.prev_brake_5ms2_probs[1:] + publish_state.prev_brake_5ms2_probs[-1] = net_output_data['meta'][0,model_meta.HARD_BRAKE_5][0] + publish_state.prev_brake_3ms2_probs[:-1] = publish_state.prev_brake_3ms2_probs[1:] + publish_state.prev_brake_3ms2_probs[-1] = net_output_data['meta'][0,model_meta.HARD_BRAKE_3][0] + hard_brake_predicted = (publish_state.prev_brake_5ms2_probs > ModelConstants.FCW_THRESHOLDS_5MS2).all() and \ + (publish_state.prev_brake_3ms2_probs > ModelConstants.FCW_THRESHOLDS_3MS2).all() + meta.hardBrakePredicted = hard_brake_predicted.item() + + # confidence + if vipc_frame_id % (2*ModelConstants.MODEL_FREQ) == 0: + # any disengage prob + brake_disengage_probs = net_output_data['meta'][0,model_meta.BRAKE_DISENGAGE] + gas_disengage_probs = net_output_data['meta'][0,model_meta.GAS_DISENGAGE] + steer_override_probs = net_output_data['meta'][0,model_meta.STEER_OVERRIDE] + any_disengage_probs = 1-((1-brake_disengage_probs)*(1-gas_disengage_probs)*(1-steer_override_probs)) + # independent disengage prob for each 2s slice + ind_disengage_probs = np.r_[any_disengage_probs[0], np.diff(any_disengage_probs) / (1 - any_disengage_probs[:-1])] + # rolling buf for 2, 4, 6, 8, 10s + publish_state.disengage_buffer[:-ModelConstants.DISENGAGE_WIDTH] = publish_state.disengage_buffer[ModelConstants.DISENGAGE_WIDTH:] + publish_state.disengage_buffer[-ModelConstants.DISENGAGE_WIDTH:] = ind_disengage_probs + + score = 0. + for i in range(ModelConstants.DISENGAGE_WIDTH): + score += publish_state.disengage_buffer[i*ModelConstants.DISENGAGE_WIDTH+ModelConstants.DISENGAGE_WIDTH-1-i].item() / ModelConstants.DISENGAGE_WIDTH + if score < ModelConstants.RYG_GREEN: + modelV2.confidence = ConfidenceClass.green + elif score < ModelConstants.RYG_YELLOW: + modelV2.confidence = ConfidenceClass.yellow + else: + modelV2.confidence = ConfidenceClass.red + + # raw prediction if enabled + if SEND_RAW_PRED: + modelV2.rawPredictions = net_output_data['raw_pred'].tobytes() + +def fill_pose_msg(msg: capnp._DynamicStructBuilder, net_output_data: dict[str, np.ndarray], + vipc_frame_id: int, vipc_dropped_frames: int, timestamp_eof: int, live_calib_seen: bool) -> None: + msg.valid = live_calib_seen & (vipc_dropped_frames < 1) + cameraOdometry = msg.cameraOdometry + + cameraOdometry.frameId = vipc_frame_id + cameraOdometry.timestampEof = timestamp_eof + + cameraOdometry.trans = net_output_data['pose'][0,:3].tolist() + cameraOdometry.rot = net_output_data['pose'][0,3:].tolist() + cameraOdometry.wideFromDeviceEuler = net_output_data['wide_from_device_euler'][0,:].tolist() + cameraOdometry.roadTransformTrans = net_output_data['road_transform'][0,:3].tolist() + cameraOdometry.transStd = net_output_data['pose_stds'][0,:3].tolist() + cameraOdometry.rotStd = net_output_data['pose_stds'][0,3:].tolist() + cameraOdometry.wideFromDeviceEulerStd = net_output_data['wide_from_device_euler_stds'][0,:].tolist() + cameraOdometry.roadTransformTransStd = net_output_data['road_transform_stds'][0,:3].tolist() diff --git a/sunnypilot/modeld_v2/get_model_metadata.py b/sunnypilot/modeld_v2/get_model_metadata.py new file mode 100755 index 0000000000..144860204f --- /dev/null +++ b/sunnypilot/modeld_v2/get_model_metadata.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +import sys +import pathlib +import onnx +import codecs +import pickle + +def get_name_and_shape(value_info:onnx.ValueInfoProto) -> tuple[str, tuple[int,...]]: + shape = tuple([int(dim.dim_value) for dim in value_info.type.tensor_type.shape.dim]) + name = value_info.name + return name, shape + +if __name__ == "__main__": + model_path = pathlib.Path(sys.argv[1]) + model = onnx.load(str(model_path)) + i = [x.key for x in model.metadata_props].index('output_slices') + output_slices = model.metadata_props[i].value + + metadata = {} + metadata['output_slices'] = pickle.loads(codecs.decode(output_slices.encode(), "base64")) + metadata['input_shapes'] = dict([get_name_and_shape(x) for x in model.graph.input]) + metadata['output_shapes'] = dict([get_name_and_shape(x) for x in model.graph.output]) + + metadata_path = model_path.parent / (model_path.stem + '_metadata.pkl') + with open(metadata_path, 'wb') as f: + pickle.dump(metadata, f) + + print(f'saved metadata to {metadata_path}') diff --git a/sunnypilot/modeld_v2/meta_20hz.py b/sunnypilot/modeld_v2/meta_20hz.py index 77a815ee82..8a61925aaa 100644 --- a/sunnypilot/modeld_v2/meta_20hz.py +++ b/sunnypilot/modeld_v2/meta_20hz.py @@ -1,4 +1,4 @@ -from openpilot.selfdrive.modeld.constants import Meta +from openpilot.sunnypilot.modeld_v2.constants import Meta class Meta20hz(Meta): diff --git a/sunnypilot/modeld_v2/meta_helper.py b/sunnypilot/modeld_v2/meta_helper.py index b9c6b98c9c..3fa10b2415 100644 --- a/sunnypilot/modeld_v2/meta_helper.py +++ b/sunnypilot/modeld_v2/meta_helper.py @@ -1,4 +1,4 @@ -from openpilot.selfdrive.modeld.constants import Meta +from openpilot.sunnypilot.modeld_v2.constants import Meta from cereal import custom from openpilot.sunnypilot.modeld_v2.meta_20hz import Meta20hz from openpilot.sunnypilot.models.helpers import get_active_bundle diff --git a/sunnypilot/modeld_v2/model_runner.py b/sunnypilot/modeld_v2/model_runner.py index 500a34a4ec..82fc3f5196 100644 --- a/sunnypilot/modeld_v2/model_runner.py +++ b/sunnypilot/modeld_v2/model_runner.py @@ -4,10 +4,10 @@ from abc import ABC, abstractmethod import numpy as np from cereal import custom -from openpilot.selfdrive.modeld import MODEL_PATH, MODEL_PKL_PATH, METADATA_PATH -from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLMem -from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner, ORT_TYPES_TO_NP_TYPES -from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address +from openpilot.sunnypilot.modeld_v2 import MODEL_PATH, MODEL_PKL_PATH, METADATA_PATH +from openpilot.sunnypilot.modeld_v2.models.commonmodel_pyx import DrivingModelFrame, CLMem +from openpilot.sunnypilot.modeld_v2.runners.ort_helpers import make_onnx_cpu_runner, ORT_TYPES_TO_NP_TYPES +from openpilot.sunnypilot.modeld_v2.runners.tinygrad_helpers import qcom_tensor_from_opencl_address from openpilot.system.hardware import TICI from openpilot.system.hardware.hw import Paths diff --git a/sunnypilot/modeld_v2/modeld b/sunnypilot/modeld_v2/modeld new file mode 100755 index 0000000000..5ba4688554 --- /dev/null +++ b/sunnypilot/modeld_v2/modeld @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" +exec "$DIR/modeld.py" "$@" diff --git a/sunnypilot/modeld_v2/modeld.py b/sunnypilot/modeld_v2/modeld.py new file mode 100755 index 0000000000..bc67db297d --- /dev/null +++ b/sunnypilot/modeld_v2/modeld.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python3 +from openpilot.system.hardware import TICI + +# +import time +import numpy as np +import cereal.messaging as messaging +from cereal import car, log +from setproctitle import setproctitle +from cereal.messaging import PubMaster, SubMaster +from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf +from opendbc.car.car_helpers import get_demo_car_params +from openpilot.common.swaglog import cloudlog +from openpilot.common.params import Params +from openpilot.common.filter_simple import FirstOrderFilter +from openpilot.common.realtime import config_realtime_process +from openpilot.common.transformations.camera import DEVICE_CAMERAS +from openpilot.common.transformations.model import get_warp_matrix +from openpilot.system import sentry +from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper +from openpilot.sunnypilot.modeld_v2.parse_model_outputs import Parser +from openpilot.sunnypilot.modeld_v2.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState +from openpilot.sunnypilot.modeld_v2.constants import ModelConstants +from openpilot.sunnypilot.modeld_v2.models.commonmodel_pyx import DrivingModelFrame, CLContext + +from openpilot.sunnypilot.modeld_v2.meta_helper import load_meta_constants +from openpilot.sunnypilot.modeld_v2.model_runner import ONNXRunner, TinygradRunner + +PROCESS_NAME = "selfdrive.modeld.modeld" + + +class FrameMeta: + frame_id: int = 0 + timestamp_sof: int = 0 + timestamp_eof: int = 0 + + def __init__(self, vipc=None): + if vipc is not None: + self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof + +class ModelState: + frames: dict[str, DrivingModelFrame] + inputs: dict[str, np.ndarray] + output: np.ndarray + prev_desire: np.ndarray # for tracking the rising edge of the pulse + + def __init__(self, context: CLContext): + try: + self.model_runner = TinygradRunner() if TICI else ONNXRunner() + except Exception as e: + cloudlog.exception(f"Failed to initialize model runner: {str(e)}") + + buffer_length = 5 if self.model_runner.is_20hz else 2 + self.frames = {'input_imgs': DrivingModelFrame(context, buffer_length), 'big_input_imgs': DrivingModelFrame(context, buffer_length)} + self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32) + if self.model_runner.is_20hz: + self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32) + self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32) + + # img buffers are managed in openCL transform code + self.numpy_inputs = {} + + for key, shape in self.model_runner.input_shapes.items(): + if key not in self.frames: # Managed by opencl + self.numpy_inputs[key] = np.zeros(shape, dtype=np.float32) + + self.parser = Parser() + + if self.model_runner.is_20hz: + net_output_size = self.model_runner.model_metadata['output_shapes']['outputs'][1] + self.output = np.zeros(net_output_size, dtype=np.float32) + + num_elements = self.numpy_inputs['features_buffer'].shape[1] + step_size = int(-100 / num_elements) + self.full_features_20Hz_idxs = np.arange(step_size, step_size * (num_elements + 1), step_size)[::-1] + self.desire_reshape_dims = (self.numpy_inputs['desire'].shape[0], self.numpy_inputs['desire'].shape[1], -1, self.numpy_inputs['desire'].shape[2]) + + def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray, + inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None: + # Model decides when action is completed, so desire input is just a pulse triggered on rising edge + inputs['desire'][0] = 0 + new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0) + self.prev_desire[:] = inputs['desire'] + + if self.model_runner.is_20hz: + self.desire_20Hz[:-1] = self.desire_20Hz[1:] + self.desire_20Hz[-1] = new_desire + self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape(self.desire_reshape_dims).max(axis=2) + else: + length = inputs['desire'].shape[0] + self.numpy_inputs['desire'][0, :-1] = self.numpy_inputs['desire'][0, 1:] + self.numpy_inputs['desire'][0, -1, :length] = new_desire[:length] + + for key in self.numpy_inputs: + if key in inputs and key not in ['desire']: + self.numpy_inputs[key][:] = inputs[key] + + imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()), + 'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())} + + # Prepare inputs using the model runner + self.model_runner.prepare_inputs(imgs_cl, self.numpy_inputs, self.frames) + + if prepare_only: + return None + + # Run model inference + self.output = self.model_runner.run_model() + outputs = self.parser.parse_outputs(self.model_runner.slice_outputs(self.output)) + + if self.model_runner.is_20hz: + self.full_features_20Hz[:-1] = self.full_features_20Hz[1:] + self.full_features_20Hz[-1] = outputs['hidden_state'][0, :] + self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[self.full_features_20Hz_idxs] + else: + feature_len = outputs['hidden_state'].shape[1] + self.numpy_inputs['features_buffer'][0, :-1] = self.numpy_inputs['features_buffer'][0, 1:] + self.numpy_inputs['features_buffer'][0, -1, :feature_len] = outputs['hidden_state'][0, :feature_len] + + if "desired_curvature" in outputs: + input_name_prev = None + + if "prev_desired_curvs" in self.numpy_inputs.keys(): + input_name_prev = 'prev_desired_curvs' + elif "prev_desired_curv" in self.numpy_inputs.keys(): + input_name_prev = 'prev_desired_curv' + + if input_name_prev is not None: + length = outputs['desired_curvature'][0].size + self.numpy_inputs[input_name_prev][0, :-length, 0] = self.numpy_inputs[input_name_prev][0, length:, 0] + self.numpy_inputs[input_name_prev][0, -length:, 0] = outputs['desired_curvature'][0] + return outputs + + +def main(demo=False): + cloudlog.warning("modeld init") + + sentry.set_tag("daemon", PROCESS_NAME) + cloudlog.bind(daemon=PROCESS_NAME) + setproctitle(PROCESS_NAME) + config_realtime_process(7, 54) + + cloudlog.warning("setting up CL context") + cl_context = CLContext() + cloudlog.warning("CL context ready; loading model") + model = ModelState(cl_context) + cloudlog.warning("models loaded, modeld starting") + + # visionipc clients + while True: + available_streams = VisionIpcClient.available_streams("camerad", block=False) + if available_streams: + use_extra_client = VisionStreamType.VISION_STREAM_WIDE_ROAD in available_streams and VisionStreamType.VISION_STREAM_ROAD in available_streams + main_wide_camera = VisionStreamType.VISION_STREAM_ROAD not in available_streams + break + time.sleep(.1) + + vipc_client_main_stream = VisionStreamType.VISION_STREAM_WIDE_ROAD if main_wide_camera else VisionStreamType.VISION_STREAM_ROAD + vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True, cl_context) + vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False, cl_context) + cloudlog.warning(f"vision stream set up, main_wide_camera: {main_wide_camera}, use_extra_client: {use_extra_client}") + + while not vipc_client_main.connect(False): + time.sleep(0.1) + while use_extra_client and not vipc_client_extra.connect(False): + time.sleep(0.1) + + cloudlog.warning(f"connected main cam with buffer size: {vipc_client_main.buffer_len} ({vipc_client_main.width} x {vipc_client_main.height})") + if use_extra_client: + cloudlog.warning(f"connected extra cam with buffer size: {vipc_client_extra.buffer_len} ({vipc_client_extra.width} x {vipc_client_extra.height})") + + # messaging + pm = PubMaster(["modelV2", "drivingModelData", "cameraOdometry"]) + sm = SubMaster(["deviceState", "carState", "roadCameraState", "liveCalibration", "driverMonitoringState", "carControl"]) + + publish_state = PublishState() + params = Params() + + # setup filter to track dropped frames + frame_dropped_filter = FirstOrderFilter(0., 10., 1. / ModelConstants.MODEL_FREQ) + frame_id = 0 + last_vipc_frame_id = 0 + run_count = 0 + + model_transform_main = np.zeros((3, 3), dtype=np.float32) + model_transform_extra = np.zeros((3, 3), dtype=np.float32) + live_calib_seen = False + buf_main, buf_extra = None, None + meta_main = FrameMeta() + meta_extra = FrameMeta() + + + if demo: + CP = get_demo_car_params() + else: + CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams) + cloudlog.info("modeld got CarParams: %s", CP.brand) + + # TODO this needs more thought, use .2s extra for now to estimate other delays + steer_delay = CP.steerActuatorDelay + .2 + + DH = DesireHelper() + + while True: + # Keep receiving frames until we are at least 1 frame ahead of previous extra frame + while meta_main.timestamp_sof < meta_extra.timestamp_sof + 25000000: + buf_main = vipc_client_main.recv() + meta_main = FrameMeta(vipc_client_main) + if buf_main is None: + break + + if buf_main is None: + cloudlog.debug("vipc_client_main no frame") + continue + + if use_extra_client: + # Keep receiving extra frames until frame id matches main camera + while True: + buf_extra = vipc_client_extra.recv() + meta_extra = FrameMeta(vipc_client_extra) + if buf_extra is None or meta_main.timestamp_sof < meta_extra.timestamp_sof + 25000000: + break + + if buf_extra is None: + cloudlog.debug("vipc_client_extra no frame") + continue + + if abs(meta_main.timestamp_sof - meta_extra.timestamp_sof) > 10000000: + cloudlog.error(f"frames out of sync! main: {meta_main.frame_id} ({meta_main.timestamp_sof / 1e9:.5f}),\ + extra: {meta_extra.frame_id} ({meta_extra.timestamp_sof / 1e9:.5f})") + + else: + # Use single camera + buf_extra = buf_main + meta_extra = meta_main + + sm.update(0) + desire = DH.desire + is_rhd = sm["driverMonitoringState"].isRHD + frame_id = sm["roadCameraState"].frameId + v_ego = max(sm["carState"].vEgo, 0.) + if sm.updated["liveCalibration"] and sm.seen['roadCameraState'] and sm.seen['deviceState']: + device_from_calib_euler = np.array(sm["liveCalibration"].rpyCalib, dtype=np.float32) + dc = DEVICE_CAMERAS[(str(sm['deviceState'].deviceType), str(sm['roadCameraState'].sensor))] + model_transform_main = get_warp_matrix(device_from_calib_euler, dc.ecam.intrinsics if main_wide_camera else dc.fcam.intrinsics, False).astype(np.float32) + model_transform_extra = get_warp_matrix(device_from_calib_euler, dc.ecam.intrinsics, True).astype(np.float32) + live_calib_seen = True + + traffic_convention = np.zeros(2) + traffic_convention[int(is_rhd)] = 1 + + vec_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32) + if desire >= 0 and desire < ModelConstants.DESIRE_LEN: + vec_desire[desire] = 1 + + # tracked dropped frames + vipc_dropped_frames = max(0, meta_main.frame_id - last_vipc_frame_id - 1) + frames_dropped = frame_dropped_filter.update(min(vipc_dropped_frames, 10)) + if run_count < 10: # let frame drops warm up + frame_dropped_filter.x = 0. + frames_dropped = 0. + run_count = run_count + 1 + + frame_drop_ratio = frames_dropped / (1 + frames_dropped) + prepare_only = vipc_dropped_frames > 0 + if prepare_only: + cloudlog.error(f"skipping model eval. Dropped {vipc_dropped_frames} frames") + + inputs:dict[str, np.ndarray] = { + 'desire': vec_desire, + 'traffic_convention': traffic_convention, + } + + if "lateral_control_params" in model.numpy_inputs.keys(): + inputs['lateral_control_params'] = np.array([v_ego, steer_delay], dtype=np.float32) + + mt1 = time.perf_counter() + model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only) + mt2 = time.perf_counter() + model_execution_time = mt2 - mt1 + + if model_output is not None: + modelv2_send = messaging.new_message('modelV2') + drivingdata_send = messaging.new_message('drivingModelData') + posenet_send = messaging.new_message('cameraOdometry') + fill_model_msg(drivingdata_send, modelv2_send, model_output, v_ego, steer_delay, + publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id, + frame_drop_ratio, meta_main.timestamp_eof, model_execution_time, live_calib_seen, load_meta_constants()) + + desire_state = modelv2_send.modelV2.meta.desireState + l_lane_change_prob = desire_state[log.Desire.laneChangeLeft] + r_lane_change_prob = desire_state[log.Desire.laneChangeRight] + lane_change_prob = l_lane_change_prob + r_lane_change_prob + DH.update(sm['carState'], sm['carControl'].latActive, lane_change_prob) + modelv2_send.modelV2.meta.laneChangeState = DH.lane_change_state + modelv2_send.modelV2.meta.laneChangeDirection = DH.lane_change_direction + drivingdata_send.drivingModelData.meta.laneChangeState = DH.lane_change_state + drivingdata_send.drivingModelData.meta.laneChangeDirection = DH.lane_change_direction + + fill_pose_msg(posenet_send, model_output, meta_main.frame_id, vipc_dropped_frames, meta_main.timestamp_eof, live_calib_seen) + pm.send('modelV2', modelv2_send) + pm.send('drivingModelData', drivingdata_send) + pm.send('cameraOdometry', posenet_send) + last_vipc_frame_id = meta_main.frame_id + + +if __name__ == "__main__": + try: + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--demo', action='store_true', help='A boolean for demo mode.') + args = parser.parse_args() + main(demo=args.demo) + except KeyboardInterrupt: + cloudlog.warning(f"child {PROCESS_NAME} got SIGINT") + except Exception: + sentry.capture_exception() + raise diff --git a/sunnypilot/modeld_v2/models/README.md b/sunnypilot/modeld_v2/models/README.md new file mode 100644 index 0000000000..9e11ca8255 --- /dev/null +++ b/sunnypilot/modeld_v2/models/README.md @@ -0,0 +1,62 @@ +## Neural networks in openpilot +To view the architecture of the ONNX networks, you can use [netron](https://netron.app/) + +## Supercombo +### Supercombo input format (Full size: 799906 x float32) +* **image stream** + * Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256 + * Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256 + * Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2] + * Channel 4 represents the half-res U channel + * Channel 5 represents the half-res V channel +* **wide image stream** + * Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256 + * Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256 + * Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2] + * Channel 4 represents the half-res U channel + * Channel 5 represents the half-res V channel +* **desire** + * one-hot encoded buffer to command model to execute certain actions, bit needs to be sent for the past 5 seconds (at 20FPS) : 100 * 8 +* **traffic convention** + * one-hot encoded vector to tell model whether traffic is right-hand or left-hand traffic : 2 +* **feature buffer** + * A buffer of intermediate features that gets appended to the current feature to form a 5 seconds temporal context (at 20FPS) : 99 * 512 + + +### Supercombo output format (Full size: XXX x float32) +Read [here](https://github.com/commaai/openpilot/blob/90af436a121164a51da9fa48d093c29f738adf6a/selfdrive/modeld/models/driving.h#L236) for more. + + +## Driver Monitoring Model +* .onnx model can be run with onnx runtimes +* .dlc file is a pre-quantized model and only runs on qualcomm DSPs + +### input format +* single image W = 1440 H = 960 luminance channel (Y) from the planar YUV420 format: + * full input size is 1440 * 960 = 1382400 + * normalized ranging from 0.0 to 1.0 in float32 (onnx runner) or ranging from 0 to 255 in uint8 (snpe runner) +* camera calibration angles (roll, pitch, yaw) from liveCalibration: 3 x float32 inputs + +### output format +* 84 x float32 outputs = 2 + 41 * 2 ([parsing example](https://github.com/commaai/openpilot/blob/22ce4e17ba0d3bfcf37f8255a4dd1dc683fe0c38/selfdrive/modeld/models/dmonitoring.cc#L33)) + * for each person in the front seats (2 * 41) + * face pose: 12 = 6 + 6 + * face orientation [pitch, yaw, roll] in camera frame: 3 + * face position [dx, dy] relative to image center: 2 + * normalized face size: 1 + * standard deviations for above outputs: 6 + * face visible probability: 1 + * eyes: 20 = (8 + 1) + (8 + 1) + 1 + 1 + * eye position and size, and their standard deviations: 8 + * eye visible probability: 1 + * eye closed probability: 1 + * wearing sunglasses probability: 1 + * face occluded probability: 1 + * touching wheel probability: 1 + * paying attention probability: 1 + * (deprecated) distracted probabilities: 2 + * using phone probability: 1 + * distracted probability: 1 + * common outputs 2 + * poor camera vision probability: 1 + * left hand drive probability: 1 diff --git a/sunnypilot/modeld_v2/models/__init__.py b/sunnypilot/modeld_v2/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sunnypilot/modeld_v2/models/commonmodel.cc b/sunnypilot/modeld_v2/models/commonmodel.cc new file mode 100644 index 0000000000..5cd3a84fcc --- /dev/null +++ b/sunnypilot/modeld_v2/models/commonmodel.cc @@ -0,0 +1,62 @@ +#include "sunnypilot/modeld_v2/models/commonmodel.h" + +#include +#include + +#include "common/clutil.h" + +DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context, uint8_t buffer_length) : ModelFrame(device_id, context), buffer_length(buffer_length) { + input_frames = std::make_unique(buf_size); + input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err)); + img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_length*frame_size_bytes, NULL, &err)); + region.origin = (buffer_length - 1) * frame_size_bytes; + region.size = frame_size_bytes; + last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err)); + // printf("Buffer length: %d, region origin: %lu, region size: %lu\n", buffer_length, region.origin, region.size); + + loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT); + init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT); +} + +cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { + run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection); + + for (int i = 0; i < (buffer_length - 1); i++) { + CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr)); + } + loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl); + + copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes); + copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes, frame_size_bytes); + + // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready. + clFinish(q); + return &input_frames_cl; +} + +DrivingModelFrame::~DrivingModelFrame() { + deinit_transform(); + loadyuv_destroy(&loadyuv); + CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl)); + CL_CHECK(clReleaseMemObject(last_img_cl)); + CL_CHECK(clReleaseCommandQueue(q)); +} + + +MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) { + input_frames = std::make_unique(buf_size); + input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err)); + + init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT); +} + +cl_mem* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { + run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection); + clFinish(q); + return &y_cl; +} + +MonitoringModelFrame::~MonitoringModelFrame() { + deinit_transform(); + CL_CHECK(clReleaseCommandQueue(q)); +} diff --git a/sunnypilot/modeld_v2/models/commonmodel.h b/sunnypilot/modeld_v2/models/commonmodel.h new file mode 100644 index 0000000000..8203e064e0 --- /dev/null +++ b/sunnypilot/modeld_v2/models/commonmodel.h @@ -0,0 +1,97 @@ +#pragma once + +#include +#include +#include + +#include + +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include "common/mat.h" +#include "sunnypilot/modeld_v2/transforms/loadyuv.h" +#include "sunnypilot/modeld_v2/transforms/transform.h" + +class ModelFrame { +public: + ModelFrame(cl_device_id device_id, cl_context context) { + q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err)); + } + virtual ~ModelFrame() {} + virtual cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { return NULL; } + uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) { + CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr)); + clFinish(q); + return &input_frames[0]; + } + + int MODEL_WIDTH; + int MODEL_HEIGHT; + int MODEL_FRAME_SIZE; + int buf_size; + +protected: + cl_mem y_cl, u_cl, v_cl; + Transform transform; + cl_command_queue q; + std::unique_ptr input_frames; + + void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) { + y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err)); + u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err)); + v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err)); + transform_init(&transform, context, device_id); + } + + void deinit_transform() { + transform_destroy(&transform); + CL_CHECK(clReleaseMemObject(v_cl)); + CL_CHECK(clReleaseMemObject(u_cl)); + CL_CHECK(clReleaseMemObject(y_cl)); + } + + void run_transform(cl_mem yuv_cl, int model_width, int model_height, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { + transform_queue(&transform, q, + yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset, + y_cl, u_cl, v_cl, model_width, model_height, projection); + } +}; + +class DrivingModelFrame : public ModelFrame { +public: + DrivingModelFrame(cl_device_id device_id, cl_context context, uint8_t buffer_length); + ~DrivingModelFrame(); + cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection); + + const int MODEL_WIDTH = 512; + const int MODEL_HEIGHT = 256; + const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2; + const int buf_size = MODEL_FRAME_SIZE * 2; + const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t); + const uint8_t buffer_length; + +private: + LoadYUVState loadyuv; + cl_mem img_buffer_20hz_cl, last_img_cl, input_frames_cl; + cl_buffer_region region; +}; + +class MonitoringModelFrame : public ModelFrame { +public: + MonitoringModelFrame(cl_device_id device_id, cl_context context); + ~MonitoringModelFrame(); + cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection); + + const int MODEL_WIDTH = 1440; + const int MODEL_HEIGHT = 960; + const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT; + const int buf_size = MODEL_FRAME_SIZE; + +private: + cl_mem input_frame_cl; +}; diff --git a/sunnypilot/modeld_v2/models/commonmodel.pxd b/sunnypilot/modeld_v2/models/commonmodel.pxd new file mode 100644 index 0000000000..55023ac4b9 --- /dev/null +++ b/sunnypilot/modeld_v2/models/commonmodel.pxd @@ -0,0 +1,27 @@ +# distutils: language = c++ + +from msgq.visionipc.visionipc cimport cl_device_id, cl_context, cl_mem + +cdef extern from "common/mat.h": + cdef struct mat3: + float v[9] + +cdef extern from "common/clutil.h": + cdef unsigned long CL_DEVICE_TYPE_DEFAULT + cl_device_id cl_get_device_id(unsigned long) + cl_context cl_create_context(cl_device_id) + void cl_release_context(cl_context) + +cdef extern from "sunnypilot/modeld_v2/models/commonmodel.h": + cppclass ModelFrame: + int buf_size + unsigned char * buffer_from_cl(cl_mem*, int); + cl_mem * prepare(cl_mem, int, int, int, int, mat3) + + cppclass DrivingModelFrame: + int buf_size + DrivingModelFrame(cl_device_id, cl_context, unsigned char) + + cppclass MonitoringModelFrame: + int buf_size + MonitoringModelFrame(cl_device_id, cl_context) diff --git a/sunnypilot/modeld_v2/models/commonmodel_pyx.pxd b/sunnypilot/modeld_v2/models/commonmodel_pyx.pxd new file mode 100644 index 0000000000..0bb798625b --- /dev/null +++ b/sunnypilot/modeld_v2/models/commonmodel_pyx.pxd @@ -0,0 +1,13 @@ +# distutils: language = c++ + +from msgq.visionipc.visionipc cimport cl_mem +from msgq.visionipc.visionipc_pyx cimport CLContext as BaseCLContext + +cdef class CLContext(BaseCLContext): + pass + +cdef class CLMem: + cdef cl_mem * mem + + @staticmethod + cdef create(void*) diff --git a/sunnypilot/modeld_v2/models/commonmodel_pyx.pyx b/sunnypilot/modeld_v2/models/commonmodel_pyx.pyx new file mode 100644 index 0000000000..78a891f031 --- /dev/null +++ b/sunnypilot/modeld_v2/models/commonmodel_pyx.pyx @@ -0,0 +1,74 @@ +# distutils: language = c++ +# cython: c_string_encoding=ascii, language_level=3 + +import numpy as np +cimport numpy as cnp +from libc.string cimport memcpy +from libc.stdint cimport uintptr_t, uint8_t + +from msgq.visionipc.visionipc cimport cl_mem +from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext +from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context, cl_release_context +from .commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame + + +cdef class CLContext(BaseCLContext): + def __cinit__(self): + self.device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT) + self.context = cl_create_context(self.device_id) + + def __dealloc__(self): + if self.context: + cl_release_context(self.context) + +cdef class CLMem: + @staticmethod + cdef create(void * cmem): + mem = CLMem() + mem.mem = cmem + return mem + + @property + def mem_address(self): + return (self.mem) + +def cl_from_visionbuf(VisionBuf buf): + return CLMem.create(&buf.buf.buf_cl) + + +cdef class ModelFrame: + cdef cppModelFrame * frame + cdef int buf_size + + def __dealloc__(self): + del self.frame + + def prepare(self, VisionBuf buf, float[:] projection): + cdef mat3 cprojection + memcpy(cprojection.v, &projection[0], 9*sizeof(float)) + cdef cl_mem * data + data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection) + return CLMem.create(data) + + def buffer_from_cl(self, CLMem in_frames): + cdef unsigned char * data2 + data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size) + return np.asarray( data2) + + +cdef class DrivingModelFrame(ModelFrame): + cdef cppDrivingModelFrame * _frame + + def __cinit__(self, CLContext context, int buffer_length=2): + self._frame = new cppDrivingModelFrame(context.device_id, context.context, buffer_length) + self.frame = (self._frame) + self.buf_size = self._frame.buf_size + +cdef class MonitoringModelFrame(ModelFrame): + cdef cppMonitoringModelFrame * _frame + + def __cinit__(self, CLContext context): + self._frame = new cppMonitoringModelFrame(context.device_id, context.context) + self.frame = (self._frame) + self.buf_size = self._frame.buf_size + diff --git a/sunnypilot/modeld_v2/models/dmonitoring_model.current b/sunnypilot/modeld_v2/models/dmonitoring_model.current new file mode 100644 index 0000000000..121871ef2b --- /dev/null +++ b/sunnypilot/modeld_v2/models/dmonitoring_model.current @@ -0,0 +1,2 @@ +fa69be01-b430-4504-9d72-7dcb058eb6dd +d9fb22d1c4fa3ca3d201dbc8edf1d0f0918e53e6 diff --git a/sunnypilot/modeld_v2/models/dmonitoring_model.onnx b/sunnypilot/modeld_v2/models/dmonitoring_model.onnx new file mode 100644 index 0000000000..dcc727510e --- /dev/null +++ b/sunnypilot/modeld_v2/models/dmonitoring_model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50efe6451a3fb3fa04b6bb0e846544533329bd46ecefe9e657e91214dee2aaeb +size 7196502 diff --git a/sunnypilot/modeld_v2/models/supercombo.onnx b/sunnypilot/modeld_v2/models/supercombo.onnx new file mode 100644 index 0000000000..4c9f795574 --- /dev/null +++ b/sunnypilot/modeld_v2/models/supercombo.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21daa542227ecc5972da45df4e26f018ba113c0461f270e367d57e3ad89221a +size 51461700 diff --git a/sunnypilot/modeld_v2/parse_model_outputs.py b/sunnypilot/modeld_v2/parse_model_outputs.py new file mode 100644 index 0000000000..c71a146454 --- /dev/null +++ b/sunnypilot/modeld_v2/parse_model_outputs.py @@ -0,0 +1,107 @@ +import numpy as np +from openpilot.sunnypilot.modeld_v2.constants import ModelConstants + +def safe_exp(x, out=None): + # -11 is around 10**14, more causes float16 overflow + return np.exp(np.clip(x, -np.inf, 11), out=out) + +def sigmoid(x): + return 1. / (1. + safe_exp(-x)) + +def softmax(x, axis=-1): + x -= np.max(x, axis=axis, keepdims=True) + if x.dtype == np.float32 or x.dtype == np.float64: + safe_exp(x, out=x) + else: + x = safe_exp(x) + x /= np.sum(x, axis=axis, keepdims=True) + return x + +class Parser: + def __init__(self, ignore_missing=False): + self.ignore_missing = ignore_missing + + def check_missing(self, outs, name): + if name not in outs and not self.ignore_missing: + raise ValueError(f"Missing output {name}") + return name not in outs + + def parse_categorical_crossentropy(self, name, outs, out_shape=None): + if self.check_missing(outs, name): + return + raw = outs[name] + if out_shape is not None: + raw = raw.reshape((raw.shape[0],) + out_shape) + outs[name] = softmax(raw, axis=-1) + + def parse_binary_crossentropy(self, name, outs): + if self.check_missing(outs, name): + return + raw = outs[name] + outs[name] = sigmoid(raw) + + def parse_mdn(self, name, outs, in_N=0, out_N=1, out_shape=None): + if self.check_missing(outs, name): + return + raw = outs[name] + raw = raw.reshape((raw.shape[0], max(in_N, 1), -1)) + + n_values = (raw.shape[2] - out_N)//2 + pred_mu = raw[:,:,:n_values] + pred_std = safe_exp(raw[:,:,n_values: 2*n_values]) + + if in_N > 1: + weights = np.zeros((raw.shape[0], in_N, out_N), dtype=raw.dtype) + for i in range(out_N): + weights[:,:,i - out_N] = softmax(raw[:,:,i - out_N], axis=-1) + + if out_N == 1: + for fidx in range(weights.shape[0]): + idxs = np.argsort(weights[fidx][:,0])[::-1] + weights[fidx] = weights[fidx][idxs] + pred_mu[fidx] = pred_mu[fidx][idxs] + pred_std[fidx] = pred_std[fidx][idxs] + full_shape = tuple([raw.shape[0], in_N] + list(out_shape)) + outs[name + '_weights'] = weights + outs[name + '_hypotheses'] = pred_mu.reshape(full_shape) + outs[name + '_stds_hypotheses'] = pred_std.reshape(full_shape) + + pred_mu_final = np.zeros((raw.shape[0], out_N, n_values), dtype=raw.dtype) + pred_std_final = np.zeros((raw.shape[0], out_N, n_values), dtype=raw.dtype) + for fidx in range(weights.shape[0]): + for hidx in range(out_N): + idxs = np.argsort(weights[fidx,:,hidx])[::-1] + pred_mu_final[fidx, hidx] = pred_mu[fidx, idxs[0]] + pred_std_final[fidx, hidx] = pred_std[fidx, idxs[0]] + else: + pred_mu_final = pred_mu + pred_std_final = pred_std + + if out_N > 1: + final_shape = tuple([raw.shape[0], out_N] + list(out_shape)) + else: + final_shape = tuple([raw.shape[0],] + list(out_shape)) + outs[name] = pred_mu_final.reshape(final_shape) + outs[name + '_stds'] = pred_std_final.reshape(final_shape) + + def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: + self.parse_mdn('plan', outs, in_N=ModelConstants.PLAN_MHP_N, out_N=ModelConstants.PLAN_MHP_SELECTION, + out_shape=(ModelConstants.IDX_N,ModelConstants.PLAN_WIDTH)) + self.parse_mdn('lane_lines', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_LANE_LINES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH)) + self.parse_mdn('road_edges', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_ROAD_EDGES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH)) + self.parse_mdn('pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) + self.parse_mdn('road_transform', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) + if 'sim_pose' in outs: + self.parse_mdn('sim_pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) + self.parse_mdn('wide_from_device_euler', outs, in_N=0, out_N=0, out_shape=(ModelConstants.WIDE_FROM_DEVICE_WIDTH,)) + self.parse_mdn('lead', outs, in_N=ModelConstants.LEAD_MHP_N, out_N=ModelConstants.LEAD_MHP_SELECTION, + out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH)) + if 'lat_planner_solution' in outs: + self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH)) + if 'desired_curvature' in outs: + self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,)) + for k in ['lead_prob', 'lane_lines_prob', 'meta']: + self.parse_binary_crossentropy(k, outs) + self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,)) + self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH)) + return outs diff --git a/sunnypilot/modeld_v2/runners/ort_helpers.py b/sunnypilot/modeld_v2/runners/ort_helpers.py new file mode 100644 index 0000000000..26afb03562 --- /dev/null +++ b/sunnypilot/modeld_v2/runners/ort_helpers.py @@ -0,0 +1,36 @@ +import onnx +import onnxruntime as ort +import numpy as np +import itertools + +ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8} + +def attributeproto_fp16_to_fp32(attr): + float32_list = np.frombuffer(attr.raw_data, dtype=np.float16) + attr.data_type = 1 + attr.raw_data = float32_list.astype(np.float32).tobytes() + +def convert_fp16_to_fp32(model): + for i in model.graph.initializer: + if i.data_type == 10: + attributeproto_fp16_to_fp32(i) + for i in itertools.chain(model.graph.input, model.graph.output): + if i.type.tensor_type.elem_type == 10: + i.type.tensor_type.elem_type = 1 + for i in model.graph.node: + if i.op_type == 'Cast' and i.attribute[0].i == 10: + i.attribute[0].i = 1 + for a in i.attribute: + if hasattr(a, 't'): + if a.t.data_type == 10: + attributeproto_fp16_to_fp32(a.t) + return model.SerializeToString() + + +def make_onnx_cpu_runner(model_path): + options = ort.SessionOptions() + options.intra_op_num_threads = 4 + options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL + options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL + model_data = convert_fp16_to_fp32(onnx.load(model_path)) + return ort.InferenceSession(model_data, options, providers=['CPUExecutionProvider']) diff --git a/sunnypilot/modeld_v2/runners/tinygrad_helpers.py b/sunnypilot/modeld_v2/runners/tinygrad_helpers.py new file mode 100644 index 0000000000..776381341c --- /dev/null +++ b/sunnypilot/modeld_v2/runners/tinygrad_helpers.py @@ -0,0 +1,8 @@ + +from tinygrad.tensor import Tensor +from tinygrad.helpers import to_mv + +def qcom_tensor_from_opencl_address(opencl_address, shape, dtype): + cl_buf_desc_ptr = to_mv(opencl_address, 8).cast('Q')[0] + rawbuf_ptr = to_mv(cl_buf_desc_ptr, 0x100).cast('Q')[20] # offset 0xA0 is a raw gpu pointer. + return Tensor.from_blob(rawbuf_ptr, shape, dtype=dtype, device='QCOM') diff --git a/sunnypilot/modeld_v2/tests/__init__.py b/sunnypilot/modeld_v2/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sunnypilot/modeld_v2/tests/dmon_lag/repro.cc b/sunnypilot/modeld_v2/tests/dmon_lag/repro.cc new file mode 100644 index 0000000000..c4c1c65cbe --- /dev/null +++ b/sunnypilot/modeld_v2/tests/dmon_lag/repro.cc @@ -0,0 +1,101 @@ +// clang++ -O2 repro.cc && ./a.out + +#include +#include +#include + +#include +#include +#include +#include +#include + +static inline double millis_since_boot() { + struct timespec t; + clock_gettime(CLOCK_BOOTTIME, &t); + return t.tv_sec * 1000.0 + t.tv_nsec * 1e-6; +} + +#define MODEL_WIDTH 320 +#define MODEL_HEIGHT 640 + +// null function still breaks it +#define input_lambda(x) x + +// this is copied from models/dmonitoring.cc, and is the code that triggers the issue +void inner(uint8_t *resized_buf, float *net_input_buf) { + int resized_width = MODEL_WIDTH; + int resized_height = MODEL_HEIGHT; + + // one shot conversion, O(n) anyway + // yuvframe2tensor, normalize + for (int r = 0; r < MODEL_HEIGHT/2; r++) { + for (int c = 0; c < MODEL_WIDTH/2; c++) { + // Y_ul + net_input_buf[(c*MODEL_HEIGHT/2) + r] = input_lambda(resized_buf[(2*r*resized_width) + (2*c)]); + // Y_ur + net_input_buf[(c*MODEL_HEIGHT/2) + r + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width) + (2*c+1)]); + // Y_dl + net_input_buf[(c*MODEL_HEIGHT/2) + r + ((MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c)]); + // Y_dr + net_input_buf[(c*MODEL_HEIGHT/2) + r + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c+1)]); + // U + net_input_buf[(c*MODEL_HEIGHT/2) + r + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + (r*resized_width/2) + c]); + // V + net_input_buf[(c*MODEL_HEIGHT/2) + r + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + ((resized_width/2)*(resized_height/2)) + (r*resized_width/2) + c]); + } + } +} + +float trial() { + int resized_width = MODEL_WIDTH; + int resized_height = MODEL_HEIGHT; + + int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v + + // allocate the buffers + uint8_t *resized_buf = (uint8_t*)malloc(resized_width*resized_height*3/2); + float *net_input_buf = (float*)malloc(yuv_buf_len*sizeof(float)); + printf("allocate -- %p 0x%x -- %p 0x%lx\n", resized_buf, resized_width*resized_height*3/2, net_input_buf, yuv_buf_len*sizeof(float)); + + // test for bad buffers + static int CNT = 20; + float avg = 0.0; + for (int i = 0; i < CNT; i++) { + double s4 = millis_since_boot(); + inner(resized_buf, net_input_buf); + double s5 = millis_since_boot(); + avg += s5-s4; + } + avg /= CNT; + + // once it's bad, it's reliably bad + if (avg > 10) { + printf("HIT %f\n", avg); + printf("BAD\n"); + + for (int i = 0; i < 200; i++) { + double s4 = millis_since_boot(); + inner(resized_buf, net_input_buf); + double s5 = millis_since_boot(); + printf("%.2f ", s5-s4); + } + printf("\n"); + + exit(0); + } + + // don't free so we get a different buffer each time + //free(resized_buf); + //free(net_input_buf); + + return avg; +} + +int main() { + while (true) { + float ret = trial(); + printf("got %f\n", ret); + } +} + diff --git a/sunnypilot/modeld_v2/tests/test_modeld.py b/sunnypilot/modeld_v2/tests/test_modeld.py new file mode 100644 index 0000000000..6927c9e473 --- /dev/null +++ b/sunnypilot/modeld_v2/tests/test_modeld.py @@ -0,0 +1,102 @@ +import numpy as np +import random + +import cereal.messaging as messaging +from msgq.visionipc import VisionIpcServer, VisionStreamType +from opendbc.car.car_helpers import get_demo_car_params +from openpilot.common.params import Params +from openpilot.common.transformations.camera import DEVICE_CAMERAS +from openpilot.common.realtime import DT_MDL +from openpilot.system.manager.process_config import managed_processes +from openpilot.selfdrive.test.process_replay.vision_meta import meta_from_camera_state + +CAM = DEVICE_CAMERAS[("tici", "ar0231")].fcam +IMG = np.zeros(int(CAM.width*CAM.height*(3/2)), dtype=np.uint8) +IMG_BYTES = IMG.flatten().tobytes() + + +class TestModeld: + + def setup_method(self): + self.vipc_server = VisionIpcServer("camerad") + self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_ROAD, 40, CAM.width, CAM.height) + self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_DRIVER, 40, CAM.width, CAM.height) + self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_WIDE_ROAD, 40, CAM.width, CAM.height) + self.vipc_server.start_listener() + Params().put("CarParams", get_demo_car_params().to_bytes()) + + self.sm = messaging.SubMaster(['modelV2', 'cameraOdometry']) + self.pm = messaging.PubMaster(['roadCameraState', 'wideRoadCameraState', 'liveCalibration']) + + managed_processes['modeld'].start() + self.pm.wait_for_readers_to_update("roadCameraState", 10) + + def teardown_method(self): + managed_processes['modeld'].stop() + del self.vipc_server + + def _send_frames(self, frame_id, cams=None): + if cams is None: + cams = ('roadCameraState', 'wideRoadCameraState') + + cs = None + for cam in cams: + msg = messaging.new_message(cam) + cs = getattr(msg, cam) + cs.frameId = frame_id + cs.timestampSof = int((frame_id * DT_MDL) * 1e9) + cs.timestampEof = int(cs.timestampSof + (DT_MDL * 1e9)) + cam_meta = meta_from_camera_state(cam) + + self.pm.send(msg.which(), msg) + self.vipc_server.send(cam_meta.stream, IMG_BYTES, cs.frameId, + cs.timestampSof, cs.timestampEof) + return cs + + def _wait(self): + self.sm.update(5000) + if self.sm['modelV2'].frameId != self.sm['cameraOdometry'].frameId: + self.sm.update(1000) + + def test_modeld(self): + for n in range(1, 500): + cs = self._send_frames(n) + self._wait() + + mdl = self.sm['modelV2'] + assert mdl.frameId == n + assert mdl.frameIdExtra == n + assert mdl.timestampEof == cs.timestampEof + assert mdl.frameAge == 0 + assert mdl.frameDropPerc == 0 + + odo = self.sm['cameraOdometry'] + assert odo.frameId == n + assert odo.timestampEof == cs.timestampEof + + def test_dropped_frames(self): + """ + modeld should only run on consecutive road frames + """ + frame_id = -1 + road_frames = list() + for n in range(1, 50): + if (random.random() < 0.1) and n > 3: + cams = random.choice([(), ('wideRoadCameraState', )]) + self._send_frames(n, cams) + else: + self._send_frames(n) + road_frames.append(n) + self._wait() + + if len(road_frames) < 3 or road_frames[-1] - road_frames[-2] == 1: + frame_id = road_frames[-1] + + mdl = self.sm['modelV2'] + odo = self.sm['cameraOdometry'] + assert mdl.frameId == frame_id + assert mdl.frameIdExtra == frame_id + assert odo.frameId == frame_id + if n != frame_id: + assert not self.sm.updated['modelV2'] + assert not self.sm.updated['cameraOdometry'] diff --git a/sunnypilot/modeld_v2/tests/tf_test/build.sh b/sunnypilot/modeld_v2/tests/tf_test/build.sh new file mode 100755 index 0000000000..df1d24761e --- /dev/null +++ b/sunnypilot/modeld_v2/tests/tf_test/build.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +clang++ -I /home/batman/one/external/tensorflow/include/ -L /home/batman/one/external/tensorflow/lib -Wl,-rpath=/home/batman/one/external/tensorflow/lib main.cc -ltensorflow diff --git a/sunnypilot/modeld_v2/tests/tf_test/main.cc b/sunnypilot/modeld_v2/tests/tf_test/main.cc new file mode 100644 index 0000000000..b00f7f95e8 --- /dev/null +++ b/sunnypilot/modeld_v2/tests/tf_test/main.cc @@ -0,0 +1,69 @@ +#include +#include +#include +#include "tensorflow/c/c_api.h" + +void* read_file(const char* path, size_t* out_len) { + FILE* f = fopen(path, "r"); + if (!f) { + return NULL; + } + fseek(f, 0, SEEK_END); + long f_len = ftell(f); + rewind(f); + + char* buf = (char*)calloc(f_len, 1); + assert(buf); + + size_t num_read = fread(buf, f_len, 1, f); + fclose(f); + + if (num_read != 1) { + free(buf); + return NULL; + } + + if (out_len) { + *out_len = f_len; + } + + return buf; +} + +static void DeallocateBuffer(void* data, size_t) { + free(data); +} + +int main(int argc, char* argv[]) { + TF_Buffer* buf; + TF_Graph* graph; + TF_Status* status; + char *path = argv[1]; + + // load model + { + size_t model_size; + char tmp[1024]; + snprintf(tmp, sizeof(tmp), "%s.pb", path); + printf("loading model %s\n", tmp); + uint8_t *model_data = (uint8_t *)read_file(tmp, &model_size); + buf = TF_NewBuffer(); + buf->data = model_data; + buf->length = model_size; + buf->data_deallocator = DeallocateBuffer; + printf("loaded model of size %d\n", model_size); + } + + // import graph + status = TF_NewStatus(); + graph = TF_NewGraph(); + TF_ImportGraphDefOptions *opts = TF_NewImportGraphDefOptions(); + TF_GraphImportGraphDef(graph, buf, opts, status); + TF_DeleteImportGraphDefOptions(opts); + TF_DeleteBuffer(buf); + if (TF_GetCode(status) != TF_OK) { + printf("FAIL: %s\n", TF_Message(status)); + } else { + printf("SUCCESS\n"); + } +} diff --git a/sunnypilot/modeld_v2/tests/tf_test/pb_loader.py b/sunnypilot/modeld_v2/tests/tf_test/pb_loader.py new file mode 100755 index 0000000000..3e476628eb --- /dev/null +++ b/sunnypilot/modeld_v2/tests/tf_test/pb_loader.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 +import sys +import tensorflow as tf + +with open(sys.argv[1], "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + #tf.io.write_graph(graph_def, '', sys.argv[1]+".try") diff --git a/sunnypilot/modeld_v2/tests/timing/benchmark.py b/sunnypilot/modeld_v2/tests/timing/benchmark.py new file mode 100755 index 0000000000..c629ec2fff --- /dev/null +++ b/sunnypilot/modeld_v2/tests/timing/benchmark.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# type: ignore + +import os +import time +import numpy as np + +import cereal.messaging as messaging +from openpilot.system.manager.process_config import managed_processes + + +N = int(os.getenv("N", "5")) +TIME = int(os.getenv("TIME", "30")) + +if __name__ == "__main__": + sock = messaging.sub_sock('modelV2', conflate=False, timeout=1000) + + execution_times = [] + + for _ in range(N): + os.environ['LOGPRINT'] = 'debug' + managed_processes['modeld'].start() + time.sleep(5) + + t = [] + start = time.monotonic() + while time.monotonic() - start < TIME: + msgs = messaging.drain_sock(sock, wait_for_one=True) + for m in msgs: + t.append(m.modelV2.modelExecutionTime) + + execution_times.append(np.array(t[10:]) * 1000) + managed_processes['modeld'].stop() + + print("\n\n") + print(f"ran modeld {N} times for {TIME}s each") + for _, t in enumerate(execution_times): + print(f"\tavg: {sum(t)/len(t):0.2f}ms, min: {min(t):0.2f}ms, max: {max(t):0.2f}ms") + print("\n\n") diff --git a/sunnypilot/modeld_v2/transforms/loadyuv.cc b/sunnypilot/modeld_v2/transforms/loadyuv.cc new file mode 100644 index 0000000000..eb669a5929 --- /dev/null +++ b/sunnypilot/modeld_v2/transforms/loadyuv.cc @@ -0,0 +1,76 @@ +#include "sunnypilot/modeld_v2/transforms/loadyuv.h" + +#include +#include +#include + +void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) { + memset(s, 0, sizeof(*s)); + + s->width = width; + s->height = height; + + char args[1024]; + snprintf(args, sizeof(args), + "-cl-fast-relaxed-math -cl-denorms-are-zero " + "-DTRANSFORMED_WIDTH=%d -DTRANSFORMED_HEIGHT=%d", + width, height); + cl_program prg = cl_program_from_file(ctx, device_id, LOADYUV_PATH, args); + + s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err)); + s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err)); + s->copy_krnl = CL_CHECK_ERR(clCreateKernel(prg, "copy", &err)); + + // done with this + CL_CHECK(clReleaseProgram(prg)); +} + +void loadyuv_destroy(LoadYUVState* s) { + CL_CHECK(clReleaseKernel(s->loadys_krnl)); + CL_CHECK(clReleaseKernel(s->loaduv_krnl)); + CL_CHECK(clReleaseKernel(s->copy_krnl)); +} + +void loadyuv_queue(LoadYUVState* s, cl_command_queue q, + cl_mem y_cl, cl_mem u_cl, cl_mem v_cl, + cl_mem out_cl) { + cl_int global_out_off = 0; + + CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl)); + CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl)); + CL_CHECK(clSetKernelArg(s->loadys_krnl, 2, sizeof(cl_int), &global_out_off)); + + const size_t loadys_work_size = (s->width*s->height)/8; + CL_CHECK(clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL, + &loadys_work_size, NULL, 0, 0, NULL)); + + const size_t loaduv_work_size = ((s->width/2)*(s->height/2))/8; + global_out_off += (s->width*s->height); + + CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl)); + CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl)); + CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off)); + + CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL, + &loaduv_work_size, NULL, 0, 0, NULL)); + + global_out_off += (s->width/2)*(s->height/2); + + CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl)); + CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl)); + CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off)); + + CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL, + &loaduv_work_size, NULL, 0, 0, NULL)); +} + +void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst, + size_t src_offset, size_t dst_offset, size_t size) { + CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &src)); + CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_mem), &dst)); + CL_CHECK(clSetKernelArg(s->copy_krnl, 2, sizeof(cl_int), &src_offset)); + CL_CHECK(clSetKernelArg(s->copy_krnl, 3, sizeof(cl_int), &dst_offset)); + const size_t copy_work_size = size/8; + CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL, + ©_work_size, NULL, 0, 0, NULL)); +} \ No newline at end of file diff --git a/sunnypilot/modeld_v2/transforms/loadyuv.cl b/sunnypilot/modeld_v2/transforms/loadyuv.cl new file mode 100644 index 0000000000..970187a6d7 --- /dev/null +++ b/sunnypilot/modeld_v2/transforms/loadyuv.cl @@ -0,0 +1,47 @@ +#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2)) + +__kernel void loadys(__global uchar8 const * const Y, + __global uchar * out, + int out_offset) +{ + const int gid = get_global_id(0); + const int ois = gid * 8; + const int oy = ois / TRANSFORMED_WIDTH; + const int ox = ois % TRANSFORMED_WIDTH; + + const uchar8 ys = Y[gid]; + + // 02 + // 13 + + __global uchar* outy0; + __global uchar* outy1; + if ((oy & 1) == 0) { + outy0 = out + out_offset; //y0 + outy1 = out + out_offset + UV_SIZE*2; //y2 + } else { + outy0 = out + out_offset + UV_SIZE; //y1 + outy1 = out + out_offset + UV_SIZE*3; //y3 + } + + vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2); + vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2); +} + +__kernel void loaduv(__global uchar8 const * const in, + __global uchar8 * out, + int out_offset) +{ + const int gid = get_global_id(0); + const uchar8 inv = in[gid]; + out[gid + out_offset / 8] = inv; +} + +__kernel void copy(__global uchar8 * in, + __global uchar8 * out, + int in_offset, + int out_offset) +{ + const int gid = get_global_id(0); + out[gid + out_offset / 8] = in[gid + in_offset / 8]; +} diff --git a/sunnypilot/modeld_v2/transforms/loadyuv.h b/sunnypilot/modeld_v2/transforms/loadyuv.h new file mode 100644 index 0000000000..659059cd25 --- /dev/null +++ b/sunnypilot/modeld_v2/transforms/loadyuv.h @@ -0,0 +1,20 @@ +#pragma once + +#include "common/clutil.h" + +typedef struct { + int width, height; + cl_kernel loadys_krnl, loaduv_krnl, copy_krnl; +} LoadYUVState; + +void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height); + +void loadyuv_destroy(LoadYUVState* s); + +void loadyuv_queue(LoadYUVState* s, cl_command_queue q, + cl_mem y_cl, cl_mem u_cl, cl_mem v_cl, + cl_mem out_cl); + + +void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst, + size_t src_offset, size_t dst_offset, size_t size); \ No newline at end of file diff --git a/sunnypilot/modeld_v2/transforms/transform.cc b/sunnypilot/modeld_v2/transforms/transform.cc new file mode 100644 index 0000000000..adc9bcebf4 --- /dev/null +++ b/sunnypilot/modeld_v2/transforms/transform.cc @@ -0,0 +1,97 @@ +#include "sunnypilot/modeld_v2/transforms/transform.h" + +#include +#include + +#include "common/clutil.h" + +void transform_init(Transform* s, cl_context ctx, cl_device_id device_id) { + memset(s, 0, sizeof(*s)); + + cl_program prg = cl_program_from_file(ctx, device_id, TRANSFORM_PATH, ""); + s->krnl = CL_CHECK_ERR(clCreateKernel(prg, "warpPerspective", &err)); + // done with this + CL_CHECK(clReleaseProgram(prg)); + + s->m_y_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err)); + s->m_uv_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err)); +} + +void transform_destroy(Transform* s) { + CL_CHECK(clReleaseMemObject(s->m_y_cl)); + CL_CHECK(clReleaseMemObject(s->m_uv_cl)); + CL_CHECK(clReleaseKernel(s->krnl)); +} + +void transform_queue(Transform* s, + cl_command_queue q, + cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset, + cl_mem out_y, cl_mem out_u, cl_mem out_v, + int out_width, int out_height, + const mat3& projection) { + const int zero = 0; + + // sampled using pixel center origin + // (because that's how fastcv and opencv does it) + + mat3 projection_y = projection; + + // in and out uv is half the size of y. + mat3 projection_uv = transform_scale_buffer(projection, 0.5); + + CL_CHECK(clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL)); + CL_CHECK(clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL)); + + const int in_y_width = in_width; + const int in_y_height = in_height; + const int in_y_px_stride = 1; + const int in_uv_width = in_width/2; + const int in_uv_height = in_height/2; + const int in_uv_px_stride = 2; + const int in_u_offset = in_uv_offset; + const int in_v_offset = in_uv_offset + 1; + + const int out_y_width = out_width; + const int out_y_height = out_height; + const int out_uv_width = out_width/2; + const int out_uv_height = out_height/2; + + CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv)); // src + CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride)); // src_row_stride + CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride)); // src_px_stride + CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero)); // src_offset + CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height)); // src_rows + CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width)); // src_cols + CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y)); // dst + CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width)); // dst_row_stride + CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset + CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height)); // dst_rows + CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width)); // dst_cols + CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl)); // M + + const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height}; + + CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, + (const size_t*)&work_size_y, NULL, 0, 0, NULL)); + + const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height}; + + CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride)); // src_px_stride + CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset)); // src_offset + CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height)); // src_rows + CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width)); // src_cols + CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u)); // dst + CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width)); // dst_row_stride + CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset + CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height)); // dst_rows + CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width)); // dst_cols + CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl)); // M + + CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, + (const size_t*)&work_size_uv, NULL, 0, 0, NULL)); + CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset)); // src_ofset + CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v)); // dst + + CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, + (const size_t*)&work_size_uv, NULL, 0, 0, NULL)); +} diff --git a/sunnypilot/modeld_v2/transforms/transform.cl b/sunnypilot/modeld_v2/transforms/transform.cl new file mode 100644 index 0000000000..2ca25920cd --- /dev/null +++ b/sunnypilot/modeld_v2/transforms/transform.cl @@ -0,0 +1,54 @@ +#define INTER_BITS 5 +#define INTER_TAB_SIZE (1 << INTER_BITS) +#define INTER_SCALE 1.f / INTER_TAB_SIZE + +#define INTER_REMAP_COEF_BITS 15 +#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS) + +__kernel void warpPerspective(__global const uchar * src, + int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols, + __global uchar * dst, + int dst_row_stride, int dst_offset, int dst_rows, int dst_cols, + __constant float * M) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + + if (dx < dst_cols && dy < dst_rows) + { + float X0 = M[0] * dx + M[1] * dy + M[2]; + float Y0 = M[3] * dx + M[4] * dy + M[5]; + float W = M[6] * dx + M[7] * dy + M[8]; + W = W != 0.0f ? INTER_TAB_SIZE / W : 0.0f; + int X = rint(X0 * W), Y = rint(Y0 * W); + + int sx = convert_short_sat(X >> INTER_BITS); + int sy = convert_short_sat(Y >> INTER_BITS); + + short sx_clamp = clamp(sx, 0, src_cols - 1); + short sx_p1_clamp = clamp(sx + 1, 0, src_cols - 1); + short sy_clamp = clamp(sy, 0, src_rows - 1); + short sy_p1_clamp = clamp(sy + 1, 0, src_rows - 1); + int v0 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]); + int v1 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]); + int v2 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]); + int v3 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]); + + short ay = (short)(Y & (INTER_TAB_SIZE - 1)); + short ax = (short)(X & (INTER_TAB_SIZE - 1)); + float taby = 1.f/INTER_TAB_SIZE*ay; + float tabx = 1.f/INTER_TAB_SIZE*ax; + + int dst_index = mad24(dy, dst_row_stride, dst_offset + dx); + + int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ); + int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE ); + int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ); + int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE ); + + int val = v0 * itab0 + v1 * itab1 + v2 * itab2 + v3 * itab3; + + uchar pix = convert_uchar_sat((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS); + dst[dst_index] = pix; + } +} diff --git a/sunnypilot/modeld_v2/transforms/transform.h b/sunnypilot/modeld_v2/transforms/transform.h new file mode 100644 index 0000000000..771a7054b3 --- /dev/null +++ b/sunnypilot/modeld_v2/transforms/transform.h @@ -0,0 +1,25 @@ +#pragma once + +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include "common/mat.h" + +typedef struct { + cl_kernel krnl; + cl_mem m_y_cl, m_uv_cl; +} Transform; + +void transform_init(Transform* s, cl_context ctx, cl_device_id device_id); + +void transform_destroy(Transform* transform); + +void transform_queue(Transform* s, cl_command_queue q, + cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset, + cl_mem out_y, cl_mem out_u, cl_mem out_v, + int out_width, int out_height, + const mat3& projection); diff --git a/sunnypilot/models/helpers.py b/sunnypilot/models/helpers.py index 868d1fa5e5..915f1a4206 100644 --- a/sunnypilot/models/helpers.py +++ b/sunnypilot/models/helpers.py @@ -60,7 +60,7 @@ def get_active_model_runner(params: Params = None, force_check=False) -> custom. if isinstance(cached_runner_type, str) and cached_runner_type.isdigit(): return int(cached_runner_type) - runner_type = custom.ModelManagerSP.Runner.tinygrad + runner_type = custom.ModelManagerSP.Runner.stock if active_bundle := get_active_bundle(params): runner_type = active_bundle.runner.raw diff --git a/system/manager/process_config.py b/system/manager/process_config.py index 4473b77960..77c73cad16 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -76,9 +76,13 @@ def is_snpe_model(started, params, CP: car.CarParams) -> bool: """Check if the active model runner is SNPE.""" return bool(get_active_model_runner(params, not started) == custom.ModelManagerSP.Runner.snpe) +def is_tinygrad_model(started, params, CP: car.CarParams) -> bool: + """Check if the active model runner is SNPE.""" + return bool(get_active_model_runner(params, not started) == custom.ModelManagerSP.Runner.tinygrad) + def is_stock_model(started, params, CP: car.CarParams) -> bool: """Check if the active model runner is stock.""" - return not is_snpe_model(started, params, CP) + return bool(get_active_model_runner(params, not started) == custom.ModelManagerSP.Runner.stock) def or_(*fns): return lambda *args: operator.or_(*(fn(*args) for fn in fns)) @@ -146,6 +150,7 @@ procs = [ procs += [ PythonProcess("models_manager", "sunnypilot.models.manager", only_offroad), NativeProcess("modeld_snpe", "sunnypilot/modeld", ["./modeld"], and_(only_onroad, is_snpe_model)), + NativeProcess("modeld_tinygrad", "sunnypilot/modeld_v2", ["./modeld"], and_(only_onroad, is_tinygrad_model)), ] if os.path.exists("./github_runner.sh"):