Model: split modeld into it's own contained modeld implementation (#642)

* Add support for TinyGrad model runner processing

Introduced a new function `is_tinygrad_model` to detect TinyGrad as an active model runner. Updated the `is_stock_model` logic to account for TinyGrad models and added a new process entry for TinyGrad in the model manager. This enables handling TinyGrad models alongside existing configurations.

adding modeld back

Add support for `modeld_v2` and update paths for consistency

Updated `SConscript` files to integrate `modeld_v2` alongside `modeld` and adjusted script paths for correct metadata handling. Adjusted various configurations and scripts, such as `labeler.yaml` and `build_release.sh`, to include `modeld_v2` and ensure cohesive project structure.

Refactor imports to use updated `modeld_v2` paths.

Replaced outdated `modeld` references with their `modeld_v2` counterparts for consistency and clarity across the codebase. Also updated `.gitignore` to accommodate new directory structure. This change ensures better maintainability and alignment with the new directory schema.

Refactor and reorganize modeld to sunnypilot/modeld_v2 structure.

Moved and renamed `modeld` components to the new `sunnypilot/modeld_v2` directory for better organization and modularity. Updated imports and file references to align with the new structure, ensuring compatibility and functionality. Streamlined project structure to improve maintainability and future development.

* typo

* Use `stock` model runner and refactor model checks.

Replaces outdated model detection logic with unified `stock` runner integration, simplifying the decision flow for model selection. Includes `stock` as a new enum in the `Runner` type and updates affected references accordingly.

* Handle missing 'sim_pose' in model outputs gracefully.

Added conditional checks to ensure the code handles cases where 'sim_pose' is absent in the model outputs. Fallback behaviors use 'plan' data when 'sim_pose' is unavailable, preventing potential errors and enhancing robustness.
This commit is contained in:
DevTekVE
2025-03-02 20:49:30 +01:00
committed by GitHub
parent cce0f14976
commit 1a8dd310ae
49 changed files with 2096 additions and 16 deletions

View File

@@ -24,4 +24,4 @@ multilanguage:
autonomy:
- changed-files:
- any-glob-to-all-files: "{selfdrive/modeld/models/**,selfdrive/test/process_replay/model_replay_ref_commit}"
- any-glob-to-all-files: "{selfdrive/modeld/models/**,selfdrive/test/process_replay/model_replay_ref_commit,sunnypilot/modeld*/models/**}"

6
.gitignore vendored
View File

@@ -74,9 +74,9 @@ comma*.sh
selfdrive/modeld/thneed/compile
selfdrive/modeld/models/*.thneed
selfdrive/modeld/models/*.pkl
sunnypilot/modeld/thneed/compile
sunnypilot/modeld/models/*.thneed
sunnypilot/modeld/models/*.pkl
sunnypilot/modeld*/thneed/compile
sunnypilot/modeld*/models/*.thneed
sunnypilot/modeld*/models/*.pkl
*.bz2
*.zst

View File

@@ -70,6 +70,7 @@ struct ModelManagerSP @0xaedffd8f31e7b55d {
enum Runner {
snpe @0;
tinygrad @1;
stock @2;
}
struct ModelBundle {

View File

@@ -19,6 +19,7 @@ collect_ignore = [
collect_ignore_glob = [
"selfdrive/debug/*.py",
"selfdrive/modeld/*.py",
"sunnypilot/modeld*/*.py",
]

View File

@@ -75,6 +75,7 @@ find . -name 'moc_*' -delete
find . -name '__pycache__' -delete
rm -rf .sconsign.dblite Jenkinsfile release/
rm selfdrive/modeld/models/supercombo.onnx
rm sunnypilot/modeld*/models/supercombo.onnx
find third_party/ -name '*x86*' -exec rm -r {} +
find third_party/ -name '*Darwin*' -exec rm -r {} +

View File

@@ -3,19 +3,25 @@ import time
from cereal import car, log, messaging
from openpilot.common.params import Params
from openpilot.system.manager.process_config import managed_processes, is_snpe_model
from openpilot.system.manager.process_config import managed_processes, is_snpe_model, is_tinygrad_model, is_stock_model
from openpilot.system.hardware import HARDWARE
if __name__ == "__main__":
CP = car.CarParams(notCar=True, wheelbase=1, steerRatio=10)
params = Params()
params.put("CarParams", CP.to_bytes())
if use_snpe_modeld := is_snpe_model(False, params, CP):
print("Using SNPE modeld")
if use_tinygrad_modeld := is_tinygrad_model(False, params, CP):
print("Using TinyGrad modeld")
if use_stock_modeld := is_stock_model(False, params, CP):
print("Using stock modeld")
HARDWARE.set_power_save(False)
procs = ['camerad', 'ui', 'calibrationd', 'plannerd', 'dmonitoringmodeld', 'dmonitoringd']
procs += ["modeld_snpe" if use_snpe_modeld else "modeld"]
procs += ["modeld_snpe" if use_snpe_modeld else "modeld_tinygrad" if use_tinygrad_modeld else "modeld"]
for p in procs:
managed_processes[p].start()

View File

@@ -1 +1,2 @@
SConscript(['modeld/SConscript'])
SConscript(['modeld/SConscript'])
SConscript(['modeld_v2/SConscript'])

View File

@@ -56,7 +56,7 @@ tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "
# Get model metadata
fn = File("models/supercombo").abspath
cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
cmd = f'python3 {Dir("#sunnypilot/modeld").abspath}/get_model_metadata.py {fn}.onnx'
lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files, cmd)
if arch == "larch64":

1
sunnypilot/modeld_v2/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*_pyx.cpp

View File

@@ -0,0 +1,50 @@
import glob
Import('env', 'envCython', 'arch', 'cereal', 'messaging', 'common', 'gpucommon', 'visionipc', 'transformations')
lenv = env.Clone()
lenvCython = envCython.Clone()
libs = [cereal, messaging, visionipc, gpucommon, common, 'capnp', 'kj', 'pthread']
frameworks = []
common_src = [
"models/commonmodel.cc",
"transforms/loadyuv.cc",
"transforms/transform.cc",
]
# OpenCL is a framework on Mac
if arch == "Darwin":
frameworks += ['OpenCL']
else:
libs += ['OpenCL']
# Set path definitions
for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transforms/loadyuv.cl'}.items():
for xenv in (lenv, lenvCython):
xenv['CXXFLAGS'].append(f'-D{pathdef}_PATH=\\"{File(fn).abspath}\\"')
# Compile cython
cython_libs = envCython["LIBS"] + libs
commonmodel_lib = lenv.Library('commonmodel', common_src)
lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks)
tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x]
# Get model metadata
fn = File("models/supercombo").abspath
cmd = f'python3 {Dir("#sunnypilot/modeld_v2").abspath}/get_model_metadata.py {fn}.onnx'
lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files, cmd)
# Compile tinygrad model
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
if arch == 'larch64':
device_string = 'QCOM=1'
else:
device_string = 'CLANG=1 IMAGE=0'
for model_name in ['supercombo', 'dmonitoring_model']:
fn = File(f"models/{model_name}").abspath
cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd)

View File

@@ -0,0 +1,5 @@
from pathlib import Path
MODEL_PATH = Path(__file__).parent / 'models/supercombo.onnx'
MODEL_PKL_PATH = Path(__file__).parent / 'models/supercombo_tinygrad.pkl'
METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'

View File

@@ -0,0 +1,84 @@
import numpy as np
def index_function(idx, max_val=192, max_idx=32):
return (max_val) * ((idx/max_idx)**2)
class ModelConstants:
# time and distance indices
IDX_N = 33
T_IDXS = [index_function(idx, max_val=10.0) for idx in range(IDX_N)]
X_IDXS = [index_function(idx, max_val=192.0) for idx in range(IDX_N)]
LEAD_T_IDXS = [0., 2., 4., 6., 8., 10.]
LEAD_T_OFFSETS = [0., 2., 4.]
META_T_IDXS = [2., 4., 6., 8., 10.]
# model inputs constants
MODEL_FREQ = 20
FEATURE_LEN = 512
FULL_HISTORY_BUFFER_LEN = 99
DESIRE_LEN = 8
TRAFFIC_CONVENTION_LEN = 2
LAT_PLANNER_STATE_LEN = 4
LATERAL_CONTROL_PARAMS_LEN = 2
PREV_DESIRED_CURV_LEN = 1
# model outputs constants
FCW_THRESHOLDS_5MS2 = np.array([.05, .05, .15, .15, .15], dtype=np.float32)
FCW_THRESHOLDS_3MS2 = np.array([.7, .7], dtype=np.float32)
FCW_5MS2_PROBS_WIDTH = 5
FCW_3MS2_PROBS_WIDTH = 2
DISENGAGE_WIDTH = 5
POSE_WIDTH = 6
WIDE_FROM_DEVICE_WIDTH = 3
LEAD_WIDTH = 4
LANE_LINES_WIDTH = 2
ROAD_EDGES_WIDTH = 2
PLAN_WIDTH = 15
DESIRE_PRED_WIDTH = 8
LAT_PLANNER_SOLUTION_WIDTH = 4
DESIRED_CURV_WIDTH = 1
NUM_LANE_LINES = 4
NUM_ROAD_EDGES = 2
LEAD_TRAJ_LEN = 6
DESIRE_PRED_LEN = 4
PLAN_MHP_N = 5
LEAD_MHP_N = 2
PLAN_MHP_SELECTION = 1
LEAD_MHP_SELECTION = 3
FCW_THRESHOLD_5MS2_HIGH = 0.15
FCW_THRESHOLD_5MS2_LOW = 0.05
FCW_THRESHOLD_3MS2 = 0.7
CONFIDENCE_BUFFER_LEN = 5
RYG_GREEN = 0.01165
RYG_YELLOW = 0.06157
POLY_PATH_DEGREE = 4
# model outputs slices
class Plan:
POSITION = slice(0, 3)
VELOCITY = slice(3, 6)
ACCELERATION = slice(6, 9)
T_FROM_CURRENT_EULER = slice(9, 12)
ORIENTATION_RATE = slice(12, 15)
class Meta:
ENGAGED = slice(0, 1)
# next 2, 4, 6, 8, 10 seconds
GAS_DISENGAGE = slice(1, 31, 6)
BRAKE_DISENGAGE = slice(2, 31, 6)
STEER_OVERRIDE = slice(3, 31, 6)
HARD_BRAKE_3 = slice(4, 31, 6)
HARD_BRAKE_4 = slice(5, 31, 6)
HARD_BRAKE_5 = slice(6, 31, 6)
# next 0, 2, 4, 6, 8, 10 seconds
GAS_PRESS = slice(31, 55, 4)
BRAKE_PRESS = slice(32, 55, 4)
LEFT_BLINKER = slice(33, 55, 4)
RIGHT_BLINKER = slice(34, 55, 4)

View File

@@ -0,0 +1,4 @@
#!/usr/bin/env bash
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
exec "$DIR/dmonitoringmodeld.py" "$@"

View File

@@ -0,0 +1,192 @@
#!/usr/bin/env python3
import os
from openpilot.system.hardware import TICI
if TICI:
from tinygrad.tensor import Tensor
from tinygrad.dtype import dtypes
from openpilot.sunnypilot.modeld_v2.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
os.environ['QCOM'] = '1'
else:
from openpilot.sunnypilot.modeld_v2.runners.ort_helpers import make_onnx_cpu_runner
import math
import time
import pickle
import ctypes
import numpy as np
from pathlib import Path
from setproctitle import setproctitle
from cereal import messaging
from cereal.messaging import PubMaster, SubMaster
from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
from openpilot.common.swaglog import cloudlog
from openpilot.common.realtime import config_realtime_process
from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
from openpilot.sunnypilot.modeld_v2.models.commonmodel_pyx import CLContext, MonitoringModelFrame
from openpilot.sunnypilot.modeld_v2.parse_model_outputs import sigmoid
from openpilot.system import sentry
MODEL_WIDTH, MODEL_HEIGHT = DM_INPUT_SIZE
CALIB_LEN = 3
FEATURE_LEN = 512
OUTPUT_SIZE = 84 + FEATURE_LEN
PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
MODEL_PATH = Path(__file__).parent / 'models/dmonitoring_model.onnx'
MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl'
class DriverStateResult(ctypes.Structure):
_fields_ = [
("face_orientation", ctypes.c_float*3),
("face_position", ctypes.c_float*3),
("face_orientation_std", ctypes.c_float*3),
("face_position_std", ctypes.c_float*3),
("face_prob", ctypes.c_float),
("_unused_a", ctypes.c_float*8),
("left_eye_prob", ctypes.c_float),
("_unused_b", ctypes.c_float*8),
("right_eye_prob", ctypes.c_float),
("left_blink_prob", ctypes.c_float),
("right_blink_prob", ctypes.c_float),
("sunglasses_prob", ctypes.c_float),
("occluded_prob", ctypes.c_float),
("ready_prob", ctypes.c_float*4),
("not_ready_prob", ctypes.c_float*2)]
class DMonitoringModelResult(ctypes.Structure):
_fields_ = [
("driver_state_lhd", DriverStateResult),
("driver_state_rhd", DriverStateResult),
("poor_vision_prob", ctypes.c_float),
("wheel_on_right_prob", ctypes.c_float),
("features", ctypes.c_float*FEATURE_LEN)]
class ModelState:
inputs: dict[str, np.ndarray]
output: np.ndarray
def __init__(self, cl_ctx):
assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)
self.frame = MonitoringModelFrame(cl_ctx)
self.numpy_inputs = {
'calib': np.zeros((1, CALIB_LEN), dtype=np.float32),
}
if TICI:
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
with open(MODEL_PKL_PATH, "rb") as f:
self.model_run = pickle.load(f)
else:
self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH)
def run(self, buf: VisionBuf, calib: np.ndarray, transform: np.ndarray) -> tuple[np.ndarray, float]:
self.numpy_inputs['calib'][0,:] = calib
t1 = time.perf_counter()
input_img_cl = self.frame.prepare(buf, transform.flatten())
if TICI:
# The imgs tensors are backed by opencl memory, only need init once
if 'input_img' not in self.tensor_inputs:
self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, (1, MODEL_WIDTH*MODEL_HEIGHT), dtype=dtypes.uint8)
else:
self.numpy_inputs['input_img'] = self.frame.buffer_from_cl(input_img_cl).reshape((1, MODEL_WIDTH*MODEL_HEIGHT))
if TICI:
output = self.model_run(**self.tensor_inputs).numpy().flatten()
else:
output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten()
t2 = time.perf_counter()
return output, t2 - t1
def fill_driver_state(msg, ds_result: DriverStateResult):
msg.faceOrientation = list(ds_result.face_orientation)
msg.faceOrientationStd = [math.exp(x) for x in ds_result.face_orientation_std]
msg.facePosition = list(ds_result.face_position[:2])
msg.facePositionStd = [math.exp(x) for x in ds_result.face_position_std[:2]]
msg.faceProb = float(sigmoid(ds_result.face_prob))
msg.leftEyeProb = float(sigmoid(ds_result.left_eye_prob))
msg.rightEyeProb = float(sigmoid(ds_result.right_eye_prob))
msg.leftBlinkProb = float(sigmoid(ds_result.left_blink_prob))
msg.rightBlinkProb = float(sigmoid(ds_result.right_blink_prob))
msg.sunglassesProb = float(sigmoid(ds_result.sunglasses_prob))
msg.occludedProb = float(sigmoid(ds_result.occluded_prob))
msg.readyProb = [float(sigmoid(x)) for x in ds_result.ready_prob]
msg.notReadyProb = [float(sigmoid(x)) for x in ds_result.not_ready_prob]
def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: int, execution_time: float, gpu_execution_time: float):
model_result = ctypes.cast(model_output.ctypes.data, ctypes.POINTER(DMonitoringModelResult)).contents
msg = messaging.new_message('driverStateV2', valid=True)
ds = msg.driverStateV2
ds.frameId = frame_id
ds.modelExecutionTime = execution_time
ds.gpuExecutionTime = gpu_execution_time
ds.poorVisionProb = float(sigmoid(model_result.poor_vision_prob))
ds.wheelOnRightProb = float(sigmoid(model_result.wheel_on_right_prob))
ds.rawPredictions = model_output.tobytes() if SEND_RAW_PRED else b''
fill_driver_state(ds.leftDriverData, model_result.driver_state_lhd)
fill_driver_state(ds.rightDriverData, model_result.driver_state_rhd)
return msg
def main():
setproctitle(PROCESS_NAME)
config_realtime_process([0, 1, 2, 3], 5)
sentry.set_tag("daemon", PROCESS_NAME)
cloudlog.bind(daemon=PROCESS_NAME)
cl_context = CLContext()
model = ModelState(cl_context)
cloudlog.warning("models loaded, dmonitoringmodeld starting")
cloudlog.warning("connecting to driver stream")
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True, cl_context)
while not vipc_client.connect(False):
time.sleep(0.1)
assert vipc_client.is_connected()
cloudlog.warning(f"connected with buffer size: {vipc_client.buffer_len}")
sm = SubMaster(["liveCalibration"])
pm = PubMaster(["driverStateV2"])
calib = np.zeros(CALIB_LEN, dtype=np.float32)
model_transform = None
while True:
buf = vipc_client.recv()
if buf is None:
continue
if model_transform is None:
cam = _os_fisheye if buf.width == _os_fisheye.width else _ar_ox_fisheye
model_transform = np.linalg.inv(np.dot(dmonitoringmodel_intrinsics, np.linalg.inv(cam.intrinsics))).astype(np.float32)
sm.update(0)
if sm.updated["liveCalibration"]:
calib[:] = np.array(sm["liveCalibration"].rpyCalib)
t1 = time.perf_counter()
model_output, gpu_execution_time = model.run(buf, calib, model_transform)
t2 = time.perf_counter()
pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time))
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
cloudlog.warning(f"child {PROCESS_NAME} got SIGINT")
except Exception:
sentry.capture_exception()
raise

View File

@@ -0,0 +1,245 @@
import os
import capnp
import numpy as np
from cereal import log
from openpilot.sunnypilot.modeld_v2.constants import ModelConstants, Plan
from openpilot.selfdrive.controls.lib.drive_helpers import MIN_SPEED
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
ConfidenceClass = log.ModelDataV2.ConfidenceClass
def curv_from_psis(psi_target, psi_rate, vego, delay):
vego = np.clip(vego, MIN_SPEED, np.inf)
curv_from_psi = psi_target / (vego * delay) # epsilon to prevent divide-by-zero
return 2 * curv_from_psi - psi_rate / vego
def get_curvature_from_plan(plan, vego, delay):
psi_target = np.interp(delay, ModelConstants.T_IDXS, plan[:, Plan.T_FROM_CURRENT_EULER][:, 2])
psi_rate = plan[:, Plan.ORIENTATION_RATE][0, 2]
return curv_from_psis(psi_target, psi_rate, vego, delay)
def get_curvature_from_output(output, vego, delay):
if desired_curv := output.get('desired_curvature'): # If the model outputs the desired curvature, use that directly
return float(desired_curv[0, 0])
return float(get_curvature_from_plan(output['plan'][0], vego, delay))
class PublishState:
def __init__(self):
self.disengage_buffer = np.zeros(ModelConstants.CONFIDENCE_BUFFER_LEN*ModelConstants.DISENGAGE_WIDTH, dtype=np.float32)
self.prev_brake_5ms2_probs = np.zeros(ModelConstants.FCW_5MS2_PROBS_WIDTH, dtype=np.float32)
self.prev_brake_3ms2_probs = np.zeros(ModelConstants.FCW_3MS2_PROBS_WIDTH, dtype=np.float32)
def fill_xyzt(builder, t, x, y, z, x_std=None, y_std=None, z_std=None):
builder.t = t
builder.x = x.tolist()
builder.y = y.tolist()
builder.z = z.tolist()
if x_std is not None:
builder.xStd = x_std.tolist()
if y_std is not None:
builder.yStd = y_std.tolist()
if z_std is not None:
builder.zStd = z_std.tolist()
def fill_xyvat(builder, t, x, y, v, a, x_std=None, y_std=None, v_std=None, a_std=None):
builder.t = t
builder.x = x.tolist()
builder.y = y.tolist()
builder.v = v.tolist()
builder.a = a.tolist()
if x_std is not None:
builder.xStd = x_std.tolist()
if y_std is not None:
builder.yStd = y_std.tolist()
if v_std is not None:
builder.vStd = v_std.tolist()
if a_std is not None:
builder.aStd = a_std.tolist()
def fill_xyz_poly(builder, degree, x, y, z):
xyz = np.stack([x, y, z], axis=1)
coeffs = np.polynomial.polynomial.polyfit(ModelConstants.T_IDXS, xyz, deg=degree)
builder.xCoefficients = coeffs[:, 0].tolist()
builder.yCoefficients = coeffs[:, 1].tolist()
builder.zCoefficients = coeffs[:, 2].tolist()
def fill_lane_line_meta(builder, lane_lines, lane_line_probs):
builder.leftY = lane_lines[1].y[0]
builder.leftProb = lane_line_probs[1]
builder.rightY = lane_lines[2].y[0]
builder.rightProb = lane_line_probs[2]
def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._DynamicStructBuilder,
net_output_data: dict[str, np.ndarray], v_ego: float, delay: float,
publish_state: PublishState, vipc_frame_id: int, vipc_frame_id_extra: int,
frame_id: int, frame_drop: float, timestamp_eof: int, model_execution_time: float,
valid: bool, model_meta) -> None:
frame_age = frame_id - vipc_frame_id if frame_id > vipc_frame_id else 0
frame_drop_perc = frame_drop * 100
extended_msg.valid = valid
base_msg.valid = valid
desired_curvature = float(get_curvature_from_output(net_output_data, v_ego, delay))
driving_model_data = base_msg.drivingModelData
driving_model_data.frameId = vipc_frame_id
driving_model_data.frameIdExtra = vipc_frame_id_extra
driving_model_data.frameDropPerc = frame_drop_perc
driving_model_data.modelExecutionTime = model_execution_time
driving_model_data.action.desiredCurvature = desired_curvature
modelV2 = extended_msg.modelV2
modelV2.frameId = vipc_frame_id
modelV2.frameIdExtra = vipc_frame_id_extra
modelV2.frameAge = frame_age
modelV2.frameDropPerc = frame_drop_perc
modelV2.timestampEof = timestamp_eof
modelV2.modelExecutionTime = model_execution_time
# plan
fill_xyzt(modelV2.position, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.POSITION].T, *net_output_data['plan_stds'][0,:,Plan.POSITION].T)
fill_xyzt(modelV2.velocity, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.VELOCITY].T)
fill_xyzt(modelV2.acceleration, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.ACCELERATION].T)
fill_xyzt(modelV2.orientation, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.T_FROM_CURRENT_EULER].T)
fill_xyzt(modelV2.orientationRate, ModelConstants.T_IDXS, *net_output_data['plan'][0,:,Plan.ORIENTATION_RATE].T)
# temporal pose
temporal_pose = modelV2.temporalPose
if 'sim_pose' in net_output_data:
temporal_pose.trans = net_output_data['sim_pose'][0,:ModelConstants.POSE_WIDTH//2].tolist()
temporal_pose.transStd = net_output_data['sim_pose_stds'][0,:ModelConstants.POSE_WIDTH//2].tolist()
temporal_pose.rot = net_output_data['sim_pose'][0,ModelConstants.POSE_WIDTH//2:].tolist()
temporal_pose.rotStd = net_output_data['sim_pose_stds'][0,ModelConstants.POSE_WIDTH//2:].tolist()
else:
temporal_pose.trans = net_output_data['plan'][0,0,Plan.VELOCITY].tolist()
temporal_pose.transStd = net_output_data['plan_stds'][0,0,Plan.VELOCITY].tolist()
temporal_pose.rot = net_output_data['plan'][0,0,Plan.ORIENTATION_RATE].tolist()
temporal_pose.rotStd = net_output_data['plan_stds'][0,0,Plan.ORIENTATION_RATE].tolist()
# poly path
fill_xyz_poly(driving_model_data.path, ModelConstants.POLY_PATH_DEGREE, *net_output_data['plan'][0,:,Plan.POSITION].T)
# lateral planning
modelV2.action.desiredCurvature = desired_curvature
# times at X_IDXS according to model plan
PLAN_T_IDXS = [np.nan] * ModelConstants.IDX_N
PLAN_T_IDXS[0] = 0.0
plan_x = net_output_data['plan'][0,:,Plan.POSITION][:,0].tolist()
for xidx in range(1, ModelConstants.IDX_N):
tidx = 0
# increment tidx until we find an element that's further away than the current xidx
while tidx < ModelConstants.IDX_N - 1 and plan_x[tidx+1] < ModelConstants.X_IDXS[xidx]:
tidx += 1
if tidx == ModelConstants.IDX_N - 1:
# if the Plan doesn't extend far enough, set plan_t to the max value (10s), then break
PLAN_T_IDXS[xidx] = ModelConstants.T_IDXS[ModelConstants.IDX_N - 1]
break
# interpolate to find `t` for the current xidx
current_x_val = plan_x[tidx]
next_x_val = plan_x[tidx+1]
p = (ModelConstants.X_IDXS[xidx] - current_x_val) / (next_x_val - current_x_val) if abs(next_x_val - current_x_val) > 1e-9 else float('nan')
PLAN_T_IDXS[xidx] = p * ModelConstants.T_IDXS[tidx+1] + (1 - p) * ModelConstants.T_IDXS[tidx]
# lane lines
modelV2.init('laneLines', 4)
for i in range(4):
lane_line = modelV2.laneLines[i]
fill_xyzt(lane_line, PLAN_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['lane_lines'][0,i,:,0], net_output_data['lane_lines'][0,i,:,1])
modelV2.laneLineStds = net_output_data['lane_lines_stds'][0,:,0,0].tolist()
modelV2.laneLineProbs = net_output_data['lane_lines_prob'][0,1::2].tolist()
fill_lane_line_meta(driving_model_data.laneLineMeta, modelV2.laneLines, modelV2.laneLineProbs)
# road edges
modelV2.init('roadEdges', 2)
for i in range(2):
road_edge = modelV2.roadEdges[i]
fill_xyzt(road_edge, PLAN_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['road_edges'][0,i,:,0], net_output_data['road_edges'][0,i,:,1])
modelV2.roadEdgeStds = net_output_data['road_edges_stds'][0,:,0,0].tolist()
# leads
modelV2.init('leadsV3', 3)
for i in range(3):
lead = modelV2.leadsV3[i]
fill_xyvat(lead, ModelConstants.LEAD_T_IDXS, *net_output_data['lead'][0,i].T, *net_output_data['lead_stds'][0,i].T)
lead.prob = net_output_data['lead_prob'][0,i].tolist()
lead.probTime = ModelConstants.LEAD_T_OFFSETS[i]
# meta
meta = modelV2.meta
meta.desireState = net_output_data['desire_state'][0].reshape(-1).tolist()
meta.desirePrediction = net_output_data['desire_pred'][0].reshape(-1).tolist()
meta.engagedProb = net_output_data['meta'][0,model_meta.ENGAGED].item()
meta.init('disengagePredictions')
disengage_predictions = meta.disengagePredictions
disengage_predictions.t = ModelConstants.META_T_IDXS
disengage_predictions.brakeDisengageProbs = net_output_data['meta'][0,model_meta.BRAKE_DISENGAGE].tolist()
disengage_predictions.gasDisengageProbs = net_output_data['meta'][0,model_meta.GAS_DISENGAGE].tolist()
disengage_predictions.steerOverrideProbs = net_output_data['meta'][0,model_meta.STEER_OVERRIDE].tolist()
disengage_predictions.brake3MetersPerSecondSquaredProbs = net_output_data['meta'][0,model_meta.HARD_BRAKE_3].tolist()
disengage_predictions.brake4MetersPerSecondSquaredProbs = net_output_data['meta'][0,model_meta.HARD_BRAKE_4].tolist()
disengage_predictions.brake5MetersPerSecondSquaredProbs = net_output_data['meta'][0,model_meta.HARD_BRAKE_5].tolist()
if hasattr(model_meta, 'GAS_PRESS') and hasattr(model_meta, 'BRAKE_PRESS'):
disengage_predictions.gasPressProbs = net_output_data['meta'][0,model_meta.GAS_PRESS].tolist()
disengage_predictions.brakePressProbs = net_output_data['meta'][0,model_meta.BRAKE_PRESS].tolist()
publish_state.prev_brake_5ms2_probs[:-1] = publish_state.prev_brake_5ms2_probs[1:]
publish_state.prev_brake_5ms2_probs[-1] = net_output_data['meta'][0,model_meta.HARD_BRAKE_5][0]
publish_state.prev_brake_3ms2_probs[:-1] = publish_state.prev_brake_3ms2_probs[1:]
publish_state.prev_brake_3ms2_probs[-1] = net_output_data['meta'][0,model_meta.HARD_BRAKE_3][0]
hard_brake_predicted = (publish_state.prev_brake_5ms2_probs > ModelConstants.FCW_THRESHOLDS_5MS2).all() and \
(publish_state.prev_brake_3ms2_probs > ModelConstants.FCW_THRESHOLDS_3MS2).all()
meta.hardBrakePredicted = hard_brake_predicted.item()
# confidence
if vipc_frame_id % (2*ModelConstants.MODEL_FREQ) == 0:
# any disengage prob
brake_disengage_probs = net_output_data['meta'][0,model_meta.BRAKE_DISENGAGE]
gas_disengage_probs = net_output_data['meta'][0,model_meta.GAS_DISENGAGE]
steer_override_probs = net_output_data['meta'][0,model_meta.STEER_OVERRIDE]
any_disengage_probs = 1-((1-brake_disengage_probs)*(1-gas_disengage_probs)*(1-steer_override_probs))
# independent disengage prob for each 2s slice
ind_disengage_probs = np.r_[any_disengage_probs[0], np.diff(any_disengage_probs) / (1 - any_disengage_probs[:-1])]
# rolling buf for 2, 4, 6, 8, 10s
publish_state.disengage_buffer[:-ModelConstants.DISENGAGE_WIDTH] = publish_state.disengage_buffer[ModelConstants.DISENGAGE_WIDTH:]
publish_state.disengage_buffer[-ModelConstants.DISENGAGE_WIDTH:] = ind_disengage_probs
score = 0.
for i in range(ModelConstants.DISENGAGE_WIDTH):
score += publish_state.disengage_buffer[i*ModelConstants.DISENGAGE_WIDTH+ModelConstants.DISENGAGE_WIDTH-1-i].item() / ModelConstants.DISENGAGE_WIDTH
if score < ModelConstants.RYG_GREEN:
modelV2.confidence = ConfidenceClass.green
elif score < ModelConstants.RYG_YELLOW:
modelV2.confidence = ConfidenceClass.yellow
else:
modelV2.confidence = ConfidenceClass.red
# raw prediction if enabled
if SEND_RAW_PRED:
modelV2.rawPredictions = net_output_data['raw_pred'].tobytes()
def fill_pose_msg(msg: capnp._DynamicStructBuilder, net_output_data: dict[str, np.ndarray],
vipc_frame_id: int, vipc_dropped_frames: int, timestamp_eof: int, live_calib_seen: bool) -> None:
msg.valid = live_calib_seen & (vipc_dropped_frames < 1)
cameraOdometry = msg.cameraOdometry
cameraOdometry.frameId = vipc_frame_id
cameraOdometry.timestampEof = timestamp_eof
cameraOdometry.trans = net_output_data['pose'][0,:3].tolist()
cameraOdometry.rot = net_output_data['pose'][0,3:].tolist()
cameraOdometry.wideFromDeviceEuler = net_output_data['wide_from_device_euler'][0,:].tolist()
cameraOdometry.roadTransformTrans = net_output_data['road_transform'][0,:3].tolist()
cameraOdometry.transStd = net_output_data['pose_stds'][0,:3].tolist()
cameraOdometry.rotStd = net_output_data['pose_stds'][0,3:].tolist()
cameraOdometry.wideFromDeviceEulerStd = net_output_data['wide_from_device_euler_stds'][0,:].tolist()
cameraOdometry.roadTransformTransStd = net_output_data['road_transform_stds'][0,:3].tolist()

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env python3
import sys
import pathlib
import onnx
import codecs
import pickle
def get_name_and_shape(value_info:onnx.ValueInfoProto) -> tuple[str, tuple[int,...]]:
shape = tuple([int(dim.dim_value) for dim in value_info.type.tensor_type.shape.dim])
name = value_info.name
return name, shape
if __name__ == "__main__":
model_path = pathlib.Path(sys.argv[1])
model = onnx.load(str(model_path))
i = [x.key for x in model.metadata_props].index('output_slices')
output_slices = model.metadata_props[i].value
metadata = {}
metadata['output_slices'] = pickle.loads(codecs.decode(output_slices.encode(), "base64"))
metadata['input_shapes'] = dict([get_name_and_shape(x) for x in model.graph.input])
metadata['output_shapes'] = dict([get_name_and_shape(x) for x in model.graph.output])
metadata_path = model_path.parent / (model_path.stem + '_metadata.pkl')
with open(metadata_path, 'wb') as f:
pickle.dump(metadata, f)
print(f'saved metadata to {metadata_path}')

View File

@@ -1,4 +1,4 @@
from openpilot.selfdrive.modeld.constants import Meta
from openpilot.sunnypilot.modeld_v2.constants import Meta
class Meta20hz(Meta):

View File

@@ -1,4 +1,4 @@
from openpilot.selfdrive.modeld.constants import Meta
from openpilot.sunnypilot.modeld_v2.constants import Meta
from cereal import custom
from openpilot.sunnypilot.modeld_v2.meta_20hz import Meta20hz
from openpilot.sunnypilot.models.helpers import get_active_bundle

View File

@@ -4,10 +4,10 @@ from abc import ABC, abstractmethod
import numpy as np
from cereal import custom
from openpilot.selfdrive.modeld import MODEL_PATH, MODEL_PKL_PATH, METADATA_PATH
from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLMem
from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner, ORT_TYPES_TO_NP_TYPES
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
from openpilot.sunnypilot.modeld_v2 import MODEL_PATH, MODEL_PKL_PATH, METADATA_PATH
from openpilot.sunnypilot.modeld_v2.models.commonmodel_pyx import DrivingModelFrame, CLMem
from openpilot.sunnypilot.modeld_v2.runners.ort_helpers import make_onnx_cpu_runner, ORT_TYPES_TO_NP_TYPES
from openpilot.sunnypilot.modeld_v2.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
from openpilot.system.hardware import TICI
from openpilot.system.hardware.hw import Paths

4
sunnypilot/modeld_v2/modeld Executable file
View File

@@ -0,0 +1,4 @@
#!/usr/bin/env bash
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
exec "$DIR/modeld.py" "$@"

318
sunnypilot/modeld_v2/modeld.py Executable file
View File

@@ -0,0 +1,318 @@
#!/usr/bin/env python3
from openpilot.system.hardware import TICI
#
import time
import numpy as np
import cereal.messaging as messaging
from cereal import car, log
from setproctitle import setproctitle
from cereal.messaging import PubMaster, SubMaster
from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
from opendbc.car.car_helpers import get_demo_car_params
from openpilot.common.swaglog import cloudlog
from openpilot.common.params import Params
from openpilot.common.filter_simple import FirstOrderFilter
from openpilot.common.realtime import config_realtime_process
from openpilot.common.transformations.camera import DEVICE_CAMERAS
from openpilot.common.transformations.model import get_warp_matrix
from openpilot.system import sentry
from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
from openpilot.sunnypilot.modeld_v2.parse_model_outputs import Parser
from openpilot.sunnypilot.modeld_v2.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
from openpilot.sunnypilot.modeld_v2.constants import ModelConstants
from openpilot.sunnypilot.modeld_v2.models.commonmodel_pyx import DrivingModelFrame, CLContext
from openpilot.sunnypilot.modeld_v2.meta_helper import load_meta_constants
from openpilot.sunnypilot.modeld_v2.model_runner import ONNXRunner, TinygradRunner
PROCESS_NAME = "selfdrive.modeld.modeld"
class FrameMeta:
frame_id: int = 0
timestamp_sof: int = 0
timestamp_eof: int = 0
def __init__(self, vipc=None):
if vipc is not None:
self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof
class ModelState:
frames: dict[str, DrivingModelFrame]
inputs: dict[str, np.ndarray]
output: np.ndarray
prev_desire: np.ndarray # for tracking the rising edge of the pulse
def __init__(self, context: CLContext):
try:
self.model_runner = TinygradRunner() if TICI else ONNXRunner()
except Exception as e:
cloudlog.exception(f"Failed to initialize model runner: {str(e)}")
buffer_length = 5 if self.model_runner.is_20hz else 2
self.frames = {'input_imgs': DrivingModelFrame(context, buffer_length), 'big_input_imgs': DrivingModelFrame(context, buffer_length)}
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
if self.model_runner.is_20hz:
self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
# img buffers are managed in openCL transform code
self.numpy_inputs = {}
for key, shape in self.model_runner.input_shapes.items():
if key not in self.frames: # Managed by opencl
self.numpy_inputs[key] = np.zeros(shape, dtype=np.float32)
self.parser = Parser()
if self.model_runner.is_20hz:
net_output_size = self.model_runner.model_metadata['output_shapes']['outputs'][1]
self.output = np.zeros(net_output_size, dtype=np.float32)
num_elements = self.numpy_inputs['features_buffer'].shape[1]
step_size = int(-100 / num_elements)
self.full_features_20Hz_idxs = np.arange(step_size, step_size * (num_elements + 1), step_size)[::-1]
self.desire_reshape_dims = (self.numpy_inputs['desire'].shape[0], self.numpy_inputs['desire'].shape[1], -1, self.numpy_inputs['desire'].shape[2])
def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray,
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
inputs['desire'][0] = 0
new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
self.prev_desire[:] = inputs['desire']
if self.model_runner.is_20hz:
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
self.desire_20Hz[-1] = new_desire
self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape(self.desire_reshape_dims).max(axis=2)
else:
length = inputs['desire'].shape[0]
self.numpy_inputs['desire'][0, :-1] = self.numpy_inputs['desire'][0, 1:]
self.numpy_inputs['desire'][0, -1, :length] = new_desire[:length]
for key in self.numpy_inputs:
if key in inputs and key not in ['desire']:
self.numpy_inputs[key][:] = inputs[key]
imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}
# Prepare inputs using the model runner
self.model_runner.prepare_inputs(imgs_cl, self.numpy_inputs, self.frames)
if prepare_only:
return None
# Run model inference
self.output = self.model_runner.run_model()
outputs = self.parser.parse_outputs(self.model_runner.slice_outputs(self.output))
if self.model_runner.is_20hz:
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[self.full_features_20Hz_idxs]
else:
feature_len = outputs['hidden_state'].shape[1]
self.numpy_inputs['features_buffer'][0, :-1] = self.numpy_inputs['features_buffer'][0, 1:]
self.numpy_inputs['features_buffer'][0, -1, :feature_len] = outputs['hidden_state'][0, :feature_len]
if "desired_curvature" in outputs:
input_name_prev = None
if "prev_desired_curvs" in self.numpy_inputs.keys():
input_name_prev = 'prev_desired_curvs'
elif "prev_desired_curv" in self.numpy_inputs.keys():
input_name_prev = 'prev_desired_curv'
if input_name_prev is not None:
length = outputs['desired_curvature'][0].size
self.numpy_inputs[input_name_prev][0, :-length, 0] = self.numpy_inputs[input_name_prev][0, length:, 0]
self.numpy_inputs[input_name_prev][0, -length:, 0] = outputs['desired_curvature'][0]
return outputs
def main(demo=False):
cloudlog.warning("modeld init")
sentry.set_tag("daemon", PROCESS_NAME)
cloudlog.bind(daemon=PROCESS_NAME)
setproctitle(PROCESS_NAME)
config_realtime_process(7, 54)
cloudlog.warning("setting up CL context")
cl_context = CLContext()
cloudlog.warning("CL context ready; loading model")
model = ModelState(cl_context)
cloudlog.warning("models loaded, modeld starting")
# visionipc clients
while True:
available_streams = VisionIpcClient.available_streams("camerad", block=False)
if available_streams:
use_extra_client = VisionStreamType.VISION_STREAM_WIDE_ROAD in available_streams and VisionStreamType.VISION_STREAM_ROAD in available_streams
main_wide_camera = VisionStreamType.VISION_STREAM_ROAD not in available_streams
break
time.sleep(.1)
vipc_client_main_stream = VisionStreamType.VISION_STREAM_WIDE_ROAD if main_wide_camera else VisionStreamType.VISION_STREAM_ROAD
vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True, cl_context)
vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False, cl_context)
cloudlog.warning(f"vision stream set up, main_wide_camera: {main_wide_camera}, use_extra_client: {use_extra_client}")
while not vipc_client_main.connect(False):
time.sleep(0.1)
while use_extra_client and not vipc_client_extra.connect(False):
time.sleep(0.1)
cloudlog.warning(f"connected main cam with buffer size: {vipc_client_main.buffer_len} ({vipc_client_main.width} x {vipc_client_main.height})")
if use_extra_client:
cloudlog.warning(f"connected extra cam with buffer size: {vipc_client_extra.buffer_len} ({vipc_client_extra.width} x {vipc_client_extra.height})")
# messaging
pm = PubMaster(["modelV2", "drivingModelData", "cameraOdometry"])
sm = SubMaster(["deviceState", "carState", "roadCameraState", "liveCalibration", "driverMonitoringState", "carControl"])
publish_state = PublishState()
params = Params()
# setup filter to track dropped frames
frame_dropped_filter = FirstOrderFilter(0., 10., 1. / ModelConstants.MODEL_FREQ)
frame_id = 0
last_vipc_frame_id = 0
run_count = 0
model_transform_main = np.zeros((3, 3), dtype=np.float32)
model_transform_extra = np.zeros((3, 3), dtype=np.float32)
live_calib_seen = False
buf_main, buf_extra = None, None
meta_main = FrameMeta()
meta_extra = FrameMeta()
if demo:
CP = get_demo_car_params()
else:
CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams)
cloudlog.info("modeld got CarParams: %s", CP.brand)
# TODO this needs more thought, use .2s extra for now to estimate other delays
steer_delay = CP.steerActuatorDelay + .2
DH = DesireHelper()
while True:
# Keep receiving frames until we are at least 1 frame ahead of previous extra frame
while meta_main.timestamp_sof < meta_extra.timestamp_sof + 25000000:
buf_main = vipc_client_main.recv()
meta_main = FrameMeta(vipc_client_main)
if buf_main is None:
break
if buf_main is None:
cloudlog.debug("vipc_client_main no frame")
continue
if use_extra_client:
# Keep receiving extra frames until frame id matches main camera
while True:
buf_extra = vipc_client_extra.recv()
meta_extra = FrameMeta(vipc_client_extra)
if buf_extra is None or meta_main.timestamp_sof < meta_extra.timestamp_sof + 25000000:
break
if buf_extra is None:
cloudlog.debug("vipc_client_extra no frame")
continue
if abs(meta_main.timestamp_sof - meta_extra.timestamp_sof) > 10000000:
cloudlog.error(f"frames out of sync! main: {meta_main.frame_id} ({meta_main.timestamp_sof / 1e9:.5f}),\
extra: {meta_extra.frame_id} ({meta_extra.timestamp_sof / 1e9:.5f})")
else:
# Use single camera
buf_extra = buf_main
meta_extra = meta_main
sm.update(0)
desire = DH.desire
is_rhd = sm["driverMonitoringState"].isRHD
frame_id = sm["roadCameraState"].frameId
v_ego = max(sm["carState"].vEgo, 0.)
if sm.updated["liveCalibration"] and sm.seen['roadCameraState'] and sm.seen['deviceState']:
device_from_calib_euler = np.array(sm["liveCalibration"].rpyCalib, dtype=np.float32)
dc = DEVICE_CAMERAS[(str(sm['deviceState'].deviceType), str(sm['roadCameraState'].sensor))]
model_transform_main = get_warp_matrix(device_from_calib_euler, dc.ecam.intrinsics if main_wide_camera else dc.fcam.intrinsics, False).astype(np.float32)
model_transform_extra = get_warp_matrix(device_from_calib_euler, dc.ecam.intrinsics, True).astype(np.float32)
live_calib_seen = True
traffic_convention = np.zeros(2)
traffic_convention[int(is_rhd)] = 1
vec_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
if desire >= 0 and desire < ModelConstants.DESIRE_LEN:
vec_desire[desire] = 1
# tracked dropped frames
vipc_dropped_frames = max(0, meta_main.frame_id - last_vipc_frame_id - 1)
frames_dropped = frame_dropped_filter.update(min(vipc_dropped_frames, 10))
if run_count < 10: # let frame drops warm up
frame_dropped_filter.x = 0.
frames_dropped = 0.
run_count = run_count + 1
frame_drop_ratio = frames_dropped / (1 + frames_dropped)
prepare_only = vipc_dropped_frames > 0
if prepare_only:
cloudlog.error(f"skipping model eval. Dropped {vipc_dropped_frames} frames")
inputs:dict[str, np.ndarray] = {
'desire': vec_desire,
'traffic_convention': traffic_convention,
}
if "lateral_control_params" in model.numpy_inputs.keys():
inputs['lateral_control_params'] = np.array([v_ego, steer_delay], dtype=np.float32)
mt1 = time.perf_counter()
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
mt2 = time.perf_counter()
model_execution_time = mt2 - mt1
if model_output is not None:
modelv2_send = messaging.new_message('modelV2')
drivingdata_send = messaging.new_message('drivingModelData')
posenet_send = messaging.new_message('cameraOdometry')
fill_model_msg(drivingdata_send, modelv2_send, model_output, v_ego, steer_delay,
publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id,
frame_drop_ratio, meta_main.timestamp_eof, model_execution_time, live_calib_seen, load_meta_constants())
desire_state = modelv2_send.modelV2.meta.desireState
l_lane_change_prob = desire_state[log.Desire.laneChangeLeft]
r_lane_change_prob = desire_state[log.Desire.laneChangeRight]
lane_change_prob = l_lane_change_prob + r_lane_change_prob
DH.update(sm['carState'], sm['carControl'].latActive, lane_change_prob)
modelv2_send.modelV2.meta.laneChangeState = DH.lane_change_state
modelv2_send.modelV2.meta.laneChangeDirection = DH.lane_change_direction
drivingdata_send.drivingModelData.meta.laneChangeState = DH.lane_change_state
drivingdata_send.drivingModelData.meta.laneChangeDirection = DH.lane_change_direction
fill_pose_msg(posenet_send, model_output, meta_main.frame_id, vipc_dropped_frames, meta_main.timestamp_eof, live_calib_seen)
pm.send('modelV2', modelv2_send)
pm.send('drivingModelData', drivingdata_send)
pm.send('cameraOdometry', posenet_send)
last_vipc_frame_id = meta_main.frame_id
if __name__ == "__main__":
try:
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--demo', action='store_true', help='A boolean for demo mode.')
args = parser.parse_args()
main(demo=args.demo)
except KeyboardInterrupt:
cloudlog.warning(f"child {PROCESS_NAME} got SIGINT")
except Exception:
sentry.capture_exception()
raise

View File

@@ -0,0 +1,62 @@
## Neural networks in openpilot
To view the architecture of the ONNX networks, you can use [netron](https://netron.app/)
## Supercombo
### Supercombo input format (Full size: 799906 x float32)
* **image stream**
* Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256
* Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256
* Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2]
* Channel 4 represents the half-res U channel
* Channel 5 represents the half-res V channel
* **wide image stream**
* Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256
* Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256
* Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2]
* Channel 4 represents the half-res U channel
* Channel 5 represents the half-res V channel
* **desire**
* one-hot encoded buffer to command model to execute certain actions, bit needs to be sent for the past 5 seconds (at 20FPS) : 100 * 8
* **traffic convention**
* one-hot encoded vector to tell model whether traffic is right-hand or left-hand traffic : 2
* **feature buffer**
* A buffer of intermediate features that gets appended to the current feature to form a 5 seconds temporal context (at 20FPS) : 99 * 512
### Supercombo output format (Full size: XXX x float32)
Read [here](https://github.com/commaai/openpilot/blob/90af436a121164a51da9fa48d093c29f738adf6a/selfdrive/modeld/models/driving.h#L236) for more.
## Driver Monitoring Model
* .onnx model can be run with onnx runtimes
* .dlc file is a pre-quantized model and only runs on qualcomm DSPs
### input format
* single image W = 1440 H = 960 luminance channel (Y) from the planar YUV420 format:
* full input size is 1440 * 960 = 1382400
* normalized ranging from 0.0 to 1.0 in float32 (onnx runner) or ranging from 0 to 255 in uint8 (snpe runner)
* camera calibration angles (roll, pitch, yaw) from liveCalibration: 3 x float32 inputs
### output format
* 84 x float32 outputs = 2 + 41 * 2 ([parsing example](https://github.com/commaai/openpilot/blob/22ce4e17ba0d3bfcf37f8255a4dd1dc683fe0c38/selfdrive/modeld/models/dmonitoring.cc#L33))
* for each person in the front seats (2 * 41)
* face pose: 12 = 6 + 6
* face orientation [pitch, yaw, roll] in camera frame: 3
* face position [dx, dy] relative to image center: 2
* normalized face size: 1
* standard deviations for above outputs: 6
* face visible probability: 1
* eyes: 20 = (8 + 1) + (8 + 1) + 1 + 1
* eye position and size, and their standard deviations: 8
* eye visible probability: 1
* eye closed probability: 1
* wearing sunglasses probability: 1
* face occluded probability: 1
* touching wheel probability: 1
* paying attention probability: 1
* (deprecated) distracted probabilities: 2
* using phone probability: 1
* distracted probability: 1
* common outputs 2
* poor camera vision probability: 1
* left hand drive probability: 1

View File

View File

@@ -0,0 +1,62 @@
#include "sunnypilot/modeld_v2/models/commonmodel.h"
#include <cmath>
#include <cstring>
#include "common/clutil.h"
DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context, uint8_t buffer_length) : ModelFrame(device_id, context), buffer_length(buffer_length) {
input_frames = std::make_unique<uint8_t[]>(buf_size);
input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_length*frame_size_bytes, NULL, &err));
region.origin = (buffer_length - 1) * frame_size_bytes;
region.size = frame_size_bytes;
last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err));
// printf("Buffer length: %d, region origin: %lu, region size: %lu\n", buffer_length, region.origin, region.size);
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
}
cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
for (int i = 0; i < (buffer_length - 1); i++) {
CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
}
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes);
copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes, frame_size_bytes);
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
clFinish(q);
return &input_frames_cl;
}
DrivingModelFrame::~DrivingModelFrame() {
deinit_transform();
loadyuv_destroy(&loadyuv);
CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
CL_CHECK(clReleaseMemObject(last_img_cl));
CL_CHECK(clReleaseCommandQueue(q));
}
MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
input_frames = std::make_unique<uint8_t[]>(buf_size);
input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
}
cl_mem* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
clFinish(q);
return &y_cl;
}
MonitoringModelFrame::~MonitoringModelFrame() {
deinit_transform();
CL_CHECK(clReleaseCommandQueue(q));
}

View File

@@ -0,0 +1,97 @@
#pragma once
#include <cfloat>
#include <cstdlib>
#include <cassert>
#include <memory>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "common/mat.h"
#include "sunnypilot/modeld_v2/transforms/loadyuv.h"
#include "sunnypilot/modeld_v2/transforms/transform.h"
class ModelFrame {
public:
ModelFrame(cl_device_id device_id, cl_context context) {
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
}
virtual ~ModelFrame() {}
virtual cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { return NULL; }
uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
clFinish(q);
return &input_frames[0];
}
int MODEL_WIDTH;
int MODEL_HEIGHT;
int MODEL_FRAME_SIZE;
int buf_size;
protected:
cl_mem y_cl, u_cl, v_cl;
Transform transform;
cl_command_queue q;
std::unique_ptr<uint8_t[]> input_frames;
void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
transform_init(&transform, context, device_id);
}
void deinit_transform() {
transform_destroy(&transform);
CL_CHECK(clReleaseMemObject(v_cl));
CL_CHECK(clReleaseMemObject(u_cl));
CL_CHECK(clReleaseMemObject(y_cl));
}
void run_transform(cl_mem yuv_cl, int model_width, int model_height, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
transform_queue(&transform, q,
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
y_cl, u_cl, v_cl, model_width, model_height, projection);
}
};
class DrivingModelFrame : public ModelFrame {
public:
DrivingModelFrame(cl_device_id device_id, cl_context context, uint8_t buffer_length);
~DrivingModelFrame();
cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);
const int MODEL_WIDTH = 512;
const int MODEL_HEIGHT = 256;
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
const int buf_size = MODEL_FRAME_SIZE * 2;
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
const uint8_t buffer_length;
private:
LoadYUVState loadyuv;
cl_mem img_buffer_20hz_cl, last_img_cl, input_frames_cl;
cl_buffer_region region;
};
class MonitoringModelFrame : public ModelFrame {
public:
MonitoringModelFrame(cl_device_id device_id, cl_context context);
~MonitoringModelFrame();
cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);
const int MODEL_WIDTH = 1440;
const int MODEL_HEIGHT = 960;
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT;
const int buf_size = MODEL_FRAME_SIZE;
private:
cl_mem input_frame_cl;
};

View File

@@ -0,0 +1,27 @@
# distutils: language = c++
from msgq.visionipc.visionipc cimport cl_device_id, cl_context, cl_mem
cdef extern from "common/mat.h":
cdef struct mat3:
float v[9]
cdef extern from "common/clutil.h":
cdef unsigned long CL_DEVICE_TYPE_DEFAULT
cl_device_id cl_get_device_id(unsigned long)
cl_context cl_create_context(cl_device_id)
void cl_release_context(cl_context)
cdef extern from "sunnypilot/modeld_v2/models/commonmodel.h":
cppclass ModelFrame:
int buf_size
unsigned char * buffer_from_cl(cl_mem*, int);
cl_mem * prepare(cl_mem, int, int, int, int, mat3)
cppclass DrivingModelFrame:
int buf_size
DrivingModelFrame(cl_device_id, cl_context, unsigned char)
cppclass MonitoringModelFrame:
int buf_size
MonitoringModelFrame(cl_device_id, cl_context)

View File

@@ -0,0 +1,13 @@
# distutils: language = c++
from msgq.visionipc.visionipc cimport cl_mem
from msgq.visionipc.visionipc_pyx cimport CLContext as BaseCLContext
cdef class CLContext(BaseCLContext):
pass
cdef class CLMem:
cdef cl_mem * mem
@staticmethod
cdef create(void*)

View File

@@ -0,0 +1,74 @@
# distutils: language = c++
# cython: c_string_encoding=ascii, language_level=3
import numpy as np
cimport numpy as cnp
from libc.string cimport memcpy
from libc.stdint cimport uintptr_t, uint8_t
from msgq.visionipc.visionipc cimport cl_mem
from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext
from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context, cl_release_context
from .commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame
cdef class CLContext(BaseCLContext):
def __cinit__(self):
self.device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT)
self.context = cl_create_context(self.device_id)
def __dealloc__(self):
if self.context:
cl_release_context(self.context)
cdef class CLMem:
@staticmethod
cdef create(void * cmem):
mem = CLMem()
mem.mem = <cl_mem*> cmem
return mem
@property
def mem_address(self):
return <uintptr_t>(self.mem)
def cl_from_visionbuf(VisionBuf buf):
return CLMem.create(<void*>&buf.buf.buf_cl)
cdef class ModelFrame:
cdef cppModelFrame * frame
cdef int buf_size
def __dealloc__(self):
del self.frame
def prepare(self, VisionBuf buf, float[:] projection):
cdef mat3 cprojection
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
cdef cl_mem * data
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection)
return CLMem.create(data)
def buffer_from_cl(self, CLMem in_frames):
cdef unsigned char * data2
data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)
cdef class DrivingModelFrame(ModelFrame):
cdef cppDrivingModelFrame * _frame
def __cinit__(self, CLContext context, int buffer_length=2):
self._frame = new cppDrivingModelFrame(context.device_id, context.context, buffer_length)
self.frame = <cppModelFrame*>(self._frame)
self.buf_size = self._frame.buf_size
cdef class MonitoringModelFrame(ModelFrame):
cdef cppMonitoringModelFrame * _frame
def __cinit__(self, CLContext context):
self._frame = new cppMonitoringModelFrame(context.device_id, context.context)
self.frame = <cppModelFrame*>(self._frame)
self.buf_size = self._frame.buf_size

View File

@@ -0,0 +1,2 @@
fa69be01-b430-4504-9d72-7dcb058eb6dd
d9fb22d1c4fa3ca3d201dbc8edf1d0f0918e53e6

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,107 @@
import numpy as np
from openpilot.sunnypilot.modeld_v2.constants import ModelConstants
def safe_exp(x, out=None):
# -11 is around 10**14, more causes float16 overflow
return np.exp(np.clip(x, -np.inf, 11), out=out)
def sigmoid(x):
return 1. / (1. + safe_exp(-x))
def softmax(x, axis=-1):
x -= np.max(x, axis=axis, keepdims=True)
if x.dtype == np.float32 or x.dtype == np.float64:
safe_exp(x, out=x)
else:
x = safe_exp(x)
x /= np.sum(x, axis=axis, keepdims=True)
return x
class Parser:
def __init__(self, ignore_missing=False):
self.ignore_missing = ignore_missing
def check_missing(self, outs, name):
if name not in outs and not self.ignore_missing:
raise ValueError(f"Missing output {name}")
return name not in outs
def parse_categorical_crossentropy(self, name, outs, out_shape=None):
if self.check_missing(outs, name):
return
raw = outs[name]
if out_shape is not None:
raw = raw.reshape((raw.shape[0],) + out_shape)
outs[name] = softmax(raw, axis=-1)
def parse_binary_crossentropy(self, name, outs):
if self.check_missing(outs, name):
return
raw = outs[name]
outs[name] = sigmoid(raw)
def parse_mdn(self, name, outs, in_N=0, out_N=1, out_shape=None):
if self.check_missing(outs, name):
return
raw = outs[name]
raw = raw.reshape((raw.shape[0], max(in_N, 1), -1))
n_values = (raw.shape[2] - out_N)//2
pred_mu = raw[:,:,:n_values]
pred_std = safe_exp(raw[:,:,n_values: 2*n_values])
if in_N > 1:
weights = np.zeros((raw.shape[0], in_N, out_N), dtype=raw.dtype)
for i in range(out_N):
weights[:,:,i - out_N] = softmax(raw[:,:,i - out_N], axis=-1)
if out_N == 1:
for fidx in range(weights.shape[0]):
idxs = np.argsort(weights[fidx][:,0])[::-1]
weights[fidx] = weights[fidx][idxs]
pred_mu[fidx] = pred_mu[fidx][idxs]
pred_std[fidx] = pred_std[fidx][idxs]
full_shape = tuple([raw.shape[0], in_N] + list(out_shape))
outs[name + '_weights'] = weights
outs[name + '_hypotheses'] = pred_mu.reshape(full_shape)
outs[name + '_stds_hypotheses'] = pred_std.reshape(full_shape)
pred_mu_final = np.zeros((raw.shape[0], out_N, n_values), dtype=raw.dtype)
pred_std_final = np.zeros((raw.shape[0], out_N, n_values), dtype=raw.dtype)
for fidx in range(weights.shape[0]):
for hidx in range(out_N):
idxs = np.argsort(weights[fidx,:,hidx])[::-1]
pred_mu_final[fidx, hidx] = pred_mu[fidx, idxs[0]]
pred_std_final[fidx, hidx] = pred_std[fidx, idxs[0]]
else:
pred_mu_final = pred_mu
pred_std_final = pred_std
if out_N > 1:
final_shape = tuple([raw.shape[0], out_N] + list(out_shape))
else:
final_shape = tuple([raw.shape[0],] + list(out_shape))
outs[name] = pred_mu_final.reshape(final_shape)
outs[name + '_stds'] = pred_std_final.reshape(final_shape)
def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
self.parse_mdn('plan', outs, in_N=ModelConstants.PLAN_MHP_N, out_N=ModelConstants.PLAN_MHP_SELECTION,
out_shape=(ModelConstants.IDX_N,ModelConstants.PLAN_WIDTH))
self.parse_mdn('lane_lines', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_LANE_LINES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH))
self.parse_mdn('road_edges', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_ROAD_EDGES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH))
self.parse_mdn('pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
self.parse_mdn('road_transform', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
if 'sim_pose' in outs:
self.parse_mdn('sim_pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
self.parse_mdn('wide_from_device_euler', outs, in_N=0, out_N=0, out_shape=(ModelConstants.WIDE_FROM_DEVICE_WIDTH,))
self.parse_mdn('lead', outs, in_N=ModelConstants.LEAD_MHP_N, out_N=ModelConstants.LEAD_MHP_SELECTION,
out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH))
if 'lat_planner_solution' in outs:
self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH))
if 'desired_curvature' in outs:
self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,))
for k in ['lead_prob', 'lane_lines_prob', 'meta']:
self.parse_binary_crossentropy(k, outs)
self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH))
return outs

View File

@@ -0,0 +1,36 @@
import onnx
import onnxruntime as ort
import numpy as np
import itertools
ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}
def attributeproto_fp16_to_fp32(attr):
float32_list = np.frombuffer(attr.raw_data, dtype=np.float16)
attr.data_type = 1
attr.raw_data = float32_list.astype(np.float32).tobytes()
def convert_fp16_to_fp32(model):
for i in model.graph.initializer:
if i.data_type == 10:
attributeproto_fp16_to_fp32(i)
for i in itertools.chain(model.graph.input, model.graph.output):
if i.type.tensor_type.elem_type == 10:
i.type.tensor_type.elem_type = 1
for i in model.graph.node:
if i.op_type == 'Cast' and i.attribute[0].i == 10:
i.attribute[0].i = 1
for a in i.attribute:
if hasattr(a, 't'):
if a.t.data_type == 10:
attributeproto_fp16_to_fp32(a.t)
return model.SerializeToString()
def make_onnx_cpu_runner(model_path):
options = ort.SessionOptions()
options.intra_op_num_threads = 4
options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
model_data = convert_fp16_to_fp32(onnx.load(model_path))
return ort.InferenceSession(model_data, options, providers=['CPUExecutionProvider'])

View File

@@ -0,0 +1,8 @@
from tinygrad.tensor import Tensor
from tinygrad.helpers import to_mv
def qcom_tensor_from_opencl_address(opencl_address, shape, dtype):
cl_buf_desc_ptr = to_mv(opencl_address, 8).cast('Q')[0]
rawbuf_ptr = to_mv(cl_buf_desc_ptr, 0x100).cast('Q')[20] # offset 0xA0 is a raw gpu pointer.
return Tensor.from_blob(rawbuf_ptr, shape, dtype=dtype, device='QCOM')

View File

View File

@@ -0,0 +1,101 @@
// clang++ -O2 repro.cc && ./a.out
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
static inline double millis_since_boot() {
struct timespec t;
clock_gettime(CLOCK_BOOTTIME, &t);
return t.tv_sec * 1000.0 + t.tv_nsec * 1e-6;
}
#define MODEL_WIDTH 320
#define MODEL_HEIGHT 640
// null function still breaks it
#define input_lambda(x) x
// this is copied from models/dmonitoring.cc, and is the code that triggers the issue
void inner(uint8_t *resized_buf, float *net_input_buf) {
int resized_width = MODEL_WIDTH;
int resized_height = MODEL_HEIGHT;
// one shot conversion, O(n) anyway
// yuvframe2tensor, normalize
for (int r = 0; r < MODEL_HEIGHT/2; r++) {
for (int c = 0; c < MODEL_WIDTH/2; c++) {
// Y_ul
net_input_buf[(c*MODEL_HEIGHT/2) + r] = input_lambda(resized_buf[(2*r*resized_width) + (2*c)]);
// Y_ur
net_input_buf[(c*MODEL_HEIGHT/2) + r + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width) + (2*c+1)]);
// Y_dl
net_input_buf[(c*MODEL_HEIGHT/2) + r + ((MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c)]);
// Y_dr
net_input_buf[(c*MODEL_HEIGHT/2) + r + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c+1)]);
// U
net_input_buf[(c*MODEL_HEIGHT/2) + r + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + (r*resized_width/2) + c]);
// V
net_input_buf[(c*MODEL_HEIGHT/2) + r + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + ((resized_width/2)*(resized_height/2)) + (r*resized_width/2) + c]);
}
}
}
float trial() {
int resized_width = MODEL_WIDTH;
int resized_height = MODEL_HEIGHT;
int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v
// allocate the buffers
uint8_t *resized_buf = (uint8_t*)malloc(resized_width*resized_height*3/2);
float *net_input_buf = (float*)malloc(yuv_buf_len*sizeof(float));
printf("allocate -- %p 0x%x -- %p 0x%lx\n", resized_buf, resized_width*resized_height*3/2, net_input_buf, yuv_buf_len*sizeof(float));
// test for bad buffers
static int CNT = 20;
float avg = 0.0;
for (int i = 0; i < CNT; i++) {
double s4 = millis_since_boot();
inner(resized_buf, net_input_buf);
double s5 = millis_since_boot();
avg += s5-s4;
}
avg /= CNT;
// once it's bad, it's reliably bad
if (avg > 10) {
printf("HIT %f\n", avg);
printf("BAD\n");
for (int i = 0; i < 200; i++) {
double s4 = millis_since_boot();
inner(resized_buf, net_input_buf);
double s5 = millis_since_boot();
printf("%.2f ", s5-s4);
}
printf("\n");
exit(0);
}
// don't free so we get a different buffer each time
//free(resized_buf);
//free(net_input_buf);
return avg;
}
int main() {
while (true) {
float ret = trial();
printf("got %f\n", ret);
}
}

View File

@@ -0,0 +1,102 @@
import numpy as np
import random
import cereal.messaging as messaging
from msgq.visionipc import VisionIpcServer, VisionStreamType
from opendbc.car.car_helpers import get_demo_car_params
from openpilot.common.params import Params
from openpilot.common.transformations.camera import DEVICE_CAMERAS
from openpilot.common.realtime import DT_MDL
from openpilot.system.manager.process_config import managed_processes
from openpilot.selfdrive.test.process_replay.vision_meta import meta_from_camera_state
CAM = DEVICE_CAMERAS[("tici", "ar0231")].fcam
IMG = np.zeros(int(CAM.width*CAM.height*(3/2)), dtype=np.uint8)
IMG_BYTES = IMG.flatten().tobytes()
class TestModeld:
def setup_method(self):
self.vipc_server = VisionIpcServer("camerad")
self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_ROAD, 40, CAM.width, CAM.height)
self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_DRIVER, 40, CAM.width, CAM.height)
self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_WIDE_ROAD, 40, CAM.width, CAM.height)
self.vipc_server.start_listener()
Params().put("CarParams", get_demo_car_params().to_bytes())
self.sm = messaging.SubMaster(['modelV2', 'cameraOdometry'])
self.pm = messaging.PubMaster(['roadCameraState', 'wideRoadCameraState', 'liveCalibration'])
managed_processes['modeld'].start()
self.pm.wait_for_readers_to_update("roadCameraState", 10)
def teardown_method(self):
managed_processes['modeld'].stop()
del self.vipc_server
def _send_frames(self, frame_id, cams=None):
if cams is None:
cams = ('roadCameraState', 'wideRoadCameraState')
cs = None
for cam in cams:
msg = messaging.new_message(cam)
cs = getattr(msg, cam)
cs.frameId = frame_id
cs.timestampSof = int((frame_id * DT_MDL) * 1e9)
cs.timestampEof = int(cs.timestampSof + (DT_MDL * 1e9))
cam_meta = meta_from_camera_state(cam)
self.pm.send(msg.which(), msg)
self.vipc_server.send(cam_meta.stream, IMG_BYTES, cs.frameId,
cs.timestampSof, cs.timestampEof)
return cs
def _wait(self):
self.sm.update(5000)
if self.sm['modelV2'].frameId != self.sm['cameraOdometry'].frameId:
self.sm.update(1000)
def test_modeld(self):
for n in range(1, 500):
cs = self._send_frames(n)
self._wait()
mdl = self.sm['modelV2']
assert mdl.frameId == n
assert mdl.frameIdExtra == n
assert mdl.timestampEof == cs.timestampEof
assert mdl.frameAge == 0
assert mdl.frameDropPerc == 0
odo = self.sm['cameraOdometry']
assert odo.frameId == n
assert odo.timestampEof == cs.timestampEof
def test_dropped_frames(self):
"""
modeld should only run on consecutive road frames
"""
frame_id = -1
road_frames = list()
for n in range(1, 50):
if (random.random() < 0.1) and n > 3:
cams = random.choice([(), ('wideRoadCameraState', )])
self._send_frames(n, cams)
else:
self._send_frames(n)
road_frames.append(n)
self._wait()
if len(road_frames) < 3 or road_frames[-1] - road_frames[-2] == 1:
frame_id = road_frames[-1]
mdl = self.sm['modelV2']
odo = self.sm['cameraOdometry']
assert mdl.frameId == frame_id
assert mdl.frameIdExtra == frame_id
assert odo.frameId == frame_id
if n != frame_id:
assert not self.sm.updated['modelV2']
assert not self.sm.updated['cameraOdometry']

View File

@@ -0,0 +1,2 @@
#!/usr/bin/env bash
clang++ -I /home/batman/one/external/tensorflow/include/ -L /home/batman/one/external/tensorflow/lib -Wl,-rpath=/home/batman/one/external/tensorflow/lib main.cc -ltensorflow

View File

@@ -0,0 +1,69 @@
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include "tensorflow/c/c_api.h"
void* read_file(const char* path, size_t* out_len) {
FILE* f = fopen(path, "r");
if (!f) {
return NULL;
}
fseek(f, 0, SEEK_END);
long f_len = ftell(f);
rewind(f);
char* buf = (char*)calloc(f_len, 1);
assert(buf);
size_t num_read = fread(buf, f_len, 1, f);
fclose(f);
if (num_read != 1) {
free(buf);
return NULL;
}
if (out_len) {
*out_len = f_len;
}
return buf;
}
static void DeallocateBuffer(void* data, size_t) {
free(data);
}
int main(int argc, char* argv[]) {
TF_Buffer* buf;
TF_Graph* graph;
TF_Status* status;
char *path = argv[1];
// load model
{
size_t model_size;
char tmp[1024];
snprintf(tmp, sizeof(tmp), "%s.pb", path);
printf("loading model %s\n", tmp);
uint8_t *model_data = (uint8_t *)read_file(tmp, &model_size);
buf = TF_NewBuffer();
buf->data = model_data;
buf->length = model_size;
buf->data_deallocator = DeallocateBuffer;
printf("loaded model of size %d\n", model_size);
}
// import graph
status = TF_NewStatus();
graph = TF_NewGraph();
TF_ImportGraphDefOptions *opts = TF_NewImportGraphDefOptions();
TF_GraphImportGraphDef(graph, buf, opts, status);
TF_DeleteImportGraphDefOptions(opts);
TF_DeleteBuffer(buf);
if (TF_GetCode(status) != TF_OK) {
printf("FAIL: %s\n", TF_Message(status));
} else {
printf("SUCCESS\n");
}
}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env python3
import sys
import tensorflow as tf
with open(sys.argv[1], "rb") as f:
graph_def = tf.compat.v1.GraphDef()
graph_def.ParseFromString(f.read())
#tf.io.write_graph(graph_def, '', sys.argv[1]+".try")

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env python3
# type: ignore
import os
import time
import numpy as np
import cereal.messaging as messaging
from openpilot.system.manager.process_config import managed_processes
N = int(os.getenv("N", "5"))
TIME = int(os.getenv("TIME", "30"))
if __name__ == "__main__":
sock = messaging.sub_sock('modelV2', conflate=False, timeout=1000)
execution_times = []
for _ in range(N):
os.environ['LOGPRINT'] = 'debug'
managed_processes['modeld'].start()
time.sleep(5)
t = []
start = time.monotonic()
while time.monotonic() - start < TIME:
msgs = messaging.drain_sock(sock, wait_for_one=True)
for m in msgs:
t.append(m.modelV2.modelExecutionTime)
execution_times.append(np.array(t[10:]) * 1000)
managed_processes['modeld'].stop()
print("\n\n")
print(f"ran modeld {N} times for {TIME}s each")
for _, t in enumerate(execution_times):
print(f"\tavg: {sum(t)/len(t):0.2f}ms, min: {min(t):0.2f}ms, max: {max(t):0.2f}ms")
print("\n\n")

View File

@@ -0,0 +1,76 @@
#include "sunnypilot/modeld_v2/transforms/loadyuv.h"
#include <cassert>
#include <cstdio>
#include <cstring>
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) {
memset(s, 0, sizeof(*s));
s->width = width;
s->height = height;
char args[1024];
snprintf(args, sizeof(args),
"-cl-fast-relaxed-math -cl-denorms-are-zero "
"-DTRANSFORMED_WIDTH=%d -DTRANSFORMED_HEIGHT=%d",
width, height);
cl_program prg = cl_program_from_file(ctx, device_id, LOADYUV_PATH, args);
s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err));
s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err));
s->copy_krnl = CL_CHECK_ERR(clCreateKernel(prg, "copy", &err));
// done with this
CL_CHECK(clReleaseProgram(prg));
}
void loadyuv_destroy(LoadYUVState* s) {
CL_CHECK(clReleaseKernel(s->loadys_krnl));
CL_CHECK(clReleaseKernel(s->loaduv_krnl));
CL_CHECK(clReleaseKernel(s->copy_krnl));
}
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
cl_mem out_cl) {
cl_int global_out_off = 0;
CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl));
CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->loadys_krnl, 2, sizeof(cl_int), &global_out_off));
const size_t loadys_work_size = (s->width*s->height)/8;
CL_CHECK(clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL,
&loadys_work_size, NULL, 0, 0, NULL));
const size_t loaduv_work_size = ((s->width/2)*(s->height/2))/8;
global_out_off += (s->width*s->height);
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off));
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL));
global_out_off += (s->width/2)*(s->height/2);
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off));
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL));
}
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
size_t src_offset, size_t dst_offset, size_t size) {
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &src));
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_mem), &dst));
CL_CHECK(clSetKernelArg(s->copy_krnl, 2, sizeof(cl_int), &src_offset));
CL_CHECK(clSetKernelArg(s->copy_krnl, 3, sizeof(cl_int), &dst_offset));
const size_t copy_work_size = size/8;
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
&copy_work_size, NULL, 0, 0, NULL));
}

View File

@@ -0,0 +1,47 @@
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
__kernel void loadys(__global uchar8 const * const Y,
__global uchar * out,
int out_offset)
{
const int gid = get_global_id(0);
const int ois = gid * 8;
const int oy = ois / TRANSFORMED_WIDTH;
const int ox = ois % TRANSFORMED_WIDTH;
const uchar8 ys = Y[gid];
// 02
// 13
__global uchar* outy0;
__global uchar* outy1;
if ((oy & 1) == 0) {
outy0 = out + out_offset; //y0
outy1 = out + out_offset + UV_SIZE*2; //y2
} else {
outy0 = out + out_offset + UV_SIZE; //y1
outy1 = out + out_offset + UV_SIZE*3; //y3
}
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
}
__kernel void loaduv(__global uchar8 const * const in,
__global uchar8 * out,
int out_offset)
{
const int gid = get_global_id(0);
const uchar8 inv = in[gid];
out[gid + out_offset / 8] = inv;
}
__kernel void copy(__global uchar8 * in,
__global uchar8 * out,
int in_offset,
int out_offset)
{
const int gid = get_global_id(0);
out[gid + out_offset / 8] = in[gid + in_offset / 8];
}

View File

@@ -0,0 +1,20 @@
#pragma once
#include "common/clutil.h"
typedef struct {
int width, height;
cl_kernel loadys_krnl, loaduv_krnl, copy_krnl;
} LoadYUVState;
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height);
void loadyuv_destroy(LoadYUVState* s);
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
cl_mem out_cl);
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
size_t src_offset, size_t dst_offset, size_t size);

View File

@@ -0,0 +1,97 @@
#include "sunnypilot/modeld_v2/transforms/transform.h"
#include <cassert>
#include <cstring>
#include "common/clutil.h"
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id) {
memset(s, 0, sizeof(*s));
cl_program prg = cl_program_from_file(ctx, device_id, TRANSFORM_PATH, "");
s->krnl = CL_CHECK_ERR(clCreateKernel(prg, "warpPerspective", &err));
// done with this
CL_CHECK(clReleaseProgram(prg));
s->m_y_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
s->m_uv_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
}
void transform_destroy(Transform* s) {
CL_CHECK(clReleaseMemObject(s->m_y_cl));
CL_CHECK(clReleaseMemObject(s->m_uv_cl));
CL_CHECK(clReleaseKernel(s->krnl));
}
void transform_queue(Transform* s,
cl_command_queue q,
cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
cl_mem out_y, cl_mem out_u, cl_mem out_v,
int out_width, int out_height,
const mat3& projection) {
const int zero = 0;
// sampled using pixel center origin
// (because that's how fastcv and opencv does it)
mat3 projection_y = projection;
// in and out uv is half the size of y.
mat3 projection_uv = transform_scale_buffer(projection, 0.5);
CL_CHECK(clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL));
CL_CHECK(clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL));
const int in_y_width = in_width;
const int in_y_height = in_height;
const int in_y_px_stride = 1;
const int in_uv_width = in_width/2;
const int in_uv_height = in_height/2;
const int in_uv_px_stride = 2;
const int in_u_offset = in_uv_offset;
const int in_v_offset = in_uv_offset + 1;
const int out_y_width = out_width;
const int out_y_height = out_height;
const int out_uv_width = out_width/2;
const int out_uv_height = out_height/2;
CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv)); // src
CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride)); // src_row_stride
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride)); // src_px_stride
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero)); // src_offset
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height)); // src_rows
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width)); // src_cols
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y)); // dst
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width)); // dst_row_stride
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height)); // dst_rows
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width)); // dst_cols
CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl)); // M
const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height};
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_y, NULL, 0, 0, NULL));
const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height};
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride)); // src_px_stride
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset)); // src_offset
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height)); // src_rows
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width)); // src_cols
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u)); // dst
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width)); // dst_row_stride
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height)); // dst_rows
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width)); // dst_cols
CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl)); // M
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_uv, NULL, 0, 0, NULL));
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset)); // src_ofset
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v)); // dst
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_uv, NULL, 0, 0, NULL));
}

View File

@@ -0,0 +1,54 @@
#define INTER_BITS 5
#define INTER_TAB_SIZE (1 << INTER_BITS)
#define INTER_SCALE 1.f / INTER_TAB_SIZE
#define INTER_REMAP_COEF_BITS 15
#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)
__kernel void warpPerspective(__global const uchar * src,
int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols,
__global uchar * dst,
int dst_row_stride, int dst_offset, int dst_rows, int dst_cols,
__constant float * M)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
if (dx < dst_cols && dy < dst_rows)
{
float X0 = M[0] * dx + M[1] * dy + M[2];
float Y0 = M[3] * dx + M[4] * dy + M[5];
float W = M[6] * dx + M[7] * dy + M[8];
W = W != 0.0f ? INTER_TAB_SIZE / W : 0.0f;
int X = rint(X0 * W), Y = rint(Y0 * W);
int sx = convert_short_sat(X >> INTER_BITS);
int sy = convert_short_sat(Y >> INTER_BITS);
short sx_clamp = clamp(sx, 0, src_cols - 1);
short sx_p1_clamp = clamp(sx + 1, 0, src_cols - 1);
short sy_clamp = clamp(sy, 0, src_rows - 1);
short sy_p1_clamp = clamp(sy + 1, 0, src_rows - 1);
int v0 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
int v1 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
int v2 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
int v3 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
short ay = (short)(Y & (INTER_TAB_SIZE - 1));
short ax = (short)(X & (INTER_TAB_SIZE - 1));
float taby = 1.f/INTER_TAB_SIZE*ay;
float tabx = 1.f/INTER_TAB_SIZE*ax;
int dst_index = mad24(dy, dst_row_stride, dst_offset + dx);
int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE );
int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE );
int val = v0 * itab0 + v1 * itab1 + v2 * itab2 + v3 * itab3;
uchar pix = convert_uchar_sat((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS);
dst[dst_index] = pix;
}
}

View File

@@ -0,0 +1,25 @@
#pragma once
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "common/mat.h"
typedef struct {
cl_kernel krnl;
cl_mem m_y_cl, m_uv_cl;
} Transform;
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id);
void transform_destroy(Transform* transform);
void transform_queue(Transform* s, cl_command_queue q,
cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
cl_mem out_y, cl_mem out_u, cl_mem out_v,
int out_width, int out_height,
const mat3& projection);

View File

@@ -60,7 +60,7 @@ def get_active_model_runner(params: Params = None, force_check=False) -> custom.
if isinstance(cached_runner_type, str) and cached_runner_type.isdigit():
return int(cached_runner_type)
runner_type = custom.ModelManagerSP.Runner.tinygrad
runner_type = custom.ModelManagerSP.Runner.stock
if active_bundle := get_active_bundle(params):
runner_type = active_bundle.runner.raw

View File

@@ -76,9 +76,13 @@ def is_snpe_model(started, params, CP: car.CarParams) -> bool:
"""Check if the active model runner is SNPE."""
return bool(get_active_model_runner(params, not started) == custom.ModelManagerSP.Runner.snpe)
def is_tinygrad_model(started, params, CP: car.CarParams) -> bool:
"""Check if the active model runner is SNPE."""
return bool(get_active_model_runner(params, not started) == custom.ModelManagerSP.Runner.tinygrad)
def is_stock_model(started, params, CP: car.CarParams) -> bool:
"""Check if the active model runner is stock."""
return not is_snpe_model(started, params, CP)
return bool(get_active_model_runner(params, not started) == custom.ModelManagerSP.Runner.stock)
def or_(*fns):
return lambda *args: operator.or_(*(fn(*args) for fn in fns))
@@ -146,6 +150,7 @@ procs = [
procs += [
PythonProcess("models_manager", "sunnypilot.models.manager", only_offroad),
NativeProcess("modeld_snpe", "sunnypilot/modeld", ["./modeld"], and_(only_onroad, is_snpe_model)),
NativeProcess("modeld_tinygrad", "sunnypilot/modeld_v2", ["./modeld"], and_(only_onroad, is_tinygrad_model)),
]
if os.path.exists("./github_runner.sh"):