StarPilot/selfdrive/modeld/modeld_v16.py

#!/usr/bin/env python3
from __future__ import annotations

import os
import pickle
import time
from pathlib import Path

from openpilot.system.hardware import TICI

os.environ["GMMU"] = "0"  # noop on qcom, improves load path when a USB GPU is present
os.environ["DEV"] = "QCOM" if TICI else "LLVM"

import cereal.messaging as messaging
import numpy as np
from cereal import car, log
from msgq.visionipc import VisionBuf, VisionIpcClient, VisionStreamType
from opendbc.car.car_helpers import get_demo_car_params
from setproctitle import setproctitle
from tinygrad.dtype import dtypes
from tinygrad.engine.jit import get_out_buffers_for_ei
from tinygrad.tensor import Tensor

from openpilot.common.file_chunker import read_file_chunked
from openpilot.common.filter_simple import FirstOrderFilter
from openpilot.common.params import Params
from openpilot.common.realtime import DT_MDL, config_realtime_process
from openpilot.common.swaglog import cloudlog
from openpilot.common.transformations.camera import DEVICE_CAMERAS
from openpilot.common.transformations.model import get_warp_matrix
from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, get_curvature_from_plan, smooth_value
from openpilot.selfdrive.modeld.camera_offset import CameraOffset, DEFAULT_CAMERA_HEIGHT
from openpilot.selfdrive.modeld.compile_modeld import POLICY_INPUTS, make_input_queues
from openpilot.selfdrive.modeld.constants import ModelConstants, Plan
from openpilot.selfdrive.modeld.fill_model_msg import PublishState, fill_model_msg, fill_pose_msg
from openpilot.selfdrive.modeld.helpers import get_tg_input_devices
from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, DrivingModelFrame
from openpilot.selfdrive.modeld.parse_model_outputs import Parser
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
from openpilot.starpilot.assets.model_manager import ModelManager
from openpilot.starpilot.common.model_versions import uses_combined_driving_artifacts
from openpilot.starpilot.common.starpilot_variables import MODELS_PATH, get_starpilot_toggles, params_memory
from openpilot.system import sentry


PROCESS_NAME = "selfdrive.modeld.modeld"
SEND_RAW_PRED = os.getenv("SEND_RAW_PRED")

BUILTIN_MODEL_KEY = "sc2"
BUILTIN_MODEL_ALIASES = {BUILTIN_MODEL_KEY, "sc"}

LAT_SMOOTH_SECONDS = 0.0
LONG_SMOOTH_SECONDS = 0.3
MIN_LAT_CONTROL_SPEED = 0.3


def _get_param_str(params: Params, key: str, default: str = "") -> str:
  try:
    value = params.get(key)
  except Exception:
    return default
  if value is None:
    return default
  if isinstance(value, bytes):
    try:
      return value.decode("utf-8")
    except Exception:
      return default
  if isinstance(value, (dict, list)):
    return default
  return str(value)


def _get_default_param_str(params: Params, key: str) -> str:
  try:
    value = params.get_default_value(key)
  except Exception:
    return ""
  if value is None:
    return ""
  if isinstance(value, bytes):
    try:
      return value.decode("utf-8")
    except Exception:
      return ""
  return str(value)


def _resolve_mirrored_param(params: Params, primary_key: str, secondary_key: str) -> str:
  primary_val = _get_param_str(params, primary_key).strip()
  secondary_val = _get_param_str(params, secondary_key).strip()
  if primary_val == secondary_val:
    return secondary_val or primary_val

  primary_default = _get_default_param_str(params, primary_key).strip()
  secondary_default = _get_default_param_str(params, secondary_key).strip()
  primary_non_default = bool(primary_val) and primary_val != primary_default
  secondary_non_default = bool(secondary_val) and secondary_val != secondary_default

  if secondary_non_default:
    return secondary_val
  if primary_non_default:
    return primary_val
  return secondary_val or primary_val


def _canonical_model_id(model_id: str) -> str:
  key = (model_id or "").strip().lower()
  return BUILTIN_MODEL_KEY if key in BUILTIN_MODEL_ALIASES else key


def _combined_model_path(model_id: str, use_builtin_model: bool) -> Path:
  if use_builtin_model:
    return Path(__file__).parent / "models" / "driving_tinygrad.pkl"
  return MODELS_PATH / f"{model_id}_driving_tinygrad.pkl"


def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action, v_ego: float) -> log.ModelDataV2.Action:
  if "action" in model_output:
    desired_curv_unscaled, desired_accel = model_output["action"][0]
    desired_curvature = float(desired_curv_unscaled) / max(1.0, v_ego) ** 2
    should_stop = (v_ego < 0.3 and desired_accel < 0.1)
  else:
    plan = model_output["plan"][0]
    desired_accel, should_stop = get_accel_from_plan(
      plan[:, Plan.VELOCITY][:, 0],
      plan[:, Plan.ACCELERATION][:, 0],
      ModelConstants.T_IDXS,
      action_t=DT_MDL,
    )
    desired_curvature = get_curvature_from_plan(
      plan[:, Plan.T_FROM_CURRENT_EULER][:, 2],
      plan[:, Plan.ORIENTATION_RATE][:, 2],
      ModelConstants.T_IDXS,
      v_ego,
      DT_MDL,
    )

  desired_accel = smooth_value(float(desired_accel), prev_action.desiredAcceleration, LONG_SMOOTH_SECONDS)
  if v_ego > MIN_LAT_CONTROL_SPEED:
    desired_curvature = smooth_value(desired_curvature, prev_action.desiredCurvature, LAT_SMOOTH_SECONDS)
  else:
    desired_curvature = prev_action.desiredCurvature

  return log.ModelDataV2.Action(
    desiredCurvature=float(desired_curvature),
    desiredAcceleration=float(desired_accel),
    shouldStop=bool(should_stop),
  )


class FrameMeta:
  frame_id: int = 0
  timestamp_sof: int = 0
  timestamp_eof: int = 0

  def __init__(self, vipc=None):
    if vipc is not None:
      self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof


class ModelState:
  prev_desire: np.ndarray

  def __init__(self, context: CLContext, usbgpu: bool):
    params = Params()
    model_id_raw = _resolve_mirrored_param(params, "Model", "DrivingModel") or BUILTIN_MODEL_KEY
    self.model_id = _canonical_model_id(model_id_raw)
    self.model_version = _resolve_mirrored_param(params, "ModelVersion", "DrivingModelVersion")
    if not uses_combined_driving_artifacts(self.model_version):
      raise ValueError(f"Combined runtime requested for non-combined version {self.model_version!r}")

    use_builtin_model = self.model_id == BUILTIN_MODEL_KEY
    model_path = _combined_model_path(self.model_id, use_builtin_model)
    if not model_path.is_file():
      if use_builtin_model:
        raise FileNotFoundError(
          f"Missing builtin combined model artifact: {model_path}. "
          "Rebuild/deploy the combined builtin model before selecting this version."
        )

      cloudlog.error(f"Missing combined model artifact {model_path}, downloading {self.model_id}...")
      ModelManager(params, params_memory).download_model(self.model_id)
      if not model_path.is_file():
        raise FileNotFoundError(model_path)

    jits = pickle.loads(read_file_chunked(model_path))

    vision_metadata = jits["metadata"]["vision"]
    off_policy_metadata = jits["metadata"]["off_policy"]
    on_policy_metadata = jits["metadata"]["on_policy"]

    self.vision_input_shapes = vision_metadata["input_shapes"]
    self.vision_input_names = list(self.vision_input_shapes.keys())
    self.vision_output_slices = vision_metadata["output_slices"]
    self.off_policy_output_slices = off_policy_metadata["output_slices"]
    self.policy_input_shapes = on_policy_metadata["input_shapes"]
    self.policy_output_slices = on_policy_metadata["output_slices"]
    self.desire_key = "desire_pulse" if "desire_pulse" in self.policy_input_shapes else next(
      key for key in self.policy_input_shapes if key.startswith("desire")
    )

    self.frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ
    input_devices = get_tg_input_devices(PROCESS_NAME, usbgpu)
    self.WARP_DEV, self.QUEUE_DEV = input_devices["WARP_DEV"], input_devices["QUEUE_DEV"]
    self.input_queues, self.npy = make_input_queues(
      self.vision_input_shapes, self.policy_input_shapes, self.frame_skip, device=self.QUEUE_DEV
    )
    self.frames = {name: DrivingModelFrame(context, ModelConstants.TEMPORAL_SKIP) for name in self.vision_input_names}
    self.vision_inputs: dict[str, Tensor] = {}
    self.parser = Parser()
    self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
    self.run_policy = jits["run_policy"]

  def slice_outputs(self, model_outputs: np.ndarray, output_slices: dict[str, slice]) -> dict[str, np.ndarray]:
    return {key: model_outputs[np.newaxis, value] for key, value in output_slices.items()}

  def read_captured_outputs(self) -> tuple[np.ndarray, np.ndarray, np.ndarray] | None:
    captured = getattr(self.run_policy, "captured", None)
    ret_output_map = getattr(captured, "ret_output_map", None)
    if captured is None or ret_output_map is None or len(ret_output_map) != 3:
      return None

    jit_outs = []
    for ji in captured.jit_cache:
      jit_outs.extend(get_out_buffers_for_ei(ji))

    outputs = []
    for idx in ret_output_map:
      if idx is None or idx >= len(jit_outs):
        return None
      outputs.append(np.frombuffer(bytes(jit_outs[idx].as_memoryview()), dtype=np.float32).copy())
    return tuple(outputs)

  def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray], inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
    inputs[self.desire_key][0] = 0
    self.npy["desire"][:] = np.where(inputs[self.desire_key] - self.prev_desire > 0.99, inputs[self.desire_key], 0)
    self.prev_desire[:] = inputs[self.desire_key]
    self.npy["traffic_convention"][:] = inputs["traffic_convention"]
    if "action_t" in self.npy:
      self.npy["action_t"][:] = inputs["action_t"]

    if prepare_only:
      return None

    imgs_cl = {name: self.frames[name].prepare(bufs[name], transforms[name].flatten()) for name in self.vision_input_names}
    if TICI:
      for key in imgs_cl:
        if key not in self.vision_inputs:
          self.vision_inputs[key] = qcom_tensor_from_opencl_address(
            imgs_cl[key].mem_address,
            self.vision_input_shapes[key],
            dtype=dtypes.uint8,
          )
    else:
      for key in imgs_cl:
        frame_input = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.vision_input_shapes[key])
        self.vision_inputs[key] = Tensor(frame_input, dtype=dtypes.uint8).realize()

    vision_output, policy_output, off_policy_output = self.run_policy(
      **{key: self.input_queues[key] for key in POLICY_INPUTS if key in self.input_queues},
      img=self.vision_inputs["img"],
      big_img=self.vision_inputs["big_img"],
    )

    captured_outputs = self.read_captured_outputs()
    if captured_outputs is not None:
      vision_output, policy_output, off_policy_output = captured_outputs
    else:
      vision_output = vision_output.numpy().flatten()
      policy_output = policy_output.numpy().flatten()
      off_policy_output = off_policy_output.numpy().flatten()

    vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(vision_output, self.vision_output_slices))
    off_policy_outputs_dict = self.parser.parse_off_policy_outputs(self.slice_outputs(off_policy_output, self.off_policy_output_slices))
    policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(policy_output, self.policy_output_slices))
    combined_outputs_dict = {**vision_outputs_dict, **off_policy_outputs_dict, **policy_outputs_dict}

    if SEND_RAW_PRED:
      combined_outputs_dict["raw_pred"] = np.concatenate([vision_output.copy(), policy_output.copy(), off_policy_output.copy()])
    return combined_outputs_dict


def main(demo=False):
  cloudlog.warning("modeld init")

  sentry.set_tag("daemon", PROCESS_NAME)
  cloudlog.bind(daemon=PROCESS_NAME)
  setproctitle(PROCESS_NAME)
  config_realtime_process(7, 54)

  # Combined downloaded models currently ship one runtime artifact, so stay on the default
  # queue profile until a separate USBGPU artifact path exists for custom models.
  usbgpu = False

  while True:
    available_streams = VisionIpcClient.available_streams("camerad", block=False)
    if available_streams:
      use_extra_client = VisionStreamType.VISION_STREAM_WIDE_ROAD in available_streams and VisionStreamType.VISION_STREAM_ROAD in available_streams
      main_wide_camera = VisionStreamType.VISION_STREAM_ROAD not in available_streams
      break
    time.sleep(0.1)

  vipc_client_main_stream = VisionStreamType.VISION_STREAM_WIDE_ROAD if main_wide_camera else VisionStreamType.VISION_STREAM_ROAD
  vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True)
  vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False)
  cloudlog.warning(f"vision stream set up, main_wide_camera: {main_wide_camera}, use_extra_client: {use_extra_client}")

  while not vipc_client_main.connect(False):
    time.sleep(0.1)
  while use_extra_client and not vipc_client_extra.connect(False):
    time.sleep(0.1)

  cloudlog.warning(f"connected main cam with buffer size: {vipc_client_main.buffer_len} ({vipc_client_main.width} x {vipc_client_main.height})")
  if use_extra_client:
    cloudlog.warning(f"connected extra cam with buffer size: {vipc_client_extra.buffer_len} ({vipc_client_extra.width} x {vipc_client_extra.height})")

  start_time = time.monotonic()
  cloudlog.warning("setting up CL context")
  cl_context = CLContext()
  cloudlog.warning("loading combined model")
  model = ModelState(cl_context, usbgpu)
  cloudlog.warning(f"combined model loaded in {time.monotonic() - start_time:.1f}s, modeld starting")

  pm = messaging.PubMaster(["modelV2", "drivingModelData", "cameraOdometry", "starpilotModelV2"])
  sm = messaging.SubMaster(["deviceState", "carState", "roadCameraState", "liveCalibration", "driverMonitoringState", "carControl", "liveDelay", "starpilotPlan"])

  publish_state = PublishState()
  params = Params()

  frame_dropped_filter = FirstOrderFilter(0.0, 10.0, 1.0 / ModelConstants.MODEL_RUN_FREQ)
  last_vipc_frame_id = 0
  run_count = 0

  model_transform_main = np.zeros((3, 3), dtype=np.float32)
  model_transform_extra = np.zeros((3, 3), dtype=np.float32)
  live_calib_seen = False
  buf_main, buf_extra = None, None
  meta_main = FrameMeta()
  meta_extra = FrameMeta()
  camera_offset = CameraOffset()
  camera_offset.set_target(params.get_float("CameraOffset", return_default=True))

  if demo:
    CP = get_demo_car_params()
  else:
    CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams)
  cloudlog.info("modeld got CarParams: %s", CP.brand)

  long_delay = CP.longitudinalActuatorDelay + LONG_SMOOTH_SECONDS
  prev_action = log.ModelDataV2.Action()
  desire_helper = DesireHelper()
  starpilot_toggles = get_starpilot_toggles(sm)

  while True:
    while meta_main.timestamp_sof < meta_extra.timestamp_sof + 25000000:
      buf_main = vipc_client_main.recv()
      meta_main = FrameMeta(vipc_client_main)
      if buf_main is None:
        break

    if buf_main is None:
      cloudlog.debug("vipc_client_main no frame")
      continue

    if use_extra_client:
      while True:
        buf_extra = vipc_client_extra.recv()
        meta_extra = FrameMeta(vipc_client_extra)
        if buf_extra is None or meta_main.timestamp_sof < meta_extra.timestamp_sof + 25000000:
          break

      if buf_extra is None:
        cloudlog.debug("vipc_client_extra no frame")
        continue

      if abs(meta_main.timestamp_sof - meta_extra.timestamp_sof) > 10000000:
        cloudlog.error(
          f"frames out of sync! main: {meta_main.frame_id} ({meta_main.timestamp_sof / 1e9:.5f}), "
          f"extra: {meta_extra.frame_id} ({meta_extra.timestamp_sof / 1e9:.5f})"
        )
    else:
      buf_extra = buf_main
      meta_extra = meta_main

    sm.update(0)
    desire = desire_helper.desire
    is_rhd = sm["driverMonitoringState"].isRHD
    frame_id = sm["roadCameraState"].frameId
    v_ego = max(sm["carState"].vEgo, 0.0)
    lat_delay = sm["liveDelay"].lateralDelay + LAT_SMOOTH_SECONDS
    if sm.frame % 60 == 0:
      camera_offset.set_target(params.get_float("CameraOffset", return_default=True))

    if sm.updated["liveCalibration"] and sm.seen["roadCameraState"] and sm.seen["deviceState"]:
      device_from_calib_euler = np.array(sm["liveCalibration"].rpyCalib, dtype=np.float32)
      dc = DEVICE_CAMERAS[(str(sm["deviceState"].deviceType), str(sm["roadCameraState"].sensor))]
      model_transform_main = get_warp_matrix(
        device_from_calib_euler,
        dc.ecam.intrinsics if main_wide_camera else dc.fcam.intrinsics,
        False,
      ).astype(np.float32)
      model_transform_extra = get_warp_matrix(device_from_calib_euler, dc.ecam.intrinsics, True).astype(np.float32)
      camera_height = sm["liveCalibration"].height[0] if sm["liveCalibration"].height else DEFAULT_CAMERA_HEIGHT
      model_transform_main, model_transform_extra = camera_offset.update(
        model_transform_main,
        model_transform_extra,
        str(sm["deviceState"].deviceType),
        str(sm["roadCameraState"].sensor),
        camera_height,
        main_wide_camera,
      )
      live_calib_seen = True

    traffic_convention = np.zeros(2, dtype=np.float32)
    traffic_convention[int(is_rhd)] = 1

    vec_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
    if 0 <= desire < ModelConstants.DESIRE_LEN:
      vec_desire[desire] = 1

    vipc_dropped_frames = max(0, meta_main.frame_id - last_vipc_frame_id - 1)
    frames_dropped = frame_dropped_filter.update(min(vipc_dropped_frames, 10))
    if run_count < 10:
      frame_dropped_filter.x = 0.0
      frames_dropped = 0.0
    run_count += 1

    frame_drop_ratio = frames_dropped / (1 + frames_dropped)
    prepare_only = vipc_dropped_frames > 0
    if prepare_only:
      cloudlog.error(f"skipping model eval. Dropped {vipc_dropped_frames} frames")

    bufs = {name: buf_extra if "big" in name else buf_main for name in model.vision_input_names}
    transforms = {name: model_transform_extra if "big" in name else model_transform_main for name in model.vision_input_names}

    frame_delay = DT_MDL
    action_delay = DT_MDL / 2
    lat_action_t = lat_delay + frame_delay + action_delay
    long_action_t = long_delay + frame_delay + action_delay

    inputs: dict[str, np.ndarray] = {
      model.desire_key: vec_desire,
      "traffic_convention": traffic_convention,
    }
    if "action_t" in model.npy:
      inputs["action_t"] = np.array([lat_action_t, long_action_t], dtype=np.float32)

    start = time.perf_counter()
    model_output = model.run(bufs, transforms, inputs, prepare_only)
    end = time.perf_counter()
    model_execution_time = end - start

    if model_output is not None:
      modelv2_send = messaging.new_message("modelV2")
      starpilot_modelv2_send = messaging.new_message("starpilotModelV2")
      drivingdata_send = messaging.new_message("drivingModelData")
      posenet_send = messaging.new_message("cameraOdometry")

      action = get_action_from_model(model_output, prev_action, v_ego)
      prev_action = action
      fill_model_msg(
        drivingdata_send,
        modelv2_send,
        model_output,
        action,
        publish_state,
        meta_main.frame_id,
        meta_extra.frame_id,
        frame_id,
        frame_drop_ratio,
        meta_main.timestamp_eof,
        model_execution_time,
        live_calib_seen,
      )

      desire_state = modelv2_send.modelV2.meta.desireState
      l_lane_change_prob = desire_state[log.Desire.laneChangeLeft]
      r_lane_change_prob = desire_state[log.Desire.laneChangeRight]
      lane_change_prob = l_lane_change_prob + r_lane_change_prob
      desire_helper.update(sm["carState"], sm["carControl"].latActive, lane_change_prob, sm["starpilotPlan"], starpilot_toggles, sm["carControl"].enabled)
      modelv2_send.modelV2.meta.laneChangeState = desire_helper.lane_change_state
      modelv2_send.modelV2.meta.laneChangeDirection = desire_helper.lane_change_direction
      starpilot_modelv2_send.starpilotModelV2.turnDirection = desire_helper.turn_direction
      drivingdata_send.drivingModelData.meta.laneChangeState = desire_helper.lane_change_state
      drivingdata_send.drivingModelData.meta.laneChangeDirection = desire_helper.lane_change_direction

      fill_pose_msg(posenet_send, model_output, meta_main.frame_id, vipc_dropped_frames, meta_main.timestamp_eof, live_calib_seen)
      pm.send("modelV2", modelv2_send)
      pm.send("starpilotModelV2", starpilot_modelv2_send)
      pm.send("drivingModelData", drivingdata_send)
      pm.send("cameraOdometry", posenet_send)

    last_vipc_frame_id = meta_main.frame_id
    if sm.updated["starpilotPlan"]:
      starpilot_toggles = get_starpilot_toggles(sm)