Refactor type annotations and return types in model_runner

Removed specific type hints and return annotations for the `self.inputs` attribute and the `run_model` method to enhance flexibility and maintain consistency. These changes streamline the code and improve compatibility with varying input/output types during model inference.
Refactor type hints for improved code consistency.
2026-06-12 06:24:19 +08:00 · 2024-12-29 18:04:05 +01:00 · 2024-12-29 17:59:50 +01:00 · 2024-12-29 17:51:30 +01:00 · 2024-12-29 17:50:58 +01:00 · 2024-12-29 17:48:00 +01:00
4 changed files with 113 additions and 109 deletions
--- a/selfdrive/modeld/modeld.py
+++ b/selfdrive/modeld/modeld.py
@@ -1,22 +1,13 @@
 #!/usr/bin/env python3
-import os
 from openpilot.system.hardware import TICI

+from openpilot.selfdrive.modeld.runners.model_runner import ONNXRunner, TinygradRunner
+
 #
-USE_TINYGRAD = os.getenv('USE_TINYGRAD', True) or TICI
-if USE_TINYGRAD:
-  from tinygrad.tensor import Tensor
-  from tinygrad.dtype import dtypes
-  from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
-  os.environ['QCOM'] = '1'
-else:
-  from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner, ORT_TYPES_TO_NP_TYPES
 import time
-import pickle
 import numpy as np
 import cereal.messaging as messaging
 from cereal import car, log
-from pathlib import Path
 from setproctitle import setproctitle
 from cereal.messaging import PubMaster, SubMaster
 from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
@@ -34,13 +25,8 @@ from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_
 from openpilot.selfdrive.modeld.constants import ModelConstants
 from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext

-
 PROCESS_NAME = "selfdrive.modeld.modeld"
-SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')

-MODEL_PATH = Path(__file__).parent / 'models/supercombo.onnx'
-MODEL_PKL_PATH = Path(__file__).parent / 'models/supercombo_tinygrad.pkl'
-METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'

 class FrameMeta:
  frame_id: int = 0
@@ -61,42 +47,21 @@ class ModelState:
    self.frames = {'input_imgs': DrivingModelFrame(context), 'big_input_imgs': DrivingModelFrame(context)}
    self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
    self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
-    self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
+    self.desire_20Hz =  np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)

    # img buffers are managed in openCL transform code
-    self.numpy_inputs = {}
+    self.numpy_inputs = {
+      'desire': np.zeros((1, (ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32),
+      'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float32),
+      'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN,  ModelConstants.FEATURE_LEN), dtype=np.float32),
+    }

-    with open(METADATA_PATH, 'rb') as f:
-      model_metadata = pickle.load(f)
-    self.input_shapes =  model_metadata['input_shapes']
-
-    for key, shape in self.input_shapes.items():
-      if key not in self.frames: # Managed by opencl
-        self.numpy_inputs[key] = np.zeros(shape, dtype=np.float32)
-
-    self.output_slices = model_metadata['output_slices']
-    net_output_size = model_metadata['output_shapes']['outputs'][1]
-    self.output = np.zeros(net_output_size, dtype=np.float32)
+    # Initialize model runner
+    self.model_runner = TinygradRunner() if TICI else ONNXRunner(self.frames)
    self.parser = Parser()

-    if USE_TINYGRAD:
-      self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
-      with open(MODEL_PKL_PATH, "rb") as f:
-        self.model_run = pickle.load(f)
-    else:
-      self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH)
-      self.onnx_model_metadata = {input.name: input.type for input in self.onnx_cpu_runner.get_inputs()}
-
-    num_elements = self.numpy_inputs['features_buffer'].shape[1]
-    step_size = int(-100 / num_elements)
-    self.full_features_20Hz_idxs = np.arange(step_size, step_size * (num_elements + 1), step_size)[::-1]
-    self.desire_reshape_dims = (self.numpy_inputs['desire'].shape[0], self.numpy_inputs['desire'].shape[1], -1, self.numpy_inputs['desire'].shape[2])
-
-  def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
-    parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
-    if SEND_RAW_PRED:
-      parsed_model_outputs['raw_pred'] = model_outputs.copy()
-    return parsed_model_outputs
+    net_output_size = self.model_runner.model_metadata['output_shapes']['outputs'][1]
+    self.output = np.zeros(net_output_size, dtype=np.float32)

  def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray,
                inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
@@ -107,50 +72,27 @@ class ModelState:

    self.desire_20Hz[:-1] = self.desire_20Hz[1:]
    self.desire_20Hz[-1] = new_desire
-    self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape(self.desire_reshape_dims).max(axis=2)
+    self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape((1,25,4,-1)).max(axis=2)

    self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
    imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
               'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}

-    if USE_TINYGRAD:
-      # The imgs tensors are backed by opencl memory, only need init once
-      for key in imgs_cl:
-        if not TICI or key not in self.tensor_inputs:
-          index = self.model_run.captured.expected_names.index(key)
-          _, _, dtype, device = self.model_run.captured.expected_st_vars_dtype_device[index]
-          if TICI:
-            self.tensor_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtype)
-          else:
-            shape = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
-            self.tensor_inputs[key] = Tensor(shape, device=device, dtype=dtype).realize()
-    else:
-      for key in imgs_cl:
-        dtype = self.onnx_model_metadata[key]
-        self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).astype(ORT_TYPES_TO_NP_TYPES[dtype]).reshape(self.input_shapes[key])
+    # Prepare inputs using the model runner
+    self.model_runner.prepare_inputs(imgs_cl, self.numpy_inputs)

    if prepare_only:
      return None

-    if USE_TINYGRAD:
-      self.output = self.model_run(**self.tensor_inputs).numpy().flatten()
-    else:
-      self.output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten()
-
-    outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
+    # Run model inference
+    self.output = self.model_runner.run_model()
+    outputs = self.parser.parse_outputs(self.model_runner.slice_outputs(self.output))

    self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
    self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]

-    self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[self.full_features_20Hz_idxs]
-    if "desired_curvature" in outputs:
-      if "prev_desired_curvs" in self.numpy_inputs.keys():
-        self.numpy_inputs['prev_desired_curvs'][:-1] = self.numpy_inputs['prev_desired_curvs'][1:]
-        self.numpy_inputs['prev_desired_curvs'][-1] = outputs['desired_curvature'][:, 0:1, None]  # Reshape to (1,1,1)
-      if "prev_desired_curv" in self.numpy_inputs.keys():
-        # First shift everything
-        self.numpy_inputs['prev_desired_curv'][:-ModelConstants.PREV_DESIRED_CURV_LEN] = self.numpy_inputs['prev_desired_curv'][ModelConstants.PREV_DESIRED_CURV_LEN:]
-        self.numpy_inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = outputs['desired_curvature'][:, :1].reshape(1, -1, 1)
+    idxs = np.arange(-4,-100,-4)[::-1]
+    self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[idxs]
    return outputs


@@ -211,7 +153,6 @@ def main(demo=False):
  meta_main = FrameMeta()
  meta_extra = FrameMeta()

-
  if demo:
    CP = get_demo_car_params()
  else:
@@ -261,10 +202,6 @@ def main(demo=False):
    is_rhd = sm["driverMonitoringState"].isRHD
    frame_id = sm["roadCameraState"].frameId
    v_ego = max(sm["carState"].vEgo, 0.)
-    lateral_control_params = None #TODO-SP: hardcoded ,this shouldnt' be here this way. We should do it more dynamically
-    if "lateral_control_params" in model.numpy_inputs.keys(): #TODO-SP: hardcoded ,this shouldnt' be here this way. We should do it more dynamically
-      lateral_control_params = np.array([sm["carState"].vEgo, steer_delay], dtype=np.float32)
-
    if sm.updated["liveCalibration"] and sm.seen['roadCameraState'] and sm.seen['deviceState']:
      device_from_calib_euler = np.array(sm["liveCalibration"].rpyCalib, dtype=np.float32)
      dc = DEVICE_CAMERAS[(str(sm['deviceState'].deviceType), str(sm['roadCameraState'].sensor))]
@@ -296,8 +233,6 @@ def main(demo=False):
      'desire': vec_desire,
      'traffic_convention': traffic_convention,
      }
-    if "lateral_control_params" in model.numpy_inputs.keys():
-      inputs['lateral_control_params'] = lateral_control_params

    mt1 = time.perf_counter()
    model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
--- a/selfdrive/modeld/runners/init.py
+++ b/selfdrive/modeld/runners/init.py
--- a/selfdrive/modeld/runners/model_runner.py
+++ b/selfdrive/modeld/runners/model_runner.py
@@ -0,0 +1,92 @@
+import os
+from openpilot.system.hardware import TICI
+
+#
+if TICI:
+  from tinygrad.tensor import Tensor
+  from tinygrad.dtype import dtypes
+  from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
+  os.environ['QCOM'] = '1'
+else:
+  from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner
+import pickle
+import numpy as np
+from pathlib import Path
+from abc import ABC, abstractmethod
+from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLMem
+
+SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
+MODEL_PATH = Path(__file__).parent / '../models/supercombo.onnx'
+MODEL_PKL_PATH = Path(__file__).parent / '../models/supercombo_tinygrad.pkl'
+METADATA_PATH = Path(__file__).parent / '../models/supercombo_metadata.pkl'
+
+
+class ModelRunner(ABC):
+  """Abstract base class for model runners that defines the interface for running ML models."""
+
+  def __init__(self):
+    """Initialize the model runner with paths to model and metadata files."""
+    with open(METADATA_PATH, 'rb') as f:
+      self.model_metadata = pickle.load(f)
+    self.input_shapes = self.model_metadata['input_shapes']
+    self.output_slices = self.model_metadata['output_slices']
+    self.inputs: dict = {}
+
+  @abstractmethod
+  def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray])-> dict:
+    """Prepare inputs for model inference."""
+
+  @abstractmethod
+  def run_model(self):
+    """Run model inference with prepared inputs."""
+
+  def slice_outputs(self, model_outputs: np.ndarray) -> dict:
+    """Slice model outputs according to metadata configuration."""
+    parsed_outputs = {k: model_outputs[np.newaxis, v] for k, v in self.output_slices.items()}
+    if SEND_RAW_PRED:
+      parsed_outputs['raw_pred'] = model_outputs.copy()
+    return parsed_outputs
+
+
+class TinygradRunner(ModelRunner):
+  """Tinygrad implementation of model runner for TICI hardware."""
+
+  def __init__(self):
+    super().__init__()
+    # Load Tinygrad model
+    with open(MODEL_PKL_PATH, "rb") as f:
+      self.model_run = pickle.load(f)
+
+  def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray]) -> dict:
+    # Initialize image tensors if not already done
+    for key in imgs_cl:
+      if key not in self.inputs:
+        self.inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8)
+
+    # Update numpy inputs
+    for k, v in numpy_inputs.items():
+      if k not in self.inputs:
+        self.inputs[k] = Tensor(v, device='NPY').realize()
+
+    return self.inputs
+
+  def run_model(self):
+    return self.model_run(**self.inputs).numpy().flatten()
+
+
+class ONNXRunner(ModelRunner):
+  """ONNX implementation of model runner for non-TICI hardware."""
+
+  def __init__(self, frames: dict[str, DrivingModelFrame]):
+    super().__init__()
+    self.runner = make_onnx_cpu_runner(MODEL_PATH)
+    self.frames = frames
+
+  def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray]) -> dict:
+    self.inputs = numpy_inputs.copy()
+    for key in imgs_cl:
+      self.inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
+    return self.inputs
+
+  def run_model(self):
+    return self.runner.run(None, self.inputs)[0].flatten()
--- a/tools/replay/framereader.cc
+++ b/tools/replay/framereader.cc
@@ -119,13 +119,7 @@ VideoDecoder::~VideoDecoder() {
 }

 bool VideoDecoder::open(AVCodecParameters *codecpar, bool hw_decoder) {
-  const AVCodec *decoder = avcodec_find_decoder_by_name("h264_mediacodec");
-  if (!decoder) {
-    decoder = avcodec_find_decoder_by_name("h264_qcom");
-  }
-  if (!decoder) {
-    decoder = avcodec_find_decoder(codecpar->codec_id);
-  }
+  const AVCodec *decoder = avcodec_find_decoder(codecpar->codec_id);
  if (!decoder) return false;

  decoder_ctx = avcodec_alloc_context3(decoder);
@@ -133,23 +127,6 @@ bool VideoDecoder::open(AVCodecParameters *codecpar, bool hw_decoder) {
    rError("Failed to allocate or initialize codec context");
    return false;
  }
-
-  // More aggressive settings focused on reducing lag
-  decoder_ctx->thread_count = static_cast<int>(std::min(std::thread::hardware_concurrency(), 16u));
-  decoder_ctx->thread_type = FF_THREAD_FRAME | FF_THREAD_SLICE;
-    
-  // Very aggressive frame dropping
-  decoder_ctx->flags |= AV_CODEC_FLAG_LOW_DELAY;
-  decoder_ctx->flags2 |= AV_CODEC_FLAG2_FAST;
-  decoder_ctx->skip_frame = AVDISCARD_BIDIR;  // More aggressive frame skipping
-  decoder_ctx->skip_loop_filter = AVDISCARD_ALL;
-  decoder_ctx->workaround_bugs = FF_BUG_AUTODETECT;
-    
-  // Minimize buffering
-  decoder_ctx->max_b_frames = 0;
-  decoder_ctx->strict_std_compliance = FF_COMPLIANCE_UNOFFICIAL;  // Allow faster non-standard optimizations
-  decoder_ctx->flags |= AV_CODEC_FLAG_OUTPUT_CORRUPT;  // Output frames even if slightly corrupted
-
  width = (decoder_ctx->width + 3) & ~3;
  height = decoder_ctx->height;
Author	SHA1	Message	Date
DevTekVE	40d9e092b6	Refactor type annotations and return types in model_runner Removed specific type hints and return annotations for `self.inputs` and `run_model` methods to enhance flexibility and maintain consistency. These changes streamline the code and improve compatibility with varying input/output types during model inference.	2024-12-29 18:04:05 +01:00
DevTekVE	6318aa52e3	Refactor type hints for improved code consistency. Replaced specific type hints like `dict[str, np.ndarray]` with generic `dict` in several method signatures to simplify annotations. This improves overall readability and maintains functionality, while aligning with existing code practices.	2024-12-29 17:59:50 +01:00
DevTekVE	7852fa66b1	Clean	2024-12-29 17:51:30 +01:00
DevTekVE	8d9c1e2035	Add type annotations to inputs and prepare_inputs method This commit adds explicit type annotations for the `inputs` dictionary and the return value of the `prepare_inputs` method in `model_runner.py`. These changes improve code readability and ensure type consistency, enhancing maintainability and reducing potential errors.	2024-12-29 17:50:58 +01:00
DevTekVE	c39f722f7b	Rename TinyGradRunner to TinygradRunner for consistency. The class name and references were updated to maintain naming consistency across the codebase. This aligns with naming conventions and improves code clarity. No functional changes were introduced.	2024-12-29 17:48:00 +01:00
DevTekVE	550c08ac4c	Refactor `prepare_inputs` to use explicit `CLMem` type. This update replaces the generic `any` type with the more explicit `CLMem` type for better type safety and clarity. It ensures consistency across the `prepare_inputs` method implementations in derived classes, improving code readability and robustness.	2024-12-29 17:44:23 +01:00
DevTekVE	8c838af5fa	Refactor model inference to use internal state for inputs Simplified the `run_model` method by removing the requirement to pass inputs as arguments, and instead leveraging an internal `inputs` state. Adjusted `prepare_inputs` methods across model runners to populate this internal state. This refactor improves code clarity and reduces redundancy in managing input data.	2024-12-29 17:40:45 +01:00
DevTekVE	2521d60b1b	Fix model_runner output to ensure tensor conversion to NumPy. Updated the `run_model` method to explicitly convert tensor outputs to NumPy arrays using `.numpy()`. This ensures compatibility with downstream processes relying on NumPy array inputs.	2024-12-29 17:31:27 +01:00
DevTekVE	206398d3b5	Remove unused imports and redundant pass statements. This change cleans up the code by removing unused imports and redundant `pass` statements in abstract methods. It improves code readability and adheres to cleaner coding practices.	2024-12-29 17:22:13 +01:00
DevTekVE	a9e99615cd	Remove redundant tensor assignment in model runner. The `assign` operation was unnecessary as new tensors are realized for updated inputs. This simplifies the code and avoids redundant updates, improving clarity and maintainability.	2024-12-29 17:20:15 +01:00
DevTekVE	ec44b78ad8	Refactor model runner initialization logic. Removed the `create_model_runner` factory function and replaced it with direct initialization of `TinyGradRunner` or `ONNXRunner`. Simplified the `__init__` methods by standardizing paths as constants within `model_runner.py` for cleaner and more maintainable code.	2024-12-29 17:14:08 +01:00
DevTekVE	dcd3e09294	Refactor formatting in model_runner.py for readability. Consolidated multiline function declarations and calls into single lines where appropriate to improve code readability and maintainability. No changes were made to the functionality.	2024-12-29 17:01:26 +01:00
DevTekVE	839a7a58e0	Refactoring model handling in modeld.py with ModelRunner abstraction A significant refactoring of `modeld.py` was performed to enhance the handling of model logic. A new abstraction called `ModelRunner` has been introduced which encapsulates the model-running logic. This refactor simplifies the `modeld.py` script and provides easier management across different hardware configurations. Using this segregation, varying processing methods for models can be handled distinctly ensuring cleaner and more maintainable code. An instance of the appropriate model runner is now created during initialization based on whether a TICI hardware or a different type is used.	2024-12-29 16:59:10 +01:00