Going back to pre-uint8 to test our luck

Update input handling logic in modeld.py
Refined how input keys are processed to ensure proper assignment, excluding 'desire'. This improves flexibility and correctness when managing input buffers in the model pipeline.
2026-07-27 13:42:04 +08:00 · 2025-01-06 14:11:50 +01:00 · 2025-01-06 13:07:05 +01:00 · 2025-01-06 12:50:03 +01:00 · 2025-01-06 12:25:23 +01:00 · 2025-01-06 11:47:11 +01:00
9 changed files with 169 additions and 192 deletions
@@ -1,11 +1,9 @@
 #!/usr/bin/env python3
 import os
 import time
-import pickle
 import numpy as np
 import cereal.messaging as messaging
 from cereal import car, log
-from pathlib import Path
 from setproctitle import setproctitle
 from cereal.messaging import PubMaster, SubMaster
 from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
@@ -23,16 +21,14 @@ from openpilot.sunnypilot.modeld.parse_model_outputs import Parser
 from openpilot.sunnypilot.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
 from openpilot.sunnypilot.modeld.constants import ModelConstants
 from openpilot.sunnypilot.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext
+from openpilot.common.realtime import DT_MDL
+from openpilot.common.numpy_fast import interp
+
+from openpilot.sunnypilot.modeld.runners.run_helpers import load_model, load_metadata, prepare_inputs

 PROCESS_NAME = "sunnypilot.modeld.modeld"
 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')

-MODEL_PATHS = {
-  ModelRunner.THNEED: Path(__file__).parent / 'models/supercombo.thneed',
-  ModelRunner.ONNX: Path(__file__).parent / 'models/supercombo.onnx'}
-
-METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'
-

 class FrameMeta:
  frame_id: int = 0
@@ -58,27 +54,29 @@ class ModelState:
    self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
    self.desire_20Hz =  np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)

-    # img buffers are managed in openCL transform code
-    self.inputs = {
-      'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
-      'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
-      'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
-    }
+    model_paths = load_model()
+    self.model_metadata = load_metadata()
+    self.inputs = prepare_inputs(self.model_metadata)

-    with open(METADATA_PATH, 'rb') as f:
-      model_metadata = pickle.load(f)
-
-    self.output_slices = model_metadata['output_slices']
-    net_output_size = model_metadata['output_shapes']['outputs'][1]
+    self.output_slices = self.model_metadata['output_slices']
+    net_output_size = self.model_metadata['output_shapes']['outputs'][1]
    self.output = np.zeros(net_output_size, dtype=np.float32)
    self.parser = Parser()

-    self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, context)
+    self.model = ModelRunner(model_paths, self.output, Runtime.GPU, False, context)
    self.model.addInput("input_imgs", None)
    self.model.addInput("big_input_imgs", None)
    for k,v in self.inputs.items():
      self.model.addInput(k, v)

+    num_elements = self.model_metadata['input_shapes']['features_buffer'][1]
+    step_size = int(-100 / num_elements)
+    self.feature_buffer_idxs = np.arange(step_size, step_size * (num_elements + 1), step_size)[::-1]
+
+    desired_shape = self.model_metadata["input_shapes"]["desire"][1]
+    middle_dim = int(self.desire_20Hz.shape[0] / desired_shape)
+    self.desire_reshape_dims = (desired_shape, middle_dim, -1)
+
  def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
    parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
    if SEND_RAW_PRED:
@@ -94,7 +92,11 @@ class ModelState:

    self.desire_20Hz[:-1] = self.desire_20Hz[1:]
    self.desire_20Hz[-1] = new_desire
-    self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
+    self.inputs['desire'][:] = self.desire_20Hz.reshape(self.desire_reshape_dims).max(axis=1).flatten()
+
+    for key in self.inputs:
+      if key in inputs and key not in ['desire']:
+        self.inputs[key][:] = inputs[key]

    self.inputs['traffic_convention'][:] = inputs['traffic_convention']

@@ -105,13 +107,29 @@ class ModelState:
      return None

    self.model.execute()
-    outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
+    outputs = self.parser.parse_outputs(self.slice_outputs(self.output), self.inputs.keys())

    self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
    self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]

-    idxs = np.arange(-4,-100,-4)[::-1]
-    self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
+    self.inputs['features_buffer'][:] = self.full_features_20Hz[self.feature_buffer_idxs].flatten()
+    if "desired_curvature" in outputs:
+      input_name_prev = None
+
+      if "prev_desired_curvs" in self.inputs.keys():
+        input_name_prev = 'prev_desired_curvs'
+      elif "prev_desired_curv" in self.inputs.keys():
+        input_name_prev = 'prev_desired_curv'
+
+      if input_name_prev is not None:
+        len = outputs['desired_curvature'][0].size
+        self.inputs[input_name_prev][:-len] = self.inputs[input_name_prev][len:]
+        self.inputs[input_name_prev][-len:] = outputs['desired_curvature'][0, :]
+
+    if "lat_planner_solution" in outputs:
+      if "lat_planner_state" in self.inputs.keys():
+        self.inputs['lat_planner_state'][2] = interp(DT_MDL, ModelConstants.T_IDXS, outputs['lat_planner_solution'][0, :, 2])
+        self.inputs['lat_planner_state'][3] = interp(DT_MDL, ModelConstants.T_IDXS, outputs['lat_planner_solution'][0, :, 3])
    return outputs


@@ -249,10 +267,24 @@ def main(demo=False):
    if prepare_only:
      cloudlog.error(f"skipping model eval. Dropped {vipc_dropped_frames} frames")

-    inputs:dict[str, np.ndarray] = {
+    inputs: dict[str, np.ndarray] = {
      'desire': vec_desire,
      'traffic_convention': traffic_convention,
-      }
+    }
+
+    if "lateral_control_params" in model.inputs.keys():
+      inputs['lateral_control_params'] = np.array([sm["carState"].vEgo, steer_delay], dtype=np.float32)
+
+    # TODO-SP: Below should be good, but I have not tested a model with it so I can't be sure until we test it
+    # if "driving_style" in model.inputs.keys():
+    #  inputs['driving_style'] = np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=np.float32)
+    # 
+    # if "nav_features" in model.inputs.keys():
+    #   inputs['nav_features'] = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32)  # Get size from shape
+    # 
+    # if "nav_instructions" in model.inputs.keys():
+    #   inputs['nav_instructions'] = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32)  # Get size from shape
+

    mt1 = time.perf_counter()
    model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
@@ -1,69 +1,50 @@
-#include "sunnypilot/modeld/models/commonmodel.h"
+#include "selfdrive/modeld/models/commonmodel.h"

+#include <cassert>
 #include <cmath>
 #include <cstring>

 #include "common/clutil.h"

-DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
+ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
  input_frames = std::make_unique<uint8_t[]>(buf_size);
-  //input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
-  img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
-  region.origin = 4 * frame_size_bytes;
-  region.size = frame_size_bytes;
-  last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err));

+  q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
+  y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
+  u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
+  v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
+  net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(uint8_t), NULL, &err));
+
+  transform_init(&transform, context, device_id);
  loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
-  init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
 }

-uint8_t* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
-  run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
-
-  for (int i = 0; i < 4; i++) {
-    CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
-  }
-  loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
+uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
+  transform_queue(&this->transform, q,
+                yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
+                y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);

  if (output == NULL) {
-    CL_CHECK(clEnqueueReadBuffer(q, img_buffer_20hz_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[0], 0, nullptr, nullptr));
-    CL_CHECK(clEnqueueReadBuffer(q, last_img_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
+    loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
+
+    std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE);
+    CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
    clFinish(q);
    return &input_frames[0];
  } else {
-    copy_queue(&loadyuv, q, img_buffer_20hz_cl, *output, 0, 0, frame_size_bytes);
-    copy_queue(&loadyuv, q, last_img_cl, *output, 0, frame_size_bytes, frame_size_bytes);
-
+    loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, *output, true);
    // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
    clFinish(q);
    return NULL;
  }
 }

-DrivingModelFrame::~DrivingModelFrame() {
-  deinit_transform();
+ModelFrame::~ModelFrame() {
+  transform_destroy(&transform);
  loadyuv_destroy(&loadyuv);
-  CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
-  CL_CHECK(clReleaseMemObject(last_img_cl));
+  CL_CHECK(clReleaseMemObject(net_input_cl));
+  CL_CHECK(clReleaseMemObject(v_cl));
+  CL_CHECK(clReleaseMemObject(u_cl));
+  CL_CHECK(clReleaseMemObject(y_cl));
  CL_CHECK(clReleaseCommandQueue(q));
-}
-
-
-MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
-  input_frames = std::make_unique<uint8_t[]>(buf_size);
-  //input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
-
-  init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
-}
-uint8_t* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
-  run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
-  CL_CHECK(clEnqueueReadBuffer(q, y_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), input_frames.get(), 0, nullptr, nullptr));
-  clFinish(q);
-  //return &y_cl;
-  return input_frames.get();
-}
-
-MonitoringModelFrame::~MonitoringModelFrame() {
-  deinit_transform();
-  CL_CHECK(clReleaseCommandQueue(q));
-}
+}
@@ -2,7 +2,6 @@

 #include <cfloat>
 #include <cstdlib>
-#include <cassert>

 #include <memory>

@@ -19,80 +18,19 @@

 class ModelFrame {
 public:
-  ModelFrame(cl_device_id device_id, cl_context context) {
-    q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
-  }
-  virtual ~ModelFrame() {}
-  virtual uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) { return NULL; }
-  /*
-  uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
-    CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
-    clFinish(q);
-    return &input_frames[0];
-  }
-  */
-
-  int MODEL_WIDTH;
-  int MODEL_HEIGHT;
-  int MODEL_FRAME_SIZE;
-  int buf_size;
-
-protected:
-  cl_mem y_cl, u_cl, v_cl;
-  Transform transform;
-  cl_command_queue q;
-  std::unique_ptr<uint8_t[]> input_frames;
-
-  void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
-    y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
-    u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
-    v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
-    transform_init(&transform, context, device_id);
-  }
-
-  void deinit_transform() {
-    transform_destroy(&transform);
-    CL_CHECK(clReleaseMemObject(v_cl));
-    CL_CHECK(clReleaseMemObject(u_cl));
-    CL_CHECK(clReleaseMemObject(y_cl));
-  }
-
-  void run_transform(cl_mem yuv_cl, int model_width, int model_height, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
-    transform_queue(&transform, q,
-        yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
-        y_cl, u_cl, v_cl, model_width, model_height, projection);
-  }
-};
-
-class DrivingModelFrame : public ModelFrame {
-public:
-  DrivingModelFrame(cl_device_id device_id, cl_context context);
-  ~DrivingModelFrame();
-  uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
+  ModelFrame(cl_device_id device_id, cl_context context);
+  ~ModelFrame();
+  uint8_t* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);

  const int MODEL_WIDTH = 512;
  const int MODEL_HEIGHT = 256;
  const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
  const int buf_size = MODEL_FRAME_SIZE * 2;
-  const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);

 private:
+  Transform transform;
  LoadYUVState loadyuv;
-  cl_mem img_buffer_20hz_cl, last_img_cl;//, input_frames_cl;
-  cl_buffer_region region;
-};
-
-class MonitoringModelFrame : public ModelFrame {
-public:
-  MonitoringModelFrame(cl_device_id device_id, cl_context context);
-  ~MonitoringModelFrame();
-  uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
-
-  const int MODEL_WIDTH = 1440;
-  const int MODEL_HEIGHT = 960;
-  const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT;
-  const int buf_size = MODEL_FRAME_SIZE;
-
-private:
-  // cl_mem input_frame_cl;
-};
+  cl_command_queue q;
+  cl_mem y_cl, u_cl, v_cl, net_input_cl;
+  std::unique_ptr<uint8_t[]> input_frames;
+};
@@ -11,16 +11,8 @@ cdef extern from "common/clutil.h":
  cl_device_id cl_get_device_id(unsigned long)
  cl_context cl_create_context(cl_device_id)

-cdef extern from "sunnypilot/modeld/models/commonmodel.h":
+cdef extern from "selfdrive/modeld/models/commonmodel.h":
  cppclass ModelFrame:
    int buf_size
-    # unsigned char * buffer_from_cl(cl_mem*, int);
-    unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
-
-  cppclass DrivingModelFrame:
-    int buf_size
-    DrivingModelFrame(cl_device_id, cl_context)
-
-  cppclass MonitoringModelFrame:
-    int buf_size
-    MonitoringModelFrame(cl_device_id, cl_context)
+    ModelFrame(cl_device_id, cl_context)
+    unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
@@ -4,12 +4,11 @@
 import numpy as np
 cimport numpy as cnp
 from libc.string cimport memcpy
-from libc.stdint cimport uintptr_t

 from msgq.visionipc.visionipc cimport cl_mem
 from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext
-from sunnypilot.modeld.models.commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context
-from sunnypilot.modeld.models.commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame
+from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context
+from .commonmodel cimport mat3, ModelFrame as cppModelFrame


 cdef class CLContext(BaseCLContext):
@@ -24,17 +23,11 @@ cdef class CLMem:
    mem.mem = <cl_mem*> cmem
    return mem

-  @property
-  def mem_address(self):
-    return <uintptr_t>(self.mem)
-
-def cl_from_visionbuf(VisionBuf buf):
-  return CLMem.create(<void*>&buf.buf.buf_cl)
-
-
 cdef class ModelFrame:
  cdef cppModelFrame * frame
-  cdef int buf_size
+
+  def __cinit__(self, CLContext context):
+    self.frame = new cppModelFrame(context.device_id, context.context)

  def __dealloc__(self):
    del self.frame
@@ -49,28 +42,4 @@ cdef class ModelFrame:
      data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem)
    if not data:
      return None
-
-    return np.asarray(<cnp.uint8_t[:self.buf_size]> data)
-    # return CLMem.create(data)
-
-  # def buffer_from_cl(self, CLMem in_frames):
-  #   cdef unsigned char * data2
-  #   data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
-  #   return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)
-
-
-cdef class DrivingModelFrame(ModelFrame):
-  cdef cppDrivingModelFrame * _frame
-
-  def __cinit__(self, CLContext context):
-    self._frame = new cppDrivingModelFrame(context.device_id, context.context)
-    self.frame = <cppModelFrame*>(self._frame)
-    self.buf_size = self._frame.buf_size
-
-cdef class MonitoringModelFrame(ModelFrame):
-  cdef cppMonitoringModelFrame * _frame
-
-  def __cinit__(self, CLContext context):
-    self._frame = new cppMonitoringModelFrame(context.device_id, context.context)
-    self.frame = <cppModelFrame*>(self._frame)
-    self.buf_size = self._frame.buf_size
+    return np.asarray(<cnp.uint8_t[:self.frame.buf_size]> data)
@@ -84,7 +84,8 @@ class Parser:
    outs[name] = pred_mu_final.reshape(final_shape)
    outs[name + '_stds'] = pred_std_final.reshape(final_shape)

-  def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
+  def parse_outputs(self, outs: dict[str, np.ndarray], input_keys: [str]) -> dict[str, np.ndarray]:
+    """ Parse the model outputs into a dictionary of numpy arrays. The input_keys are used to determine how the output should be parsed. """
    self.parse_mdn('plan', outs, in_N=ModelConstants.PLAN_MHP_N, out_N=ModelConstants.PLAN_MHP_SELECTION,
                   out_shape=(ModelConstants.IDX_N,ModelConstants.PLAN_WIDTH))
    self.parse_mdn('lane_lines', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_LANE_LINES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH))
@@ -96,6 +97,8 @@ class Parser:
                   out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH))
    if 'lat_planner_solution' in outs:
      self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH))
+    if 'desired_curvature' in outs and "prev_desired_curv" in input_keys:
+      self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,))
    for k in ['lead_prob', 'lane_lines_prob', 'meta']:
      self.parse_binary_crossentropy(k, outs)
    self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
@@ -40,6 +40,7 @@ class ONNXModel(RunModel):
  def __init__(self, path, output, runtime, use_tf8, cl_context):
    self.inputs = {}
    self.output = output
+    self.use_tf8 = use_tf8

    self.session = create_ort_session(path, fp16_to_fp32=True)
    self.input_names = [x.name for x in self.session.get_inputs()]
@@ -63,7 +64,11 @@ class ONNXModel(RunModel):
    return None

  def execute(self):
-    inputs = {k: v.view(self.input_dtypes[k]) for k,v in self.inputs.items()}
+    # TODO-SP: The input conversion below causes issues because it's converting the input data when in reality it doesn't need conversion, as it was already the target type.
+    #    I am leaving this comment and the commented-out line below because this needs to be looked at before merging. I had similar issues when trying the tinygrad runner...
+    #    Also, I checked whether a similar change had been made for thneed but didn't find any, so thneed is probably still working fine.
+    # inputs = {k: v.view(self.input_dtypes[k]) for k,v in self.inputs.items()}
+    inputs = {k: (v.view(np.uint8) / 255. if self.use_tf8 and k == 'input_img' else v) for k,v in self.inputs.items()}
    inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k,v in inputs.items()}
    outputs = self.session.run(None, inputs)
    assert len(outputs) == 1, "Only single model outputs are supported"
@@ -0,0 +1,57 @@
+# Copyright (c) 2021-, Haibin Wen, sunnypilot, and a number of other contributors.
+#
+# This file is part of sunnypilot and is licensed under the MIT License.
+# See the LICENSE.md file in the root directory for more details.
+
+import os
+import pickle
+import numpy as np
+from pathlib import Path
+from cereal import custom
+from openpilot.sunnypilot.modeld.runners import ModelRunner
+from openpilot.sunnypilot.models.helpers import get_active_bundle
+from openpilot.system.hardware import PC
+from openpilot.system.hardware.hw import Paths
+
+USE_ONNX = os.getenv('USE_ONNX', PC)
+
+CUSTOM_MODEL_PATH = Paths.model_root()
+METADATA_PATH = Path(__file__).parent / '../models/supercombo_metadata.pkl'
+
+ModelManager = custom.ModelManagerSP
+
+
+def load_model():
+  """Select the driving model file to run.
+
+  Returns a single-entry dict mapping a ModelRunner backend to a model path:
+  the bundled ONNX model when USE_ONNX is truthy, otherwise the drive model of
+  the active bundle (if there is one), otherwise the bundled thneed model.
+
+  Raises:
+    RuntimeError: if the active bundle contains no model of type `drive`.
+  """
+  if USE_ONNX:
+    return {ModelRunner.ONNX: Path(__file__).parent / '../models/supercombo.onnx'}
+
+  bundle = get_active_bundle()
+  if not bundle:
+    return {ModelRunner.THNEED: Path(__file__).parent / '../models/supercombo.thneed'}
+
+  # next() without a default would surface a bare StopIteration here, which says
+  # nothing about the cause; fail with an explicit message instead.
+  drive_model = next((model for model in bundle.models if model.type == ModelManager.Type.drive), None)
+  if drive_model is None:
+    raise RuntimeError("active model bundle has no model of type 'drive'")
+
+  return {ModelRunner.THNEED: f"{CUSTOM_MODEL_PATH}/{drive_model.fileName}"}
+
+
+def load_metadata():
+  """Load the pickled model metadata.
+
+  The metadata file of the active bundle is used when a bundle is active;
+  otherwise the bundled METADATA_PATH is read. The unpickled object is returned
+  as-is (callers index it by keys such as 'input_shapes' and 'output_slices').
+
+  Raises:
+    RuntimeError: if the active bundle contains no model of type `metadata`.
+  """
+  metadata_path = METADATA_PATH
+
+  if bundle := get_active_bundle():
+    # next() without a default would surface a bare StopIteration here, which says
+    # nothing about the cause; fail with an explicit message instead.
+    metadata_model = next((model for model in bundle.models if model.type == ModelManager.Type.metadata), None)
+    if metadata_model is None:
+      raise RuntimeError("active model bundle has no model of type 'metadata'")
+    metadata_path = f"{CUSTOM_MODEL_PATH}/{metadata_model.fileName}"
+
+  # NOTE(review): pickle.load can execute arbitrary code from the file. Bundle
+  # metadata is read from the custom model directory -- presumably downloaded;
+  # confirm it is integrity-checked before it can reach this point.
+  # NOTE(review): unlike load_model(), this does not look at USE_ONNX, so with an
+  # active bundle the bundle metadata is paired with the bundled ONNX model -- confirm intended.
+  with open(metadata_path, 'rb') as f:
+    return pickle.load(f)
+
+
+def prepare_inputs(model_metadata) -> dict[str, np.ndarray]:
+  """Allocate a zeroed, flat float32 buffer for every non-image model input.
+
+  Input names and shapes are taken from model_metadata['input_shapes'].
+  """
+  # img buffers are managed in openCL transform code so we don't pass them as inputs
+  image_inputs = ['input_imgs', 'big_input_imgs']
+
+  buffers: dict[str, np.ndarray] = {}
+  for name, shape in model_metadata['input_shapes'].items():
+    if name in image_inputs:
+      continue
+    # Buffers are kept 1-D, matching how the inputs were historically defined
+    buffers[name] = np.zeros(shape, dtype=np.float32).flatten()
+
+  return buffers
@@ -22,7 +22,7 @@ async def verify_file(file_path: str, expected_hash: str) -> bool:
  return sha256_hash.hexdigest().lower() == expected_hash.lower()


-def get_active_bundle(params: Params) -> custom.ModelManagerSP.ModelBundle:
+def get_active_bundle(params: Params = None) -> custom.ModelManagerSP.ModelBundle:
  """Gets the active model bundle from cache"""
  if params is None:
    params = Params()
Author	SHA1	Message	Date
DevTekVE	a6ff513954	Going back to pre-uint8 to test our luck	2025-01-06 14:11:50 +01:00
DevTekVE	f46f54582c	Update input handling logic in modeld.py Refined how input keys are processed to ensure proper assignment, excluding 'desire'. This improves flexibility and correctness when managing input buffers in the model pipeline.	2025-01-06 13:07:05 +01:00
DevTekVE	bc33bea185	This was actually quite revealing! This also has correlation with behaviors I saw when porting to tinygrad!	2025-01-06 12:50:03 +01:00
DevTekVE	6be69e5a47	Cleanup	2025-01-06 12:25:23 +01:00
DevTekVE	7d361df254	Cleanup	2025-01-06 11:47:11 +01:00
DevTekVE	0339f103d1	Removing this method as it wouldn't really be scalable enough with the amount of inputs we need to define for now and the amount of dependencies they might have	2025-01-06 11:40:08 +01:00
DevTekVE	9eaa57a645	Refactor input handling for model metadata and additional params Modify `model_metadata` usage for clarity and consistency, aligning variable names across the code. Add support for dynamic input handling with keys like `lateral_control_params`, `driving_style`, and navigation-related inputs. Clean up commented legacy code for better maintainability.	2025-01-06 11:39:19 +01:00
DevTekVE	409fa050a0	Refactor input handling and correct shape retrieval. Flatten input arrays in `prepare_inputs` for consistency with legacy definitions. Update `desired_shape` to use metadata for improved reliability and maintainability.	2025-01-06 11:18:27 +01:00
DevTekVE	b0c959f162	Fix the metadata path	2025-01-06 11:02:07 +01:00
DevTekVE	c3ac2b0540	Refactor `prepare_inputs` to accept `model_metadata` as a parameter. This change improves the flexibility of the `prepare_inputs` function by passing `model_metadata` as an argument instead of relying on an internal call. Additionally, fixed import paths for `sunnypilot` modules to ensure consistency and alignment.	2025-01-06 11:02:06 +01:00
DevTekVE	bd3117f5d1	Refactor model input shaping and indexing logic. Introduce dynamic reshaping for 'desire' inputs and improve indexing for 'features_buffer' to enhance maintainability and clarity. These changes reduce hardcoding and make the code more adaptable to varying input dimensions.	2025-01-06 10:42:52 +01:00
DevTekVE	2be0e84e9f	Refactor model and metadata path handling logic Simplified the `USE_ONNX` initialization and adjusted model paths for better consistency and clarity. Fixed variable naming in `load_metadata` to improve readability. These changes enhance code maintainability and correctness.	2025-01-06 10:38:03 +01:00
Jason Wen	5809ab3baa	load model and metadata dynamically	2025-01-06 00:46:45 -05:00
Jason Wen	570789a179	parse inputs via metadata	2025-01-06 00:09:52 -05:00