MLSIM trained model V0 (#33676)

* squash f7343ba0-3f7d-4776-9819-2219b8d2d667/270
* fix broken ref
* Model replay ref commit
2026-06-30 02:52:04 +08:00 · 2024-10-01 02:58:20 -07:00
parent 6c227a66a2
commit e442425c9d
6 changed files with 43 additions and 21 deletions
@@ -15,7 +15,8 @@ class ModelConstants:
  # model inputs constants
  MODEL_FREQ = 20
  FEATURE_LEN = 512
-  HISTORY_BUFFER_LEN = 99
+  FULL_HISTORY_BUFFER_LEN = 99
+  HISTORY_BUFFER_LEN = 24
  DESIRE_LEN = 8
  TRAFFIC_CONVENTION_LEN = 2
  LAT_PLANNER_STATE_LEN = 4
@@ -34,6 +34,7 @@ MODEL_PATHS = {

 METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'

+
 class FrameMeta:
  frame_id: int = 0
  timestamp_sof: int = 0
@@ -55,6 +56,11 @@ class ModelState:
    self.frame = ModelFrame(context)
    self.wide_frame = ModelFrame(context)
    self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
+    self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
+    self.desire_20Hz =  np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
+    self.prev_desired_curv_20hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32)
+
+    # img buffers are managed in OpenCL transform code
    self.inputs = {
      'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
      'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
@@ -87,14 +93,16 @@ class ModelState:
                inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
    # Model decides when action is completed, so desire input is just a pulse triggered on rising edge
    inputs['desire'][0] = 0
-    self.inputs['desire'][:-ModelConstants.DESIRE_LEN] = self.inputs['desire'][ModelConstants.DESIRE_LEN:]
-    self.inputs['desire'][-ModelConstants.DESIRE_LEN:] = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
+    new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
    self.prev_desire[:] = inputs['desire']

+    self.desire_20Hz[:-1] = self.desire_20Hz[1:]
+    self.desire_20Hz[-1] = new_desire
+    self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
+
    self.inputs['traffic_convention'][:] = inputs['traffic_convention']
    self.inputs['lateral_control_params'][:] = inputs['lateral_control_params']

-    # if getCLBuffer is not None, frame will be None
    self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")))
    self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))

@@ -104,10 +112,16 @@ class ModelState:
    self.model.execute()
    outputs = self.parser.parse_outputs(self.slice_outputs(self.output))

-    self.inputs['features_buffer'][:-ModelConstants.FEATURE_LEN] = self.inputs['features_buffer'][ModelConstants.FEATURE_LEN:]
-    self.inputs['features_buffer'][-ModelConstants.FEATURE_LEN:] = outputs['hidden_state'][0, :]
-    self.inputs['prev_desired_curv'][:-ModelConstants.PREV_DESIRED_CURV_LEN] = self.inputs['prev_desired_curv'][ModelConstants.PREV_DESIRED_CURV_LEN:]
-    self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = outputs['desired_curvature'][0, :]
+    self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
+    self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
+
+    self.prev_desired_curv_20hz[:-1] = self.prev_desired_curv_20hz[1:]
+    self.prev_desired_curv_20hz[-1] = outputs['desired_curvature'][0, :]
+
+    idxs = np.arange(-4,-100,-4)[::-1]
+    self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
+    # TODO: the model only uses the last value for now; once that changes, we need to input a strided action history buffer
+    self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = 0. * self.prev_desired_curv_20hz[-4, :]
    return outputs


@@ -173,7 +187,6 @@ def main(demo=False):
    CP = convert_to_capnp(get_demo_car_params())
  else:
    CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams)
-
  cloudlog.info("modeld got CarParams: %s", CP.carName)

  # TODO this needs more thought, use .2s extra for now to estimate other delays
@@ -13,7 +13,10 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
  y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
  u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
  v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
-  net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size_bytes, NULL, &err));
+  img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
+  region.origin = 4 * frame_size_bytes;
+  region.size = frame_size_bytes;
+  last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err));

  transform_init(&transform, context, device_id);
  loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
@@ -24,15 +27,18 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i
                yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
                y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);

-  loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
+  for (int i = 0; i < 4; i++) {
+    CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
+  }
+  loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
  if (output == NULL) {
-    std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], frame_size_bytes);
-    CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
+    CL_CHECK(clEnqueueReadBuffer(q, img_buffer_20hz_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[0], 0, nullptr, nullptr));
+    CL_CHECK(clEnqueueReadBuffer(q, last_img_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
    clFinish(q);
    return &input_frames[0];
  } else {
-    copy_queue(&loadyuv, q, *output, *output, frame_size_bytes, 0, frame_size_bytes);
-    copy_queue(&loadyuv, q, net_input_cl, *output, 0, frame_size_bytes, frame_size_bytes);
+    copy_queue(&loadyuv, q, img_buffer_20hz_cl, *output, 0, 0, frame_size_bytes);
+    copy_queue(&loadyuv, q, last_img_cl, *output, 0, frame_size_bytes, frame_size_bytes);

    // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
    clFinish(q);
@@ -43,7 +49,8 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i
 ModelFrame::~ModelFrame() {
  transform_destroy(&transform);
  loadyuv_destroy(&loadyuv);
-  CL_CHECK(clReleaseMemObject(net_input_cl));
+  CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
+  CL_CHECK(clReleaseMemObject(last_img_cl));
  CL_CHECK(clReleaseMemObject(v_cl));
  CL_CHECK(clReleaseMemObject(u_cl));
  CL_CHECK(clReleaseMemObject(y_cl));
@@ -32,6 +32,7 @@ private:
  Transform transform;
  LoadYUVState loadyuv;
  cl_command_queue q;
-  cl_mem y_cl, u_cl, v_cl, net_input_cl;
+  cl_mem y_cl, u_cl, v_cl, img_buffer_20hz_cl, last_img_cl;
+  cl_buffer_region region;
  std::unique_ptr<uint8_t[]> input_frames;
-};
+};
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21c88ca8a3de59fb11dc3e5888476f6bb627f3647eb0d199680d598e8bf31c0c
-size 62486469
+oid sha256:dd55319c8e3e9ac120f2b1bb1131cb67018669dfc0f7ebd31c27cc6de3e9f959
+size 50309976
@@ -1 +1 @@
-56743d36006d4312544ace7f53491727aa7ac302
+05b1cb87e32f280e46e0f45bbd6d76d5fd3f57a7