mirror of
https://github.com/firestar5683/StarPilot.git
synced 2026-06-30 02:52:04 +08:00
MLSIM trained model V0 (#33676)
* squash f7343ba0-3f7d-4776-9819-2219b8d2d667/270 * fix broken ref * Model replay ref commit
This commit is contained in:
@@ -15,7 +15,8 @@ class ModelConstants:
|
||||
# model inputs constants
|
||||
MODEL_FREQ = 20
|
||||
FEATURE_LEN = 512
|
||||
HISTORY_BUFFER_LEN = 99
|
||||
FULL_HISTORY_BUFFER_LEN = 99
|
||||
HISTORY_BUFFER_LEN = 24
|
||||
DESIRE_LEN = 8
|
||||
TRAFFIC_CONVENTION_LEN = 2
|
||||
LAT_PLANNER_STATE_LEN = 4
|
||||
|
||||
@@ -34,6 +34,7 @@ MODEL_PATHS = {
|
||||
|
||||
METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'
|
||||
|
||||
|
||||
class FrameMeta:
|
||||
frame_id: int = 0
|
||||
timestamp_sof: int = 0
|
||||
@@ -55,6 +56,11 @@ class ModelState:
|
||||
self.frame = ModelFrame(context)
|
||||
self.wide_frame = ModelFrame(context)
|
||||
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
|
||||
self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
|
||||
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
|
||||
self.prev_desired_curv_20hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32)
|
||||
|
||||
# img buffers are managed in openCL transform code
|
||||
self.inputs = {
|
||||
'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
|
||||
'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
|
||||
@@ -87,14 +93,16 @@ class ModelState:
|
||||
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
|
||||
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
|
||||
inputs['desire'][0] = 0
|
||||
self.inputs['desire'][:-ModelConstants.DESIRE_LEN] = self.inputs['desire'][ModelConstants.DESIRE_LEN:]
|
||||
self.inputs['desire'][-ModelConstants.DESIRE_LEN:] = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
|
||||
new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
|
||||
self.prev_desire[:] = inputs['desire']
|
||||
|
||||
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
|
||||
self.desire_20Hz[-1] = new_desire
|
||||
self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
|
||||
|
||||
self.inputs['traffic_convention'][:] = inputs['traffic_convention']
|
||||
self.inputs['lateral_control_params'][:] = inputs['lateral_control_params']
|
||||
|
||||
# if getCLBuffer is not None, frame will be None
|
||||
self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")))
|
||||
self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))
|
||||
|
||||
@@ -104,10 +112,16 @@ class ModelState:
|
||||
self.model.execute()
|
||||
outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
|
||||
|
||||
self.inputs['features_buffer'][:-ModelConstants.FEATURE_LEN] = self.inputs['features_buffer'][ModelConstants.FEATURE_LEN:]
|
||||
self.inputs['features_buffer'][-ModelConstants.FEATURE_LEN:] = outputs['hidden_state'][0, :]
|
||||
self.inputs['prev_desired_curv'][:-ModelConstants.PREV_DESIRED_CURV_LEN] = self.inputs['prev_desired_curv'][ModelConstants.PREV_DESIRED_CURV_LEN:]
|
||||
self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = outputs['desired_curvature'][0, :]
|
||||
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
|
||||
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
|
||||
|
||||
self.prev_desired_curv_20hz[:-1] = self.prev_desired_curv_20hz[1:]
|
||||
self.prev_desired_curv_20hz[-1] = outputs['desired_curvature'][0, :]
|
||||
|
||||
idxs = np.arange(-4,-100,-4)[::-1]
|
||||
self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
|
||||
# TODO model only uses last value now, once that changes we need to input strided action history buffer
|
||||
self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = 0. * self.prev_desired_curv_20hz[-4, :]
|
||||
return outputs
|
||||
|
||||
|
||||
@@ -173,7 +187,6 @@ def main(demo=False):
|
||||
CP = convert_to_capnp(get_demo_car_params())
|
||||
else:
|
||||
CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams)
|
||||
|
||||
cloudlog.info("modeld got CarParams: %s", CP.carName)
|
||||
|
||||
# TODO this needs more thought, use .2s extra for now to estimate other delays
|
||||
|
||||
@@ -13,7 +13,10 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
|
||||
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
|
||||
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
|
||||
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
|
||||
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size_bytes, NULL, &err));
|
||||
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
|
||||
region.origin = 4 * frame_size_bytes;
|
||||
region.size = frame_size_bytes;
|
||||
last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err));
|
||||
|
||||
transform_init(&transform, context, device_id);
|
||||
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
@@ -24,15 +27,18 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i
|
||||
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
|
||||
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
|
||||
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
|
||||
}
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
|
||||
if (output == NULL) {
|
||||
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], frame_size_bytes);
|
||||
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
|
||||
CL_CHECK(clEnqueueReadBuffer(q, img_buffer_20hz_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[0], 0, nullptr, nullptr));
|
||||
CL_CHECK(clEnqueueReadBuffer(q, last_img_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
return &input_frames[0];
|
||||
} else {
|
||||
copy_queue(&loadyuv, q, *output, *output, frame_size_bytes, 0, frame_size_bytes);
|
||||
copy_queue(&loadyuv, q, net_input_cl, *output, 0, frame_size_bytes, frame_size_bytes);
|
||||
copy_queue(&loadyuv, q, img_buffer_20hz_cl, *output, 0, 0, frame_size_bytes);
|
||||
copy_queue(&loadyuv, q, last_img_cl, *output, 0, frame_size_bytes, frame_size_bytes);
|
||||
|
||||
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
|
||||
clFinish(q);
|
||||
@@ -43,7 +49,8 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i
|
||||
ModelFrame::~ModelFrame() {
|
||||
transform_destroy(&transform);
|
||||
loadyuv_destroy(&loadyuv);
|
||||
CL_CHECK(clReleaseMemObject(net_input_cl));
|
||||
CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
|
||||
CL_CHECK(clReleaseMemObject(last_img_cl));
|
||||
CL_CHECK(clReleaseMemObject(v_cl));
|
||||
CL_CHECK(clReleaseMemObject(u_cl));
|
||||
CL_CHECK(clReleaseMemObject(y_cl));
|
||||
|
||||
@@ -32,6 +32,7 @@ private:
|
||||
Transform transform;
|
||||
LoadYUVState loadyuv;
|
||||
cl_command_queue q;
|
||||
cl_mem y_cl, u_cl, v_cl, net_input_cl;
|
||||
cl_mem y_cl, u_cl, v_cl, img_buffer_20hz_cl, last_img_cl;
|
||||
cl_buffer_region region;
|
||||
std::unique_ptr<uint8_t[]> input_frames;
|
||||
};
|
||||
};
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:21c88ca8a3de59fb11dc3e5888476f6bb627f3647eb0d199680d598e8bf31c0c
|
||||
size 62486469
|
||||
oid sha256:dd55319c8e3e9ac120f2b1bb1131cb67018669dfc0f7ebd31c27cc6de3e9f959
|
||||
size 50309976
|
||||
|
||||
@@ -1 +1 @@
|
||||
56743d36006d4312544ace7f53491727aa7ac302
|
||||
05b1cb87e32f280e46e0f45bbd6d76d5fd3f57a7
|
||||
|
||||
Reference in New Issue
Block a user