Compare commits

...

1 Commits

Author SHA1 Message Date
DevTekVE
dc68278be2 Switch model inputs and buffers from uint8_t to float.
Updated data types across the model pipeline, including ONNX model, buffers, and transformations, to utilize `float` instead of `uint8_t`.
2024-12-31 10:07:44 +01:00
7 changed files with 75 additions and 36 deletions

View File

@@ -12,6 +12,8 @@ from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
from opendbc.car.car_helpers import get_demo_car_params
from openpilot.common.swaglog import cloudlog
from openpilot.common.params import Params
from openpilot.common.realtime import DT_MDL
from openpilot.common.numpy_fast import interp
from openpilot.common.filter_simple import FirstOrderFilter
from openpilot.common.realtime import config_realtime_process
from openpilot.common.transformations.camera import DEVICE_CAMERAS
@@ -59,15 +61,15 @@ class ModelState:
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
# img buffers are managed in openCL transform code
self.inputs = {
'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
}
self.inputs = {}
with open(METADATA_PATH, 'rb') as f:
model_metadata = pickle.load(f)
for key, shape in model_metadata['input_shapes'].items():
if key not in ["input_imgs", "big_input_imgs"]:
self.inputs[key] = np.zeros(shape, dtype=np.float32).flatten()
self.output_slices = model_metadata['output_slices']
net_output_size = model_metadata['output_shapes']['outputs'][1]
self.output = np.zeros(net_output_size, dtype=np.float32)
@@ -79,6 +81,14 @@ class ModelState:
for k,v in self.inputs.items():
self.model.addInput(k, v)
num_elements = model_metadata['input_shapes']['features_buffer'][1]
step_size = int(-100 / num_elements)
self.full_features_20Hz_idxs = np.arange(step_size, step_size * (num_elements + 1), step_size)[::-1]
desired_shape = int(self.inputs['desire'].shape[0] / self.desire_20Hz.shape[1])
middle_dim = int(self.desire_20Hz.shape[0] / desired_shape)
self.desire_reshape_dims = (desired_shape, middle_dim, -1)
def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
if SEND_RAW_PRED:
@@ -94,7 +104,7 @@ class ModelState:
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
self.desire_20Hz[-1] = new_desire
self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
self.inputs['desire'][:] = self.desire_20Hz.reshape(self.desire_reshape_dims).max(axis=1).flatten()
self.inputs['traffic_convention'][:] = inputs['traffic_convention']
@@ -110,8 +120,25 @@ class ModelState:
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
idxs = np.arange(-4,-100,-4)[::-1]
self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
# idxs = np.arange(-4,-100,-4)[::-1]
self.inputs['features_buffer'][:] = self.full_features_20Hz[self.full_features_20Hz_idxs].flatten()
if "lat_planner_solution" in outputs:
if "lat_planner_state" in self.inputs.keys():
self.inputs['lat_planner_state'][2] = interp(DT_MDL, ModelConstants.T_IDXS, outputs['lat_planner_solution'][0, :, 2])
self.inputs['lat_planner_state'][3] = interp(DT_MDL, ModelConstants.T_IDXS, outputs['lat_planner_solution'][0, :, 3])
if "desired_curvature" in outputs:
input_name_prev = None
if "prev_desired_curvs" in self.inputs.keys():
input_name_prev = 'prev_desired_curvs'
elif "prev_desired_curv" in self.inputs.keys():
input_name_prev = 'prev_desired_curv'
if input_name_prev is not None:
len = outputs['desired_curvature'][0].size
self.inputs[input_name_prev][:-len] = self.inputs[input_name_prev][len:]
self.inputs[input_name_prev][-len:] = outputs['desired_curvature'][0, :len]
return outputs
@@ -254,6 +281,18 @@ def main(demo=False):
'traffic_convention': traffic_convention,
}
if "lateral_control_params" in model.inputs.keys():
inputs['lateral_control_params'] = np.array([v_ego, steer_delay], dtype=np.float32)
if "driving_style" in model.inputs.keys():
inputs['driving_style'] = np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=np.float32)
if "nav_features" in model.inputs.keys():
inputs['nav_features'] = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32) # Get size from shape
if "nav_instructions" in model.inputs.keys():
inputs['nav_instructions'] = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32) # Get size from shape
mt1 = time.perf_counter()
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
mt2 = time.perf_counter()

View File

@@ -6,7 +6,7 @@
#include "common/clutil.h"
DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
input_frames = std::make_unique<uint8_t[]>(buf_size);
input_frames = std::make_unique<float[]>(buf_size);
//input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
region.origin = 4 * frame_size_bytes;
@@ -17,7 +17,7 @@ DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context)
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
}
uint8_t* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
float* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
for (int i = 0; i < 4; i++) {
@@ -50,14 +50,14 @@ DrivingModelFrame::~DrivingModelFrame() {
MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
input_frames = std::make_unique<uint8_t[]>(buf_size);
input_frames = std::make_unique<float[]>(buf_size);
//input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
}
uint8_t* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
float* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
CL_CHECK(clEnqueueReadBuffer(q, y_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), input_frames.get(), 0, nullptr, nullptr));
CL_CHECK(clEnqueueReadBuffer(q, y_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), input_frames.get(), 0, nullptr, nullptr));
clFinish(q);
//return &y_cl;
return input_frames.get();

View File

@@ -23,7 +23,7 @@ public:
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
}
virtual ~ModelFrame() {}
virtual uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) { return NULL; }
virtual float* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) { return NULL; }
/*
uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
@@ -41,7 +41,7 @@ protected:
cl_mem y_cl, u_cl, v_cl;
Transform transform;
cl_command_queue q;
std::unique_ptr<uint8_t[]> input_frames;
std::unique_ptr<float[]> input_frames;
void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
@@ -68,13 +68,13 @@ class DrivingModelFrame : public ModelFrame {
public:
DrivingModelFrame(cl_device_id device_id, cl_context context);
~DrivingModelFrame();
uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
float* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
const int MODEL_WIDTH = 512;
const int MODEL_HEIGHT = 256;
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
const int buf_size = MODEL_FRAME_SIZE * 2;
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(float);
private:
LoadYUVState loadyuv;
@@ -86,7 +86,7 @@ class MonitoringModelFrame : public ModelFrame {
public:
MonitoringModelFrame(cl_device_id device_id, cl_context context);
~MonitoringModelFrame();
uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
float* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
const int MODEL_WIDTH = 1440;
const int MODEL_HEIGHT = 960;

View File

@@ -15,7 +15,7 @@ cdef extern from "selfdrive/modeld/models/commonmodel.h":
cppclass ModelFrame:
int buf_size
# unsigned char * buffer_from_cl(cl_mem*, int);
unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
float * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
cppclass DrivingModelFrame:
int buf_size

View File

@@ -42,7 +42,7 @@ cdef class ModelFrame:
def prepare(self, VisionBuf buf, float[:] projection, CLMem output):
cdef mat3 cprojection
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
cdef unsigned char * data
cdef float * data
if output is None:
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, NULL)
else:
@@ -50,7 +50,7 @@ cdef class ModelFrame:
if not data:
return None
return np.asarray(<cnp.uint8_t[:self.buf_size]> data)
return np.asarray(<cnp.float32_t[:self.buf_size]> data)
# return CLMem.create(data)
# def buffer_from_cl(self, CLMem in_frames):

Binary file not shown.

View File

@@ -1,7 +1,7 @@
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
__kernel void loadys(__global uchar8 const * const Y,
__global uchar * out,
__global float * out,
int out_offset)
{
const int gid = get_global_id(0);
@@ -10,12 +10,13 @@ __kernel void loadys(__global uchar8 const * const Y,
const int ox = ois % TRANSFORMED_WIDTH;
const uchar8 ys = Y[gid];
const float8 ysf = convert_float8(ys);
// 02
// 13
__global uchar* outy0;
__global uchar* outy1;
__global float* outy0;
__global float* outy1;
if ((oy & 1) == 0) {
outy0 = out + out_offset; //y0
outy1 = out + out_offset + UV_SIZE*2; //y2
@@ -24,24 +25,23 @@ __kernel void loadys(__global uchar8 const * const Y,
outy1 = out + out_offset + UV_SIZE*3; //y3
}
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
vstore4(ysf.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
vstore4(ysf.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
}
__kernel void loaduv(__global uchar8 const * const in,
__global uchar8 * out,
__global float8 * out,
int out_offset)
{
const int gid = get_global_id(0);
const uchar8 inv = in[gid];
out[gid + out_offset / 8] = inv;
const float8 outv = convert_float8(inv);
out[gid + out_offset / 8] = outv;
}
__kernel void copy(__global uchar8 * in,
__global uchar8 * out,
int in_offset,
int out_offset)
__kernel void copy(__global float8 * inout,
int in_offset)
{
const int gid = get_global_id(0);
out[gid + out_offset / 8] = in[gid + in_offset / 8];
}
inout[gid] = inout[gid + in_offset / 8];
}