mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-06-08 14:54:46 +08:00
Compare commits
1 Commits
docs
...
archive/wo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc68278be2 |
@@ -12,6 +12,8 @@ from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
|
||||
from opendbc.car.car_helpers import get_demo_car_params
|
||||
from openpilot.common.swaglog import cloudlog
|
||||
from openpilot.common.params import Params
|
||||
from openpilot.common.realtime import DT_MDL
|
||||
from openpilot.common.numpy_fast import interp
|
||||
from openpilot.common.filter_simple import FirstOrderFilter
|
||||
from openpilot.common.realtime import config_realtime_process
|
||||
from openpilot.common.transformations.camera import DEVICE_CAMERAS
|
||||
@@ -59,15 +61,15 @@ class ModelState:
|
||||
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
|
||||
|
||||
# img buffers are managed in openCL transform code
|
||||
self.inputs = {
|
||||
'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
|
||||
'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
|
||||
'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
|
||||
}
|
||||
self.inputs = {}
|
||||
|
||||
with open(METADATA_PATH, 'rb') as f:
|
||||
model_metadata = pickle.load(f)
|
||||
|
||||
for key, shape in model_metadata['input_shapes'].items():
|
||||
if key not in ["input_imgs", "big_input_imgs"]:
|
||||
self.inputs[key] = np.zeros(shape, dtype=np.float32).flatten()
|
||||
|
||||
self.output_slices = model_metadata['output_slices']
|
||||
net_output_size = model_metadata['output_shapes']['outputs'][1]
|
||||
self.output = np.zeros(net_output_size, dtype=np.float32)
|
||||
@@ -79,6 +81,14 @@ class ModelState:
|
||||
for k,v in self.inputs.items():
|
||||
self.model.addInput(k, v)
|
||||
|
||||
num_elements = model_metadata['input_shapes']['features_buffer'][1]
|
||||
step_size = int(-100 / num_elements)
|
||||
self.full_features_20Hz_idxs = np.arange(step_size, step_size * (num_elements + 1), step_size)[::-1]
|
||||
|
||||
desired_shape = int(self.inputs['desire'].shape[0] / self.desire_20Hz.shape[1])
|
||||
middle_dim = int(self.desire_20Hz.shape[0] / desired_shape)
|
||||
self.desire_reshape_dims = (desired_shape, middle_dim, -1)
|
||||
|
||||
def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
|
||||
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
|
||||
if SEND_RAW_PRED:
|
||||
@@ -94,7 +104,7 @@ class ModelState:
|
||||
|
||||
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
|
||||
self.desire_20Hz[-1] = new_desire
|
||||
self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
|
||||
self.inputs['desire'][:] = self.desire_20Hz.reshape(self.desire_reshape_dims).max(axis=1).flatten()
|
||||
|
||||
self.inputs['traffic_convention'][:] = inputs['traffic_convention']
|
||||
|
||||
@@ -110,8 +120,25 @@ class ModelState:
|
||||
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
|
||||
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
|
||||
|
||||
idxs = np.arange(-4,-100,-4)[::-1]
|
||||
self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
|
||||
# idxs = np.arange(-4,-100,-4)[::-1]
|
||||
self.inputs['features_buffer'][:] = self.full_features_20Hz[self.full_features_20Hz_idxs].flatten()
|
||||
|
||||
if "lat_planner_solution" in outputs:
|
||||
if "lat_planner_state" in self.inputs.keys():
|
||||
self.inputs['lat_planner_state'][2] = interp(DT_MDL, ModelConstants.T_IDXS, outputs['lat_planner_solution'][0, :, 2])
|
||||
self.inputs['lat_planner_state'][3] = interp(DT_MDL, ModelConstants.T_IDXS, outputs['lat_planner_solution'][0, :, 3])
|
||||
|
||||
if "desired_curvature" in outputs:
|
||||
input_name_prev = None
|
||||
if "prev_desired_curvs" in self.inputs.keys():
|
||||
input_name_prev = 'prev_desired_curvs'
|
||||
elif "prev_desired_curv" in self.inputs.keys():
|
||||
input_name_prev = 'prev_desired_curv'
|
||||
|
||||
if input_name_prev is not None:
|
||||
len = outputs['desired_curvature'][0].size
|
||||
self.inputs[input_name_prev][:-len] = self.inputs[input_name_prev][len:]
|
||||
self.inputs[input_name_prev][-len:] = outputs['desired_curvature'][0, :len]
|
||||
return outputs
|
||||
|
||||
|
||||
@@ -254,6 +281,18 @@ def main(demo=False):
|
||||
'traffic_convention': traffic_convention,
|
||||
}
|
||||
|
||||
if "lateral_control_params" in model.inputs.keys():
|
||||
inputs['lateral_control_params'] = np.array([v_ego, steer_delay], dtype=np.float32)
|
||||
|
||||
if "driving_style" in model.inputs.keys():
|
||||
inputs['driving_style'] = np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=np.float32)
|
||||
|
||||
if "nav_features" in model.inputs.keys():
|
||||
inputs['nav_features'] = np.zeros(ModelConstants.NAV_FEATURE_LEN, dtype=np.float32) # Get size from shape
|
||||
|
||||
if "nav_instructions" in model.inputs.keys():
|
||||
inputs['nav_instructions'] = np.zeros(ModelConstants.NAV_INSTRUCTION_LEN, dtype=np.float32) # Get size from shape
|
||||
|
||||
mt1 = time.perf_counter()
|
||||
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
|
||||
mt2 = time.perf_counter()
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
#include "common/clutil.h"
|
||||
|
||||
DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
input_frames = std::make_unique<float[]>(buf_size);
|
||||
//input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
|
||||
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
|
||||
region.origin = 4 * frame_size_bytes;
|
||||
@@ -17,7 +17,7 @@ DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context)
|
||||
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
|
||||
uint8_t* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
|
||||
float* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
|
||||
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
@@ -50,14 +50,14 @@ DrivingModelFrame::~DrivingModelFrame() {
|
||||
|
||||
|
||||
MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
input_frames = std::make_unique<float[]>(buf_size);
|
||||
//input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
|
||||
|
||||
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
uint8_t* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
|
||||
float* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
|
||||
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
|
||||
CL_CHECK(clEnqueueReadBuffer(q, y_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), input_frames.get(), 0, nullptr, nullptr));
|
||||
CL_CHECK(clEnqueueReadBuffer(q, y_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), input_frames.get(), 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
//return &y_cl;
|
||||
return input_frames.get();
|
||||
|
||||
@@ -23,7 +23,7 @@ public:
|
||||
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
|
||||
}
|
||||
virtual ~ModelFrame() {}
|
||||
virtual uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) { return NULL; }
|
||||
virtual float* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) { return NULL; }
|
||||
/*
|
||||
uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
|
||||
CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
|
||||
@@ -41,7 +41,7 @@ protected:
|
||||
cl_mem y_cl, u_cl, v_cl;
|
||||
Transform transform;
|
||||
cl_command_queue q;
|
||||
std::unique_ptr<uint8_t[]> input_frames;
|
||||
std::unique_ptr<float[]> input_frames;
|
||||
|
||||
void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
|
||||
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
|
||||
@@ -68,13 +68,13 @@ class DrivingModelFrame : public ModelFrame {
|
||||
public:
|
||||
DrivingModelFrame(cl_device_id device_id, cl_context context);
|
||||
~DrivingModelFrame();
|
||||
uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
|
||||
float* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
|
||||
|
||||
const int MODEL_WIDTH = 512;
|
||||
const int MODEL_HEIGHT = 256;
|
||||
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
|
||||
const int buf_size = MODEL_FRAME_SIZE * 2;
|
||||
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
|
||||
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(float);
|
||||
|
||||
private:
|
||||
LoadYUVState loadyuv;
|
||||
@@ -86,7 +86,7 @@ class MonitoringModelFrame : public ModelFrame {
|
||||
public:
|
||||
MonitoringModelFrame(cl_device_id device_id, cl_context context);
|
||||
~MonitoringModelFrame();
|
||||
uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
|
||||
float* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
|
||||
|
||||
const int MODEL_WIDTH = 1440;
|
||||
const int MODEL_HEIGHT = 960;
|
||||
|
||||
@@ -15,7 +15,7 @@ cdef extern from "selfdrive/modeld/models/commonmodel.h":
|
||||
cppclass ModelFrame:
|
||||
int buf_size
|
||||
# unsigned char * buffer_from_cl(cl_mem*, int);
|
||||
unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
|
||||
float * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
|
||||
|
||||
cppclass DrivingModelFrame:
|
||||
int buf_size
|
||||
|
||||
@@ -42,7 +42,7 @@ cdef class ModelFrame:
|
||||
def prepare(self, VisionBuf buf, float[:] projection, CLMem output):
|
||||
cdef mat3 cprojection
|
||||
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
|
||||
cdef unsigned char * data
|
||||
cdef float * data
|
||||
if output is None:
|
||||
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, NULL)
|
||||
else:
|
||||
@@ -50,7 +50,7 @@ cdef class ModelFrame:
|
||||
if not data:
|
||||
return None
|
||||
|
||||
return np.asarray(<cnp.uint8_t[:self.buf_size]> data)
|
||||
return np.asarray(<cnp.float32_t[:self.buf_size]> data)
|
||||
# return CLMem.create(data)
|
||||
|
||||
# def buffer_from_cl(self, CLMem in_frames):
|
||||
|
||||
Binary file not shown.
@@ -1,7 +1,7 @@
|
||||
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
|
||||
|
||||
__kernel void loadys(__global uchar8 const * const Y,
|
||||
__global uchar * out,
|
||||
__global float * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
@@ -10,12 +10,13 @@ __kernel void loadys(__global uchar8 const * const Y,
|
||||
const int ox = ois % TRANSFORMED_WIDTH;
|
||||
|
||||
const uchar8 ys = Y[gid];
|
||||
const float8 ysf = convert_float8(ys);
|
||||
|
||||
// 02
|
||||
// 13
|
||||
|
||||
__global uchar* outy0;
|
||||
__global uchar* outy1;
|
||||
__global float* outy0;
|
||||
__global float* outy1;
|
||||
if ((oy & 1) == 0) {
|
||||
outy0 = out + out_offset; //y0
|
||||
outy1 = out + out_offset + UV_SIZE*2; //y2
|
||||
@@ -24,24 +25,23 @@ __kernel void loadys(__global uchar8 const * const Y,
|
||||
outy1 = out + out_offset + UV_SIZE*3; //y3
|
||||
}
|
||||
|
||||
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ysf.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ysf.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
}
|
||||
|
||||
__kernel void loaduv(__global uchar8 const * const in,
|
||||
__global uchar8 * out,
|
||||
__global float8 * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const uchar8 inv = in[gid];
|
||||
out[gid + out_offset / 8] = inv;
|
||||
const float8 outv = convert_float8(inv);
|
||||
out[gid + out_offset / 8] = outv;
|
||||
}
|
||||
|
||||
__kernel void copy(__global uchar8 * in,
|
||||
__global uchar8 * out,
|
||||
int in_offset,
|
||||
int out_offset)
|
||||
__kernel void copy(__global float8 * inout,
|
||||
int in_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
out[gid + out_offset / 8] = in[gid + in_offset / 8];
|
||||
}
|
||||
inout[gid] = inout[gid + in_offset / 8];
|
||||
}
|
||||
Reference in New Issue
Block a user