mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-06-10 01:54:57 +08:00
Compare commits
32 Commits
master-dev
...
archive/mo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e28a7a5bee | ||
|
|
5e889d9ec6 | ||
|
|
5ebd6746d1 | ||
|
|
9e7203567e | ||
|
|
aba1f4b5fc | ||
|
|
5a3ee2d376 | ||
|
|
ae5a2430d5 | ||
|
|
6e56f37823 | ||
|
|
d77d0cf622 | ||
|
|
49888d2b7c | ||
|
|
efc1f6378e | ||
|
|
b0fef4b5ea | ||
|
|
e6b9497099 | ||
|
|
18bc3c5920 | ||
|
|
cfffc94a07 | ||
|
|
f72447245c | ||
|
|
c959947b8f | ||
|
|
c9a23395ea | ||
|
|
50382d7c01 | ||
|
|
68b240d09b | ||
|
|
556c72cb90 | ||
|
|
c050fa7c49 | ||
|
|
0784570ead | ||
|
|
f67e8aca47 | ||
|
|
5d1b403015 | ||
|
|
c59dead9fd | ||
|
|
8512406fa9 | ||
|
|
7087f3f400 | ||
|
|
b7e601e92d | ||
|
|
c3f92f2783 | ||
|
|
5a3e80fc92 | ||
|
|
5fac11efbb |
@@ -212,7 +212,7 @@ std::unordered_map<std::string, uint32_t> keys = {
|
||||
|
||||
// Model Manager params
|
||||
{"ModelManager_ActiveBundle", PERSISTENT},
|
||||
{"ModelManager_DownloadIndex", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION | CLEAR_ON_ONROAD_TRANSITION},
|
||||
{"ModelManager_DownloadIndex", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
|
||||
{"ModelManager_LastSyncTime", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION},
|
||||
{"ModelManager_ModelsCache", PERSISTENT | BACKUP},
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ else:
|
||||
libs += ['OpenCL']
|
||||
|
||||
# Set path definitions
|
||||
for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transforms/loadyuv.cl'}.items():
|
||||
for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transforms/loadyuv.cl', 'LOADYUV_FLOAT': 'transforms/loadyuv_float.cl'}.items():
|
||||
for xenv in (lenv, lenvCython):
|
||||
xenv['CXXFLAGS'].append(f'-D{pathdef}_PATH=\\"{File(fn).abspath}\\"')
|
||||
|
||||
|
||||
@@ -1,21 +1,13 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
from openpilot.system.hardware import TICI
|
||||
|
||||
from openpilot.selfdrive.modeld.runners.model_runner import ONNXRunner, TinygradRunner
|
||||
|
||||
#
|
||||
if TICI:
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.dtype import dtypes
|
||||
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
|
||||
os.environ['QCOM'] = '1'
|
||||
else:
|
||||
from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner
|
||||
import time
|
||||
import pickle
|
||||
import numpy as np
|
||||
import cereal.messaging as messaging
|
||||
from cereal import car, log
|
||||
from pathlib import Path
|
||||
from setproctitle import setproctitle
|
||||
from cereal.messaging import PubMaster, SubMaster
|
||||
from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
|
||||
@@ -31,15 +23,12 @@ from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
|
||||
from openpilot.selfdrive.modeld.parse_model_outputs import Parser
|
||||
from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
|
||||
from openpilot.selfdrive.modeld.constants import ModelConstants
|
||||
from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext
|
||||
# from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext
|
||||
from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrameLegacy as DrivingModelFrame, CLContext
|
||||
|
||||
|
||||
PROCESS_NAME = "selfdrive.modeld.modeld"
|
||||
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
|
||||
|
||||
MODEL_PATH = Path(__file__).parent / 'models/supercombo.onnx'
|
||||
MODEL_PKL_PATH = Path(__file__).parent / 'models/supercombo_tinygrad.pkl'
|
||||
METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'
|
||||
|
||||
class FrameMeta:
|
||||
frame_id: int = 0
|
||||
@@ -61,35 +50,26 @@ class ModelState:
|
||||
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
|
||||
self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
|
||||
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
|
||||
self.is_20hz = False
|
||||
# Initialize model runner
|
||||
self.model_runner = TinygradRunner(self.frames) if TICI else ONNXRunner(self.frames)
|
||||
|
||||
# img buffers are managed in openCL transform code
|
||||
self.numpy_inputs = {
|
||||
'desire': np.zeros((1, (ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32),
|
||||
'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float32),
|
||||
'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32),
|
||||
}
|
||||
self.numpy_inputs = {}
|
||||
|
||||
with open(METADATA_PATH, 'rb') as f:
|
||||
model_metadata = pickle.load(f)
|
||||
self.input_shapes = model_metadata['input_shapes']
|
||||
for key, shape in self.model_runner.input_shapes.items():
|
||||
if key not in self.frames: # Managed by opencl
|
||||
self.numpy_inputs[key] = np.zeros(shape, dtype=np.float32)
|
||||
|
||||
self.output_slices = model_metadata['output_slices']
|
||||
net_output_size = model_metadata['output_shapes']['outputs'][1]
|
||||
self.output = np.zeros(net_output_size, dtype=np.float32)
|
||||
self.parser = Parser()
|
||||
|
||||
if TICI:
|
||||
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
|
||||
with open(MODEL_PKL_PATH, "rb") as f:
|
||||
self.model_run = pickle.load(f)
|
||||
else:
|
||||
self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH)
|
||||
net_output_size = self.model_runner.model_metadata['output_shapes']['outputs'][1]
|
||||
self.output = np.zeros(net_output_size, dtype=np.float32)
|
||||
|
||||
def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
|
||||
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
|
||||
if SEND_RAW_PRED:
|
||||
parsed_model_outputs['raw_pred'] = model_outputs.copy()
|
||||
return parsed_model_outputs
|
||||
num_elements = self.numpy_inputs['features_buffer'].shape[1]
|
||||
step_size = int(-100 / num_elements)
|
||||
self.full_features_20Hz_idxs = np.arange(step_size, step_size * (num_elements + 1), step_size)[::-1]
|
||||
self.desire_reshape_dims = (self.numpy_inputs['desire'].shape[0], self.numpy_inputs['desire'].shape[1], -1, self.numpy_inputs['desire'].shape[2])
|
||||
|
||||
def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray,
|
||||
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
|
||||
@@ -98,38 +78,54 @@ class ModelState:
|
||||
new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
|
||||
self.prev_desire[:] = inputs['desire']
|
||||
|
||||
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
|
||||
self.desire_20Hz[-1] = new_desire
|
||||
self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape((1,25,4,-1)).max(axis=2)
|
||||
if self.is_20hz:
|
||||
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
|
||||
self.desire_20Hz[-1] = new_desire
|
||||
self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape(self.desire_reshape_dims).max(axis=2)
|
||||
else:
|
||||
len = inputs['desire'].shape[0]
|
||||
self.numpy_inputs['desire'][0, :-1] = self.numpy_inputs['desire'][0, 1:]
|
||||
self.numpy_inputs['desire'][0, -1, :len] = new_desire[:len]
|
||||
|
||||
for key in self.numpy_inputs:
|
||||
if key in inputs and key not in ['desire']:
|
||||
self.numpy_inputs[key][:] = inputs[key]
|
||||
|
||||
self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
|
||||
imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
|
||||
'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}
|
||||
|
||||
if TICI:
|
||||
# The imgs tensors are backed by opencl memory, only need init once
|
||||
for key in imgs_cl:
|
||||
if key not in self.tensor_inputs:
|
||||
self.tensor_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8)
|
||||
else:
|
||||
for key in imgs_cl:
|
||||
self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
|
||||
# Prepare inputs using the model runner
|
||||
self.model_runner.prepare_inputs(imgs_cl, self.numpy_inputs)
|
||||
|
||||
if prepare_only:
|
||||
return None
|
||||
|
||||
if TICI:
|
||||
self.output = self.model_run(**self.tensor_inputs).numpy().flatten()
|
||||
# Run model inference
|
||||
self.output = self.model_runner.run_model()
|
||||
outputs = self.parser.parse_outputs(self.model_runner.slice_outputs(self.output), self.numpy_inputs.keys())
|
||||
|
||||
if self.is_20hz:
|
||||
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
|
||||
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
|
||||
self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[self.full_features_20Hz_idxs]
|
||||
else:
|
||||
self.output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten()
|
||||
feature_len = outputs['hidden_state'].shape[1]
|
||||
self.numpy_inputs['features_buffer'][0, :-1] = self.numpy_inputs['features_buffer'][0, 1:]
|
||||
self.numpy_inputs['features_buffer'][0, -1, :feature_len] = outputs['hidden_state'][0, :feature_len]
|
||||
|
||||
outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
|
||||
|
||||
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
|
||||
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
|
||||
if "desired_curvature" in outputs:
|
||||
input_name_prev = None
|
||||
|
||||
idxs = np.arange(-4,-100,-4)[::-1]
|
||||
self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[idxs]
|
||||
if "prev_desired_curvs" in self.numpy_inputs.keys():
|
||||
input_name_prev = 'prev_desired_curvs'
|
||||
elif "prev_desired_curv" in self.numpy_inputs.keys():
|
||||
input_name_prev = 'prev_desired_curv'
|
||||
|
||||
if input_name_prev is not None:
|
||||
len = outputs['desired_curvature'][0].size
|
||||
self.numpy_inputs[input_name_prev][0, :-len, 0] = self.numpy_inputs[input_name_prev][0, len:, 0]
|
||||
self.numpy_inputs[input_name_prev][0, -len:, 0] = outputs['desired_curvature'][0]
|
||||
return outputs
|
||||
|
||||
|
||||
@@ -190,7 +186,6 @@ def main(demo=False):
|
||||
meta_main = FrameMeta()
|
||||
meta_extra = FrameMeta()
|
||||
|
||||
|
||||
if demo:
|
||||
CP = get_demo_car_params()
|
||||
else:
|
||||
@@ -272,6 +267,9 @@ def main(demo=False):
|
||||
'traffic_convention': traffic_convention,
|
||||
}
|
||||
|
||||
if "lateral_control_params" in model.numpy_inputs.keys():
|
||||
inputs['lateral_control_params'] = np.array([sm["carState"].vEgo, steer_delay], dtype=np.float32)
|
||||
|
||||
mt1 = time.perf_counter()
|
||||
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
|
||||
mt2 = time.perf_counter()
|
||||
|
||||
@@ -8,25 +8,30 @@
|
||||
DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
|
||||
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
|
||||
region.origin = 4 * frame_size_bytes;
|
||||
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_length*frame_size_bytes, NULL, &err));
|
||||
region.origin = (buffer_length - 1) * frame_size_bytes;
|
||||
region.size = frame_size_bytes;
|
||||
last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err));
|
||||
printf("Buffer length: %d, region origin: %lu, region size: %lu\n", buffer_length, region.origin, region.size);
|
||||
|
||||
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT, is_float);
|
||||
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
|
||||
cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
|
||||
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int i = 0; i < (buffer_length - 1); i++) {
|
||||
printf("Moving %d to %d from src_offset %lu to dst_offset %lu with size %lu\n", i+1, i, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes);
|
||||
CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
|
||||
}
|
||||
printf("Loop done\n");
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
|
||||
|
||||
copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes);
|
||||
printf("Copying from %p to %p with size %lu\n", img_buffer_20hz_cl, input_frames_cl, frame_size_bytes);
|
||||
copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes, frame_size_bytes);
|
||||
printf("Copying from %p to %p with size %lu\n", last_img_cl, input_frames_cl, frame_size_bytes);
|
||||
|
||||
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
|
||||
clFinish(q);
|
||||
@@ -41,6 +46,45 @@ DrivingModelFrame::~DrivingModelFrame() {
|
||||
CL_CHECK(clReleaseCommandQueue(q));
|
||||
}
|
||||
|
||||
DrivingModelFrameLegacy::DrivingModelFrameLegacy(cl_device_id device_id, cl_context context): ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<float[]>(buf_size);
|
||||
input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size * sizeof(float), NULL, &err));
|
||||
// Reduce buffer to 2 frames due to float size (4x bigger than uint8_t)
|
||||
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * frame_size_bytes * sizeof(float), NULL, &err));
|
||||
|
||||
// Adjust region for 2-frame buffer
|
||||
region.origin = frame_size_bytes * sizeof(float); // Point to second frame
|
||||
region.size = frame_size_bytes * sizeof(float);
|
||||
last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err));
|
||||
|
||||
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT, true);
|
||||
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
|
||||
cl_mem* DrivingModelFrameLegacy::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset,
|
||||
const mat3 &projection) {
|
||||
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
|
||||
|
||||
// Rolling buffer with just 2 frames
|
||||
CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, frame_size_bytes * sizeof(float), 0, frame_size_bytes * sizeof(float), 0, nullptr, nullptr));
|
||||
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
|
||||
|
||||
// Copy both frames to input buffer
|
||||
copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes * sizeof(float));
|
||||
copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes * sizeof(float), frame_size_bytes * sizeof(float));
|
||||
|
||||
clFinish(q);
|
||||
return &input_frames_cl;
|
||||
}
|
||||
|
||||
DrivingModelFrameLegacy::~DrivingModelFrameLegacy() {
|
||||
deinit_transform();
|
||||
loadyuv_destroy(&loadyuv);
|
||||
CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
|
||||
CL_CHECK(clReleaseMemObject(input_frames_cl));
|
||||
CL_CHECK(clReleaseCommandQueue(q));
|
||||
}
|
||||
|
||||
MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "selfdrive/modeld/transforms/loadyuv.h"
|
||||
#include "selfdrive/modeld/transforms/transform.h"
|
||||
|
||||
template <typename T = uint8_t>
|
||||
class ModelFrame {
|
||||
public:
|
||||
ModelFrame(cl_device_id device_id, cl_context context) {
|
||||
@@ -24,7 +25,7 @@ public:
|
||||
}
|
||||
virtual ~ModelFrame() {}
|
||||
virtual cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { return NULL; }
|
||||
uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
|
||||
T* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
|
||||
CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
return &input_frames[0];
|
||||
@@ -39,7 +40,7 @@ protected:
|
||||
cl_mem y_cl, u_cl, v_cl;
|
||||
Transform transform;
|
||||
cl_command_queue q;
|
||||
std::unique_ptr<uint8_t[]> input_frames;
|
||||
std::unique_ptr<T[]> input_frames;
|
||||
|
||||
void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
|
||||
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
|
||||
@@ -62,7 +63,11 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
class DrivingModelFrame : public ModelFrame {
|
||||
class DrivingModelFrame : public ModelFrame<> {
|
||||
using ModelFrame::q, ModelFrame::y_cl, ModelFrame::u_cl, ModelFrame::v_cl;
|
||||
using ModelFrame::init_transform, ModelFrame::deinit_transform, ModelFrame::run_transform;
|
||||
using ModelFrame::input_frames;
|
||||
|
||||
public:
|
||||
DrivingModelFrame(cl_device_id device_id, cl_context context);
|
||||
~DrivingModelFrame();
|
||||
@@ -73,6 +78,8 @@ public:
|
||||
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
|
||||
const int buf_size = MODEL_FRAME_SIZE * 2;
|
||||
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
|
||||
const bool is_float = false;
|
||||
const uint8_t buffer_length = is_float ? 2 : 5;
|
||||
|
||||
private:
|
||||
LoadYUVState loadyuv;
|
||||
@@ -80,7 +87,7 @@ private:
|
||||
cl_buffer_region region;
|
||||
};
|
||||
|
||||
class MonitoringModelFrame : public ModelFrame {
|
||||
class MonitoringModelFrame : public ModelFrame<> {
|
||||
public:
|
||||
MonitoringModelFrame(cl_device_id device_id, cl_context context);
|
||||
~MonitoringModelFrame();
|
||||
@@ -94,3 +101,23 @@ public:
|
||||
private:
|
||||
cl_mem input_frame_cl;
|
||||
};
|
||||
|
||||
class DrivingModelFrameLegacy : public ModelFrame<float> {
|
||||
public:
|
||||
DrivingModelFrameLegacy(cl_device_id device_id, cl_context context);
|
||||
~DrivingModelFrameLegacy();
|
||||
cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);
|
||||
|
||||
const int MODEL_WIDTH = 512;
|
||||
const int MODEL_HEIGHT = 256;
|
||||
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
|
||||
const int buf_size = MODEL_FRAME_SIZE * 2;
|
||||
|
||||
private:
|
||||
cl_mem input_frames_cl;
|
||||
cl_mem img_buffer_20hz_cl;
|
||||
cl_mem last_img_cl;
|
||||
LoadYUVState loadyuv;
|
||||
cl_buffer_region region;
|
||||
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(float); // This is the only change
|
||||
};
|
||||
@@ -12,15 +12,19 @@ cdef extern from "common/clutil.h":
|
||||
cl_context cl_create_context(cl_device_id)
|
||||
|
||||
cdef extern from "selfdrive/modeld/models/commonmodel.h":
|
||||
cppclass ModelFrame:
|
||||
cppclass ModelFrame[T]:
|
||||
int buf_size
|
||||
unsigned char * buffer_from_cl(cl_mem*, int);
|
||||
T * buffer_from_cl(cl_mem*, int);
|
||||
cl_mem * prepare(cl_mem, int, int, int, int, mat3)
|
||||
|
||||
cppclass DrivingModelFrame:
|
||||
int buf_size
|
||||
DrivingModelFrame(cl_device_id, cl_context)
|
||||
|
||||
cppclass DrivingModelFrameLegacy:
|
||||
int buf_size
|
||||
DrivingModelFrameLegacy(cl_device_id, cl_context)
|
||||
|
||||
cppclass MonitoringModelFrame:
|
||||
int buf_size
|
||||
MonitoringModelFrame(cl_device_id, cl_context)
|
||||
|
||||
@@ -4,12 +4,12 @@
|
||||
import numpy as np
|
||||
cimport numpy as cnp
|
||||
from libc.string cimport memcpy
|
||||
from libc.stdint cimport uintptr_t
|
||||
from libc.stdint cimport uintptr_t, uint8_t
|
||||
|
||||
from msgq.visionipc.visionipc cimport cl_mem
|
||||
from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext
|
||||
from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context
|
||||
from .commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame
|
||||
from .commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, DrivingModelFrameLegacy as cppDrivingModelFrameLegacy, MonitoringModelFrame as cppMonitoringModelFrame
|
||||
|
||||
|
||||
cdef class CLContext(BaseCLContext):
|
||||
@@ -33,7 +33,7 @@ def cl_from_visionbuf(VisionBuf buf):
|
||||
|
||||
|
||||
cdef class ModelFrame:
|
||||
cdef cppModelFrame * frame
|
||||
cdef cppModelFrame[uint8_t] * frame
|
||||
cdef int buf_size
|
||||
|
||||
def __dealloc__(self):
|
||||
@@ -52,12 +52,40 @@ cdef class ModelFrame:
|
||||
return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)
|
||||
|
||||
|
||||
cdef class ModelFrame_float:
|
||||
cdef cppModelFrame[float] * frame
|
||||
cdef int buf_size
|
||||
|
||||
def __dealloc__(self):
|
||||
del self.frame
|
||||
|
||||
def prepare(self, VisionBuf buf, float[:] projection):
|
||||
cdef mat3 cprojection
|
||||
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
|
||||
cdef cl_mem * data
|
||||
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection)
|
||||
return CLMem.create(data)
|
||||
|
||||
def buffer_from_cl(self, CLMem in_frames):
|
||||
cdef float * data2
|
||||
data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
|
||||
return np.asarray(<cnp.float32_t[:self.buf_size]> data2)
|
||||
|
||||
|
||||
cdef class DrivingModelFrame(ModelFrame):
|
||||
cdef cppDrivingModelFrame * _frame
|
||||
|
||||
def __cinit__(self, CLContext context):
|
||||
self._frame = new cppDrivingModelFrame(context.device_id, context.context)
|
||||
self.frame = <cppModelFrame*>(self._frame)
|
||||
self.frame = <cppModelFrame[uint8_t]*>(self._frame)
|
||||
self.buf_size = self._frame.buf_size
|
||||
|
||||
cdef class DrivingModelFrameLegacy(ModelFrame_float):
|
||||
cdef cppDrivingModelFrameLegacy * _frame
|
||||
|
||||
def __cinit__(self, CLContext context):
|
||||
self._frame = new cppDrivingModelFrameLegacy(context.device_id, context.context)
|
||||
self.frame = <cppModelFrame[float]*>(self._frame)
|
||||
self.buf_size = self._frame.buf_size
|
||||
|
||||
cdef class MonitoringModelFrame(ModelFrame):
|
||||
@@ -65,6 +93,6 @@ cdef class MonitoringModelFrame(ModelFrame):
|
||||
|
||||
def __cinit__(self, CLContext context):
|
||||
self._frame = new cppMonitoringModelFrame(context.device_id, context.context)
|
||||
self.frame = <cppModelFrame*>(self._frame)
|
||||
self.frame = <cppModelFrame[uint8_t]*>(self._frame)
|
||||
self.buf_size = self._frame.buf_size
|
||||
|
||||
|
||||
Binary file not shown.
@@ -84,7 +84,8 @@ class Parser:
|
||||
outs[name] = pred_mu_final.reshape(final_shape)
|
||||
outs[name + '_stds'] = pred_std_final.reshape(final_shape)
|
||||
|
||||
def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
|
||||
def parse_outputs(self, outs: dict[str, np.ndarray], input_keys: [str]) -> dict[str, np.ndarray]:
|
||||
""" Parse the model outputs into a dictionary of numpy arrays. The input_keys are used to determine how the output should be parsed. """
|
||||
self.parse_mdn('plan', outs, in_N=ModelConstants.PLAN_MHP_N, out_N=ModelConstants.PLAN_MHP_SELECTION,
|
||||
out_shape=(ModelConstants.IDX_N,ModelConstants.PLAN_WIDTH))
|
||||
self.parse_mdn('lane_lines', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_LANE_LINES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH))
|
||||
@@ -96,6 +97,8 @@ class Parser:
|
||||
out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH))
|
||||
if 'lat_planner_solution' in outs:
|
||||
self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH))
|
||||
if 'desired_curvature' in outs and "prev_desired_curv" in input_keys:
|
||||
self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,))
|
||||
for k in ['lead_prob', 'lane_lines_prob', 'meta']:
|
||||
self.parse_binary_crossentropy(k, outs)
|
||||
self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
|
||||
|
||||
0
selfdrive/modeld/runners/__init__.py
Normal file
0
selfdrive/modeld/runners/__init__.py
Normal file
115
selfdrive/modeld/runners/model_runner.py
Normal file
115
selfdrive/modeld/runners/model_runner.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import os
|
||||
from openpilot.system.hardware import TICI
|
||||
|
||||
#
|
||||
from tinygrad.tensor import Tensor, dtypes
|
||||
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
|
||||
from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner, ORT_TYPES_TO_NP_TYPES
|
||||
import pickle
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from abc import ABC, abstractmethod
|
||||
# from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLMem
|
||||
from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrameLegacy as DrivingModelFrame, CLMem
|
||||
|
||||
if TICI:
|
||||
os.environ['QCOM'] = '1'
|
||||
|
||||
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
|
||||
MODEL_PATH = Path(__file__).parent / '../models/supercombo.onnx'
|
||||
MODEL_PKL_PATH = Path(__file__).parent / '../models/supercombo_tinygrad.pkl'
|
||||
METADATA_PATH = Path(__file__).parent / '../models/supercombo_metadata.pkl'
|
||||
|
||||
|
||||
class ModelRunner(ABC):
|
||||
"""Abstract base class for model runners that defines the interface for running ML models."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the model runner with paths to model and metadata files."""
|
||||
with open(METADATA_PATH, 'rb') as f:
|
||||
self.model_metadata = pickle.load(f)
|
||||
self.input_shapes = self.model_metadata['input_shapes']
|
||||
self.output_slices = self.model_metadata['output_slices']
|
||||
self.inputs: dict = {}
|
||||
|
||||
@abstractmethod
|
||||
def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray])-> dict:
|
||||
"""Prepare inputs for model inference."""
|
||||
|
||||
@abstractmethod
|
||||
def run_model(self):
|
||||
"""Run model inference with prepared inputs."""
|
||||
|
||||
def slice_outputs(self, model_outputs: np.ndarray) -> dict:
|
||||
"""Slice model outputs according to metadata configuration."""
|
||||
parsed_outputs = {k: model_outputs[np.newaxis, v] for k, v in self.output_slices.items()}
|
||||
if SEND_RAW_PRED:
|
||||
parsed_outputs['raw_pred'] = model_outputs.copy()
|
||||
return parsed_outputs
|
||||
|
||||
|
||||
class TinygradRunner(ModelRunner):
|
||||
"""Tinygrad implementation of model runner for TICI hardware."""
|
||||
|
||||
def __init__(self, frames: dict[str, DrivingModelFrame] | None = None):
|
||||
super().__init__()
|
||||
# Load Tinygrad model
|
||||
with open(MODEL_PKL_PATH, "rb") as f:
|
||||
self.model_run = pickle.load(f)
|
||||
|
||||
self.input_to_dtype = {}
|
||||
self.input_to_device = {}
|
||||
|
||||
for idx, name in enumerate(self.model_run.captured.expected_names):
|
||||
self.input_to_dtype[name] = self.model_run.captured.expected_st_vars_dtype_device[idx][2] # 2 is the dtype
|
||||
self.input_to_device[name] = self.model_run.captured.expected_st_vars_dtype_device[idx][3] # 3 is the device
|
||||
|
||||
assert TICI or frames is not None, "TinygradRunner requires frames for non-TICI hardware"
|
||||
self.frames = frames
|
||||
self.is_memory_model = None # Use None to indicate that it hasn't been determined yet
|
||||
|
||||
def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray]) -> dict:
|
||||
if self.is_memory_model is None:
|
||||
self.is_memory_model = any(self.input_to_dtype[key] == dtypes.uint8 for key in imgs_cl)
|
||||
print(f"Memory model: {self.is_memory_model}")
|
||||
|
||||
# Initialize image tensors if not already done
|
||||
for key in imgs_cl:
|
||||
if TICI and self.is_memory_model and key not in self.inputs:
|
||||
self.inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8)
|
||||
elif not TICI or not self.is_memory_model:
|
||||
shape = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
|
||||
self.inputs[key] = Tensor(shape, device=self.input_to_device[key], dtype=self.input_to_dtype[key]).realize()
|
||||
|
||||
# Update numpy inputs
|
||||
for key, value in numpy_inputs.items():
|
||||
if key not in imgs_cl:
|
||||
self.inputs[key] = Tensor(value, device=self.input_to_device[key], dtype=self.input_to_dtype[key]).realize()
|
||||
|
||||
return self.inputs
|
||||
|
||||
def run_model(self):
|
||||
return self.model_run(**self.inputs).numpy().flatten()
|
||||
|
||||
|
||||
class ONNXRunner(ModelRunner):
|
||||
"""ONNX implementation of model runner for non-TICI hardware."""
|
||||
|
||||
def __init__(self, frames: dict[str, DrivingModelFrame]):
|
||||
super().__init__()
|
||||
self.runner = make_onnx_cpu_runner(MODEL_PATH)
|
||||
self.frames = frames
|
||||
|
||||
self.input_to_nptype = {
|
||||
model_input.name: ORT_TYPES_TO_NP_TYPES[model_input.type]
|
||||
for model_input in self.runner.get_inputs()
|
||||
}
|
||||
|
||||
def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray]) -> dict:
|
||||
self.inputs = numpy_inputs.copy()
|
||||
for key in imgs_cl:
|
||||
self.inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
|
||||
return self.inputs
|
||||
|
||||
def run_model(self):
|
||||
return self.runner.run(None, self.inputs)[0].flatten()
|
||||
@@ -4,7 +4,7 @@
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) {
|
||||
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height, bool use_float) {
|
||||
memset(s, 0, sizeof(*s));
|
||||
|
||||
s->width = width;
|
||||
@@ -15,7 +15,9 @@ void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int w
|
||||
"-cl-fast-relaxed-math -cl-denorms-are-zero "
|
||||
"-DTRANSFORMED_WIDTH=%d -DTRANSFORMED_HEIGHT=%d",
|
||||
width, height);
|
||||
cl_program prg = cl_program_from_file(ctx, device_id, LOADYUV_PATH, args);
|
||||
const char * loadyuv_path = use_float ? LOADYUV_FLOAT_PATH : LOADYUV_PATH;
|
||||
printf(" Use float: %d\n Using loadyuv_path: %s\n", use_float, loadyuv_path);
|
||||
cl_program prg = cl_program_from_file(ctx, device_id, loadyuv_path, args);
|
||||
|
||||
s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err));
|
||||
s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err));
|
||||
|
||||
@@ -7,7 +7,8 @@ typedef struct {
|
||||
cl_kernel loadys_krnl, loaduv_krnl, copy_krnl;
|
||||
} LoadYUVState;
|
||||
|
||||
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height);
|
||||
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height, bool use_float);
|
||||
inline void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) { loadyuv_init(s, ctx, device_id, width, height, false); };
|
||||
|
||||
void loadyuv_destroy(LoadYUVState* s);
|
||||
|
||||
|
||||
49
selfdrive/modeld/transforms/loadyuv_float.cl
Normal file
49
selfdrive/modeld/transforms/loadyuv_float.cl
Normal file
@@ -0,0 +1,49 @@
|
||||
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
|
||||
|
||||
__kernel void loadys(__global uchar8 const * const Y,
|
||||
__global float * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const int ois = gid * 8;
|
||||
const int oy = ois / TRANSFORMED_WIDTH;
|
||||
const int ox = ois % TRANSFORMED_WIDTH;
|
||||
|
||||
const uchar8 ys = Y[gid];
|
||||
const float8 ysf = convert_float8(ys);
|
||||
|
||||
// 02
|
||||
// 13
|
||||
|
||||
__global float* outy0;
|
||||
__global float* outy1;
|
||||
if ((oy & 1) == 0) {
|
||||
outy0 = out + out_offset; //y0
|
||||
outy1 = out + out_offset + UV_SIZE*2; //y2
|
||||
} else {
|
||||
outy0 = out + out_offset + UV_SIZE; //y1
|
||||
outy1 = out + out_offset + UV_SIZE*3; //y3
|
||||
}
|
||||
|
||||
vstore4(ysf.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ysf.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
}
|
||||
|
||||
__kernel void loaduv(__global uchar8 const * const in,
|
||||
__global float8 * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const uchar8 inv = in[gid];
|
||||
const float8 outv = convert_float8(inv);
|
||||
out[gid + out_offset / 8] = outv;
|
||||
}
|
||||
|
||||
__kernel void copy(__global float8 * in,
|
||||
__global float8 * out,
|
||||
int in_offset,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
out[gid + out_offset / 8] = in[gid + in_offset / 8];
|
||||
}
|
||||
@@ -83,8 +83,6 @@ void SoftwarePanelSP::handleBundleDownloadProgress() {
|
||||
if (bundle.getStatus() == cereal::ModelManagerSP::DownloadStatus::DOWNLOADING) {
|
||||
currentModelLblBtn->showDescription();
|
||||
}
|
||||
|
||||
currentModelLblBtn->setEnabled(!is_onroad && !isDownloading());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -128,7 +126,6 @@ void SoftwarePanelSP::handleCurrentModelLblBtnClicked() {
|
||||
bundleNames.append(index_to_bundle[index]);
|
||||
}
|
||||
|
||||
currentModelLblBtn->setEnabled(!is_onroad);
|
||||
currentModelLblBtn->setValue(GetActiveModelName());
|
||||
|
||||
const QString selectedBundleName = MultiOptionDialog::getSelection(
|
||||
@@ -161,6 +158,7 @@ void SoftwarePanelSP::updateLabels() {
|
||||
}
|
||||
|
||||
handleBundleDownloadProgress();
|
||||
currentModelLblBtn->setEnabled(!is_onroad && !isDownloading());
|
||||
currentModelLblBtn->setValue(GetActiveModelName());
|
||||
SoftwarePanel::updateLabels();
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ private:
|
||||
const SubMaster &sm = *(uiStateSP()->sm);
|
||||
const auto model_manager = sm["modelManagerSP"].getModelManagerSP();
|
||||
|
||||
if (!model_manager.hasSelectedBundle()) {
|
||||
if (!model_manager.hasSelectedBundle() || !sm.updated("modelManagerSP")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user