mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-06-10 13:34:21 +08:00
Compare commits
7 Commits
dockerize-
...
archive/mo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6e5b17a660 | ||
|
|
8f56d4d80b | ||
|
|
265a1a0ada | ||
|
|
0fca387821 | ||
|
|
d957a92fbf | ||
|
|
5809ab3baa | ||
|
|
570789a179 |
@@ -15,8 +15,7 @@ class ModelConstants:
|
||||
# model inputs constants
|
||||
MODEL_FREQ = 20
|
||||
FEATURE_LEN = 512
|
||||
FULL_HISTORY_BUFFER_LEN = 99
|
||||
HISTORY_BUFFER_LEN = 24
|
||||
HISTORY_BUFFER_LEN = 99
|
||||
DESIRE_LEN = 8
|
||||
TRAFFIC_CONVENTION_LEN = 2
|
||||
LAT_PLANNER_STATE_LEN = 4
|
||||
|
||||
@@ -3,22 +3,11 @@ import capnp
|
||||
import numpy as np
|
||||
from cereal import log
|
||||
from openpilot.sunnypilot.modeld.constants import ModelConstants, Plan, Meta
|
||||
from openpilot.selfdrive.controls.lib.drive_helpers import MIN_SPEED
|
||||
|
||||
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
|
||||
|
||||
ConfidenceClass = log.ModelDataV2.ConfidenceClass
|
||||
|
||||
def curv_from_psis(psi_target, psi_rate, vego, delay):
|
||||
vego = np.clip(vego, MIN_SPEED, np.inf)
|
||||
curv_from_psi = psi_target / (vego * delay) # epsilon to prevent divide-by-zero
|
||||
return 2*curv_from_psi - psi_rate / vego
|
||||
|
||||
def get_curvature_from_plan(plan, vego, delay):
|
||||
psi_target = np.interp(delay, ModelConstants.T_IDXS, plan[:, Plan.T_FROM_CURRENT_EULER][:, 2])
|
||||
psi_rate = plan[:, Plan.ORIENTATION_RATE][0, 2]
|
||||
return curv_from_psis(psi_target, psi_rate, vego, delay)
|
||||
|
||||
class PublishState:
|
||||
def __init__(self):
|
||||
self.disengage_buffer = np.zeros(ModelConstants.CONFIDENCE_BUFFER_LEN*ModelConstants.DISENGAGE_WIDTH, dtype=np.float32)
|
||||
@@ -59,24 +48,15 @@ def fill_xyz_poly(builder, degree, x, y, z):
|
||||
builder.yCoefficients = coeffs[:, 1].tolist()
|
||||
builder.zCoefficients = coeffs[:, 2].tolist()
|
||||
|
||||
def fill_lane_line_meta(builder, lane_lines, lane_line_probs):
|
||||
builder.leftY = lane_lines[1].y[0]
|
||||
builder.leftProb = lane_line_probs[1]
|
||||
builder.rightY = lane_lines[2].y[0]
|
||||
builder.rightProb = lane_line_probs[2]
|
||||
|
||||
def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._DynamicStructBuilder,
|
||||
net_output_data: dict[str, np.ndarray], v_ego: float, delay: float,
|
||||
publish_state: PublishState, vipc_frame_id: int, vipc_frame_id_extra: int,
|
||||
frame_id: int, frame_drop: float, timestamp_eof: int, model_execution_time: float,
|
||||
valid: bool) -> None:
|
||||
net_output_data: dict[str, np.ndarray], publish_state: PublishState,
|
||||
vipc_frame_id: int, vipc_frame_id_extra: int, frame_id: int, frame_drop: float,
|
||||
timestamp_eof: int, model_execution_time: float, valid: bool) -> None:
|
||||
frame_age = frame_id - vipc_frame_id if frame_id > vipc_frame_id else 0
|
||||
frame_drop_perc = frame_drop * 100
|
||||
extended_msg.valid = valid
|
||||
base_msg.valid = valid
|
||||
|
||||
desired_curv = float(get_curvature_from_plan(net_output_data['plan'][0], v_ego, delay))
|
||||
|
||||
driving_model_data = base_msg.drivingModelData
|
||||
|
||||
driving_model_data.frameId = vipc_frame_id
|
||||
@@ -85,7 +65,7 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
|
||||
driving_model_data.modelExecutionTime = model_execution_time
|
||||
|
||||
action = driving_model_data.action
|
||||
action.desiredCurvature = desired_curv
|
||||
action.desiredCurvature = float(net_output_data['desired_curvature'][0,0])
|
||||
|
||||
modelV2 = extended_msg.modelV2
|
||||
modelV2.frameId = vipc_frame_id
|
||||
@@ -120,7 +100,7 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
|
||||
|
||||
# lateral planning
|
||||
action = modelV2.action
|
||||
action.desiredCurvature = desired_curv
|
||||
action.desiredCurvature = float(net_output_data['desired_curvature'][0,0])
|
||||
|
||||
# times at X_IDXS according to model plan
|
||||
PLAN_T_IDXS = [np.nan] * ModelConstants.IDX_N
|
||||
@@ -150,7 +130,10 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
|
||||
modelV2.laneLineProbs = net_output_data['lane_lines_prob'][0,1::2].tolist()
|
||||
|
||||
lane_line_meta = driving_model_data.laneLineMeta
|
||||
fill_lane_line_meta(lane_line_meta, modelV2.laneLines, modelV2.laneLineProbs)
|
||||
lane_line_meta.leftY = modelV2.laneLines[1].y[0]
|
||||
lane_line_meta.leftProb = modelV2.laneLineProbs[1]
|
||||
lane_line_meta.rightY = modelV2.laneLines[2].y[0]
|
||||
lane_line_meta.rightProb = modelV2.laneLineProbs[2]
|
||||
|
||||
# road edges
|
||||
modelV2.init('roadEdges', 2)
|
||||
|
||||
@@ -17,12 +17,13 @@ from openpilot.common.realtime import config_realtime_process
|
||||
from openpilot.common.transformations.camera import DEVICE_CAMERAS
|
||||
from openpilot.common.transformations.model import get_warp_matrix
|
||||
from openpilot.system import sentry
|
||||
from openpilot.selfdrive.car.card import convert_to_capnp
|
||||
from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
|
||||
from openpilot.sunnypilot.modeld.runners import ModelRunner, Runtime
|
||||
from openpilot.sunnypilot.modeld.parse_model_outputs import Parser
|
||||
from openpilot.sunnypilot.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
|
||||
from openpilot.sunnypilot.modeld.constants import ModelConstants
|
||||
from openpilot.sunnypilot.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext
|
||||
from openpilot.sunnypilot.modeld.models.commonmodel_pyx import ModelFrame, CLContext
|
||||
|
||||
PROCESS_NAME = "sunnypilot.modeld.modeld"
|
||||
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
|
||||
@@ -33,7 +34,6 @@ MODEL_PATHS = {
|
||||
|
||||
METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'
|
||||
|
||||
|
||||
class FrameMeta:
|
||||
frame_id: int = 0
|
||||
timestamp_sof: int = 0
|
||||
@@ -44,24 +44,22 @@ class FrameMeta:
|
||||
self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof
|
||||
|
||||
class ModelState:
|
||||
frame: DrivingModelFrame
|
||||
wide_frame: DrivingModelFrame
|
||||
frame: ModelFrame
|
||||
wide_frame: ModelFrame
|
||||
inputs: dict[str, np.ndarray]
|
||||
output: np.ndarray
|
||||
prev_desire: np.ndarray # for tracking the rising edge of the pulse
|
||||
model: ModelRunner
|
||||
|
||||
def __init__(self, context: CLContext):
|
||||
self.frame = DrivingModelFrame(context)
|
||||
self.wide_frame = DrivingModelFrame(context)
|
||||
self.frame = ModelFrame(context)
|
||||
self.wide_frame = ModelFrame(context)
|
||||
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
|
||||
self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
|
||||
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
|
||||
|
||||
# img buffers are managed in openCL transform code
|
||||
self.inputs = {
|
||||
'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
|
||||
'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
|
||||
'lateral_control_params': np.zeros(ModelConstants.LATERAL_CONTROL_PARAMS_LEN, dtype=np.float32),
|
||||
'prev_desired_curv': np.zeros(ModelConstants.PREV_DESIRED_CURV_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
|
||||
'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
|
||||
}
|
||||
|
||||
@@ -89,17 +87,17 @@ class ModelState:
|
||||
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
|
||||
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
|
||||
inputs['desire'][0] = 0
|
||||
new_desire = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
|
||||
self.inputs['desire'][:-ModelConstants.DESIRE_LEN] = self.inputs['desire'][ModelConstants.DESIRE_LEN:]
|
||||
self.inputs['desire'][-ModelConstants.DESIRE_LEN:] = np.where(inputs['desire'] - self.prev_desire > .99, inputs['desire'], 0)
|
||||
self.prev_desire[:] = inputs['desire']
|
||||
|
||||
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
|
||||
self.desire_20Hz[-1] = new_desire
|
||||
self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
|
||||
|
||||
self.inputs['traffic_convention'][:] = inputs['traffic_convention']
|
||||
self.inputs['lateral_control_params'][:] = inputs['lateral_control_params']
|
||||
|
||||
# if getCLBuffer is not None, frame will be None
|
||||
self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")))
|
||||
self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))
|
||||
if wbuf is not None:
|
||||
self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))
|
||||
|
||||
if prepare_only:
|
||||
return None
|
||||
@@ -107,11 +105,10 @@ class ModelState:
|
||||
self.model.execute()
|
||||
outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
|
||||
|
||||
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
|
||||
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
|
||||
|
||||
idxs = np.arange(-4,-100,-4)[::-1]
|
||||
self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
|
||||
self.inputs['features_buffer'][:-ModelConstants.FEATURE_LEN] = self.inputs['features_buffer'][ModelConstants.FEATURE_LEN:]
|
||||
self.inputs['features_buffer'][-ModelConstants.FEATURE_LEN:] = outputs['hidden_state'][0, :]
|
||||
self.inputs['prev_desired_curv'][:-ModelConstants.PREV_DESIRED_CURV_LEN] = self.inputs['prev_desired_curv'][ModelConstants.PREV_DESIRED_CURV_LEN:]
|
||||
self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = outputs['desired_curvature'][0, :]
|
||||
return outputs
|
||||
|
||||
|
||||
@@ -174,9 +171,10 @@ def main(demo=False):
|
||||
|
||||
|
||||
if demo:
|
||||
CP = get_demo_car_params()
|
||||
CP = convert_to_capnp(get_demo_car_params())
|
||||
else:
|
||||
CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams)
|
||||
|
||||
cloudlog.info("modeld got CarParams: %s", CP.carName)
|
||||
|
||||
# TODO this needs more thought, use .2s extra for now to estimate other delays
|
||||
@@ -221,7 +219,7 @@ def main(demo=False):
|
||||
desire = DH.desire
|
||||
is_rhd = sm["driverMonitoringState"].isRHD
|
||||
frame_id = sm["roadCameraState"].frameId
|
||||
v_ego = max(sm["carState"].vEgo, 0.)
|
||||
lateral_control_params = np.array([sm["carState"].vEgo, steer_delay], dtype=np.float32)
|
||||
if sm.updated["liveCalibration"] and sm.seen['roadCameraState'] and sm.seen['deviceState']:
|
||||
device_from_calib_euler = np.array(sm["liveCalibration"].rpyCalib, dtype=np.float32)
|
||||
dc = DEVICE_CAMERAS[(str(sm['deviceState'].deviceType), str(sm['roadCameraState'].sensor))]
|
||||
@@ -252,6 +250,7 @@ def main(demo=False):
|
||||
inputs:dict[str, np.ndarray] = {
|
||||
'desire': vec_desire,
|
||||
'traffic_convention': traffic_convention,
|
||||
'lateral_control_params': lateral_control_params,
|
||||
}
|
||||
|
||||
mt1 = time.perf_counter()
|
||||
@@ -263,8 +262,7 @@ def main(demo=False):
|
||||
modelv2_send = messaging.new_message('modelV2')
|
||||
drivingdata_send = messaging.new_message('drivingModelData')
|
||||
posenet_send = messaging.new_message('cameraOdometry')
|
||||
fill_model_msg(drivingdata_send, modelv2_send, model_output, v_ego, steer_delay,
|
||||
publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id,
|
||||
fill_model_msg(drivingdata_send, modelv2_send, model_output, publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id,
|
||||
frame_drop_ratio, meta_main.timestamp_eof, model_execution_time, live_calib_seen)
|
||||
|
||||
desire_state = modelv2_send.modelV2.meta.desireState
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
## Neural networks in openpilot
|
||||
To view the architecture of the ONNX networks, you can use [netron](https://netron.app/)
|
||||
|
||||
## Supercombo
|
||||
### Supercombo input format (Full size: 799906 x float32)
|
||||
* **image stream**
|
||||
* Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256
|
||||
* Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256
|
||||
* Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2]
|
||||
* Channel 4 represents the half-res U channel
|
||||
* Channel 5 represents the half-res V channel
|
||||
* **wide image stream**
|
||||
* Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256
|
||||
* Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256
|
||||
* Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2]
|
||||
* Channel 4 represents the half-res U channel
|
||||
* Channel 5 represents the half-res V channel
|
||||
* **desire**
|
||||
* one-hot encoded buffer to command model to execute certain actions, bit needs to be sent for the past 5 seconds (at 20FPS) : 100 * 8
|
||||
* **traffic convention**
|
||||
* one-hot encoded vector to tell model whether traffic is right-hand or left-hand traffic : 2
|
||||
* **feature buffer**
|
||||
* A buffer of intermediate features that gets appended to the current feature to form a 5 seconds temporal context (at 20FPS) : 99 * 512
|
||||
|
||||
|
||||
### Supercombo output format (Full size: XXX x float32)
|
||||
Read [here](https://github.com/commaai/openpilot/blob/90af436a121164a51da9fa48d093c29f738adf6a/selfdrive/modeld/models/driving.h#L236) for more.
|
||||
|
||||
|
||||
## Driver Monitoring Model
|
||||
* .onnx model can be run with onnx runtimes
|
||||
* .dlc file is a pre-quantized model and only runs on qualcomm DSPs
|
||||
|
||||
### input format
|
||||
* single image W = 1440 H = 960 luminance channel (Y) from the planar YUV420 format:
|
||||
* full input size is 1440 * 960 = 1382400
|
||||
* normalized ranging from 0.0 to 1.0 in float32 (onnx runner) or ranging from 0 to 255 in uint8 (snpe runner)
|
||||
* camera calibration angles (roll, pitch, yaw) from liveCalibration: 3 x float32 inputs
|
||||
|
||||
### output format
|
||||
* 84 x float32 outputs = 2 + 41 * 2 ([parsing example](https://github.com/commaai/openpilot/blob/22ce4e17ba0d3bfcf37f8255a4dd1dc683fe0c38/selfdrive/modeld/models/dmonitoring.cc#L33))
|
||||
* for each person in the front seats (2 * 41)
|
||||
* face pose: 12 = 6 + 6
|
||||
* face orientation [pitch, yaw, roll] in camera frame: 3
|
||||
* face position [dx, dy] relative to image center: 2
|
||||
* normalized face size: 1
|
||||
* standard deviations for above outputs: 6
|
||||
* face visible probability: 1
|
||||
* eyes: 20 = (8 + 1) + (8 + 1) + 1 + 1
|
||||
* eye position and size, and their standard deviations: 8
|
||||
* eye visible probability: 1
|
||||
* eye closed probability: 1
|
||||
* wearing sunglasses probability: 1
|
||||
* face occluded probability: 1
|
||||
* touching wheel probability: 1
|
||||
* paying attention probability: 1
|
||||
* (deprecated) distracted probabilities: 2
|
||||
* using phone probability: 1
|
||||
* distracted probability: 1
|
||||
* common outputs 2
|
||||
* poor camera vision probability: 1
|
||||
* left hand drive probability: 1
|
||||
0
sunnypilot/modeld/models/__init__.py
Normal file
0
sunnypilot/modeld/models/__init__.py
Normal file
@@ -1,69 +1,50 @@
|
||||
#include "sunnypilot/modeld/models/commonmodel.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "common/clutil.h"
|
||||
|
||||
DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
//input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
|
||||
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
|
||||
region.origin = 4 * frame_size_bytes;
|
||||
region.size = frame_size_bytes;
|
||||
last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err));
|
||||
ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
|
||||
input_frames = std::make_unique<float[]>(buf_size);
|
||||
|
||||
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
|
||||
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
|
||||
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
|
||||
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
|
||||
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(float), NULL, &err));
|
||||
|
||||
transform_init(&transform, context, device_id);
|
||||
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
|
||||
uint8_t* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
|
||||
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
|
||||
}
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
|
||||
float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
|
||||
transform_queue(&this->transform, q,
|
||||
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
|
||||
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
|
||||
|
||||
if (output == NULL) {
|
||||
CL_CHECK(clEnqueueReadBuffer(q, img_buffer_20hz_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[0], 0, nullptr, nullptr));
|
||||
CL_CHECK(clEnqueueReadBuffer(q, last_img_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
|
||||
|
||||
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE);
|
||||
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
return &input_frames[0];
|
||||
} else {
|
||||
copy_queue(&loadyuv, q, img_buffer_20hz_cl, *output, 0, 0, frame_size_bytes);
|
||||
copy_queue(&loadyuv, q, last_img_cl, *output, 0, frame_size_bytes, frame_size_bytes);
|
||||
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, *output, true);
|
||||
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
|
||||
clFinish(q);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
DrivingModelFrame::~DrivingModelFrame() {
|
||||
deinit_transform();
|
||||
ModelFrame::~ModelFrame() {
|
||||
transform_destroy(&transform);
|
||||
loadyuv_destroy(&loadyuv);
|
||||
CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
|
||||
CL_CHECK(clReleaseMemObject(last_img_cl));
|
||||
CL_CHECK(clReleaseMemObject(net_input_cl));
|
||||
CL_CHECK(clReleaseMemObject(v_cl));
|
||||
CL_CHECK(clReleaseMemObject(u_cl));
|
||||
CL_CHECK(clReleaseMemObject(y_cl));
|
||||
CL_CHECK(clReleaseCommandQueue(q));
|
||||
}
|
||||
|
||||
|
||||
MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
//input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
|
||||
|
||||
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
uint8_t* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
|
||||
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
|
||||
CL_CHECK(clEnqueueReadBuffer(q, y_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), input_frames.get(), 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
//return &y_cl;
|
||||
return input_frames.get();
|
||||
}
|
||||
|
||||
MonitoringModelFrame::~MonitoringModelFrame() {
|
||||
deinit_transform();
|
||||
CL_CHECK(clReleaseCommandQueue(q));
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
#include <cfloat>
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
|
||||
#include <memory>
|
||||
|
||||
@@ -14,85 +13,24 @@
|
||||
#endif
|
||||
|
||||
#include "common/mat.h"
|
||||
#include "selfdrive/modeld/transforms/loadyuv.h"
|
||||
#include "selfdrive/modeld/transforms/transform.h"
|
||||
#include "sunnypilot/modeld/transforms/loadyuv.h"
|
||||
#include "sunnypilot/modeld/transforms/transform.h"
|
||||
|
||||
class ModelFrame {
|
||||
public:
|
||||
ModelFrame(cl_device_id device_id, cl_context context) {
|
||||
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
|
||||
}
|
||||
virtual ~ModelFrame() {}
|
||||
virtual uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) { return NULL; }
|
||||
/*
|
||||
uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
|
||||
CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
return &input_frames[0];
|
||||
}
|
||||
*/
|
||||
|
||||
int MODEL_WIDTH;
|
||||
int MODEL_HEIGHT;
|
||||
int MODEL_FRAME_SIZE;
|
||||
int buf_size;
|
||||
|
||||
protected:
|
||||
cl_mem y_cl, u_cl, v_cl;
|
||||
Transform transform;
|
||||
cl_command_queue q;
|
||||
std::unique_ptr<uint8_t[]> input_frames;
|
||||
|
||||
void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
|
||||
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
|
||||
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
|
||||
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
|
||||
transform_init(&transform, context, device_id);
|
||||
}
|
||||
|
||||
void deinit_transform() {
|
||||
transform_destroy(&transform);
|
||||
CL_CHECK(clReleaseMemObject(v_cl));
|
||||
CL_CHECK(clReleaseMemObject(u_cl));
|
||||
CL_CHECK(clReleaseMemObject(y_cl));
|
||||
}
|
||||
|
||||
void run_transform(cl_mem yuv_cl, int model_width, int model_height, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
|
||||
transform_queue(&transform, q,
|
||||
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
|
||||
y_cl, u_cl, v_cl, model_width, model_height, projection);
|
||||
}
|
||||
};
|
||||
|
||||
class DrivingModelFrame : public ModelFrame {
|
||||
public:
|
||||
DrivingModelFrame(cl_device_id device_id, cl_context context);
|
||||
~DrivingModelFrame();
|
||||
uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
|
||||
ModelFrame(cl_device_id device_id, cl_context context);
|
||||
~ModelFrame();
|
||||
float* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);
|
||||
|
||||
const int MODEL_WIDTH = 512;
|
||||
const int MODEL_HEIGHT = 256;
|
||||
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
|
||||
const int buf_size = MODEL_FRAME_SIZE * 2;
|
||||
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
|
||||
|
||||
private:
|
||||
Transform transform;
|
||||
LoadYUVState loadyuv;
|
||||
cl_mem img_buffer_20hz_cl, last_img_cl;//, input_frames_cl;
|
||||
cl_buffer_region region;
|
||||
};
|
||||
|
||||
class MonitoringModelFrame : public ModelFrame {
|
||||
public:
|
||||
MonitoringModelFrame(cl_device_id device_id, cl_context context);
|
||||
~MonitoringModelFrame();
|
||||
uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
|
||||
|
||||
const int MODEL_WIDTH = 1440;
|
||||
const int MODEL_HEIGHT = 960;
|
||||
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT;
|
||||
const int buf_size = MODEL_FRAME_SIZE;
|
||||
|
||||
private:
|
||||
// cl_mem input_frame_cl;
|
||||
cl_command_queue q;
|
||||
cl_mem y_cl, u_cl, v_cl, net_input_cl;
|
||||
std::unique_ptr<float[]> input_frames;
|
||||
};
|
||||
|
||||
@@ -14,13 +14,5 @@ cdef extern from "common/clutil.h":
|
||||
cdef extern from "sunnypilot/modeld/models/commonmodel.h":
|
||||
cppclass ModelFrame:
|
||||
int buf_size
|
||||
# unsigned char * buffer_from_cl(cl_mem*, int);
|
||||
unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
|
||||
|
||||
cppclass DrivingModelFrame:
|
||||
int buf_size
|
||||
DrivingModelFrame(cl_device_id, cl_context)
|
||||
|
||||
cppclass MonitoringModelFrame:
|
||||
int buf_size
|
||||
MonitoringModelFrame(cl_device_id, cl_context)
|
||||
ModelFrame(cl_device_id, cl_context)
|
||||
float * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
|
||||
|
||||
@@ -4,12 +4,11 @@
|
||||
import numpy as np
|
||||
cimport numpy as cnp
|
||||
from libc.string cimport memcpy
|
||||
from libc.stdint cimport uintptr_t
|
||||
|
||||
from msgq.visionipc.visionipc cimport cl_mem
|
||||
from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext
|
||||
from sunnypilot.modeld.models.commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context
|
||||
from sunnypilot.modeld.models.commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame
|
||||
from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context
|
||||
from .commonmodel cimport mat3, ModelFrame as cppModelFrame
|
||||
|
||||
|
||||
cdef class CLContext(BaseCLContext):
|
||||
@@ -24,17 +23,11 @@ cdef class CLMem:
|
||||
mem.mem = <cl_mem*> cmem
|
||||
return mem
|
||||
|
||||
@property
|
||||
def mem_address(self):
|
||||
return <uintptr_t>(self.mem)
|
||||
|
||||
def cl_from_visionbuf(VisionBuf buf):
|
||||
return CLMem.create(<void*>&buf.buf.buf_cl)
|
||||
|
||||
|
||||
cdef class ModelFrame:
|
||||
cdef cppModelFrame * frame
|
||||
cdef int buf_size
|
||||
|
||||
def __cinit__(self, CLContext context):
|
||||
self.frame = new cppModelFrame(context.device_id, context.context)
|
||||
|
||||
def __dealloc__(self):
|
||||
del self.frame
|
||||
@@ -42,35 +35,11 @@ cdef class ModelFrame:
|
||||
def prepare(self, VisionBuf buf, float[:] projection, CLMem output):
|
||||
cdef mat3 cprojection
|
||||
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
|
||||
cdef unsigned char * data
|
||||
cdef float * data
|
||||
if output is None:
|
||||
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, NULL)
|
||||
else:
|
||||
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem)
|
||||
if not data:
|
||||
return None
|
||||
|
||||
return np.asarray(<cnp.uint8_t[:self.buf_size]> data)
|
||||
# return CLMem.create(data)
|
||||
|
||||
# def buffer_from_cl(self, CLMem in_frames):
|
||||
# cdef unsigned char * data2
|
||||
# data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
|
||||
# return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)
|
||||
|
||||
|
||||
cdef class DrivingModelFrame(ModelFrame):
|
||||
cdef cppDrivingModelFrame * _frame
|
||||
|
||||
def __cinit__(self, CLContext context):
|
||||
self._frame = new cppDrivingModelFrame(context.device_id, context.context)
|
||||
self.frame = <cppModelFrame*>(self._frame)
|
||||
self.buf_size = self._frame.buf_size
|
||||
|
||||
cdef class MonitoringModelFrame(ModelFrame):
|
||||
cdef cppMonitoringModelFrame * _frame
|
||||
|
||||
def __cinit__(self, CLContext context):
|
||||
self._frame = new cppMonitoringModelFrame(context.device_id, context.context)
|
||||
self.frame = <cppModelFrame*>(self._frame)
|
||||
self.buf_size = self._frame.buf_size
|
||||
return np.asarray(<cnp.float32_t[:self.frame.buf_size]> data)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -96,6 +96,8 @@ class Parser:
|
||||
out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH))
|
||||
if 'lat_planner_solution' in outs:
|
||||
self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH))
|
||||
if 'desired_curvature' in outs:
|
||||
self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,))
|
||||
for k in ['lead_prob', 'lane_lines_prob', 'meta']:
|
||||
self.parse_binary_crossentropy(k, outs)
|
||||
self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
|
||||
|
||||
@@ -1,12 +1,39 @@
|
||||
import os
|
||||
import onnx
|
||||
import itertools
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
from typing import Any
|
||||
|
||||
from openpilot.sunnypilot.modeld.runners.runmodel_pyx import RunModel
|
||||
from openpilot.sunnypilot.modeld.runners.ort_helpers import convert_fp16_to_fp32, ORT_TYPES_TO_NP_TYPES
|
||||
|
||||
ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}
|
||||
|
||||
def attributeproto_fp16_to_fp32(attr):
|
||||
float32_list = np.frombuffer(attr.raw_data, dtype=np.float16)
|
||||
attr.data_type = 1
|
||||
attr.raw_data = float32_list.astype(np.float32).tobytes()
|
||||
|
||||
def convert_fp16_to_fp32(onnx_path_or_bytes):
|
||||
if isinstance(onnx_path_or_bytes, bytes):
|
||||
model = onnx.load_from_string(onnx_path_or_bytes)
|
||||
elif isinstance(onnx_path_or_bytes, str):
|
||||
model = onnx.load(onnx_path_or_bytes)
|
||||
|
||||
for i in model.graph.initializer:
|
||||
if i.data_type == 10:
|
||||
attributeproto_fp16_to_fp32(i)
|
||||
for i in itertools.chain(model.graph.input, model.graph.output):
|
||||
if i.type.tensor_type.elem_type == 10:
|
||||
i.type.tensor_type.elem_type = 1
|
||||
for i in model.graph.node:
|
||||
if i.op_type == 'Cast' and i.attribute[0].i == 10:
|
||||
i.attribute[0].i = 1
|
||||
for a in i.attribute:
|
||||
if hasattr(a, 't'):
|
||||
if a.t.data_type == 10:
|
||||
attributeproto_fp16_to_fp32(a.t)
|
||||
return model.SerializeToString()
|
||||
|
||||
def create_ort_session(path, fp16_to_fp32):
|
||||
os.environ["OMP_NUM_THREADS"] = "4"
|
||||
@@ -22,14 +49,14 @@ def create_ort_session(path, fp16_to_fp32):
|
||||
provider = 'OpenVINOExecutionProvider'
|
||||
elif 'CUDAExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
|
||||
options.intra_op_num_threads = 2
|
||||
provider = ('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'EXHAUSTIVE'})
|
||||
provider = ('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'})
|
||||
else:
|
||||
options.intra_op_num_threads = 2
|
||||
options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
|
||||
options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||
provider = 'CPUExecutionProvider'
|
||||
|
||||
model_data = convert_fp16_to_fp32(onnx.load(path)) if fp16_to_fp32 else path
|
||||
model_data = convert_fp16_to_fp32(path) if fp16_to_fp32 else path
|
||||
print("Onnx selected provider: ", [provider], file=sys.stderr)
|
||||
ort_session = ort.InferenceSession(model_data, options, providers=[provider])
|
||||
print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
import onnx
|
||||
import onnxruntime as ort
|
||||
import numpy as np
|
||||
import itertools
|
||||
|
||||
ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}
|
||||
|
||||
def attributeproto_fp16_to_fp32(attr):
|
||||
float32_list = np.frombuffer(attr.raw_data, dtype=np.float16)
|
||||
attr.data_type = 1
|
||||
attr.raw_data = float32_list.astype(np.float32).tobytes()
|
||||
|
||||
def convert_fp16_to_fp32(model):
|
||||
for i in model.graph.initializer:
|
||||
if i.data_type == 10:
|
||||
attributeproto_fp16_to_fp32(i)
|
||||
for i in itertools.chain(model.graph.input, model.graph.output):
|
||||
if i.type.tensor_type.elem_type == 10:
|
||||
i.type.tensor_type.elem_type = 1
|
||||
for i in model.graph.node:
|
||||
if i.op_type == 'Cast' and i.attribute[0].i == 10:
|
||||
i.attribute[0].i = 1
|
||||
for a in i.attribute:
|
||||
if hasattr(a, 't'):
|
||||
if a.t.data_type == 10:
|
||||
attributeproto_fp16_to_fp32(a.t)
|
||||
return model.SerializeToString()
|
||||
|
||||
|
||||
def make_onnx_cpu_runner(model_path):
|
||||
options = ort.SessionOptions()
|
||||
options.intra_op_num_threads = 4
|
||||
options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
|
||||
options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||
model_data = convert_fp16_to_fp32(onnx.load(model_path))
|
||||
return ort.InferenceSession(model_data, options, providers=['CPUExecutionProvider'])
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "selfdrive/modeld/transforms/loadyuv.h"
|
||||
#include "sunnypilot/modeld/transforms/loadyuv.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
@@ -33,8 +33,17 @@ void loadyuv_destroy(LoadYUVState* s) {
|
||||
|
||||
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
|
||||
cl_mem out_cl) {
|
||||
cl_mem out_cl, bool do_shift) {
|
||||
cl_int global_out_off = 0;
|
||||
if (do_shift) {
|
||||
// shift the image in slot 1 to slot 0, then place the new image in slot 1
|
||||
global_out_off += (s->width*s->height) + (s->width/2)*(s->height/2)*2;
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &out_cl));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_int), &global_out_off));
|
||||
const size_t copy_work_size = global_out_off/8;
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
|
||||
©_work_size, NULL, 0, 0, NULL));
|
||||
}
|
||||
|
||||
CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl));
|
||||
CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl));
|
||||
@@ -63,14 +72,3 @@ void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
|
||||
&loaduv_work_size, NULL, 0, 0, NULL));
|
||||
}
|
||||
|
||||
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
|
||||
size_t src_offset, size_t dst_offset, size_t size) {
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &src));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_mem), &dst));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 2, sizeof(cl_int), &src_offset));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 3, sizeof(cl_int), &dst_offset));
|
||||
const size_t copy_work_size = size/8;
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
|
||||
©_work_size, NULL, 0, 0, NULL));
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
|
||||
|
||||
__kernel void loadys(__global uchar8 const * const Y,
|
||||
__global uchar * out,
|
||||
__global float * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
@@ -10,12 +10,13 @@ __kernel void loadys(__global uchar8 const * const Y,
|
||||
const int ox = ois % TRANSFORMED_WIDTH;
|
||||
|
||||
const uchar8 ys = Y[gid];
|
||||
const float8 ysf = convert_float8(ys);
|
||||
|
||||
// 02
|
||||
// 13
|
||||
|
||||
__global uchar* outy0;
|
||||
__global uchar* outy1;
|
||||
__global float* outy0;
|
||||
__global float* outy1;
|
||||
if ((oy & 1) == 0) {
|
||||
outy0 = out + out_offset; //y0
|
||||
outy1 = out + out_offset + UV_SIZE*2; //y2
|
||||
@@ -24,24 +25,23 @@ __kernel void loadys(__global uchar8 const * const Y,
|
||||
outy1 = out + out_offset + UV_SIZE*3; //y3
|
||||
}
|
||||
|
||||
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ysf.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ysf.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
}
|
||||
|
||||
__kernel void loaduv(__global uchar8 const * const in,
|
||||
__global uchar8 * out,
|
||||
__global float8 * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const uchar8 inv = in[gid];
|
||||
out[gid + out_offset / 8] = inv;
|
||||
const float8 outv = convert_float8(inv);
|
||||
out[gid + out_offset / 8] = outv;
|
||||
}
|
||||
|
||||
__kernel void copy(__global uchar8 * in,
|
||||
__global uchar8 * out,
|
||||
int in_offset,
|
||||
int out_offset)
|
||||
__kernel void copy(__global float8 * inout,
|
||||
int in_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
out[gid + out_offset / 8] = in[gid + in_offset / 8];
|
||||
inout[gid] = inout[gid + in_offset / 8];
|
||||
}
|
||||
|
||||
@@ -13,8 +13,4 @@ void loadyuv_destroy(LoadYUVState* s);
|
||||
|
||||
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
|
||||
cl_mem out_cl);
|
||||
|
||||
|
||||
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
|
||||
size_t src_offset, size_t dst_offset, size_t size);
|
||||
cl_mem out_cl, bool do_shift = false);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "selfdrive/modeld/transforms/transform.h"
|
||||
#include "sunnypilot/modeld/transforms/transform.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
Reference in New Issue
Block a user