mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-06-08 13:44:54 +08:00
Compare commits
8 Commits
visuals-hi
...
archive/mo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
82d67bba87 | ||
|
|
341b92176e | ||
|
|
d9c22271d6 | ||
|
|
9dc961ab0a | ||
|
|
42d9c14515 | ||
|
|
35fbeaf9e2 | ||
|
|
e23e078c5b | ||
|
|
deaf0c485c |
@@ -3,13 +3,14 @@ import os
|
||||
from openpilot.system.hardware import TICI
|
||||
|
||||
#
|
||||
if TICI:
|
||||
USE_TINYGRAD = os.getenv('USE_TINYGRAD', True) or TICI
|
||||
if USE_TINYGRAD:
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.dtype import dtypes
|
||||
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
|
||||
os.environ['QCOM'] = '1'
|
||||
else:
|
||||
from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner
|
||||
from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner, ORT_TYPES_TO_NP_TYPES
|
||||
import time
|
||||
import pickle
|
||||
import numpy as np
|
||||
@@ -60,30 +61,36 @@ class ModelState:
|
||||
self.frames = {'input_imgs': DrivingModelFrame(context), 'big_input_imgs': DrivingModelFrame(context)}
|
||||
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
|
||||
self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
|
||||
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
|
||||
self.desire_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
|
||||
|
||||
# img buffers are managed in openCL transform code
|
||||
self.numpy_inputs = {
|
||||
'desire': np.zeros((1, (ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32),
|
||||
'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float32),
|
||||
'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32),
|
||||
}
|
||||
self.numpy_inputs = {}
|
||||
|
||||
with open(METADATA_PATH, 'rb') as f:
|
||||
model_metadata = pickle.load(f)
|
||||
self.input_shapes = model_metadata['input_shapes']
|
||||
|
||||
for key, shape in self.input_shapes.items():
|
||||
if key not in self.frames: # Managed by opencl
|
||||
self.numpy_inputs[key] = np.zeros(shape, dtype=np.float32)
|
||||
|
||||
self.output_slices = model_metadata['output_slices']
|
||||
net_output_size = model_metadata['output_shapes']['outputs'][1]
|
||||
self.output = np.zeros(net_output_size, dtype=np.float32)
|
||||
self.parser = Parser()
|
||||
|
||||
if TICI:
|
||||
if USE_TINYGRAD:
|
||||
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
|
||||
with open(MODEL_PKL_PATH, "rb") as f:
|
||||
self.model_run = pickle.load(f)
|
||||
else:
|
||||
self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH)
|
||||
self.onnx_model_metadata = {input.name: input.type for input in self.onnx_cpu_runner.get_inputs()}
|
||||
|
||||
num_elements = self.numpy_inputs['features_buffer'].shape[1]
|
||||
step_size = int(-100 / num_elements)
|
||||
self.full_features_20Hz_idxs = np.arange(step_size, step_size * (num_elements + 1), step_size)[::-1]
|
||||
self.desire_reshape_dims = (self.numpy_inputs['desire'].shape[0], self.numpy_inputs['desire'].shape[1], -1, self.numpy_inputs['desire'].shape[2])
|
||||
|
||||
def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
|
||||
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
|
||||
@@ -100,25 +107,32 @@ class ModelState:
|
||||
|
||||
self.desire_20Hz[:-1] = self.desire_20Hz[1:]
|
||||
self.desire_20Hz[-1] = new_desire
|
||||
self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape((1,25,4,-1)).max(axis=2)
|
||||
self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape(self.desire_reshape_dims).max(axis=2)
|
||||
|
||||
self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
|
||||
imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
|
||||
'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}
|
||||
|
||||
if TICI:
|
||||
if USE_TINYGRAD:
|
||||
# The imgs tensors are backed by opencl memory, only need init once
|
||||
for key in imgs_cl:
|
||||
if key not in self.tensor_inputs:
|
||||
self.tensor_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8)
|
||||
if not TICI or key not in self.tensor_inputs:
|
||||
index = self.model_run.captured.expected_names.index(key)
|
||||
_, _, dtype, device = self.model_run.captured.expected_st_vars_dtype_device[index]
|
||||
if TICI:
|
||||
self.tensor_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtype)
|
||||
else:
|
||||
shape = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
|
||||
self.tensor_inputs[key] = Tensor(shape, device=device, dtype=dtype).realize()
|
||||
else:
|
||||
for key in imgs_cl:
|
||||
self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
|
||||
dtype = self.onnx_model_metadata[key]
|
||||
self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).astype(ORT_TYPES_TO_NP_TYPES[dtype]).reshape(self.input_shapes[key])
|
||||
|
||||
if prepare_only:
|
||||
return None
|
||||
|
||||
if TICI:
|
||||
if USE_TINYGRAD:
|
||||
self.output = self.model_run(**self.tensor_inputs).numpy().flatten()
|
||||
else:
|
||||
self.output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten()
|
||||
@@ -128,8 +142,15 @@ class ModelState:
|
||||
self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
|
||||
self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]
|
||||
|
||||
idxs = np.arange(-4,-100,-4)[::-1]
|
||||
self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[idxs]
|
||||
self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[self.full_features_20Hz_idxs]
|
||||
if "desired_curvature" in outputs:
|
||||
if "prev_desired_curvs" in self.numpy_inputs.keys():
|
||||
self.numpy_inputs['prev_desired_curvs'][:-1] = self.numpy_inputs['prev_desired_curvs'][1:]
|
||||
self.numpy_inputs['prev_desired_curvs'][-1] = outputs['desired_curvature'][:, 0:1, None] # Reshape to (1,1,1)
|
||||
if "prev_desired_curv" in self.numpy_inputs.keys():
|
||||
# First shift everything
|
||||
self.numpy_inputs['prev_desired_curv'][:-ModelConstants.PREV_DESIRED_CURV_LEN] = self.numpy_inputs['prev_desired_curv'][ModelConstants.PREV_DESIRED_CURV_LEN:]
|
||||
self.numpy_inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = outputs['desired_curvature'][:, :1].reshape(1, -1, 1)
|
||||
return outputs
|
||||
|
||||
|
||||
@@ -240,6 +261,10 @@ def main(demo=False):
|
||||
is_rhd = sm["driverMonitoringState"].isRHD
|
||||
frame_id = sm["roadCameraState"].frameId
|
||||
v_ego = max(sm["carState"].vEgo, 0.)
|
||||
lateral_control_params = None #TODO-SP: hardcoded ,this shouldnt' be here this way. We should do it more dynamically
|
||||
if "lateral_control_params" in model.numpy_inputs.keys(): #TODO-SP: hardcoded ,this shouldnt' be here this way. We should do it more dynamically
|
||||
lateral_control_params = np.array([sm["carState"].vEgo, steer_delay], dtype=np.float32)
|
||||
|
||||
if sm.updated["liveCalibration"] and sm.seen['roadCameraState'] and sm.seen['deviceState']:
|
||||
device_from_calib_euler = np.array(sm["liveCalibration"].rpyCalib, dtype=np.float32)
|
||||
dc = DEVICE_CAMERAS[(str(sm['deviceState'].deviceType), str(sm['roadCameraState'].sensor))]
|
||||
@@ -271,6 +296,8 @@ def main(demo=False):
|
||||
'desire': vec_desire,
|
||||
'traffic_convention': traffic_convention,
|
||||
}
|
||||
if "lateral_control_params" in model.numpy_inputs.keys():
|
||||
inputs['lateral_control_params'] = lateral_control_params
|
||||
|
||||
mt1 = time.perf_counter()
|
||||
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
|
||||
|
||||
@@ -119,7 +119,13 @@ VideoDecoder::~VideoDecoder() {
|
||||
}
|
||||
|
||||
bool VideoDecoder::open(AVCodecParameters *codecpar, bool hw_decoder) {
|
||||
const AVCodec *decoder = avcodec_find_decoder(codecpar->codec_id);
|
||||
const AVCodec *decoder = avcodec_find_decoder_by_name("h264_mediacodec");
|
||||
if (!decoder) {
|
||||
decoder = avcodec_find_decoder_by_name("h264_qcom");
|
||||
}
|
||||
if (!decoder) {
|
||||
decoder = avcodec_find_decoder(codecpar->codec_id);
|
||||
}
|
||||
if (!decoder) return false;
|
||||
|
||||
decoder_ctx = avcodec_alloc_context3(decoder);
|
||||
@@ -127,6 +133,23 @@ bool VideoDecoder::open(AVCodecParameters *codecpar, bool hw_decoder) {
|
||||
rError("Failed to allocate or initialize codec context");
|
||||
return false;
|
||||
}
|
||||
|
||||
// More aggressive settings focused on reducing lag
|
||||
decoder_ctx->thread_count = static_cast<int>(std::min(std::thread::hardware_concurrency(), 16u));
|
||||
decoder_ctx->thread_type = FF_THREAD_FRAME | FF_THREAD_SLICE;
|
||||
|
||||
// Very aggressive frame dropping
|
||||
decoder_ctx->flags |= AV_CODEC_FLAG_LOW_DELAY;
|
||||
decoder_ctx->flags2 |= AV_CODEC_FLAG2_FAST;
|
||||
decoder_ctx->skip_frame = AVDISCARD_BIDIR; // More aggressive frame skipping
|
||||
decoder_ctx->skip_loop_filter = AVDISCARD_ALL;
|
||||
decoder_ctx->workaround_bugs = FF_BUG_AUTODETECT;
|
||||
|
||||
// Minimize buffering
|
||||
decoder_ctx->max_b_frames = 0;
|
||||
decoder_ctx->strict_std_compliance = FF_COMPLIANCE_UNOFFICIAL; // Allow faster non-standard optimizations
|
||||
decoder_ctx->flags |= AV_CODEC_FLAG_OUTPUT_CORRUPT; // Output frames even if slightly corrupted
|
||||
|
||||
width = (decoder_ctx->width + 3) & ~3;
|
||||
height = decoder_ctx->height;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user