Reapply tgwarp w NV12 fix (#37168)

* Revert "Revert tgwarp again (#37161)"

This reverts commit 45099e7fcd.

* Weird uv sizes

* Fix interleaving

* Fix on CPU

* make CPU safe

* Prevent corruption without clone

* Claude knows speeed

* fix interleaving

* less kernels

* blob caching

* This is still slightly faster

* Comment for blob cache
This commit is contained in:
Harald Schäfer
2026-02-12 08:59:19 -08:00
committed by GitHub
parent a46007d84d
commit af1583cdfc
48 changed files with 327 additions and 5240 deletions

View File

@@ -18,43 +18,6 @@ RUN /tmp/tools/install_ubuntu_dependencies.sh && \
cd /usr/lib/gcc/arm-none-eabi/* && \
rm -rf arm/ thumb/nofp thumb/v6* thumb/v8* thumb/v7+fp thumb/v7-r+fp.sp
# Add OpenCL
RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils \
alien \
unzip \
tar \
curl \
xz-utils \
dbus \
gcc-arm-none-eabi \
tmux \
vim \
libx11-6 \
wget \
&& rm -rf /var/lib/apt/lists/*
RUN mkdir -p /tmp/opencl-driver-intel && \
cd /tmp/opencl-driver-intel && \
wget https://github.com/intel/llvm/releases/download/2024-WW14/oclcpuexp-2024.17.3.0.09_rel.tar.gz && \
wget https://github.com/oneapi-src/oneTBB/releases/download/v2021.12.0/oneapi-tbb-2021.12.0-lin.tgz && \
mkdir -p /opt/intel/oclcpuexp_2024.17.3.0.09_rel && \
cd /opt/intel/oclcpuexp_2024.17.3.0.09_rel && \
tar -zxvf /tmp/opencl-driver-intel/oclcpuexp-2024.17.3.0.09_rel.tar.gz && \
mkdir -p /etc/OpenCL/vendors && \
echo /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64/libintelocl.so > /etc/OpenCL/vendors/intel_expcpu.icd && \
cd /opt/intel && \
tar -zxvf /tmp/opencl-driver-intel/oneapi-tbb-2021.12.0-lin.tgz && \
ln -s /opt/intel/oneapi-tbb-2021.12.0/lib/intel64/gcc4.8/libtbb.so /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 && \
ln -s /opt/intel/oneapi-tbb-2021.12.0/lib/intel64/gcc4.8/libtbbmalloc.so /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 && \
ln -s /opt/intel/oneapi-tbb-2021.12.0/lib/intel64/gcc4.8/libtbb.so.12 /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 && \
ln -s /opt/intel/oneapi-tbb-2021.12.0/lib/intel64/gcc4.8/libtbbmalloc.so.2 /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 && \
mkdir -p /etc/ld.so.conf.d && \
echo /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 > /etc/ld.so.conf.d/libintelopenclexp.conf && \
ldconfig -f /etc/ld.so.conf.d/libintelopenclexp.conf && \
cd / && \
rm -rf /tmp/opencl-driver-intel
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=graphics,utility,compute
ENV QTWEBENGINE_DISABLE_SANDBOX=1

View File

@@ -94,7 +94,6 @@ env = Environment(
# Arch-specific flags and paths
if arch == "larch64":
env.Append(CPPPATH=["#third_party/opencl/include"])
env.Append(LIBPATH=[
"/usr/local/lib",
"/system/vendor/lib64",

View File

@@ -5,7 +5,6 @@ common_libs = [
'swaglog.cc',
'util.cc',
'ratekeeper.cc',
'clutil.cc',
]
_common = env.Library('common', common_libs, LIBS="json11")

View File

@@ -1,98 +0,0 @@
#include "common/clutil.h"
#include <cassert>
#include <iostream>
#include <memory>
#include "common/util.h"
#include "common/swaglog.h"
namespace { // helper functions
template <typename Func, typename Id, typename Name>
std::string get_info(Func get_info_func, Id id, Name param_name) {
size_t size = 0;
CL_CHECK(get_info_func(id, param_name, 0, NULL, &size));
std::string info(size, '\0');
CL_CHECK(get_info_func(id, param_name, size, info.data(), NULL));
return info;
}
inline std::string get_platform_info(cl_platform_id id, cl_platform_info name) { return get_info(&clGetPlatformInfo, id, name); }
inline std::string get_device_info(cl_device_id id, cl_device_info name) { return get_info(&clGetDeviceInfo, id, name); }
void cl_print_info(cl_platform_id platform, cl_device_id device) {
size_t work_group_size = 0;
cl_device_type device_type = 0;
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL);
clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
const char *type_str = "Other...";
switch (device_type) {
case CL_DEVICE_TYPE_CPU: type_str ="CL_DEVICE_TYPE_CPU"; break;
case CL_DEVICE_TYPE_GPU: type_str = "CL_DEVICE_TYPE_GPU"; break;
case CL_DEVICE_TYPE_ACCELERATOR: type_str = "CL_DEVICE_TYPE_ACCELERATOR"; break;
}
LOGD("vendor: %s", get_platform_info(platform, CL_PLATFORM_VENDOR).c_str());
LOGD("platform version: %s", get_platform_info(platform, CL_PLATFORM_VERSION).c_str());
LOGD("profile: %s", get_platform_info(platform, CL_PLATFORM_PROFILE).c_str());
LOGD("extensions: %s", get_platform_info(platform, CL_PLATFORM_EXTENSIONS).c_str());
LOGD("name: %s", get_device_info(device, CL_DEVICE_NAME).c_str());
LOGD("device version: %s", get_device_info(device, CL_DEVICE_VERSION).c_str());
LOGD("max work group size: %zu", work_group_size);
LOGD("type = %d, %s", (int)device_type, type_str);
}
void cl_print_build_errors(cl_program program, cl_device_id device) {
cl_build_status status;
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL);
size_t log_size;
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
std::string log(log_size, '\0');
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, &log[0], NULL);
LOGE("build failed; status=%d, log: %s", status, log.c_str());
}
} // namespace
cl_device_id cl_get_device_id(cl_device_type device_type) {
cl_uint num_platforms = 0;
CL_CHECK(clGetPlatformIDs(0, NULL, &num_platforms));
std::unique_ptr<cl_platform_id[]> platform_ids = std::make_unique<cl_platform_id[]>(num_platforms);
CL_CHECK(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL));
for (size_t i = 0; i < num_platforms; ++i) {
LOGD("platform[%zu] CL_PLATFORM_NAME: %s", i, get_platform_info(platform_ids[i], CL_PLATFORM_NAME).c_str());
// Get first device
if (cl_device_id device_id = NULL; clGetDeviceIDs(platform_ids[i], device_type, 1, &device_id, NULL) == 0 && device_id) {
cl_print_info(platform_ids[i], device_id);
return device_id;
}
}
LOGE("No valid openCL platform found");
assert(0);
return nullptr;
}
cl_context cl_create_context(cl_device_id device_id) {
return CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
}
void cl_release_context(cl_context context) {
clReleaseContext(context);
}
cl_program cl_program_from_file(cl_context ctx, cl_device_id device_id, const char* path, const char* args) {
return cl_program_from_source(ctx, device_id, util::read_file(path), args);
}
cl_program cl_program_from_source(cl_context ctx, cl_device_id device_id, const std::string& src, const char* args) {
const char *csrc = src.c_str();
cl_program prg = CL_CHECK_ERR(clCreateProgramWithSource(ctx, 1, &csrc, NULL, &err));
if (int err = clBuildProgram(prg, 1, &device_id, args, NULL, NULL); err != 0) {
cl_print_build_errors(prg, device_id);
assert(0);
}
return prg;
}

View File

@@ -1,28 +0,0 @@
#pragma once
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include <string>
#define CL_CHECK(_expr) \
do { \
assert(CL_SUCCESS == (_expr)); \
} while (0)
#define CL_CHECK_ERR(_expr) \
({ \
cl_int err = CL_INVALID_VALUE; \
__typeof__(_expr) _ret = _expr; \
assert(_ret&& err == CL_SUCCESS); \
_ret; \
})
cl_device_id cl_get_device_id(cl_device_type device_type);
cl_context cl_create_context(cl_device_id device_id);
void cl_release_context(cl_context context);
cl_program cl_program_from_source(cl_context ctx, cl_device_id device_id, const std::string& src, const char* args = nullptr);
cl_program cl_program_from_file(cl_context ctx, cl_device_id device_id, const char* path, const char* args);

View File

@@ -1,85 +0,0 @@
#pragma once
typedef struct vec3 {
float v[3];
} vec3;
typedef struct vec4 {
float v[4];
} vec4;
typedef struct mat3 {
float v[3*3];
} mat3;
typedef struct mat4 {
float v[4*4];
} mat4;
static inline mat3 matmul3(const mat3 &a, const mat3 &b) {
mat3 ret = {{0.0}};
for (int r=0; r<3; r++) {
for (int c=0; c<3; c++) {
float v = 0.0;
for (int k=0; k<3; k++) {
v += a.v[r*3+k] * b.v[k*3+c];
}
ret.v[r*3+c] = v;
}
}
return ret;
}
static inline vec3 matvecmul3(const mat3 &a, const vec3 &b) {
vec3 ret = {{0.0}};
for (int r=0; r<3; r++) {
for (int c=0; c<3; c++) {
ret.v[r] += a.v[r*3+c] * b.v[c];
}
}
return ret;
}
static inline mat4 matmul(const mat4 &a, const mat4 &b) {
mat4 ret = {{0.0}};
for (int r=0; r<4; r++) {
for (int c=0; c<4; c++) {
float v = 0.0;
for (int k=0; k<4; k++) {
v += a.v[r*4+k] * b.v[k*4+c];
}
ret.v[r*4+c] = v;
}
}
return ret;
}
static inline vec4 matvecmul(const mat4 &a, const vec4 &b) {
vec4 ret = {{0.0}};
for (int r=0; r<4; r++) {
for (int c=0; c<4; c++) {
ret.v[r] += a.v[r*4+c] * b.v[c];
}
}
return ret;
}
// scales the input and output space of a transformation matrix
// that assumes pixel-center origin.
static inline mat3 transform_scale_buffer(const mat3 &in, float s) {
// in_pt = ( transform(out_pt/s + 0.5) - 0.5) * s
mat3 transform_out = {{
1.0f/s, 0.0f, 0.5f,
0.0f, 1.0f/s, 0.5f,
0.0f, 0.0f, 1.0f,
}};
mat3 transform_in = {{
s, 0.0f, -0.5f*s,
0.0f, s, -0.5f*s,
0.0f, 0.0f, 1.0f,
}};
return matmul3(transform_in, matmul3(in, transform_out));
}

View File

@@ -1,35 +1,12 @@
import os
import glob
Import('env', 'envCython', 'arch', 'cereal', 'messaging', 'common', 'visionipc')
Import('env', 'arch')
lenv = env.Clone()
lenvCython = envCython.Clone()
libs = [cereal, messaging, visionipc, common, 'capnp', 'kj', 'pthread']
frameworks = []
common_src = [
"models/commonmodel.cc",
"transforms/loadyuv.cc",
"transforms/transform.cc",
]
# OpenCL is a framework on Mac
if arch == "Darwin":
frameworks += ['OpenCL']
else:
libs += ['OpenCL']
# Set path definitions
for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transforms/loadyuv.cl'}.items():
for xenv in (lenv, lenvCython):
xenv['CXXFLAGS'].append(f'-D{pathdef}_PATH=\\"{File(fn).abspath}\\"')
# Compile cython
cython_libs = envCython["LIBS"] + libs
commonmodel_lib = lenv.Library('commonmodel', common_src)
lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks)
tinygrad_files = sorted(["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x])
tinygrad_root = env.Dir("#").abspath
tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=tinygrad_root)
if 'pycache' not in x and os.path.isfile(os.path.join(tinygrad_root, x))]
# Get model metadata
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
@@ -38,22 +15,36 @@ for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_files, cmd)
# compile warp
# THREADS=0 is need to prevent bug: https://github.com/tinygrad/tinygrad/issues/14689
tg_flags = {
'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0',
'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}', # tinygrad calls brew which needs a $HOME in the env
}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0')
image_flag = {
'larch64': 'IMAGE=2',
}.get(arch, 'IMAGE=0')
script_files = [File(Dir("#selfdrive/modeld").File("compile_warp.py").abspath)]
cmd = f'{tg_flags} python3 {Dir("#selfdrive/modeld").abspath}/compile_warp.py '
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
warp_targets = []
for cam in [_ar_ox_fisheye, _os_fisheye]:
w, h = cam.width, cam.height
warp_targets += [File(f"models/warp_{w}x{h}_tinygrad.pkl").abspath, File(f"models/dm_warp_{w}x{h}_tinygrad.pkl").abspath]
lenv.Command(warp_targets, tinygrad_files + script_files, cmd)
def tg_compile(flags, model_name):
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
fn = File(f"models/{model_name}").abspath
return lenv.Command(
fn + "_tinygrad.pkl",
[fn + ".onnx"] + tinygrad_files,
f'{pythonpath_string} {flags} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
f'{pythonpath_string} {flags} {image_flag} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
)
# Compile small models
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
flags = {
'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 IMAGE=2 JIT_BATCH_SIZE=0',
'Darwin': f'DEV=CPU HOME={os.path.expanduser("~")}', # tinygrad calls brew which needs a $HOME in the env
}.get(arch, 'DEV=CPU CPU_LLVM=1')
tg_compile(flags, model_name)
tg_compile(tg_flags, model_name)
# Compile BIG model if USB GPU is available
if "USBGPU" in os.environ:

209
selfdrive/modeld/compile_warp.py Executable file
View File

@@ -0,0 +1,209 @@
#!/usr/bin/env python3
import time
import pickle
import numpy as np
from pathlib import Path
from tinygrad.tensor import Tensor
from tinygrad.helpers import Context
from tinygrad.device import Device
from tinygrad.engine.jit import TinyJit
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE, DM_INPUT_SIZE
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
MODELS_DIR = Path(__file__).parent / 'models'
CAMERA_CONFIGS = [
(_ar_ox_fisheye.width, _ar_ox_fisheye.height), # tici: 1928x1208
(_os_fisheye.width, _os_fisheye.height), # mici: 1344x760
]
UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)
IMG_BUFFER_SHAPE = (30, MEDMODEL_INPUT_SIZE[1] // 2, MEDMODEL_INPUT_SIZE[0] // 2)
def warp_pkl_path(w, h):
return MODELS_DIR / f'warp_{w}x{h}_tinygrad.pkl'
def dm_warp_pkl_path(w, h):
return MODELS_DIR / f'dm_warp_{w}x{h}_tinygrad.pkl'
def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad):
w_dst, h_dst = dst_shape
h_src, w_src = src_shape
x = Tensor.arange(w_dst).reshape(1, w_dst).expand(h_dst, w_dst).reshape(-1)
y = Tensor.arange(h_dst).reshape(h_dst, 1).expand(h_dst, w_dst).reshape(-1)
# inline 3x3 matmul as elementwise to avoid reduce op (enables fusion with gather)
src_x = M_inv[0, 0] * x + M_inv[0, 1] * y + M_inv[0, 2]
src_y = M_inv[1, 0] * x + M_inv[1, 1] * y + M_inv[1, 2]
src_w = M_inv[2, 0] * x + M_inv[2, 1] * y + M_inv[2, 2]
src_x = src_x / src_w
src_y = src_y / src_w
x_nn_clipped = Tensor.round(src_x).clip(0, w_src - 1).cast('int')
y_nn_clipped = Tensor.round(src_y).clip(0, h_src - 1).cast('int')
idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped
return src_flat[idx]
def frames_to_tensor(frames, model_w, model_h):
H = (frames.shape[0] * 2) // 3
W = frames.shape[1]
in_img1 = Tensor.cat(frames[0:H:2, 0::2],
frames[1:H:2, 0::2],
frames[0:H:2, 1::2],
frames[1:H:2, 1::2],
frames[H:H+H//4].reshape((H//2, W//2)),
frames[H+H//4:H+H//2].reshape((H//2, W//2)), dim=0).reshape((6, H//2, W//2))
return in_img1
def make_frame_prepare(cam_w, cam_h, model_w, model_h):
stride, y_height, uv_height, _ = get_nv12_info(cam_w, cam_h)
uv_offset = stride * y_height
stride_pad = stride - cam_w
def frame_prepare_tinygrad(input_frame, M_inv):
# UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]])
# deinterleave NV12 UV plane (UVUV... -> separate U, V)
uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
with Context(SPLIT_REDUCEOP=0):
y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
M_inv, (model_w, model_h),
(cam_h, cam_w), stride_pad).realize()
u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
M_inv_uv, (model_w//2, model_h//2),
(cam_h//2, cam_w//2), 0).realize()
v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
M_inv_uv, (model_w//2, model_h//2),
(cam_h//2, cam_w//2), 0).realize()
yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
tensor = frames_to_tensor(yuv, model_w, model_h)
return tensor
return frame_prepare_tinygrad
def make_update_img_input(frame_prepare, model_w, model_h):
def update_img_input_tinygrad(tensor, frame, M_inv):
M_inv = M_inv.to(Device.DEFAULT)
new_img = frame_prepare(frame, M_inv)
full_buffer = tensor[6:].cat(new_img, dim=0).contiguous()
return full_buffer, Tensor.cat(full_buffer[:6], full_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
return update_img_input_tinygrad
def make_update_both_imgs(frame_prepare, model_w, model_h):
update_img = make_update_img_input(frame_prepare, model_w, model_h)
def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
calib_big_img_buffer, new_big_img, M_inv_big):
calib_img_buffer, calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
calib_big_img_buffer, calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
return calib_img_buffer, calib_img_pair, calib_big_img_buffer, calib_big_img_pair
return update_both_imgs_tinygrad
def make_warp_dm(cam_w, cam_h, dm_w, dm_h):
stride, y_height, _, _ = get_nv12_info(cam_w, cam_h)
stride_pad = stride - cam_w
def warp_dm(input_frame, M_inv):
M_inv = M_inv.to(Device.DEFAULT)
result = warp_perspective_tinygrad(input_frame[:cam_h*stride], M_inv, (dm_w, dm_h), (cam_h, cam_w), stride_pad).reshape(-1, dm_h * dm_w)
return result
return warp_dm
def compile_modeld_warp(cam_w, cam_h):
model_w, model_h = MEDMODEL_INPUT_SIZE
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
print(f"Compiling modeld warp for {cam_w}x{cam_h}...")
frame_prepare = make_frame_prepare(cam_w, cam_h, model_w, model_h)
update_both_imgs = make_update_both_imgs(frame_prepare, model_w, model_h)
update_img_jit = TinyJit(update_both_imgs, prune=True)
full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
big_full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
big_full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
for i in range(10):
new_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
img_inputs = [full_buffer,
Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
new_big_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
big_img_inputs = [big_full_buffer,
Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
inputs = img_inputs + big_img_inputs
Device.default.synchronize()
inputs_np = [x.numpy() for x in inputs]
inputs_np[0] = full_buffer_np
inputs_np[3] = big_full_buffer_np
st = time.perf_counter()
out = update_img_jit(*inputs)
full_buffer = out[0].contiguous().realize().clone()
big_full_buffer = out[2].contiguous().realize().clone()
mt = time.perf_counter()
Device.default.synchronize()
et = time.perf_counter()
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
pkl_path = warp_pkl_path(cam_w, cam_h)
with open(pkl_path, "wb") as f:
pickle.dump(update_img_jit, f)
print(f" Saved to {pkl_path}")
jit = pickle.load(open(pkl_path, "rb"))
jit(*inputs)
def compile_dm_warp(cam_w, cam_h):
dm_w, dm_h = DM_INPUT_SIZE
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
print(f"Compiling DM warp for {cam_w}x{cam_h}...")
warp_dm = make_warp_dm(cam_w, cam_h, dm_w, dm_h)
warp_dm_jit = TinyJit(warp_dm, prune=True)
for i in range(10):
inputs = [Tensor.from_blob((32 * Tensor.randn(yuv_size,) + 128).cast(dtype='uint8').realize().numpy().ctypes.data, (yuv_size,), dtype='uint8'),
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
Device.default.synchronize()
st = time.perf_counter()
warp_dm_jit(*inputs)
mt = time.perf_counter()
Device.default.synchronize()
et = time.perf_counter()
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
pkl_path = dm_warp_pkl_path(cam_w, cam_h)
with open(pkl_path, "wb") as f:
pickle.dump(warp_dm_jit, f)
print(f" Saved to {pkl_path}")
def run_and_save_pickle():
for cam_w, cam_h in CAMERA_CONFIGS:
compile_modeld_warp(cam_w, cam_h)
compile_dm_warp(cam_w, cam_h)
if __name__ == "__main__":
run_and_save_pickle()

View File

@@ -3,7 +3,6 @@ import os
from openpilot.system.hardware import TICI
os.environ['DEV'] = 'QCOM' if TICI else 'CPU'
from tinygrad.tensor import Tensor
from tinygrad.dtype import dtypes
import time
import pickle
import numpy as np
@@ -16,32 +15,35 @@ from openpilot.common.swaglog import cloudlog
from openpilot.common.realtime import config_realtime_process
from openpilot.common.transformations.model import dmonitoringmodel_intrinsics
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid, safe_exp
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl'
METADATA_PATH = Path(__file__).parent / 'models/dmonitoring_model_metadata.pkl'
MODELS_DIR = Path(__file__).parent / 'models'
class ModelState:
inputs: dict[str, np.ndarray]
output: np.ndarray
def __init__(self, cl_ctx):
def __init__(self):
with open(METADATA_PATH, 'rb') as f:
model_metadata = pickle.load(f)
self.input_shapes = model_metadata['input_shapes']
self.output_slices = model_metadata['output_slices']
self.frame = MonitoringModelFrame(cl_ctx)
self.numpy_inputs = {
'calib': np.zeros(self.input_shapes['calib'], dtype=np.float32),
}
self.warp_inputs_np = {'transform': np.zeros((3,3), dtype=np.float32)}
self.warp_inputs = {k: Tensor(v, device='NPY') for k,v in self.warp_inputs_np.items()}
self.frame_buf_params = None
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
self._blob_cache : dict[int, Tensor] = {}
self.image_warp = None
with open(MODEL_PKL_PATH, "rb") as f:
self.model_run = pickle.load(f)
@@ -50,16 +52,20 @@ class ModelState:
t1 = time.perf_counter()
input_img_cl = self.frame.prepare(buf, transform.flatten())
if TICI:
# The imgs tensors are backed by opencl memory, only need init once
if 'input_img' not in self.tensor_inputs:
self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, self.input_shapes['input_img'], dtype=dtypes.uint8)
else:
self.tensor_inputs['input_img'] = Tensor(self.frame.buffer_from_cl(input_img_cl).reshape(self.input_shapes['input_img']), dtype=dtypes.uint8).realize()
if self.image_warp is None:
self.frame_buf_params = get_nv12_info(buf.width, buf.height)
warp_path = MODELS_DIR / f'dm_warp_{buf.width}x{buf.height}_tinygrad.pkl'
with open(warp_path, "rb") as f:
self.image_warp = pickle.load(f)
ptr = buf.data.ctypes.data
# There is a ringbuffer of imgs, just cache tensors pointing to all of them
if ptr not in self._blob_cache:
self._blob_cache[ptr] = Tensor.from_blob(ptr, (self.frame_buf_params[3],), dtype='uint8')
self.warp_inputs_np['transform'][:] = transform[:]
self.tensor_inputs['input_img'] = self.image_warp(self._blob_cache[ptr], self.warp_inputs['transform']).realize()
output = self.model_run(**self.tensor_inputs).numpy().flatten()
output = self.model_run(**self.tensor_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
t2 = time.perf_counter()
return output, t2 - t1
@@ -107,12 +113,11 @@ def get_driverstate_packet(model_output, frame_id: int, location_ts: int, exec_t
def main():
config_realtime_process(7, 5)
cl_context = CLContext()
model = ModelState(cl_context)
model = ModelState()
cloudlog.warning("models loaded, dmonitoringmodeld starting")
cloudlog.warning("connecting to driver stream")
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True, cl_context)
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True)
while not vipc_client.connect(False):
time.sleep(0.1)
assert vipc_client.is_connected()

View File

@@ -7,7 +7,6 @@ if USBGPU:
os.environ['DEV'] = 'AMD'
os.environ['AMD_IFACE'] = 'USB'
from tinygrad.tensor import Tensor
from tinygrad.dtype import dtypes
import time
import pickle
import numpy as np
@@ -22,14 +21,13 @@ from openpilot.common.params import Params
from openpilot.common.filter_simple import FirstOrderFilter
from openpilot.common.realtime import config_realtime_process, DT_MDL
from openpilot.common.transformations.camera import DEVICE_CAMERAS
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
from openpilot.common.transformations.model import get_warp_matrix
from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, smooth_value, get_curvature_from_plan
from openpilot.selfdrive.modeld.parse_model_outputs import Parser
from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
from openpilot.selfdrive.modeld.constants import ModelConstants, Plan
from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
PROCESS_NAME = "selfdrive.modeld.modeld"
@@ -39,11 +37,15 @@ VISION_PKL_PATH = Path(__file__).parent / 'models/driving_vision_tinygrad.pkl'
POLICY_PKL_PATH = Path(__file__).parent / 'models/driving_policy_tinygrad.pkl'
VISION_METADATA_PATH = Path(__file__).parent / 'models/driving_vision_metadata.pkl'
POLICY_METADATA_PATH = Path(__file__).parent / 'models/driving_policy_metadata.pkl'
MODELS_DIR = Path(__file__).parent / 'models'
LAT_SMOOTH_SECONDS = 0.0
LONG_SMOOTH_SECONDS = 0.3
MIN_LAT_CONTROL_SPEED = 0.3
IMG_QUEUE_SHAPE = (6*(ModelConstants.MODEL_RUN_FREQ//ModelConstants.MODEL_CONTEXT_FREQ + 1), 128, 256)
assert IMG_QUEUE_SHAPE[0] == 30
def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action,
lat_action_t: float, long_action_t: float, v_ego: float) -> log.ModelDataV2.Action:
@@ -136,12 +138,11 @@ class InputQueues:
return out
class ModelState:
frames: dict[str, DrivingModelFrame]
inputs: dict[str, np.ndarray]
output: np.ndarray
prev_desire: np.ndarray # for tracking the rising edge of the pulse
def __init__(self, context: CLContext):
def __init__(self):
with open(VISION_METADATA_PATH, 'rb') as f:
vision_metadata = pickle.load(f)
self.vision_input_shapes = vision_metadata['input_shapes']
@@ -155,7 +156,6 @@ class ModelState:
self.policy_output_slices = policy_metadata['output_slices']
policy_output_size = policy_metadata['output_shapes']['outputs'][1]
self.frames = {name: DrivingModelFrame(context, ModelConstants.MODEL_RUN_FREQ//ModelConstants.MODEL_CONTEXT_FREQ) for name in self.vision_input_names}
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
# policy inputs
@@ -165,13 +165,18 @@ class ModelState:
self.full_input_queues.update_dtypes_and_shapes({k: self.numpy_inputs[k].dtype}, {k: self.numpy_inputs[k].shape})
self.full_input_queues.reset()
# img buffers are managed in openCL transform code
self.vision_inputs: dict[str, Tensor] = {}
self.img_queues = {'img': Tensor.zeros(IMG_QUEUE_SHAPE, dtype='uint8').contiguous().realize(),
'big_img': Tensor.zeros(IMG_QUEUE_SHAPE, dtype='uint8').contiguous().realize()}
self.full_frames : dict[str, Tensor] = {}
self._blob_cache : dict[int, Tensor] = {}
self.transforms_np = {k: np.zeros((3,3), dtype=np.float32) for k in self.img_queues}
self.transforms = {k: Tensor(v, device='NPY').realize() for k, v in self.transforms_np.items()}
self.vision_output = np.zeros(vision_output_size, dtype=np.float32)
self.policy_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
self.policy_output = np.zeros(policy_output_size, dtype=np.float32)
self.parser = Parser()
self.frame_buf_params : dict[str, tuple[int, int, int, int]] = {}
self.update_imgs = None
with open(VISION_PKL_PATH, "rb") as f:
self.vision_run = pickle.load(f)
@@ -188,23 +193,33 @@ class ModelState:
inputs['desire_pulse'][0] = 0
new_desire = np.where(inputs['desire_pulse'] - self.prev_desire > .99, inputs['desire_pulse'], 0)
self.prev_desire[:] = inputs['desire_pulse']
if self.update_imgs is None:
for key in bufs.keys():
w, h = bufs[key].width, bufs[key].height
self.frame_buf_params[key] = get_nv12_info(w, h)
warp_path = MODELS_DIR / f'warp_{w}x{h}_tinygrad.pkl'
with open(warp_path, "rb") as f:
self.update_imgs = pickle.load(f)
imgs_cl = {name: self.frames[name].prepare(bufs[name], transforms[name].flatten()) for name in self.vision_input_names}
for key in bufs.keys():
ptr = bufs[key].data.ctypes.data
yuv_size = self.frame_buf_params[key][3]
# There is a ringbuffer of imgs, just cache tensors pointing to all of them
if ptr not in self._blob_cache:
self._blob_cache[ptr] = Tensor.from_blob(ptr, (yuv_size,), dtype='uint8')
self.full_frames[key] = self._blob_cache[ptr]
for key in bufs.keys():
self.transforms_np[key][:,:] = transforms[key][:,:]
if TICI and not USBGPU:
# The imgs tensors are backed by opencl memory, only need init once
for key in imgs_cl:
if key not in self.vision_inputs:
self.vision_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.vision_input_shapes[key], dtype=dtypes.uint8)
else:
for key in imgs_cl:
frame_input = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.vision_input_shapes[key])
self.vision_inputs[key] = Tensor(frame_input, dtype=dtypes.uint8).realize()
out = self.update_imgs(self.img_queues['img'], self.full_frames['img'], self.transforms['img'],
self.img_queues['big_img'], self.full_frames['big_img'], self.transforms['big_img'])
self.img_queues['img'], self.img_queues['big_img'] = out[0].realize(), out[2].realize()
vision_inputs = {'img': out[1], 'big_img': out[3]}
if prepare_only:
return None
self.vision_output = self.vision_run(**self.vision_inputs).contiguous().realize().uop.base.buffer.numpy()
self.vision_output = self.vision_run(**vision_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(self.vision_output, self.vision_output_slices))
self.full_input_queues.enqueue({'features_buffer': vision_outputs_dict['hidden_state'], 'desire_pulse': new_desire})
@@ -212,9 +227,8 @@ class ModelState:
self.numpy_inputs[k][:] = self.full_input_queues.get(k)[k]
self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
self.policy_output = self.policy_run(**self.policy_inputs).numpy().flatten()
self.policy_output = self.policy_run(**self.policy_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(self.policy_output, self.policy_output_slices))
combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict}
if SEND_RAW_PRED:
combined_outputs_dict['raw_pred'] = np.concatenate([self.vision_output.copy(), self.policy_output.copy()])
@@ -231,10 +245,8 @@ def main(demo=False):
config_realtime_process(7, 54)
st = time.monotonic()
cloudlog.warning("setting up CL context")
cl_context = CLContext()
cloudlog.warning("CL context ready; loading model")
model = ModelState(cl_context)
cloudlog.warning("loading model")
model = ModelState()
cloudlog.warning(f"models loaded in {time.monotonic() - st:.1f}s, modeld starting")
# visionipc clients
@@ -247,8 +259,8 @@ def main(demo=False):
time.sleep(.1)
vipc_client_main_stream = VisionStreamType.VISION_STREAM_WIDE_ROAD if main_wide_camera else VisionStreamType.VISION_STREAM_ROAD
vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True, cl_context)
vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False, cl_context)
vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True)
vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False)
cloudlog.warning(f"vision stream set up, main_wide_camera: {main_wide_camera}, use_extra_client: {use_extra_client}")
while not vipc_client_main.connect(False):

View File

@@ -1,64 +0,0 @@
#include "selfdrive/modeld/models/commonmodel.h"
#include <cmath>
#include <cstring>
#include "common/clutil.h"
DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context, int _temporal_skip) : ModelFrame(device_id, context) {
input_frames = std::make_unique<uint8_t[]>(buf_size);
temporal_skip = _temporal_skip;
input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (temporal_skip+1)*frame_size_bytes, NULL, &err));
region.origin = temporal_skip * frame_size_bytes;
region.size = frame_size_bytes;
last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err));
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
}
cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
for (int i = 0; i < temporal_skip; i++) {
CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
}
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes);
copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes, frame_size_bytes);
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
clFinish(q);
return &input_frames_cl;
}
DrivingModelFrame::~DrivingModelFrame() {
deinit_transform();
loadyuv_destroy(&loadyuv);
CL_CHECK(clReleaseMemObject(input_frames_cl));
CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
CL_CHECK(clReleaseMemObject(last_img_cl));
CL_CHECK(clReleaseCommandQueue(q));
}
MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
input_frames = std::make_unique<uint8_t[]>(buf_size);
input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
}
cl_mem* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
clFinish(q);
return &y_cl;
}
MonitoringModelFrame::~MonitoringModelFrame() {
deinit_transform();
CL_CHECK(clReleaseMemObject(input_frame_cl));
CL_CHECK(clReleaseCommandQueue(q));
}

View File

@@ -1,97 +0,0 @@
#pragma once
#include <cfloat>
#include <cstdlib>
#include <cassert>
#include <memory>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "common/mat.h"
#include "selfdrive/modeld/transforms/loadyuv.h"
#include "selfdrive/modeld/transforms/transform.h"
class ModelFrame {
public:
ModelFrame(cl_device_id device_id, cl_context context) {
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
}
virtual ~ModelFrame() {}
virtual cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { return NULL; }
uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
clFinish(q);
return &input_frames[0];
}
int MODEL_WIDTH;
int MODEL_HEIGHT;
int MODEL_FRAME_SIZE;
int buf_size;
protected:
cl_mem y_cl, u_cl, v_cl;
Transform transform;
cl_command_queue q;
std::unique_ptr<uint8_t[]> input_frames;
void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
transform_init(&transform, context, device_id);
}
void deinit_transform() {
transform_destroy(&transform);
CL_CHECK(clReleaseMemObject(v_cl));
CL_CHECK(clReleaseMemObject(u_cl));
CL_CHECK(clReleaseMemObject(y_cl));
}
void run_transform(cl_mem yuv_cl, int model_width, int model_height, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
transform_queue(&transform, q,
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
y_cl, u_cl, v_cl, model_width, model_height, projection);
}
};
class DrivingModelFrame : public ModelFrame {
public:
DrivingModelFrame(cl_device_id device_id, cl_context context, int _temporal_skip);
~DrivingModelFrame();
cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);
const int MODEL_WIDTH = 512;
const int MODEL_HEIGHT = 256;
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
const int buf_size = MODEL_FRAME_SIZE * 2; // 2 frames are temporal_skip frames apart
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
private:
LoadYUVState loadyuv;
cl_mem img_buffer_20hz_cl, last_img_cl, input_frames_cl;
cl_buffer_region region;
int temporal_skip;
};
class MonitoringModelFrame : public ModelFrame {
public:
MonitoringModelFrame(cl_device_id device_id, cl_context context);
~MonitoringModelFrame();
cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);
const int MODEL_WIDTH = 1440;
const int MODEL_HEIGHT = 960;
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT;
const int buf_size = MODEL_FRAME_SIZE;
private:
cl_mem input_frame_cl;
};

View File

@@ -1,27 +0,0 @@
# distutils: language = c++
from msgq.visionipc.visionipc cimport cl_device_id, cl_context, cl_mem
cdef extern from "common/mat.h":
cdef struct mat3:
float v[9]
cdef extern from "common/clutil.h":
cdef unsigned long CL_DEVICE_TYPE_DEFAULT
cl_device_id cl_get_device_id(unsigned long)
cl_context cl_create_context(cl_device_id)
void cl_release_context(cl_context)
cdef extern from "selfdrive/modeld/models/commonmodel.h":
cppclass ModelFrame:
int buf_size
unsigned char * buffer_from_cl(cl_mem*, int);
cl_mem * prepare(cl_mem, int, int, int, int, mat3)
cppclass DrivingModelFrame:
int buf_size
DrivingModelFrame(cl_device_id, cl_context, int)
cppclass MonitoringModelFrame:
int buf_size
MonitoringModelFrame(cl_device_id, cl_context)

View File

@@ -1,13 +0,0 @@
# distutils: language = c++
from msgq.visionipc.visionipc cimport cl_mem
from msgq.visionipc.visionipc_pyx cimport CLContext as BaseCLContext
cdef class CLContext(BaseCLContext):
pass
cdef class CLMem:
cdef cl_mem * mem
@staticmethod
cdef create(void*)

View File

@@ -1,74 +0,0 @@
# distutils: language = c++
# cython: c_string_encoding=ascii, language_level=3
import numpy as np
cimport numpy as cnp
from libc.string cimport memcpy
from libc.stdint cimport uintptr_t
from msgq.visionipc.visionipc cimport cl_mem
from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext
from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context, cl_release_context
from .commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame
cdef class CLContext(BaseCLContext):
def __cinit__(self):
self.device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT)
self.context = cl_create_context(self.device_id)
def __dealloc__(self):
if self.context:
cl_release_context(self.context)
cdef class CLMem:
@staticmethod
cdef create(void * cmem):
mem = CLMem()
mem.mem = <cl_mem*> cmem
return mem
@property
def mem_address(self):
return <uintptr_t>(self.mem)
def cl_from_visionbuf(VisionBuf buf):
return CLMem.create(<void*>&buf.buf.buf_cl)
cdef class ModelFrame:
cdef cppModelFrame * frame
cdef int buf_size
def __dealloc__(self):
del self.frame
def prepare(self, VisionBuf buf, float[:] projection):
cdef mat3 cprojection
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
cdef cl_mem * data
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection)
return CLMem.create(data)
def buffer_from_cl(self, CLMem in_frames):
cdef unsigned char * data2
data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)
cdef class DrivingModelFrame(ModelFrame):
cdef cppDrivingModelFrame * _frame
def __cinit__(self, CLContext context, int temporal_skip):
self._frame = new cppDrivingModelFrame(context.device_id, context.context, temporal_skip)
self.frame = <cppModelFrame*>(self._frame)
self.buf_size = self._frame.buf_size
cdef class MonitoringModelFrame(ModelFrame):
cdef cppMonitoringModelFrame * _frame
def __cinit__(self, CLContext context):
self._frame = new cppMonitoringModelFrame(context.device_id, context.context)
self.frame = <cppModelFrame*>(self._frame)
self.buf_size = self._frame.buf_size

View File

@@ -1,8 +0,0 @@
from tinygrad.tensor import Tensor
from tinygrad.helpers import to_mv
def qcom_tensor_from_opencl_address(opencl_address, shape, dtype):
cl_buf_desc_ptr = to_mv(opencl_address, 8).cast('Q')[0]
rawbuf_ptr = to_mv(cl_buf_desc_ptr, 0x100).cast('Q')[20] # offset 0xA0 is a raw gpu pointer.
return Tensor.from_blob(rawbuf_ptr, shape, dtype=dtype, device='QCOM')

View File

@@ -1,76 +0,0 @@
#include "selfdrive/modeld/transforms/loadyuv.h"
#include <cassert>
#include <cstdio>
#include <cstring>
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) {
memset(s, 0, sizeof(*s));
s->width = width;
s->height = height;
char args[1024];
snprintf(args, sizeof(args),
"-cl-fast-relaxed-math -cl-denorms-are-zero "
"-DTRANSFORMED_WIDTH=%d -DTRANSFORMED_HEIGHT=%d",
width, height);
cl_program prg = cl_program_from_file(ctx, device_id, LOADYUV_PATH, args);
s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err));
s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err));
s->copy_krnl = CL_CHECK_ERR(clCreateKernel(prg, "copy", &err));
// done with this
CL_CHECK(clReleaseProgram(prg));
}
void loadyuv_destroy(LoadYUVState* s) {
CL_CHECK(clReleaseKernel(s->loadys_krnl));
CL_CHECK(clReleaseKernel(s->loaduv_krnl));
CL_CHECK(clReleaseKernel(s->copy_krnl));
}
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
cl_mem out_cl) {
cl_int global_out_off = 0;
CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl));
CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->loadys_krnl, 2, sizeof(cl_int), &global_out_off));
const size_t loadys_work_size = (s->width*s->height)/8;
CL_CHECK(clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL,
&loadys_work_size, NULL, 0, 0, NULL));
const size_t loaduv_work_size = ((s->width/2)*(s->height/2))/8;
global_out_off += (s->width*s->height);
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off));
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL));
global_out_off += (s->width/2)*(s->height/2);
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off));
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
&loaduv_work_size, NULL, 0, 0, NULL));
}
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
size_t src_offset, size_t dst_offset, size_t size) {
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &src));
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_mem), &dst));
CL_CHECK(clSetKernelArg(s->copy_krnl, 2, sizeof(cl_int), &src_offset));
CL_CHECK(clSetKernelArg(s->copy_krnl, 3, sizeof(cl_int), &dst_offset));
const size_t copy_work_size = size/8;
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
&copy_work_size, NULL, 0, 0, NULL));
}

View File

@@ -1,47 +0,0 @@
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
__kernel void loadys(__global uchar8 const * const Y,
__global uchar * out,
int out_offset)
{
const int gid = get_global_id(0);
const int ois = gid * 8;
const int oy = ois / TRANSFORMED_WIDTH;
const int ox = ois % TRANSFORMED_WIDTH;
const uchar8 ys = Y[gid];
// 02
// 13
__global uchar* outy0;
__global uchar* outy1;
if ((oy & 1) == 0) {
outy0 = out + out_offset; //y0
outy1 = out + out_offset + UV_SIZE*2; //y2
} else {
outy0 = out + out_offset + UV_SIZE; //y1
outy1 = out + out_offset + UV_SIZE*3; //y3
}
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
}
__kernel void loaduv(__global uchar8 const * const in,
__global uchar8 * out,
int out_offset)
{
const int gid = get_global_id(0);
const uchar8 inv = in[gid];
out[gid + out_offset / 8] = inv;
}
__kernel void copy(__global uchar8 * in,
__global uchar8 * out,
int in_offset,
int out_offset)
{
const int gid = get_global_id(0);
out[gid + out_offset / 8] = in[gid + in_offset / 8];
}

View File

@@ -1,20 +0,0 @@
#pragma once
#include "common/clutil.h"
typedef struct {
int width, height;
cl_kernel loadys_krnl, loaduv_krnl, copy_krnl;
} LoadYUVState;
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height);
void loadyuv_destroy(LoadYUVState* s);
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
cl_mem out_cl);
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
size_t src_offset, size_t dst_offset, size_t size);

View File

@@ -1,97 +0,0 @@
#include "selfdrive/modeld/transforms/transform.h"
#include <cassert>
#include <cstring>
#include "common/clutil.h"
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id) {
memset(s, 0, sizeof(*s));
cl_program prg = cl_program_from_file(ctx, device_id, TRANSFORM_PATH, "");
s->krnl = CL_CHECK_ERR(clCreateKernel(prg, "warpPerspective", &err));
// done with this
CL_CHECK(clReleaseProgram(prg));
s->m_y_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
s->m_uv_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
}
void transform_destroy(Transform* s) {
CL_CHECK(clReleaseMemObject(s->m_y_cl));
CL_CHECK(clReleaseMemObject(s->m_uv_cl));
CL_CHECK(clReleaseKernel(s->krnl));
}
void transform_queue(Transform* s,
cl_command_queue q,
cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
cl_mem out_y, cl_mem out_u, cl_mem out_v,
int out_width, int out_height,
const mat3& projection) {
const int zero = 0;
// sampled using pixel center origin
// (because that's how fastcv and opencv does it)
mat3 projection_y = projection;
// in and out uv is half the size of y.
mat3 projection_uv = transform_scale_buffer(projection, 0.5);
CL_CHECK(clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL));
CL_CHECK(clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL));
const int in_y_width = in_width;
const int in_y_height = in_height;
const int in_y_px_stride = 1;
const int in_uv_width = in_width/2;
const int in_uv_height = in_height/2;
const int in_uv_px_stride = 2;
const int in_u_offset = in_uv_offset;
const int in_v_offset = in_uv_offset + 1;
const int out_y_width = out_width;
const int out_y_height = out_height;
const int out_uv_width = out_width/2;
const int out_uv_height = out_height/2;
CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv)); // src
CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride)); // src_row_stride
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride)); // src_px_stride
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero)); // src_offset
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height)); // src_rows
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width)); // src_cols
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y)); // dst
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width)); // dst_row_stride
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height)); // dst_rows
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width)); // dst_cols
CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl)); // M
const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height};
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_y, NULL, 0, 0, NULL));
const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height};
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride)); // src_px_stride
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset)); // src_offset
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height)); // src_rows
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width)); // src_cols
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u)); // dst
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width)); // dst_row_stride
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height)); // dst_rows
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width)); // dst_cols
CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl)); // M
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_uv, NULL, 0, 0, NULL));
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset)); // src_ofset
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v)); // dst
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
(const size_t*)&work_size_uv, NULL, 0, 0, NULL));
}

View File

@@ -1,54 +0,0 @@
#define INTER_BITS 5
#define INTER_TAB_SIZE (1 << INTER_BITS)
#define INTER_SCALE 1.f / INTER_TAB_SIZE
#define INTER_REMAP_COEF_BITS 15
#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)
__kernel void warpPerspective(__global const uchar * src,
int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols,
__global uchar * dst,
int dst_row_stride, int dst_offset, int dst_rows, int dst_cols,
__constant float * M)
{
int dx = get_global_id(0);
int dy = get_global_id(1);
if (dx < dst_cols && dy < dst_rows)
{
float X0 = M[0] * dx + M[1] * dy + M[2];
float Y0 = M[3] * dx + M[4] * dy + M[5];
float W = M[6] * dx + M[7] * dy + M[8];
W = W != 0.0f ? INTER_TAB_SIZE / W : 0.0f;
int X = rint(X0 * W), Y = rint(Y0 * W);
int sx = convert_short_sat(X >> INTER_BITS);
int sy = convert_short_sat(Y >> INTER_BITS);
short sx_clamp = clamp(sx, 0, src_cols - 1);
short sx_p1_clamp = clamp(sx + 1, 0, src_cols - 1);
short sy_clamp = clamp(sy, 0, src_rows - 1);
short sy_p1_clamp = clamp(sy + 1, 0, src_rows - 1);
int v0 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
int v1 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
int v2 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
int v3 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
short ay = (short)(Y & (INTER_TAB_SIZE - 1));
short ax = (short)(X & (INTER_TAB_SIZE - 1));
float taby = 1.f/INTER_TAB_SIZE*ay;
float tabx = 1.f/INTER_TAB_SIZE*ax;
int dst_index = mad24(dy, dst_row_stride, dst_offset + dx);
int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE );
int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE );
int val = v0 * itab0 + v1 * itab1 + v2 * itab2 + v3 * itab3;
uchar pix = convert_uchar_sat((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS);
dst[dst_index] = pix;
}
}

View File

@@ -1,25 +0,0 @@
#pragma once
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "common/mat.h"
typedef struct {
cl_kernel krnl;
cl_mem m_y_cl, m_uv_cl;
} Transform;
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id);
void transform_destroy(Transform* transform);
void transform_queue(Transform* s, cl_command_queue q,
cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
cl_mem out_y, cl_mem out_u, cl_mem out_v,
int out_width, int out_height,
const mat3& projection);

View File

@@ -34,8 +34,8 @@ GITHUB = GithubUtils(API_TOKEN, DATA_TOKEN)
EXEC_TIMINGS = [
# model, instant max, average max
("modelV2", 0.035, 0.025),
("driverStateV2", 0.02, 0.015),
("modelV2", 0.05, 0.028),
("driverStateV2", 0.05, 0.015),
]
def get_log_fn(test_route, ref="master"):

View File

@@ -4,6 +4,7 @@ import time
import copy
import heapq
import signal
import numpy as np
from collections import Counter
from dataclasses import dataclass, field
from itertools import islice
@@ -23,6 +24,7 @@ from openpilot.common.params import Params
from openpilot.common.prefix import OpenpilotPrefix
from openpilot.common.timeout import Timeout
from openpilot.common.realtime import DT_CTRL
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
from openpilot.system.manager.process_config import managed_processes
from openpilot.selfdrive.test.process_replay.vision_meta import meta_from_camera_state, available_streams
from openpilot.selfdrive.test.process_replay.migration import migrate_all
@@ -203,7 +205,8 @@ class ProcessContainer:
if meta.camera_state in self.cfg.vision_pubs:
assert frs[meta.camera_state].pix_fmt == 'nv12'
frame_size = (frs[meta.camera_state].w, frs[meta.camera_state].h)
vipc_server.create_buffers(meta.stream, 2, *frame_size)
stride, y_height, _, yuv_size = get_nv12_info(frame_size[0], frame_size[1])
vipc_server.create_buffers_with_sizes(meta.stream, 2, frame_size[0], frame_size[1], yuv_size, stride, stride * y_height)
vipc_server.start_listener()
self.vipc_server = vipc_server
@@ -300,7 +303,17 @@ class ProcessContainer:
camera_meta = meta_from_camera_state(m.which())
assert frs is not None
img = frs[m.which()].get(camera_state.frameId)
self.vipc_server.send(camera_meta.stream, img.flatten().tobytes(),
h, w = frs[m.which()].h, frs[m.which()].w
stride, y_height, _, yuv_size = get_nv12_info(w, h)
uv_offset = stride * y_height
padded_img = np.zeros(((uv_offset //stride) + (h // 2), stride))
padded_img[:h, :w] = img[:h * w].reshape((-1, w))
padded_img[uv_offset // stride:uv_offset // stride + h // 2, :w] = img[h * w:].reshape((-1, w))
img_bytes = np.zeros((yuv_size,), dtype=np.uint8)
img_bytes[:padded_img.size] = padded_img.flatten()
self.vipc_server.send(camera_meta.stream, img_bytes.tobytes(),
camera_state.frameId, camera_state.timestampSof, camera_state.timestampEof)
self.msg_queue = []

View File

@@ -1,6 +1,6 @@
Import('env', 'arch', 'messaging', 'common', 'visionipc')
libs = [common, 'OpenCL', messaging, visionipc]
libs = [common, messaging, visionipc]
if arch != "Darwin":
camera_obj = env.Object(['cameras/camera_qcom2.cc', 'cameras/camera_common.cc', 'cameras/spectra.cc',

View File

@@ -7,7 +7,7 @@
#include "system/camerad/cameras/spectra.h"
void CameraBuf::init(cl_device_id device_id, cl_context context, SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type) {
void CameraBuf::init(SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type) {
vipc_server = v;
stream_type = type;
frame_buf_count = frame_cnt;
@@ -21,9 +21,8 @@ void CameraBuf::init(cl_device_id device_id, cl_context context, SpectraCamera *
const int raw_frame_size = (sensor->frame_height + sensor->extra_height) * sensor->frame_stride;
for (int i = 0; i < frame_buf_count; i++) {
camera_bufs_raw[i].allocate(raw_frame_size);
camera_bufs_raw[i].init_cl(device_id, context);
}
LOGD("allocated %d CL buffers", frame_buf_count);
LOGD("allocated %d buffers", frame_buf_count);
}
vipc_server->create_buffers_with_sizes(stream_type, VIPC_BUFFER_COUNT, out_img_width, out_img_height, cam->yuv_size, cam->stride, cam->uv_offset);

View File

@@ -36,7 +36,7 @@ public:
CameraBuf() = default;
~CameraBuf();
void init(cl_device_id device_id, cl_context context, SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type);
void init(SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type);
void sendFrameToVipc();
};

View File

@@ -12,16 +12,8 @@
#include <string>
#include <vector>
#ifdef __TICI__
#include "CL/cl_ext_qcom.h"
#else
#define CL_PRIORITY_HINT_HIGH_QCOM NULL
#define CL_CONTEXT_PRIORITY_HINT_QCOM NULL
#endif
#include "media/cam_sensor_cmn_header.h"
#include "common/clutil.h"
#include "common/params.h"
#include "common/swaglog.h"
@@ -57,7 +49,7 @@ public:
CameraState(SpectraMaster *master, const CameraConfig &config) : camera(master, config) {};
~CameraState();
void init(VisionIpcServer *v, cl_device_id device_id, cl_context ctx);
void init(VisionIpcServer *v);
void update_exposure_score(float desired_ev, int exp_t, int exp_g_idx, float exp_gain);
void set_camera_exposure(float grey_frac);
void set_exposure_rect();
@@ -68,8 +60,8 @@ public:
}
};
void CameraState::init(VisionIpcServer *v, cl_device_id device_id, cl_context ctx) {
camera.camera_open(v, device_id, ctx);
void CameraState::init(VisionIpcServer *v) {
camera.camera_open(v);
if (!camera.enabled) return;
@@ -257,11 +249,7 @@ void CameraState::sendState() {
void camerad_thread() {
// TODO: centralize enabled handling
cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
const cl_context_properties props[] = {CL_CONTEXT_PRIORITY_HINT_QCOM, CL_PRIORITY_HINT_HIGH_QCOM, 0};
cl_context ctx = CL_CHECK_ERR(clCreateContext(props, 1, &device_id, NULL, NULL, &err));
VisionIpcServer v("camerad", device_id, ctx);
VisionIpcServer v("camerad");
// *** initial ISP init ***
SpectraMaster m;
@@ -271,7 +259,7 @@ void camerad_thread() {
std::vector<std::unique_ptr<CameraState>> cams;
for (const auto &config : ALL_CAMERA_CONFIGS) {
auto cam = std::make_unique<CameraState>(&m, config);
cam->init(&v, device_id, ctx);
cam->init(&v);
cams.emplace_back(std::move(cam));
}

View File

@@ -274,7 +274,7 @@ int SpectraCamera::clear_req_queue() {
return ret;
}
void SpectraCamera::camera_open(VisionIpcServer *v, cl_device_id device_id, cl_context ctx) {
void SpectraCamera::camera_open(VisionIpcServer *v) {
if (!openSensor()) {
return;
}
@@ -296,7 +296,7 @@ void SpectraCamera::camera_open(VisionIpcServer *v, cl_device_id device_id, cl_c
linkDevices();
LOGD("camera init %d", cc.camera_num);
buf.init(device_id, ctx, this, v, ife_buf_depth, cc.stream_type);
buf.init(this, v, ife_buf_depth, cc.stream_type);
camera_map_bufs();
clearAndRequeue(1);
}

View File

@@ -113,7 +113,7 @@ public:
SpectraCamera(SpectraMaster *master, const CameraConfig &config);
~SpectraCamera();
void camera_open(VisionIpcServer *v, cl_device_id device_id, cl_context ctx);
void camera_open(VisionIpcServer *v);
bool handle_camera_event(const cam_req_mgr_message *event_data);
void camera_close();
void camera_map_bufs();

View File

@@ -32,7 +32,7 @@ class Proc:
PROCS = [
Proc(['camerad'], 1.65, atol=0.4, msgs=['roadCameraState', 'wideRoadCameraState', 'driverCameraState']),
Proc(['modeld'], 1.24, atol=0.2, msgs=['modelV2']),
Proc(['modeld'], 1.5, atol=0.2, msgs=['modelV2']),
Proc(['dmonitoringmodeld'], 0.65, atol=0.35, msgs=['driverStateV2']),
Proc(['encoderd'], 0.23, msgs=[]),
]

View File

@@ -2,16 +2,13 @@ Import('env', 'arch', 'messaging', 'common', 'visionipc')
libs = [common, messaging, visionipc,
'avformat', 'avcodec', 'avutil',
'yuv', 'OpenCL', 'pthread', 'zstd']
'yuv', 'pthread', 'zstd']
src = ['logger.cc', 'zstd_writer.cc', 'video_writer.cc', 'encoder/encoder.cc', 'encoder/v4l_encoder.cc', 'encoder/jpeg_encoder.cc']
if arch != "larch64":
src += ['encoder/ffmpeg_encoder.cc']
if arch == "Darwin":
# fix OpenCL
del libs[libs.index('OpenCL')]
env['FRAMEWORKS'] = ['OpenCL']
# exclude v4l
del src[src.index('encoder/v4l_encoder.cc')]

File diff suppressed because it is too large Load Diff

View File

@@ -1,131 +0,0 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D10_H
#define __OPENCL_CL_D3D10_H
#include <d3d10.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d10_sharing */
#define cl_khr_d3d10_sharing 1
typedef cl_uint cl_d3d10_device_source_khr;
typedef cl_uint cl_d3d10_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D10_DEVICE_KHR -1002
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
/* cl_d3d10_device_source_nv */
#define CL_D3D10_DEVICE_KHR 0x4010
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
/* cl_d3d10_device_set_nv */
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
/* cl_context_info */
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
/* cl_mem_info */
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
/* cl_image_info */
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
cl_platform_id platform,
cl_d3d10_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d10_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D10_H */

View File

@@ -1,131 +0,0 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D11_H
#define __OPENCL_CL_D3D11_H
#include <d3d11.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d11_sharing */
#define cl_khr_d3d11_sharing 1
typedef cl_uint cl_d3d11_device_source_khr;
typedef cl_uint cl_d3d11_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D11_DEVICE_KHR -1006
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
/* cl_d3d11_device_source */
#define CL_D3D11_DEVICE_KHR 0x4019
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
/* cl_d3d11_device_set */
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
/* cl_context_info */
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
/* cl_mem_info */
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
/* cl_image_info */
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
cl_platform_id platform,
cl_d3d11_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d11_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D11_H */

View File

@@ -1,132 +0,0 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
/* cl_khr_dx9_media_sharing */
#define cl_khr_dx9_media_sharing 1
typedef cl_uint cl_dx9_media_adapter_type_khr;
typedef cl_uint cl_dx9_media_adapter_set_khr;
#if defined(_WIN32)
#include <d3d9.h>
typedef struct _cl_dx9_surface_info_khr
{
IDirect3DSurface9 *resource;
HANDLE shared_handle;
} cl_dx9_surface_info_khr;
#endif
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
/* cl_media_adapter_type_khr */
#define CL_ADAPTER_D3D9_KHR 0x2020
#define CL_ADAPTER_D3D9EX_KHR 0x2021
#define CL_ADAPTER_DXVA_KHR 0x2022
/* cl_media_adapter_set_khr */
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
/* cl_context_info */
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
/* cl_mem_info */
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
/* cl_image_info */
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
cl_platform_id platform,
cl_uint num_media_adapters,
cl_dx9_media_adapter_type_khr * media_adapter_type,
void * media_adapters,
cl_dx9_media_adapter_set_khr media_adapter_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
cl_context context,
cl_mem_flags flags,
cl_dx9_media_adapter_type_khr adapter_type,
void * surface_info,
cl_uint plane,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */

View File

@@ -1,136 +0,0 @@
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __OPENCL_CL_EGL_H
#define __OPENCL_CL_EGL_H
#ifdef __APPLE__
#else
#include <CL/cl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
/* Error type for clCreateFromEGLImageKHR */
#define CL_INVALID_EGL_OBJECT_KHR -1093
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
/* CLeglImageKHR is an opaque handle to an EGLImage */
typedef void* CLeglImageKHR;
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
typedef void* CLeglDisplayKHR;
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
typedef void* CLeglSyncKHR;
/* properties passed to clCreateFromEGLImageKHR */
typedef intptr_t cl_egl_image_properties_khr;
#define cl_khr_egl_image 1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromEGLImageKHR(cl_context /* context */,
CLeglDisplayKHR /* egldisplay */,
CLeglImageKHR /* eglimage */,
cl_mem_flags /* flags */,
const cl_egl_image_properties_khr * /* properties */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
#define cl_khr_egl_event 1
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromEGLSyncKHR(cl_context /* context */,
CLeglSyncKHR /* sync */,
CLeglDisplayKHR /* display */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_EGL_H */

View File

@@ -1,391 +0,0 @@
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
/* cl_ext.h contains OpenCL extensions which don't have external */
/* (OpenGL, D3D) dependencies. */
#ifndef __CL_EXT_H
#define __CL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <AvailabilityMacros.h>
#else
#include <CL/cl.h>
#endif
/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
/* Memory object destruction
*
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
*
* Registers a user callback function that will be called when the memory object is deleted and its resources
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
* stack associated with memobj. The registered user callback functions are called in the reverse order in
* which they were registered. The user callback functions are called and then the memory object is deleted
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
* the storage bits for the memory object, can be reused or freed.
*
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
*
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*/
#define cl_APPLE_SetMemObjectDestructor 1
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* Context Logging Functions
*
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/************************
* cl_khr_icd extension *
************************/
#define cl_khr_icd 1
/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR -1001
extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
/* Extension: cl_khr_image2D_buffer
*
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
* for 2D images created from a buffer.
*
* When the 2D image from buffer is created, the client must specify the width,
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
*
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
*/
/*************************************
* cl_khr_initalize_memory extension *
*************************************/
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
/**************************************
* cl_khr_terminate_context extension *
**************************************/
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
#define CL_CONTEXT_TERMINATE_KHR 0x2032
#define cl_khr_terminate_context 1
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
/*
* Extension: cl_khr_spir
*
* This extension adds support to create an OpenCL program object from a
* Standard Portable Intermediate Representation (SPIR) instance
*/
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
/******************************************
* cl_nv_device_attribute_query extension *
******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
/*********************************
* cl_amd_device_attribute_query *
*********************************/
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
/*********************************
* cl_arm_printf extension
*********************************/
#define CL_PRINTF_CALLBACK_ARM 0x40B0
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
#ifdef CL_VERSION_1_1
/***********************************
* cl_ext_device_fission extension *
***********************************/
#define cl_ext_device_fission 1
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef cl_ulong cl_device_partition_property_ext;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
/* cl_device_partition_property_ext */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
/* clDeviceGetInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
/* error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
#define CL_INVALID_PARTITION_COUNT_EXT -1058
#define CL_INVALID_PARTITION_NAME_EXT -1059
/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
/* cl_device_partition_property_ext list terminators */
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
/*********************************
* cl_qcom_ext_host_ptr extension
*********************************/
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
typedef cl_uint cl_image_pitch_info_qcom;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(cl_device_id device,
size_t image_width,
size_t image_height,
const cl_image_format *image_format,
cl_image_pitch_info_qcom param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
typedef struct _cl_mem_ext_host_ptr
{
/* Type of external memory allocation. */
/* Legal values will be defined in layered extensions. */
cl_uint allocation_type;
/* Host cache policy for this external memory allocation. */
cl_uint host_cache_policy;
} cl_mem_ext_host_ptr;
/*********************************
* cl_qcom_ion_host_ptr extension
*********************************/
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
typedef struct _cl_mem_ion_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
cl_mem_ext_host_ptr ext_host_ptr;
/* ION file descriptor */
int ion_filedesc;
/* Host pointer to the ION allocated memory */
void* ion_hostptr;
} cl_mem_ion_host_ptr;
#endif /* CL_VERSION_1_1 */
#ifdef CL_VERSION_2_0
/*********************************
* cl_khr_sub_groups extension
*********************************/
#define cl_khr_sub_groups 1
typedef cl_uint cl_kernel_sub_group_info_khr;
/* cl_khr_sub_group_info */
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelSubGroupInfoKHR(cl_kernel /* in_kernel */,
cl_device_id /*in_device*/,
cl_kernel_sub_group_info_khr /* param_name */,
size_t /*input_value_size*/,
const void * /*input_value*/,
size_t /*param_value_size*/,
void* /*param_value*/,
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
typedef CL_API_ENTRY cl_int
( CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel /* in_kernel */,
cl_device_id /*in_device*/,
cl_kernel_sub_group_info_khr /* param_name */,
size_t /*input_value_size*/,
const void * /*input_value*/,
size_t /*param_value_size*/,
void* /*param_value*/,
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
#endif /* CL_VERSION_2_0 */
#ifdef CL_VERSION_2_1
/*********************************
* cl_khr_priority_hints extension
*********************************/
#define cl_khr_priority_hints 1
typedef cl_uint cl_queue_priority_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_PRIORITY_KHR 0x1096
/* cl_queue_priority_khr */
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
#endif /* CL_VERSION_2_1 */
#ifdef CL_VERSION_2_1
/*********************************
* cl_khr_throttle_hints extension
*********************************/
#define cl_khr_throttle_hints 1
typedef cl_uint cl_queue_throttle_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_THROTTLE_KHR 0x1097
/* cl_queue_throttle_khr */
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
#endif /* CL_VERSION_2_1 */
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_H */

View File

@@ -1,255 +0,0 @@
/* Copyright (c) 2009-2017 Qualcomm Technologies, Inc. All Rights Reserved.
* Qualcomm Technologies Proprietary and Confidential.
*/
#ifndef __OPENCL_CL_EXT_QCOM_H
#define __OPENCL_CL_EXT_QCOM_H
// Needed by cl_khr_egl_event extension
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <CL/cl_ext.h>
#ifdef __cplusplus
extern "C" {
#endif
/************************************
* cl_qcom_create_buffer_from_image *
************************************/
#define CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM 0x40C0
#define CL_BUFFER_FROM_IMAGE_SLICE_PITCH_QCOM 0x40C1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBufferFromImageQCOM(cl_mem image,
cl_mem_flags flags,
cl_int *errcode_ret);
/************************************
* cl_qcom_limited_printf extension *
************************************/
/* Builtin printf function buffer size in bytes. */
#define CL_DEVICE_PRINTF_BUFFER_SIZE_QCOM 0x1049
/*************************************
* cl_qcom_extended_images extension *
*************************************/
#define CL_CONTEXT_ENABLE_EXTENDED_IMAGES_QCOM 0x40AA
#define CL_DEVICE_EXTENDED_IMAGE2D_MAX_WIDTH_QCOM 0x40AB
#define CL_DEVICE_EXTENDED_IMAGE2D_MAX_HEIGHT_QCOM 0x40AC
#define CL_DEVICE_EXTENDED_IMAGE3D_MAX_WIDTH_QCOM 0x40AD
#define CL_DEVICE_EXTENDED_IMAGE3D_MAX_HEIGHT_QCOM 0x40AE
#define CL_DEVICE_EXTENDED_IMAGE3D_MAX_DEPTH_QCOM 0x40AF
/*************************************
* cl_qcom_perf_hint extension *
*************************************/
typedef cl_uint cl_perf_hint;
#define CL_CONTEXT_PERF_HINT_QCOM 0x40C2
/*cl_perf_hint*/
#define CL_PERF_HINT_HIGH_QCOM 0x40C3
#define CL_PERF_HINT_NORMAL_QCOM 0x40C4
#define CL_PERF_HINT_LOW_QCOM 0x40C5
extern CL_API_ENTRY cl_int CL_API_CALL
clSetPerfHintQCOM(cl_context context,
cl_perf_hint perf_hint);
// This extension is published at Khronos, so its definitions are made in cl_ext.h.
// This duplication is for backward compatibility.
#ifndef CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM
/*********************************
* cl_qcom_android_native_buffer_host_ptr extension
*********************************/
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
typedef struct _cl_mem_android_native_buffer_host_ptr
{
// Type of external memory allocation.
// Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers.
cl_mem_ext_host_ptr ext_host_ptr;
// Virtual pointer to the android native buffer
void* anb_ptr;
} cl_mem_android_native_buffer_host_ptr;
#endif //#ifndef CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM
/***********************************
* cl_img_egl_image extension *
************************************/
typedef void* CLeglImageIMG;
typedef void* CLeglDisplayIMG;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromEGLImageIMG(cl_context context,
cl_mem_flags flags,
CLeglImageIMG image,
CLeglDisplayIMG display,
cl_int *errcode_ret);
/*********************************
* cl_qcom_other_image extension
*********************************/
// Extended flag for creating/querying QCOM non-standard images
#define CL_MEM_OTHER_IMAGE_QCOM (1<<25)
// cl_channel_type
#define CL_QCOM_UNORM_MIPI10 0x4159
#define CL_QCOM_UNORM_MIPI12 0x415A
#define CL_QCOM_UNSIGNED_MIPI10 0x415B
#define CL_QCOM_UNSIGNED_MIPI12 0x415C
#define CL_QCOM_UNORM_INT10 0x415D
#define CL_QCOM_UNORM_INT12 0x415E
#define CL_QCOM_UNSIGNED_INT16 0x415F
// cl_channel_order
// Dedicate 0x4130-0x415F range for QCOM extended image formats
// 0x4130 - 0x4132 range is assigned to pixel-oriented compressed format
#define CL_QCOM_BAYER 0x414E
#define CL_QCOM_NV12 0x4133
#define CL_QCOM_NV12_Y 0x4134
#define CL_QCOM_NV12_UV 0x4135
#define CL_QCOM_TILED_NV12 0x4136
#define CL_QCOM_TILED_NV12_Y 0x4137
#define CL_QCOM_TILED_NV12_UV 0x4138
#define CL_QCOM_P010 0x413C
#define CL_QCOM_P010_Y 0x413D
#define CL_QCOM_P010_UV 0x413E
#define CL_QCOM_TILED_P010 0x413F
#define CL_QCOM_TILED_P010_Y 0x4140
#define CL_QCOM_TILED_P010_UV 0x4141
#define CL_QCOM_TP10 0x4145
#define CL_QCOM_TP10_Y 0x4146
#define CL_QCOM_TP10_UV 0x4147
#define CL_QCOM_TILED_TP10 0x4148
#define CL_QCOM_TILED_TP10_Y 0x4149
#define CL_QCOM_TILED_TP10_UV 0x414A
/*********************************
* cl_qcom_compressed_image extension
*********************************/
// Extended flag for creating/querying QCOM non-planar compressed images
#define CL_MEM_COMPRESSED_IMAGE_QCOM (1<<27)
// Extended image format
// cl_channel_order
#define CL_QCOM_COMPRESSED_RGBA 0x4130
#define CL_QCOM_COMPRESSED_RGBx 0x4131
#define CL_QCOM_COMPRESSED_NV12_Y 0x413A
#define CL_QCOM_COMPRESSED_NV12_UV 0x413B
#define CL_QCOM_COMPRESSED_P010 0x4142
#define CL_QCOM_COMPRESSED_P010_Y 0x4143
#define CL_QCOM_COMPRESSED_P010_UV 0x4144
#define CL_QCOM_COMPRESSED_TP10 0x414B
#define CL_QCOM_COMPRESSED_TP10_Y 0x414C
#define CL_QCOM_COMPRESSED_TP10_UV 0x414D
#define CL_QCOM_COMPRESSED_NV12_4R 0x414F
#define CL_QCOM_COMPRESSED_NV12_4R_Y 0x4150
#define CL_QCOM_COMPRESSED_NV12_4R_UV 0x4151
/*********************************
* cl_qcom_compressed_yuv_image_read extension
*********************************/
// Extended flag for creating/querying QCOM compressed images
#define CL_MEM_COMPRESSED_YUV_IMAGE_QCOM (1<<28)
// Extended image format
#define CL_QCOM_COMPRESSED_NV12 0x10C4
// Extended flag for setting ION buffer allocation type
#define CL_MEM_ION_HOST_PTR_COMPRESSED_YUV_QCOM 0x40CD
#define CL_MEM_ION_HOST_PTR_PROTECTED_COMPRESSED_YUV_QCOM 0x40CE
/*********************************
* cl_qcom_accelerated_image_ops
*********************************/
#define CL_MEM_OBJECT_WEIGHT_IMAGE_QCOM 0x4110
#define CL_DEVICE_HOF_MAX_NUM_PHASES_QCOM 0x4111
#define CL_DEVICE_HOF_MAX_FILTER_SIZE_X_QCOM 0x4112
#define CL_DEVICE_HOF_MAX_FILTER_SIZE_Y_QCOM 0x4113
#define CL_DEVICE_BLOCK_MATCHING_MAX_REGION_SIZE_X_QCOM 0x4114
#define CL_DEVICE_BLOCK_MATCHING_MAX_REGION_SIZE_Y_QCOM 0x4115
//Extended flag for specifying weight image type
#define CL_WEIGHT_IMAGE_SEPARABLE_QCOM (1<<0)
// Box Filter
typedef struct _cl_box_filter_size_qcom
{
// Width of box filter on X direction.
float box_filter_width;
// Height of box filter on Y direction.
float box_filter_height;
} cl_box_filter_size_qcom;
// HOF Weight Image Desc
typedef struct _cl_weight_desc_qcom
{
/** Coordinate of the "center" point of the weight image,
based on the weight image's top-left corner as the origin. */
size_t center_coord_x;
size_t center_coord_y;
cl_bitfield flags;
} cl_weight_desc_qcom;
typedef struct _cl_weight_image_desc_qcom
{
cl_image_desc image_desc;
cl_weight_desc_qcom weight_desc;
} cl_weight_image_desc_qcom;
/*************************************
* cl_qcom_protected_context extension *
*************************************/
#define CL_CONTEXT_PROTECTED_QCOM 0x40C7
#define CL_MEM_ION_HOST_PTR_PROTECTED_QCOM 0x40C8
/*************************************
* cl_qcom_priority_hint extension *
*************************************/
#define CL_PRIORITY_HINT_NONE_QCOM 0
typedef cl_uint cl_priority_hint;
#define CL_CONTEXT_PRIORITY_HINT_QCOM 0x40C9
/*cl_priority_hint*/
#define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA
#define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB
#define CL_PRIORITY_HINT_LOW_QCOM 0x40CC
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_EXT_QCOM_H */

View File

@@ -1,167 +0,0 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
#define CL_GL_NUM_SAMPLES 0x2012
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* bufobj */,
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* renderbuffer */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem /* memobj */,
cl_gl_object_type * /* gl_object_type */,
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem /* memobj */,
cl_gl_texture_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
#define cl_khr_gl_sharing 1
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
cl_gl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */

View File

@@ -1,74 +0,0 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
/* OpenGL dependencies. */
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl_gl.h>
#else
#include <CL/cl_gl.h>
#endif
/*
* For each extension, follow this template
* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
* ... define new types, if any
* ... define new tokens, if any
* ... define new APIs, if any
*
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
* This allows us to avoid having to decide whether to include GL headers or GLES here.
*/
/*
* cl_khr_gl_event extension
* See section 9.9 in the OpenCL 1.1 spec for more information
*/
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context /* context */,
cl_GLsync /* cl_GLsync */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */

File diff suppressed because it is too large Load Diff

View File

@@ -1,59 +0,0 @@
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <OpenCL/cl_gl.h>
#include <OpenCL/cl_gl_ext.h>
#include <OpenCL/cl_ext.h>
#else
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>
#include <CL/cl_ext.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */

View File

@@ -57,11 +57,9 @@ base_frameworks = qt_env['FRAMEWORKS']
base_libs = [common, messaging, cereal, visionipc, 'm', 'ssl', 'crypto', 'pthread'] + qt_env["LIBS"]
if arch == "Darwin":
base_frameworks.append('OpenCL')
base_frameworks.append('QtCharts')
base_frameworks.append('QtSerialBus')
else:
base_libs.append('OpenCL')
base_libs.append('Qt5Charts')
base_libs.append('Qt5SerialBus')

View File

@@ -58,9 +58,6 @@ function install_ubuntu_common_requirements() {
libzmq3-dev \
libzstd-dev \
libsqlite3-dev \
opencl-headers \
ocl-icd-libopencl1 \
ocl-icd-opencl-dev \
portaudio19-dev \
qttools5-dev-tools \
libqt5svg5-dev \

View File

@@ -6,11 +6,6 @@ replay_env['CCFLAGS'] += ['-Wno-deprecated-declarations']
base_frameworks = []
base_libs = [common, messaging, cereal, visionipc, 'm', 'ssl', 'crypto', 'pthread']
if arch == "Darwin":
base_frameworks.append('OpenCL')
else:
base_libs.append('OpenCL')
replay_lib_src = ["replay.cc", "consoleui.cc", "camera.cc", "filereader.cc", "logreader.cc", "framereader.cc",
"route.cc", "util.cc", "seg_mgr.cc", "timeline.cc", "api.cc"]
if arch != "Darwin":

View File

@@ -10,10 +10,6 @@ What's needed:
## Setup openpilot
- Follow [this readme](../README.md) to install and build the requirements
- Install OpenCL Driver (Ubuntu)
```
sudo apt install pocl-opencl-icd
```
## Connect the hardware
- Connect the camera first