Reapply tgwarp w NV12 fix (#37168)
* Revert "Revert tgwarp again (#37161)"
This reverts commit 45099e7fcd.
* Weird uv sizes
* Fix interleaving
* Fix on CPU
* make CPU safe
* Prevent corruption without clone
* Claude knows speeed
* fix interleaving
* less kernels
* blob caching
* This is still slightly faster
* Comment for blob cache
This commit is contained in:
@@ -18,43 +18,6 @@ RUN /tmp/tools/install_ubuntu_dependencies.sh && \
|
||||
cd /usr/lib/gcc/arm-none-eabi/* && \
|
||||
rm -rf arm/ thumb/nofp thumb/v6* thumb/v8* thumb/v7+fp thumb/v7-r+fp.sp
|
||||
|
||||
# Add OpenCL
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
apt-utils \
|
||||
alien \
|
||||
unzip \
|
||||
tar \
|
||||
curl \
|
||||
xz-utils \
|
||||
dbus \
|
||||
gcc-arm-none-eabi \
|
||||
tmux \
|
||||
vim \
|
||||
libx11-6 \
|
||||
wget \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN mkdir -p /tmp/opencl-driver-intel && \
|
||||
cd /tmp/opencl-driver-intel && \
|
||||
wget https://github.com/intel/llvm/releases/download/2024-WW14/oclcpuexp-2024.17.3.0.09_rel.tar.gz && \
|
||||
wget https://github.com/oneapi-src/oneTBB/releases/download/v2021.12.0/oneapi-tbb-2021.12.0-lin.tgz && \
|
||||
mkdir -p /opt/intel/oclcpuexp_2024.17.3.0.09_rel && \
|
||||
cd /opt/intel/oclcpuexp_2024.17.3.0.09_rel && \
|
||||
tar -zxvf /tmp/opencl-driver-intel/oclcpuexp-2024.17.3.0.09_rel.tar.gz && \
|
||||
mkdir -p /etc/OpenCL/vendors && \
|
||||
echo /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64/libintelocl.so > /etc/OpenCL/vendors/intel_expcpu.icd && \
|
||||
cd /opt/intel && \
|
||||
tar -zxvf /tmp/opencl-driver-intel/oneapi-tbb-2021.12.0-lin.tgz && \
|
||||
ln -s /opt/intel/oneapi-tbb-2021.12.0/lib/intel64/gcc4.8/libtbb.so /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 && \
|
||||
ln -s /opt/intel/oneapi-tbb-2021.12.0/lib/intel64/gcc4.8/libtbbmalloc.so /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 && \
|
||||
ln -s /opt/intel/oneapi-tbb-2021.12.0/lib/intel64/gcc4.8/libtbb.so.12 /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 && \
|
||||
ln -s /opt/intel/oneapi-tbb-2021.12.0/lib/intel64/gcc4.8/libtbbmalloc.so.2 /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 && \
|
||||
mkdir -p /etc/ld.so.conf.d && \
|
||||
echo /opt/intel/oclcpuexp_2024.17.3.0.09_rel/x64 > /etc/ld.so.conf.d/libintelopenclexp.conf && \
|
||||
ldconfig -f /etc/ld.so.conf.d/libintelopenclexp.conf && \
|
||||
cd / && \
|
||||
rm -rf /tmp/opencl-driver-intel
|
||||
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=graphics,utility,compute
|
||||
ENV QTWEBENGINE_DISABLE_SANDBOX=1
|
||||
|
||||
@@ -94,7 +94,6 @@ env = Environment(
|
||||
|
||||
# Arch-specific flags and paths
|
||||
if arch == "larch64":
|
||||
env.Append(CPPPATH=["#third_party/opencl/include"])
|
||||
env.Append(LIBPATH=[
|
||||
"/usr/local/lib",
|
||||
"/system/vendor/lib64",
|
||||
|
||||
@@ -5,7 +5,6 @@ common_libs = [
|
||||
'swaglog.cc',
|
||||
'util.cc',
|
||||
'ratekeeper.cc',
|
||||
'clutil.cc',
|
||||
]
|
||||
|
||||
_common = env.Library('common', common_libs, LIBS="json11")
|
||||
|
||||
@@ -1,98 +0,0 @@
|
||||
#include "common/clutil.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#include "common/util.h"
|
||||
#include "common/swaglog.h"
|
||||
|
||||
namespace { // helper functions
|
||||
|
||||
template <typename Func, typename Id, typename Name>
|
||||
std::string get_info(Func get_info_func, Id id, Name param_name) {
|
||||
size_t size = 0;
|
||||
CL_CHECK(get_info_func(id, param_name, 0, NULL, &size));
|
||||
std::string info(size, '\0');
|
||||
CL_CHECK(get_info_func(id, param_name, size, info.data(), NULL));
|
||||
return info;
|
||||
}
|
||||
inline std::string get_platform_info(cl_platform_id id, cl_platform_info name) { return get_info(&clGetPlatformInfo, id, name); }
|
||||
inline std::string get_device_info(cl_device_id id, cl_device_info name) { return get_info(&clGetDeviceInfo, id, name); }
|
||||
|
||||
void cl_print_info(cl_platform_id platform, cl_device_id device) {
|
||||
size_t work_group_size = 0;
|
||||
cl_device_type device_type = 0;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL);
|
||||
clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
|
||||
const char *type_str = "Other...";
|
||||
switch (device_type) {
|
||||
case CL_DEVICE_TYPE_CPU: type_str ="CL_DEVICE_TYPE_CPU"; break;
|
||||
case CL_DEVICE_TYPE_GPU: type_str = "CL_DEVICE_TYPE_GPU"; break;
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: type_str = "CL_DEVICE_TYPE_ACCELERATOR"; break;
|
||||
}
|
||||
|
||||
LOGD("vendor: %s", get_platform_info(platform, CL_PLATFORM_VENDOR).c_str());
|
||||
LOGD("platform version: %s", get_platform_info(platform, CL_PLATFORM_VERSION).c_str());
|
||||
LOGD("profile: %s", get_platform_info(platform, CL_PLATFORM_PROFILE).c_str());
|
||||
LOGD("extensions: %s", get_platform_info(platform, CL_PLATFORM_EXTENSIONS).c_str());
|
||||
LOGD("name: %s", get_device_info(device, CL_DEVICE_NAME).c_str());
|
||||
LOGD("device version: %s", get_device_info(device, CL_DEVICE_VERSION).c_str());
|
||||
LOGD("max work group size: %zu", work_group_size);
|
||||
LOGD("type = %d, %s", (int)device_type, type_str);
|
||||
}
|
||||
|
||||
void cl_print_build_errors(cl_program program, cl_device_id device) {
|
||||
cl_build_status status;
|
||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, NULL);
|
||||
size_t log_size;
|
||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||||
std::string log(log_size, '\0');
|
||||
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, &log[0], NULL);
|
||||
|
||||
LOGE("build failed; status=%d, log: %s", status, log.c_str());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
cl_device_id cl_get_device_id(cl_device_type device_type) {
|
||||
cl_uint num_platforms = 0;
|
||||
CL_CHECK(clGetPlatformIDs(0, NULL, &num_platforms));
|
||||
std::unique_ptr<cl_platform_id[]> platform_ids = std::make_unique<cl_platform_id[]>(num_platforms);
|
||||
CL_CHECK(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL));
|
||||
|
||||
for (size_t i = 0; i < num_platforms; ++i) {
|
||||
LOGD("platform[%zu] CL_PLATFORM_NAME: %s", i, get_platform_info(platform_ids[i], CL_PLATFORM_NAME).c_str());
|
||||
|
||||
// Get first device
|
||||
if (cl_device_id device_id = NULL; clGetDeviceIDs(platform_ids[i], device_type, 1, &device_id, NULL) == 0 && device_id) {
|
||||
cl_print_info(platform_ids[i], device_id);
|
||||
return device_id;
|
||||
}
|
||||
}
|
||||
LOGE("No valid openCL platform found");
|
||||
assert(0);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
cl_context cl_create_context(cl_device_id device_id) {
|
||||
return CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
|
||||
}
|
||||
|
||||
void cl_release_context(cl_context context) {
|
||||
clReleaseContext(context);
|
||||
}
|
||||
|
||||
cl_program cl_program_from_file(cl_context ctx, cl_device_id device_id, const char* path, const char* args) {
|
||||
return cl_program_from_source(ctx, device_id, util::read_file(path), args);
|
||||
}
|
||||
|
||||
cl_program cl_program_from_source(cl_context ctx, cl_device_id device_id, const std::string& src, const char* args) {
|
||||
const char *csrc = src.c_str();
|
||||
cl_program prg = CL_CHECK_ERR(clCreateProgramWithSource(ctx, 1, &csrc, NULL, &err));
|
||||
if (int err = clBuildProgram(prg, 1, &device_id, args, NULL, NULL); err != 0) {
|
||||
cl_print_build_errors(prg, device_id);
|
||||
assert(0);
|
||||
}
|
||||
return prg;
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include <string>
|
||||
|
||||
#define CL_CHECK(_expr) \
|
||||
do { \
|
||||
assert(CL_SUCCESS == (_expr)); \
|
||||
} while (0)
|
||||
|
||||
#define CL_CHECK_ERR(_expr) \
|
||||
({ \
|
||||
cl_int err = CL_INVALID_VALUE; \
|
||||
__typeof__(_expr) _ret = _expr; \
|
||||
assert(_ret&& err == CL_SUCCESS); \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
cl_device_id cl_get_device_id(cl_device_type device_type);
|
||||
cl_context cl_create_context(cl_device_id device_id);
|
||||
void cl_release_context(cl_context context);
|
||||
cl_program cl_program_from_source(cl_context ctx, cl_device_id device_id, const std::string& src, const char* args = nullptr);
|
||||
cl_program cl_program_from_file(cl_context ctx, cl_device_id device_id, const char* path, const char* args);
|
||||
85
common/mat.h
85
common/mat.h
@@ -1,85 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
typedef struct vec3 {
|
||||
float v[3];
|
||||
} vec3;
|
||||
|
||||
typedef struct vec4 {
|
||||
float v[4];
|
||||
} vec4;
|
||||
|
||||
typedef struct mat3 {
|
||||
float v[3*3];
|
||||
} mat3;
|
||||
|
||||
typedef struct mat4 {
|
||||
float v[4*4];
|
||||
} mat4;
|
||||
|
||||
static inline mat3 matmul3(const mat3 &a, const mat3 &b) {
|
||||
mat3 ret = {{0.0}};
|
||||
for (int r=0; r<3; r++) {
|
||||
for (int c=0; c<3; c++) {
|
||||
float v = 0.0;
|
||||
for (int k=0; k<3; k++) {
|
||||
v += a.v[r*3+k] * b.v[k*3+c];
|
||||
}
|
||||
ret.v[r*3+c] = v;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline vec3 matvecmul3(const mat3 &a, const vec3 &b) {
|
||||
vec3 ret = {{0.0}};
|
||||
for (int r=0; r<3; r++) {
|
||||
for (int c=0; c<3; c++) {
|
||||
ret.v[r] += a.v[r*3+c] * b.v[c];
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline mat4 matmul(const mat4 &a, const mat4 &b) {
|
||||
mat4 ret = {{0.0}};
|
||||
for (int r=0; r<4; r++) {
|
||||
for (int c=0; c<4; c++) {
|
||||
float v = 0.0;
|
||||
for (int k=0; k<4; k++) {
|
||||
v += a.v[r*4+k] * b.v[k*4+c];
|
||||
}
|
||||
ret.v[r*4+c] = v;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline vec4 matvecmul(const mat4 &a, const vec4 &b) {
|
||||
vec4 ret = {{0.0}};
|
||||
for (int r=0; r<4; r++) {
|
||||
for (int c=0; c<4; c++) {
|
||||
ret.v[r] += a.v[r*4+c] * b.v[c];
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// scales the input and output space of a transformation matrix
|
||||
// that assumes pixel-center origin.
|
||||
static inline mat3 transform_scale_buffer(const mat3 &in, float s) {
|
||||
// in_pt = ( transform(out_pt/s + 0.5) - 0.5) * s
|
||||
|
||||
mat3 transform_out = {{
|
||||
1.0f/s, 0.0f, 0.5f,
|
||||
0.0f, 1.0f/s, 0.5f,
|
||||
0.0f, 0.0f, 1.0f,
|
||||
}};
|
||||
|
||||
mat3 transform_in = {{
|
||||
s, 0.0f, -0.5f*s,
|
||||
0.0f, s, -0.5f*s,
|
||||
0.0f, 0.0f, 1.0f,
|
||||
}};
|
||||
|
||||
return matmul3(transform_in, matmul3(in, transform_out));
|
||||
}
|
||||
Submodule msgq_repo updated: 4c4e814ed5...2c191c1a72
@@ -1,35 +1,12 @@
|
||||
import os
|
||||
import glob
|
||||
|
||||
Import('env', 'envCython', 'arch', 'cereal', 'messaging', 'common', 'visionipc')
|
||||
Import('env', 'arch')
|
||||
lenv = env.Clone()
|
||||
lenvCython = envCython.Clone()
|
||||
|
||||
libs = [cereal, messaging, visionipc, common, 'capnp', 'kj', 'pthread']
|
||||
frameworks = []
|
||||
|
||||
common_src = [
|
||||
"models/commonmodel.cc",
|
||||
"transforms/loadyuv.cc",
|
||||
"transforms/transform.cc",
|
||||
]
|
||||
|
||||
# OpenCL is a framework on Mac
|
||||
if arch == "Darwin":
|
||||
frameworks += ['OpenCL']
|
||||
else:
|
||||
libs += ['OpenCL']
|
||||
|
||||
# Set path definitions
|
||||
for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transforms/loadyuv.cl'}.items():
|
||||
for xenv in (lenv, lenvCython):
|
||||
xenv['CXXFLAGS'].append(f'-D{pathdef}_PATH=\\"{File(fn).abspath}\\"')
|
||||
|
||||
# Compile cython
|
||||
cython_libs = envCython["LIBS"] + libs
|
||||
commonmodel_lib = lenv.Library('commonmodel', common_src)
|
||||
lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks)
|
||||
tinygrad_files = sorted(["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x])
|
||||
tinygrad_root = env.Dir("#").abspath
|
||||
tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=tinygrad_root)
|
||||
if 'pycache' not in x and os.path.isfile(os.path.join(tinygrad_root, x))]
|
||||
|
||||
# Get model metadata
|
||||
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
|
||||
@@ -38,22 +15,36 @@ for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
|
||||
cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
|
||||
lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_files, cmd)
|
||||
|
||||
# compile warp
|
||||
# THREADS=0 is need to prevent bug: https://github.com/tinygrad/tinygrad/issues/14689
|
||||
tg_flags = {
|
||||
'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0',
|
||||
'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}', # tinygrad calls brew which needs a $HOME in the env
|
||||
}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0')
|
||||
image_flag = {
|
||||
'larch64': 'IMAGE=2',
|
||||
}.get(arch, 'IMAGE=0')
|
||||
script_files = [File(Dir("#selfdrive/modeld").File("compile_warp.py").abspath)]
|
||||
cmd = f'{tg_flags} python3 {Dir("#selfdrive/modeld").abspath}/compile_warp.py '
|
||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||
warp_targets = []
|
||||
for cam in [_ar_ox_fisheye, _os_fisheye]:
|
||||
w, h = cam.width, cam.height
|
||||
warp_targets += [File(f"models/warp_{w}x{h}_tinygrad.pkl").abspath, File(f"models/dm_warp_{w}x{h}_tinygrad.pkl").abspath]
|
||||
lenv.Command(warp_targets, tinygrad_files + script_files, cmd)
|
||||
|
||||
def tg_compile(flags, model_name):
|
||||
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
|
||||
fn = File(f"models/{model_name}").abspath
|
||||
return lenv.Command(
|
||||
fn + "_tinygrad.pkl",
|
||||
[fn + ".onnx"] + tinygrad_files,
|
||||
f'{pythonpath_string} {flags} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
|
||||
f'{pythonpath_string} {flags} {image_flag} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
|
||||
)
|
||||
|
||||
# Compile small models
|
||||
for model_name in ['driving_vision', 'driving_policy', 'dmonitoring_model']:
|
||||
flags = {
|
||||
'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 IMAGE=2 JIT_BATCH_SIZE=0',
|
||||
'Darwin': f'DEV=CPU HOME={os.path.expanduser("~")}', # tinygrad calls brew which needs a $HOME in the env
|
||||
}.get(arch, 'DEV=CPU CPU_LLVM=1')
|
||||
tg_compile(flags, model_name)
|
||||
tg_compile(tg_flags, model_name)
|
||||
|
||||
# Compile BIG model if USB GPU is available
|
||||
if "USBGPU" in os.environ:
|
||||
|
||||
209
selfdrive/modeld/compile_warp.py
Executable file
209
selfdrive/modeld/compile_warp.py
Executable file
@@ -0,0 +1,209 @@
|
||||
#!/usr/bin/env python3
|
||||
import time
|
||||
import pickle
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.helpers import Context
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
|
||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||
from openpilot.common.transformations.model import MEDMODEL_INPUT_SIZE, DM_INPUT_SIZE
|
||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||
|
||||
MODELS_DIR = Path(__file__).parent / 'models'
|
||||
|
||||
CAMERA_CONFIGS = [
|
||||
(_ar_ox_fisheye.width, _ar_ox_fisheye.height), # tici: 1928x1208
|
||||
(_os_fisheye.width, _os_fisheye.height), # mici: 1344x760
|
||||
]
|
||||
|
||||
UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
|
||||
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)
|
||||
|
||||
IMG_BUFFER_SHAPE = (30, MEDMODEL_INPUT_SIZE[1] // 2, MEDMODEL_INPUT_SIZE[0] // 2)
|
||||
|
||||
|
||||
def warp_pkl_path(w, h):
|
||||
return MODELS_DIR / f'warp_{w}x{h}_tinygrad.pkl'
|
||||
|
||||
|
||||
def dm_warp_pkl_path(w, h):
|
||||
return MODELS_DIR / f'dm_warp_{w}x{h}_tinygrad.pkl'
|
||||
|
||||
|
||||
def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad):
|
||||
w_dst, h_dst = dst_shape
|
||||
h_src, w_src = src_shape
|
||||
|
||||
x = Tensor.arange(w_dst).reshape(1, w_dst).expand(h_dst, w_dst).reshape(-1)
|
||||
y = Tensor.arange(h_dst).reshape(h_dst, 1).expand(h_dst, w_dst).reshape(-1)
|
||||
|
||||
# inline 3x3 matmul as elementwise to avoid reduce op (enables fusion with gather)
|
||||
src_x = M_inv[0, 0] * x + M_inv[0, 1] * y + M_inv[0, 2]
|
||||
src_y = M_inv[1, 0] * x + M_inv[1, 1] * y + M_inv[1, 2]
|
||||
src_w = M_inv[2, 0] * x + M_inv[2, 1] * y + M_inv[2, 2]
|
||||
|
||||
src_x = src_x / src_w
|
||||
src_y = src_y / src_w
|
||||
|
||||
x_nn_clipped = Tensor.round(src_x).clip(0, w_src - 1).cast('int')
|
||||
y_nn_clipped = Tensor.round(src_y).clip(0, h_src - 1).cast('int')
|
||||
idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped
|
||||
|
||||
return src_flat[idx]
|
||||
|
||||
|
||||
def frames_to_tensor(frames, model_w, model_h):
|
||||
H = (frames.shape[0] * 2) // 3
|
||||
W = frames.shape[1]
|
||||
in_img1 = Tensor.cat(frames[0:H:2, 0::2],
|
||||
frames[1:H:2, 0::2],
|
||||
frames[0:H:2, 1::2],
|
||||
frames[1:H:2, 1::2],
|
||||
frames[H:H+H//4].reshape((H//2, W//2)),
|
||||
frames[H+H//4:H+H//2].reshape((H//2, W//2)), dim=0).reshape((6, H//2, W//2))
|
||||
return in_img1
|
||||
|
||||
|
||||
def make_frame_prepare(cam_w, cam_h, model_w, model_h):
|
||||
stride, y_height, uv_height, _ = get_nv12_info(cam_w, cam_h)
|
||||
uv_offset = stride * y_height
|
||||
stride_pad = stride - cam_w
|
||||
|
||||
def frame_prepare_tinygrad(input_frame, M_inv):
|
||||
# UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
|
||||
M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]])
|
||||
# deinterleave NV12 UV plane (UVUV... -> separate U, V)
|
||||
uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
|
||||
with Context(SPLIT_REDUCEOP=0):
|
||||
y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
|
||||
M_inv, (model_w, model_h),
|
||||
(cam_h, cam_w), stride_pad).realize()
|
||||
u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
|
||||
M_inv_uv, (model_w//2, model_h//2),
|
||||
(cam_h//2, cam_w//2), 0).realize()
|
||||
v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
|
||||
M_inv_uv, (model_w//2, model_h//2),
|
||||
(cam_h//2, cam_w//2), 0).realize()
|
||||
yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
|
||||
tensor = frames_to_tensor(yuv, model_w, model_h)
|
||||
return tensor
|
||||
return frame_prepare_tinygrad
|
||||
|
||||
|
||||
def make_update_img_input(frame_prepare, model_w, model_h):
|
||||
def update_img_input_tinygrad(tensor, frame, M_inv):
|
||||
M_inv = M_inv.to(Device.DEFAULT)
|
||||
new_img = frame_prepare(frame, M_inv)
|
||||
full_buffer = tensor[6:].cat(new_img, dim=0).contiguous()
|
||||
return full_buffer, Tensor.cat(full_buffer[:6], full_buffer[-6:], dim=0).contiguous().reshape(1, 12, model_h//2, model_w//2)
|
||||
return update_img_input_tinygrad
|
||||
|
||||
|
||||
def make_update_both_imgs(frame_prepare, model_w, model_h):
|
||||
update_img = make_update_img_input(frame_prepare, model_w, model_h)
|
||||
|
||||
def update_both_imgs_tinygrad(calib_img_buffer, new_img, M_inv,
|
||||
calib_big_img_buffer, new_big_img, M_inv_big):
|
||||
calib_img_buffer, calib_img_pair = update_img(calib_img_buffer, new_img, M_inv)
|
||||
calib_big_img_buffer, calib_big_img_pair = update_img(calib_big_img_buffer, new_big_img, M_inv_big)
|
||||
return calib_img_buffer, calib_img_pair, calib_big_img_buffer, calib_big_img_pair
|
||||
return update_both_imgs_tinygrad
|
||||
|
||||
|
||||
def make_warp_dm(cam_w, cam_h, dm_w, dm_h):
|
||||
stride, y_height, _, _ = get_nv12_info(cam_w, cam_h)
|
||||
stride_pad = stride - cam_w
|
||||
|
||||
def warp_dm(input_frame, M_inv):
|
||||
M_inv = M_inv.to(Device.DEFAULT)
|
||||
result = warp_perspective_tinygrad(input_frame[:cam_h*stride], M_inv, (dm_w, dm_h), (cam_h, cam_w), stride_pad).reshape(-1, dm_h * dm_w)
|
||||
return result
|
||||
return warp_dm
|
||||
|
||||
|
||||
def compile_modeld_warp(cam_w, cam_h):
|
||||
model_w, model_h = MEDMODEL_INPUT_SIZE
|
||||
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
||||
|
||||
print(f"Compiling modeld warp for {cam_w}x{cam_h}...")
|
||||
|
||||
frame_prepare = make_frame_prepare(cam_w, cam_h, model_w, model_h)
|
||||
update_both_imgs = make_update_both_imgs(frame_prepare, model_w, model_h)
|
||||
update_img_jit = TinyJit(update_both_imgs, prune=True)
|
||||
|
||||
full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
|
||||
big_full_buffer = Tensor.zeros(IMG_BUFFER_SHAPE, dtype='uint8').contiguous().realize()
|
||||
full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
|
||||
big_full_buffer_np = np.zeros(IMG_BUFFER_SHAPE, dtype=np.uint8)
|
||||
|
||||
for i in range(10):
|
||||
new_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
|
||||
img_inputs = [full_buffer,
|
||||
Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
new_big_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8)
|
||||
big_img_inputs = [big_full_buffer,
|
||||
Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
inputs = img_inputs + big_img_inputs
|
||||
Device.default.synchronize()
|
||||
|
||||
inputs_np = [x.numpy() for x in inputs]
|
||||
inputs_np[0] = full_buffer_np
|
||||
inputs_np[3] = big_full_buffer_np
|
||||
|
||||
st = time.perf_counter()
|
||||
out = update_img_jit(*inputs)
|
||||
full_buffer = out[0].contiguous().realize().clone()
|
||||
big_full_buffer = out[2].contiguous().realize().clone()
|
||||
mt = time.perf_counter()
|
||||
Device.default.synchronize()
|
||||
et = time.perf_counter()
|
||||
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
||||
|
||||
pkl_path = warp_pkl_path(cam_w, cam_h)
|
||||
with open(pkl_path, "wb") as f:
|
||||
pickle.dump(update_img_jit, f)
|
||||
print(f" Saved to {pkl_path}")
|
||||
|
||||
jit = pickle.load(open(pkl_path, "rb"))
|
||||
jit(*inputs)
|
||||
|
||||
|
||||
def compile_dm_warp(cam_w, cam_h):
|
||||
dm_w, dm_h = DM_INPUT_SIZE
|
||||
_, _, _, yuv_size = get_nv12_info(cam_w, cam_h)
|
||||
|
||||
print(f"Compiling DM warp for {cam_w}x{cam_h}...")
|
||||
|
||||
warp_dm = make_warp_dm(cam_w, cam_h, dm_w, dm_h)
|
||||
warp_dm_jit = TinyJit(warp_dm, prune=True)
|
||||
|
||||
for i in range(10):
|
||||
inputs = [Tensor.from_blob((32 * Tensor.randn(yuv_size,) + 128).cast(dtype='uint8').realize().numpy().ctypes.data, (yuv_size,), dtype='uint8'),
|
||||
Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')]
|
||||
Device.default.synchronize()
|
||||
st = time.perf_counter()
|
||||
warp_dm_jit(*inputs)
|
||||
mt = time.perf_counter()
|
||||
Device.default.synchronize()
|
||||
et = time.perf_counter()
|
||||
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
|
||||
|
||||
pkl_path = dm_warp_pkl_path(cam_w, cam_h)
|
||||
with open(pkl_path, "wb") as f:
|
||||
pickle.dump(warp_dm_jit, f)
|
||||
print(f" Saved to {pkl_path}")
|
||||
|
||||
|
||||
def run_and_save_pickle():
|
||||
for cam_w, cam_h in CAMERA_CONFIGS:
|
||||
compile_modeld_warp(cam_w, cam_h)
|
||||
compile_dm_warp(cam_w, cam_h)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_and_save_pickle()
|
||||
@@ -3,7 +3,6 @@ import os
|
||||
from openpilot.system.hardware import TICI
|
||||
os.environ['DEV'] = 'QCOM' if TICI else 'CPU'
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.dtype import dtypes
|
||||
import time
|
||||
import pickle
|
||||
import numpy as np
|
||||
@@ -16,32 +15,35 @@ from openpilot.common.swaglog import cloudlog
|
||||
from openpilot.common.realtime import config_realtime_process
|
||||
from openpilot.common.transformations.model import dmonitoringmodel_intrinsics
|
||||
from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
|
||||
from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame
|
||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||
from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid, safe_exp
|
||||
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
|
||||
|
||||
PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
|
||||
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
|
||||
MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl'
|
||||
METADATA_PATH = Path(__file__).parent / 'models/dmonitoring_model_metadata.pkl'
|
||||
|
||||
MODELS_DIR = Path(__file__).parent / 'models'
|
||||
|
||||
class ModelState:
|
||||
inputs: dict[str, np.ndarray]
|
||||
output: np.ndarray
|
||||
|
||||
def __init__(self, cl_ctx):
|
||||
def __init__(self):
|
||||
with open(METADATA_PATH, 'rb') as f:
|
||||
model_metadata = pickle.load(f)
|
||||
self.input_shapes = model_metadata['input_shapes']
|
||||
self.output_slices = model_metadata['output_slices']
|
||||
|
||||
self.frame = MonitoringModelFrame(cl_ctx)
|
||||
self.numpy_inputs = {
|
||||
'calib': np.zeros(self.input_shapes['calib'], dtype=np.float32),
|
||||
}
|
||||
|
||||
self.warp_inputs_np = {'transform': np.zeros((3,3), dtype=np.float32)}
|
||||
self.warp_inputs = {k: Tensor(v, device='NPY') for k,v in self.warp_inputs_np.items()}
|
||||
self.frame_buf_params = None
|
||||
self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
|
||||
self._blob_cache : dict[int, Tensor] = {}
|
||||
self.image_warp = None
|
||||
with open(MODEL_PKL_PATH, "rb") as f:
|
||||
self.model_run = pickle.load(f)
|
||||
|
||||
@@ -50,16 +52,20 @@ class ModelState:
|
||||
|
||||
t1 = time.perf_counter()
|
||||
|
||||
input_img_cl = self.frame.prepare(buf, transform.flatten())
|
||||
if TICI:
|
||||
# The imgs tensors are backed by opencl memory, only need init once
|
||||
if 'input_img' not in self.tensor_inputs:
|
||||
self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, self.input_shapes['input_img'], dtype=dtypes.uint8)
|
||||
else:
|
||||
self.tensor_inputs['input_img'] = Tensor(self.frame.buffer_from_cl(input_img_cl).reshape(self.input_shapes['input_img']), dtype=dtypes.uint8).realize()
|
||||
if self.image_warp is None:
|
||||
self.frame_buf_params = get_nv12_info(buf.width, buf.height)
|
||||
warp_path = MODELS_DIR / f'dm_warp_{buf.width}x{buf.height}_tinygrad.pkl'
|
||||
with open(warp_path, "rb") as f:
|
||||
self.image_warp = pickle.load(f)
|
||||
ptr = buf.data.ctypes.data
|
||||
# There is a ringbuffer of imgs, just cache tensors pointing to all of them
|
||||
if ptr not in self._blob_cache:
|
||||
self._blob_cache[ptr] = Tensor.from_blob(ptr, (self.frame_buf_params[3],), dtype='uint8')
|
||||
|
||||
self.warp_inputs_np['transform'][:] = transform[:]
|
||||
self.tensor_inputs['input_img'] = self.image_warp(self._blob_cache[ptr], self.warp_inputs['transform']).realize()
|
||||
|
||||
output = self.model_run(**self.tensor_inputs).numpy().flatten()
|
||||
output = self.model_run(**self.tensor_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
|
||||
|
||||
t2 = time.perf_counter()
|
||||
return output, t2 - t1
|
||||
@@ -107,12 +113,11 @@ def get_driverstate_packet(model_output, frame_id: int, location_ts: int, exec_t
|
||||
def main():
|
||||
config_realtime_process(7, 5)
|
||||
|
||||
cl_context = CLContext()
|
||||
model = ModelState(cl_context)
|
||||
model = ModelState()
|
||||
cloudlog.warning("models loaded, dmonitoringmodeld starting")
|
||||
|
||||
cloudlog.warning("connecting to driver stream")
|
||||
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True, cl_context)
|
||||
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True)
|
||||
while not vipc_client.connect(False):
|
||||
time.sleep(0.1)
|
||||
assert vipc_client.is_connected()
|
||||
|
||||
@@ -7,7 +7,6 @@ if USBGPU:
|
||||
os.environ['DEV'] = 'AMD'
|
||||
os.environ['AMD_IFACE'] = 'USB'
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.dtype import dtypes
|
||||
import time
|
||||
import pickle
|
||||
import numpy as np
|
||||
@@ -22,14 +21,13 @@ from openpilot.common.params import Params
|
||||
from openpilot.common.filter_simple import FirstOrderFilter
|
||||
from openpilot.common.realtime import config_realtime_process, DT_MDL
|
||||
from openpilot.common.transformations.camera import DEVICE_CAMERAS
|
||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||
from openpilot.common.transformations.model import get_warp_matrix
|
||||
from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
|
||||
from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, smooth_value, get_curvature_from_plan
|
||||
from openpilot.selfdrive.modeld.parse_model_outputs import Parser
|
||||
from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
|
||||
from openpilot.selfdrive.modeld.constants import ModelConstants, Plan
|
||||
from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext
|
||||
from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
|
||||
|
||||
|
||||
PROCESS_NAME = "selfdrive.modeld.modeld"
|
||||
@@ -39,11 +37,15 @@ VISION_PKL_PATH = Path(__file__).parent / 'models/driving_vision_tinygrad.pkl'
|
||||
POLICY_PKL_PATH = Path(__file__).parent / 'models/driving_policy_tinygrad.pkl'
|
||||
VISION_METADATA_PATH = Path(__file__).parent / 'models/driving_vision_metadata.pkl'
|
||||
POLICY_METADATA_PATH = Path(__file__).parent / 'models/driving_policy_metadata.pkl'
|
||||
MODELS_DIR = Path(__file__).parent / 'models'
|
||||
|
||||
LAT_SMOOTH_SECONDS = 0.0
|
||||
LONG_SMOOTH_SECONDS = 0.3
|
||||
MIN_LAT_CONTROL_SPEED = 0.3
|
||||
|
||||
IMG_QUEUE_SHAPE = (6*(ModelConstants.MODEL_RUN_FREQ//ModelConstants.MODEL_CONTEXT_FREQ + 1), 128, 256)
|
||||
assert IMG_QUEUE_SHAPE[0] == 30
|
||||
|
||||
|
||||
def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action,
|
||||
lat_action_t: float, long_action_t: float, v_ego: float) -> log.ModelDataV2.Action:
|
||||
@@ -136,12 +138,11 @@ class InputQueues:
|
||||
return out
|
||||
|
||||
class ModelState:
|
||||
frames: dict[str, DrivingModelFrame]
|
||||
inputs: dict[str, np.ndarray]
|
||||
output: np.ndarray
|
||||
prev_desire: np.ndarray # for tracking the rising edge of the pulse
|
||||
|
||||
def __init__(self, context: CLContext):
|
||||
def __init__(self):
|
||||
with open(VISION_METADATA_PATH, 'rb') as f:
|
||||
vision_metadata = pickle.load(f)
|
||||
self.vision_input_shapes = vision_metadata['input_shapes']
|
||||
@@ -155,7 +156,6 @@ class ModelState:
|
||||
self.policy_output_slices = policy_metadata['output_slices']
|
||||
policy_output_size = policy_metadata['output_shapes']['outputs'][1]
|
||||
|
||||
self.frames = {name: DrivingModelFrame(context, ModelConstants.MODEL_RUN_FREQ//ModelConstants.MODEL_CONTEXT_FREQ) for name in self.vision_input_names}
|
||||
self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
|
||||
|
||||
# policy inputs
|
||||
@@ -165,13 +165,18 @@ class ModelState:
|
||||
self.full_input_queues.update_dtypes_and_shapes({k: self.numpy_inputs[k].dtype}, {k: self.numpy_inputs[k].shape})
|
||||
self.full_input_queues.reset()
|
||||
|
||||
# img buffers are managed in openCL transform code
|
||||
self.vision_inputs: dict[str, Tensor] = {}
|
||||
self.img_queues = {'img': Tensor.zeros(IMG_QUEUE_SHAPE, dtype='uint8').contiguous().realize(),
|
||||
'big_img': Tensor.zeros(IMG_QUEUE_SHAPE, dtype='uint8').contiguous().realize()}
|
||||
self.full_frames : dict[str, Tensor] = {}
|
||||
self._blob_cache : dict[int, Tensor] = {}
|
||||
self.transforms_np = {k: np.zeros((3,3), dtype=np.float32) for k in self.img_queues}
|
||||
self.transforms = {k: Tensor(v, device='NPY').realize() for k, v in self.transforms_np.items()}
|
||||
self.vision_output = np.zeros(vision_output_size, dtype=np.float32)
|
||||
self.policy_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
|
||||
self.policy_output = np.zeros(policy_output_size, dtype=np.float32)
|
||||
self.parser = Parser()
|
||||
|
||||
self.frame_buf_params : dict[str, tuple[int, int, int, int]] = {}
|
||||
self.update_imgs = None
|
||||
with open(VISION_PKL_PATH, "rb") as f:
|
||||
self.vision_run = pickle.load(f)
|
||||
|
||||
@@ -188,23 +193,33 @@ class ModelState:
|
||||
inputs['desire_pulse'][0] = 0
|
||||
new_desire = np.where(inputs['desire_pulse'] - self.prev_desire > .99, inputs['desire_pulse'], 0)
|
||||
self.prev_desire[:] = inputs['desire_pulse']
|
||||
if self.update_imgs is None:
|
||||
for key in bufs.keys():
|
||||
w, h = bufs[key].width, bufs[key].height
|
||||
self.frame_buf_params[key] = get_nv12_info(w, h)
|
||||
warp_path = MODELS_DIR / f'warp_{w}x{h}_tinygrad.pkl'
|
||||
with open(warp_path, "rb") as f:
|
||||
self.update_imgs = pickle.load(f)
|
||||
|
||||
imgs_cl = {name: self.frames[name].prepare(bufs[name], transforms[name].flatten()) for name in self.vision_input_names}
|
||||
for key in bufs.keys():
|
||||
ptr = bufs[key].data.ctypes.data
|
||||
yuv_size = self.frame_buf_params[key][3]
|
||||
# There is a ringbuffer of imgs, just cache tensors pointing to all of them
|
||||
if ptr not in self._blob_cache:
|
||||
self._blob_cache[ptr] = Tensor.from_blob(ptr, (yuv_size,), dtype='uint8')
|
||||
self.full_frames[key] = self._blob_cache[ptr]
|
||||
for key in bufs.keys():
|
||||
self.transforms_np[key][:,:] = transforms[key][:,:]
|
||||
|
||||
if TICI and not USBGPU:
|
||||
# The imgs tensors are backed by opencl memory, only need init once
|
||||
for key in imgs_cl:
|
||||
if key not in self.vision_inputs:
|
||||
self.vision_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.vision_input_shapes[key], dtype=dtypes.uint8)
|
||||
else:
|
||||
for key in imgs_cl:
|
||||
frame_input = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.vision_input_shapes[key])
|
||||
self.vision_inputs[key] = Tensor(frame_input, dtype=dtypes.uint8).realize()
|
||||
out = self.update_imgs(self.img_queues['img'], self.full_frames['img'], self.transforms['img'],
|
||||
self.img_queues['big_img'], self.full_frames['big_img'], self.transforms['big_img'])
|
||||
self.img_queues['img'], self.img_queues['big_img'] = out[0].realize(), out[2].realize()
|
||||
vision_inputs = {'img': out[1], 'big_img': out[3]}
|
||||
|
||||
if prepare_only:
|
||||
return None
|
||||
|
||||
self.vision_output = self.vision_run(**self.vision_inputs).contiguous().realize().uop.base.buffer.numpy()
|
||||
self.vision_output = self.vision_run(**vision_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
|
||||
vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(self.vision_output, self.vision_output_slices))
|
||||
|
||||
self.full_input_queues.enqueue({'features_buffer': vision_outputs_dict['hidden_state'], 'desire_pulse': new_desire})
|
||||
@@ -212,9 +227,8 @@ class ModelState:
|
||||
self.numpy_inputs[k][:] = self.full_input_queues.get(k)[k]
|
||||
self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
|
||||
|
||||
self.policy_output = self.policy_run(**self.policy_inputs).numpy().flatten()
|
||||
self.policy_output = self.policy_run(**self.policy_inputs).contiguous().realize().uop.base.buffer.numpy().flatten()
|
||||
policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(self.policy_output, self.policy_output_slices))
|
||||
|
||||
combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict}
|
||||
if SEND_RAW_PRED:
|
||||
combined_outputs_dict['raw_pred'] = np.concatenate([self.vision_output.copy(), self.policy_output.copy()])
|
||||
@@ -231,10 +245,8 @@ def main(demo=False):
|
||||
config_realtime_process(7, 54)
|
||||
|
||||
st = time.monotonic()
|
||||
cloudlog.warning("setting up CL context")
|
||||
cl_context = CLContext()
|
||||
cloudlog.warning("CL context ready; loading model")
|
||||
model = ModelState(cl_context)
|
||||
cloudlog.warning("loading model")
|
||||
model = ModelState()
|
||||
cloudlog.warning(f"models loaded in {time.monotonic() - st:.1f}s, modeld starting")
|
||||
|
||||
# visionipc clients
|
||||
@@ -247,8 +259,8 @@ def main(demo=False):
|
||||
time.sleep(.1)
|
||||
|
||||
vipc_client_main_stream = VisionStreamType.VISION_STREAM_WIDE_ROAD if main_wide_camera else VisionStreamType.VISION_STREAM_ROAD
|
||||
vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True, cl_context)
|
||||
vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False, cl_context)
|
||||
vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True)
|
||||
vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False)
|
||||
cloudlog.warning(f"vision stream set up, main_wide_camera: {main_wide_camera}, use_extra_client: {use_extra_client}")
|
||||
|
||||
while not vipc_client_main.connect(False):
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
#include "selfdrive/modeld/models/commonmodel.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "common/clutil.h"
|
||||
|
||||
DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context, int _temporal_skip) : ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
temporal_skip = _temporal_skip;
|
||||
input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
|
||||
img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (temporal_skip+1)*frame_size_bytes, NULL, &err));
|
||||
region.origin = temporal_skip * frame_size_bytes;
|
||||
region.size = frame_size_bytes;
|
||||
last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err));
|
||||
|
||||
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
|
||||
cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
|
||||
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
|
||||
|
||||
for (int i = 0; i < temporal_skip; i++) {
|
||||
CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
|
||||
}
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
|
||||
|
||||
copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes);
|
||||
copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes, frame_size_bytes);
|
||||
|
||||
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
|
||||
clFinish(q);
|
||||
return &input_frames_cl;
|
||||
}
|
||||
|
||||
DrivingModelFrame::~DrivingModelFrame() {
|
||||
deinit_transform();
|
||||
loadyuv_destroy(&loadyuv);
|
||||
CL_CHECK(clReleaseMemObject(input_frames_cl));
|
||||
CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
|
||||
CL_CHECK(clReleaseMemObject(last_img_cl));
|
||||
CL_CHECK(clReleaseCommandQueue(q));
|
||||
}
|
||||
|
||||
|
||||
MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
|
||||
|
||||
init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
|
||||
cl_mem* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
|
||||
run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
|
||||
clFinish(q);
|
||||
return &y_cl;
|
||||
}
|
||||
|
||||
MonitoringModelFrame::~MonitoringModelFrame() {
|
||||
deinit_transform();
|
||||
CL_CHECK(clReleaseMemObject(input_frame_cl));
|
||||
CL_CHECK(clReleaseCommandQueue(q));
|
||||
}
|
||||
@@ -1,97 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cfloat>
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include "common/mat.h"
|
||||
#include "selfdrive/modeld/transforms/loadyuv.h"
|
||||
#include "selfdrive/modeld/transforms/transform.h"
|
||||
|
||||
class ModelFrame {
|
||||
public:
|
||||
ModelFrame(cl_device_id device_id, cl_context context) {
|
||||
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
|
||||
}
|
||||
virtual ~ModelFrame() {}
|
||||
virtual cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { return NULL; }
|
||||
uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
|
||||
CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
return &input_frames[0];
|
||||
}
|
||||
|
||||
int MODEL_WIDTH;
|
||||
int MODEL_HEIGHT;
|
||||
int MODEL_FRAME_SIZE;
|
||||
int buf_size;
|
||||
|
||||
protected:
|
||||
cl_mem y_cl, u_cl, v_cl;
|
||||
Transform transform;
|
||||
cl_command_queue q;
|
||||
std::unique_ptr<uint8_t[]> input_frames;
|
||||
|
||||
void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
|
||||
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
|
||||
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
|
||||
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
|
||||
transform_init(&transform, context, device_id);
|
||||
}
|
||||
|
||||
void deinit_transform() {
|
||||
transform_destroy(&transform);
|
||||
CL_CHECK(clReleaseMemObject(v_cl));
|
||||
CL_CHECK(clReleaseMemObject(u_cl));
|
||||
CL_CHECK(clReleaseMemObject(y_cl));
|
||||
}
|
||||
|
||||
void run_transform(cl_mem yuv_cl, int model_width, int model_height, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
|
||||
transform_queue(&transform, q,
|
||||
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
|
||||
y_cl, u_cl, v_cl, model_width, model_height, projection);
|
||||
}
|
||||
};
|
||||
|
||||
class DrivingModelFrame : public ModelFrame {
|
||||
public:
|
||||
DrivingModelFrame(cl_device_id device_id, cl_context context, int _temporal_skip);
|
||||
~DrivingModelFrame();
|
||||
cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);
|
||||
|
||||
const int MODEL_WIDTH = 512;
|
||||
const int MODEL_HEIGHT = 256;
|
||||
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
|
||||
const int buf_size = MODEL_FRAME_SIZE * 2; // 2 frames are temporal_skip frames apart
|
||||
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
|
||||
|
||||
private:
|
||||
LoadYUVState loadyuv;
|
||||
cl_mem img_buffer_20hz_cl, last_img_cl, input_frames_cl;
|
||||
cl_buffer_region region;
|
||||
int temporal_skip;
|
||||
};
|
||||
|
||||
class MonitoringModelFrame : public ModelFrame {
|
||||
public:
|
||||
MonitoringModelFrame(cl_device_id device_id, cl_context context);
|
||||
~MonitoringModelFrame();
|
||||
cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);
|
||||
|
||||
const int MODEL_WIDTH = 1440;
|
||||
const int MODEL_HEIGHT = 960;
|
||||
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT;
|
||||
const int buf_size = MODEL_FRAME_SIZE;
|
||||
|
||||
private:
|
||||
cl_mem input_frame_cl;
|
||||
};
|
||||
@@ -1,27 +0,0 @@
|
||||
# distutils: language = c++
|
||||
|
||||
from msgq.visionipc.visionipc cimport cl_device_id, cl_context, cl_mem
|
||||
|
||||
cdef extern from "common/mat.h":
|
||||
cdef struct mat3:
|
||||
float v[9]
|
||||
|
||||
cdef extern from "common/clutil.h":
|
||||
cdef unsigned long CL_DEVICE_TYPE_DEFAULT
|
||||
cl_device_id cl_get_device_id(unsigned long)
|
||||
cl_context cl_create_context(cl_device_id)
|
||||
void cl_release_context(cl_context)
|
||||
|
||||
cdef extern from "selfdrive/modeld/models/commonmodel.h":
|
||||
cppclass ModelFrame:
|
||||
int buf_size
|
||||
unsigned char * buffer_from_cl(cl_mem*, int);
|
||||
cl_mem * prepare(cl_mem, int, int, int, int, mat3)
|
||||
|
||||
cppclass DrivingModelFrame:
|
||||
int buf_size
|
||||
DrivingModelFrame(cl_device_id, cl_context, int)
|
||||
|
||||
cppclass MonitoringModelFrame:
|
||||
int buf_size
|
||||
MonitoringModelFrame(cl_device_id, cl_context)
|
||||
@@ -1,13 +0,0 @@
|
||||
# distutils: language = c++
|
||||
|
||||
from msgq.visionipc.visionipc cimport cl_mem
|
||||
from msgq.visionipc.visionipc_pyx cimport CLContext as BaseCLContext
|
||||
|
||||
cdef class CLContext(BaseCLContext):
|
||||
pass
|
||||
|
||||
cdef class CLMem:
|
||||
cdef cl_mem * mem
|
||||
|
||||
@staticmethod
|
||||
cdef create(void*)
|
||||
@@ -1,74 +0,0 @@
|
||||
# distutils: language = c++
|
||||
# cython: c_string_encoding=ascii, language_level=3
|
||||
|
||||
import numpy as np
|
||||
cimport numpy as cnp
|
||||
from libc.string cimport memcpy
|
||||
from libc.stdint cimport uintptr_t
|
||||
|
||||
from msgq.visionipc.visionipc cimport cl_mem
|
||||
from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext
|
||||
from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context, cl_release_context
|
||||
from .commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame
|
||||
|
||||
|
||||
cdef class CLContext(BaseCLContext):
|
||||
def __cinit__(self):
|
||||
self.device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT)
|
||||
self.context = cl_create_context(self.device_id)
|
||||
|
||||
def __dealloc__(self):
|
||||
if self.context:
|
||||
cl_release_context(self.context)
|
||||
|
||||
cdef class CLMem:
|
||||
@staticmethod
|
||||
cdef create(void * cmem):
|
||||
mem = CLMem()
|
||||
mem.mem = <cl_mem*> cmem
|
||||
return mem
|
||||
|
||||
@property
|
||||
def mem_address(self):
|
||||
return <uintptr_t>(self.mem)
|
||||
|
||||
def cl_from_visionbuf(VisionBuf buf):
|
||||
return CLMem.create(<void*>&buf.buf.buf_cl)
|
||||
|
||||
|
||||
cdef class ModelFrame:
|
||||
cdef cppModelFrame * frame
|
||||
cdef int buf_size
|
||||
|
||||
def __dealloc__(self):
|
||||
del self.frame
|
||||
|
||||
def prepare(self, VisionBuf buf, float[:] projection):
|
||||
cdef mat3 cprojection
|
||||
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
|
||||
cdef cl_mem * data
|
||||
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection)
|
||||
return CLMem.create(data)
|
||||
|
||||
def buffer_from_cl(self, CLMem in_frames):
|
||||
cdef unsigned char * data2
|
||||
data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
|
||||
return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)
|
||||
|
||||
|
||||
cdef class DrivingModelFrame(ModelFrame):
|
||||
cdef cppDrivingModelFrame * _frame
|
||||
|
||||
def __cinit__(self, CLContext context, int temporal_skip):
|
||||
self._frame = new cppDrivingModelFrame(context.device_id, context.context, temporal_skip)
|
||||
self.frame = <cppModelFrame*>(self._frame)
|
||||
self.buf_size = self._frame.buf_size
|
||||
|
||||
cdef class MonitoringModelFrame(ModelFrame):
|
||||
cdef cppMonitoringModelFrame * _frame
|
||||
|
||||
def __cinit__(self, CLContext context):
|
||||
self._frame = new cppMonitoringModelFrame(context.device_id, context.context)
|
||||
self.frame = <cppModelFrame*>(self._frame)
|
||||
self.buf_size = self._frame.buf_size
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.helpers import to_mv
|
||||
|
||||
def qcom_tensor_from_opencl_address(opencl_address, shape, dtype):
|
||||
cl_buf_desc_ptr = to_mv(opencl_address, 8).cast('Q')[0]
|
||||
rawbuf_ptr = to_mv(cl_buf_desc_ptr, 0x100).cast('Q')[20] # offset 0xA0 is a raw gpu pointer.
|
||||
return Tensor.from_blob(rawbuf_ptr, shape, dtype=dtype, device='QCOM')
|
||||
@@ -1,76 +0,0 @@
|
||||
#include "selfdrive/modeld/transforms/loadyuv.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) {
|
||||
memset(s, 0, sizeof(*s));
|
||||
|
||||
s->width = width;
|
||||
s->height = height;
|
||||
|
||||
char args[1024];
|
||||
snprintf(args, sizeof(args),
|
||||
"-cl-fast-relaxed-math -cl-denorms-are-zero "
|
||||
"-DTRANSFORMED_WIDTH=%d -DTRANSFORMED_HEIGHT=%d",
|
||||
width, height);
|
||||
cl_program prg = cl_program_from_file(ctx, device_id, LOADYUV_PATH, args);
|
||||
|
||||
s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err));
|
||||
s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err));
|
||||
s->copy_krnl = CL_CHECK_ERR(clCreateKernel(prg, "copy", &err));
|
||||
|
||||
// done with this
|
||||
CL_CHECK(clReleaseProgram(prg));
|
||||
}
|
||||
|
||||
void loadyuv_destroy(LoadYUVState* s) {
|
||||
CL_CHECK(clReleaseKernel(s->loadys_krnl));
|
||||
CL_CHECK(clReleaseKernel(s->loaduv_krnl));
|
||||
CL_CHECK(clReleaseKernel(s->copy_krnl));
|
||||
}
|
||||
|
||||
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
|
||||
cl_mem out_cl) {
|
||||
cl_int global_out_off = 0;
|
||||
|
||||
CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl));
|
||||
CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl));
|
||||
CL_CHECK(clSetKernelArg(s->loadys_krnl, 2, sizeof(cl_int), &global_out_off));
|
||||
|
||||
const size_t loadys_work_size = (s->width*s->height)/8;
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL,
|
||||
&loadys_work_size, NULL, 0, 0, NULL));
|
||||
|
||||
const size_t loaduv_work_size = ((s->width/2)*(s->height/2))/8;
|
||||
global_out_off += (s->width*s->height);
|
||||
|
||||
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl));
|
||||
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
|
||||
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off));
|
||||
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
|
||||
&loaduv_work_size, NULL, 0, 0, NULL));
|
||||
|
||||
global_out_off += (s->width/2)*(s->height/2);
|
||||
|
||||
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl));
|
||||
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl));
|
||||
CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off));
|
||||
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
|
||||
&loaduv_work_size, NULL, 0, 0, NULL));
|
||||
}
|
||||
|
||||
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
|
||||
size_t src_offset, size_t dst_offset, size_t size) {
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &src));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_mem), &dst));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 2, sizeof(cl_int), &src_offset));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 3, sizeof(cl_int), &dst_offset));
|
||||
const size_t copy_work_size = size/8;
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
|
||||
©_work_size, NULL, 0, 0, NULL));
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
|
||||
|
||||
__kernel void loadys(__global uchar8 const * const Y,
|
||||
__global uchar * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const int ois = gid * 8;
|
||||
const int oy = ois / TRANSFORMED_WIDTH;
|
||||
const int ox = ois % TRANSFORMED_WIDTH;
|
||||
|
||||
const uchar8 ys = Y[gid];
|
||||
|
||||
// 02
|
||||
// 13
|
||||
|
||||
__global uchar* outy0;
|
||||
__global uchar* outy1;
|
||||
if ((oy & 1) == 0) {
|
||||
outy0 = out + out_offset; //y0
|
||||
outy1 = out + out_offset + UV_SIZE*2; //y2
|
||||
} else {
|
||||
outy0 = out + out_offset + UV_SIZE; //y1
|
||||
outy1 = out + out_offset + UV_SIZE*3; //y3
|
||||
}
|
||||
|
||||
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
}
|
||||
|
||||
__kernel void loaduv(__global uchar8 const * const in,
|
||||
__global uchar8 * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const uchar8 inv = in[gid];
|
||||
out[gid + out_offset / 8] = inv;
|
||||
}
|
||||
|
||||
__kernel void copy(__global uchar8 * in,
|
||||
__global uchar8 * out,
|
||||
int in_offset,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
out[gid + out_offset / 8] = in[gid + in_offset / 8];
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/clutil.h"
|
||||
|
||||
typedef struct {
|
||||
int width, height;
|
||||
cl_kernel loadys_krnl, loaduv_krnl, copy_krnl;
|
||||
} LoadYUVState;
|
||||
|
||||
void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height);
|
||||
|
||||
void loadyuv_destroy(LoadYUVState* s);
|
||||
|
||||
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
|
||||
cl_mem out_cl);
|
||||
|
||||
|
||||
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
|
||||
size_t src_offset, size_t dst_offset, size_t size);
|
||||
@@ -1,97 +0,0 @@
|
||||
#include "selfdrive/modeld/transforms/transform.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#include "common/clutil.h"
|
||||
|
||||
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id) {
|
||||
memset(s, 0, sizeof(*s));
|
||||
|
||||
cl_program prg = cl_program_from_file(ctx, device_id, TRANSFORM_PATH, "");
|
||||
s->krnl = CL_CHECK_ERR(clCreateKernel(prg, "warpPerspective", &err));
|
||||
// done with this
|
||||
CL_CHECK(clReleaseProgram(prg));
|
||||
|
||||
s->m_y_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
|
||||
s->m_uv_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err));
|
||||
}
|
||||
|
||||
void transform_destroy(Transform* s) {
|
||||
CL_CHECK(clReleaseMemObject(s->m_y_cl));
|
||||
CL_CHECK(clReleaseMemObject(s->m_uv_cl));
|
||||
CL_CHECK(clReleaseKernel(s->krnl));
|
||||
}
|
||||
|
||||
void transform_queue(Transform* s,
|
||||
cl_command_queue q,
|
||||
cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
|
||||
cl_mem out_y, cl_mem out_u, cl_mem out_v,
|
||||
int out_width, int out_height,
|
||||
const mat3& projection) {
|
||||
const int zero = 0;
|
||||
|
||||
// sampled using pixel center origin
|
||||
// (because that's how fastcv and opencv does it)
|
||||
|
||||
mat3 projection_y = projection;
|
||||
|
||||
// in and out uv is half the size of y.
|
||||
mat3 projection_uv = transform_scale_buffer(projection, 0.5);
|
||||
|
||||
CL_CHECK(clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL));
|
||||
CL_CHECK(clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL));
|
||||
|
||||
const int in_y_width = in_width;
|
||||
const int in_y_height = in_height;
|
||||
const int in_y_px_stride = 1;
|
||||
const int in_uv_width = in_width/2;
|
||||
const int in_uv_height = in_height/2;
|
||||
const int in_uv_px_stride = 2;
|
||||
const int in_u_offset = in_uv_offset;
|
||||
const int in_v_offset = in_uv_offset + 1;
|
||||
|
||||
const int out_y_width = out_width;
|
||||
const int out_y_height = out_height;
|
||||
const int out_uv_width = out_width/2;
|
||||
const int out_uv_height = out_height/2;
|
||||
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv)); // src
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride)); // src_row_stride
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride)); // src_px_stride
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero)); // src_offset
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height)); // src_rows
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width)); // src_cols
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y)); // dst
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width)); // dst_row_stride
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height)); // dst_rows
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width)); // dst_cols
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl)); // M
|
||||
|
||||
const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height};
|
||||
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
|
||||
(const size_t*)&work_size_y, NULL, 0, 0, NULL));
|
||||
|
||||
const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height};
|
||||
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride)); // src_px_stride
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset)); // src_offset
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height)); // src_rows
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width)); // src_cols
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u)); // dst
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width)); // dst_row_stride
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height)); // dst_rows
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width)); // dst_cols
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl)); // M
|
||||
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
|
||||
(const size_t*)&work_size_uv, NULL, 0, 0, NULL));
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset)); // src_ofset
|
||||
CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v)); // dst
|
||||
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL,
|
||||
(const size_t*)&work_size_uv, NULL, 0, 0, NULL));
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
#define INTER_BITS 5
|
||||
#define INTER_TAB_SIZE (1 << INTER_BITS)
|
||||
#define INTER_SCALE 1.f / INTER_TAB_SIZE
|
||||
|
||||
#define INTER_REMAP_COEF_BITS 15
|
||||
#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS)
|
||||
|
||||
__kernel void warpPerspective(__global const uchar * src,
|
||||
int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols,
|
||||
__global uchar * dst,
|
||||
int dst_row_stride, int dst_offset, int dst_rows, int dst_cols,
|
||||
__constant float * M)
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
if (dx < dst_cols && dy < dst_rows)
|
||||
{
|
||||
float X0 = M[0] * dx + M[1] * dy + M[2];
|
||||
float Y0 = M[3] * dx + M[4] * dy + M[5];
|
||||
float W = M[6] * dx + M[7] * dy + M[8];
|
||||
W = W != 0.0f ? INTER_TAB_SIZE / W : 0.0f;
|
||||
int X = rint(X0 * W), Y = rint(Y0 * W);
|
||||
|
||||
int sx = convert_short_sat(X >> INTER_BITS);
|
||||
int sy = convert_short_sat(Y >> INTER_BITS);
|
||||
|
||||
short sx_clamp = clamp(sx, 0, src_cols - 1);
|
||||
short sx_p1_clamp = clamp(sx + 1, 0, src_cols - 1);
|
||||
short sy_clamp = clamp(sy, 0, src_rows - 1);
|
||||
short sy_p1_clamp = clamp(sy + 1, 0, src_rows - 1);
|
||||
int v0 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
|
||||
int v1 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
|
||||
int v2 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]);
|
||||
int v3 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]);
|
||||
|
||||
short ay = (short)(Y & (INTER_TAB_SIZE - 1));
|
||||
short ax = (short)(X & (INTER_TAB_SIZE - 1));
|
||||
float taby = 1.f/INTER_TAB_SIZE*ay;
|
||||
float tabx = 1.f/INTER_TAB_SIZE*ax;
|
||||
|
||||
int dst_index = mad24(dy, dst_row_stride, dst_offset + dx);
|
||||
|
||||
int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
|
||||
int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE );
|
||||
int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE );
|
||||
int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE );
|
||||
|
||||
int val = v0 * itab0 + v1 * itab1 + v2 * itab2 + v3 * itab3;
|
||||
|
||||
uchar pix = convert_uchar_sat((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS);
|
||||
dst[dst_index] = pix;
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include "common/mat.h"
|
||||
|
||||
typedef struct {
|
||||
cl_kernel krnl;
|
||||
cl_mem m_y_cl, m_uv_cl;
|
||||
} Transform;
|
||||
|
||||
void transform_init(Transform* s, cl_context ctx, cl_device_id device_id);
|
||||
|
||||
void transform_destroy(Transform* transform);
|
||||
|
||||
void transform_queue(Transform* s, cl_command_queue q,
|
||||
cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset,
|
||||
cl_mem out_y, cl_mem out_u, cl_mem out_v,
|
||||
int out_width, int out_height,
|
||||
const mat3& projection);
|
||||
@@ -34,8 +34,8 @@ GITHUB = GithubUtils(API_TOKEN, DATA_TOKEN)
|
||||
|
||||
EXEC_TIMINGS = [
|
||||
# model, instant max, average max
|
||||
("modelV2", 0.035, 0.025),
|
||||
("driverStateV2", 0.02, 0.015),
|
||||
("modelV2", 0.05, 0.028),
|
||||
("driverStateV2", 0.05, 0.015),
|
||||
]
|
||||
|
||||
def get_log_fn(test_route, ref="master"):
|
||||
|
||||
@@ -4,6 +4,7 @@ import time
|
||||
import copy
|
||||
import heapq
|
||||
import signal
|
||||
import numpy as np
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass, field
|
||||
from itertools import islice
|
||||
@@ -23,6 +24,7 @@ from openpilot.common.params import Params
|
||||
from openpilot.common.prefix import OpenpilotPrefix
|
||||
from openpilot.common.timeout import Timeout
|
||||
from openpilot.common.realtime import DT_CTRL
|
||||
from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
|
||||
from openpilot.system.manager.process_config import managed_processes
|
||||
from openpilot.selfdrive.test.process_replay.vision_meta import meta_from_camera_state, available_streams
|
||||
from openpilot.selfdrive.test.process_replay.migration import migrate_all
|
||||
@@ -203,7 +205,8 @@ class ProcessContainer:
|
||||
if meta.camera_state in self.cfg.vision_pubs:
|
||||
assert frs[meta.camera_state].pix_fmt == 'nv12'
|
||||
frame_size = (frs[meta.camera_state].w, frs[meta.camera_state].h)
|
||||
vipc_server.create_buffers(meta.stream, 2, *frame_size)
|
||||
stride, y_height, _, yuv_size = get_nv12_info(frame_size[0], frame_size[1])
|
||||
vipc_server.create_buffers_with_sizes(meta.stream, 2, frame_size[0], frame_size[1], yuv_size, stride, stride * y_height)
|
||||
vipc_server.start_listener()
|
||||
|
||||
self.vipc_server = vipc_server
|
||||
@@ -300,7 +303,17 @@ class ProcessContainer:
|
||||
camera_meta = meta_from_camera_state(m.which())
|
||||
assert frs is not None
|
||||
img = frs[m.which()].get(camera_state.frameId)
|
||||
self.vipc_server.send(camera_meta.stream, img.flatten().tobytes(),
|
||||
|
||||
h, w = frs[m.which()].h, frs[m.which()].w
|
||||
stride, y_height, _, yuv_size = get_nv12_info(w, h)
|
||||
uv_offset = stride * y_height
|
||||
padded_img = np.zeros(((uv_offset //stride) + (h // 2), stride))
|
||||
padded_img[:h, :w] = img[:h * w].reshape((-1, w))
|
||||
padded_img[uv_offset // stride:uv_offset // stride + h // 2, :w] = img[h * w:].reshape((-1, w))
|
||||
img_bytes = np.zeros((yuv_size,), dtype=np.uint8)
|
||||
img_bytes[:padded_img.size] = padded_img.flatten()
|
||||
|
||||
self.vipc_server.send(camera_meta.stream, img_bytes.tobytes(),
|
||||
camera_state.frameId, camera_state.timestampSof, camera_state.timestampEof)
|
||||
self.msg_queue = []
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
Import('env', 'arch', 'messaging', 'common', 'visionipc')
|
||||
|
||||
libs = [common, 'OpenCL', messaging, visionipc]
|
||||
libs = [common, messaging, visionipc]
|
||||
|
||||
if arch != "Darwin":
|
||||
camera_obj = env.Object(['cameras/camera_qcom2.cc', 'cameras/camera_common.cc', 'cameras/spectra.cc',
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include "system/camerad/cameras/spectra.h"
|
||||
|
||||
|
||||
void CameraBuf::init(cl_device_id device_id, cl_context context, SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type) {
|
||||
void CameraBuf::init(SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type) {
|
||||
vipc_server = v;
|
||||
stream_type = type;
|
||||
frame_buf_count = frame_cnt;
|
||||
@@ -21,9 +21,8 @@ void CameraBuf::init(cl_device_id device_id, cl_context context, SpectraCamera *
|
||||
const int raw_frame_size = (sensor->frame_height + sensor->extra_height) * sensor->frame_stride;
|
||||
for (int i = 0; i < frame_buf_count; i++) {
|
||||
camera_bufs_raw[i].allocate(raw_frame_size);
|
||||
camera_bufs_raw[i].init_cl(device_id, context);
|
||||
}
|
||||
LOGD("allocated %d CL buffers", frame_buf_count);
|
||||
LOGD("allocated %d buffers", frame_buf_count);
|
||||
}
|
||||
|
||||
vipc_server->create_buffers_with_sizes(stream_type, VIPC_BUFFER_COUNT, out_img_width, out_img_height, cam->yuv_size, cam->stride, cam->uv_offset);
|
||||
|
||||
@@ -36,7 +36,7 @@ public:
|
||||
|
||||
CameraBuf() = default;
|
||||
~CameraBuf();
|
||||
void init(cl_device_id device_id, cl_context context, SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type);
|
||||
void init(SpectraCamera *cam, VisionIpcServer * v, int frame_cnt, VisionStreamType type);
|
||||
void sendFrameToVipc();
|
||||
};
|
||||
|
||||
|
||||
@@ -12,16 +12,8 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __TICI__
|
||||
#include "CL/cl_ext_qcom.h"
|
||||
#else
|
||||
#define CL_PRIORITY_HINT_HIGH_QCOM NULL
|
||||
#define CL_CONTEXT_PRIORITY_HINT_QCOM NULL
|
||||
#endif
|
||||
|
||||
#include "media/cam_sensor_cmn_header.h"
|
||||
|
||||
#include "common/clutil.h"
|
||||
#include "common/params.h"
|
||||
#include "common/swaglog.h"
|
||||
|
||||
@@ -57,7 +49,7 @@ public:
|
||||
|
||||
CameraState(SpectraMaster *master, const CameraConfig &config) : camera(master, config) {};
|
||||
~CameraState();
|
||||
void init(VisionIpcServer *v, cl_device_id device_id, cl_context ctx);
|
||||
void init(VisionIpcServer *v);
|
||||
void update_exposure_score(float desired_ev, int exp_t, int exp_g_idx, float exp_gain);
|
||||
void set_camera_exposure(float grey_frac);
|
||||
void set_exposure_rect();
|
||||
@@ -68,8 +60,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
void CameraState::init(VisionIpcServer *v, cl_device_id device_id, cl_context ctx) {
|
||||
camera.camera_open(v, device_id, ctx);
|
||||
void CameraState::init(VisionIpcServer *v) {
|
||||
camera.camera_open(v);
|
||||
|
||||
if (!camera.enabled) return;
|
||||
|
||||
@@ -257,11 +249,7 @@ void CameraState::sendState() {
|
||||
void camerad_thread() {
|
||||
// TODO: centralize enabled handling
|
||||
|
||||
cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
|
||||
const cl_context_properties props[] = {CL_CONTEXT_PRIORITY_HINT_QCOM, CL_PRIORITY_HINT_HIGH_QCOM, 0};
|
||||
cl_context ctx = CL_CHECK_ERR(clCreateContext(props, 1, &device_id, NULL, NULL, &err));
|
||||
|
||||
VisionIpcServer v("camerad", device_id, ctx);
|
||||
VisionIpcServer v("camerad");
|
||||
|
||||
// *** initial ISP init ***
|
||||
SpectraMaster m;
|
||||
@@ -271,7 +259,7 @@ void camerad_thread() {
|
||||
std::vector<std::unique_ptr<CameraState>> cams;
|
||||
for (const auto &config : ALL_CAMERA_CONFIGS) {
|
||||
auto cam = std::make_unique<CameraState>(&m, config);
|
||||
cam->init(&v, device_id, ctx);
|
||||
cam->init(&v);
|
||||
cams.emplace_back(std::move(cam));
|
||||
}
|
||||
|
||||
|
||||
@@ -274,7 +274,7 @@ int SpectraCamera::clear_req_queue() {
|
||||
return ret;
|
||||
}
|
||||
|
||||
void SpectraCamera::camera_open(VisionIpcServer *v, cl_device_id device_id, cl_context ctx) {
|
||||
void SpectraCamera::camera_open(VisionIpcServer *v) {
|
||||
if (!openSensor()) {
|
||||
return;
|
||||
}
|
||||
@@ -296,7 +296,7 @@ void SpectraCamera::camera_open(VisionIpcServer *v, cl_device_id device_id, cl_c
|
||||
linkDevices();
|
||||
|
||||
LOGD("camera init %d", cc.camera_num);
|
||||
buf.init(device_id, ctx, this, v, ife_buf_depth, cc.stream_type);
|
||||
buf.init(this, v, ife_buf_depth, cc.stream_type);
|
||||
camera_map_bufs();
|
||||
clearAndRequeue(1);
|
||||
}
|
||||
|
||||
@@ -113,7 +113,7 @@ public:
|
||||
SpectraCamera(SpectraMaster *master, const CameraConfig &config);
|
||||
~SpectraCamera();
|
||||
|
||||
void camera_open(VisionIpcServer *v, cl_device_id device_id, cl_context ctx);
|
||||
void camera_open(VisionIpcServer *v);
|
||||
bool handle_camera_event(const cam_req_mgr_message *event_data);
|
||||
void camera_close();
|
||||
void camera_map_bufs();
|
||||
|
||||
@@ -32,7 +32,7 @@ class Proc:
|
||||
|
||||
PROCS = [
|
||||
Proc(['camerad'], 1.65, atol=0.4, msgs=['roadCameraState', 'wideRoadCameraState', 'driverCameraState']),
|
||||
Proc(['modeld'], 1.24, atol=0.2, msgs=['modelV2']),
|
||||
Proc(['modeld'], 1.5, atol=0.2, msgs=['modelV2']),
|
||||
Proc(['dmonitoringmodeld'], 0.65, atol=0.35, msgs=['driverStateV2']),
|
||||
Proc(['encoderd'], 0.23, msgs=[]),
|
||||
]
|
||||
|
||||
@@ -2,16 +2,13 @@ Import('env', 'arch', 'messaging', 'common', 'visionipc')
|
||||
|
||||
libs = [common, messaging, visionipc,
|
||||
'avformat', 'avcodec', 'avutil',
|
||||
'yuv', 'OpenCL', 'pthread', 'zstd']
|
||||
'yuv', 'pthread', 'zstd']
|
||||
|
||||
src = ['logger.cc', 'zstd_writer.cc', 'video_writer.cc', 'encoder/encoder.cc', 'encoder/v4l_encoder.cc', 'encoder/jpeg_encoder.cc']
|
||||
if arch != "larch64":
|
||||
src += ['encoder/ffmpeg_encoder.cc']
|
||||
|
||||
if arch == "Darwin":
|
||||
# fix OpenCL
|
||||
del libs[libs.index('OpenCL')]
|
||||
env['FRAMEWORKS'] = ['OpenCL']
|
||||
# exclude v4l
|
||||
del src[src.index('encoder/v4l_encoder.cc')]
|
||||
|
||||
|
||||
1452
third_party/opencl/include/CL/cl.h
vendored
1452
third_party/opencl/include/CL/cl.h
vendored
File diff suppressed because it is too large
Load Diff
131
third_party/opencl/include/CL/cl_d3d10.h
vendored
131
third_party/opencl/include/CL/cl_d3d10.h
vendored
@@ -1,131 +0,0 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_D3D10_H
|
||||
#define __OPENCL_CL_D3D10_H
|
||||
|
||||
#include <d3d10.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d10_sharing */
|
||||
#define cl_khr_d3d10_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d10_device_source_khr;
|
||||
typedef cl_uint cl_d3d10_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
||||
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
||||
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
||||
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
||||
|
||||
/* cl_d3d10_device_source_nv */
|
||||
#define CL_D3D10_DEVICE_KHR 0x4010
|
||||
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
||||
|
||||
/* cl_d3d10_device_set_nv */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
||||
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
||||
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
||||
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d10_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d10_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D10_H */
|
||||
|
||||
131
third_party/opencl/include/CL/cl_d3d11.h
vendored
131
third_party/opencl/include/CL/cl_d3d11.h
vendored
@@ -1,131 +0,0 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_D3D11_H
|
||||
#define __OPENCL_CL_D3D11_H
|
||||
|
||||
#include <d3d11.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d11_sharing */
|
||||
#define cl_khr_d3d11_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d11_device_source_khr;
|
||||
typedef cl_uint cl_d3d11_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D11_DEVICE_KHR -1006
|
||||
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
|
||||
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
|
||||
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
|
||||
|
||||
/* cl_d3d11_device_source */
|
||||
#define CL_D3D11_DEVICE_KHR 0x4019
|
||||
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
|
||||
|
||||
/* cl_d3d11_device_set */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
|
||||
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
|
||||
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
|
||||
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d11_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d11_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D11_H */
|
||||
|
||||
132
third_party/opencl/include/CL/cl_dx9_media_sharing.h
vendored
132
third_party/opencl/include/CL/cl_dx9_media_sharing.h
vendored
@@ -1,132 +0,0 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* cl_khr_dx9_media_sharing */
|
||||
#define cl_khr_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
||||
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <d3d9.h>
|
||||
typedef struct _cl_dx9_surface_info_khr
|
||||
{
|
||||
IDirect3DSurface9 *resource;
|
||||
HANDLE shared_handle;
|
||||
} cl_dx9_surface_info_khr;
|
||||
#endif
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
|
||||
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
|
||||
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
|
||||
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
|
||||
|
||||
/* cl_media_adapter_type_khr */
|
||||
#define CL_ADAPTER_D3D9_KHR 0x2020
|
||||
#define CL_ADAPTER_D3D9EX_KHR 0x2021
|
||||
#define CL_ADAPTER_DXVA_KHR 0x2022
|
||||
|
||||
/* cl_media_adapter_set_khr */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
|
||||
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
|
||||
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
|
||||
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
|
||||
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
|
||||
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_uint num_media_adapters,
|
||||
cl_dx9_media_adapter_type_khr * media_adapter_type,
|
||||
void * media_adapters,
|
||||
cl_dx9_media_adapter_set_khr media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_dx9_media_adapter_type_khr adapter_type,
|
||||
void * surface_info,
|
||||
cl_uint plane,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
|
||||
|
||||
136
third_party/opencl/include/CL/cl_egl.h
vendored
136
third_party/opencl/include/CL/cl_egl.h
vendored
@@ -1,136 +0,0 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_EGL_H
|
||||
#define __OPENCL_CL_EGL_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
||||
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
||||
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
||||
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
||||
|
||||
/* Error type for clCreateFromEGLImageKHR */
|
||||
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
||||
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
||||
|
||||
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
||||
typedef void* CLeglImageKHR;
|
||||
|
||||
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
||||
typedef void* CLeglDisplayKHR;
|
||||
|
||||
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
||||
typedef void* CLeglSyncKHR;
|
||||
|
||||
/* properties passed to clCreateFromEGLImageKHR */
|
||||
typedef intptr_t cl_egl_image_properties_khr;
|
||||
|
||||
|
||||
#define cl_khr_egl_image 1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromEGLImageKHR(cl_context /* context */,
|
||||
CLeglDisplayKHR /* egldisplay */,
|
||||
CLeglImageKHR /* eglimage */,
|
||||
cl_mem_flags /* flags */,
|
||||
const cl_egl_image_properties_khr * /* properties */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
#define cl_khr_egl_event 1
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromEGLSyncKHR(cl_context /* context */,
|
||||
CLeglSyncKHR /* sync */,
|
||||
CLeglDisplayKHR /* display */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_EGL_H */
|
||||
391
third_party/opencl/include/CL/cl_ext.h
vendored
391
third_party/opencl/include/CL/cl_ext.h
vendored
@@ -1,391 +0,0 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
|
||||
|
||||
/* cl_ext.h contains OpenCL extensions which don't have external */
|
||||
/* (OpenGL, D3D) dependencies. */
|
||||
|
||||
#ifndef __CL_EXT_H
|
||||
#define __CL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#include <AvailabilityMacros.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
/* cl_khr_fp16 extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
||||
|
||||
/* Memory object destruction
|
||||
*
|
||||
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
|
||||
*
|
||||
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
|
||||
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||
* the storage bits for the memory object, can be reused or freed.
|
||||
*
|
||||
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
|
||||
*
|
||||
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*/
|
||||
#define cl_APPLE_SetMemObjectDestructor 1
|
||||
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
|
||||
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
|
||||
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Context Logging Functions
|
||||
*
|
||||
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
|
||||
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*
|
||||
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
|
||||
*/
|
||||
#define cl_APPLE_ContextLoggingFunctions 1
|
||||
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
|
||||
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
|
||||
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/************************
|
||||
* cl_khr_icd extension *
|
||||
************************/
|
||||
#define cl_khr_icd 1
|
||||
|
||||
/* cl_platform_info */
|
||||
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_PLATFORM_NOT_FOUND_KHR -1001
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
|
||||
cl_platform_id * /* platforms */,
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
|
||||
cl_uint /* num_entries */,
|
||||
cl_platform_id * /* platforms */,
|
||||
cl_uint * /* num_platforms */);
|
||||
|
||||
|
||||
/* Extension: cl_khr_image2D_buffer
|
||||
*
|
||||
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
|
||||
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
|
||||
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
|
||||
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
|
||||
* for 2D images created from a buffer.
|
||||
*
|
||||
* When the 2D image from buffer is created, the client must specify the width,
|
||||
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
|
||||
*
|
||||
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
|
||||
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
|
||||
*/
|
||||
|
||||
/*************************************
|
||||
* cl_khr_initalize_memory extension *
|
||||
*************************************/
|
||||
|
||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_terminate_context extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
|
||||
#define CL_CONTEXT_TERMINATE_KHR 0x2032
|
||||
|
||||
#define cl_khr_terminate_context 1
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*
|
||||
* Extension: cl_khr_spir
|
||||
*
|
||||
* This extension adds support to create an OpenCL program object from a
|
||||
* Standard Portable Intermediate Representation (SPIR) instance
|
||||
*/
|
||||
|
||||
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
|
||||
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_nv_device_attribute_query extension *
|
||||
******************************************/
|
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query *
|
||||
*********************************/
|
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||
|
||||
/*********************************
|
||||
* cl_arm_printf extension
|
||||
*********************************/
|
||||
#define CL_PRINTF_CALLBACK_ARM 0x40B0
|
||||
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
|
||||
|
||||
#ifdef CL_VERSION_1_1
|
||||
/***********************************
|
||||
* cl_ext_device_fission extension *
|
||||
***********************************/
|
||||
#define cl_ext_device_fission 1
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_ulong cl_device_partition_property_ext;
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
|
||||
const cl_device_partition_property_ext * /* properties */,
|
||||
cl_uint /*num_entries*/,
|
||||
cl_device_id * /*out_devices*/,
|
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
|
||||
const cl_device_partition_property_ext * /* properties */,
|
||||
cl_uint /*num_entries*/,
|
||||
cl_device_id * /*out_devices*/,
|
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* cl_device_partition_property_ext */
|
||||
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
|
||||
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
|
||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
|
||||
|
||||
/* clDeviceGetInfo selectors */
|
||||
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
|
||||
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
|
||||
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
|
||||
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
|
||||
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
|
||||
|
||||
/* error codes */
|
||||
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
|
||||
#define CL_INVALID_PARTITION_COUNT_EXT -1058
|
||||
#define CL_INVALID_PARTITION_NAME_EXT -1059
|
||||
|
||||
/* CL_AFFINITY_DOMAINs */
|
||||
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
|
||||
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
|
||||
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
|
||||
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
|
||||
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
|
||||
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
|
||||
|
||||
/* cl_device_partition_property_ext list terminators */
|
||||
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ext_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
||||
|
||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
||||
|
||||
typedef cl_uint cl_image_pitch_info_qcom;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceImageInfoQCOM(cl_device_id device,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
const cl_image_format *image_format,
|
||||
cl_image_pitch_info_qcom param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
typedef struct _cl_mem_ext_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Legal values will be defined in layered extensions. */
|
||||
cl_uint allocation_type;
|
||||
|
||||
/* Host cache policy for this external memory allocation. */
|
||||
cl_uint host_cache_policy;
|
||||
|
||||
} cl_mem_ext_host_ptr;
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ion_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
||||
|
||||
typedef struct _cl_mem_ion_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
/* ION file descriptor */
|
||||
int ion_filedesc;
|
||||
|
||||
/* Host pointer to the ION allocated memory */
|
||||
void* ion_hostptr;
|
||||
|
||||
} cl_mem_ion_host_ptr;
|
||||
|
||||
#endif /* CL_VERSION_1_1 */
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
/*********************************
|
||||
* cl_khr_sub_groups extension
|
||||
*********************************/
|
||||
#define cl_khr_sub_groups 1
|
||||
|
||||
typedef cl_uint cl_kernel_sub_group_info_khr;
|
||||
|
||||
/* cl_khr_sub_group_info */
|
||||
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
|
||||
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetKernelSubGroupInfoKHR(cl_kernel /* in_kernel */,
|
||||
cl_device_id /*in_device*/,
|
||||
cl_kernel_sub_group_info_khr /* param_name */,
|
||||
size_t /*input_value_size*/,
|
||||
const void * /*input_value*/,
|
||||
size_t /*param_value_size*/,
|
||||
void* /*param_value*/,
|
||||
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
( CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel /* in_kernel */,
|
||||
cl_device_id /*in_device*/,
|
||||
cl_kernel_sub_group_info_khr /* param_name */,
|
||||
size_t /*input_value_size*/,
|
||||
const void * /*input_value*/,
|
||||
size_t /*param_value_size*/,
|
||||
void* /*param_value*/,
|
||||
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||
#endif /* CL_VERSION_2_0 */
|
||||
|
||||
#ifdef CL_VERSION_2_1
|
||||
/*********************************
|
||||
* cl_khr_priority_hints extension
|
||||
*********************************/
|
||||
#define cl_khr_priority_hints 1
|
||||
|
||||
typedef cl_uint cl_queue_priority_khr;
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_PRIORITY_KHR 0x1096
|
||||
|
||||
/* cl_queue_priority_khr */
|
||||
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
|
||||
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
|
||||
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
|
||||
|
||||
#endif /* CL_VERSION_2_1 */
|
||||
|
||||
#ifdef CL_VERSION_2_1
|
||||
/*********************************
|
||||
* cl_khr_throttle_hints extension
|
||||
*********************************/
|
||||
#define cl_khr_throttle_hints 1
|
||||
|
||||
typedef cl_uint cl_queue_throttle_khr;
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_THROTTLE_KHR 0x1097
|
||||
|
||||
/* cl_queue_throttle_khr */
|
||||
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
|
||||
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
|
||||
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
|
||||
|
||||
#endif /* CL_VERSION_2_1 */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CL_EXT_H */
|
||||
255
third_party/opencl/include/CL/cl_ext_qcom.h
vendored
255
third_party/opencl/include/CL/cl_ext_qcom.h
vendored
@@ -1,255 +0,0 @@
|
||||
/* Copyright (c) 2009-2017 Qualcomm Technologies, Inc. All Rights Reserved.
|
||||
* Qualcomm Technologies Proprietary and Confidential.
|
||||
*/
|
||||
|
||||
#ifndef __OPENCL_CL_EXT_QCOM_H
|
||||
#define __OPENCL_CL_EXT_QCOM_H
|
||||
|
||||
// Needed by cl_khr_egl_event extension
|
||||
#include <EGL/egl.h>
|
||||
#include <EGL/eglext.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/************************************
|
||||
* cl_qcom_create_buffer_from_image *
|
||||
************************************/
|
||||
|
||||
#define CL_BUFFER_FROM_IMAGE_ROW_PITCH_QCOM 0x40C0
|
||||
#define CL_BUFFER_FROM_IMAGE_SLICE_PITCH_QCOM 0x40C1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateBufferFromImageQCOM(cl_mem image,
|
||||
cl_mem_flags flags,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
|
||||
/************************************
|
||||
* cl_qcom_limited_printf extension *
|
||||
************************************/
|
||||
|
||||
/* Builtin printf function buffer size in bytes. */
|
||||
#define CL_DEVICE_PRINTF_BUFFER_SIZE_QCOM 0x1049
|
||||
|
||||
|
||||
/*************************************
|
||||
* cl_qcom_extended_images extension *
|
||||
*************************************/
|
||||
|
||||
#define CL_CONTEXT_ENABLE_EXTENDED_IMAGES_QCOM 0x40AA
|
||||
#define CL_DEVICE_EXTENDED_IMAGE2D_MAX_WIDTH_QCOM 0x40AB
|
||||
#define CL_DEVICE_EXTENDED_IMAGE2D_MAX_HEIGHT_QCOM 0x40AC
|
||||
#define CL_DEVICE_EXTENDED_IMAGE3D_MAX_WIDTH_QCOM 0x40AD
|
||||
#define CL_DEVICE_EXTENDED_IMAGE3D_MAX_HEIGHT_QCOM 0x40AE
|
||||
#define CL_DEVICE_EXTENDED_IMAGE3D_MAX_DEPTH_QCOM 0x40AF
|
||||
|
||||
/*************************************
|
||||
* cl_qcom_perf_hint extension *
|
||||
*************************************/
|
||||
|
||||
typedef cl_uint cl_perf_hint;
|
||||
|
||||
#define CL_CONTEXT_PERF_HINT_QCOM 0x40C2
|
||||
|
||||
/*cl_perf_hint*/
|
||||
#define CL_PERF_HINT_HIGH_QCOM 0x40C3
|
||||
#define CL_PERF_HINT_NORMAL_QCOM 0x40C4
|
||||
#define CL_PERF_HINT_LOW_QCOM 0x40C5
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetPerfHintQCOM(cl_context context,
|
||||
cl_perf_hint perf_hint);
|
||||
|
||||
// This extension is published at Khronos, so its definitions are made in cl_ext.h.
|
||||
// This duplication is for backward compatibility.
|
||||
|
||||
#ifndef CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_android_native_buffer_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
|
||||
|
||||
|
||||
typedef struct _cl_mem_android_native_buffer_host_ptr
|
||||
{
|
||||
// Type of external memory allocation.
|
||||
// Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers.
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
// Virtual pointer to the android native buffer
|
||||
void* anb_ptr;
|
||||
|
||||
} cl_mem_android_native_buffer_host_ptr;
|
||||
|
||||
#endif //#ifndef CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM
|
||||
|
||||
/***********************************
|
||||
* cl_img_egl_image extension *
|
||||
************************************/
|
||||
typedef void* CLeglImageIMG;
|
||||
typedef void* CLeglDisplayIMG;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromEGLImageIMG(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
CLeglImageIMG image,
|
||||
CLeglDisplayIMG display,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_other_image extension
|
||||
*********************************/
|
||||
|
||||
// Extended flag for creating/querying QCOM non-standard images
|
||||
#define CL_MEM_OTHER_IMAGE_QCOM (1<<25)
|
||||
|
||||
// cl_channel_type
|
||||
#define CL_QCOM_UNORM_MIPI10 0x4159
|
||||
#define CL_QCOM_UNORM_MIPI12 0x415A
|
||||
#define CL_QCOM_UNSIGNED_MIPI10 0x415B
|
||||
#define CL_QCOM_UNSIGNED_MIPI12 0x415C
|
||||
#define CL_QCOM_UNORM_INT10 0x415D
|
||||
#define CL_QCOM_UNORM_INT12 0x415E
|
||||
#define CL_QCOM_UNSIGNED_INT16 0x415F
|
||||
|
||||
// cl_channel_order
|
||||
// Dedicate 0x4130-0x415F range for QCOM extended image formats
|
||||
// 0x4130 - 0x4132 range is assigned to pixel-oriented compressed format
|
||||
#define CL_QCOM_BAYER 0x414E
|
||||
|
||||
#define CL_QCOM_NV12 0x4133
|
||||
#define CL_QCOM_NV12_Y 0x4134
|
||||
#define CL_QCOM_NV12_UV 0x4135
|
||||
|
||||
#define CL_QCOM_TILED_NV12 0x4136
|
||||
#define CL_QCOM_TILED_NV12_Y 0x4137
|
||||
#define CL_QCOM_TILED_NV12_UV 0x4138
|
||||
|
||||
#define CL_QCOM_P010 0x413C
|
||||
#define CL_QCOM_P010_Y 0x413D
|
||||
#define CL_QCOM_P010_UV 0x413E
|
||||
|
||||
#define CL_QCOM_TILED_P010 0x413F
|
||||
#define CL_QCOM_TILED_P010_Y 0x4140
|
||||
#define CL_QCOM_TILED_P010_UV 0x4141
|
||||
|
||||
|
||||
#define CL_QCOM_TP10 0x4145
|
||||
#define CL_QCOM_TP10_Y 0x4146
|
||||
#define CL_QCOM_TP10_UV 0x4147
|
||||
|
||||
#define CL_QCOM_TILED_TP10 0x4148
|
||||
#define CL_QCOM_TILED_TP10_Y 0x4149
|
||||
#define CL_QCOM_TILED_TP10_UV 0x414A
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_compressed_image extension
|
||||
*********************************/
|
||||
|
||||
// Extended flag for creating/querying QCOM non-planar compressed images
|
||||
#define CL_MEM_COMPRESSED_IMAGE_QCOM (1<<27)
|
||||
|
||||
// Extended image format
|
||||
// cl_channel_order
|
||||
#define CL_QCOM_COMPRESSED_RGBA 0x4130
|
||||
#define CL_QCOM_COMPRESSED_RGBx 0x4131
|
||||
|
||||
#define CL_QCOM_COMPRESSED_NV12_Y 0x413A
|
||||
#define CL_QCOM_COMPRESSED_NV12_UV 0x413B
|
||||
|
||||
#define CL_QCOM_COMPRESSED_P010 0x4142
|
||||
#define CL_QCOM_COMPRESSED_P010_Y 0x4143
|
||||
#define CL_QCOM_COMPRESSED_P010_UV 0x4144
|
||||
|
||||
#define CL_QCOM_COMPRESSED_TP10 0x414B
|
||||
#define CL_QCOM_COMPRESSED_TP10_Y 0x414C
|
||||
#define CL_QCOM_COMPRESSED_TP10_UV 0x414D
|
||||
|
||||
#define CL_QCOM_COMPRESSED_NV12_4R 0x414F
|
||||
#define CL_QCOM_COMPRESSED_NV12_4R_Y 0x4150
|
||||
#define CL_QCOM_COMPRESSED_NV12_4R_UV 0x4151
|
||||
/*********************************
|
||||
* cl_qcom_compressed_yuv_image_read extension
|
||||
*********************************/
|
||||
|
||||
// Extended flag for creating/querying QCOM compressed images
|
||||
#define CL_MEM_COMPRESSED_YUV_IMAGE_QCOM (1<<28)
|
||||
|
||||
// Extended image format
|
||||
#define CL_QCOM_COMPRESSED_NV12 0x10C4
|
||||
|
||||
// Extended flag for setting ION buffer allocation type
|
||||
#define CL_MEM_ION_HOST_PTR_COMPRESSED_YUV_QCOM 0x40CD
|
||||
#define CL_MEM_ION_HOST_PTR_PROTECTED_COMPRESSED_YUV_QCOM 0x40CE
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_accelerated_image_ops
|
||||
*********************************/
|
||||
#define CL_MEM_OBJECT_WEIGHT_IMAGE_QCOM 0x4110
|
||||
#define CL_DEVICE_HOF_MAX_NUM_PHASES_QCOM 0x4111
|
||||
#define CL_DEVICE_HOF_MAX_FILTER_SIZE_X_QCOM 0x4112
|
||||
#define CL_DEVICE_HOF_MAX_FILTER_SIZE_Y_QCOM 0x4113
|
||||
#define CL_DEVICE_BLOCK_MATCHING_MAX_REGION_SIZE_X_QCOM 0x4114
|
||||
#define CL_DEVICE_BLOCK_MATCHING_MAX_REGION_SIZE_Y_QCOM 0x4115
|
||||
|
||||
//Extended flag for specifying weight image type
|
||||
#define CL_WEIGHT_IMAGE_SEPARABLE_QCOM (1<<0)
|
||||
|
||||
// Box Filter
|
||||
typedef struct _cl_box_filter_size_qcom
|
||||
{
|
||||
// Width of box filter on X direction.
|
||||
float box_filter_width;
|
||||
|
||||
// Height of box filter on Y direction.
|
||||
float box_filter_height;
|
||||
} cl_box_filter_size_qcom;
|
||||
|
||||
// HOF Weight Image Desc
|
||||
typedef struct _cl_weight_desc_qcom
|
||||
{
|
||||
/** Coordinate of the "center" point of the weight image,
|
||||
based on the weight image's top-left corner as the origin. */
|
||||
size_t center_coord_x;
|
||||
size_t center_coord_y;
|
||||
cl_bitfield flags;
|
||||
} cl_weight_desc_qcom;
|
||||
|
||||
typedef struct _cl_weight_image_desc_qcom
|
||||
{
|
||||
cl_image_desc image_desc;
|
||||
cl_weight_desc_qcom weight_desc;
|
||||
} cl_weight_image_desc_qcom;
|
||||
|
||||
/*************************************
|
||||
* cl_qcom_protected_context extension *
|
||||
*************************************/
|
||||
|
||||
#define CL_CONTEXT_PROTECTED_QCOM 0x40C7
|
||||
#define CL_MEM_ION_HOST_PTR_PROTECTED_QCOM 0x40C8
|
||||
|
||||
/*************************************
|
||||
* cl_qcom_priority_hint extension *
|
||||
*************************************/
|
||||
#define CL_PRIORITY_HINT_NONE_QCOM 0
|
||||
typedef cl_uint cl_priority_hint;
|
||||
|
||||
#define CL_CONTEXT_PRIORITY_HINT_QCOM 0x40C9
|
||||
|
||||
/*cl_priority_hint*/
|
||||
#define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA
|
||||
#define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB
|
||||
#define CL_PRIORITY_HINT_LOW_QCOM 0x40CC
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_EXT_QCOM_H */
|
||||
167
third_party/opencl/include/CL/cl_gl.h
vendored
167
third_party/opencl/include/CL/cl_gl.h
vendored
@@ -1,167 +0,0 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLuint /* bufobj */,
|
||||
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLuint /* renderbuffer */,
|
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem /* memobj */,
|
||||
cl_gl_object_type * /* gl_object_type */,
|
||||
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem /* memobj */,
|
||||
cl_gl_texture_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_objects */,
|
||||
const cl_mem * /* mem_objects */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event * /* event_wait_list */,
|
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context /* context */,
|
||||
cl_mem_flags /* flags */,
|
||||
cl_GLenum /* target */,
|
||||
cl_GLint /* miplevel */,
|
||||
cl_GLuint /* texture */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
|
||||
cl_gl_context_info /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
void * /* param_value */,
|
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
||||
74
third_party/opencl/include/CL/cl_gl_ext.h
vendored
74
third_party/opencl/include/CL/cl_gl_ext.h
vendored
@@ -1,74 +0,0 @@
|
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
**********************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
|
||||
/* OpenGL dependencies. */
|
||||
|
||||
#ifndef __OPENCL_CL_GL_EXT_H
|
||||
#define __OPENCL_CL_GL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/cl_gl.h>
|
||||
#else
|
||||
#include <CL/cl_gl.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* For each extension, follow this template
|
||||
* cl_VEN_extname extension */
|
||||
/* #define cl_VEN_extname 1
|
||||
* ... define new types, if any
|
||||
* ... define new tokens, if any
|
||||
* ... define new APIs, if any
|
||||
*
|
||||
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
|
||||
* This allows us to avoid having to decide whether to include GL headers or GLES here.
|
||||
*/
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
* See section 9.9 in the OpenCL 1.1 spec for more information
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context /* context */,
|
||||
cl_GLsync /* cl_GLsync */,
|
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_EXT_H */
|
||||
1333
third_party/opencl/include/CL/cl_platform.h
vendored
1333
third_party/opencl/include/CL/cl_platform.h
vendored
File diff suppressed because it is too large
Load Diff
59
third_party/opencl/include/CL/opencl.h
vendored
59
third_party/opencl/include/CL/opencl.h
vendored
@@ -1,59 +0,0 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2015 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
|
||||
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
|
||||
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
|
||||
* https://www.khronos.org/registry/
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
#include <OpenCL/cl.h>
|
||||
#include <OpenCL/cl_gl.h>
|
||||
#include <OpenCL/cl_gl_ext.h>
|
||||
#include <OpenCL/cl_ext.h>
|
||||
|
||||
#else
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_gl_ext.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
||||
|
||||
@@ -57,11 +57,9 @@ base_frameworks = qt_env['FRAMEWORKS']
|
||||
base_libs = [common, messaging, cereal, visionipc, 'm', 'ssl', 'crypto', 'pthread'] + qt_env["LIBS"]
|
||||
|
||||
if arch == "Darwin":
|
||||
base_frameworks.append('OpenCL')
|
||||
base_frameworks.append('QtCharts')
|
||||
base_frameworks.append('QtSerialBus')
|
||||
else:
|
||||
base_libs.append('OpenCL')
|
||||
base_libs.append('Qt5Charts')
|
||||
base_libs.append('Qt5SerialBus')
|
||||
|
||||
|
||||
@@ -58,9 +58,6 @@ function install_ubuntu_common_requirements() {
|
||||
libzmq3-dev \
|
||||
libzstd-dev \
|
||||
libsqlite3-dev \
|
||||
opencl-headers \
|
||||
ocl-icd-libopencl1 \
|
||||
ocl-icd-opencl-dev \
|
||||
portaudio19-dev \
|
||||
qttools5-dev-tools \
|
||||
libqt5svg5-dev \
|
||||
|
||||
@@ -6,11 +6,6 @@ replay_env['CCFLAGS'] += ['-Wno-deprecated-declarations']
|
||||
base_frameworks = []
|
||||
base_libs = [common, messaging, cereal, visionipc, 'm', 'ssl', 'crypto', 'pthread']
|
||||
|
||||
if arch == "Darwin":
|
||||
base_frameworks.append('OpenCL')
|
||||
else:
|
||||
base_libs.append('OpenCL')
|
||||
|
||||
replay_lib_src = ["replay.cc", "consoleui.cc", "camera.cc", "filereader.cc", "logreader.cc", "framereader.cc",
|
||||
"route.cc", "util.cc", "seg_mgr.cc", "timeline.cc", "api.cc"]
|
||||
if arch != "Darwin":
|
||||
|
||||
@@ -10,10 +10,6 @@ What's needed:
|
||||
|
||||
## Setup openpilot
|
||||
- Follow [this readme](../README.md) to install and build the requirements
|
||||
- Install OpenCL Driver (Ubuntu)
|
||||
```
|
||||
sudo apt install pocl-opencl-icd
|
||||
```
|
||||
|
||||
## Connect the hardware
|
||||
- Connect the camera first
|
||||
|
||||
Reference in New Issue
Block a user