mirror of
https://github.com/firestar5683/StarPilot.git
synced 2026-07-04 21:12:07 +08:00
Model and YUV pipeline to uint8 (#33671)
* Squash * 78cec5a0-577b-49ac-b443-f7cd327649bd/400 * bump tinygrad
This commit is contained in:
@@ -7,28 +7,28 @@
|
||||
#include "common/clutil.h"
|
||||
|
||||
ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
|
||||
input_frames = std::make_unique<float[]>(buf_size);
|
||||
input_frames = std::make_unique<uint8_t[]>(buf_size);
|
||||
|
||||
q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
|
||||
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
|
||||
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
|
||||
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
|
||||
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(float), NULL, &err));
|
||||
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(uint8_t), NULL, &err));
|
||||
|
||||
transform_init(&transform, context, device_id);
|
||||
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
}
|
||||
|
||||
float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
|
||||
uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
|
||||
transform_queue(&this->transform, q,
|
||||
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
|
||||
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
|
||||
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
|
||||
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
|
||||
|
||||
if (output == NULL) {
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
|
||||
|
||||
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE);
|
||||
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
|
||||
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE);
|
||||
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
return &input_frames[0];
|
||||
} else {
|
||||
|
||||
@@ -20,7 +20,7 @@ class ModelFrame {
|
||||
public:
|
||||
ModelFrame(cl_device_id device_id, cl_context context);
|
||||
~ModelFrame();
|
||||
float* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);
|
||||
uint8_t* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);
|
||||
|
||||
const int MODEL_WIDTH = 512;
|
||||
const int MODEL_HEIGHT = 256;
|
||||
@@ -32,5 +32,5 @@ private:
|
||||
LoadYUVState loadyuv;
|
||||
cl_command_queue q;
|
||||
cl_mem y_cl, u_cl, v_cl, net_input_cl;
|
||||
std::unique_ptr<float[]> input_frames;
|
||||
std::unique_ptr<uint8_t[]> input_frames;
|
||||
};
|
||||
|
||||
@@ -15,4 +15,4 @@ cdef extern from "selfdrive/modeld/models/commonmodel.h":
|
||||
cppclass ModelFrame:
|
||||
int buf_size
|
||||
ModelFrame(cl_device_id, cl_context)
|
||||
float * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
|
||||
unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
|
||||
|
||||
@@ -35,11 +35,11 @@ cdef class ModelFrame:
|
||||
def prepare(self, VisionBuf buf, float[:] projection, CLMem output):
|
||||
cdef mat3 cprojection
|
||||
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
|
||||
cdef float * data
|
||||
cdef unsigned char * data
|
||||
if output is None:
|
||||
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, NULL)
|
||||
else:
|
||||
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem)
|
||||
if not data:
|
||||
return None
|
||||
return np.asarray(<cnp.float32_t[:self.frame.buf_size]> data)
|
||||
return np.asarray(<cnp.uint8_t[:self.frame.buf_size]> data)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fb2018c74cdd9e5cb070ec7bed7f8581fabd55e39057d0a03aaffd2e42408154
|
||||
size 62486347
|
||||
oid sha256:47417a13d8a8d6af6a1562834eee538e3b43242c6e277ab6f1978a78a6785b7d
|
||||
size 62486469
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))
|
||||
|
||||
__kernel void loadys(__global uchar8 const * const Y,
|
||||
__global float * out,
|
||||
__global uchar * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
@@ -10,13 +10,12 @@ __kernel void loadys(__global uchar8 const * const Y,
|
||||
const int ox = ois % TRANSFORMED_WIDTH;
|
||||
|
||||
const uchar8 ys = Y[gid];
|
||||
const float8 ysf = convert_float8(ys);
|
||||
|
||||
// 02
|
||||
// 13
|
||||
|
||||
__global float* outy0;
|
||||
__global float* outy1;
|
||||
__global uchar* outy0;
|
||||
__global uchar* outy1;
|
||||
if ((oy & 1) == 0) {
|
||||
outy0 = out + out_offset; //y0
|
||||
outy1 = out + out_offset + UV_SIZE*2; //y2
|
||||
@@ -25,21 +24,20 @@ __kernel void loadys(__global uchar8 const * const Y,
|
||||
outy1 = out + out_offset + UV_SIZE*3; //y3
|
||||
}
|
||||
|
||||
vstore4(ysf.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ysf.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
|
||||
}
|
||||
|
||||
__kernel void loaduv(__global uchar8 const * const in,
|
||||
__global float8 * out,
|
||||
__global uchar8 * out,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
const uchar8 inv = in[gid];
|
||||
const float8 outv = convert_float8(inv);
|
||||
out[gid + out_offset / 8] = outv;
|
||||
out[gid + out_offset / 8] = inv;
|
||||
}
|
||||
|
||||
__kernel void copy(__global float8 * inout,
|
||||
__kernel void copy(__global uchar8 * inout,
|
||||
int in_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
|
||||
+1
-1
Submodule tinygrad_repo updated: 3e15fa0dae...9dda6d260d
Reference in New Issue
Block a user