From 96fc1d6582d17407bf5006419644ae278026924a Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Tue, 17 Jul 2018 17:10:59 +0100 Subject: COMPMID-1394: Fixed OCLGrind failures ColorConvert Change-Id: Ibf688e68205eac2dc07ce88c96498cc66c153cee Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140440 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/core/CL/cl_kernels/color_convert.cl | 96 ++++++++++++++-------------- src/core/CL/kernels/CLColorConvertKernel.cpp | 10 ++- 2 files changed, 56 insertions(+), 50 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/cl_kernels/color_convert.cl b/src/core/CL/cl_kernels/color_convert.cl index 72c74930b0..02a0c8ee2a 100644 --- a/src/core/CL/cl_kernels/color_convert.cl +++ b/src/core/CL/cl_kernels/color_convert.cl @@ -149,9 +149,9 @@ __kernel void UYVY422_to_RGB888_bt709( f_g += lumav; f_b += lumav; - uchar8 r_0 = convert_uchar8_rtz(f_r); - uchar8 g_0 = convert_uchar8_rtz(f_g); - uchar8 b_0 = convert_uchar8_rtz(f_b); + uchar8 r_0 = convert_uchar8_sat_rtz(f_r); + uchar8 g_0 = convert_uchar8_sat_rtz(f_g); + uchar8 b_0 = convert_uchar8_sat_rtz(f_b); uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2, r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5); @@ -207,9 +207,9 @@ __kernel void UYVY422_to_RGBA8888_bt709( f_g += lumav; f_b += lumav; - uchar8 r_0 = convert_uchar8_rtz(f_r); - uchar8 g_0 = convert_uchar8_rtz(f_g); - uchar8 b_0 = convert_uchar8_rtz(f_b); + uchar8 r_0 = convert_uchar8_sat_rtz(f_r); + uchar8 g_0 = convert_uchar8_sat_rtz(f_g); + uchar8 b_0 = convert_uchar8_sat_rtz(f_b); uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255, r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -266,9 +266,9 @@ __kernel void YUYV422_to_RGB888_bt709( f_g += lumav; f_b += lumav; - uchar8 r_0 = convert_uchar8_rtz(f_r); - uchar8 g_0 = convert_uchar8_rtz(f_g); - uchar8 b_0 = convert_uchar8_rtz(f_b); + uchar8 r_0 = convert_uchar8_sat_rtz(f_r); + uchar8 g_0 = convert_uchar8_sat_rtz(f_g); + uchar8 b_0 = convert_uchar8_sat_rtz(f_b); uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2, r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5); @@ -324,9 +324,9 @@ __kernel void YUYV422_to_RGBA8888_bt709( f_g += lumav; f_b += lumav; - uchar8 r_0 = convert_uchar8_rtz(f_r); - uchar8 g_0 = convert_uchar8_rtz(f_g); - uchar8 b_0 = convert_uchar8_rtz(f_b); + uchar8 r_0 = convert_uchar8_sat_rtz(f_r); + uchar8 g_0 = convert_uchar8_sat_rtz(f_g); + uchar8 b_0 = convert_uchar8_sat_rtz(f_b); uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255, r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -471,9 +471,9 @@ __kernel void NV12_to_RGB888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -484,9 +484,9 @@ __kernel void NV12_to_RGB888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -927,9 +927,9 @@ __kernel void NV12_to_RGBA8888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -940,9 +940,9 @@ __kernel void NV12_to_RGBA8888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -1126,9 +1126,9 @@ __kernel void NV21_to_RGB888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -1139,9 +1139,9 @@ __kernel void NV21_to_RGB888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -1197,9 +1197,9 @@ __kernel void NV21_to_RGBA8888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -1210,9 +1210,9 @@ __kernel void NV21_to_RGBA8888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -1525,9 +1525,9 @@ __kernel void IYUV_to_RGB888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -1538,9 +1538,9 @@ __kernel void IYUV_to_RGB888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -1604,9 +1604,9 @@ __kernel void IYUV_to_RGBA8888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -1617,9 +1617,9 @@ __kernel void IYUV_to_RGBA8888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp index 52d9be9418..2b894989e1 100644 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ b/src/core/CL/kernels/CLColorConvertKernel.cpp @@ -198,6 +198,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu ARM_COMPUTE_ERROR_ON(output == nullptr); unsigned int num_elems_processed_per_iteration = 0; + unsigned int num_elems_read_per_iteration_x = 0; bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21); float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1; @@ -212,9 +213,11 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu case Format::NV12: case Format::IYUV: num_elems_processed_per_iteration = 2; + num_elems_read_per_iteration_x = 8; break; case Format::YUV444: num_elems_processed_per_iteration = 4; + num_elems_read_per_iteration_x = 16; break; default: break; @@ -229,6 +232,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu case Format::NV12: case Format::IYUV: num_elems_processed_per_iteration = 8; + num_elems_read_per_iteration_x = 8; break; default: break; @@ -238,6 +242,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu default: break; } + ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported", string_from_format(input->info()->format()).c_str(), string_from_format(output->info()->format()).c_str()); @@ -248,7 +253,6 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu kernel_name << "_to_"; kernel_name << string_from_format(output->info()->format()); kernel_name << "_bt709"; - _input = input; _multi_output = output; @@ -267,8 +271,10 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling); + AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration_x); + update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), + input_access, output_plane0_access, output_plane1_access, output_plane2_access); -- cgit v1.2.1