From 96fc1d6582d17407bf5006419644ae278026924a Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Tue, 17 Jul 2018 17:10:59 +0100 Subject: COMPMID-1394: Fixed OCLGrind failures ColorConvert Change-Id: Ibf688e68205eac2dc07ce88c96498cc66c153cee Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140440 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/core/CL/cl_kernels/color_convert.cl | 96 +++++++++--------- src/core/CL/kernels/CLColorConvertKernel.cpp | 10 +- tests/validation/reference/ColorConvertHelper.h | 126 +++++++++--------------- 3 files changed, 105 insertions(+), 127 deletions(-) diff --git a/src/core/CL/cl_kernels/color_convert.cl b/src/core/CL/cl_kernels/color_convert.cl index 72c74930b0..02a0c8ee2a 100644 --- a/src/core/CL/cl_kernels/color_convert.cl +++ b/src/core/CL/cl_kernels/color_convert.cl @@ -149,9 +149,9 @@ __kernel void UYVY422_to_RGB888_bt709( f_g += lumav; f_b += lumav; - uchar8 r_0 = convert_uchar8_rtz(f_r); - uchar8 g_0 = convert_uchar8_rtz(f_g); - uchar8 b_0 = convert_uchar8_rtz(f_b); + uchar8 r_0 = convert_uchar8_sat_rtz(f_r); + uchar8 g_0 = convert_uchar8_sat_rtz(f_g); + uchar8 b_0 = convert_uchar8_sat_rtz(f_b); uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2, r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5); @@ -207,9 +207,9 @@ __kernel void UYVY422_to_RGBA8888_bt709( f_g += lumav; f_b += lumav; - uchar8 r_0 = convert_uchar8_rtz(f_r); - uchar8 g_0 = convert_uchar8_rtz(f_g); - uchar8 b_0 = convert_uchar8_rtz(f_b); + uchar8 r_0 = convert_uchar8_sat_rtz(f_r); + uchar8 g_0 = convert_uchar8_sat_rtz(f_g); + uchar8 b_0 = convert_uchar8_sat_rtz(f_b); uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255, r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -266,9 +266,9 @@ __kernel void YUYV422_to_RGB888_bt709( f_g += lumav; f_b += lumav; - uchar8 r_0 = convert_uchar8_rtz(f_r); - uchar8 g_0 = convert_uchar8_rtz(f_g); - uchar8 b_0 = convert_uchar8_rtz(f_b); + uchar8 r_0 = convert_uchar8_sat_rtz(f_r); + uchar8 g_0 = convert_uchar8_sat_rtz(f_g); + uchar8 b_0 = convert_uchar8_sat_rtz(f_b); uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2, r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5); @@ -324,9 +324,9 @@ __kernel void YUYV422_to_RGBA8888_bt709( f_g += lumav; f_b += lumav; - uchar8 r_0 = convert_uchar8_rtz(f_r); - uchar8 g_0 = convert_uchar8_rtz(f_g); - uchar8 b_0 = convert_uchar8_rtz(f_b); + uchar8 r_0 = convert_uchar8_sat_rtz(f_r); + uchar8 g_0 = convert_uchar8_sat_rtz(f_g); + uchar8 b_0 = convert_uchar8_sat_rtz(f_b); uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255, r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -471,9 +471,9 @@ __kernel void NV12_to_RGB888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -484,9 +484,9 @@ __kernel void NV12_to_RGB888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -927,9 +927,9 @@ __kernel void NV12_to_RGBA8888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -940,9 +940,9 @@ __kernel void NV12_to_RGBA8888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -1126,9 +1126,9 @@ __kernel void NV21_to_RGB888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -1139,9 +1139,9 @@ __kernel void NV21_to_RGB888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -1197,9 +1197,9 @@ __kernel void NV21_to_RGBA8888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -1210,9 +1210,9 @@ __kernel void NV21_to_RGBA8888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -1525,9 +1525,9 @@ __kernel void IYUV_to_RGB888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -1538,9 +1538,9 @@ __kernel void IYUV_to_RGB888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2); rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3); @@ -1604,9 +1604,9 @@ __kernel void IYUV_to_RGBA8888_bt709( float4 f_g = convert_float4(luma_0) + temp1; float4 f_b = convert_float4(luma_0) + temp2; - uchar4 r_0 = convert_uchar4_rtz(f_r); - uchar4 g_0 = convert_uchar4_rtz(f_g); - uchar4 b_0 = convert_uchar4_rtz(f_b); + uchar4 r_0 = convert_uchar4_sat_rtz(f_r); + uchar4 g_0 = convert_uchar4_sat_rtz(f_g); + uchar4 b_0 = convert_uchar4_sat_rtz(f_b); uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); @@ -1617,9 +1617,9 @@ __kernel void IYUV_to_RGBA8888_bt709( f_g = convert_float4(luma_1) + temp1; f_b = convert_float4(luma_1) + temp2; - r_0 = convert_uchar4_rtz(f_r); - g_0 = convert_uchar4_rtz(f_g); - b_0 = convert_uchar4_rtz(f_b); + r_0 = convert_uchar4_sat_rtz(f_r); + g_0 = convert_uchar4_sat_rtz(f_g); + b_0 = convert_uchar4_sat_rtz(f_b); rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255); rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255); diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp index 52d9be9418..2b894989e1 100644 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ b/src/core/CL/kernels/CLColorConvertKernel.cpp @@ -198,6 +198,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu ARM_COMPUTE_ERROR_ON(output == nullptr); unsigned int num_elems_processed_per_iteration = 0; + unsigned int num_elems_read_per_iteration_x = 0; bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21); float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1; @@ -212,9 +213,11 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu case Format::NV12: case Format::IYUV: num_elems_processed_per_iteration = 2; + num_elems_read_per_iteration_x = 8; break; case Format::YUV444: num_elems_processed_per_iteration = 4; + num_elems_read_per_iteration_x = 16; break; default: break; @@ -229,6 +232,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu case Format::NV12: case Format::IYUV: num_elems_processed_per_iteration = 8; + num_elems_read_per_iteration_x = 8; break; default: break; @@ -238,6 +242,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu default: break; } + ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported", string_from_format(input->info()->format()).c_str(), string_from_format(output->info()->format()).c_str()); @@ -248,7 +253,6 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu kernel_name << "_to_"; kernel_name << string_from_format(output->info()->format()); kernel_name << "_bt709"; - _input = input; _multi_output = output; @@ -267,8 +271,10 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling); + AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration_x); + update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), + input_access, output_plane0_access, output_plane1_access, output_plane2_access); diff --git a/tests/validation/reference/ColorConvertHelper.h b/tests/validation/reference/ColorConvertHelper.h index ee446683d6..7a8b547486 100644 --- a/tests/validation/reference/ColorConvertHelper.h +++ b/tests/validation/reference/ColorConvertHelper.h @@ -119,99 +119,71 @@ inline void rgb_to_yuv_calculation(const SimpleTensor rvec, const SimpleTenso } } } +inline float compute_rgb_value(int y_value, int v_value, int u_value, unsigned char channel_idx) +{ + float result = 0.f; + switch(channel_idx) + { + case 0: + { + const float red = (v_value - 128.f) * red_coef_bt709; + result = y_value + red; + break; + } + case 1: + { + const float green = (u_value - 128.f) * green_coef_bt709 + (v_value - 128.f) * green_coef2_bt709; + result = y_value + green; + break; + } + case 2: + { + const float blue = (u_value - 128.f) * blue_coef_bt709; + result = y_value + blue; + break; + } + default: + { + //Assuming Alpha channel + return 255; + } + } + return std::min(std::max(0.f, result), 255.f); +} template inline void yuyv_to_rgb_calculation(const SimpleTensor yvec, const SimpleTensor vvec, const SimpleTensor yyvec, const SimpleTensor uvec, SimpleTensor &dst) { const int dst_width = dst.shape().x(); const int dst_height = dst.shape().y(); - for(int y = 0; y < dst_height; ++y) { int x_coord = 0; for(int x = 0; x < dst_width; x += 2, ++x_coord) { - Coordinates dst_coord{ x, y }; - auto *dst_pixel = reinterpret_cast(dst(dst_coord)); - float result = 0.f; - - T border_value(0); - const int yvec_val = validation::tensor_elem_at(yvec, { x_coord, y }, BorderMode::CONSTANT, border_value); - const int vvec_val = validation::tensor_elem_at(vvec, { x_coord, y }, BorderMode::CONSTANT, border_value); - const int yyvec_val = validation::tensor_elem_at(yyvec, { x_coord, y }, BorderMode::CONSTANT, border_value); - const int uvec_val = validation::tensor_elem_at(uvec, { x_coord, y }, BorderMode::CONSTANT, border_value); - const float red = (vvec_val - 128.f) * red_coef_bt709; - const float green = (uvec_val - 128.f) * green_coef_bt709 + (vvec_val - 128.f) * green_coef2_bt709; - const float blue = (uvec_val - 128.f) * blue_coef_bt709; - + const Coordinates dst_coord{ x, y }; + auto *dst_pixel = reinterpret_cast(dst(dst_coord)); + const T border_value(0); + const int yvec_val = validation::tensor_elem_at(yvec, { x_coord, y }, BorderMode::CONSTANT, border_value); + const int vvec_val = validation::tensor_elem_at(vvec, { x_coord, y }, BorderMode::CONSTANT, border_value); + const int yyvec_val = validation::tensor_elem_at(yyvec, { x_coord, y }, BorderMode::CONSTANT, border_value); + const int uvec_val = validation::tensor_elem_at(uvec, { x_coord, y }, BorderMode::CONSTANT, border_value); + //Compute first RGB value using Y plane for(int channel_idx = 0; channel_idx < dst.num_channels(); ++channel_idx) { - if(channel_idx == 0) - { - // Channel 'R' - result = yvec_val + red; - } - else if(channel_idx == 1) - { - // Channel 'G' - result = yvec_val + green; - } - else if(channel_idx == 2) - { - // Channel 'B' - result = yvec_val + blue; - } - else - { - // Channel 'A' - result = 255; - } - - if(result < 0) - { - result = 0; - } - else if(result > 255) - { - result = 255; - } - dst_pixel[channel_idx] = result; + const float channel_value = compute_rgb_value(yvec_val, vvec_val, uvec_val, channel_idx); + dst_pixel[channel_idx] = channel_value; } - - dst_coord.set(0, x + 1); - dst_pixel = reinterpret_cast(dst(dst_coord)); + //Compute second RGB value using YY plane + const Coordinates dst_coord2 + { + x + 1, y + }; + dst_pixel = reinterpret_cast(dst(dst_coord2)); for(int channel_idx = 0; channel_idx < dst.num_channels(); ++channel_idx) { - if(channel_idx == 0) - { - // Channel 'R' - result = yyvec_val + red; - } - else if(channel_idx == 1) - { - // Channel 'G' - result = yyvec_val + green; - } - else if(channel_idx == 2) - { - // Channel 'B' - result = yyvec_val + blue; - } - else - { - // Channel 'A' - result = 255; - } - - if(result < 0) - { - result = 0; - } - else if(result > 255) - { - result = 255; - } - dst_pixel[channel_idx] = result; + const float channel_value = compute_rgb_value(yyvec_val, vvec_val, uvec_val, channel_idx); + dst_pixel[channel_idx] = channel_value; } } } -- cgit v1.2.1