about summary refs log tree commit diff
diff options
context:
space:
mode:
author Pablo Tello <pablo.tello@arm.com> 2018-07-17 17:10:59 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit 96fc1d6582d17407bf5006419644ae278026924a (patch)
tree 08658c8f471e5d747408f3057ddef0a1dffa7db2
parent 40606df5cc5e687e4d72af842af8d65fc4228cbc (diff)
download ComputeLibrary-96fc1d6582d17407bf5006419644ae278026924a.tar.gz
COMPMID-1394: Fixed OCLGrind failures ColorConvert
Change-Id: Ibf688e68205eac2dc07ce88c96498cc66c153cee
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140440
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/color_convert.cl 96
-rw-r--r-- src/core/CL/kernels/CLColorConvertKernel.cpp 10
-rw-r--r-- tests/validation/reference/ColorConvertHelper.h 126
3 files changed, 105 insertions, 127 deletions
diff --git a/src/core/CL/cl_kernels/color_convert.cl b/src/core/CL/cl_kernels/color_convert.cl
index 72c74930b0..02a0c8ee2a 100644
--- a/src/core/CL/cl_kernels/color_convert.cl
+++ b/src/core/CL/cl_kernels/color_convert.cl
@@ -149,9 +149,9 @@ __kernel void UYVY422_to_RGB888_bt709(
f_g += lumav;
f_b += lumav;
- uchar8 r_0 = convert_uchar8_rtz(f_r);
- uchar8 g_0 = convert_uchar8_rtz(f_g);
- uchar8 b_0 = convert_uchar8_rtz(f_b);
+ uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
+ uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
+ uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2,
r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5);
@@ -207,9 +207,9 @@ __kernel void UYVY422_to_RGBA8888_bt709(
f_g += lumav;
f_b += lumav;
- uchar8 r_0 = convert_uchar8_rtz(f_r);
- uchar8 g_0 = convert_uchar8_rtz(f_g);
- uchar8 b_0 = convert_uchar8_rtz(f_b);
+ uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
+ uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
+ uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255,
r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -266,9 +266,9 @@ __kernel void YUYV422_to_RGB888_bt709(
f_g += lumav;
f_b += lumav;
- uchar8 r_0 = convert_uchar8_rtz(f_r);
- uchar8 g_0 = convert_uchar8_rtz(f_g);
- uchar8 b_0 = convert_uchar8_rtz(f_b);
+ uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
+ uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
+ uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2,
r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5);
@@ -324,9 +324,9 @@ __kernel void YUYV422_to_RGBA8888_bt709(
f_g += lumav;
f_b += lumav;
- uchar8 r_0 = convert_uchar8_rtz(f_r);
- uchar8 g_0 = convert_uchar8_rtz(f_g);
- uchar8 b_0 = convert_uchar8_rtz(f_b);
+ uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
+ uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
+ uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255,
r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -471,9 +471,9 @@ __kernel void NV12_to_RGB888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -484,9 +484,9 @@ __kernel void NV12_to_RGB888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -927,9 +927,9 @@ __kernel void NV12_to_RGBA8888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -940,9 +940,9 @@ __kernel void NV12_to_RGBA8888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -1126,9 +1126,9 @@ __kernel void NV21_to_RGB888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -1139,9 +1139,9 @@ __kernel void NV21_to_RGB888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -1197,9 +1197,9 @@ __kernel void NV21_to_RGBA8888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -1210,9 +1210,9 @@ __kernel void NV21_to_RGBA8888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -1525,9 +1525,9 @@ __kernel void IYUV_to_RGB888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -1538,9 +1538,9 @@ __kernel void IYUV_to_RGB888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -1604,9 +1604,9 @@ __kernel void IYUV_to_RGBA8888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -1617,9 +1617,9 @@ __kernel void IYUV_to_RGBA8888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
index 52d9be9418..2b894989e1 100644
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ b/src/core/CL/kernels/CLColorConvertKernel.cpp
@@ -198,6 +198,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
ARM_COMPUTE_ERROR_ON(output == nullptr);
unsigned int num_elems_processed_per_iteration = 0;
+ unsigned int num_elems_read_per_iteration_x = 0;
bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
@@ -212,9 +213,11 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
case Format::NV12:
case Format::IYUV:
num_elems_processed_per_iteration = 2;
+ num_elems_read_per_iteration_x = 8;
break;
case Format::YUV444:
num_elems_processed_per_iteration = 4;
+ num_elems_read_per_iteration_x = 16;
break;
default:
break;
@@ -229,6 +232,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
case Format::NV12:
case Format::IYUV:
num_elems_processed_per_iteration = 8;
+ num_elems_read_per_iteration_x = 8;
break;
default:
break;
@@ -238,6 +242,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
default:
break;
}
+
ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
string_from_format(input->info()->format()).c_str(),
string_from_format(output->info()->format()).c_str());
@@ -248,7 +253,6 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
kernel_name << "_to_";
kernel_name << string_from_format(output->info()->format());
kernel_name << "_bt709";
-
_input = input;
_multi_output = output;
@@ -267,8 +271,10 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0,
num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
+ AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration_x);
+
update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
+ input_access,
output_plane0_access,
output_plane1_access,
output_plane2_access);
diff --git a/tests/validation/reference/ColorConvertHelper.h b/tests/validation/reference/ColorConvertHelper.h
index ee446683d6..7a8b547486 100644
--- a/tests/validation/reference/ColorConvertHelper.h
+++ b/tests/validation/reference/ColorConvertHelper.h
@@ -119,99 +119,71 @@ inline void rgb_to_yuv_calculation(const SimpleTensor<T> rvec, const SimpleTenso
}
}
}
+inline float compute_rgb_value(int y_value, int v_value, int u_value, unsigned char channel_idx)
+{
+ float result = 0.f;
+ switch(channel_idx)
+ {
+ case 0:
+ {
+ const float red = (v_value - 128.f) * red_coef_bt709;
+ result = y_value + red;
+ break;
+ }
+ case 1:
+ {
+ const float green = (u_value - 128.f) * green_coef_bt709 + (v_value - 128.f) * green_coef2_bt709;
+ result = y_value + green;
+ break;
+ }
+ case 2:
+ {
+ const float blue = (u_value - 128.f) * blue_coef_bt709;
+ result = y_value + blue;
+ break;
+ }
+ default:
+ {
+ //Assuming Alpha channel
+ return 255;
+ }
+ }
+ return std::min(std::max(0.f, result), 255.f);
+}
template <typename T>
inline void yuyv_to_rgb_calculation(const SimpleTensor<T> yvec, const SimpleTensor<T> vvec, const SimpleTensor<T> yyvec, const SimpleTensor<T> uvec, SimpleTensor<T> &dst)
{
const int dst_width = dst.shape().x();
const int dst_height = dst.shape().y();
-
for(int y = 0; y < dst_height; ++y)
{
int x_coord = 0;
for(int x = 0; x < dst_width; x += 2, ++x_coord)
{
- Coordinates dst_coord{ x, y };
- auto *dst_pixel = reinterpret_cast<T *>(dst(dst_coord));
- float result = 0.f;
-
- T border_value(0);
- const int yvec_val = validation::tensor_elem_at(yvec, { x_coord, y }, BorderMode::CONSTANT, border_value);
- const int vvec_val = validation::tensor_elem_at(vvec, { x_coord, y }, BorderMode::CONSTANT, border_value);
- const int yyvec_val = validation::tensor_elem_at(yyvec, { x_coord, y }, BorderMode::CONSTANT, border_value);
- const int uvec_val = validation::tensor_elem_at(uvec, { x_coord, y }, BorderMode::CONSTANT, border_value);
- const float red = (vvec_val - 128.f) * red_coef_bt709;
- const float green = (uvec_val - 128.f) * green_coef_bt709 + (vvec_val - 128.f) * green_coef2_bt709;
- const float blue = (uvec_val - 128.f) * blue_coef_bt709;
-
+ const Coordinates dst_coord{ x, y };
+ auto *dst_pixel = reinterpret_cast<T *>(dst(dst_coord));
+ const T border_value(0);
+ const int yvec_val = validation::tensor_elem_at(yvec, { x_coord, y }, BorderMode::CONSTANT, border_value);
+ const int vvec_val = validation::tensor_elem_at(vvec, { x_coord, y }, BorderMode::CONSTANT, border_value);
+ const int yyvec_val = validation::tensor_elem_at(yyvec, { x_coord, y }, BorderMode::CONSTANT, border_value);
+ const int uvec_val = validation::tensor_elem_at(uvec, { x_coord, y }, BorderMode::CONSTANT, border_value);
+ //Compute first RGB value using Y plane
for(int channel_idx = 0; channel_idx < dst.num_channels(); ++channel_idx)
{
- if(channel_idx == 0)
- {
- // Channel 'R'
- result = yvec_val + red;
- }
- else if(channel_idx == 1)
- {
- // Channel 'G'
- result = yvec_val + green;
- }
- else if(channel_idx == 2)
- {
- // Channel 'B'
- result = yvec_val + blue;
- }
- else
- {
- // Channel 'A'
- result = 255;
- }
-
- if(result < 0)
- {
- result = 0;
- }
- else if(result > 255)
- {
- result = 255;
- }
- dst_pixel[channel_idx] = result;
+ const float channel_value = compute_rgb_value(yvec_val, vvec_val, uvec_val, channel_idx);
+ dst_pixel[channel_idx] = channel_value;
}
-
- dst_coord.set(0, x + 1);
- dst_pixel = reinterpret_cast<T *>(dst(dst_coord));
+ //Compute second RGB value using YY plane
+ const Coordinates dst_coord2
+ {
+ x + 1, y
+ };
+ dst_pixel = reinterpret_cast<T *>(dst(dst_coord2));
for(int channel_idx = 0; channel_idx < dst.num_channels(); ++channel_idx)
{
- if(channel_idx == 0)
- {
- // Channel 'R'
- result = yyvec_val + red;
- }
- else if(channel_idx == 1)
- {
- // Channel 'G'
- result = yyvec_val + green;
- }
- else if(channel_idx == 2)
- {
- // Channel 'B'
- result = yyvec_val + blue;
- }
- else
- {
- // Channel 'A'
- result = 255;
- }
-
- if(result < 0)
- {
- result = 0;
- }
- else if(result > 255)
- {
- result = 255;
- }
- dst_pixel[channel_idx] = result;
+ const float channel_value = compute_rgb_value(yyvec_val, vvec_val, uvec_val, channel_idx);
+ dst_pixel[channel_idx] = channel_value;
}
}
}