about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Pablo Tello <pablo.tello@arm.com> 2018-07-17 17:10:59 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit 96fc1d6582d17407bf5006419644ae278026924a (patch)
tree 08658c8f471e5d747408f3057ddef0a1dffa7db2 /src
parent 40606df5cc5e687e4d72af842af8d65fc4228cbc (diff)
download ComputeLibrary-96fc1d6582d17407bf5006419644ae278026924a.tar.gz
COMPMID-1394: Fixed OCLGrind failures ColorConvert
Change-Id: Ibf688e68205eac2dc07ce88c96498cc66c153cee
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140440
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- src/core/CL/cl_kernels/color_convert.cl 96
-rw-r--r-- src/core/CL/kernels/CLColorConvertKernel.cpp 10
2 files changed, 56 insertions, 50 deletions
diff --git a/src/core/CL/cl_kernels/color_convert.cl b/src/core/CL/cl_kernels/color_convert.cl
index 72c74930b0..02a0c8ee2a 100644
--- a/src/core/CL/cl_kernels/color_convert.cl
+++ b/src/core/CL/cl_kernels/color_convert.cl
@@ -149,9 +149,9 @@ __kernel void UYVY422_to_RGB888_bt709(
f_g += lumav;
f_b += lumav;
- uchar8 r_0 = convert_uchar8_rtz(f_r);
- uchar8 g_0 = convert_uchar8_rtz(f_g);
- uchar8 b_0 = convert_uchar8_rtz(f_b);
+ uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
+ uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
+ uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2,
r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5);
@@ -207,9 +207,9 @@ __kernel void UYVY422_to_RGBA8888_bt709(
f_g += lumav;
f_b += lumav;
- uchar8 r_0 = convert_uchar8_rtz(f_r);
- uchar8 g_0 = convert_uchar8_rtz(f_g);
- uchar8 b_0 = convert_uchar8_rtz(f_b);
+ uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
+ uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
+ uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255,
r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -266,9 +266,9 @@ __kernel void YUYV422_to_RGB888_bt709(
f_g += lumav;
f_b += lumav;
- uchar8 r_0 = convert_uchar8_rtz(f_r);
- uchar8 g_0 = convert_uchar8_rtz(f_g);
- uchar8 b_0 = convert_uchar8_rtz(f_b);
+ uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
+ uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
+ uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
uchar16 rgb_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2, b_0.s2,
r_0.s3, g_0.s3, b_0.s3, r_0.s4, g_0.s4, b_0.s4, r_0.s5);
@@ -324,9 +324,9 @@ __kernel void YUYV422_to_RGBA8888_bt709(
f_g += lumav;
f_b += lumav;
- uchar8 r_0 = convert_uchar8_rtz(f_r);
- uchar8 g_0 = convert_uchar8_rtz(f_g);
- uchar8 b_0 = convert_uchar8_rtz(f_b);
+ uchar8 r_0 = convert_uchar8_sat_rtz(f_r);
+ uchar8 g_0 = convert_uchar8_sat_rtz(f_g);
+ uchar8 b_0 = convert_uchar8_sat_rtz(f_b);
uchar16 rgba_0 = (uchar16)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255,
r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -471,9 +471,9 @@ __kernel void NV12_to_RGB888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -484,9 +484,9 @@ __kernel void NV12_to_RGB888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -927,9 +927,9 @@ __kernel void NV12_to_RGBA8888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -940,9 +940,9 @@ __kernel void NV12_to_RGBA8888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -1126,9 +1126,9 @@ __kernel void NV21_to_RGB888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -1139,9 +1139,9 @@ __kernel void NV21_to_RGB888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -1197,9 +1197,9 @@ __kernel void NV21_to_RGBA8888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -1210,9 +1210,9 @@ __kernel void NV21_to_RGBA8888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -1525,9 +1525,9 @@ __kernel void IYUV_to_RGB888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
uchar4 rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -1538,9 +1538,9 @@ __kernel void IYUV_to_RGB888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, r_0.s1, g_0.s1, b_0.s1, r_0.s2, g_0.s2);
rgb_1 = (uchar4)(b_0.s2, r_0.s3, g_0.s3, b_0.s3);
@@ -1604,9 +1604,9 @@ __kernel void IYUV_to_RGBA8888_bt709(
float4 f_g = convert_float4(luma_0) + temp1;
float4 f_b = convert_float4(luma_0) + temp2;
- uchar4 r_0 = convert_uchar4_rtz(f_r);
- uchar4 g_0 = convert_uchar4_rtz(f_g);
- uchar4 b_0 = convert_uchar4_rtz(f_b);
+ uchar4 r_0 = convert_uchar4_sat_rtz(f_r);
+ uchar4 g_0 = convert_uchar4_sat_rtz(f_g);
+ uchar4 b_0 = convert_uchar4_sat_rtz(f_b);
uchar8 rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
uchar8 rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
@@ -1617,9 +1617,9 @@ __kernel void IYUV_to_RGBA8888_bt709(
f_g = convert_float4(luma_1) + temp1;
f_b = convert_float4(luma_1) + temp2;
- r_0 = convert_uchar4_rtz(f_r);
- g_0 = convert_uchar4_rtz(f_g);
- b_0 = convert_uchar4_rtz(f_b);
+ r_0 = convert_uchar4_sat_rtz(f_r);
+ g_0 = convert_uchar4_sat_rtz(f_g);
+ b_0 = convert_uchar4_sat_rtz(f_b);
rgb_0 = (uchar8)(r_0.s0, g_0.s0, b_0.s0, 255, r_0.s1, g_0.s1, b_0.s1, 255);
rgb_1 = (uchar8)(r_0.s2, g_0.s2, b_0.s2, 255, r_0.s3, g_0.s3, b_0.s3, 255);
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
index 52d9be9418..2b894989e1 100644
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ b/src/core/CL/kernels/CLColorConvertKernel.cpp
@@ -198,6 +198,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
ARM_COMPUTE_ERROR_ON(output == nullptr);
unsigned int num_elems_processed_per_iteration = 0;
+ unsigned int num_elems_read_per_iteration_x = 0;
bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21);
float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
@@ -212,9 +213,11 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
case Format::NV12:
case Format::IYUV:
num_elems_processed_per_iteration = 2;
+ num_elems_read_per_iteration_x = 8;
break;
case Format::YUV444:
num_elems_processed_per_iteration = 4;
+ num_elems_read_per_iteration_x = 16;
break;
default:
break;
@@ -229,6 +232,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
case Format::NV12:
case Format::IYUV:
num_elems_processed_per_iteration = 8;
+ num_elems_read_per_iteration_x = 8;
break;
default:
break;
@@ -238,6 +242,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
default:
break;
}
+
ARM_COMPUTE_ERROR_ON_MSG(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported",
string_from_format(input->info()->format()).c_str(),
string_from_format(output->info()->format()).c_str());
@@ -248,7 +253,6 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
kernel_name << "_to_";
kernel_name << string_from_format(output->info()->format());
kernel_name << "_bt709";
-
_input = input;
_multi_output = output;
@@ -267,8 +271,10 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0,
num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling);
+ AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration_x);
+
update_window_and_padding(win,
- AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration),
+ input_access,
output_plane0_access,
output_plane1_access,
output_plane2_access);