aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2018-09-26 17:38:19 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:55:19 +0000
commitacaf21d7ae82e4de8da578c36e243fdc4c77c53d (patch)
treea8feb51cf78179cc4595ca96f7ada2c436ee1345 /src
parente22aa1301a30dc97341aa7dfce933d71b0d226ea (diff)
downloadComputeLibrary-acaf21d7ae82e4de8da578c36e243fdc4c77c53d.tar.gz
COMPMID-286: CL colour convert to U8
Change-Id: I62bbf510cc106a90ed2884be3c9c0c127da25898 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/150681 Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com> Tested-by: bsgcomp <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/CLKernelLibrary.cpp1
-rw-r--r--src/core/CL/cl_kernels/color_convert.cl48
-rw-r--r--src/core/CL/kernels/CLColorConvertKernel.cpp9
3 files changed, 58 insertions, 0 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 7e8ef6b22d..87b588e16c 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -352,6 +352,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "RGB888_to_IYUV_bt709", "color_convert.cl" },
{ "RGB888_to_NV12_bt709", "color_convert.cl" },
{ "RGB888_to_RGBA8888_bt709", "color_convert.cl" },
+ { "RGB888_to_U8_bt709", "color_convert.cl" },
{ "RGB888_to_YUV444_bt709", "color_convert.cl" },
{ "RGBA8888_to_IYUV_bt709", "color_convert.cl" },
{ "RGBA8888_to_NV12_bt709", "color_convert.cl" },
diff --git a/src/core/CL/cl_kernels/color_convert.cl b/src/core/CL/cl_kernels/color_convert.cl
index 02a0c8ee2a..7a872b47b5 100644
--- a/src/core/CL/cl_kernels/color_convert.cl
+++ b/src/core/CL/cl_kernels/color_convert.cl
@@ -64,6 +64,54 @@ __kernel void RGB888_to_RGBA8888_bt709(
vstore16(rgba_3, 0, out.ptr + 48);
}
+/** Compute the BT.709 greyscale (luma) value of every pixel of an RGB888 image and store it as U8
+ *
+ * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
+ * No offset.
+ *
+ * @param[in]  input_ptr                            Base pointer of the source image. Supported Format: RGB888
+ * @param[in]  input_stride_x                       Source image stride along X (in bytes)
+ * @param[in]  input_step_x                         input_stride_x * number of elements along X handled by one workitem (in bytes)
+ * @param[in]  input_stride_y                       Source image stride along Y (in bytes)
+ * @param[in]  input_step_y                         input_stride_y * number of elements along Y handled by one workitem (in bytes)
+ * @param[in]  input_offset_first_element_in_bytes  Byte offset of the first element of the source image
+ * @param[out] output_ptr                           Base pointer of the destination image. Supported Format: U8
+ * @param[in]  output_stride_x                      Destination image stride along X (in bytes)
+ * @param[in]  output_step_x                        output_stride_x * number of elements along X handled by one workitem (in bytes)
+ * @param[in]  output_stride_y                      Destination image stride along Y (in bytes)
+ * @param[in]  output_step_y                        output_stride_y * number of elements along Y handled by one workitem (in bytes)
+ * @param[in]  output_offset_first_element_in_bytes Byte offset of the first element of the destination image
+ */
+__kernel void RGB888_to_U8_bt709(
+    IMAGE_DECLARATION(input),
+    IMAGE_DECLARATION(output))
+{
+    Image src = CONVERT_TO_IMAGE_STRUCT(input);
+    Image dst = CONVERT_TO_IMAGE_STRUCT(output);
+
+    // BT.709 luma weights; the scalar literal is broadcast to all 16 lanes
+    const float16 weight_r = 0.2126f;
+    const float16 weight_g = 0.7152f;
+    const float16 weight_b = 0.0722f;
+
+    // One workitem consumes 16 interleaved RGB pixels, i.e. 3 x 16 = 48 bytes
+    const uchar16 chunk_lo  = vload16(0, src.ptr);
+    const uchar16 chunk_mid = vload16(0, src.ptr + 16);
+    const uchar16 chunk_hi  = vload16(0, src.ptr + 32);
+
+    // De-interleave RGBRGB... into three planar vectors (16 R, 16 G, 16 B) and widen them to float
+    const float16 red   = convert_float16((uchar16)(chunk_lo.s0369, chunk_lo.scf, chunk_mid.s258b, chunk_mid.se, chunk_hi.s147a, chunk_hi.sd));
+    const float16 green = convert_float16((uchar16)(chunk_lo.s147a, chunk_lo.sd, chunk_mid.s0369, chunk_mid.scf, chunk_hi.s258b, chunk_hi.se));
+    const float16 blue  = convert_float16((uchar16)(chunk_lo.s258b, chunk_lo.se, chunk_mid.s147a, chunk_mid.sd, chunk_hi.s0369, chunk_hi.scf));
+
+    // Weighted sum of the three channels gives 16 greyscale values in float
+    const float16 luma_f32 = weight_r * red + weight_g * green + weight_b * blue;
+
+    // Narrow back to 8 bits: saturate to [0, 255] and round towards zero
+    const uchar16 luma_u8 = convert_uchar16_sat_rtz(luma_f32);
+    vstore16(luma_u8, 0, dst.ptr);
+}
+
/** Convert an RGB888 image to RGBX8888
*
* Global Workgroup Size [ DIV_CEIL(width, 16), height ]
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
index e79019eab9..4f178c9d75 100644
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ b/src/core/CL/kernels/CLColorConvertKernel.cpp
@@ -61,6 +61,7 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
num_elems_processed_per_iteration = 16;
break;
default:
+ ARM_COMPUTE_ERROR("Not supported");
break;
}
break;
@@ -75,6 +76,7 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
num_elems_processed_per_iteration = 8;
break;
default:
+ ARM_COMPUTE_ERROR("Not supported");
break;
}
break;
@@ -84,9 +86,11 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
switch(output->info()->format())
{
case Format::RGBA8888:
+ case Format::U8:
num_elems_processed_per_iteration = 16;
break;
default:
+ ARM_COMPUTE_ERROR("Not supported");
break;
}
break;
@@ -143,6 +147,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *outpu
num_elems_processed_per_iteration = 4;
break;
default:
+ ARM_COMPUTE_ERROR("Not supported");
break;
}
break;
@@ -220,6 +225,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
num_elems_read_per_iteration_x = 16;
break;
default:
+ ARM_COMPUTE_ERROR("Not supported");
break;
}
break;
@@ -235,6 +241,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu
num_elems_read_per_iteration_x = 8;
break;
default:
+ ARM_COMPUTE_ERROR("Not supported");
break;
}
break;
@@ -303,6 +310,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *
num_elems_processed_per_iteration = 16;
break;
default:
+ ARM_COMPUTE_ERROR("Not supported");
break;
}
break;
@@ -316,6 +324,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *
num_elems_processed_per_iteration = 16;
break;
default:
+ ARM_COMPUTE_ERROR("Not supported");
break;
}
break;