From 4284bfab4594d4babb23123001ef63db7bebeccb Mon Sep 17 00:00:00 2001
From: Manuel Bottini <manuel.bottini@arm.com>
Date: Wed, 26 Sep 2018 15:33:15 +0100
Subject: COMPMID-287: NEON colour convert to U8

Change-Id: I47033fa70881fd32b13266adb6ccbf10c202aabc
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/150344
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
---
 arm_compute/core/NEON/NEColorConvertHelper.inl     | 142 +++++++++++++++------
 .../core/NEON/kernels/NEColorConvertKernel.h       |   3 +-
 .../runtime/NEON/functions/NEColorConvert.h        |   5 +-
 src/core/NEON/kernels/NEColorConvertKernel.cpp     |   4 +
 tests/benchmark/NEON/ColorConvert.cpp              |  10 ++
 tests/validation/NEON/ColorConvert.cpp             |  28 ++++
 tests/validation/reference/ColorConvert.cpp        |   4 +
 tests/validation/reference/ColorConvertHelper.h    |  27 ++++
 8 files changed, 181 insertions(+), 42 deletions(-)

diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl
index 0da5affe18..7540d33830 100644
--- a/arm_compute/core/NEON/NEColorConvertHelper.inl
+++ b/arm_compute/core/NEON/NEColorConvertHelper.inl
@@ -45,14 +45,20 @@ constexpr float rgb2yuv_bt709_cu = 0.5389f;
 // C_v = 1 / (2 * (1 - K_r))
 constexpr float rgb2yuv_bt709_cv = 0.6350f;
 
-inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out)
+constexpr float rgb2u8_red_coef   = 0.2126f; // Weight applied to R in the RGB -> U8 weighted sum (looks like BT.709 K_r - confirm)
+constexpr float rgb2u8_green_coef = 0.7152f; // Weight applied to G; the three weights add up to 1
+constexpr float rgb2u8_blue_coef  = 0.0722f; // Weight applied to B in the RGB -> U8 weighted sum (looks like BT.709 K_b - confirm)
+
+inline float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in)
 {
-    const auto tmp1 = vmovl_u8(vget_low_u8(in));
-    out.val[0]      = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1)));
-    out.val[1]      = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1)));
-    const auto tmp2 = vmovl_u8(vget_high_u8(in));
-    out.val[2]      = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2)));
-    out.val[3]      = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2)));
+    float32x4x4_t out; // Returned by value instead of being written through an out-parameter
+    const auto    tmp1 = vmovl_u8(vget_low_u8(in)); // Low 8 input lanes widened from u8 to u16
+    out.val[0]         = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1))); // Input lanes 0-3 as float
+    out.val[1]         = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1))); // Input lanes 4-7 as float
+    const auto tmp2    = vmovl_u8(vget_high_u8(in)); // High 8 input lanes widened from u8 to u16
+    out.val[2]         = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2))); // Input lanes 8-11 as float
+    out.val[3]         = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2))); // Input lanes 12-15 as float
+    return out;
 }
 
 inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out)
@@ -74,6 +80,42 @@ inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t
     out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high));
 }
 
+inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor, const float32x4_t &gcolor, const float32x4_t &bcolor,
+                                                const float rcoef, const float gcoef, const float bcoef)
+{
+    // Per-lane weighted sum: (rcoef * R) + (gcoef * G) + (bcoef * B), accumulated in that order
+    const float32x4_t red_term       = vmulq_n_f32(rcolor, rcoef);
+    const float32x4_t red_green_term = vmlaq_n_f32(red_term, gcolor, gcoef);
+    return vmlaq_n_f32(red_green_term, bcolor, bcoef);
+}
+
+/** Convert 16 RGB pixels to 16 greyscale U8 values.
+ *
+ * Each output value is rgb2u8_red_coef * R + rgb2u8_green_coef * G + rgb2u8_blue_coef * B.
+ *
+ * @param[in]  in  Colour channels of 16 pixels: val[0] = R, val[1] = G, val[2] = B.
+ * @param[out] out Weighted sum of the three channels converted back to 8 bits per pixel.
+ */
+inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out)
+{
+    // Widen each channel from 16 uint8 values to 4 vectors of 4 floats
+    const float32x4x4_t red   = convert_uint8x16_to_float32x4x4(in.val[0]);
+    const float32x4x4_t green = convert_uint8x16_to_float32x4x4(in.val[1]);
+    const float32x4x4_t blue  = convert_uint8x16_to_float32x4x4(in.val[2]);
+
+    // Greyscale = (RED_COEF * R) + (GREEN_COEF * G) + (BLUE_COEF * B),
+    // computed on four pixels at a time for each of the four float vectors
+    float32x4x4_t grey;
+    for(int i = 0; i < 4; ++i)
+    {
+        grey.val[i] = rgb_to_greyscale_calculation(red.val[i], green.val[i], blue.val[i],
+                                                   rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef);
+    }
+
+    // Convert the 16 float results back to 16 uint8 values
+    convert_float32x4x4_to_unit8x16(grey, out);
+}
+
 inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec,
                                    float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec)
 {
@@ -183,15 +225,13 @@ inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha)
 inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom)
 {
     // Convert the uint8x16_t to float32x4x4_t
-    float32x4x4_t frvec_top, fgvec_top, fbvec_top;
-    convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top);
-    convert_uint8x16_to_float32x4x4(vec_top.val[1], fgvec_top);
-    convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top);
+    const float32x4x4_t frvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[0]);
+    const float32x4x4_t fgvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[1]);
+    const float32x4x4_t fbvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[2]);
 
-    float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom;
-    convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom);
-    convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom);
-    convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom);
+    const float32x4x4_t frvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[0]);
+    const float32x4x4_t fgvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[1]);
+    const float32x4x4_t fbvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[2]);
 
     float32x4x4_t fyvec_top, fuvec_top, fvvec_top;
     float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom;
@@ -276,10 +316,9 @@ inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, co
                               unsigned char *const __restrict out_v)
 {
     // Convert the uint8x16_t to float32x4x4_t
-    float32x4x4_t frvec, fgvec, fbvec;
-    convert_uint8x16_to_float32x4x4(rvec, frvec);
-    convert_uint8x16_to_float32x4x4(gvec, fgvec);
-    convert_uint8x16_to_float32x4x4(bvec, fbvec);
+    const float32x4x4_t frvec = convert_uint8x16_to_float32x4x4(rvec);
+    const float32x4x4_t fgvec = convert_uint8x16_to_float32x4x4(gvec);
+    const float32x4x4_t fbvec = convert_uint8x16_to_float32x4x4(bvec);
 
     float32x4x4_t fyvec, fuvec, fvvec;
     for(auto i = 0; i < 4; ++i)
@@ -333,6 +372,34 @@ void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict out
     in, out);
 }
 
+/** Convert an RGB image to a single-channel U8 (greyscale) image.
+ *
+ * @param[in]  input  Input RGB data buffer; it is accessed as an IImage.
+ * @param[out] output Output U8 buffer; it is accessed as an IImage.
+ * @param[in]  win    Window for iterating the buffers.
+ */
+void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win)
+{
+    ARM_COMPUTE_ERROR_ON(nullptr == input);
+    ARM_COMPUTE_ERROR_ON(nullptr == output);
+
+    const auto src_image = static_cast<const IImage *__restrict>(input);
+    const auto dst_image = static_cast<IImage *__restrict>(output);
+
+    Iterator src_it(src_image, win);
+    Iterator dst_it(dst_image, win);
+
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        // vld3q_u8 de-interleaves 16 RGB pixels into separate R, G and B vectors
+        const uint8x16x3_t rgb_pixels = vld3q_u8(src_it.ptr());
+        uint8x16_t         grey_pixels;
+        rgb_to_u8_conversion(rgb_pixels, grey_pixels);
+        vst1q_u8(dst_it.ptr(), grey_pixels);
+    },
+    src_it, dst_it);
+}
+
 /** Convert RGBX to RGB.
  *
  * @param[in]  input  Input RGBX data buffer.
@@ -387,18 +454,17 @@ void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict out
 
     execute_window_loop(win, [&](const Coordinates & id)
     {
-        float32x4x4_t uvec, yvec, vvec, yyvec;
-        const auto    ta = vld4q_u8(in.ptr());
+        const auto ta = vld4q_u8(in.ptr());
         //ta.val[0] = Y0 Y2 Y4 Y6 ...
         //ta.val[1] = U0 U2 U4 U6 ...
         //ta.val[2] = Y1 Y3 Y5 Y7 ...
         //ta.val[3] = V0 V2 V4 V7 ...
 
         // Convert the uint8x16x4_t to float32x4x4_t
-        convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec);
-        convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec);
-        convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec);
-        convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec);
+        const float32x4x4_t yvec  = convert_uint8x16_to_float32x4x4(ta.val[0 + shift]);
+        const float32x4x4_t uvec  = convert_uint8x16_to_float32x4x4(ta.val[1 - shift]);
+        const float32x4x4_t yyvec = convert_uint8x16_to_float32x4x4(ta.val[2 + shift]);
+        const float32x4x4_t vvec  = convert_uint8x16_to_float32x4x4(ta.val[3 - shift]);
 
         yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
         yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
@@ -450,13 +516,12 @@ void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict out
         //ta_uv.val[1] = V0 V2 V4 V6 ...
 
         // Convert the uint8x16x4_t to float32x4x4_t
-        float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
-        convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
-        convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
-        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
-        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
-        convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec);
-        convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec);
+        float32x4x4_t yvec_top     = convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
+        float32x4x4_t yyvec_top    = convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
+        float32x4x4_t yvec_bottom  = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
+        float32x4x4_t yyvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
+        float32x4x4_t uvec         = convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift]);
+        float32x4x4_t vvec         = convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift]);
 
         yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
         yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
@@ -514,13 +579,12 @@ void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict out
         //ta_v.val[0] = V0 V2 V4 V6 ...
 
         // Convert the uint8x16x4_t to float32x4x4_t
-        float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec;
-        convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top);
-        convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top);
-        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom);
-        convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom);
-        convert_uint8x16_to_float32x4x4(ta_u, uvec);
-        convert_uint8x16_to_float32x4x4(ta_v, vvec);
+        float32x4x4_t yvec_top     = convert_uint8x16_to_float32x4x4(ta_y_top.val[0]);
+        float32x4x4_t yyvec_top    = convert_uint8x16_to_float32x4x4(ta_y_top.val[1]);
+        float32x4x4_t yvec_bottom  = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]);
+        float32x4x4_t yyvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]);
+        float32x4x4_t uvec         = convert_uint8x16_to_float32x4x4(ta_u);
+        float32x4x4_t vvec         = convert_uint8x16_to_float32x4x4(ta_v);
 
         yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha);
         yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha);
diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h
index 608172ccde..4f1ac973e7 100644
--- a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h
+++ b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h
@@ -57,7 +57,8 @@ public:
      *
      * @param[in]  input  Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
      * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
-     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/)
+     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+     *                                                          U8 (if the format of @p input is RGB888)
      */
     void configure(const ITensor *input, ITensor *output);
     /** Set the input and output of the kernel
diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h
index ab0bf14609..73eb3f94ea 100644
--- a/arm_compute/runtime/NEON/functions/NEColorConvert.h
+++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,7 +40,8 @@ public:
      *
      * @param[in]  input  Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
      * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
-     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/)
+     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+     *                                                          U8 (if the format of @p input is RGB888)
      */
     void configure(const ITensor *input, ITensor *output);
     /** Initialize the function's source, destination
diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp
index 4582c88487..7a66b6cc3a 100644
--- a/src/core/NEON/kernels/NEColorConvertKernel.cpp
+++ b/src/core/NEON/kernels/NEColorConvertKernel.cpp
@@ -112,6 +112,10 @@ void NEColorConvertKernel::configure(const ITensor *input, ITensor *output)
                     _func                             = colorconvert_rgb_to_rgbx;
                     num_elems_processed_per_iteration = 16;
                     break;
+                case Format::U8:
+                    _func                             = colorconvert_rgb_to_u8;
+                    num_elems_processed_per_iteration = 16;
+                    break;
                 default:
                     ARM_COMPUTE_ERROR("Not supported");
                     break;
diff --git a/tests/benchmark/NEON/ColorConvert.cpp b/tests/benchmark/NEON/ColorConvert.cpp
index 6aef5723d3..8223551635 100644
--- a/tests/benchmark/NEON/ColorConvert.cpp
+++ b/tests/benchmark/NEON/ColorConvert.cpp
@@ -47,6 +47,9 @@ const auto ColorConvert_RGBA_to_RGB = combine(framework::dataset::make("FormatTy
 const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
                                               framework::dataset::make("FormatType", { Format::RGBA8888 }));
 
+const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
+                                            framework::dataset::make("FormatType", { Format::U8 }));
+
 const auto ColorConvert_YUYVDataset_to_RGBDataset = combine(YUYVDataset,
                                                             RGBDataset);
 
@@ -82,6 +85,13 @@ REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::Data
 REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_RGBA));
 TEST_SUITE_END()
 
+TEST_SUITE(RGBtoU8)
+// *INDENT-OFF*
+// clang-format off
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8));
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8));
+TEST_SUITE_END()
+
 TEST_SUITE(YUYV)
 // *INDENT-OFF*
 // clang-format off
diff --git a/tests/validation/NEON/ColorConvert.cpp b/tests/validation/NEON/ColorConvert.cpp
index c34df2b557..ecd95f23e0 100644
--- a/tests/validation/NEON/ColorConvert.cpp
+++ b/tests/validation/NEON/ColorConvert.cpp
@@ -52,6 +52,9 @@ const auto ColorConvert_RGBA_to_RGB = combine(framework::dataset::make("FormatTy
 const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
                                               framework::dataset::make("FormatType", { Format::RGBA8888 }));
 
+const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
+                                            framework::dataset::make("FormatType", { Format::U8 }));
+
 const auto ColorConvert_YUYVDataset_to_RGBDataset = combine(YUYVDataset,
                                                             RGBDataset);
 
@@ -143,6 +146,12 @@ DATA_TEST_CASE(RGB, framework::DatasetMode::ALL, combine(concat(datasets::Small2
     validate_configuration(shape, src_format, dst_format);
 }
 
+DATA_TEST_CASE(RGBtoU8, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ColorConvert_RGB_to_U8),
+               shape, src_format, dst_format)
+{
+    validate_configuration(shape, src_format, dst_format);
+}
+
 DATA_TEST_CASE(YUV, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ColorConvert_YUYVDataset_to_RGBDataset),
                shape, src_format, dst_format)
 {
@@ -213,6 +222,25 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture<uint8_t>, framework::Data
 }
 TEST_SUITE_END()
 
+TEST_SUITE(RGBtoU8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8))
+{
+    // Validate every destination plane of the target against the matching reference plane
+    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
+    {
+        validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8))
+{
+    // Validate every destination plane of the target against the matching reference plane
+    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
+    {
+        validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]);
+    }
+}
+TEST_SUITE_END()
+
 TEST_SUITE(YUV)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_YUYVDataset_to_RGBDataset))
 {
diff --git a/tests/validation/reference/ColorConvert.cpp b/tests/validation/reference/ColorConvert.cpp
index 8047b34688..9090319a86 100644
--- a/tests/validation/reference/ColorConvert.cpp
+++ b/tests/validation/reference/ColorConvert.cpp
@@ -46,6 +46,7 @@ inline std::vector<SimpleTensor<T>> create_image_planes(const TensorShape &shape
 
     switch(format)
     {
+        case Format::U8:
         case Format::RGB888:
         case Format::RGBA8888:
         case Format::YUYV422:
@@ -102,6 +103,9 @@ std::vector<SimpleTensor<T>> color_convert(const TensorShape &shape, const std::
                 case Format::RGBA8888:
                     colorconvert_helper::detail::colorconvert_rgb_to_rgbx(tensor_planes[0], dst[0]);
                     break;
+                case Format::U8:
+                    colorconvert_helper::detail::colorconvert_rgb_to_u8(tensor_planes[0], dst[0]);
+                    break;
                 case Format::NV12:
                     colorconvert_helper::detail::colorconvert_rgb_to_nv12(tensor_planes[0], dst);
                     break;
diff --git a/tests/validation/reference/ColorConvertHelper.h b/tests/validation/reference/ColorConvertHelper.h
index 7a8b547486..b2ae6f2f80 100644
--- a/tests/validation/reference/ColorConvertHelper.h
+++ b/tests/validation/reference/ColorConvertHelper.h
@@ -48,6 +48,10 @@ constexpr float rgb2yuv_bt709_cu = 0.5389f;
 // C_v = 1 / (2 * (1 - K_r))
 constexpr float rgb2yuv_bt709_cv = 0.6350f;
 
+constexpr float rgb2u8_red_coef   = 0.2126f; // Weight applied to R in the reference RGB -> U8 weighted sum
+constexpr float rgb2u8_green_coef = 0.7152f; // Weight applied to G; the three weights add up to 1
+constexpr float rgb2u8_blue_coef  = 0.0722f; // Weight applied to B in the reference RGB -> U8 weighted sum
+
 template <typename T>
 inline void store_rgb_from_src(const SimpleTensor<T> src, SimpleTensor<T> &rvec, SimpleTensor<T> &gvec, SimpleTensor<T> &bvec)
 {
@@ -218,6 +222,29 @@ inline void colorconvert_rgb_to_rgbx(const SimpleTensor<T> src, SimpleTensor<T>
     }
 }
 
+template <typename T>
+inline void colorconvert_rgb_to_u8(const SimpleTensor<T> src, SimpleTensor<T> &dst)
+{
+    const int num_cols = dst.shape().x();
+    const int num_rows = dst.shape().y();
+
+    for(int row = 0; row < num_rows; ++row)
+    {
+        for(int col = 0; col < num_cols; ++col)
+        {
+            // Source and destination are addressed with the same (x, y) position
+            const Coordinates pixel_coord{ col, row };
+
+            const auto *rgb  = reinterpret_cast<const T *>(src(pixel_coord));
+            auto       *grey = reinterpret_cast<T *>(dst(pixel_coord));
+
+            // Weighted sum of R, G and B, clamped to [0, 255] before being stored as T
+            const float luma = rgb2u8_red_coef * rgb[0] + rgb2u8_green_coef * rgb[1] + rgb2u8_blue_coef * rgb[2];
+            grey[0]          = utility::clamp<float>(luma, 0, 255);
+        }
+    }
+}
+
 template <typename T>
 inline void colorconvert_rgbx_to_rgb(const SimpleTensor<T> src, SimpleTensor<T> &dst)
 {
-- 
cgit v1.2.1