From 4284bfab4594d4babb23123001ef63db7bebeccb Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 26 Sep 2018 15:33:15 +0100 Subject: COMPMID-287: NEON colour convert to U8 Change-Id: I47033fa70881fd32b13266adb6ccbf10c202aabc Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/150344 Tested-by: bsgcomp Reviewed-by: Pablo Tello --- arm_compute/core/NEON/NEColorConvertHelper.inl | 142 +++++++++++++++------ .../core/NEON/kernels/NEColorConvertKernel.h | 3 +- .../runtime/NEON/functions/NEColorConvert.h | 5 +- src/core/NEON/kernels/NEColorConvertKernel.cpp | 4 + tests/benchmark/NEON/ColorConvert.cpp | 10 ++ tests/validation/NEON/ColorConvert.cpp | 28 ++++ tests/validation/reference/ColorConvert.cpp | 4 + tests/validation/reference/ColorConvertHelper.h | 27 ++++ 8 files changed, 181 insertions(+), 42 deletions(-) diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl index 0da5affe18..7540d33830 100644 --- a/arm_compute/core/NEON/NEColorConvertHelper.inl +++ b/arm_compute/core/NEON/NEColorConvertHelper.inl @@ -45,14 +45,20 @@ constexpr float rgb2yuv_bt709_cu = 0.5389f; // C_v = 1 / (2 * (1 - K_r)) constexpr float rgb2yuv_bt709_cv = 0.6350f; -inline void convert_uint8x16_to_float32x4x4(const uint8x16_t &in, float32x4x4_t &out) +constexpr float rgb2u8_red_coef = 0.2126f; +constexpr float rgb2u8_green_coef = 0.7152f; +constexpr float rgb2u8_blue_coef = 0.0722f; + +inline float32x4x4_t convert_uint8x16_to_float32x4x4(const uint8x16_t &in) { - const auto tmp1 = vmovl_u8(vget_low_u8(in)); - out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1))); - out.val[1] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1))); - const auto tmp2 = vmovl_u8(vget_high_u8(in)); - out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2))); - out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2))); + float32x4x4_t out; + const auto tmp1 = vmovl_u8(vget_low_u8(in)); + out.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp1))); + out.val[1] = 
vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp1))); + const auto tmp2 = vmovl_u8(vget_high_u8(in)); + out.val[2] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(tmp2))); + out.val[3] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(tmp2))); + return out; } inline void convert_float32x4x3_to_uint8x8x3(const float32x4x3_t &in1, const float32x4x3_t &in2, uint8x8x3_t &out) @@ -74,6 +80,42 @@ inline void convert_float32x4x4_to_unit8x16(const float32x4x4_t &in, uint8x16_t out = vcombine_u8(vqmovn_u16(low), vqmovn_u16(high)); } +inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor,const float32x4_t &gcolor, const float32x4_t &bcolor, + const float rcoef, const float gcoef, const float bcoef) +{ + float32x4_t greyscale = vmulq_n_f32(rcolor, rcoef); + greyscale = vmlaq_n_f32(greyscale, gcolor, gcoef); + greyscale = vmlaq_n_f32(greyscale, bcolor, bcoef); + return greyscale; +} + +inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out) +{ + float32x4x4_t out_float32; + + //Convert each of the 3 (RGB) channels from 16 uint8 values to 16 floats (4 vectors of 4) + const float32x4x4_t r_float32 = convert_uint8x16_to_float32x4x4(in.val[0]); + const float32x4x4_t g_float32 = convert_uint8x16_to_float32x4x4(in.val[1]); + const float32x4x4_t b_float32 = convert_uint8x16_to_float32x4x4(in.val[2]); + + //Greyscale value = ( (RED_COEFF * R) + (GREEN_COEFF * G) + (BLUE_COEFF * B) ) + //Compute the greyscale channel, 4 floats at a time, from the 3 (RGB) float channels + out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0], + rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); + + out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1], + rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); + + out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2], + rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); + + out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3],
g_float32.val[3], b_float32.val[3], + rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); + + //Convert the greyscale channel from 16 floats (4 vectors of 4) back to 16 uint8 values + convert_float32x4x4_to_unit8x16(out_float32, out); +} + inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec, float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec) { @@ -183,15 +225,13 @@ inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha) inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom) { // Convert the uint8x16_t to float32x4x4_t - float32x4x4_t frvec_top, fgvec_top, fbvec_top; - convert_uint8x16_to_float32x4x4(vec_top.val[0], frvec_top); - convert_uint8x16_to_float32x4x4(vec_top.val[1], fgvec_top); - convert_uint8x16_to_float32x4x4(vec_top.val[2], fbvec_top); + const float32x4x4_t frvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[0]); + const float32x4x4_t fgvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[1]); + const float32x4x4_t fbvec_top = convert_uint8x16_to_float32x4x4(vec_top.val[2]); - float32x4x4_t frvec_bottom, fgvec_bottom, fbvec_bottom; - convert_uint8x16_to_float32x4x4(vec_bottom.val[0], frvec_bottom); - convert_uint8x16_to_float32x4x4(vec_bottom.val[1], fgvec_bottom); - convert_uint8x16_to_float32x4x4(vec_bottom.val[2], fbvec_bottom); + const float32x4x4_t frvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[0]); + const float32x4x4_t fgvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[1]); + const float32x4x4_t fbvec_bottom = convert_uint8x16_to_float32x4x4(vec_bottom.val[2]); float32x4x4_t fyvec_top, fuvec_top, fvvec_top; float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom; @@ -276,10 +316,9 @@ inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, co unsigned char *const __restrict out_v) { // Convert the uint8x16_t to float32x4x4_t - float32x4x4_t frvec, fgvec, fbvec; - convert_uint8x16_to_float32x4x4(rvec,
frvec); - convert_uint8x16_to_float32x4x4(gvec, fgvec); - convert_uint8x16_to_float32x4x4(bvec, fbvec); + const float32x4x4_t frvec = convert_uint8x16_to_float32x4x4(rvec); + const float32x4x4_t fgvec = convert_uint8x16_to_float32x4x4(gvec); + const float32x4x4_t fbvec = convert_uint8x16_to_float32x4x4(bvec); float32x4x4_t fyvec, fuvec, fvvec; for(auto i = 0; i < 4; ++i) @@ -333,6 +372,34 @@ void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict out in, out); } +/** Convert RGB to U8. + * + * @param[in] input Input RGB data buffer. + * @param[out] output Output U8 buffer. + * @param[in] win Window for iterating the buffers. + * + */ +void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win) +{ + ARM_COMPUTE_ERROR_ON(nullptr == input); + ARM_COMPUTE_ERROR_ON(nullptr == output); + + const auto input_ptr = static_cast(input); + const auto output_ptr = static_cast(output); + + Iterator in(input_ptr, win); + Iterator out(output_ptr, win); + + execute_window_loop(win, [&](const Coordinates & id) + { + const auto ta1 = vld3q_u8(in.ptr()); + uint8x16_t ta2; + rgb_to_u8_conversion(ta1, ta2); + vst1q_u8(out.ptr(), ta2); + }, + in, out); +} + /** Convert RGBX to RGB. * * @param[in] input Input RGBX data buffer. @@ -387,18 +454,17 @@ void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict out execute_window_loop(win, [&](const Coordinates & id) { - float32x4x4_t uvec, yvec, vvec, yyvec; - const auto ta = vld4q_u8(in.ptr()); + const auto ta = vld4q_u8(in.ptr()); //ta.val[0] = Y0 Y2 Y4 Y6 ... //ta.val[1] = U0 U2 U4 U6 ... //ta.val[2] = Y1 Y3 Y5 Y7 ... //ta.val[3] = V0 V2 V4 V7 ... 
// Convert the uint8x16x4_t to float32x4x4_t - convert_uint8x16_to_float32x4x4(ta.val[0 + shift], yvec); - convert_uint8x16_to_float32x4x4(ta.val[1 - shift], uvec); - convert_uint8x16_to_float32x4x4(ta.val[2 + shift], yyvec); - convert_uint8x16_to_float32x4x4(ta.val[3 - shift], vvec); + const float32x4x4_t yvec = convert_uint8x16_to_float32x4x4(ta.val[0 + shift]); + const float32x4x4_t uvec = convert_uint8x16_to_float32x4x4(ta.val[1 - shift]); + const float32x4x4_t yyvec = convert_uint8x16_to_float32x4x4(ta.val[2 + shift]); + const float32x4x4_t vvec = convert_uint8x16_to_float32x4x4(ta.val[3 - shift]); yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); @@ -450,13 +516,12 @@ void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict out //ta_uv.val[1] = V0 V2 V4 V6 ... // Convert the uint8x16x4_t to float32x4x4_t - float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec; - convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top); - convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top); - convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom); - convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom); - convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift], uvec); - convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift], vvec); + float32x4x4_t yvec_top = convert_uint8x16_to_float32x4x4(ta_y_top.val[0]); + float32x4x4_t yyvec_top = convert_uint8x16_to_float32x4x4(ta_y_top.val[1]); + float32x4x4_t yvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]); + float32x4x4_t yyvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]); + float32x4x4_t uvec = convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift]); + float32x4x4_t vvec = convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift]); 
yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); @@ -514,13 +579,12 @@ void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict out //ta_v.val[0] = V0 V2 V4 V6 ... // Convert the uint8x16x4_t to float32x4x4_t - float32x4x4_t yvec_top, yyvec_top, yvec_bottom, yyvec_bottom, uvec, vvec; - convert_uint8x16_to_float32x4x4(ta_y_top.val[0], yvec_top); - convert_uint8x16_to_float32x4x4(ta_y_top.val[1], yyvec_top); - convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0], yvec_bottom); - convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1], yyvec_bottom); - convert_uint8x16_to_float32x4x4(ta_u, uvec); - convert_uint8x16_to_float32x4x4(ta_v, vvec); + float32x4x4_t yvec_top = convert_uint8x16_to_float32x4x4(ta_y_top.val[0]); + float32x4x4_t yyvec_top = convert_uint8x16_to_float32x4x4(ta_y_top.val[1]); + float32x4x4_t yvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]); + float32x4x4_t yyvec_bottom = convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]); + float32x4x4_t uvec = convert_uint8x16_to_float32x4x4(ta_u); + float32x4x4_t vvec = convert_uint8x16_to_float32x4x4(ta_v); yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h index 608172ccde..4f1ac973e7 100644 --- a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h +++ b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h @@ -57,7 +57,8 @@ public: * * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 * @param[out] output Destination tensor. 
Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/) + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888), + * U8 (if the format of @p input is RGB888) */ void configure(const ITensor *input, ITensor *output); /** Set the input and output of the kernel diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h index ab0bf14609..73eb3f94ea 100644 --- a/arm_compute/runtime/NEON/functions/NEColorConvert.h +++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,7 +40,8 @@ public: * * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/) + * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888), + * U8 (if the format of @p input is RGB888) */ void configure(const ITensor *input, ITensor *output); /** Initialize the function's source, destination diff --git a/src/core/NEON/kernels/NEColorConvertKernel.cpp b/src/core/NEON/kernels/NEColorConvertKernel.cpp index 4582c88487..7a66b6cc3a 100644 --- a/src/core/NEON/kernels/NEColorConvertKernel.cpp +++ b/src/core/NEON/kernels/NEColorConvertKernel.cpp @@ -112,6 +112,10 @@ void NEColorConvertKernel::configure(const ITensor *input, ITensor *output) _func = colorconvert_rgb_to_rgbx; num_elems_processed_per_iteration = 16; break; + case Format::U8: + _func = colorconvert_rgb_to_u8; + num_elems_processed_per_iteration = 16; + break; default: ARM_COMPUTE_ERROR("Not supported"); break; diff --git a/tests/benchmark/NEON/ColorConvert.cpp b/tests/benchmark/NEON/ColorConvert.cpp index
6aef5723d3..8223551635 100644 --- a/tests/benchmark/NEON/ColorConvert.cpp +++ b/tests/benchmark/NEON/ColorConvert.cpp @@ -47,6 +47,9 @@ const auto ColorConvert_RGBA_to_RGB = combine(framework::dataset::make("FormatTy const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }), framework::dataset::make("FormatType", { Format::RGBA8888 })); +const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }), + framework::dataset::make("FormatType", { Format::U8 })); + const auto ColorConvert_YUYVDataset_to_RGBDataset = combine(YUYVDataset, RGBDataset); @@ -82,6 +85,13 @@ REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::Data REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_RGBA)); TEST_SUITE_END() +TEST_SUITE(RGBtoU8) +// *INDENT-OFF* +// clang-format off +REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8)); +REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8)); +TEST_SUITE_END() + TEST_SUITE(YUYV) // *INDENT-OFF* // clang-format off diff --git a/tests/validation/NEON/ColorConvert.cpp b/tests/validation/NEON/ColorConvert.cpp index c34df2b557..ecd95f23e0 100644 --- a/tests/validation/NEON/ColorConvert.cpp +++ b/tests/validation/NEON/ColorConvert.cpp @@ -52,6 +52,9 @@ const auto ColorConvert_RGBA_to_RGB = combine(framework::dataset::make("FormatTy const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }), framework::dataset::make("FormatType", { Format::RGBA8888 })); +const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }), + 
framework::dataset::make("FormatType", { Format::U8 })); + const auto ColorConvert_YUYVDataset_to_RGBDataset = combine(YUYVDataset, RGBDataset); @@ -143,6 +146,12 @@ DATA_TEST_CASE(RGB, framework::DatasetMode::ALL, combine(concat(datasets::Small2 validate_configuration(shape, src_format, dst_format); } +DATA_TEST_CASE(RGBtoU8, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ColorConvert_RGB_to_U8), + shape, src_format, dst_format) +{ + validate_configuration(shape, src_format, dst_format); +} + DATA_TEST_CASE(YUV, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ColorConvert_YUYVDataset_to_RGBDataset), shape, src_format, dst_format) { @@ -213,6 +222,25 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::Data } TEST_SUITE_END() +TEST_SUITE(RGBtoU8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8)) +{ + // Validate output + for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) + { + validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); + } +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8)) +{ + // Validate output + for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx) + { + validate(Accessor(*_target.plane(plane_idx)), _reference[plane_idx]); + } +} +TEST_SUITE_END() + TEST_SUITE(YUV) FIXTURE_DATA_TEST_CASE(RunSmall, NEColorConvertFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_YUYVDataset_to_RGBDataset)) { diff --git a/tests/validation/reference/ColorConvert.cpp b/tests/validation/reference/ColorConvert.cpp index 8047b34688..9090319a86 100644 --- a/tests/validation/reference/ColorConvert.cpp +++ b/tests/validation/reference/ColorConvert.cpp @@ -46,6 
+46,7 @@ inline std::vector> create_image_planes(const TensorShape &shape switch(format) { + case Format::U8: case Format::RGB888: case Format::RGBA8888: case Format::YUYV422: @@ -102,6 +103,9 @@ std::vector> color_convert(const TensorShape &shape, const std:: case Format::RGBA8888: colorconvert_helper::detail::colorconvert_rgb_to_rgbx(tensor_planes[0], dst[0]); break; + case Format::U8: + colorconvert_helper::detail::colorconvert_rgb_to_u8(tensor_planes[0], dst[0]); + break; case Format::NV12: colorconvert_helper::detail::colorconvert_rgb_to_nv12(tensor_planes[0], dst); break; diff --git a/tests/validation/reference/ColorConvertHelper.h b/tests/validation/reference/ColorConvertHelper.h index 7a8b547486..b2ae6f2f80 100644 --- a/tests/validation/reference/ColorConvertHelper.h +++ b/tests/validation/reference/ColorConvertHelper.h @@ -48,6 +48,10 @@ constexpr float rgb2yuv_bt709_cu = 0.5389f; // C_v = 1 / (2 * (1 - K_r)) constexpr float rgb2yuv_bt709_cv = 0.6350f; +constexpr float rgb2u8_red_coef = 0.2126f; +constexpr float rgb2u8_green_coef = 0.7152f; +constexpr float rgb2u8_blue_coef = 0.0722f; + template inline void store_rgb_from_src(const SimpleTensor src, SimpleTensor &rvec, SimpleTensor &gvec, SimpleTensor &bvec) { @@ -218,6 +222,29 @@ inline void colorconvert_rgb_to_rgbx(const SimpleTensor src, SimpleTensor } } +template +inline void colorconvert_rgb_to_u8(const SimpleTensor src, SimpleTensor &dst) +{ + const int width = dst.shape().x(); + const int height = dst.shape().y(); + + for(int y = 0; y < height; ++y) + { + for(int x = 0; x < width; ++x) + { + const Coordinates src_coord{ x, y }; + const Coordinates dst_coord{ x, y }; + + const auto *src_pixel = reinterpret_cast(src(src_coord)); + auto *dst_pixel = reinterpret_cast(dst(dst_coord)); + + const float result = rgb2u8_red_coef * src_pixel[0] + rgb2u8_green_coef * src_pixel[1] + rgb2u8_blue_coef * src_pixel[2]; + + dst_pixel[0] = utility::clamp(result, 0, 255); + } + } +} + template inline void 
colorconvert_rgbx_to_rgb(const SimpleTensor src, SimpleTensor &dst) { -- cgit v1.2.1