From ddb93bbf12fc9d685e7ddbef703a886d67cbda9b Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 2 Oct 2020 16:38:59 +0100 Subject: COMPMID-3637: Move wrapper to src Signed-off-by: Georgios Pinitas Change-Id: I524b0c4b49c7a7035b7d078b9585d77b0d438e10 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4083 Reviewed-by: Michele Di Giorgio Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- arm_compute/core/NEON/NEColorConvertHelper.inl | 1045 ------------------------ 1 file changed, 1045 deletions(-) delete mode 100644 arm_compute/core/NEON/NEColorConvertHelper.inl (limited to 'arm_compute/core/NEON/NEColorConvertHelper.inl') diff --git a/arm_compute/core/NEON/NEColorConvertHelper.inl b/arm_compute/core/NEON/NEColorConvertHelper.inl deleted file mode 100644 index 9fc1be5406..0000000000 --- a/arm_compute/core/NEON/NEColorConvertHelper.inl +++ /dev/null @@ -1,1045 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IMultiImage.h" -#include "arm_compute/core/NEON/NEMath.h" -#include "arm_compute/core/Utils.h" - -#include - -namespace -{ -#ifndef DOXYGEN_SKIP_THIS -constexpr float red_coef_bt709 = 1.5748F; -constexpr float green_coef_bt709 = -0.1873f; -constexpr float green_coef2_bt709 = -0.4681f; -constexpr float blue_coef_bt709 = 1.8556f; - -constexpr float rgb2yuv_bt709_kr = 0.2126f; -constexpr float rgb2yuv_bt709_kb = 0.0722f; -// K_g = 1 - K_r - K_b -constexpr float rgb2yuv_bt709_kg = 0.7152f; -// C_u = 1 / (2 * (1 - K_b)) -constexpr float rgb2yuv_bt709_cu = 0.5389f; -// C_v = 1 / (2 * (1 - K_r)) -constexpr float rgb2yuv_bt709_cv = 0.6350f; - -constexpr float rgb2u8_red_coef = 0.2126f; -constexpr float rgb2u8_green_coef = 0.7152f; -constexpr float rgb2u8_blue_coef = 0.0722f; - -inline float32x4_t rgb_to_greyscale_calculation(const float32x4_t &rcolor, const float32x4_t &gcolor, const float32x4_t &bcolor, - const float rcoef, const float gcoef, const float bcoef) -{ - float32x4_t greyscale = vmulq_n_f32(rcolor, rcoef); - greyscale = vmlaq_n_f32(greyscale, gcolor, gcoef); - greyscale = vmlaq_n_f32(greyscale, bcolor, bcoef); - return greyscale; -} - -inline void rgb_to_u8_conversion(const uint8x16x3_t &in, uint8x16_t &out) -{ - float32x4x4_t out_float32; - - //Conversion from 3(RGB) 4 uint8s to 3(RGB) 4 floats - const float32x4x4_t r_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[0]); - const float32x4x4_t g_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[1]); - const float32x4x4_t b_float32 = arm_compute::convert_uint8x16_to_float32x4x4(in.val[2]); - - //New grayscale image = ( (RED_COEFF * R) + (GREEN_COEFF * G) + (BLUE_COEFF * B) ) - //Computation of 1(Greyscale) 4 uint8 using 3(RGB) 4 uint8s float - out_float32.val[0] = rgb_to_greyscale_calculation(r_float32.val[0], g_float32.val[0], b_float32.val[0], - rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); - - out_float32.val[1] = rgb_to_greyscale_calculation(r_float32.val[1], g_float32.val[1], b_float32.val[1], - rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); - - out_float32.val[2] = rgb_to_greyscale_calculation(r_float32.val[2], g_float32.val[2], b_float32.val[2], - rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); - - out_float32.val[3] = rgb_to_greyscale_calculation(r_float32.val[3], g_float32.val[3], b_float32.val[3], - rgb2u8_red_coef, rgb2u8_green_coef, rgb2u8_blue_coef); - - //Conversion from 1(Greyscale) 4 floats to 1(Greyscale) 4 uint8s - arm_compute::convert_float32x4x4_to_uint8x16(out_float32, out); -} - -inline void rgb_to_yuv_calculation(const float32x4_t &rvec, const float32x4_t &gvec, const float32x4_t &bvec, - float32x4_t &yvec, float32x4_t &uvec, float32x4_t &vvec) -{ - /* - Y'= 0.2126*R' + 0.7152*G' + 0.0722*B' - U'=-0.1146*R' - 0.3854*G' + 0.5000*B' - V'= 0.5000*R' - 0.4542*G' - 0.0458*B' - */ - const auto c128 = vdupq_n_f32(128.f); - - // Y = R * K_r + G * (1 - K_r - K_b) * B * K_b - yvec = vmulq_n_f32(rvec, rgb2yuv_bt709_kr); - yvec = vmlaq_n_f32(yvec, gvec, rgb2yuv_bt709_kg); - yvec = vmlaq_n_f32(yvec, bvec, rgb2yuv_bt709_kb); - - // U = (B - Y) / (2 * (1 - K_b)) - uvec = vsubq_f32(bvec, yvec); - uvec = vmlaq_n_f32(c128, uvec, rgb2yuv_bt709_cu); - - // V = (R - Y) / (2 * (1 - K_r)) - vvec = vsubq_f32(rvec, yvec); - vvec = vmlaq_n_f32(c128, vvec, rgb2yuv_bt709_cv); -} - -inline void yuyv_to_rgb_calculation(const float32x4_t &yvec_val, float32x4_t uvec_val, const float32x4_t &yyvec_val, - float32x4_t vvec_val, unsigned char *output_ptr, const bool alpha) -{ - float32x4x3_t rgb1, rgb2; - - // Compute: cb - 128 and cr - 128; - const auto c128 = vdupq_n_f32(128.f); - uvec_val = vsubq_f32(uvec_val, c128); - vvec_val = vsubq_f32(vvec_val, c128); - - // Compute: - // r = 0.0000f*f_u + 1.5748f*f_v; - // g = 0.1873f*f_u - 0.4681f*f_v; - // b = 1.8556f*f_u + 0.0000f*f_v; - const auto red = vmulq_n_f32(vvec_val, red_coef_bt709); - const auto blue = vmulq_n_f32(uvec_val, blue_coef_bt709); - const auto green = vaddq_f32(vmulq_n_f32(uvec_val, green_coef_bt709), - vmulq_n_f32(vvec_val, green_coef2_bt709)); - - // Compute the final r,g,b values using y1 for the first texel and y2 for the second one. - // the result is stored in two float32x4x3_t which then are converted to one uint8x8x3_t - // and written back to memory using vst3 instruction - - rgb1.val[0] = vaddq_f32(yvec_val, red); - rgb1.val[1] = vaddq_f32(yvec_val, green); - rgb1.val[2] = vaddq_f32(yvec_val, blue); - - rgb2.val[0] = vaddq_f32(yyvec_val, red); - rgb2.val[1] = vaddq_f32(yyvec_val, green); - rgb2.val[2] = vaddq_f32(yyvec_val, blue); - - uint8x8x3_t u8_rgb; - arm_compute::convert_float32x4x3_to_uint8x8x3(rgb1, rgb2, u8_rgb); - - if(!alpha) - { - vst3_lane_u8(&output_ptr[0], u8_rgb, 0); - vst3_lane_u8(&output_ptr[3], u8_rgb, 4); - vst3_lane_u8(&output_ptr[6], u8_rgb, 1); - vst3_lane_u8(&output_ptr[9], u8_rgb, 5); - vst3_lane_u8(&output_ptr[12], u8_rgb, 2); - vst3_lane_u8(&output_ptr[15], u8_rgb, 6); - vst3_lane_u8(&output_ptr[18], u8_rgb, 3); - vst3_lane_u8(&output_ptr[21], u8_rgb, 7); - } - else - { - uint8x8x4_t u8_rgba; - u8_rgba.val[0] = u8_rgb.val[0]; - u8_rgba.val[1] = u8_rgb.val[1]; - u8_rgba.val[2] = u8_rgb.val[2]; - u8_rgba.val[3] = vdup_n_u8(255); - vst4_lane_u8(&output_ptr[0], u8_rgba, 0); - vst4_lane_u8(&output_ptr[4], u8_rgba, 4); - vst4_lane_u8(&output_ptr[8], u8_rgba, 1); - vst4_lane_u8(&output_ptr[12], u8_rgba, 5); - vst4_lane_u8(&output_ptr[16], u8_rgba, 2); - vst4_lane_u8(&output_ptr[20], u8_rgba, 6); - vst4_lane_u8(&output_ptr[24], u8_rgba, 3); - vst4_lane_u8(&output_ptr[28], u8_rgba, 7); - } -} - -inline uint8x16x3_t load_rgb(const unsigned char *const ptr, const bool alpha) -{ - uint8x16x3_t rgb; - - if(alpha) - { - const auto tmp = vld4q_u8(ptr); - rgb.val[0] = tmp.val[0]; - rgb.val[1] = tmp.val[1]; - rgb.val[2] = tmp.val[2]; - } - else - { - rgb = vld3q_u8(ptr); - } - - return rgb; -} - -inline void rgb_to_yuv_conversion(uint8x16x3_t &vec_top, uint8x16x3_t &vec_bottom) -{ - // Convert the uint8x16_t to float32x4x4_t - const float32x4x4_t frvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[0]); - const float32x4x4_t fgvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[1]); - const float32x4x4_t fbvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vec_top.val[2]); - - const float32x4x4_t frvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[0]); - const float32x4x4_t fgvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[1]); - const float32x4x4_t fbvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vec_bottom.val[2]); - - float32x4x4_t fyvec_top, fuvec_top, fvvec_top; - float32x4x4_t fyvec_bottom, fuvec_bottom, fvvec_bottom; - - for(auto i = 0; i < 4; ++i) - { - rgb_to_yuv_calculation(frvec_top.val[i], fgvec_top.val[i], fbvec_top.val[i], - fyvec_top.val[i], fuvec_top.val[i], fvvec_top.val[i]); - rgb_to_yuv_calculation(frvec_bottom.val[i], fgvec_bottom.val[i], fbvec_bottom.val[i], - fyvec_bottom.val[i], fuvec_bottom.val[i], fvvec_bottom.val[i]); - } - - arm_compute::convert_float32x4x4_to_uint8x16(fyvec_top, vec_top.val[0]); - arm_compute::convert_float32x4x4_to_uint8x16(fuvec_top, vec_top.val[1]); - arm_compute::convert_float32x4x4_to_uint8x16(fvvec_top, vec_top.val[2]); - arm_compute::convert_float32x4x4_to_uint8x16(fyvec_bottom, vec_bottom.val[0]); - arm_compute::convert_float32x4x4_to_uint8x16(fuvec_bottom, vec_bottom.val[1]); - arm_compute::convert_float32x4x4_to_uint8x16(fvvec_bottom, vec_bottom.val[2]); -} - -inline void store_rgb_to_nv12(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, - const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, - unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, - unsigned char *const __restrict out_uv) -{ - uint8x16x3_t vec_top, vec_bottom; - vec_top.val[0] = rvec_top; - vec_top.val[1] = gvec_top; - vec_top.val[2] = bvec_top; - vec_bottom.val[0] = rvec_bottom; - vec_bottom.val[1] = gvec_bottom; - vec_bottom.val[2] = bvec_bottom; - - rgb_to_yuv_conversion(vec_top, vec_bottom); - - vst1q_u8(out_y_top, vec_top.val[0]); - vst1q_u8(out_y_bottom, vec_bottom.val[0]); - - const auto uvec = vuzpq_u8(vec_top.val[1], vec_bottom.val[1]); - const auto vvec = vuzpq_u8(vec_top.val[2], vec_bottom.val[2]); - const auto utmp = vrhaddq_u8(uvec.val[0], uvec.val[1]); - const auto vtmp = vrhaddq_u8(vvec.val[0], vvec.val[1]); - - uint8x8x2_t uvvec; - uvvec.val[0] = vhadd_u8(vget_low_u8(utmp), vget_high_u8(utmp)); - uvvec.val[1] = vhadd_u8(vget_low_u8(vtmp), vget_high_u8(vtmp)); - - vst2_u8(out_uv, uvvec); -} - -inline void store_rgb_to_iyuv(const uint8x16_t &rvec_top, const uint8x16_t &gvec_top, const uint8x16_t &bvec_top, - const uint8x16_t &rvec_bottom, const uint8x16_t &gvec_bottom, const uint8x16_t &bvec_bottom, - unsigned char *const __restrict out_y_top, unsigned char *const __restrict out_y_bottom, - unsigned char *const __restrict out_u, - unsigned char *const __restrict out_v) -{ - uint8x16x3_t vec_top, vec_bottom; - vec_top.val[0] = rvec_top; - vec_top.val[1] = gvec_top; - vec_top.val[2] = bvec_top; - vec_bottom.val[0] = rvec_bottom; - vec_bottom.val[1] = gvec_bottom; - vec_bottom.val[2] = bvec_bottom; - - rgb_to_yuv_conversion(vec_top, vec_bottom); - - vst1q_u8(out_y_top, vec_top.val[0]); - vst1q_u8(out_y_bottom, vec_bottom.val[0]); - - const auto uvvec_top = vuzpq_u8(vec_top.val[1], vec_top.val[2]); - const auto uvvec_bottom = vuzpq_u8(vec_bottom.val[1], vec_bottom.val[2]); - const auto uvvec = vhaddq_u8(vrhaddq_u8(uvvec_top.val[0], uvvec_top.val[1]), - vrhaddq_u8(uvvec_bottom.val[0], uvvec_bottom.val[1])); - - vst1_u8(out_u, vget_low_u8(uvvec)); - vst1_u8(out_v, vget_high_u8(uvvec)); -} - -inline void store_rgb_to_yuv4(const uint8x16_t &rvec, const uint8x16_t &gvec, const uint8x16_t &bvec, - unsigned char *const __restrict out_y, - unsigned char *const __restrict out_u, - unsigned char *const __restrict out_v) -{ - // Convert the uint8x16_t to float32x4x4_t - const float32x4x4_t frvec = arm_compute::convert_uint8x16_to_float32x4x4(rvec); - const float32x4x4_t fgvec = arm_compute::convert_uint8x16_to_float32x4x4(gvec); - const float32x4x4_t fbvec = arm_compute::convert_uint8x16_to_float32x4x4(bvec); - - float32x4x4_t fyvec, fuvec, fvvec; - for(auto i = 0; i < 4; ++i) - { - rgb_to_yuv_calculation(frvec.val[i], fgvec.val[i], fbvec.val[i], - fyvec.val[i], fuvec.val[i], fvvec.val[i]); - } - - uint8x16_t yvec, uvec, vvec; - arm_compute::convert_float32x4x4_to_uint8x16(fyvec, yvec); - arm_compute::convert_float32x4x4_to_uint8x16(fuvec, uvec); - arm_compute::convert_float32x4x4_to_uint8x16(fvvec, vvec); - - vst1q_u8(out_y, yvec); - vst1q_u8(out_u, uvec); - vst1q_u8(out_v, vvec); -} -#endif /* DOXYGEN_SKIP_THIS */ -} - -namespace arm_compute -{ -/** Convert RGB to RGBX. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output RGBX buffer. - * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_rgb_to_rgbx(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - Iterator in(input_ptr, win); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta1 = vld3q_u8(in.ptr()); - uint8x16x4_t ta2; - ta2.val[0] = ta1.val[0]; - ta2.val[1] = ta1.val[1]; - ta2.val[2] = ta1.val[2]; - ta2.val[3] = vdupq_n_u8(255); - vst4q_u8(out.ptr(), ta2); - }, - in, out); -} - -/** Convert RGB to U8. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output U8 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_rgb_to_u8(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - Iterator in(input_ptr, win); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta1 = vld3q_u8(in.ptr()); - uint8x16_t ta2; - rgb_to_u8_conversion(ta1, ta2); - vst1q_u8(out.ptr(), ta2); - }, - in, out); -} - -/** Convert RGBX to RGB. - * - * @param[in] input Input RGBX data buffer. - * @param[out] output Output RGB buffer. - * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_rgbx_to_rgb(const void *input, void *output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - Iterator in(input_ptr, win); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta1 = vld4q_u8(in.ptr()); - uint8x16x3_t ta2; - ta2.val[0] = ta1.val[0]; - ta2.val[1] = ta1.val[1]; - ta2.val[2] = ta1.val[2]; - vst3q_u8(out.ptr(), ta2); - }, - in, out); -} - -/** Convert YUYV to RGB. - * - * @param[in] input Input YUYV data buffer. - * @param[out] output Output RGB buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_yuyv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - constexpr auto element_size = alpha ? 32 : 24; - constexpr auto shift = yuyv ? 0 : 1; - - Iterator in(input_ptr, win); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta = vld4q_u8(in.ptr()); - //ta.val[0] = Y0 Y2 Y4 Y6 ... - //ta.val[1] = U0 U2 U4 U6 ... - //ta.val[2] = Y1 Y3 Y5 Y7 ... - //ta.val[3] = V0 V2 V4 V7 ... - - // Convert the uint8x16x4_t to float32x4x4_t - const float32x4x4_t yvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[0 + shift]); - const float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[1 - shift]); - const float32x4x4_t yyvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[2 + shift]); - const float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta.val[3 - shift]); - - yuyv_to_rgb_calculation(yvec.val[0], uvec.val[0], yyvec.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec.val[1], uvec.val[1], yyvec.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec.val[2], uvec.val[2], yyvec.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec.val[3], uvec.val[3], yyvec.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); - }, - in, out); -} - -/** Convert NV12 to RGB. - * - * @param[in] input Input NV12 data buffer. - * @param[out] output Output RGB buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_nv12_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - constexpr auto element_size = alpha ? 32 : 24; - const auto out_stride = output_ptr->info()->strides_in_bytes().y(); - constexpr auto shift = uv ? 0 : 1; - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_uv(input_ptr->plane(1), win_uv); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - const auto ta_uv = vld2q_u8(in_uv.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_uv.val[0] = U0 U2 U4 U6 ... - //ta_uv.val[1] = V0 V2 V4 V6 ... - - // Convert the uint8x16x4_t to float32x4x4_t - float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]); - float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]); - float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]); - float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]); - float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[0 + shift]); - float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_uv.val[1 - shift]); - - yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); - - yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); - }, - in_y, in_uv, out); -} - -/** Convert IYUV to RGB. - * - * @param[in] input Input IYUV data buffer. - * @param[out] output Output RGB buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_iyuv_to_rgb(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - constexpr auto element_size = alpha ? 32 : 24; - const auto out_stride = output_ptr->info()->strides_in_bytes().y(); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_u(input_ptr->plane(1), win_uv); - Iterator in_v(input_ptr->plane(2), win_uv); - Iterator out(output_ptr, win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto *y_top_ptr = in_y.ptr(); - const auto *y_bottom_ptr = in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y(); - const auto *u_ptr = in_u.ptr(); - const auto *v_ptr = in_v.ptr(); - - // Work-around issue in gcc 9(>=) where vld2q might cause issues with register allocation -#if defined(__arch64__) - const auto ta0_y_top = vld1q_u8(y_top_ptr); - const auto ta1_y_top = vld1q_u8(y_top_ptr + 16); - const auto ta0_y_bottom = vld1q_u8(y_bottom_ptr); - const auto ta1_y_bottom = vld1q_u8(y_bottom_ptr + 16); - const auto ta_u = vld1q_u8(u_ptr); - const auto ta_v = vld1q_u8(v_ptr); - - // Convert the uint8x16x4_t to float32x4x4_t - float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_top, ta1_y_top)); - float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_top, ta1_y_top)); - float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vuzp1q_u8(ta0_y_bottom, ta1_y_bottom)); - float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(vuzp2q_u8(ta0_y_bottom, ta1_y_bottom)); - float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_u); - float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_v); -#else /* defined(__arch64__) */ - const auto ta_y_top = vld2q_u8(y_top_ptr); - const auto ta_y_bottom = vld2q_u8(y_bottom_ptr); - const auto ta_u = vld1q_u8(u_ptr); - const auto ta_v = vld1q_u8(v_ptr); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_u.val[0] = U0 U2 U4 U6 ... - //ta_v.val[0] = V0 V2 V4 V6 ... - - // Convert the uint8x16x4_t to float32x4x4_t - float32x4x4_t yvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[0]); - float32x4x4_t yyvec_top = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_top.val[1]); - float32x4x4_t yvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[0]); - float32x4x4_t yyvec_bottom = arm_compute::convert_uint8x16_to_float32x4x4(ta_y_bottom.val[1]); - float32x4x4_t uvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_u); - float32x4x4_t vvec = arm_compute::convert_uint8x16_to_float32x4x4(ta_v); -#endif /* defined(__arch64__) */ - - yuyv_to_rgb_calculation(yvec_top.val[0], uvec.val[0], yyvec_top.val[0], vvec.val[0], out.ptr() + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[1], uvec.val[1], yyvec_top.val[1], vvec.val[1], out.ptr() + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[2], uvec.val[2], yyvec_top.val[2], vvec.val[2], out.ptr() + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_top.val[3], uvec.val[3], yyvec_top.val[3], vvec.val[3], out.ptr() + 3 * element_size, alpha); - - yuyv_to_rgb_calculation(yvec_bottom.val[0], uvec.val[0], yyvec_bottom.val[0], vvec.val[0], out.ptr() + out_stride + 0 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[1], uvec.val[1], yyvec_bottom.val[1], vvec.val[1], out.ptr() + out_stride + 1 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[2], uvec.val[2], yyvec_bottom.val[2], vvec.val[2], out.ptr() + out_stride + 2 * element_size, alpha); - yuyv_to_rgb_calculation(yvec_bottom.val[3], uvec.val[3], yyvec_bottom.val[3], vvec.val[3], out.ptr() + out_stride + 3 * element_size, alpha); - }, - in_y, in_u, in_v, out); -} - -/** Convert YUYV to NV12. - * - * @param[in] input Input YUYV data buffer. - * @param[out] output Output NV12 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_yuyv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - constexpr auto shift = yuyv ? 0 : 1; - - // NV12's UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_uv(output_ptr->plane(1), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_top = vld4q_u8(in.ptr()); - const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); - //ta.val[0] = Y0 Y2 Y4 Y6 ... - //ta.val[1] = U0 U2 U4 U6 ... - //ta.val[2] = Y1 Y3 Y5 Y7 ... - //ta.val[3] = V0 V2 V4 V7 ... - - uint8x16x2_t yvec; - yvec.val[0] = ta_top.val[0 + shift]; - yvec.val[1] = ta_top.val[2 + shift]; - vst2q_u8(out_y.ptr(), yvec); - - uint8x16x2_t yyvec; - yyvec.val[0] = ta_bottom.val[0 + shift]; - yyvec.val[1] = ta_bottom.val[2 + shift]; - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); - - uint8x16x2_t uvvec; - uvvec.val[0] = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); - uvvec.val[1] = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); - vst2q_u8(out_uv.ptr(), uvvec); - }, - in, out_y, out_uv); -} - -/** Convert IYUV to NV12. - * - * @param[in] input Input IYUV data buffer. - * @param[out] output Output NV12 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_iyuv_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_u(input_ptr->plane(1), win_uv); - Iterator in_v(input_ptr->plane(2), win_uv); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_uv(output_ptr->plane(1), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - uint8x16x2_t ta_uv; - ta_uv.val[0] = vld1q_u8(in_u.ptr()); - ta_uv.val[1] = vld1q_u8(in_v.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_uv.val[0] = U0 U2 U4 U6 ... - //ta_uv.val[1] = V0 V2 V4 V6 ... - - vst2q_u8(out_y.ptr(), ta_y_top); - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); - vst2q_u8(out_uv.ptr(), ta_uv); - }, - in_y, in_u, in_v, out_y, out_uv); -} - -/** Convert NV12 to IYUV. - * - * @param[in] input Input NV12 data buffer. - * @param[out] output Output IYUV buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_nv12_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - constexpr auto shift = uv ? 0 : 1; - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_uv(input_ptr->plane(1), win_uv); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win_uv); - Iterator out_v(output_ptr->plane(2), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - const auto ta_uv = vld2q_u8(in_uv.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_uv.val[0] = U0 U2 U4 U6 ... - //ta_uv.val[1] = V0 V2 V4 V6 ... - - vst2q_u8(out_y.ptr(), ta_y_top); - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); - vst1q_u8(out_u.ptr(), ta_uv.val[0 + shift]); - vst1q_u8(out_v.ptr(), ta_uv.val[1 - shift]); - }, - in_y, in_uv, out_y, out_u, out_v); -} - -/** Convert YUYV to IYUV. - * - * @param[in] input Input YUYV data buffer. - * @param[out] output Output IYUV buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_yuyv_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - constexpr auto shift = yuyv ? 0 : 1; - - // Destination's UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win_uv); - Iterator out_v(output_ptr->plane(2), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_top = vld4q_u8(in.ptr()); - const auto ta_bottom = vld4q_u8(in.ptr() + input_ptr->info()->strides_in_bytes().y()); - //ta.val[0] = Y0 Y2 Y4 Y6 ... - //ta.val[1] = U0 U2 U4 U6 ... - //ta.val[2] = Y1 Y3 Y5 Y7 ... - //ta.val[3] = V0 V2 V4 V7 ... - - uint8x16x2_t yvec; - yvec.val[0] = ta_top.val[0 + shift]; - yvec.val[1] = ta_top.val[2 + shift]; - vst2q_u8(out_y.ptr(), yvec); - - uint8x16x2_t yyvec; - yyvec.val[0] = ta_bottom.val[0 + shift]; - yyvec.val[1] = ta_bottom.val[2 + shift]; - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), yyvec); - - uint8x16_t uvec; - uvec = vhaddq_u8(ta_top.val[1 - shift], ta_bottom.val[1 - shift]); - vst1q_u8(out_u.ptr(), uvec); - - uint8x16_t vvec; - vvec = vhaddq_u8(ta_top.val[3 - shift], ta_bottom.val[3 - shift]); - vst1q_u8(out_v.ptr(), vvec); - }, - in, out_y, out_u, out_v); -} - -/** Convert NV12 to YUV4. - * - * @param[in] input Input NV12 data buffer. - * @param[out] output Output YUV4 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_nv12_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - constexpr auto shift = uv ? 0 : 1; - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_uv(input_ptr->plane(1), win_uv); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win); - Iterator out_v(output_ptr->plane(2), win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - const auto ta_uv = vld2q_u8(in_uv.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_uv.val[0] = U0 U2 U4 U6 ... - //ta_uv.val[1] = V0 V2 V4 V6 ... - - vst2q_u8(out_y.ptr(), ta_y_top); - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); - - uint8x16x2_t uvec; - uvec.val[0] = ta_uv.val[0 + shift]; - uvec.val[1] = ta_uv.val[0 + shift]; - vst2q_u8(out_u.ptr(), uvec); - vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); - - uint8x16x2_t vvec; - vvec.val[0] = ta_uv.val[1 - shift]; - vvec.val[1] = ta_uv.val[1 - shift]; - vst2q_u8(out_v.ptr(), vvec); - vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); - }, - in_y, in_uv, out_y, out_u, out_v); -} - -/** Convert IYUV to YUV4. - * - * @param[in] input Input IYUV data buffer. - * @param[out] output Output YUV4 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -void colorconvert_iyuv_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in_y(input_ptr->plane(0), win); - Iterator in_u(input_ptr->plane(1), win_uv); - Iterator in_v(input_ptr->plane(2), win_uv); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win); - Iterator out_v(output_ptr->plane(2), win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_y_top = vld2q_u8(in_y.ptr()); - const auto ta_y_bottom = vld2q_u8(in_y.ptr() + input_ptr->plane(0)->info()->strides_in_bytes().y()); - const auto ta_u = vld1q_u8(in_u.ptr()); - const auto ta_v = vld1q_u8(in_v.ptr()); - //ta_y.val[0] = Y0 Y2 Y4 Y6 ... - //ta_y.val[1] = Y1 Y3 Y5 Y7 ... - //ta_u = U0 U2 U4 U6 ... - //ta_v = V0 V2 V4 V6 ... - - vst2q_u8(out_y.ptr(), ta_y_top); - vst2q_u8(out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), ta_y_bottom); - - uint8x16x2_t uvec; - uvec.val[0] = ta_u; - uvec.val[1] = ta_u; - vst2q_u8(out_u.ptr(), uvec); - vst2q_u8(out_u.ptr() + output_ptr->plane(1)->info()->strides_in_bytes().y(), uvec); - - uint8x16x2_t vvec; - vvec.val[0] = ta_v; - vvec.val[1] = ta_v; - vst2q_u8(out_v.ptr(), vvec); - vst2q_u8(out_v.ptr() + output_ptr->plane(2)->info()->strides_in_bytes().y(), vvec); - }, - in_y, in_u, in_v, out_y, out_u, out_v); -} - -/** Convert RGB to NV12. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output NV12 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_rgb_to_nv12(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_uv(output_ptr->plane(1), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_rgb_top = load_rgb(in.ptr(), alpha); - const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); - //ta_rgb.val[0] = R0 R1 R2 R3 ... - //ta_rgb.val[1] = G0 G1 G2 G3 ... - //ta_rgb.val[2] = B0 B1 B2 B3 ... - - store_rgb_to_nv12(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], - ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], - out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), - out_uv.ptr()); - }, - in, out_y, out_uv); -} - -/** Convert RGB to IYUV. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output IYUV buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_rgb_to_iyuv(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - // UV's width and height are subsampled - Window win_uv(win); - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - win_uv.validate(); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win_uv); - Iterator out_v(output_ptr->plane(2), win_uv); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_rgb_top = load_rgb(in.ptr(), alpha); - const auto ta_rgb_bottom = load_rgb(in.ptr() + input_ptr->info()->strides_in_bytes().y(), alpha); - //ta_rgb.val[0] = R0 R1 R2 R3 ... - //ta_rgb.val[1] = G0 G1 G2 G3 ... - //ta_rgb.val[2] = B0 B1 B2 B3 ... - - store_rgb_to_iyuv(ta_rgb_top.val[0], ta_rgb_top.val[1], ta_rgb_top.val[2], - ta_rgb_bottom.val[0], ta_rgb_bottom.val[1], ta_rgb_bottom.val[2], - out_y.ptr(), out_y.ptr() + output_ptr->plane(0)->info()->strides_in_bytes().y(), - out_u.ptr(), out_v.ptr()); - }, - in, out_y, out_u, out_v); -} - -/** Convert RGB to YUV4. - * - * @param[in] input Input RGB data buffer. - * @param[out] output Output YUV4 buffer. - * @param[in] win Window for iterating the buffers. - * - */ -template -void colorconvert_rgb_to_yuv4(const void *__restrict input, void *__restrict output, const Window &win) -{ - ARM_COMPUTE_ERROR_ON(nullptr == input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - win.validate(); - - const auto input_ptr = static_cast(input); - const auto output_ptr = static_cast(output); - - Iterator in(input_ptr, win); - Iterator out_y(output_ptr->plane(0), win); - Iterator out_u(output_ptr->plane(1), win); - Iterator out_v(output_ptr->plane(2), win); - - execute_window_loop(win, [&](const Coordinates &) - { - const auto ta_rgb = load_rgb(in.ptr(), alpha); - //ta_rgb.val[0] = R0 R1 R2 R3 ... - //ta_rgb.val[1] = G0 G1 G2 G3 ... - //ta_rgb.val[2] = B0 B1 B2 B3 ... - - store_rgb_to_yuv4(ta_rgb.val[0], ta_rgb.val[1], ta_rgb.val[2], - out_y.ptr(), out_u.ptr(), out_v.ptr()); - }, - in, out_y, out_u, out_v); -} -} // namespace arm_compute -- cgit v1.2.1