From 583137cc60580023abfd9d05abf933e7e117e29f Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 31 Aug 2017 18:12:42 +0100 Subject: COMPMID-417: Add support for floats in scale. Change-Id: I7d714ba13861509080a89817f54e9d32da83e970 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/86026 Reviewed-by: Pablo Tello Tested-by: Kaizen --- arm_compute/core/CL/kernels/CLScaleKernel.h | 8 ++-- arm_compute/core/Helpers.h | 66 +++++++++++++++++++++++---- arm_compute/core/Helpers.inl | 49 -------------------- arm_compute/core/NEON/kernels/NEScaleKernel.h | 4 +- arm_compute/core/PixelValue.h | 38 ++++++--------- arm_compute/core/Types.h | 4 ++ arm_compute/runtime/CL/functions/CLScale.h | 6 +-- arm_compute/runtime/NEON/functions/NEScale.h | 6 +-- 8 files changed, 87 insertions(+), 94 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h index e74a7cb82a..0a4bbf0b5a 100644 --- a/arm_compute/core/CL/kernels/CLScaleKernel.h +++ b/arm_compute/core/CL/kernels/CLScaleKernel.h @@ -31,16 +31,14 @@ namespace arm_compute { class ICLTensor; -/** Interface for the warp affine kernel.*/ +/** Interface for the scale kernel */ class CLScaleKernel : public ICLSimple2DKernel { public: /** Initialise the kernel's inputs, output and interpolation policy * - * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor - * - * @param[in] input Source tensor. Data types supported: U8, S16. - * @param[out] output Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor). + * @param[in] input Source tensor. Data types supported: U8/S16/F16/F32 + * @param[out] output Destination tensor. Data types supported: Same as @p input * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
* @param[in] policy Interpolation type to use * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index b6461bc47a..6e4d987180 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -26,12 +26,14 @@ #include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Steps.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" + #include #include #include @@ -82,9 +84,9 @@ struct is_contained> : is_contained } /** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. + * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. * - * @param[in] pixel_ptr Pointer to the top-left pixel value. Format: Single channel U8 + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. 
* @param[in] stride Stride to access the bottom-left and bottom-right pixel values * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer @@ -93,26 +95,57 @@ struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...> * * @return The bilinear interpolated pixel value */ -inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy); +template <typename T> +inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const T a00 = *pixel_ptr; + const T a01 = *(pixel_ptr + 1); + const T a10 = *(pixel_ptr + stride); + const T a11 = *(pixel_ptr + stride + 1); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; -/** Return the pixel at (x,y) using bilinear interpolation. The image must be single channel U8 + return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); +} + +/** Return the pixel at (x,y) using bilinear interpolation. * * @warning Only works if the iterator was created with an IImage * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input. * @param[in] stride Stride in bytes of the image; * @param[in] x X position of the wanted pixel * @param[in] y Y position of the wanted pixel * * @return The pixel at (x, y) using bilinear interpolation. 
*/ -inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y); +template <typename T> +inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + const int32_t xi = std::floor(x); + const int32_t yi = std::floor(y); -/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel U8 + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy); +} + +/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input * * @warning Only works if the iterator was created with an IImage * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image. * @param[in] stride Stride in bytes of the image * @param[in] width Width of the image * @param[in] height Height of the image @@ -121,7 +154,22 @@ inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride * * @return The pixel at (x, y) using bilinear interpolation. 
*/ -inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y); +template <typename T> +inline uint8_t pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + x = std::max(-1.f, std::min(x, static_cast<float>(width))); + y = std::max(-1.f, std::min(y, static_cast<float>(height))); + + const float xi = std::floor(x); + const float yi = std::floor(y); + + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy); +} /** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8 * diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index c2ca3b44b3..de6c85ec76 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -29,55 +29,6 @@ namespace arm_compute { -inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = *pixel_ptr; - const float a01 = *(pixel_ptr + 1); - const float a10 = *(pixel_ptr + stride); - const float a11 = *(pixel_ptr + stride + 1); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - - return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; -} - -inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - const int32_t xi = std::floor(x); - const int32_t yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - return delta_bilinear_c1u8(first_pixel_ptr + xi + yi * stride, stride, dx, dy); -} - -inline uint8_t 
pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - x = std::max(-1.f, std::min(x, static_cast<float>(width))); - y = std::max(-1.f, std::min(y, static_cast<float>(height))); - - const float xi = std::floor(x); - const float yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - return delta_bilinear_c1u8(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy); -} - inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) { ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h index 3cac023d8a..660ecfdf72 100644 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -52,11 +52,11 @@ public: * * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor * - * @param[in] input Source tensor. Data types supported: U8/S16. + * @param[in] input Source tensor. Data types supported: U8/S16/F32. * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[out] output Destination tensor. Data types supported: U8/S16. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[out] output Destination tensor. Data types supported: Same as @p input. 
All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] policy Interpolation type to use * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index 1b1a5a3845..63405560ea 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -24,11 +24,9 @@ #ifndef __ARM_COMPUTE_PIXELVALUE_H__ #define __ARM_COMPUTE_PIXELVALUE_H__ -#include +#include "arm_compute/core/Types.h" -#if ARM_COMPUTE_ENABLE_FP16 -#include // needed for float16_t -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#include namespace arm_compute { @@ -86,17 +84,15 @@ public: { value.s32 = v; } -#if ARM_COMPUTE_ENABLE_FP16 /** Initialize the union with a F16 pixel value * * @param[in] v F16 value. */ - PixelValue(float16_t v) + PixelValue(half v) : PixelValue() { value.f16 = v; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Initialize the union with a F32 pixel value * * @param[in] v F32 value. 
@@ -111,19 +107,17 @@ public: */ union { - uint8_t rgb[3]; /**< 3 channels: RGB888 */ - uint8_t yuv[3]; /**< 3 channels: Any YUV format */ - uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ - float f32; /**< Single channel float 32 */ -#if ARM_COMPUTE_ENABLE_FP16 - float16_t f16; /**< Single channel F16 */ -#endif /* ARM_COMPUTE_ENABLE_FP16 */ - uint8_t u8; /**< Single channel U8 */ - int8_t s8; /**< Single channel S8 */ - uint16_t u16; /**< Single channel U16 */ - int16_t s16; /**< Single channel S16 */ - uint32_t u32; /**< Single channel U32 */ - int32_t s32; /**< Single channel S32 */ + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + float f32; /**< Single channel float 32 */ + half f16; /**< Single channel F16 */ + uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ } value; /** Interpret the pixel value as a U8 * @@ -173,16 +167,14 @@ public: { v = value.s32; } -#if ARM_COMPUTE_ENABLE_FP16 /** Interpret the pixel value as a F16 * * @param[out] v Returned value */ - void get(float16_t &v) const + void get(half &v) const { v = value.f16; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Interpret the pixel value as a F32 * * @param[out] v Returned value diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index b90798e5ff..7d9cd4e0cc 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -26,6 +26,7 @@ #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/TensorShape.h" +#include "support/Half.h" #include #include @@ -34,6 +35,9 @@ namespace arm_compute { +/** 16-bit floating point type */ +using half = half_float::half; + /** Image colour formats */ enum class Format { diff --git a/arm_compute/runtime/CL/functions/CLScale.h 
b/arm_compute/runtime/CL/functions/CLScale.h index c2438ddf9b..db491c1a44 100644 --- a/arm_compute/runtime/CL/functions/CLScale.h +++ b/arm_compute/runtime/CL/functions/CLScale.h @@ -39,14 +39,14 @@ class CLScale : public ICLSimpleFunction public: /** Initialize the function's source, destination, interpolation type and border_mode. * - * @param[in,out] input Source tensor. Data types supported: U8, S16. (Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor). + * @param[in,out] input Source tensor. Data types supported: U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data types supported: Same as @p input * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] policy The interpolation type. * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ - void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); + void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue()); }; } #endif /*__ARM_COMPUTE_CLSCALE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h index 00a368eb72..7297880a7a 100644 --- a/arm_compute/runtime/NEON/functions/NEScale.h +++ b/arm_compute/runtime/NEON/functions/NEScale.h @@ -47,13 +47,13 @@ public: NEScale(); /** Initialize the function's source, destination, interpolation type and border_mode. * - * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) - * @param[out] output Destination tensor. Data type supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in, out] input Source tensor. Data type supported: U8/F32. (Written to only for @p border_mode != UNDEFINED) + * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] policy The interpolation type. * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ - void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0); + void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue()); // Inherited methods overridden: void run() override; -- cgit v1.2.1