diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2017-08-31 18:12:42 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | 583137cc60580023abfd9d05abf933e7e117e29f (patch) | |
tree | b29ec55c11b65e2882e60c0cf8b592bf25e78b1b /arm_compute/core | |
parent | 3021edfb5e72ef4cd91dbc754ce6ac55388ebc4e (diff) | |
download | ComputeLibrary-583137cc60580023abfd9d05abf933e7e117e29f.tar.gz |
COMPMID-417: Add support for floats in scale.
Change-Id: I7d714ba13861509080a89817f54e9d32da83e970
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/86026
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- | arm_compute/core/CL/kernels/CLScaleKernel.h | 8 | ||||
-rw-r--r-- | arm_compute/core/Helpers.h | 66 | ||||
-rw-r--r-- | arm_compute/core/Helpers.inl | 49 | ||||
-rw-r--r-- | arm_compute/core/NEON/kernels/NEScaleKernel.h | 4 | ||||
-rw-r--r-- | arm_compute/core/PixelValue.h | 38 | ||||
-rw-r--r-- | arm_compute/core/Types.h | 4 |
6 files changed, 81 insertions, 88 deletions
diff --git a/arm_compute/core/CL/kernels/CLScaleKernel.h b/arm_compute/core/CL/kernels/CLScaleKernel.h index e74a7cb82a..0a4bbf0b5a 100644 --- a/arm_compute/core/CL/kernels/CLScaleKernel.h +++ b/arm_compute/core/CL/kernels/CLScaleKernel.h @@ -31,16 +31,14 @@ namespace arm_compute { class ICLTensor; -/** Interface for the warp affine kernel.*/ +/** Interface for the scale kernel */ class CLScaleKernel : public ICLSimple2DKernel { public: /** Initialise the kernel's inputs, output and interpolation policy * - * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor - * - * @param[in] input Source tensor. Data types supported: U8, S16. - * @param[out] output Destination tensor. Data types supported: U8, S16 (Must be the same as the input tensor). + * @param[in] input Source tensor. Data types supported: U8/S16/F16/F32 + * @param[out] output Destination tensor. Data types supported: Same as @p input * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] policy Interpolation type to use * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index b6461bc47a..6e4d987180 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -26,12 +26,14 @@ #include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/Steps.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Window.h" + #include <array> #include <cstddef> #include <cstdint> @@ -82,9 +84,9 @@ struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...> } /** Computes bilinear interpolation using the pointer to the top-left pixel and the pixel's distance between - * the real coordinates and the smallest following integer coordinates. + * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. * - * @param[in] pixel_ptr Pointer to the top-left pixel value. Format: Single channel U8 + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. * @param[in] stride Stride to access the bottom-left and bottom-right pixel values * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer @@ -93,26 +95,57 @@ struct is_contained<T, std::tuple<U, Ts...>> : is_contained<T, std::tuple<Ts...> * * @return The bilinear interpolated pixel value */ -inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy); +template <typename T> +inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const T a00 = *pixel_ptr; + const T a01 = *(pixel_ptr + 1); + const T a10 = *(pixel_ptr + stride); + const T a11 = *(pixel_ptr + stride + 1); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; -/** Return the pixel at (x,y) using bilinear interpolation. The image must be single channel U8 + return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); +} + +/** Return the pixel at (x,y) using bilinear interpolation. * * @warning Only works if the iterator was created with an IImage * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel input. * @param[in] stride Stride in bytes of the image; * @param[in] x X position of the wanted pixel * @param[in] y Y position of the wanted pixel * * @return The pixel at (x, y) using bilinear interpolation. */ -inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y); +template <typename T> +inline T pixel_bilinear_c1(const T *first_pixel_ptr, size_t stride, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + const int32_t xi = std::floor(x); + const int32_t yi = std::floor(y); -/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel U8 + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1(first_pixel_ptr + xi + yi * stride, stride, dx, dy); +} + +/** Return the pixel at (x,y) using bilinear interpolation by clamping when out of borders. The image must be single channel input * * @warning Only works if the iterator was created with an IImage * - * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel U8 image. + * @param[in] first_pixel_ptr Pointer to the first pixel of a single channel image. * @param[in] stride Stride in bytes of the image * @param[in] width Width of the image * @param[in] height Height of the image @@ -121,7 +154,22 @@ inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride * * @return The pixel at (x, y) using bilinear interpolation. */ -inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y); +template <typename T> +inline uint8_t pixel_bilinear_c1_clamp(const T *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) +{ + ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); + + x = std::max(-1.f, std::min(x, static_cast<float>(width))); + y = std::max(-1.f, std::min(y, static_cast<float>(height))); + + const float xi = std::floor(x); + const float yi = std::floor(y); + + const float dx = x - xi; + const float dy = y - yi; + + return delta_bilinear_c1(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy); +} /** Return the pixel at (x,y) using area interpolation by clamping when out of borders. The image must be single channel U8 * diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index c2ca3b44b3..de6c85ec76 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -29,55 +29,6 @@ namespace arm_compute { -inline uint8_t delta_bilinear_c1u8(const uint8_t *pixel_ptr, size_t stride, float dx, float dy) -{ - ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); - - const float dx1 = 1.0f - dx; - const float dy1 = 1.0f - dy; - - const float a00 = *pixel_ptr; - const float a01 = *(pixel_ptr + 1); - const float a10 = *(pixel_ptr + stride); - const float a11 = *(pixel_ptr + stride + 1); - - const float w1 = dx1 * dy1; - const float w2 = dx * dy1; - const float w3 = dx1 * dy; - const float w4 = dx * dy; - - return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; -} - -inline uint8_t pixel_bilinear_c1u8(const uint8_t *first_pixel_ptr, size_t stride, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - const int32_t xi = std::floor(x); - const int32_t yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - return delta_bilinear_c1u8(first_pixel_ptr + xi + yi * stride, stride, dx, dy); -} - -inline uint8_t pixel_bilinear_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float x, float y) -{ - ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); - - x = std::max(-1.f, std::min(x, static_cast<float>(width))); - y = std::max(-1.f, std::min(y, static_cast<float>(height))); - - const float xi = std::floor(x); - const float yi = std::floor(y); - - const float dx = x - xi; - const float dy = y - yi; - - return delta_bilinear_c1u8(first_pixel_ptr + static_cast<int32_t>(xi) + static_cast<int32_t>(yi) * stride, stride, dx, dy); -} - inline uint8_t pixel_area_c1u8_clamp(const uint8_t *first_pixel_ptr, size_t stride, size_t width, size_t height, float wr, float hr, int x, int y) { ARM_COMPUTE_ERROR_ON(first_pixel_ptr == nullptr); diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h index 3cac023d8a..660ecfdf72 100644 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -52,11 +52,11 @@ public: * * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor * - * @param[in] input Source tensor. Data types supported: U8/S16. + * @param[in] input Source tensor. Data types supported: U8/S16/F32. * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[out] output Destination tensor. Data types supported: U8/S16. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] policy Interpolation type to use * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. */ diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index 1b1a5a3845..63405560ea 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -24,11 +24,9 @@ #ifndef __ARM_COMPUTE_PIXELVALUE_H__ #define __ARM_COMPUTE_PIXELVALUE_H__ -#include <cstdint> +#include "arm_compute/core/Types.h" -#if ARM_COMPUTE_ENABLE_FP16 -#include <arm_fp16.h> // needed for float16_t -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#include <cstdint> namespace arm_compute { @@ -86,17 +84,15 @@ public: { value.s32 = v; } -#if ARM_COMPUTE_ENABLE_FP16 /** Initialize the union with a F16 pixel value * * @param[in] v F16 value. */ - PixelValue(float16_t v) + PixelValue(half v) : PixelValue() { value.f16 = v; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Initialize the union with a F32 pixel value * * @param[in] v F32 value. @@ -111,19 +107,17 @@ public: */ union { - uint8_t rgb[3]; /**< 3 channels: RGB888 */ - uint8_t yuv[3]; /**< 3 channels: Any YUV format */ - uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ - float f32; /**< Single channel float 32 */ -#if ARM_COMPUTE_ENABLE_FP16 - float16_t f16; /**< Single channel F16 */ -#endif /* ARM_COMPUTE_ENABLE_FP16 */ - uint8_t u8; /**< Single channel U8 */ - int8_t s8; /**< Single channel S8 */ - uint16_t u16; /**< Single channel U16 */ - int16_t s16; /**< Single channel S16 */ - uint32_t u32; /**< Single channel U32 */ - int32_t s32; /**< Single channel S32 */ + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + float f32; /**< Single channel float 32 */ + half f16; /**< Single channel F16 */ + uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ } value; /** Interpret the pixel value as a U8 * @@ -173,16 +167,14 @@ public: { v = value.s32; } -#if ARM_COMPUTE_ENABLE_FP16 /** Interpret the pixel value as a F16 * * @param[out] v Returned value */ - void get(float16_t &v) const + void get(half &v) const { v = value.f16; } -#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Interpret the pixel value as a F32 * * @param[out] v Returned value diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index b90798e5ff..7d9cd4e0cc 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -26,6 +26,7 @@ #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/TensorShape.h" +#include "support/Half.h" #include <cstddef> #include <cstdint> @@ -34,6 +35,9 @@ namespace arm_compute { +/** 16-bit floating point type */ +using half = half_float::half; + /** Image colour formats */ enum class Format { |