From 0c34fe29c298057091d48cde332cb60bb14efee1 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Mon, 26 Jun 2017 17:17:42 +0100 Subject: COMPMID-421: Added FP16 support in Pooling Layer Change-Id: I6b6119c8770051c1656da40aa073c539c15b493e Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78985 Reviewed-by: Moritz Pflanzer Tested-by: Kaizen --- .../core/NEON/kernels/NEPoolingLayerKernel.h | 17 +++++++- arm_compute/core/PixelValue.h | 48 +++++++++++++++++----- 2 files changed, 54 insertions(+), 11 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index bf06fdd639..a5de81137b 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -48,7 +48,7 @@ public: ~NEPoolingLayerKernel() = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F32. + * @param[in] input Source tensor. Data types supported: QS8/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ @@ -66,6 +66,14 @@ private: */ template <PoolingType pooling_type> void pooling2_f32(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for float16_t. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling2_f16(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for 8bit fixed point. * * @param[in] window_input Input region on which to execute the kernel. @@ -80,6 +88,13 @@ private: */ template <PoolingType pooling_type> void pooling3_f32(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. 
+ * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling3_f16(const Window &window_input, const Window &window); /** Function to perform 3x3 pooling for 8bit fixed point. * * @param[in] window_input Input region on which to execute the kernel. diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index b4912ce15a..1b1a5a3845 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -26,6 +26,10 @@ #include <cstdint> +#if ARM_COMPUTE_ENABLE_FP16 +#include <arm_fp16.h> // needed for float16_t +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + namespace arm_compute { /** Class describing the value of a pixel for any image format. */ @@ -82,6 +86,17 @@ public: { value.s32 = v; } +#if ARM_COMPUTE_ENABLE_FP16 + /** Initialize the union with a F16 pixel value + * + * @param[in] v F16 value. + */ + PixelValue(float16_t v) + : PixelValue() + { + value.f16 = v; + } +#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Initialize the union with a F32 pixel value * * @param[in] v F32 value. 
@@ -96,16 +111,19 @@ public: */ union { - uint8_t rgb[3]; /**< 3 channels: RGB888 */ - uint8_t yuv[3]; /**< 3 channels: Any YUV format */ - uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ - float f32; /**< Single channel float 32 */ - uint8_t u8; /**< Single channel U8 */ - int8_t s8; /**< Single channel S8 */ - uint16_t u16; /**< Single channel U16 */ - int16_t s16; /**< Single channel S16 */ - uint32_t u32; /**< Single channel U32 */ - int32_t s32; /**< Single channel S32 */ + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + float f32; /**< Single channel float 32 */ +#if ARM_COMPUTE_ENABLE_FP16 + float16_t f16; /**< Single channel F16 */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ } value; /** Interpret the pixel value as a U8 * @@ -155,6 +173,16 @@ public: { v = value.s32; } +#if ARM_COMPUTE_ENABLE_FP16 + /** Interpret the pixel value as a F16 + * + * @param[out] v Returned value + */ + void get(float16_t &v) const + { + v = value.f16; + } +#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Interpret the pixel value as a F32 * * @param[out] v Returned value -- cgit v1.2.1