diff options
Diffstat (limited to 'arm_compute/core')
-rw-r--r-- | arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h | 17 | ||||
-rw-r--r-- | arm_compute/core/PixelValue.h | 48 |
2 files changed, 54 insertions, 11 deletions
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index bf06fdd639..a5de81137b 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -48,7 +48,7 @@ public: ~NEPoolingLayerKernel() = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8/F32. + * @param[in] input Source tensor. Data types supported: QS8/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ @@ -66,6 +66,14 @@ private: */ template <PoolingType pooling_type> void pooling2_f32(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for float16_t. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling2_f16(const Window &window_input, const Window &window); + /** Function to perform 2x2 pooling for 8bit fixed point. * * @param[in] window_input Input region on which to execute the kernel. @@ -80,6 +88,13 @@ private: */ template <PoolingType pooling_type> void pooling3_f32(const Window &window_input, const Window &window); + /** Function to perform 3x3 pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void pooling3_f16(const Window &window_input, const Window &window); /** Function to perform 3x3 pooling for 8bit fixed point. * * @param[in] window_input Input region on which to execute the kernel. 
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index b4912ce15a..1b1a5a3845 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -26,6 +26,10 @@ #include <cstdint> +#if ARM_COMPUTE_ENABLE_FP16 +#include <arm_fp16.h> // needed for float16_t +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + namespace arm_compute { /** Class describing the value of a pixel for any image format. */ @@ -82,6 +86,17 @@ public: { value.s32 = v; } +#if ARM_COMPUTE_ENABLE_FP16 + /** Initialize the union with a F16 pixel value + * + * @param[in] v F16 value. + */ + PixelValue(float16_t v) + : PixelValue() + { + value.f16 = v; + } +#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Initialize the union with a F32 pixel value * * @param[in] v F32 value. @@ -96,16 +111,19 @@ public: */ union { - uint8_t rgb[3]; /**< 3 channels: RGB888 */ - uint8_t yuv[3]; /**< 3 channels: Any YUV format */ - uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ - float f32; /**< Single channel float 32 */ - uint8_t u8; /**< Single channel U8 */ - int8_t s8; /**< Single channel S8 */ - uint16_t u16; /**< Single channel U16 */ - int16_t s16; /**< Single channel S16 */ - uint32_t u32; /**< Single channel U32 */ - int32_t s32; /**< Single channel S32 */ + uint8_t rgb[3]; /**< 3 channels: RGB888 */ + uint8_t yuv[3]; /**< 3 channels: Any YUV format */ + uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */ + float f32; /**< Single channel float 32 */ +#if ARM_COMPUTE_ENABLE_FP16 + float16_t f16; /**< Single channel F16 */ +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + uint8_t u8; /**< Single channel U8 */ + int8_t s8; /**< Single channel S8 */ + uint16_t u16; /**< Single channel U16 */ + int16_t s16; /**< Single channel S16 */ + uint32_t u32; /**< Single channel U32 */ + int32_t s32; /**< Single channel S32 */ } value; /** Interpret the pixel value as a U8 * @@ -155,6 +173,16 @@ public: { v = value.s32; } +#if ARM_COMPUTE_ENABLE_FP16 + /** Interpret the pixel value as a F16 + * + * @param[out] v Returned value + */ + void get(float16_t &v) const + { + v = value.f16; + } +#endif /* ARM_COMPUTE_ENABLE_FP16 */ /** Interpret the pixel value as a F32 * * @param[out] v Returned value |