2 files changed, 54 insertions, 11 deletions
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index bf06fdd639..a5de81137b 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -48,7 +48,7 @@ public:
     ~NEPoolingLayerKernel() = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input     Source tensor. Data types supported: QS8/F32.
+     * @param[in]  input     Source tensor. Data types supported: QS8/F16/F32.
      * @param[out] output    Destination tensor. Data types supported: Same as @p input.
      * @param[in]  pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
      */
@@ -66,6 +66,14 @@ private:
      */
     template <PoolingType pooling_type>
     void pooling2_f32(const Window &window_input, const Window &window);
+    /** Function to perform 2x2 pooling for float16_t, templated on the pooling type.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void pooling2_f16(const Window &window_input, const Window &window);
+
     /** Function to perform 2x2 pooling for 8bit fixed point.
      *
      * @param[in] window_input Input region on which to execute the kernel.
@@ -80,6 +88,13 @@ private:
      */
     template <PoolingType pooling_type>
     void pooling3_f32(const Window &window_input, const Window &window);
+    /** Function to perform 3x3 pooling for float16_t, templated on the pooling type.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type>
+    void pooling3_f16(const Window &window_input, const Window &window);
     /** Function to perform 3x3 pooling for 8bit fixed point.
      *
      * @param[in] window_input Input region on which to execute the kernel.
diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h
index b4912ce15a..1b1a5a3845 100644
--- a/arm_compute/core/PixelValue.h
+++ b/arm_compute/core/PixelValue.h
@@ -26,6 +26,10 @@
 
 #include <cstdint>
 
+#if ARM_COMPUTE_ENABLE_FP16
+#include <arm_fp16.h> // needed for float16_t
+#endif                /* ARM_COMPUTE_ENABLE_FP16 */
+
 namespace arm_compute
 {
 /** Class describing the value of a pixel for any image format. */
@@ -82,6 +86,17 @@ public:
     {
         value.s32 = v;
     }
+#if ARM_COMPUTE_ENABLE_FP16
+    /** Initialize the union with a F16 pixel value (compiled only when ARM_COMPUTE_ENABLE_FP16 is enabled)
+     *
+     * @param[in] v F16 value, stored in the f16 member of the union.
+     */
+    PixelValue(float16_t v)
+        : PixelValue() // run the default constructor first, then overwrite the f16 member
+    {
+        value.f16 = v;
+    }
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
     /** Initialize the union with a F32 pixel value
      *
      * @param[in] v F32 value.
@@ -96,16 +111,19 @@ public:
      */
     union
         {
-            uint8_t  rgb[3];  /**< 3 channels: RGB888 */
-            uint8_t  yuv[3];  /**< 3 channels: Any YUV format */
-            uint8_t  rgbx[4]; /**< 4 channels: RGBX8888 */
-            float    f32;     /**< Single channel float 32 */
-            uint8_t  u8;      /**< Single channel U8 */
-            int8_t   s8;      /**< Single channel S8 */
-            uint16_t u16;     /**< Single channel U16 */
-            int16_t  s16;     /**< Single channel S16 */
-            uint32_t u32;     /**< Single channel U32 */
-            int32_t  s32;     /**< Single channel S32 */
+            uint8_t rgb[3];  /**< 3 channels: RGB888 */
+            uint8_t yuv[3];  /**< 3 channels: Any YUV format */
+            uint8_t rgbx[4]; /**< 4 channels: RGBX8888 */
+            float   f32;     /**< Single channel float 32 */
+#if ARM_COMPUTE_ENABLE_FP16
+            float16_t f16; /**< Single channel F16 (present only when ARM_COMPUTE_ENABLE_FP16 is enabled) */
+#endif                 /* ARM_COMPUTE_ENABLE_FP16 */
+            uint8_t  u8;   /**< Single channel U8 */
+            int8_t   s8;   /**< Single channel S8 */
+            uint16_t u16;  /**< Single channel U16 */
+            int16_t  s16;  /**< Single channel S16 */
+            uint32_t u32;  /**< Single channel U32 */
+            int32_t  s32;  /**< Single channel S32 */
         } value;
     /** Interpret the pixel value as a U8
      *
@@ -155,6 +173,16 @@ public:
     {
         v = value.s32;
     }
+#if ARM_COMPUTE_ENABLE_FP16
+    /** Interpret the pixel value as a F16 (compiled only when ARM_COMPUTE_ENABLE_FP16 is enabled)
+     *
+     * @param[out] v Returned value, copied from the f16 member of the union
+     */
+    void get(float16_t &v) const
+    {
+        v = value.f16;
+    }
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
     /** Interpret the pixel value as a F32
      *
      * @param[out] v Returned value