2 files changed, 28 insertions, 14 deletions
diff --git a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h
index 43f6ec3f13..a5bd453ac7 100644
--- a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 namespace arm_compute
 {
 class ITensor;
+struct InstanceNormalizationLayerKernelInfo;
 
 /** Interface for performing an instance normalization */
 class NEInstanceNormalizationLayerKernel : public INEKernel
@@ -52,26 +53,22 @@ public:
     ~NEInstanceNormalizationLayerKernel() = default;
     /** Set the input and output tensors.
      *
-     * @param[in, out] input   Source tensor. Data types supported: F16/F32. Data layout supported: NCHW
-     *                         In case of @p output tensor = nullptr this tensor will store the result of the normalization.
-     * @param[out]     output  Destination tensor. Data types and data layouts supported: same as @p input.
-     * @param[in]      gamma   (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
-     * @param[in]      beta    (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
-     * @param[in]      epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in, out] input  Source tensor. Data types supported: F16/F32. Data layout supported: NCHW
+     *                        In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+     * @param[out]     output Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]      info   Kernel meta-data descriptor
      */
-    void configure(ITensor *input, ITensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+    void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer.
      *
-     * @param[in] input   Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW
-     * @param[in] output  Destination tensor info. Data types and data layouts supported: same as @p input.
-     * @param[in] gamma   (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
-     * @param[in] beta    (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
-     * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in] input  Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW
+     * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
+     * @param[in] info   Kernel meta-data descriptor
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -93,6 +90,7 @@ private:
     float                  _gamma;
     float                  _beta;
     float                  _epsilon;
+    bool                   _use_mixed_precision{ true };
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
index de1261bdd0..6e79a92bc2 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
@@ -40,8 +40,24 @@ namespace wrapper
 
 VCVT_TO_F32_IMPL(float32x4_t, uint32x4_t, vcvtq, f32, u32)
 VCVT_TO_F32_IMPL(float32x4_t, int32x4_t, vcvtq, f32, s32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCVT_TO_F32_IMPL(float32x4_t, float16x4_t, vcvt, f32, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 #undef VCVT_TO_F32_IMPL
 
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#define VCVT_TO_F16_IMPL(ptype, vtype, prefix, postfix1, postfix2)                       \
+    template <typename T>                                                                \
+    inline typename std::enable_if<std::is_same<T, float16_t>::value, float16x4_t>::type \
+    vcvt(const vtype &a)                                                                 \
+    {                                                                                    \
+        return prefix##_##postfix1##_##postfix2(a);                                      \
+    }
+
+VCVT_TO_F16_IMPL(float16x4_t, float32x4_t, vcvt, f16, f32)
+#undef VCVT_TO_F16_IMPL
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
 template <typename T>
 inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint32x4_t>::type
 vcvt(const float32x4_t &a)