aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core')
-rw-r--r--arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h26
-rw-r--r--arm_compute/core/NEON/wrapper/intrinsics/cvt.h16
2 files changed, 28 insertions, 14 deletions
diff --git a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h
index 43f6ec3f13..a5bd453ac7 100644
--- a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,6 +29,7 @@
namespace arm_compute
{
class ITensor;
+struct InstanceNormalizationLayerKernelInfo;
/** Interface for performing an instance normalization */
class NEInstanceNormalizationLayerKernel : public INEKernel
@@ -52,26 +53,22 @@ public:
~NEInstanceNormalizationLayerKernel() = default;
/** Set the input and output tensors.
*
- * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW
- * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
- * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
- * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW
+ * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
+ * @param[in] info Kernel meta-data descriptor
*/
- void configure(ITensor *input, ITensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+ void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer.
*
- * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
- * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
- * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
- * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
+ * @param[in] info Kernel meta-data descriptor
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -93,6 +90,7 @@ private:
float _gamma;
float _beta;
float _epsilon;
+ bool _use_mixed_precision{ true };
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
index de1261bdd0..6e79a92bc2 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
@@ -40,8 +40,24 @@ namespace wrapper
VCVT_TO_F32_IMPL(float32x4_t, uint32x4_t, vcvtq, f32, u32)
VCVT_TO_F32_IMPL(float32x4_t, int32x4_t, vcvtq, f32, s32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VCVT_TO_F32_IMPL(float32x4_t, float16x4_t, vcvt, f32, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#undef VCVT_TO_F32_IMPL
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#define VCVT_TO_F16_IMPL(ptype, vtype, prefix, postfix1, postfix2) \
+ template <typename T> \
+ inline typename std::enable_if<std::is_same<T, float16_t>::value, float16x4_t>::type \
+ vcvt(const vtype &a) \
+ { \
+ return prefix##_##postfix1##_##postfix2(a); \
+ }
+
+VCVT_TO_F16_IMPL(float16x4_t, float32x4_t, vcvt, f16, f32)
+#undef VCVT_TO_F16_IMPL
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
template <typename T>
inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint32x4_t>::type
vcvt(const float32x4_t &a)