diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h | 26 | ||||
-rw-r--r-- | arm_compute/core/NEON/wrapper/intrinsics/cvt.h | 16 |
2 files changed, 28 insertions, 14 deletions
diff --git a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h index 43f6ec3f13..a5bd453ac7 100644 --- a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ namespace arm_compute { class ITensor; +struct InstanceNormalizationLayerKernelInfo; /** Interface for performing an instance normalization */ class NEInstanceNormalizationLayerKernel : public INEKernel @@ -52,26 +53,22 @@ public: ~NEInstanceNormalizationLayerKernel() = default; /** Set the input and output tensors. * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0 - * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0 - * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 + * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW + * In case of @p output tensor = nullptr this tensor will store the result of the normalization. + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor */ - void configure(ITensor *input, ITensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f); + void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer. * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] gamma (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0 - * @param[in] beta (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0 - * @param[in] epsilon (Optional) Lower bound value for the normalization. Defaults to 1e-12 + * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * @param[in] info Kernel meta-data descriptor * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -93,6 +90,7 @@ private: float _gamma; float _beta; float _epsilon; + bool _use_mixed_precision{ true }; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h index de1261bdd0..6e79a92bc2 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h @@ -40,8 +40,24 @@ namespace wrapper VCVT_TO_F32_IMPL(float32x4_t, uint32x4_t, vcvtq, f32, u32) VCVT_TO_F32_IMPL(float32x4_t, int32x4_t, vcvtq, f32, s32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VCVT_TO_F32_IMPL(float32x4_t, float16x4_t, vcvt, f32, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #undef VCVT_TO_F32_IMPL +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#define VCVT_TO_F16_IMPL(ptype, vtype, prefix, postfix1, postfix2) \ + template <typename T> \ + inline typename std::enable_if<std::is_same<T, float16_t>::value, float16x4_t>::type \ + vcvt(const vtype &a) \ + { \ + return prefix##_##postfix1##_##postfix2(a); \ + } + +VCVT_TO_F16_IMPL(float16x4_t, float32x4_t, vcvt, f16, f32) +#undef VCVT_TO_F16_IMPL +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + template <typename T> inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint32x4_t>::type vcvt(const float32x4_t &a) |