From 980a9168b81d778f4902973b4920b54c103907e0 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Wed, 3 Jun 2020 20:16:46 +0100
Subject: COMPMID-3177: Remove padding from NEBatchNormalizationLayer

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I9be23e6ef1f552eb159e39fda16c82fa20124094
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3307
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 .../NEON/kernels/NEBatchNormalizationLayerKernel.h |  29 ++----
 .../kernels/detail/NEActivationFunctionDetail.h    | 108 +++++++++++++++++----
 2 files changed, 96 insertions(+), 41 deletions(-)
(limited to 'arm_compute/core/NEON')

diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
index d59ed7baf0..7371e3c177 100644
--- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,7 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ITensor;
 
 /** Interface for the batch normalization layer kernel.
@@ -97,40 +98,26 @@ private:
     /** Configure execution function in case of fused activation **/
     void configure_fused();
 
-    /** Template function to run batch normalization on fp16
-     *
-     * @tparam fused_activation Boolean that flags if its a fused activation or not
-     *
-     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
-     */
-    template <bool fused_activation>
-    void batch_normalization_fp16_nchw(const Window &window);
-    /** Template function to run batch normalization on fp16 on tensors with NHWC format
-     *
-     * @tparam fused_activation Boolean that flags if its a fused activation or not
-     *
-     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
-     */
-    template <bool fused_activation>
-    void batch_normalization_fp16_nhwc(const Window &window);
     /** Template function to run batch normalization on fp32
      *
+     * @tparam T                Specialization data type
      * @tparam fused_activation Boolean that flags if its a fused activation or not
      * @tparam F                Activation function functor to run
      *
      * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
      */
-    template <bool fused_activation, typename F>
-    void batch_normalization_fp32_nchw(const Window &window);
+    template <typename T, bool fused_activation, typename F>
+    void batch_normalization_nchw(const Window &window);
     /** Template function to run batch normalization on fp32 on tensors with NHWC format
      *
+     * @tparam T                Specialization data type
      * @tparam fused_activation Boolean that flags if its a fused activation or not
      * @tparam F                Activation function functor to run
      *
      * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
      */
-    template <bool fused_activation, typename F>
-    void batch_normalization_fp32_nhwc(const Window &window);
+    template <typename T, bool fused_activation, typename F>
+    void batch_normalization_nhwc(const Window &window);
     /** Common signature for all the batch normalization functions
      *
      * @param[in] window Region on which to execute the kernel.
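
The hunk above collapses the four per-precision member templates into one type-generic pair: the element type T moves into the template parameter list, so the fp16 and fp32 paths share a single implementation. A minimal sketch of how such a kernel could be selected at configure time; the configure_non_fused() helper and the _func member-function pointer are assumptions for illustration, not necessarily the library's actual internals:

    // Hedged sketch: dispatch the merged template on the tensor's element type.
    // detail::dummy<T, S> is the no-op activation functor from the second file in
    // this patch; 8 fp16 lanes / 4 fp32 lanes fill one 128-bit NEON register.
    void NEBatchNormalizationLayerKernel::configure_non_fused()
    {
        switch(_input->info()->data_type())
        {
    #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            case DataType::F16:
                _func = &NEBatchNormalizationLayerKernel::batch_normalization_nchw<float16_t, false, detail::dummy<float16_t, 8>>;
                break;
    #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
            case DataType::F32:
                _func = &NEBatchNormalizationLayerKernel::batch_normalization_nchw<float, false, detail::dummy<float, 4>>;
                break;
            default:
                ARM_COMPUTE_ERROR("Element size not supported");
        }
    }
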
diff --git a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
index 4861559695..7945418ac5 100644
--- a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
+++ b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,7 @@ struct dummy
     {
         ARM_COMPUTE_UNUSED(act_info);
     }
+
     /** Run activation function.
      *
      * @param[in] vval Vector of values.
@@ -53,6 +54,15 @@ struct dummy
     {
         ARM_COMPUTE_UNUSED(vval);
     }
+
+    /** Run activation function.
+     *
+     * @param[in] val Scalar value.
+     */
+    void operator()(T &val)
+    {
+        ARM_COMPUTE_UNUSED(val);
+    }
 };
 /** Linear activation object */
 template <typename T, int S>
@@ -68,8 +78,10 @@ struct linear
      * @param[in] act_info Activation layer information.
      */
     explicit linear(ActivationLayerInfo act_info)
-        : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
-          vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{}))
+        : alpha(act_info.a()),
+          beta(act_info.b()),
+          valpha(wrapper::vdup_n(static_cast<T>(alpha), ExactTagType{})),
+          vbeta(wrapper::vdup_n(static_cast<T>(beta), ExactTagType{}))
     {
     }
 
@@ -79,13 +91,22 @@ struct linear
      */
     void operator()(ExactType &vval)
     {
-        vval = wrapper::vmla(vval, valpha, vbeta);
+        vval = wrapper::vmla(vbeta, vval, valpha);
     }
 
-    /** Vector of alphas. */
-    const ExactType valpha;
-    /** Vector of betas. */
-    const ExactType vbeta;
+    /** Run activation function.
+     *
+     * @param[in] val Scalar value.
+     */
+    void operator()(T &val)
+    {
+        val = alpha * val + beta;
+    }
+
+    const T         alpha;  /**< Scalar alpha */
+    const T         beta;   /**< Scalar beta */
+    const ExactType valpha; /**< Vector of alphas. */
+    const ExactType vbeta;  /**< Vector of betas. */
 };
 /** Square activation object */
 template <typename T, int S>
@@ -113,6 +134,15 @@ struct square
     {
         vval = wrapper::vmul(vval, vval);
     }
+
+    /** Run activation function.
+     *
+     * @param[in] val Scalar value.
+     */
+    void operator()(T &val)
+    {
+        val = val * val;
+    }
 };
 /** Logistic activation object */
 template <typename T, int S>
@@ -128,7 +158,7 @@ struct logistic
      * @param[in] act_info Activation layer information.
      */
     explicit logistic(ActivationLayerInfo act_info)
-        : vone(wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{}))
+        : vone(wrapper::vdup_n(static_cast<T>(1), ExactTagType{}))
     {
         ARM_COMPUTE_UNUSED(act_info);
     }
@@ -142,6 +172,15 @@ struct logistic
         vval = wrapper::vinv(wrapper::vadd(vone, wrapper::vexpq(wrapper::vneg(vval))));
     }
 
+    /** Run activation function.
+     *
+     * @param[in] val Scalar value.
+     */
+    void operator()(T &val)
+    {
+        val = 1 / (1 + std::exp(-val));
+    }
+
     /** Vector of ones. */
     const ExactType vone;
 };
@@ -159,7 +198,7 @@ struct relu
      * @param[in] act_info Activation layer information.
      */
     explicit relu(ActivationLayerInfo act_info)
-        : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}))
+        : vzero(wrapper::vdup_n(static_cast<T>(0), ExactTagType{}))
     {
         ARM_COMPUTE_UNUSED(act_info);
     }
@@ -173,6 +212,15 @@ struct relu
         vval = wrapper::vmax(vzero, vval);
     }
 
+    /** Run activation function.
+     *
+     * @param[in] val Scalar value.
+     */
+    void operator()(T &val)
+    {
+        val = std::max(static_cast<T>(0), val);
+    }
+
     /** Vector of zeroes. */
     const ExactType vzero;
 };
@@ -190,7 +238,8 @@ struct brelu
      * @param[in] act_info Activation layer information.
      */
     explicit brelu(ActivationLayerInfo act_info)
-        : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{})),
+        : alpha(act_info.a()),
+          vzero(wrapper::vdup_n(static_cast<T>(0), ExactTagType{})),
           valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{}))
     {
     }
@@ -204,10 +253,18 @@ struct brelu
         vval = wrapper::vmin(valpha, wrapper::vmax(vzero, vval));
     }
 
-    /** Vector of zeroes. */
-    const ExactType vzero;
-    /** Vector of alphas. */
-    const ExactType valpha;
+    /** Run activation function.
+     *
+     * @param[in] val Scalar value.
+     */
+    void operator()(T &val)
+    {
+        val = std::min(alpha, std::max(static_cast<T>(0), val));
+    }
+
+    const T         alpha;  /**< Scalar alpha */
+    const ExactType vzero;  /**< Vector of zeroes. */
+    const ExactType valpha; /**< Vector of alphas. */
 };
 /** Lower-Upper Bounded RELU activation object */
 template <typename T, int S>
@@ -223,7 +280,9 @@ struct lubrelu
      * @param[in] act_info Activation layer information.
      */
     explicit lubrelu(ActivationLayerInfo act_info)
-        : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
+        : alpha(act_info.a()),
+          beta(act_info.b()),
+          valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
           vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{}))
     {
     }
@@ -237,10 +296,19 @@ struct lubrelu
         vval = wrapper::vmin(valpha, wrapper::vmax(vbeta, vval));
     }
 
-    /** Vector of alphas. */
-    const ExactType valpha;
-    /** Vector of betas. */
-    const ExactType vbeta;
+    /** Run activation function.
+     *
+     * @param[in] val Scalar value.
+     */
+    void operator()(T &val)
+    {
+        val = std::min(alpha, std::max(beta, val));
+    }
+
+    const T         alpha;  /**< Scalar alpha */
+    const T         beta;   /**< Scalar beta */
+    const ExactType valpha; /**< Vector of alphas. */
+    const ExactType vbeta;  /**< Vector of betas. */
 };
 } // namespace detail
 } // namespace arm_compute
--
cgit v1.2.1
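
The scalar operator() overloads added throughout this header are what make the padding removal possible: a kernel can now process the bulk of each row with full NEON vectors and hand the leftover tail elements to the same functor one value at a time, instead of reading and writing past the row into padded borders. A self-contained sketch of that loop structure, using plain NEON intrinsics and a hypothetical relu functor rather than the library's templated wrappers:

    #include <arm_neon.h>
    #include <algorithm>
    #include <cstddef>

    // Illustrative stand-in for detail::relu<float, 4>: one overload per NEON
    // vector, one per scalar, so both loops below share the same logic.
    struct relu_f32x4
    {
        float32x4_t vzero = vdupq_n_f32(0.f);
        void operator()(float32x4_t &v) { v = vmaxq_f32(vzero, v); } // vector path
        void operator()(float &s)       { s = std::max(0.f, s); }    // scalar tail path
    };

    void apply_activation(float *data, std::size_t len)
    {
        relu_f32x4  act;
        std::size_t x = 0;
        // Vector loop over whole multiples of the vector length.
        for(; x + 4 <= len; x += 4)
        {
            float32x4_t v = vld1q_f32(data + x);
            act(v);
            vst1q_f32(data + x, v);
        }
        // Scalar loop over the leftover elements -- no padded border required.
        for(; x < len; ++x)
        {
            act(data[x]);
        }
    }
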