author    Georgios Pinitas <georgios.pinitas@arm.com>  2020-06-03 20:16:46 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>  2020-06-09 11:58:01 +0000
commit    980a9168b81d778f4902973b4920b54c103907e0 (patch)
tree      d2e8bf3527db8fe39cec8c51c6a914b721c35b03 /arm_compute/core/NEON
parent    2d10f186aacfc56b601b3cdaffa942cc6e6d1f53 (diff)
download  ComputeLibrary-980a9168b81d778f4902973b4920b54c103907e0.tar.gz
COMPMID-3177: Remove padding from NEBatchNormalizationLayer
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I9be23e6ef1f552eb159e39fda16c82fa20124094
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3307
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r--  arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h      29
-rw-r--r--  arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h   108
2 files changed, 96 insertions, 41 deletions
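
For context on the changes below: removing the padding requirement hinges on the activation functors gaining a scalar operator() alongside the existing vector one, so the kernel can finish each row with a scalar tail loop instead of reading past the tensor's real extent. A minimal self-contained sketch of that pattern follows; relu_sketch and apply_relu are hypothetical names, and raw Neon intrinsics stand in for the library's wrapper:: helpers, so this is not the kernel's actual code.

#include <arm_neon.h>
#include <algorithm>
#include <cstddef>

struct relu_sketch
{
    // Vector path: whole 4-float lanes.
    void operator()(float32x4_t &vval) const
    {
        vval = vmaxq_f32(vdupq_n_f32(0.f), vval);
    }
    // Scalar path: leftover elements at the end of a row.
    void operator()(float &val) const
    {
        val = std::max(0.f, val);
    }
};

void apply_relu(float *data, std::size_t len)
{
    relu_sketch act{};
    std::size_t x = 0;
    // Main loop over full vectors.
    for(; x + 4 <= len; x += 4)
    {
        float32x4_t v = vld1q_f32(data + x);
        act(v);
        vst1q_f32(data + x, v);
    }
    // Scalar tail: no padding needed beyond the tensor's real extent.
    for(; x < len; ++x)
    {
        act(data[x]);
    }
}
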
diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
index d59ed7baf0..7371e3c177 100644
--- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,7 @@
namespace arm_compute
{
+// Forward declarations
class ITensor;
/** Interface for the batch normalization layer kernel.
@@ -97,40 +98,26 @@ private:
/** Configure execution function in case of fused activation **/
void configure_fused();
- /** Template function to run batch normalization on fp16
- *
- * @tparam fused_activation Boolean that flags if its a fused activation or not
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool fused_activation, typename F>
- void batch_normalization_fp16_nchw(const Window &window);
- /** Template function to run batch normalization on fp16 on tensors with NHWC format
- *
- * @tparam fused_activation Boolean that flags if its a fused activation or not
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <bool fused_activation, typename F>
- void batch_normalization_fp16_nhwc(const Window &window);
/** Template function to run batch normalization on fp32
*
+ * @tparam T Specialization data type
* @tparam fused_activation Boolean that flags if its a fused activation or not
* @tparam F Activation function functor to run
*
* @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
*/
- template <bool fused_activation, typename F>
- void batch_normalization_fp32_nchw(const Window &window);
+ template <typename T, bool fused_activation, typename F>
+ void batch_normalization_nchw(const Window &window);
/** Template function to run batch normalization on fp32 on tensors with NHWC format
*
+ * @tparam T Specialization data type
* @tparam fused_activation Boolean that flags if its a fused activation or not
* @tparam F Activation function functor to run
*
* @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
*/
- template <bool fused_activation, typename F>
- void batch_normalization_fp32_nhwc(const Window &window);
+ template <typename T, bool fused_activation, typename F>
+ void batch_normalization_nhwc(const Window &window);
/** Common signature for all the batch normalization functions
*
* @param[in] window Region on which to execute the kernel.
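
The hunk above collapses the four per-type members (fp16/fp32 for NCHW and NHWC) into two templates parameterised on the element type T. A hypothetical illustration of the resulting dispatch, using placeholder names (DataType, select_kernel, batch_normalization_nchw_sketch) rather than the library's API: the element type is picked once at configure time, and the implementation is written a single time for any T.

enum class DataType { F16, F32 };

template <typename T, bool fused_activation>
void batch_normalization_nchw_sketch(/* const Window &window */)
{
    // ... one implementation, written once for any element type T ...
}

using KernelFn = void (*)();

KernelFn select_kernel(DataType dt, bool fused)
{
    switch(dt)
    {
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        case DataType::F16:
            return fused ? &batch_normalization_nchw_sketch<__fp16, true>
                         : &batch_normalization_nchw_sketch<__fp16, false>;
#endif
        case DataType::F32:
        default:
            return fused ? &batch_normalization_nchw_sketch<float, true>
                         : &batch_normalization_nchw_sketch<float, false>;
    }
}
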
diff --git a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
index 4861559695..7945418ac5 100644
--- a/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
+++ b/arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,6 +45,7 @@ struct dummy
{
ARM_COMPUTE_UNUSED(act_info);
}
+
/** Run activation function.
*
* @param[in] vval Vector of values.
@@ -53,6 +54,15 @@ struct dummy
{
ARM_COMPUTE_UNUSED(vval);
}
+
+ /** Run activation function.
+ *
+ * @param[in] val Scalar value.
+ */
+ void operator()(T &val)
+ {
+ ARM_COMPUTE_UNUSED(val);
+ }
};
/** Linear activation object */
template <typename T, int S>
@@ -68,8 +78,10 @@ struct linear
* @param[in] act_info Activation layer information.
*/
explicit linear(ActivationLayerInfo act_info)
- : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
- vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{}))
+ : alpha(act_info.a()),
+ beta(act_info.b()),
+ valpha(wrapper::vdup_n(static_cast<T>(alpha), ExactTagType{})),
+ vbeta(wrapper::vdup_n(static_cast<T>(beta), ExactTagType{}))
{
}
@@ -79,13 +91,22 @@ struct linear
*/
void operator()(ExactType &vval)
{
- vval = wrapper::vmla(vval, valpha, vbeta);
+ vval = wrapper::vmla(vbeta, vval, valpha);
}
- /** Vector of alphas. */
- const ExactType valpha;
- /** Vector of betas. */
- const ExactType vbeta;
+ /** Run activation function.
+ *
+ * @param[in] val Scalar value.
+ */
+ void operator()(T &val)
+ {
+ val = alpha * val + beta;
+ }
+
+ const T alpha; /**< Scalar alpha */
+ const T beta; /**< Scalar beta */
+ const ExactType valpha; /**< Vector of alphas. */
+ const ExactType vbeta; /**< Vector of betas. */
};
/** Square activation object */
template <typename T, int S>
@@ -113,6 +134,15 @@ struct square
{
vval = wrapper::vmul(vval, vval);
}
+
+ /** Run activation function.
+ *
+ * @param[in] val Scalar value.
+ */
+ void operator()(T &val)
+ {
+ val = val * val;
+ }
};
/** Logistic activation object */
template <typename T, int S>
@@ -128,7 +158,7 @@ struct logistic
* @param[in] act_info Activation layer information.
*/
explicit logistic(ActivationLayerInfo act_info)
- : vone(wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{}))
+ : vone(wrapper::vdup_n(static_cast<T>(1), ExactTagType{}))
{
ARM_COMPUTE_UNUSED(act_info);
}
@@ -142,6 +172,15 @@ struct logistic
vval = wrapper::vinv(wrapper::vadd(vone, wrapper::vexpq(wrapper::vneg(vval))));
}
+ /** Run activation function.
+ *
+ * @param[in] val Scalar value.
+ */
+ void operator()(T &val)
+ {
+ val = 1 / (1 + std::exp(-val));
+ }
+
/** Vector of ones. */
const ExactType vone;
};
@@ -159,7 +198,7 @@ struct relu
* @param[in] act_info Activation layer information.
*/
explicit relu(ActivationLayerInfo act_info)
- : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}))
+ : vzero(wrapper::vdup_n(static_cast<T>(0), ExactTagType{}))
{
ARM_COMPUTE_UNUSED(act_info);
}
@@ -173,6 +212,15 @@ struct relu
vval = wrapper::vmax(vzero, vval);
}
+ /** Run activation function.
+ *
+ * @param[in] val Scalar value.
+ */
+ void operator()(T &val)
+ {
+ val = std::max(static_cast<T>(0), val);
+ }
+
/** Vector of zeroes. */
const ExactType vzero;
};
@@ -190,7 +238,8 @@ struct brelu
* @param[in] act_info Activation layer information.
*/
explicit brelu(ActivationLayerInfo act_info)
- : vzero(wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{})),
+ : alpha(act_info.a()),
+ vzero(wrapper::vdup_n(static_cast<T>(0), ExactTagType{})),
valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{}))
{
}
@@ -204,10 +253,18 @@ struct brelu
vval = wrapper::vmin(valpha, wrapper::vmax(vzero, vval));
}
- /** Vector of zeroes. */
- const ExactType vzero;
- /** Vector of alphas. */
- const ExactType valpha;
+ /** Run activation function.
+ *
+ * @param[in] val Scalar value.
+ */
+ void operator()(T &val)
+ {
+ val = std::min(alpha, std::max(static_cast<T>(0), val));
+ }
+
+ const T alpha; /**< Scalar alpha */
+ const ExactType vzero; /**< Vector of zeroes. */
+ const ExactType valpha; /**< Vector of alphas. */
};
/** Lower-Upper Bounded RELU activation object */
template <typename T, int S>
@@ -223,7 +280,9 @@ struct lubrelu
* @param[in] act_info Activation layer information.
*/
explicit lubrelu(ActivationLayerInfo act_info)
- : valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
+ : alpha(act_info.a()),
+ beta(act_info.b()),
+ valpha(wrapper::vdup_n(static_cast<T>(act_info.a()), ExactTagType{})),
vbeta(wrapper::vdup_n(static_cast<T>(act_info.b()), ExactTagType{}))
{
}
@@ -237,10 +296,19 @@ struct lubrelu
vval = wrapper::vmin(valpha, wrapper::vmax(vbeta, vval));
}
- /** Vector of alphas. */
- const ExactType valpha;
- /** Vector of betas. */
- const ExactType vbeta;
+ /** Run activation function.
+ *
+ * @param[in] val Scalar value.
+ */
+ void operator()(T &val)
+ {
+ val = std::min(alpha, std::max(beta, val));
+ }
+
+ const T alpha; /**< Scalar alpha */
+ const T beta; /**< Scalar beta */
+ const ExactType valpha; /**< Vector of alphas. */
+ const ExactType vbeta; /**< Vector of betas. */
};
} // namespace detail
} // namespace arm_compute
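
A note on the linear functor change above: the vector path now calls wrapper::vmla(vbeta, vval, valpha). Assuming vmla follows the usual Neon multiply-accumulate convention (accumulator first, i.e. a + b * c), this makes the vector path compute beta + x * alpha, consistent with the scalar overload added in the same hunk (val = alpha * val + beta). A minimal sketch with raw intrinsics, not the library's wrapper namespace:

#include <arm_neon.h>

// Computes alpha * x + beta lane-wise; vmlaq_f32(a, b, c) returns a + b * c,
// so the beta vector must be the accumulator argument.
float32x4_t linear_sketch(float32x4_t x, float alpha, float beta)
{
    const float32x4_t valpha = vdupq_n_f32(alpha);
    const float32x4_t vbeta  = vdupq_n_f32(beta);
    return vmlaq_f32(vbeta, x, valpha);
}
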