From 6b0bf9972975aff01e42e5790f7b7c98cd835afb Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park
Date: Wed, 17 Feb 2021 13:12:53 +0000
Subject: Add CpuPRelu operators

Implements: COMPMID-4184

Change-Id: I252168b460a18f837a26df5641664e95ddbd9c7e
Signed-off-by: Sang-Hoon Park
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5237
Tested-by: Arm Jenkins
Reviewed-by: Manuel Bottini
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
---
 arm_compute/runtime/NEON/functions/NEPReluLayer.h | 31 +--------
 src/runtime/NEON/functions/NEPReluLayer.cpp       | 29 +++-----
 src/runtime/cpu/operators/CpuElementwise.cpp      | 37 +++-------
 src/runtime/cpu/operators/CpuElementwise.h        | 84 ++++++-----------------
 src/runtime/cpu/operators/CpuPRelu.h              | 38 ++++++++++
 5 files changed, 78 insertions(+), 141 deletions(-)
 create mode 100644 src/runtime/cpu/operators/CpuPRelu.h

diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
index 12ffb8da7b..b07febfe7f 100644
--- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
@@ -26,41 +26,14 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/NEON/INEOperator.h"
+
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
 class ITensorInfo;
 
-namespace experimental
-{
-/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for PRELU
- *
- * @note The function implements an activation layer with the PRELU activation function.
- */
-class NEPRelu : public INEOperator
-{
-public:
-    /** Set the input and output tensor.
-     *
-     * @param[in]  input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in]  alpha  Source alpha tensor info. Data types supported: same of @p input.
-     * @param[out] output Destination tensor info. Data type supported: same as @p input
-     */
-    void configure(const ITensorInfo *input, const ITensorInfo *alpha, ITensorInfo *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
-     *
-     * @param[in] input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] alpha  Source alpha tensor info. Data types supported: same of @p input.
-     * @param[in] output Destination tensor info. Data type supported: same as @p input
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
-};
-} // namespace experimental
-
 /** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for PRELU
  *
  * @note The function implements an activation layer with the PRELU activation function.
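The header diff above removes the experimental::NEPRelu operator from the public API; callers keep using the NEPReluLayer function, which the implementation diff below rewires onto the new cpu::CpuPRelu operator. As a reference, a minimal usage sketch of the retained public API follows; the tensor shape, data type and variable names are illustrative assumptions, not part of this patch:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    int main()
    {
        using namespace arm_compute;

        // Illustrative metadata; PRELU accepts QASYMM8/QASYMM8_SIGNED/F16/F32.
        const TensorInfo info(TensorShape(16U, 16U, 3U), 1, DataType::F32);
        Tensor input, alpha, output;
        input.allocator()->init(info);
        alpha.allocator()->init(info);
        output.allocator()->init(info);

        // Configure once on metadata, then allocate the backing memory.
        NEPReluLayer prelu;
        prelu.configure(&input, &alpha, &output);
        input.allocator()->allocate();
        alpha.allocator()->allocate();
        output.allocator()->allocate();

        // ... fill input and alpha ...
        prelu.run(); // output[i] = input[i] > 0 ? input[i] : alpha[i] * input[i]
        return 0;
    }
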
diff --git a/src/runtime/NEON/functions/NEPReluLayer.cpp b/src/runtime/NEON/functions/NEPReluLayer.cpp
index d79235747b..a05b545e9a 100644
--- a/src/runtime/NEON/functions/NEPReluLayer.cpp
+++ b/src/runtime/NEON/functions/NEPReluLayer.cpp
@@ -24,31 +24,18 @@
 #include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
 
 #include "arm_compute/core/ITensor.h"
-#include "src/core/cpu/kernels/CpuElementwiseKernel.h"
+#include "src/runtime/cpu/operators/CpuPRelu.h"
 
 namespace arm_compute
 {
-namespace experimental
-{
-void NEPRelu::configure(const ITensorInfo *input, const ITensorInfo *alpha, ITensorInfo *output)
-{
-    auto k = std::make_unique<cpu::kernels::CpuArithmeticKernel>();
-    k->configure(ArithmeticOperation::PRELU, input, alpha, output);
-    _kernel = std::move(k);
-}
-
-Status NEPRelu::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
-{
-    return cpu::kernels::CpuArithmeticKernel::validate(ArithmeticOperation::PRELU, input, alpha, output);
-}
-} // nsamespace experimental
+using OperatorType = cpu::CpuPRelu;
 
 struct NEPReluLayer::Impl
 {
-    const ITensor *src_0{ nullptr };
-    const ITensor *src_1{ nullptr };
-    ITensor       *dst{ nullptr };
-    std::unique_ptr<experimental::NEPRelu> op{ nullptr };
+    const ITensor                 *src_0{ nullptr };
+    const ITensor                 *src_1{ nullptr };
+    ITensor                       *dst{ nullptr };
+    std::unique_ptr<OperatorType> op{ nullptr };
 };
 
 NEPReluLayer::NEPReluLayer()
@@ -64,7 +51,7 @@ void NEPReluLayer::configure(const ITensor *input, const ITensor *alpha, ITensor
     _impl->src_0 = input;
     _impl->src_1 = alpha;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::NEPRelu>();
+    _impl->op    = std::make_unique<OperatorType>();
     _impl->op->configure(input->info(), alpha->info(), output->info());
 }
 
@@ -79,6 +66,6 @@ void NEPReluLayer::run()
 
 Status NEPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
 {
-    return experimental::NEPRelu::validate(input, alpha, output);
+    return OperatorType::validate(input, alpha, output);
 }
 } // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuElementwise.cpp b/src/runtime/cpu/operators/CpuElementwise.cpp
index 5457825bd5..8953d4769c 100644
--- a/src/runtime/cpu/operators/CpuElementwise.cpp
+++ b/src/runtime/cpu/operators/CpuElementwise.cpp
@@ -44,41 +44,24 @@ void CpuElementwiseBase::run(ITensorPack &tensors)
     ICpuOperator::run(tensors, shape_and_window.second);
 }
 
-void CpuElementwiseMax::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
+template <ArithmeticOperation op>
+void CpuElementwiseArithmetic<op>::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
 {
     auto k = std::make_unique<kernels::CpuArithmeticKernel>();
-    k->configure(ArithmeticOperation::MAX, src0, src1, dst);
-    _kernel = std::move(k);
-}
-
-Status CpuElementwiseMax::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
-{
-    return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MAX, src0, src1, dst);
-}
-
-void CpuElementwiseMin::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
-{
-    auto k = std::make_unique<kernels::CpuArithmeticKernel>();
-    k->configure(ArithmeticOperation::MIN, src0, src1, dst);
+    k->configure(op, src0, src1, dst);
     _kernel = std::move(k);
 }
 
-Status CpuElementwiseMin::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
-{
-    return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MIN, src0, src1, dst);
-}
-
-void CpuElementwiseSquaredDiff::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
+template <ArithmeticOperation op>
+Status CpuElementwiseArithmetic<op>::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
 {
-    auto k = std::make_unique<kernels::CpuArithmeticKernel>();
-    k->configure(ArithmeticOperation::SQUARED_DIFF, src0, src1, dst);
-    _kernel = std::move(k);
+    return kernels::CpuArithmeticKernel::validate(op, src0, src1, dst);
 }
 
-Status CpuElementwiseSquaredDiff::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst)
-{
-    return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, src0, src1, dst);
-}
+template class CpuElementwiseArithmetic<ArithmeticOperation::MAX>;
+template class CpuElementwiseArithmetic<ArithmeticOperation::MIN>;
+template class CpuElementwiseArithmetic<ArithmeticOperation::SQUARED_DIFF>;
+template class CpuElementwiseArithmetic<ArithmeticOperation::PRELU>;
 
 void CpuElementwiseDivision::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst)
 {
diff --git a/src/runtime/cpu/operators/CpuElementwise.h b/src/runtime/cpu/operators/CpuElementwise.h
index 4b350d5f9f..899a2ffdb7 100644
--- a/src/runtime/cpu/operators/CpuElementwise.h
+++ b/src/runtime/cpu/operators/CpuElementwise.h
@@ -36,83 +36,39 @@ public:
     // Inherited methods overridden:
     void run(ITensorPack &tensors) override;
 };
 
-/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for max
+/** Class to run @ref cpu::kernels::CpuArithmeticKernel for arithmetic operations except division and power
  *
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @note The function performs a max operation between two tensors.
- */
-class CpuElementwiseMax : public CpuElementwiseBase
-{
-public:
-    /** Initialise the kernel's inputs, dst and conversion policy.
-     *
-     * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
-     * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
-     * @param[out]     dst  Output tensor info. Data types supported: Same as @p src0.
-     */
-    void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max
-     *
-     * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
-     * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
-     * @param[in] dst  Output tensor info. Data types supported: Same as @p src0.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
-};
-
-/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for min
- *
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @note The function performs a min operation between two tensors.
+ * @note Max/Min/Squared difference support input data types of QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32
+ * @note PRelu supports input data types of QASYMM8/QASYMM8_SIGNED/F16/F32.
  */
-class CpuElementwiseMin : public CpuElementwiseBase
+template <ArithmeticOperation op>
+class CpuElementwiseArithmetic : public CpuElementwiseBase
 {
 public:
-    /** Initialise the kernel's inputs, dst and conversion policy.
+    /** Configure the operator
      *
-     * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
-     * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
-     * @param[out]     dst  Output tensor info. Data types supported: Same as @p src0.
+     * @param[in]  src0 The first source tensor information.
+     * @param[in]  src1 The second source tensor information. With PRelu, this is used as alpha tensor.
+     * @param[out] dst  The output tensor information.
      */
     void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min
+    /** Static function to check if the given information will lead to a valid configuration
      *
-     * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
-     * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
-     * @param[in] dst  Output tensor info. Data types supported: Same as @p src0.
+     * @param[in]  src0 The first source tensor information.
+     * @param[in]  src1 The second source tensor information. With PRelu, this is used as alpha tensor.
+     * @param[out] dst  The output tensor information.
      *
-     * @return a status
+     * @return A status
      */
     static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
 };
 
-/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for squared difference
- *
- * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
- * @note The function performs a squared different operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2
- */
-class CpuElementwiseSquaredDiff : public CpuElementwiseBase
-{
-public:
-    /** Initialise the kernel's inputs, dst and conversion policy.
-     *
-     * @param[in, out] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
-     * @param[in, out] src1 Second tensor input info. Data types supported: Same as @p src0.
-     * @param[out]     dst  Output tensor info. Data types supported: Same as @p src0.
-     */
-    void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference
-     *
-     * @param[in] src0 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
-     * @param[in] src1 Second tensor input info. Data types supported: Same as @p src0.
-     * @param[in] dst  Output tensor info. Data types supported: Same as @p src0.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst);
-};
+/** Class to run @ref cpu::kernels::CpuArithmeticKernel for the maximum operation */
+using CpuElementwiseMax = CpuElementwiseArithmetic<ArithmeticOperation::MAX>;
+/** Class to run @ref cpu::kernels::CpuArithmeticKernel for the minimum operation */
+using CpuElementwiseMin = CpuElementwiseArithmetic<ArithmeticOperation::MIN>;
+/** Class to run @ref cpu::kernels::CpuArithmeticKernel for the squared difference operation */
+using CpuElementwiseSquaredDiff = CpuElementwiseArithmetic<ArithmeticOperation::SQUARED_DIFF>;
 
 /** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for division
  *
diff --git a/src/runtime/cpu/operators/CpuPRelu.h b/src/runtime/cpu/operators/CpuPRelu.h
new file mode 100644
index 0000000000..a6859f95d9
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuPRelu.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_PRELU_H
+#define ARM_COMPUTE_CPU_PRELU_H
+
+#include "src/runtime/cpu/operators/CpuElementwise.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Class to run @ref cpu::kernels::CpuArithmeticKernel for the PRelu operation */
+using CpuPRelu = CpuElementwiseArithmetic<ArithmeticOperation::PRELU>;
+} // namespace cpu
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_CPU_PRELU_H */
\ No newline at end of file
--
cgit v1.2.1
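Inside the library tree (the new CpuPRelu.h lives under src/, so it is not installed as a public header), the operator introduced by this patch is configured with ITensorInfo objects and executed with an ITensorPack, which is how NEPReluLayer's Impl drives it in the diff above. A minimal sketch under that assumption; the helper function name and the pre-initialised, pre-allocated tensors are illustrative:

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/experimental/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "src/runtime/cpu/operators/CpuPRelu.h"

    void run_prelu_operator(arm_compute::Tensor &input, arm_compute::Tensor &alpha, arm_compute::Tensor &output)
    {
        using namespace arm_compute;

        // Validation and configuration work on metadata only; no tensor memory is touched here.
        ARM_COMPUTE_ERROR_THROW_ON(cpu::CpuPRelu::validate(input.info(), alpha.info(), output.info()));
        cpu::CpuPRelu prelu;
        prelu.configure(input.info(), alpha.info(), output.info());

        // At run time the actual tensors are handed over through an ITensorPack.
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC_0, &input);
        pack.add_tensor(TensorType::ACL_SRC_1, &alpha); // alpha acts as the second elementwise input
        pack.add_tensor(TensorType::ACL_DST, &output);
        prelu.run(pack);
    }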