From 173ba9bbb19ea83f951318d9989e440768b4de8f Mon Sep 17 00:00:00 2001
From: Michalis Spyrou
Date: Tue, 23 Jun 2020 17:25:43 +0100
Subject: COMPMID-3373: Async support to NEArithmetic* kernels/functions (Pt. 1)

Added support on NEArithmeticAddition and NEArithmeticSubtraction

Signed-off-by: Michalis Spyrou
Change-Id: Ifa805f8455ef6eff1ee627752dc1c7fe9740ec47
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3451
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 38 +++++++++++++----------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index d1cc962940..60c8fa1226 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -24,14 +24,14 @@
 #ifndef ARM_COMPUTE_NEQLSTMLAYER_H
 #define ARM_COMPUTE_NEQLSTMLAYER_H
 
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECopyKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
 #include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
 #include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/NEON/functions/NETranspose.h"
@@ -48,7 +48,7 @@ class ITensor;
  * This function calls the following NEON functions/kernels:
  *
  * -# @ref NEActivationLayer              Activation functions (tanh and logistic)
- * -# @ref NEArithmeticAdditionKernel     Elementwise addition
+ * -# @ref NEArithmeticAddition           Elementwise addition
 * -# @ref NEArithmeticSubtractionKernel  Elementwise subtraction
 * -# @ref NECopyKernel                   Copy kernel for copying output_state_out to output
 * -# @ref NEGEMMLowpMatrixMultiplyCore   Quantized matrix multiplication core. Accumulators are 32-bit integers
@@ -254,51 +254,51 @@ private:
     NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
     NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
     NEGEMMLowpMatrixAReductionKernel _projection_reduction{};
-    NEArithmeticAdditionKernel _projection_bias_add{};
+    NEArithmeticAddition _projection_bias_add{};
     NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
     NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_forget{};
     NEGEMMLowpOutputStage _input_to_forget_outstage{};
     NEGEMMLowpOutputStage _recurrent_to_forget_outstage{};
     NEGEMMLowpOutputStage _cell_to_forget_outstage{};
-    NEArithmeticAdditionKernel _accumulate_input_recurrent_forget{};
-    NEArithmeticAdditionKernel _accumulate_cell_forget{};
+    NEArithmeticAddition _accumulate_input_recurrent_forget{};
+    NEArithmeticAddition _accumulate_cell_forget{};
     NEActivationLayer _forget_gate_sigmoid{};
     NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
     NEGEMMLowpOutputStage _input_to_cell_outstage{};
     NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
     NEGEMMLowpOutputStage _recurrent_to_cell_outstage{};
-    NEArithmeticAdditionKernel _accumulate_input_recurrent_modulation{};
+    NEArithmeticAddition _accumulate_input_recurrent_modulation{};
     NEActivationLayer _cell_gate_tanh{};
-    NEArithmeticSubtractionKernel _input_gate_sub{};
+    NEArithmeticSubtraction _input_gate_sub{};
     NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
     NEGEMMLowpOutputStage _input_to_input_outstage{};
     NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
     NEGEMMLowpOutputStage _recurrent_to_input_outstage{};
-    NEArithmeticAdditionKernel _accumulate_input_recurrent_input{};
+    NEArithmeticAddition _accumulate_input_recurrent_input{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_input{};
     NEGEMMLowpOutputStage _cell_to_input_outstage{};
-    NEArithmeticAdditionKernel _accumulate_cell_input{};
+    NEArithmeticAddition _accumulate_cell_input{};
     NEActivationLayer _input_gate_sigmoid{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_cell{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_input_cell{};
-    NEArithmeticAdditionKernel _add_forget_cell{};
+    NEArithmeticAddition _add_forget_cell{};
     NEActivationLayer _cell_clip{};
     NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
     NEGEMMLowpOutputStage _input_to_output_outstage{};
     NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
     NEGEMMLowpOutputStage _recurrent_to_output_outstage{};
-    NEArithmeticAdditionKernel _accumulate_input_recurrent_output{};
+    NEArithmeticAddition _accumulate_input_recurrent_output{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_to_output{};
     NEGEMMLowpOutputStage _cell_to_output_outstage{};
-    NEArithmeticAdditionKernel _accumulate_cell_to_output{};
+    NEArithmeticAddition _accumulate_cell_to_output{};
     NEActivationLayer _output_gate_sigmoid{};
     NEActivationLayer _hidden_tanh{};
     NEPixelWiseMultiplicationKernel _pixelwise_mul_hidden{};
     NEGEMMLowpOutputStage _hidden_outstage{};
     NEGEMMLowpMatrixMultiplyCore _mm_projection{};
     NEGEMMLowpOutputStage _projection_outstage{};
-    NEArithmeticAdditionKernel _accumulate_projection{};
+    NEArithmeticAddition _accumulate_projection{};
     NEActivationLayer _projection_clip{};
 
     TensorCopyKernel _projection_bias_copy{};
@@ -311,7 +311,10 @@ private:
     NECopyKernel _copy_output{};
 
     // Tensor pointers
-    const ITensor *_input_to_input_weights{ nullptr };
+    const ITensor *_input_to_input_weights
+    {
+        nullptr
+    };
     const ITensor *_recurrent_to_input_weights{ nullptr };
     const ITensor *_projection_bias{ nullptr };
     const ITensor *_input_to_forget_weights{ nullptr };
@@ -370,7 +373,10 @@ private:
     {
         // Output quantization scale will be different, but ignored here
         // since it will be configured at configure() stage.
-        const TensorInfo out{ in };
+        const TensorInfo out
+        {
+            in
+        };
         return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
     }
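
The net effect of this patch is that NEQLSTMLayer now holds NEArithmeticAddition/NEArithmeticSubtraction runtime functions as members instead of the raw kernels, so element-wise addition and subtraction are configured and scheduled through the function-level API. The sketch below shows that function-level configure/run pattern for NEArithmeticAddition in isolation; it is a minimal, illustrative example rather than code from this patch, and the tensor shapes, F32 data type, and SATURATE convert policy are assumed placeholder values.

// Minimal sketch (assumed example values): using the NEArithmeticAddition
// runtime function instead of scheduling NEArithmeticAdditionKernel directly.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Declare the input/output tensors and describe their shape and type.
    Tensor a, b, sum;
    const TensorShape shape(16U, 16U); // illustrative shape
    a.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    b.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    sum.allocator()->init(TensorInfo(shape, 1, DataType::F32));

    // Validate the configuration, then configure the function once.
    NEArithmeticAddition add;
    ARM_COMPUTE_ERROR_THROW_ON(NEArithmeticAddition::validate(a.info(), b.info(), sum.info(), ConvertPolicy::SATURATE));
    add.configure(&a, &b, &sum, ConvertPolicy::SATURATE);

    // Allocate backing memory after configuration.
    a.allocator()->allocate();
    b.allocator()->allocate();
    sum.allocator()->allocate();

    // run() dispatches the underlying kernel; it can be called repeatedly.
    add.run();
    return 0;
}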