From 917959c88361e8148696c156453f69c6ae0c95c0 Mon Sep 17 00:00:00 2001
From: John Kesapides <john.kesapides@arm.com>
Date: Mon, 4 Feb 2019 12:37:29 +0000
Subject: COMPMID-1281 Investigate concatenation for RNN/LSTM NEON

Change-Id: I7f099348a361a6f2d4efb30618f58bd44dd41e6c
Signed-off-by: John Kesapides <john.kesapides@arm.com>
Reviewed-on: https://review.mlplatform.org/c/712
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/utils/misc/ShapeCalculator.h                |  5 +++++
 arm_compute/runtime/NEON/functions/NELSTMLayer.h             | 11 ++++++++---
 arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h | 11 ++++++++++-
 3 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index b256e73146..9d36405041 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1162,6 +1162,11 @@ inline TensorShape extract_shape(T *data)
     return data->info()->tensor_shape();
 }
 
+inline TensorShape extract_shape(const ITensorInfo *data) // Returns the tensor shape held by a read-only tensor info.
+{ // As a non-template overload it is chosen over the generic extract_shape(T *) when the argument is a const ITensorInfo *.
+    return data->tensor_shape(); // Queried on the info directly, not via data->info() as the generic template does.
+}
+
 inline TensorShape extract_shape(ITensorInfo *data)
 {
     return data->tensor_shape();
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index b98e74d969..f3a1aa7c75 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -132,6 +132,7 @@ public:
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     MemoryGroup                     _memory_group;
@@ -176,16 +177,20 @@ private:
     NECopyKernel                    _copy_cell_state;
     NECopyKernel                    _copy_output;
     NEWidthConcatenateLayer         _concat_scratch_buffer;
+    NEWidthConcatenateLayer         _concat_inputs_forget_gate;
+    NEWidthConcatenateLayer         _concat_weights_forget_gate;
+    NEWidthConcatenateLayer         _concat_weights_input_gate;
+    NEWidthConcatenateLayer         _concat_weights_output;
     Tensor                          _input_gate_out1;
     Tensor                          _input_gate_out2;
     Tensor                          _input_gate_out3;
     Tensor                          _input_gate_out4;
-    Tensor                          _input_gate_out5;
     Tensor                          _forget_gate_out1;
     Tensor                          _forget_gate_out2;
     Tensor                          _forget_gate_out3;
     Tensor                          _forget_gate_out4;
     Tensor                          _forget_gate_out5;
+    Tensor                          _forget_gate_out6;
     Tensor                          _cell_state_out1;
     Tensor                          _cell_state_out2;
     Tensor                          _cell_state_out3;
@@ -195,7 +200,6 @@ private:
     Tensor                          _output2;
     Tensor                          _output3;
     Tensor                          _output4;
-    Tensor                          _output5;
     Tensor                          _cell_state_activation;
     Tensor                          _output_state1;
     Tensor                          _ones;
@@ -204,6 +208,7 @@ private:
     bool                            _perform_cell_clipping;
     bool                            _has_projection_weights;
     bool                            _perform_projection_clipping;
+    bool                            _is_prepared;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NELSTMLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h
index e68525fa76..70a81b2788 100644
--- a/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,10 @@
 
 #include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
 
+#include "arm_compute/core/utils/misc/Requires.h"
+
 #include <memory>
+#include <type_traits>
 #include <vector>
 
 namespace arm_compute
@@ -56,6 +59,7 @@ public:
      *                           The first dimension (width) is the sum of the input tensors' widths.
      */
     void configure(std::vector<ITensor *> inputs_vector, ITensor *output);
+    void configure(std::vector<const ITensor *> inputs_vector, ITensor *output); // Overload taking pointers to const input tensors; otherwise the same parameter list as the overload declared above.
     /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayer
      *
      * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
@@ -67,6 +71,7 @@ public:
      * @return a status
      */
     static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output);
+    static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output); // Overload taking pointers to const tensor infos; otherwise the same parameter list as the overload declared above.
 
     // Inherited methods overridden:
     void run() override;
@@ -74,6 +79,10 @@ public:
 private:
     std::unique_ptr<NEWidthConcatenateLayerKernel[]> _concat_kernels_vector;
     unsigned int                                     _num_inputs;
+    template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)> // Condition passed to REQUIRES_TA: TensorType, ignoring cv-qualifiers, is ITensor.
+    void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output); // NOTE(review): presumably the shared implementation behind both configure() overloads - confirm in the .cpp.
+    template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)> // Condition passed to REQUIRES_TA: TensorInfoType, ignoring cv-qualifiers, is ITensorInfo.
+    static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output); // NOTE(review): presumably the shared implementation behind both validate() overloads - confirm in the .cpp.
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEWIDTHCONCATENATELAYER_H__ */
-- 
cgit v1.2.1