COMPMID-1281 Investigate concatenation for RNN/LSTM NEON

Change-Id: I7f099348a361a6f2d4efb30618f58bd44dd41e6c
Signed-off-by: John Kesapides <john.kesapides@arm.com>
Reviewed-on: https://review.mlplatform.org/c/712
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: John Kesapides <john.kesapides@arm.com> 2019-02-04 12:37:29 +0000
committer: Pablo Marquez <pablo.tello@arm.com> 2019-03-14 09:29:15 +0000
commit: 917959c88361e8148696c156453f69c6ae0c95c0 (patch)
tree: fbdadd65904b446edcc275f9e85874d2dfb7f8d3 /arm_compute
parent: 894066de8cc26d1a3aca62dcaa6b30a2a1116028 (diff)
download: ComputeLibrary-917959c88361e8148696c156453f69c6ae0c95c0.tar.gz
3 files changed, 23 insertions, 4 deletions
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index b256e73146..9d36405041 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1162,6 +1162,11 @@ inline TensorShape extract_shape(T *data)
     return data->info()->tensor_shape();
 }
 
+inline TensorShape extract_shape(const ITensorInfo *data)
+{
+    return data->tensor_shape();
+}
+
 inline TensorShape extract_shape(ITensorInfo *data)
 {
     return data->tensor_shape();
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index b98e74d969..f3a1aa7c75 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -132,6 +132,7 @@ public:
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     MemoryGroup                     _memory_group;
@@ -176,16 +177,20 @@ private:
     NECopyKernel                    _copy_cell_state;
     NECopyKernel                    _copy_output;
     NEWidthConcatenateLayer         _concat_scratch_buffer;
+    NEWidthConcatenateLayer         _concat_inputs_forget_gate;
+    NEWidthConcatenateLayer         _concat_weights_forget_gate;
+    NEWidthConcatenateLayer         _concat_weights_input_gate;
+    NEWidthConcatenateLayer         _concat_weights_output;
     Tensor                          _input_gate_out1;
     Tensor                          _input_gate_out2;
     Tensor                          _input_gate_out3;
     Tensor                          _input_gate_out4;
-    Tensor                          _input_gate_out5;
     Tensor                          _forget_gate_out1;
     Tensor                          _forget_gate_out2;
     Tensor                          _forget_gate_out3;
     Tensor                          _forget_gate_out4;
     Tensor                          _forget_gate_out5;
+    Tensor                          _forget_gate_out6;
     Tensor                          _cell_state_out1;
     Tensor                          _cell_state_out2;
     Tensor                          _cell_state_out3;
@@ -195,7 +200,6 @@ private:
     Tensor                          _output2;
     Tensor                          _output3;
     Tensor                          _output4;
-    Tensor                          _output5;
     Tensor                          _cell_state_activation;
     Tensor                          _output_state1;
     Tensor                          _ones;
@@ -204,6 +208,7 @@ private:
     bool                            _perform_cell_clipping;
     bool                            _has_projection_weights;
     bool                            _perform_projection_clipping;
+    bool                            _is_prepared;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NELSTMLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h
index e68525fa76..70a81b2788 100644
--- a/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,10 @@
 
 #include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h"
 
+#include "arm_compute/core/utils/misc/Requires.h"
+
 #include <memory>
+#include <type_traits>
 #include <vector>
 
 namespace arm_compute
@@ -56,6 +59,7 @@ public:
      *                           The first dimension (width) is the sum of the input tensors' widths.
      */
     void configure(std::vector<ITensor *> inputs_vector, ITensor *output);
+    void configure(std::vector<const ITensor *> inputs_vector, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayer
      *
      * @param[in] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
@@ -67,6 +71,7 @@ public:
      * @return a status
      */
     static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output);
+    static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output);
 
     // Inherited methods overridden:
     void run() override;
@@ -74,6 +79,10 @@ public:
 private:
     std::unique_ptr<NEWidthConcatenateLayerKernel[]> _concat_kernels_vector;
     unsigned int                                     _num_inputs;
+    template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)>
+    void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output);
+    template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)>
+    static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output);
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEWIDTHCONCATENATELAYER_H__ */
author	John Kesapides <john.kesapides@arm.com>	2019-02-04 12:37:29 +0000
committer	Pablo Marquez <pablo.tello@arm.com>	2019-03-14 09:29:15 +0000
commit	917959c88361e8148696c156453f69c6ae0c95c0 (patch)
tree	fbdadd65904b446edcc275f9e85874d2dfb7f8d3 /arm_compute
parent	894066de8cc26d1a3aca62dcaa6b30a2a1116028 (diff)
download	ComputeLibrary-917959c88361e8148696c156453f69c6ae0c95c0.tar.gz