author      Michele Di Giorgio <michele.digiorgio@arm.com>   2020-07-06 11:27:21 +0100
committer   Georgios Pinitas <georgios.pinitas@arm.com>      2020-07-22 10:35:29 +0000
commit      f932d2c8409831cb9cb97a2eb65be93ad4709cd6 (patch)
tree        44fd816205d9b908deaff9f21e5018da42e3cd80 /arm_compute
parent      f20d6d6ae5a0da2c856294e93341cdc065db58f9 (diff)
download    ComputeLibrary-f932d2c8409831cb9cb97a2eb65be93ad4709cd6.tar.gz
COMPMID-3386: Support memory injection in CLConcatenate functions/kernels
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I611adf4f506d406540e920b0bd6befb4b5108918
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3601
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h    | 20
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h    | 22
-rw-r--r--  arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h   | 21
-rw-r--r--  arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h | 21
-rw-r--r--  arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h | 23
-rw-r--r--  arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h    | 19
-rw-r--r--  arm_compute/runtime/CL/ICLOperator.h                            |  2
-rw-r--r--  arm_compute/runtime/CL/functions/CLConcatenateLayer.h          | 69
-rw-r--r--  arm_compute/runtime/CL/functions/CLLSTMLayer.h                 |  9
9 files changed, 89 insertions, 117 deletions
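
The headers that follow switch the CL concatenation kernels from cached ICLTensor pointers to a metadata-only configure(ITensorInfo *, ...) plus a run_op() that receives the backing tensors per invocation, and they add an experimental::CLConcatenateLayer operator on top. A minimal usage sketch of that operator with injected memory is shown below; the tensor-map key convention (ACL_SRC_VEC, ACL_DST) and CLKernelLibrary::get().get_compile_context() are assumptions for illustration and are not taken from this patch:

    // Sketch only: configure with tensor metadata, inject the real tensors at run time.
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"

    using namespace arm_compute;

    void concat_with_injected_memory(CLTensor &src0, CLTensor &src1, CLTensor &dst)
    {
        experimental::CLConcatenateLayer concat;

        // Configuration binds no device memory; only ITensorInfo objects are needed.
        concat.configure(CLKernelLibrary::get().get_compile_context(),
                         { src0.info(), src1.info() }, dst.info(), /* axis */ 0);

        // Memory injection: the actual tensors are supplied per run through the maps
        // (key convention assumed here, not defined in this header).
        InputTensorMap  inputs  = { { TensorType::ACL_SRC_VEC, &src0 },
                                    { TensorType::ACL_SRC_VEC + 1, &src1 } };
        OutputTensorMap outputs = { { TensorType::ACL_DST, &dst } };
        concat.run(inputs, outputs, {});
    }
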
diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
index 737aa79727..890fecf0c9 100644
--- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h
@@ -52,17 +52,6 @@ public:
~CLBatchConcatenateLayerKernel() = default;
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
* @param[in] compile_context The compile context to be used.
* @param[in] input Input tensor. Data types supported: All.
* @param[in] batch_offset The offset on axis # 3.
@@ -72,7 +61,7 @@ public:
* @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
*
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel
*
* @param[in] input Input tensor info. Data types supported: All.
@@ -84,12 +73,11 @@ public:
static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
+ void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, cl::CommandQueue &queue) override;
private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _batch_offset;
+ unsigned int _batch_offset;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */
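
The remaining kernel headers below follow the same pattern: the cached ICLTensor members disappear and run() becomes run_op(), so each kernel is stateless with respect to memory. A sketch of driving such a kernel directly under that model; the map keys (ACL_SRC, ACL_DST), the compile-context access, and the scheduler/queue usage are assumptions for illustration:

    // Sketch only: the kernel keeps no tensor pointers, so tensors travel with each run_op() call.
    // Assumes CLScheduler::get().default_init() has been called.
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"

    using namespace arm_compute;

    void run_batch_concat(CLTensor &src, CLTensor &dst, unsigned int batch_offset)
    {
        CLBatchConcatenateLayerKernel kernel;
        kernel.configure(CLKernelLibrary::get().get_compile_context(), src.info(), batch_offset, dst.info());

        // The actual tensors are injected here, alongside the execution window and queue.
        const InputTensorMap  inputs  = { { TensorType::ACL_SRC, &src } };
        const OutputTensorMap outputs = { { TensorType::ACL_DST, &dst } };
        kernel.run_op(inputs, outputs, kernel.window(), CLScheduler::get().queue());
    }
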
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
index f9a6bb947d..ad365d655b 100644
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
@@ -30,8 +30,6 @@
namespace arm_compute
{
-class ICLTensor;
-
/** Interface for the depth concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
@@ -52,17 +50,6 @@ public:
~CLDepthConcatenateLayerKernel() = default;
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
* @param[in] compile_context The compile context to be used.
* @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] depth_offset The offset on the Z axis.
@@ -72,7 +59,7 @@ public:
* @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
*
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
*
* @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
@@ -84,12 +71,11 @@ public:
static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
+ void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, cl::CommandQueue &queue) override;
private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _depth_offset;
+ unsigned int _depth_offset;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
index c1b7d6be77..0563fad414 100644
--- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h
@@ -30,8 +30,6 @@
namespace arm_compute
{
-class ICLTensor;
-
/** Interface for the height concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
@@ -52,21 +50,13 @@ public:
~CLHeightConcatenateLayerKernel() = default;
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
* @param[in] compile_context The compile context to be used.
* @param[in] input Input tensor. Data types supported: All.
* @param[in] height_offset The starting offset on the Y axis for the output tensor.
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel
*
* @param[in] input Input tensor info. Data types supported: All.
@@ -78,13 +68,12 @@ public:
static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
+ void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, cl::CommandQueue &queue) override;
private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _height_offset;
- unsigned int _num_elems_processed_per_iteration;
+ unsigned int _height_offset;
+ unsigned int _num_elems_processed_per_iteration;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
index e9b3546439..aaedaa90ee 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
@@ -30,8 +30,6 @@
namespace arm_compute
{
-class ICLTensor;
-
/** Interface for the width concatenate kernel of 2 tensors.
* The input1 and input2 tensors will be concatenated into the output tensor.
*/
@@ -39,7 +37,7 @@ class CLWidthConcatenate2TensorsKernel : public ICLKernel
{
public:
/** Default constructor */
- CLWidthConcatenate2TensorsKernel();
+ CLWidthConcatenate2TensorsKernel() = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
@@ -52,19 +50,12 @@ public:
~CLWidthConcatenate2TensorsKernel() = default;
/** Initialise the kernel's input1s and output
*
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Initialise the kernel's input1s and output
- *
* @param[in] compile_context The compile context to be used.
* @param[in] input1 First input tensor. Data types supported: All.
* @param[in] input2 Second input tensor. Data types supported: same as @p input1
* @param[out] output Output tensor. Data types supported: Same as @p input1.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel
*
* @param[in] input1 First tensor info. Data types supported: All.
@@ -76,12 +67,8 @@ public:
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
+ void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, cl::CommandQueue &queue) override;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
index d3845b68f0..b1d6e890a0 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
@@ -30,8 +30,6 @@
namespace arm_compute
{
-class ICLTensor;
-
/** Interface for the width concatenate kernel of 4 tensors.
* All input tensors will be concatenated into the output tensor.
*/
@@ -52,15 +50,6 @@ public:
~CLWidthConcatenate4TensorsKernel() = default;
/** Initialise the kernel's input1s and output
*
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[in] input3 Third input tensor. Data types supported: same as @p input1
- * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output);
- /** Initialise the kernel's input1s and output
- *
* @param[in] compile_context The compile context to be used.
* @param[in] input1 First input tensor. Data types supported: All.
* @param[in] input2 Second input tensor. Data types supported: same as @p input1
@@ -68,7 +57,7 @@ public:
* @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
* @param[out] output Output tensor. Data types supported: Same as @p input1.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel
*
* @param[in] input1 First tensor info. Data types supported: All.
@@ -82,14 +71,8 @@ public:
static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- const ICLTensor *_input3;
- const ICLTensor *_input4;
- ICLTensor *_output;
+ void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, cl::CommandQueue &queue) override;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */
diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
index a42ab5126e..cb8777d5fd 100644
--- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h
@@ -30,8 +30,6 @@
namespace arm_compute
{
-class ICLTensor;
-
/** Interface for the width concatenate kernel.
* The input tensor will be concatenated into the output tensor.
*/
@@ -52,21 +50,13 @@ public:
~CLWidthConcatenateLayerKernel() = default;
/** Initialise the kernel's inputs and output
*
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
* @param[in] compile_context The compile context to be used.
* @param[in] input Input tensor. Data types supported: All.
* @param[in] width_offset The offset on the X axis.
* @param[in,out] output Output tensor. Data types supported: Same as @p input.
*
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
*
* @param[in] input Input tensor info. Data types supported: All.
@@ -78,12 +68,11 @@ public:
static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
+ void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+ const Window &window, cl::CommandQueue &queue) override;
private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _width_offset;
+ unsigned int _width_offset;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */
diff --git a/arm_compute/runtime/CL/ICLOperator.h b/arm_compute/runtime/CL/ICLOperator.h
index 7ad0c8ce0e..c9fdd864de 100644
--- a/arm_compute/runtime/CL/ICLOperator.h
+++ b/arm_compute/runtime/CL/ICLOperator.h
@@ -54,7 +54,7 @@ public:
ICLOperator &operator=(ICLOperator &&) = default;
// Inherited methods overridden:
- void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override final;
+ void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override;
void prepare(OperatorTensorMap constants) override final;
protected:
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index c5869345c7..4e8a95be43 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_CLCONCATENATELAYER_H
#define ARM_COMPUTE_CLCONCATENATELAYER_H
+#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/CL/ICLKernel.h"
@@ -51,16 +52,25 @@ class CLConcatenateLayer : public IFunction
public:
/** Default constructor */
CLConcatenateLayer();
+ /** Destructor */
+ ~CLConcatenateLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConcatenateLayer(const CLConcatenateLayer &) = delete;
+ /** Default move constructor */
+ CLConcatenateLayer(CLConcatenateLayer &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConcatenateLayer &operator=(const CLConcatenateLayer &) = delete;
+ /** Default move assignment operator */
+ CLConcatenateLayer &operator=(CLConcatenateLayer &&);
/** Initialise the kernel's inputs vector and output.
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
* @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
*
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All.
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*/
- void configure(std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
void configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
/** Initialise the kernel's inputs vector and output.
*
@@ -68,11 +78,10 @@ public:
* @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All.
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
* @param[out] output Output tensor. Data types supported: Same as @p input.
* @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
*/
- void configure(const CLCompileContext &compile_context, std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
void configure(const CLCompileContext &compile_context, std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
/** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer
*
@@ -85,22 +94,64 @@ public:
*
* @return a status
*/
- static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
// Inherited methods overridden:
void run() override;
private:
- template <typename TensorType>
- void configure_internal(const CLCompileContext &compile_context, std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis);
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+};
+
+namespace experimental
+{
+/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
+ *
+ * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
+ * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
+ * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
+ * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
+ */
+class CLConcatenateLayer : public ICLOperator
+{
+public:
+ /** Default constructor */
+ CLConcatenateLayer();
+ /** Initialise the kernel's inputs vector and output.
+ *
+ * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
+ * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+ *
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+ */
+ void configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
+ *
+ * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
+ * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+ *
+ * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+ *
+ * @return a status
+ */
+ static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
- template <typename TensorInfoType>
- static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis);
+ // Inherited methods overridden:
+ MemoryRequirements workspace() const override;
+ void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override;
+private:
std::vector<std::unique_ptr<ICLKernel>> _concat_kernels;
unsigned int _num_inputs;
unsigned int _axis;
};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCONCATENATELAYER_H */
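
The public CLConcatenateLayer keeps its ICLTensor-based configure()/run() pair, so existing users such as CLLSTMLayer (next hunk) can simply swap their CLWidthConcatenate2TensorsKernel members for CLConcatenateLayer. A short usage sketch of that unchanged entry point (tensor names are illustrative):

    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"

    using namespace arm_compute;

    void concat_along_width(const CLTensor &a, const CLTensor &b, CLTensor &out)
    {
        std::vector<const ICLTensor *> inputs{ &a, &b };

        CLConcatenateLayer concat;
        concat.configure(inputs, &out, /* axis */ 0); // width is underlying axis 0
        concat.run(); // per this patch, the function forwards the tensors to its internal operator
    }
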
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index 2e44eed6f6..e5733cd784 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -30,7 +30,6 @@
#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
@@ -232,10 +231,10 @@ private:
CLCopyKernel _copy_cell_state;
CLCopyKernel _copy_output;
CLConcatenateLayer _concat_scratch_buffer;
- CLWidthConcatenate2TensorsKernel _concat_inputs_forget_gate;
- CLWidthConcatenate2TensorsKernel _concat_weights_forget_gate;
- CLWidthConcatenate2TensorsKernel _concat_weights_input_gate;
- CLWidthConcatenate2TensorsKernel _concat_weights_output;
+ CLConcatenateLayer _concat_inputs_forget_gate;
+ CLConcatenateLayer _concat_weights_forget_gate;
+ CLConcatenateLayer _concat_weights_input_gate;
+ CLConcatenateLayer _concat_weights_output;
CLMemsetKernel _ones_memset_kernel;
CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
CLPixelWiseMultiplicationKernel _pixelwise_mul_input_gate_coeff;