From f932d2c8409831cb9cb97a2eb65be93ad4709cd6 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 6 Jul 2020 11:27:21 +0100 Subject: COMPMID-3386: Support memory injection in CLConcatenate functions/kernels Signed-off-by: Georgios Pinitas Change-Id: I611adf4f506d406540e920b0bd6befb4b5108918 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3601 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- .../CL/kernels/CLBatchConcatenateLayerKernel.h | 20 ++----- .../CL/kernels/CLDepthConcatenateLayerKernel.h | 22 ++----- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 21 ++----- .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 21 ++----- .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 23 +------- .../CL/kernels/CLWidthConcatenateLayerKernel.h | 19 ++---- arm_compute/runtime/CL/ICLOperator.h | 2 +- .../runtime/CL/functions/CLConcatenateLayer.h | 69 +++++++++++++++++++--- arm_compute/runtime/CL/functions/CLLSTMLayer.h | 9 ++- 9 files changed, 89 insertions(+), 117 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h index 737aa79727..890fecf0c9 100644 --- a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h @@ -50,17 +50,6 @@ public: CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; /** Default destructor */ ~CLBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. 
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output); /** Initialise the kernel's inputs and output * * @param[in] compile_context The compile context to be used. @@ -72,7 +61,7 @@ public: * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. * */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output); + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All. @@ -84,12 +73,11 @@ public: static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, cl::CommandQueue &queue) override; private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _batch_offset; + unsigned int _batch_offset; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h index f9a6bb947d..ad365d655b 100644 --- a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h @@ -30,8 +30,6 @@ namespace arm_compute { -class ICLTensor; - /** Interface for the depth concatenate kernel. * The input tensor will be concatenated into the output tensor. 
*/ @@ -50,17 +48,6 @@ public: CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default; /** Default destructor */ ~CLDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output); /** Initialise the kernel's inputs and output * * @param[in] compile_context The compile context to be used. @@ -72,7 +59,7 @@ public: * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. * */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output); + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel * * @param[in] input Input tensor info. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 @@ -84,12 +71,11 @@ public: static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, cl::CommandQueue &queue) override; private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _depth_offset; + unsigned int _depth_offset; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h index c1b7d6be77..0563fad414 100644 --- a/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h @@ -30,8 +30,6 @@ namespace arm_compute { -class ICLTensor; - /** Interface for the height concatenate kernel. * The input tensor will be concatenated into the output tensor. */ @@ -50,14 +48,6 @@ public: CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default; /** Default destructor */ ~CLHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output); /** Initialise the kernel's inputs and output * * @param[in] compile_context The compile context to be used. @@ -66,7 +56,7 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p input. 
* */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output); + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All. @@ -78,13 +68,12 @@ public: static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, cl::CommandQueue &queue) override; private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _height_offset; - unsigned int _num_elems_processed_per_iteration; + unsigned int _height_offset; + unsigned int _num_elems_processed_per_iteration; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h index e9b3546439..aaedaa90ee 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h @@ -30,8 +30,6 @@ namespace arm_compute { -class ICLTensor; - /** Interface for the width concatenate kernel of 2 tensors. * The input1 and input2 tensors will be concatenated into the output tensor. 
*/ @@ -39,7 +37,7 @@ class CLWidthConcatenate2TensorsKernel : public ICLKernel { public: /** Default constructor */ - CLWidthConcatenate2TensorsKernel(); + CLWidthConcatenate2TensorsKernel() = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -50,13 +48,6 @@ public: CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default; /** Default destructor */ ~CLWidthConcatenate2TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); /** Initialise the kernel's input1s and output * * @param[in] compile_context The compile context to be used. @@ -64,7 +55,7 @@ public: * @param[in] input2 Second input tensor. Data types supported: same as @p input1 * @param[out] output Output tensor. Data types supported: Same as @p input1. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel * * @param[in] input1 First tensor info. Data types supported: All. 
@@ -76,12 +67,8 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h index d3845b68f0..b1d6e890a0 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h @@ -30,8 +30,6 @@ namespace arm_compute { -class ICLTensor; - /** Interface for the width concatenate kernel of 4 tensors. * All input tensors will be concatenated into the output tensor. */ @@ -50,15 +48,6 @@ public: CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default; /** Default destructor */ ~CLWidthConcatenate4TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[in] input3 Third input tensor. Data types supported: same as @p input1 - * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output); /** Initialise the kernel's input1s and output * * @param[in] compile_context The compile context to be used. 
@@ -68,7 +57,7 @@ public: * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 * @param[out] output Output tensor. Data types supported: Same as @p input1. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output); + void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel * * @param[in] input1 First tensor info. Data types supported: All. @@ -82,14 +71,8 @@ public: static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - const ICLTensor *_input3; - const ICLTensor *_input4; - ICLTensor *_output; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, cl::CommandQueue &queue) override; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h index a42ab5126e..cb8777d5fd 100644 --- a/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h @@ -30,8 +30,6 @@ namespace arm_compute { -class ICLTensor; - /** Interface for the width concatenate kernel. * The input tensor will be concatenated into the output tensor. 
*/ @@ -50,14 +48,6 @@ public: CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default; /** Default destructor */ ~CLWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output); /** Initialise the kernel's inputs and output * * @param[in] compile_context The compile context to be used. @@ -66,7 +56,7 @@ public: * @param[in,out] output Output tensor. Data types supported: Same as @p input. * */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output); + void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All. 
@@ -78,12 +68,11 @@ public: static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, cl::CommandQueue &queue) override; private: - const ICLTensor *_input; - ICLTensor *_output; - unsigned int _width_offset; + unsigned int _width_offset; }; } // namespace arm_compute #endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/runtime/CL/ICLOperator.h b/arm_compute/runtime/CL/ICLOperator.h index 7ad0c8ce0e..c9fdd864de 100644 --- a/arm_compute/runtime/CL/ICLOperator.h +++ b/arm_compute/runtime/CL/ICLOperator.h @@ -54,7 +54,7 @@ public: ICLOperator &operator=(ICLOperator &&) = default; // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override final; + void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; void prepare(OperatorTensorMap constants) override final; protected: diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h index c5869345c7..4e8a95be43 100644 --- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h +++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_CLCONCATENATELAYER_H #define ARM_COMPUTE_CLCONCATENATELAYER_H +#include "arm_compute/runtime/CL/ICLOperator.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/CL/ICLKernel.h" @@ -51,16 +52,25 @@ class CLConcatenateLayer : public IFunction public: /** Default constructor */ CLConcatenateLayer(); + /** Destructor */ + ~CLConcatenateLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConcatenateLayer(const CLConcatenateLayer &) = delete; + 
/** Default move constructor */ + CLConcatenateLayer(CLConcatenateLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLConcatenateLayer &operator=(const CLConcatenateLayer &) = delete; + /** Default move assignment operator */ + CLConcatenateLayer &operator=(CLConcatenateLayer &&); /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All. + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All * @param[out] output Output tensor. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. */ - void configure(std::vector &inputs_vector, ICLTensor *output, size_t axis); void configure(std::vector &inputs_vector, ICLTensor *output, size_t axis); /** Initialise the kernel's inputs vector and output. * @@ -68,11 +78,10 @@ public: * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. * * @param[in] compile_context The compile context to be used. - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All. + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All * @param[out] output Output tensor. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. 
*/ - void configure(const CLCompileContext &compile_context, std::vector &inputs_vector, ICLTensor *output, size_t axis); void configure(const CLCompileContext &compile_context, std::vector &inputs_vector, ICLTensor *output, size_t axis); /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer * @@ -85,22 +94,64 @@ public: * * @return a status */ - static Status validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); static Status validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: void run() override; private: - template - void configure_internal(const CLCompileContext &compile_context, std::vector &&inputs_vector, ICLTensor *output, size_t axis); + struct Impl; + std::unique_ptr _impl; +}; + +namespace experimental +{ +/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: + * + * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0). + * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1). + * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2). + * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3). + */ +class CLConcatenateLayer : public ICLOperator +{ +public: + /** Default constructor */ + CLConcatenateLayer(); + /** Initialise the kernel's inputs vector and output. + * + * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. + * + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All + * @param[out] output Output tensor. 
Data types supported: Same as @p input. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + */ + void configure(const CLCompileContext &compile_context, const std::vector &inputs_vector, ITensorInfo *output, size_t axis); + /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer + * + * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. + * + * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + * + * @return a status + */ + static Status validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); - template - static Status validate_internal(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); + // Inherited methods overridden: + MemoryRequirements workspace() const override; + void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; +private: std::vector> _concat_kernels; unsigned int _num_inputs; unsigned int _axis; }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_CLCONCATENATELAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h index 2e44eed6f6..e5733cd784 100644 --- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h +++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h @@ -30,7 +30,6 @@ #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h" #include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include 
"arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" @@ -232,10 +231,10 @@ private: CLCopyKernel _copy_cell_state; CLCopyKernel _copy_output; CLConcatenateLayer _concat_scratch_buffer; - CLWidthConcatenate2TensorsKernel _concat_inputs_forget_gate; - CLWidthConcatenate2TensorsKernel _concat_weights_forget_gate; - CLWidthConcatenate2TensorsKernel _concat_weights_input_gate; - CLWidthConcatenate2TensorsKernel _concat_weights_output; + CLConcatenateLayer _concat_inputs_forget_gate; + CLConcatenateLayer _concat_weights_forget_gate; + CLConcatenateLayer _concat_weights_input_gate; + CLConcatenateLayer _concat_weights_output; CLMemsetKernel _ones_memset_kernel; CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate; CLPixelWiseMultiplicationKernel _pixelwise_mul_input_gate_coeff; -- cgit v1.2.1