diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-07-13 21:21:33 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-07-14 14:28:46 +0000 |
commit | 4667dddc0ed403c636348294cd7f70261e5540cf (patch) | |
tree | 177b74f377dcbb32cf8a83d407c633df255665a0 /arm_compute | |
parent | 2232a201a9f72de483c12a7857c5f08b81cf7396 (diff) | |
download | ComputeLibrary-4667dddc0ed403c636348294cd7f70261e5540cf.tar.gz |
COMPMID-3374: Remove memory state from NEConcatenateLayer kernels
* Allow the following kernels to accept backing memory at run-time:
  * NEBatchConcatenateLayerKernel
  * NEDepthConcatenateLayerKernel
  * NEHeightConcatenateLayerKernel
  * NEWidthConcatenateLayerKernel
* Allow the following functions to accept backing memory at run-time:
  * NEConcatenateLayer
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ib0b6714cff7f06a52dc74d294bc3e0d72a1c2419
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3569
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
8 files changed, 87 insertions, 42 deletions
diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h index 267211fad6..f397a29b48 100644 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -56,15 +56,15 @@ public: ~NEBatchConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: All. + * @param[in] input Input tensor info. Data types supported: All. * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. * * @note: The output tensor's low two dimensions can't be smaller than the input one's. * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. * */ - void configure(const ITensor *input, unsigned int batch_offset, ITensor *output); + void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All. 
@@ -76,15 +76,14 @@ public: static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) override; private: using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); private: BatchConcatFunction *_func; - const ITensor *_input; - ITensor *_output; unsigned int _batch_offset; }; } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h index a465146184..e1aaa59f25 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h @@ -56,15 +56,15 @@ public: ~NEDepthConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. * * @note: The output tensor's low two dimensions can't be smaller than the input one's. * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. * */ - void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); + void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel * * @param[in] input Input tensor info. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -76,15 +76,14 @@ public: static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) override; private: using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); private: DepthConcatFunction *_func; - const ITensor *_input; - ITensor *_output; unsigned int _depth_offset; }; } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h index e8a9d9ad94..d463b53e2c 100644 --- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,12 +57,12 @@ public: ~NEHeightConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: All + * @param[in] input Input tensor info. Data types supported: All * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. * */ - void configure(const ITensor *input, unsigned int height_offset, ITensor *output); + void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel * * @param[in] input Input tensor info. 
Data types supported: All @@ -74,12 +74,11 @@ public: static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) override; private: - const ITensor *_input; - ITensor *_output; - unsigned int _height_offset; + unsigned int _height_offset; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h index 442d35c656..b5336ad026 100644 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,12 +57,11 @@ public: ~NEWidthConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: All + * @param[in] input Input tensor info. Data types supported: All * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. */ - void configure(const ITensor *input, unsigned int width_offset, ITensor *output); + void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel * * @param[in] input Input tensor info. 
Data types supported: All @@ -74,12 +73,11 @@ public: static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) override; private: - const ITensor *_input; - ITensor *_output; - unsigned int _width_offset; + unsigned int _width_offset; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index a009c0dab0..eca833e708 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -34,7 +34,7 @@ namespace arm_compute class ITensor; /** Memory type */ -enum class TensorType +enum TensorType : int32_t { ACL_UNKNOWN = -1, ACL_SRC = 0, @@ -47,11 +47,12 @@ enum class TensorType ACL_INT = 50, ACL_INT_0 = 50, ACL_INT_1 = 51, - ACL_INT_2 = 52 + ACL_INT_2 = 52, + ACL_SRC_VEC = 256, }; -using InputTensorMap = std::map<TensorType, const ITensor *>; -using OutputTensorMap = std::map<TensorType, ITensor *>; +using InputTensorMap = std::map<int32_t, const ITensor *>; +using OutputTensorMap = std::map<int32_t, ITensor *>; using OperatorTensorMap = OutputTensorMap; namespace experimental diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h index fecaa9d79c..af748341a5 100644 --- a/arm_compute/graph/backends/FunctionHelpers.h +++ b/arm_compute/graph/backends/FunctionHelpers.h @@ -383,7 +383,7 @@ std::unique_ptr<arm_compute::IFunction> create_concatenate_layer(ConcatenateLaye } // Extract IO and info - std::vector<typename TargetInfo::TensorType *> inputs; + std::vector<typename TargetInfo::SrcTensorType *> inputs; for(unsigned int i = 0; i < node.num_inputs(); ++i) { 
inputs.push_back(get_backing_tensor<TargetInfo>(node.input(i))); diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index 4f42efde7f..004abb245f 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -54,7 +54,7 @@ public: INEOperator &operator=(INEOperator &&) = default; // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override final; + void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; void prepare(OperatorTensorMap constants) override final; protected: diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index b5c200b467..02c27e20e6 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Requires.h" +#include "arm_compute/runtime/NEON/INEOperator.h" #include <memory> #include <vector> @@ -52,6 +53,16 @@ class NEConcatenateLayer : public IFunction public: /** Default constructor */ NEConcatenateLayer(); + /** Destructor */ + ~NEConcatenateLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenateLayer(const NEConcatenateLayer &) = delete; + /** Default move constructor */ + NEConcatenateLayer(NEConcatenateLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenateLayer &operator=(const NEConcatenateLayer &) = delete; + /** Default move assignment operator */ + NEConcatenateLayer &operator=(NEConcatenateLayer &&); /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. @@ -61,7 +72,6 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. 
*/ - void configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis); void configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis); /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer * @@ -74,23 +84,62 @@ public: * * @return a status */ - static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: void run() override; private: - template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)> - void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis); + struct Impl; + std::unique_ptr<Impl> _impl; +}; + +namespace experimental +{ +/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: + * + * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0). + * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1). + * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2). + * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3). + */ +class NEConcatenateLayer : public INEOperator +{ +public: + /** Default constructor */ + NEConcatenateLayer(); + /** Initialise the kernel's inputs vector and output. + * + * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. 
+ * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + */ + void configure(const std::vector<const ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis); + /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer + * + * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. + * + * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + * + * @return a status + */ + static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis); - template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)> - static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis); + // Inherited methods overridden: + MemoryRequirements workspace() const override; + void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; private: std::vector<std::unique_ptr<INEKernel>> _concat_kernels; unsigned int _num_inputs; unsigned int _axis; }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NECONCATENATELAYER_H */ |