From 4667dddc0ed403c636348294cd7f70261e5540cf Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 13 Jul 2020 21:21:33 +0100 Subject: COMPMID-3374: Remove memory state from NEConcatenateLayer kernels * Allow the following kernels to accept backing memory at run-time: * NEBatchConcatenateLayerKernel * NEDepthConcatenateLayerKernel * NEHeightConcatenateLayerKernel * NEWidthConcatenateLayerKernel * Allow the following functions to accept backing memory at run-time: * NEConcatenateLayer Signed-off-by: Georgios Pinitas Change-Id: Ib0b6714cff7f06a52dc74d294bc3e0d72a1c2419 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3569 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- .../NEON/kernels/NEBatchConcatenateLayerKernel.h | 11 +- .../NEON/kernels/NEDepthConcatenateLayerKernel.h | 11 +- .../NEON/kernels/NEHeightConcatenateLayerKernel.h | 15 ++- .../NEON/kernels/NEWidthConcatenateLayerKernel.h | 16 ++- arm_compute/core/experimental/Types.h | 9 +- arm_compute/graph/backends/FunctionHelpers.h | 2 +- arm_compute/runtime/NEON/INEOperator.h | 2 +- .../runtime/NEON/functions/NEConcatenateLayer.h | 63 ++++++++-- .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 21 ++-- .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 21 ++-- .../kernels/NEHeightConcatenateLayerKernel.cpp | 39 +++--- .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 34 +++--- src/graph/backends/CL/CLFunctionsFactory.cpp | 1 + src/graph/backends/GLES/GCFunctionsFactory.cpp | 3 +- src/graph/backends/NEON/NEFunctionFactory.cpp | 1 + src/runtime/NEON/functions/NEConcatenateLayer.cpp | 131 ++++++++++++++------- src/runtime/NEON/functions/NELSTMLayer.cpp | 4 +- src/runtime/NEON/functions/NEPadLayer.cpp | 2 +- tests/validation/NEON/BatchConcatenateLayer.cpp | 4 +- tests/validation/NEON/DepthConcatenateLayer.cpp | 4 +- tests/validation/NEON/HeightConcatenateLayer.cpp | 4 +- tests/validation/NEON/WidthConcatenateLayer.cpp | 4 +- .../validation/fixtures/ConcatenateLayerFixture.h | 9 +- 23 files changed, 257 insertions(+), 154 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h index 267211fad6..f397a29b48 100644 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -56,15 +56,15 @@ public: ~NEBatchConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: All. + * @param[in] input Input tensor info. Data types supported: All. * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. * * @note: The output tensor's low two dimensions can't be smaller than the input one's. * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. * */ - void configure(const ITensor *input, unsigned int batch_offset, ITensor *output); + void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All. @@ -76,15 +76,14 @@ public: static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) override; private: using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); private: BatchConcatFunction *_func; - const ITensor *_input; - ITensor *_output; unsigned int _batch_offset; }; } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h index a465146184..e1aaa59f25 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h @@ -56,15 +56,15 @@ public: ~NEDepthConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. * * @note: The output tensor's low two dimensions can't be smaller than the input one's. * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. * */ - void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); + void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -76,15 +76,14 @@ public: static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) override; private: using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); private: DepthConcatFunction *_func; - const ITensor *_input; - ITensor *_output; unsigned int _depth_offset; }; } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h index e8a9d9ad94..d463b53e2c 100644 --- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,12 +57,12 @@ public: ~NEHeightConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: All + * @param[in] input Input tensor info. Data types supported: All * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. * */ - void configure(const ITensor *input, unsigned int height_offset, ITensor *output); + void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All @@ -74,12 +74,11 @@ public: static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) override; private: - const ITensor *_input; - ITensor *_output; - unsigned int _height_offset; + unsigned int _height_offset; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h index 442d35c656..b5336ad026 100644 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,12 +57,11 @@ public: ~NEWidthConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: All + * @param[in] input Input tensor info. Data types supported: All * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * + * @param[in,out] output Output tensor info. Data types supported: Same as @p input. */ - void configure(const ITensor *input, unsigned int width_offset, ITensor *output); + void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel * * @param[in] input Input tensor info. Data types supported: All @@ -74,12 +73,11 @@ public: static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) override; private: - const ITensor *_input; - ITensor *_output; - unsigned int _width_offset; + unsigned int _width_offset; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index a009c0dab0..eca833e708 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -34,7 +34,7 @@ namespace arm_compute class ITensor; /** Memory type */ -enum class TensorType +enum TensorType : int32_t { ACL_UNKNOWN = -1, ACL_SRC = 0, @@ -47,11 +47,12 @@ enum class TensorType ACL_INT = 50, ACL_INT_0 = 50, ACL_INT_1 = 51, - ACL_INT_2 = 52 + ACL_INT_2 = 52, + ACL_SRC_VEC = 256, }; -using InputTensorMap = std::map; -using OutputTensorMap = std::map; +using InputTensorMap = std::map; +using OutputTensorMap = std::map; using OperatorTensorMap = OutputTensorMap; namespace experimental diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h index fecaa9d79c..af748341a5 100644 --- a/arm_compute/graph/backends/FunctionHelpers.h +++ b/arm_compute/graph/backends/FunctionHelpers.h @@ -383,7 +383,7 @@ std::unique_ptr create_concatenate_layer(ConcatenateLaye } // Extract IO and info - std::vector inputs; + std::vector inputs; for(unsigned int i = 0; i < node.num_inputs(); ++i) { inputs.push_back(get_backing_tensor(node.input(i))); diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index 4f42efde7f..004abb245f 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -54,7 +54,7 @@ public: INEOperator &operator=(INEOperator &&) = default; // Inherited methods overridden: - void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override final; + void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; void prepare(OperatorTensorMap constants) override final; protected: diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index b5c200b467..02c27e20e6 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Requires.h" +#include "arm_compute/runtime/NEON/INEOperator.h" #include #include @@ -52,6 +53,16 @@ class NEConcatenateLayer : public IFunction public: /** Default constructor */ NEConcatenateLayer(); + /** Destructor */ + ~NEConcatenateLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenateLayer(const NEConcatenateLayer &) = delete; + /** Default move constructor */ + NEConcatenateLayer(NEConcatenateLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenateLayer &operator=(const NEConcatenateLayer &) = delete; + /** Default move assignment operator */ + NEConcatenateLayer &operator=(NEConcatenateLayer &&); /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. @@ -61,7 +72,6 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. */ - void configure(std::vector inputs_vector, ITensor *output, size_t axis); void configure(std::vector inputs_vector, ITensor *output, size_t axis); /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer * @@ -74,23 +84,62 @@ public: * * @return a status */ - static Status validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); static Status validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); // Inherited methods overridden: void run() override; private: - template ::type, ITensor>::value)> - void configure_internal(std::vector &&inputs_vector, ITensor *output, size_t axis); + struct Impl; + std::unique_ptr _impl; +}; + +namespace experimental +{ +/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: + * + * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0). + * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1). + * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2). + * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3). + */ +class NEConcatenateLayer : public INEOperator +{ +public: + /** Default constructor */ + NEConcatenateLayer(); + /** Initialise the kernel's inputs vector and output. + * + * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + */ + void configure(const std::vector &inputs_vector, ITensorInfo *output, size_t axis); + /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer + * + * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. + * + * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + * + * @return a status + */ + static Status validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); - template ::type, ITensorInfo>::value)> - static Status validate_internal(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); + // Inherited methods overridden: + MemoryRequirements workspace() const override; + void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override; private: std::vector> _concat_kernels; unsigned int _num_inputs; unsigned int _axis; }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NECONCATENATELAYER_H */ diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index 65789160f6..c597afd804 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -141,21 +141,19 @@ Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, c } // namespace NEBatchConcatenateLayerKernel::NEBatchConcatenateLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _batch_offset(0) + : _func(nullptr), _batch_offset(0) { } -void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int batch_offset, ITensor *output) +void NEBatchConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), batch_offset, output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output)); _func = nullptr; - _input = input; - _output = output; _batch_offset = batch_offset; - switch(input->info()->data_type()) + switch(input->data_type()) { case DataType::S8: case DataType::U8: @@ -178,10 +176,10 @@ void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int } // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); + Window win = calculate_max_window(*output, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); INEKernel::configure(win); } @@ -193,13 +191,14 @@ Status NEBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void NEBatchConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEBatchConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(_input, _output, _batch_offset, window); + (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _batch_offset, window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index a95d711f43..49e10de94e 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -142,21 +142,19 @@ Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, c } // namespace NEDepthConcatenateLayerKernel::NEDepthConcatenateLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _depth_offset(0) + : _func(nullptr), _depth_offset(0) { } -void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int depth_offset, ITensor *output) +void NEDepthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), depth_offset, output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output)); _func = nullptr; - _input = input; - _output = output; _depth_offset = depth_offset; - switch(input->info()->data_type()) + switch(input->data_type()) { case DataType::QASYMM8: _func = &depth_concat; @@ -175,11 +173,11 @@ void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int } // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); + Window win = calculate_max_window(*output, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); + coord.set_num_dimensions(output->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); INEKernel::configure(win); } @@ -191,13 +189,14 @@ Status NEDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *i return Status{}; } -void NEDepthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEDepthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(_input, _output, _depth_offset, window); + (*_func)(inputs.at(TensorType::ACL_SRC), outputs.at(TensorType::ACL_DST), _depth_offset, window); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index 0adf996cca..d4043e02b7 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -58,24 +58,23 @@ Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, } // namespace NEHeightConcatenateLayerKernel::NEHeightConcatenateLayerKernel() - : _input(nullptr), _output(nullptr), _height_offset(0) + : _height_offset(0) { } -void NEHeightConcatenateLayerKernel::configure(const ITensor *input, unsigned int height_offset, ITensor *output) +void NEHeightConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output) { + ARM_COMPUTE_UNUSED(input); ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), height_offset, output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output)); - _input = input; - _output = output; _height_offset = height_offset; // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); + Window win = calculate_max_window(*output, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); INEKernel::configure(win); } @@ -85,30 +84,34 @@ Status NEHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsign return Status{}; } -void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEHeightConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + const auto src = inputs.at(TensorType::ACL_SRC); + auto dst = outputs.at(TensorType::ACL_DST); + // Offset output pointer to the correct position - uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY]; + uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _height_offset * dst->info()->strides_in_bytes()[Window::DimY]; const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()) * static_cast(_output->info()->element_size()); - const int window_step_x = 16; + const auto window_end_x = static_cast(window.x().end()) * static_cast(dst->info()->element_size()); + const int window_step_x = 16; Window win{ window }; win.set(Window::DimX, Window::Dimension(0, 1, 1)); - win.set(Window::DimY, Window::Dimension(0, _input->info()->tensor_shape().y(), 1)); + win.set(Window::DimY, Window::Dimension(0, src->info()->tensor_shape().y(), 1)); // Create iterators - Iterator input(_input, win); - Iterator output(_output, win); + Iterator input(src, win); + Iterator output(dst, win); - const DataType dt = _input->info()->data_type(); - const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); + const DataType dt = src->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = src->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = dst->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(win, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index f5bdfae5d6..1b32e3614e 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -58,24 +58,22 @@ Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, c } // namespace NEWidthConcatenateLayerKernel::NEWidthConcatenateLayerKernel() - : _input(nullptr), _output(nullptr), _width_offset(0) + : _width_offset(0) { } -void NEWidthConcatenateLayerKernel::configure(const ITensor *input, unsigned int width_offset, ITensor *output) +void NEWidthConcatenateLayerKernel::configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), width_offset, output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output)); - _input = input; - _output = output; _width_offset = width_offset; // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); + Window win = calculate_max_window(*input, Steps()); Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); INEKernel::configure(win); } @@ -86,28 +84,32 @@ Status NEWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigne return Status{}; } -void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +void NEWidthConcatenateLayerKernel::run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, + const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + const auto src = inputs.at(TensorType::ACL_SRC); + auto dst = outputs.at(TensorType::ACL_DST); + // Offset output pointer to the correct position - uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0]; + uint8_t *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + _width_offset * dst->info()->strides_in_bytes()[0]; const auto window_start_x = static_cast(window.x().start()); - const auto window_end_x = static_cast(window.x().end()) * static_cast(_output->info()->element_size()); + const auto window_end_x = static_cast(window.x().end()) * static_cast(dst->info()->element_size()); constexpr int window_step_x = 16; Window win{ window }; win.set(Window::DimX, Window::Dimension(0, 1, 1)); // Create iterators - Iterator input(_input, win); - Iterator output(_output, win); - const DataType dt = _input->info()->data_type(); - const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); + Iterator input(src, win); + Iterator output(dst, win); + const DataType dt = src->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = src->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = dst->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(win, [&](const Coordinates &) diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp index 917741a2b7..ceff6e5cef 100644 --- a/src/graph/backends/CL/CLFunctionsFactory.cpp +++ b/src/graph/backends/CL/CLFunctionsFactory.cpp @@ -42,6 +42,7 @@ namespace backends struct CLTargetInfo { using TensorType = arm_compute::ICLTensor; + using SrcTensorType = TensorType; using TensorConcreteType = CLTensor; static Target TargetType; }; diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index a78f51cdbd..8ecb593e11 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -40,7 +40,8 @@ namespace backends /** Target specific information structure used to pass information to the layer templates */ struct GCTargetInfo { - using TensorType = arm_compute::IGCTensor; + using TensorType = arm_compute::IGCTensor; + using SrcTensorType = TensorType; static Target TargetType; }; diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp index 2f313081e0..4fee630192 100644 --- a/src/graph/backends/NEON/NEFunctionFactory.cpp +++ b/src/graph/backends/NEON/NEFunctionFactory.cpp @@ -47,6 +47,7 @@ namespace backends struct NETargetInfo { using TensorType = arm_compute::ITensor; + using SrcTensorType = const arm_compute::ITensor; using TensorConcreteType = arm_compute::Tensor; static Target TargetType; }; diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 9c480a0d50..37cdd15529 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -39,58 +39,31 @@ namespace arm_compute { -NEConcatenateLayer::NEConcatenateLayer() - : _concat_kernels(), - _num_inputs(0), - _axis(Window::DimX) -{ -} - -void NEConcatenateLayer::configure(std::vector inputs_vector, ITensor *output, size_t axis) -{ - configure_internal(std::move(inputs_vector), output, axis); -} - -void NEConcatenateLayer::configure(std::vector inputs_vector, ITensor *output, size_t axis) +namespace experimental { - configure_internal(std::move(inputs_vector), output, axis); -} - -Status NEConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) -{ - return validate_internal(inputs_vector, output, axis); -} - -Status NEConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) +NEConcatenateLayer::NEConcatenateLayer() + : _concat_kernels(), _num_inputs(0), _axis(0) { - return validate_internal(inputs_vector, output, axis); } -template -void NEConcatenateLayer::configure_internal(std::vector &&inputs_vector, ITensor *output, size_t axis) +void NEConcatenateLayer::configure(const std::vector &inputs_vector, ITensorInfo *output, size_t axis) { ARM_COMPUTE_ERROR_ON(output == nullptr); + _axis = axis; _num_inputs = inputs_vector.size(); - std::vector inputs_vector_info; - inputs_vector_info.reserve(_num_inputs); - for(unsigned int i = 0; i < _num_inputs; ++i) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(inputs_vector.at(i)); - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis); + TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis); // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(NEConcatenateLayer::validate(inputs_vector_info, output->info(), axis)); + auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type()); + ARM_COMPUTE_ERROR_THROW_ON(NEConcatenateLayer::validate(inputs_vector, output, axis)); unsigned int offset = 0; for(unsigned int i = 0; i < _num_inputs; ++i) { - switch(_axis) + switch(axis) { case Window::DimX: { @@ -123,12 +96,11 @@ void NEConcatenateLayer::configure_internal(std::vector &&inputs_v default: ARM_COMPUTE_ERROR("Axis not supported"); } - offset += inputs_vector.at(i)->info()->dimension(_axis); + offset += inputs_vector.at(i)->dimension(axis); } } -template -Status NEConcatenateLayer::validate_internal(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) +Status NEConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); @@ -174,11 +146,88 @@ Status NEConcatenateLayer::validate_internal(const std::vector return Status{}; } +MemoryRequirements NEConcatenateLayer::workspace() const +{ + return MemoryRequirements{}; +} + +void NEConcatenateLayer::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) +{ + ARM_COMPUTE_UNUSED(workspace); + + if(inputs.empty() || outputs.empty()) + { + ARM_COMPUTE_ERROR("No inputs provided"); + } + + if(inputs.size() != _num_inputs) + { + ARM_COMPUTE_ERROR("Configured with different number of inputs"); + } + + int i = 0; + for(auto &k : _concat_kernels) + { + const InputTensorMap input = { { TensorType::ACL_SRC, inputs.at(ACL_SRC_VEC + i) } }; + NEScheduler::get().schedule_op(k.get(), Window::DimY, input, outputs); + ++i; + } +} +} // namespace experimental + +struct NEConcatenateLayer::Impl +{ + std::vector srcs{}; + ITensor *dst{ nullptr }; + unsigned int num_inputs{ 0 }; + unsigned int axis{ 0 }; + std::unique_ptr op{ nullptr }; +}; + +NEConcatenateLayer::NEConcatenateLayer() + : _impl(support::cpp14::make_unique()) +{ +} + +NEConcatenateLayer::NEConcatenateLayer(NEConcatenateLayer &&) = default; + +NEConcatenateLayer &NEConcatenateLayer::operator=(NEConcatenateLayer &&) = default; + +NEConcatenateLayer::~NEConcatenateLayer() = default; + +void NEConcatenateLayer::configure(std::vector inputs_vector, ITensor *output, size_t axis) +{ + ARM_COMPUTE_ERROR_ON(output == nullptr); + + _impl->srcs = inputs_vector; + _impl->dst = output; + _impl->axis = axis; + _impl->num_inputs = inputs_vector.size(); + _impl->op = arm_compute::support::cpp14::make_unique(); + + std::vector inputs_vector_info; + for(unsigned int i = 0; i < inputs_vector.size(); ++i) + { + ARM_COMPUTE_ERROR_ON_NULLPTR(inputs_vector.at(i)); + inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); + } + _impl->op->configure(inputs_vector_info, _impl->dst->info(), axis); +} + +Status NEConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) +{ + return experimental::NEConcatenateLayer::validate(inputs_vector, output, axis); +} + void NEConcatenateLayer::run() { - for(auto &kernel : _concat_kernels) + InputTensorMap srcs; + for(unsigned i = 0; i < _impl->num_inputs; ++i) { - NEScheduler::get().schedule(kernel.get(), Window::DimY); + srcs.insert(std::make_pair(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i))); } + const OutputTensorMap dst{ { TensorType::ACL_DST, _impl->dst } }; + + _impl->op->run(srcs, dst, {}); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp index f89b3e999c..dca274acd2 100644 --- a/src/runtime/NEON/functions/NELSTMLayer.cpp +++ b/src/runtime/NEON/functions/NELSTMLayer.cpp @@ -347,7 +347,7 @@ void NELSTMLayer::configure(const ITensor *input, _copy_output.configure(output_state_out, output); // Vector for holding the tensors to store in scratch buffer - std::vector scratch_inputs; + std::vector scratch_inputs; if(!lstm_params.has_cifg_opt()) { scratch_inputs.emplace_back(input_gate_out); @@ -579,7 +579,7 @@ Status NELSTMLayer::validate(const ITensorInfo *input, ARM_COMPUTE_RETURN_ON_ERROR(NECopyKernel::validate(output_state_out, output)); // Validate scratch concatenation - std::vector inputs_vector_info_raw; + std::vector inputs_vector_info_raw; if(!lstm_params.has_cifg_opt()) { inputs_vector_info_raw.push_back(&input_gate); diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp index da9a425d9b..21c349ba95 100644 --- a/src/runtime/NEON/functions/NEPadLayer.cpp +++ b/src/runtime/NEON/functions/NEPadLayer.cpp @@ -117,7 +117,7 @@ void NEPadLayer::configure_reflect_symmetric_mode(ITensor *input, ITensor *outpu const int32_t end_mask_after = ends_after[i] < 0 ? ~0 : ~(1u << i); // Reflect the input values for the padding before and after the input. - std::vector concat_vector; + std::vector concat_vector; if(_padding[i].first > 0) { if(i < prev->info()->num_dimensions()) diff --git a/tests/validation/NEON/BatchConcatenateLayer.cpp b/tests/validation/NEON/BatchConcatenateLayer.cpp index bd8d0dc606..6eafe82f8a 100644 --- a/tests/validation/NEON/BatchConcatenateLayer.cpp +++ b/tests/validation/NEON/BatchConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -70,7 +70,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( inputs_vector_info.emplace_back(std::move(input_info1)); inputs_vector_info.emplace_back(std::move(input_info2)); - std::vector inputs_vector_info_raw; + std::vector inputs_vector_info_raw; inputs_vector_info_raw.reserve(inputs_vector_info.size()); for(auto &input : inputs_vector_info) { diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp index 2c7b5a61f1..1c69d44a2b 100644 --- a/tests/validation/NEON/DepthConcatenateLayer.cpp +++ b/tests/validation/NEON/DepthConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -67,7 +67,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( inputs_vector_info.emplace_back(std::move(input_info1)); inputs_vector_info.emplace_back(std::move(input_info2)); - std::vector inputs_vector_info_raw; + std::vector inputs_vector_info_raw; inputs_vector_info_raw.reserve(inputs_vector_info.size()); for(auto &input : inputs_vector_info) { diff --git a/tests/validation/NEON/HeightConcatenateLayer.cpp b/tests/validation/NEON/HeightConcatenateLayer.cpp index 0449fcd3e0..c46b797315 100644 --- a/tests/validation/NEON/HeightConcatenateLayer.cpp +++ b/tests/validation/NEON/HeightConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -71,7 +71,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( inputs_vector_info.emplace_back(std::move(input_info1)); inputs_vector_info.emplace_back(std::move(input_info2)); - std::vector inputs_vector_info_raw; + std::vector inputs_vector_info_raw; inputs_vector_info_raw.reserve(inputs_vector_info.size()); for(auto &input : inputs_vector_info) { diff --git a/tests/validation/NEON/WidthConcatenateLayer.cpp b/tests/validation/NEON/WidthConcatenateLayer.cpp index 4d22976cb7..123a77276a 100644 --- a/tests/validation/NEON/WidthConcatenateLayer.cpp +++ b/tests/validation/NEON/WidthConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -69,7 +69,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( inputs_vector_info.emplace_back(std::move(input_info1)); inputs_vector_info.emplace_back(std::move(input_info2)); - std::vector inputs_vector_info_raw; + std::vector inputs_vector_info_raw; inputs_vector_info_raw.reserve(inputs_vector_info.size()); for(auto &input : inputs_vector_info) { diff --git a/tests/validation/fixtures/ConcatenateLayerFixture.h b/tests/validation/fixtures/ConcatenateLayerFixture.h index 9de59c27ba..a56eac1fdf 100644 --- a/tests/validation/fixtures/ConcatenateLayerFixture.h +++ b/tests/validation/fixtures/ConcatenateLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,9 @@ namespace validation template class ConcatenateLayerValidationFixture : public framework::Fixture { +private: + using SrcITensorType = typename std::conditional::value, const ITensorType, ITensorType>::type; + public: template void setup(TensorShape shape, DataType data_type, unsigned int axis) @@ -95,8 +98,8 @@ protected: TensorType compute_target(const std::vector &shapes, const std::vector &qinfo, DataType data_type, unsigned int axis) { - std::vector srcs; - std::vector src_ptrs; + std::vector srcs; + std::vector src_ptrs; // Create tensors srcs.reserve(shapes.size()); -- cgit v1.2.1