From 7d61ff041826782d14e67b7f5b7a2864905ff38b Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 18 Jan 2021 21:15:59 +0000 Subject: Make all CL Concatenate kernels and functions state-less Resolves COMPMID-3995 Change-Id: I84172bed20924f1d9ae3b4d14d7b321e9494296e Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4887 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/runtime/CL/functions/CLConcatenateLayer.cpp | 243 +---------------------- src/runtime/gpu/cl/IClOperator.h | 37 ++++ src/runtime/gpu/cl/operators/ClConcatenate.cpp | 254 ++++++++++++++++++++++++ src/runtime/gpu/cl/operators/ClConcatenate.h | 86 ++++++++ 4 files changed, 387 insertions(+), 233 deletions(-) create mode 100644 src/runtime/gpu/cl/IClOperator.h create mode 100644 src/runtime/gpu/cl/operators/ClConcatenate.cpp create mode 100644 src/runtime/gpu/cl/operators/ClConcatenate.h (limited to 'src/runtime') diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 0c473a79c8..ea96e45bf8 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,242 +23,19 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" - #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" -#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/CL/ICLKernel.h" +#include "src/runtime/gpu/cl/operators/ClConcatenate.h" namespace arm_compute { -namespace experimental -{ -CLConcatenation::CLConcatenation() - : _concat_kernels(), - _num_inputs(0), - _axis(Window::DimX) -{ -} - -void CLConcatenation::configure(const CLCompileContext &compile_context, const std::vector &inputs_vector, ITensorInfo *output, size_t axis) -{ - ARM_COMPUTE_ERROR_ON(output == nullptr); - _axis = axis; - _num_inputs = inputs_vector.size(); - - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis); - std::vector const_inputs_vector(inputs_vector.size()); - std::transform(inputs_vector.begin(), inputs_vector.end(), const_inputs_vector.begin(), [](ITensorInfo * t) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(t); - return t; - }); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate(const_inputs_vector, output, axis)); - - unsigned int offset = 0; - switch(_axis) - { - case Window::DimX: - { - switch(_num_inputs) - { 
- case 2: - { - // Configure WidthConcatenate2Tensors kernel - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - case 4: - { - // Configure WidthConcatenate4Tensors kernel - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - default: - { - // Configure generic case WidthConcatenate kernels - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - } - break; - } - case Window::DimY: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case Window::DimZ: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case 3: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } -} - -Status CLConcatenation::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) -{ - 
ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr); - const unsigned int num_inputs = inputs_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - unsigned int offset = 0; - switch(axis) - { - case Window::DimX: - { - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], output)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], output)); - break; - default: - // Validate generic case of WidthConcatenate kernel - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - break; - } - case Window::DimY: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLHeightConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case Window::DimZ: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case 3: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } - - if(output->total_size() != 0) - { - 
TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - return Status{}; -} - -void CLConcatenation::run(ITensorPack &tensors) -{ - if(tensors.empty()) - { - ARM_COMPUTE_ERROR("No inputs provided"); - } - - if(static_cast(tensors.size()) - 1 != static_cast(_num_inputs)) - { - ARM_COMPUTE_ERROR("Configured with different number of inputs"); - } - - if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) - { - ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); - } - else - { - int i = 0; - for(auto &k : _concat_kernels) - { - ITensorPack pack; - pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); - pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); - CLScheduler::get().enqueue_op(*k, pack, true); - ++i; - } - } -} -} // namespace experimental - struct CLConcatenateLayer::Impl { - std::vector srcs{}; - ICLTensor *dst{ nullptr }; - unsigned int num_inputs{ 0 }; - unsigned int axis{ 0 }; - std::unique_ptr op{ nullptr }; + std::vector srcs{}; + ICLTensor *dst{ nullptr }; + unsigned int num_inputs{ 0 }; + unsigned int axis{ 0 }; + std::unique_ptr op{ nullptr }; }; CLConcatenateLayer::CLConcatenateLayer() @@ -285,7 +62,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std: _impl->dst = output; _impl->axis = axis; _impl->num_inputs = inputs_vector.size(); - _impl->op = std::make_unique(); + _impl->op = std::make_unique(); std::vector inputs_vector_info; for(unsigned int i = 0; i < inputs_vector.size(); ++i) @@ -298,7 +75,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std: Status CLConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) { - return 
experimental::CLConcatenation::validate(inputs_vector, output, axis); + return opencl::ClConcatenate::validate(inputs_vector, output, axis); } void CLConcatenateLayer::run() diff --git a/src/runtime/gpu/cl/IClOperator.h b/src/runtime/gpu/cl/IClOperator.h new file mode 100644 index 0000000000..049bf05dc1 --- /dev/null +++ b/src/runtime/gpu/cl/IClOperator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_ICL_OPERATOR_H +#define ARM_COMPUTE_ICL_OPERATOR_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/runtime/CL/ICLOperator.h" + +namespace arm_compute +{ +namespace opencl +{ +using IClOperator = experimental::ICLOperator; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_ICL_OPERATOR_H */ diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.cpp b/src/runtime/gpu/cl/operators/ClConcatenate.cpp new file mode 100644 index 0000000000..4385fcfaed --- /dev/null +++ b/src/runtime/gpu/cl/operators/ClConcatenate.cpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/runtime/gpu/cl/operators/ClConcatenate.h" + +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" + +namespace arm_compute +{ +namespace opencl +{ +ClConcatenate::ClConcatenate() + : _concat_kernels(), + _num_inputs(0), + _axis(Window::DimX) +{ +} + +void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector &src_vector, ITensorInfo *dst, size_t axis) +{ + ARM_COMPUTE_ERROR_ON(dst == nullptr); + _axis = axis; + _num_inputs = src_vector.size(); + + TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis); + std::vector const_src_vector(src_vector.size()); + std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t) + { + ARM_COMPUTE_ERROR_ON_NULLPTR(t); + return t; + }); + + // dst auto initialization if not yet initialized + auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type()); + ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis)); + + unsigned int offset = 0; + switch(_axis) + { + case Window::DimX: + { + switch(_num_inputs) + { + case 2: + { + // Configure WidthConcatenate2Tensors kernel + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst); + _concat_kernels.emplace_back(std::move(kernel)); + break; + } + case 4: + {
+ // Configure WidthConcatenate4Tensors kernel + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst); + _concat_kernels.emplace_back(std::move(kernel)); + break; + } + default: + { + // Configure generic case WidthConcatenate kernels + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + } + break; + } + case Window::DimY: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + case Window::DimZ: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + case 3: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + default: + ARM_COMPUTE_ERROR("Axis not supported"); + } +} + +Status ClConcatenate::validate(const std::vector &src_vector, const ITensorInfo *dst, size_t axis) +{ + ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr); + const unsigned int num_inputs = src_vector.size(); + + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); + ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); + + unsigned int offset = 0; + switch(axis) + { + case Window::DimX: + { + switch(num_inputs) + { + case 2: + // Validate 
WidthConcatenate2Tensors kernels if there are 2 inputs + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst)); + break; + case 4: + // Validate WidthConcatenate4Tensors kernels if there are 4 inputs + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst)); + break; + default: + // Validate generic case of WidthConcatenate kernel + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + break; + } + case Window::DimY: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + case Window::DimZ: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + case 3: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + default: + ARM_COMPUTE_ERROR("Axis not supported"); + } + + if(dst->total_size() != 0) + { + TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis); + ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size()); + } + + return Status{}; +} + +void ClConcatenate::run(ITensorPack &tensors) +{ + if(tensors.empty()) + { + ARM_COMPUTE_ERROR("No inputs provided"); + } + + if(static_cast(tensors.size()) - 
1 != static_cast(_num_inputs)) + { + ARM_COMPUTE_ERROR("Configured with different number of inputs"); + } + + if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) + { + ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); + CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); + } + else + { + int i = 0; + for(auto &k : _concat_kernels) + { + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); + pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); + CLScheduler::get().enqueue_op(*k, pack, true); + ++i; + } + } +} +} // namespace opencl +} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h new file mode 100644 index 0000000000..112e2ac6b7 --- /dev/null +++ b/src/runtime/gpu/cl/operators/ClConcatenate.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCONCATENATE_H +#define ARM_COMPUTE_CLCONCATENATE_H + +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" +#include "src/runtime/gpu/cl/IClOperator.h" + +#include + +namespace arm_compute +{ +namespace opencl +{ +/** Basic function to concatenate tensors along a given axis. This function calls the following kernels: + * + * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0). + * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1). + * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2). + * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3). + */ +class ClConcatenate : public IClOperator +{ +public: + /** Default constructor */ + ClConcatenate(); + /** Initialise the kernel's inputs vector and dst. + * + * @note Input and dst tensor dimensions preconditions differ depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, + * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. + * + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] src_vector The vectors containing all the tensors to concatenate. Data types supported: All + * @param[out] dst Destination tensor. Data types supported: same as @p src_vector. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+ */ + void configure(const ClCompileContext &compile_context, const std::vector &src_vector, ITensorInfo *dst, size_t axis); + /** Static function to check if given info will lead to a valid configuration of @ref ClConcatenate + * + * @note Input and dst tensor dimensions preconditions differ depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, + * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. + * + * @param[in] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All + * @param[in] dst Destination tensor info. Data types supported: same as @p src_vector. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + * + * @return a status + */ + static Status validate(const std::vector &src_vector, const ITensorInfo *dst, size_t axis); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; + +private: + std::vector> _concat_kernels; + unsigned int _num_inputs; + unsigned int _axis; +}; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCONCATENATE_H */ -- cgit v1.2.1