diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2021-01-18 21:15:59 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-01-20 16:28:27 +0000 |
commit | 7d61ff041826782d14e67b7f5b7a2864905ff38b (patch) | |
tree | 2e69c8a5fdabc6717b0691acdbbe7374d856902f /src/runtime/CL/functions | |
parent | da6a6eb3bc06ce8869ae3290853970d4c0ce412e (diff) | |
download | ComputeLibrary-7d61ff041826782d14e67b7f5b7a2864905ff38b.tar.gz |
Make all CL Concatenate kernels and functions state-less
Resolves COMPMID-3995
Change-Id: I84172bed20924f1d9ae3b4d14d7b321e9494296e
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4887
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/runtime/CL/functions')
-rw-r--r-- | src/runtime/CL/functions/CLConcatenateLayer.cpp | 243 |
1 files changed, 10 insertions, 233 deletions
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 0c473a79c8..ea96e45bf8 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,242 +23,19 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" - #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" -#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/CL/ICLKernel.h" +#include "src/runtime/gpu/cl/operators/ClConcatenate.h" namespace arm_compute { -namespace experimental -{ -CLConcatenation::CLConcatenation() - : _concat_kernels(), - _num_inputs(0), - _axis(Window::DimX) -{ -} - -void CLConcatenation::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis) -{ - ARM_COMPUTE_ERROR_ON(output == nullptr); - _axis = axis; - _num_inputs = inputs_vector.size(); - - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis); - std::vector<const ITensorInfo *> const_inputs_vector(inputs_vector.size()); - std::transform(inputs_vector.begin(), inputs_vector.end(), const_inputs_vector.begin(), [](ITensorInfo * t) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(t); - return t; - }); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate(const_inputs_vector, output, axis)); - - unsigned int offset = 0; - switch(_axis) - { - case Window::DimX: - { - switch(_num_inputs) - { - case 2: - { - // Configure WidthConcatenate2Tensors kernel - auto kernel = std::make_unique<CLWidthConcatenate2TensorsKernel>(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - case 4: - { - // Configure WidthConcatenate4Tensors kernel - auto kernel = std::make_unique<CLWidthConcatenate4TensorsKernel>(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - default: - { - // Configure generic case WidthConcatenate kernels - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<CLWidthConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - } - break; - } - case Window::DimY: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<CLHeightConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case Window::DimZ: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<CLDepthConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case 3: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<CLBatchConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } -} - -Status CLConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr); - const unsigned int num_inputs = inputs_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - unsigned int offset = 0; - switch(axis) - { - case Window::DimX: - { - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], output)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], output)); - break; - default: - // Validate generic case of WidthConcatenate kernel - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - break; - } - case Window::DimY: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLHeightConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case Window::DimZ: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case 3: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } - - if(output->total_size() != 0) - { - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - return Status{}; -} - -void CLConcatenation::run(ITensorPack &tensors) -{ - if(tensors.empty()) - { - ARM_COMPUTE_ERROR("No inputs provided"); - } - - if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs)) - { - ARM_COMPUTE_ERROR("Configured with different number of inputs"); - } - - if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) - { - ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); - } - else - { - int i = 0; - for(auto &k : _concat_kernels) - { - ITensorPack pack; - pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); - pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); - CLScheduler::get().enqueue_op(*k, pack, true); - ++i; - } - } -} -} // namespace experimental - struct CLConcatenateLayer::Impl { - std::vector<const ICLTensor *> srcs{}; - ICLTensor *dst{ nullptr }; - unsigned int num_inputs{ 0 }; - unsigned int axis{ 0 }; - std::unique_ptr<experimental::CLConcatenation> op{ nullptr }; + std::vector<const ICLTensor *> srcs{}; + ICLTensor *dst{ nullptr }; + unsigned int num_inputs{ 0 }; + unsigned int axis{ 0 }; + std::unique_ptr<opencl::ClConcatenate> op{ nullptr }; }; CLConcatenateLayer::CLConcatenateLayer() @@ -285,7 +62,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std: _impl->dst = output; _impl->axis = axis; _impl->num_inputs = inputs_vector.size(); - _impl->op = std::make_unique<experimental::CLConcatenation>(); + _impl->op = std::make_unique<opencl::ClConcatenate>(); std::vector<ITensorInfo *> inputs_vector_info; for(unsigned int i = 0; i < inputs_vector.size(); ++i) @@ -298,7 +75,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std: Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) { - return experimental::CLConcatenation::validate(inputs_vector, output, axis); + return opencl::ClConcatenate::validate(inputs_vector, output, axis); } void CLConcatenateLayer::run() |