diff options
Diffstat (limited to 'src/runtime/CL/functions/CLConcatenateLayer.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLConcatenateLayer.cpp | 249 |
1 files changed, 44 insertions, 205 deletions
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index e97256713f..9df1c34593 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,238 +23,77 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" -#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "support/MemorySupport.h" + +#include "src/common/utils/Log.h" +#include "src/core/CL/ICLKernel.h" +#include "src/gpu/cl/operators/ClConcatenate.h" namespace arm_compute { -CLConcatenateLayer::CLConcatenateLayer() - : _concat_kernels(), - _num_inputs(0), - _axis(Window::DimX) +struct CLConcatenateLayer::Impl { -} - -void CLConcatenateLayer::configure(std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis) + std::vector<const ICLTensor *> srcs{}; + ICLTensor *dst{nullptr}; + unsigned int num_inputs{0}; + unsigned int axis{0}; + std::unique_ptr<opencl::ClConcatenate> op{nullptr}; +}; + +CLConcatenateLayer::CLConcatenateLayer() : _impl(std::make_unique<Impl>()) { - configure(CLKernelLibrary::get().get_compile_context(), inputs_vector, output, axis); } -void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis) -{ - configure_internal(compile_context, std::move(inputs_vector), output, axis); -} +CLConcatenateLayer::CLConcatenateLayer(CLConcatenateLayer &&) = default; -void CLConcatenateLayer::configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis) -{ - configure(CLKernelLibrary::get().get_compile_context(), inputs_vector, output, axis); -} +CLConcatenateLayer &CLConcatenateLayer::operator=(CLConcatenateLayer &&) = default; -void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis) -{ - configure_internal(compile_context, std::move(inputs_vector), output, axis); -} +CLConcatenateLayer::~CLConcatenateLayer() = default; -Status CLConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) -{ - return validate_internal(inputs_vector, output, axis); -} - -Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) +void CLConcatenateLayer::configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis) { - return validate_internal(inputs_vector, output, axis); + configure(CLKernelLibrary::get().get_compile_context(), inputs_vector, output, axis); } -template <typename TensorType> -void CLConcatenateLayer::configure_internal(const CLCompileContext &compile_context, std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis) +void CLConcatenateLayer::configure(const CLCompileContext &compile_context, + std::vector<const ICLTensor *> &inputs_vector, + ICLTensor *output, + size_t axis) { ARM_COMPUTE_ERROR_ON(output == nullptr); - _axis = axis; - _num_inputs = inputs_vector.size(); + ARM_COMPUTE_LOG_PARAMS(inputs_vector, output, axis); - std::vector<ITensorInfo *> inputs_vector_info(inputs_vector.size()); - std::transform(inputs_vector.begin(), inputs_vector.end(), inputs_vector_info.begin(), [](TensorType * t) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(t); - return t->info(); - }); - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis); + _impl->srcs = inputs_vector; + _impl->dst = output; + _impl->axis = axis; + _impl->num_inputs = inputs_vector.size(); + _impl->op = std::make_unique<opencl::ClConcatenate>(); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate(inputs_vector_info, output->info(), axis)); - - unsigned int offset = 0; - switch(_axis) + std::vector<ITensorInfo *> inputs_vector_info; + for (unsigned int i = 0; i < inputs_vector.size(); ++i) { - case Window::DimX: - { - switch(_num_inputs) - { - case 2: - { - // Configure WidthConcatenate2Tensors kernel - auto kernel = support::cpp14::make_unique<CLWidthConcatenate2TensorsKernel>(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - case 4: - { - // Configure WidthConcatenate4Tensors kernel - auto kernel = support::cpp14::make_unique<CLWidthConcatenate4TensorsKernel>(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - default: - { - // Configure generic case WidthConcatenate kernels - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = support::cpp14::make_unique<CLWidthConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->info()->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - } - break; - } - case Window::DimY: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = support::cpp14::make_unique<CLHeightConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->info()->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case Window::DimZ: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = support::cpp14::make_unique<CLDepthConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->info()->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case 3: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = support::cpp14::make_unique<CLBatchConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->info()->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); + ARM_COMPUTE_ERROR_ON_NULLPTR(inputs_vector.at(i)); + inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); } + _impl->op->configure(compile_context, inputs_vector_info, _impl->dst->info(), axis); } -template <typename TensorInfoType> -Status CLConcatenateLayer::validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis) +Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, + const ITensorInfo *output, + size_t axis) { - ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr); - const unsigned int num_inputs = inputs_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - unsigned int offset = 0; - switch(axis) - { - case Window::DimX: - { - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], output)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], output)); - break; - default: - // Validate generic case of WidthConcatenate kernel - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - break; - } - case Window::DimY: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLHeightConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case Window::DimZ: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case 3: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } - - if(output->total_size() != 0) - { - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - return Status{}; + return opencl::ClConcatenate::validate(inputs_vector, output, axis); } void CLConcatenateLayer::run() { - for(auto &kernel : _concat_kernels) + ITensorPack pack; + for (unsigned i = 0; i < _impl->num_inputs; ++i) { - CLScheduler::get().enqueue(*kernel, true); + pack.add_tensor(TensorType::ACL_SRC_VEC + i, _impl->srcs.at(i)); } + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + + _impl->op->run(pack); } } // namespace arm_compute |