diff options
Diffstat (limited to 'src/runtime/CL/functions/CLConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLConvolutionLayer.cpp | 102 |
1 files changed, 70 insertions, 32 deletions
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index f3c05adb47..7767b45a01 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -28,11 +28,11 @@ #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h" + +#include "src/common/utils/Log.h" #include "src/core/CL/ICLKernel.h" #include "src/core/helpers/MemoryHelpers.h" #include "src/gpu/cl/operators/ClConv2d.h" - -#include "src/common/utils/Log.h" #include "support/Cast.h" namespace arm_compute @@ -43,41 +43,59 @@ struct CLConvolutionLayer::Impl { MemoryGroup memory_group{}; std::shared_ptr<IMemoryManager> memory_manager{}; - std::unique_ptr<opencl::IClOperator> op{ nullptr }; + std::unique_ptr<opencl::IClOperator> op{nullptr}; ITensorPack run_pack{}; ITensorPack prep_pack{}; WorkspaceData<CLTensor> workspace{}; experimental::MemoryRequirements aux_mem_req{}; - std::unique_ptr<IFunction> func{ nullptr }; + std::unique_ptr<IFunction> func{nullptr}; }; -CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) - : _impl(std::make_unique<Impl>()) +CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) : _impl(std::make_unique<Impl>()) { _impl->memory_manager = std::move(memory_manager); } CLConvolutionLayer::~CLConvolutionLayer() = default; -void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) +void CLConvolutionLayer::configure(ICLTensor *input, + const ICLTensor *weights, + const ICLTensor *biases, + ICLTensor *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info, + const Size2D &dilation, + const ActivationLayerInfo &act_info, + bool enable_fast_math, + unsigned int num_groups) { - configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups); + configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, weights_info, + dilation, act_info, enable_fast_math, num_groups); } -void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) +void CLConvolutionLayer::configure(const CLCompileContext &compile_context, + ICLTensor *input, + const ICLTensor *weights, + const ICLTensor *biases, + ICLTensor *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info, + const Size2D &dilation, + const ActivationLayerInfo &act_info, + bool enable_fast_math, + unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, - enable_fast_math, num_groups)); - ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups); + ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate( + input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, + weights_info, dilation, act_info, enable_fast_math, num_groups)); + ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info, + enable_fast_math, num_groups); const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups); - switch(opencl::ClConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv2d_info, - weights_info, CLScheduler::get().target())) + switch (opencl::ClConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv2d_info, + weights_info, CLScheduler::get().target())) { case ConvolutionMethod::WINOGRAD: case ConvolutionMethod::DIRECT: @@ -85,7 +103,8 @@ void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLT case ConvolutionMethod::GEMM: { auto f = std::make_unique<opencl::ClConv2d>(); - f->configure(compile_context, input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv2d_info, weights_info); + f->configure(compile_context, input->info(), weights->info(), + ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv2d_info, weights_info); _impl->op = std::move(f); break; } @@ -101,40 +120,52 @@ void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLT break; } - if(_impl->op) + if (_impl->op) { _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager)); _impl->aux_mem_req = _impl->op->workspace(); - _impl->run_pack = { { ACL_SRC_0, input }, { ACL_SRC_1, weights }, { ACL_SRC_2, biases }, { ACL_DST, output } }; - _impl->prep_pack = { { ACL_SRC_1, weights }, { ACL_SRC_2, biases } }; - _impl->workspace = manage_workspace<CLTensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack); + _impl->run_pack = {{ACL_SRC_0, input}, {ACL_SRC_1, weights}, {ACL_SRC_2, biases}, {ACL_DST, output}}; + _impl->prep_pack = {{ACL_SRC_1, weights}, {ACL_SRC_2, biases}}; + _impl->workspace = + manage_workspace<CLTensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack); } } -Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) +Status CLConvolutionLayer::validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info, + const Size2D &dilation, + const ActivationLayerInfo &act_info, + bool enable_fast_math, + unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(!weights->are_values_constant(), "Dynamic weights are not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), + "Grouping (num_groups != 1) with NHWC data layout is not supported"); const GPUTarget gpu_target = CLScheduler::get().target(); const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups); - switch(opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target)) + switch (opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target)) { case ConvolutionMethod::WINOGRAD: case ConvolutionMethod::DIRECT: case ConvolutionMethod::INDIRECT: case ConvolutionMethod::GEMM: { - ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClConv2d::validate(input, weights, biases, output, conv2d_info, weights_info)); + ARM_COMPUTE_RETURN_ON_ERROR( + opencl::ClConv2d::validate(input, weights, biases, output, conv2d_info, weights_info)); break; } case ConvolutionMethod::FFT: { // Validate FFT-based convolution layer - ARM_COMPUTE_RETURN_ON_ERROR(CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)); + ARM_COMPUTE_RETURN_ON_ERROR(CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, + act_info, enable_fast_math)); break; } default: @@ -145,8 +176,15 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo return Status{}; } -ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation, bool enable_fast_math) +ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *output, + const PadStrideInfo &conv_info, + const WeightsInfo &weights_info, + const ActivationLayerInfo &act_info, + const GPUTarget gpu_target, + const Size2D &dilation, + bool enable_fast_math) { const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, 1); return opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target); @@ -158,7 +196,7 @@ void CLConvolutionLayer::run() MemoryGroupResourceScope scope_mg(_impl->memory_group); - if(_impl->func) + if (_impl->func) { _impl->func->run(); } @@ -170,7 +208,7 @@ void CLConvolutionLayer::run() void CLConvolutionLayer::prepare() { - if(_impl->func) + if (_impl->func) { _impl->func->prepare(); } |