diff options
Diffstat (limited to 'src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp | 52 |
1 files changed, 30 insertions, 22 deletions
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index bb0db2e7a7..2440384e3b 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -24,13 +24,19 @@ #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" +#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" +#include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" #include "support/MemorySupport.h" namespace arm_compute @@ -119,7 +125,7 @@ Status validate_arguments_3x3(const ITensorInfo *input, const ITensorInfo *weigh CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager) : _memory_group(std::move(memory_manager)), - _dwc_native_kernel(), + _dwc_native_kernel(support::cpp14::make_unique<CLDepthwiseConvolutionLayerNativeKernel>()), _permute_input_to_nhwc(), _permute_weights_to_nhwc(), _permute_output_to_nchw(), @@ -137,6 +143,8 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConv { } +CLDepthwiseConvolutionLayer::~CLDepthwiseConvolutionLayer() = default; + void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation) { @@ -206,9 +214,9 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure( dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1; DWCKernelInfo dwc_info; dwc_info.activation_info = act_info; - _dwc_native_kernel.configure(compile_context, input_to_use, weights_to_use, biases, output_to_use, - dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation, - output_multipliers_to_use, output_shifts_to_use); + _dwc_native_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use, + dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation, + output_multipliers_to_use, output_shifts_to_use); if(_needs_permute) { @@ -302,7 +310,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::run() { _permute_input_to_nhwc.run(); } - CLScheduler::get().enqueue(_dwc_native_kernel); + CLScheduler::get().enqueue(*_dwc_native_kernel); if(_needs_permute) { _permute_output_to_nchw.run(); @@ -343,11 +351,11 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::prepare() CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager) : _memory_group(std::move(memory_manager)), _kernel(nullptr), - _border_handler(), + _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()), _permute_input_to_nchw(), _permute_weights_to_nchw(), _permute_output_to_nhwc(), - _reshape_weights(), + _reshape_weights(support::cpp14::make_unique<CLDepthwiseConvolutionLayerReshapeWeightsKernel>()), _permuted_input(), _permuted_weights(), _permuted_output(), @@ -378,14 +386,14 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayerInternal3x3::validate(input->info(), - weights->info(), - biases != nullptr ? biases->info() : nullptr, - output->info(), - conv_info, - depth_multiplier, - act_info, - gpu_target, - dilation)); + weights->info(), + biases != nullptr ? biases->info() : nullptr, + output->info(), + conv_info, + depth_multiplier, + act_info, + gpu_target, + dilation)); const bool is_nhwc = input->info()->data_layout() == DataLayout::NHWC; _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -434,7 +442,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config { if(_needs_weights_reshape) { - _reshape_weights.configure(compile_context, weights, &_permuted_weights, info); + _reshape_weights->configure(compile_context, weights, &_permuted_weights, info); weights_to_use = &_permuted_weights; } _kernel = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3NHWCKernel>(); @@ -486,7 +494,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config { zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset)); } - _border_handler.configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value); + _border_handler->configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value); } Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, @@ -505,7 +513,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::run() { _permute_input_to_nchw.run(); } - CLScheduler::get().enqueue(_border_handler); + CLScheduler::get().enqueue(*_border_handler); CLScheduler::get().enqueue(*_kernel); if(_needs_permute) @@ -547,7 +555,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::prepar ARM_COMPUTE_ERROR_ON(_needs_permute); ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); _permuted_weights.allocator()->allocate(); - CLScheduler::get().enqueue(_reshape_weights); + CLScheduler::get().enqueue(*_reshape_weights); _original_weights->mark_as_unused(); } _is_prepared = true; @@ -567,7 +575,7 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, - unsigned int depth_multiplier, + unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation) { const GPUTarget gpu_target = CLScheduler::get().target(); |