aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp')
-rw-r--r--src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp52
1 files changed, 30 insertions, 22 deletions
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index bb0db2e7a7..2440384e3b 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -24,13 +24,19 @@
#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
@@ -119,7 +125,7 @@ Status validate_arguments_3x3(const ITensorInfo *input, const ITensorInfo *weigh
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _dwc_native_kernel(),
+ _dwc_native_kernel(support::cpp14::make_unique<CLDepthwiseConvolutionLayerNativeKernel>()),
_permute_input_to_nhwc(),
_permute_weights_to_nhwc(),
_permute_output_to_nchw(),
@@ -137,6 +143,8 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConv
{
}
+CLDepthwiseConvolutionLayer::~CLDepthwiseConvolutionLayer() = default;
+
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
@@ -206,9 +214,9 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(
dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1;
DWCKernelInfo dwc_info;
dwc_info.activation_info = act_info;
- _dwc_native_kernel.configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
- dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation,
- output_multipliers_to_use, output_shifts_to_use);
+ _dwc_native_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
+ dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation,
+ output_multipliers_to_use, output_shifts_to_use);
if(_needs_permute)
{
@@ -302,7 +310,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::run()
{
_permute_input_to_nhwc.run();
}
- CLScheduler::get().enqueue(_dwc_native_kernel);
+ CLScheduler::get().enqueue(*_dwc_native_kernel);
if(_needs_permute)
{
_permute_output_to_nchw.run();
@@ -343,11 +351,11 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::prepare()
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
_kernel(nullptr),
- _border_handler(),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
_permute_input_to_nchw(),
_permute_weights_to_nchw(),
_permute_output_to_nhwc(),
- _reshape_weights(),
+ _reshape_weights(support::cpp14::make_unique<CLDepthwiseConvolutionLayerReshapeWeightsKernel>()),
_permuted_input(),
_permuted_weights(),
_permuted_output(),
@@ -378,14 +386,14 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayerInternal3x3::validate(input->info(),
- weights->info(),
- biases != nullptr ? biases->info() : nullptr,
- output->info(),
- conv_info,
- depth_multiplier,
- act_info,
- gpu_target,
- dilation));
+ weights->info(),
+ biases != nullptr ? biases->info() : nullptr,
+ output->info(),
+ conv_info,
+ depth_multiplier,
+ act_info,
+ gpu_target,
+ dilation));
const bool is_nhwc = input->info()->data_layout() == DataLayout::NHWC;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
@@ -434,7 +442,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
{
if(_needs_weights_reshape)
{
- _reshape_weights.configure(compile_context, weights, &_permuted_weights, info);
+ _reshape_weights->configure(compile_context, weights, &_permuted_weights, info);
weights_to_use = &_permuted_weights;
}
_kernel = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3NHWCKernel>();
@@ -486,7 +494,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::config
{
zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
}
- _border_handler.configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
+ _border_handler->configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
}
Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
@@ -505,7 +513,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::run()
{
_permute_input_to_nchw.run();
}
- CLScheduler::get().enqueue(_border_handler);
+ CLScheduler::get().enqueue(*_border_handler);
CLScheduler::get().enqueue(*_kernel);
if(_needs_permute)
@@ -547,7 +555,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::prepar
ARM_COMPUTE_ERROR_ON(_needs_permute);
ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
_permuted_weights.allocator()->allocate();
- CLScheduler::get().enqueue(_reshape_weights);
+ CLScheduler::get().enqueue(*_reshape_weights);
_original_weights->mark_as_unused();
}
_is_prepared = true;
@@ -567,7 +575,7 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
+ unsigned int depth_multiplier,
ActivationLayerInfo act_info, const Size2D &dilation)
{
const GPUTarget gpu_target = CLScheduler::get().target();