aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2021-04-16 12:41:45 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-04-20 13:32:10 +0000
commitdcf4c87cf78a5f1667699c1a3511d09356938660 (patch)
tree28aa191a226e4bf4350d622fcb668abaa17e8677 /src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
parent562bee584f3633167725af7915f50d07b0597f10 (diff)
downloadComputeLibrary-dcf4c87cf78a5f1667699c1a3511d09356938660.tar.gz
CLDepthwiseConvolutionLayer rework - Part 1
Remove the reshaped variant for CLDepthwiseConvolutionLayer 3x3 NHWC Quantized - Remove kernel selection by GPUTarget - Remove unused quantized support from the NHWC kernel - Remove CLDepthwiseConvolutionLayerReshapeWeightsKernel - Remove OpenCL kernels for reshaped dwc 3x3 quantized and weights reshape - Remove the "_bifrost" suffix in common OpenCL kernel - Remove the ICLDepthwiseConvolutionLayer3x3Kernel common interface Resolve COMPMID-3864, COMPMID-3907 Change-Id: Icfac0fb6c00e214985beb05dad7c0cdbbee7d830 Signed-off-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5447 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp53
1 files changed, 24 insertions, 29 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index 287a965f5b..dda70d2231 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -114,20 +114,19 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
}
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, GPUTarget gpu_target, std::string &kernel_name, const Size2D dilation)
+ unsigned int depth_multiplier, std::string &kernel_name, const Size2D dilation)
{
// Output auto inizialitation if not yet initialized
const ConvolutionInfo info
{
conv_info, depth_multiplier, ActivationLayerInfo(), dilation
};
- const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, info);
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, info);
auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
const unsigned int conv_stride_x = conv_info.stride().first;
const unsigned int conv_stride_y = conv_info.stride().second;
const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type());
- const bool is_bifrost = get_arch_from_target(gpu_target) == GPUTarget::BIFROST;
// Configure kernel window
unsigned int num_elems_read_per_iteration_x = 0;
@@ -156,31 +155,28 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x;
break;
}
- if(is_bifrost)
+ if(conv_stride_x == 1 && conv_stride_y == 1)
{
- if(conv_stride_x == 1 && conv_stride_y == 1)
- {
- kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16";
- num_elems_read_per_iteration_x = 8;
- num_elems_written_per_iteration_x = 4;
- num_elems_read_per_iteration_y = 6;
- num_elems_written_per_iteration_y = 4;
- }
- else if(conv_stride_x == 2 && conv_stride_y == 2)
- {
- kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16";
- num_elems_read_per_iteration_x = 10;
- num_elems_written_per_iteration_x = 4;
- num_elems_read_per_iteration_y = 5;
- num_elems_written_per_iteration_y = 2;
- }
+ kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_f16";
+ num_elems_read_per_iteration_x = 8;
+ num_elems_written_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 6;
+ num_elems_written_per_iteration_y = 4;
+ }
+ else if(conv_stride_x == 2 && conv_stride_y == 2)
+ {
+ kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_f16";
+ num_elems_read_per_iteration_x = 10;
+ num_elems_written_per_iteration_x = 4;
+ num_elems_read_per_iteration_y = 5;
+ num_elems_written_per_iteration_y = 2;
}
}
- else if(input->data_type() == DataType::F32 && is_bifrost)
+ else if(input->data_type() == DataType::F32)
{
if(conv_stride_x == 1 && conv_stride_y == 1)
{
- kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32";
+ kernel_name = "depthwise_convolution_3x3_stridex1_stridey1_f32";
num_elems_read_per_iteration_x = 4;
num_elems_read_per_iteration_y = 6;
num_elems_written_per_iteration_x = 2;
@@ -188,7 +184,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
}
else if(conv_stride_x == 2 && conv_stride_y == 2)
{
- kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32";
+ kernel_name = "depthwise_convolution_3x3_stridex2_stridey2_f32";
num_elems_read_per_iteration_x = 6;
num_elems_read_per_iteration_y = 5;
num_elems_written_per_iteration_x = 2;
@@ -239,7 +235,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
CLDepthwiseConvolutionLayer3x3NCHWKernel::CLDepthwiseConvolutionLayer3x3NCHWKernel()
- : _conv_stride_x(0), _conv_pad_top(0), _conv_pad_left(0)
+ : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _output_multipliers(), _output_shifts(), _is_quantized(false), _conv_stride_x(0), _conv_pad_top(0), _conv_pad_left(0)
{
}
@@ -278,10 +274,9 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const CLCompileContext
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
// Configure kernel window
- std::string kernel_name;
- const GPUTarget gpu_target = get_target();
+ std::string kernel_name;
- auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, depth_multiplier, gpu_target, kernel_name, dilation);
+ auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, depth_multiplier, kernel_name, dilation);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure_internal(win_config.second);
@@ -372,13 +367,13 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const CLCompileContext
}
Status CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, GPUTarget gpu_target,
+ const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info,
const Size2D &dilation, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts)
{
std::string kernel_name;
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(),
- conv_info, depth_multiplier, gpu_target, kernel_name, dilation)
+ conv_info, depth_multiplier, kernel_name, dilation)
.first);
return Status{};