aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-12-24 13:09:02 +0000
committer Anthony Barbier <Anthony.barbier@arm.com> 2018-12-28 13:22:17 +0000
commit 3f8aac4474b245b20c07b3a5384577a83f4950a7 (patch)
tree 83dba64602f7a51d4df0de6ff1363790fa0a056a
parent 189606997e4e46d9f81e8198e2e96b1ab6dea8f2 (diff)
download ComputeLibrary-3f8aac4474b245b20c07b3a5384577a83f4950a7.tar.gz
COMPMID-1860: Invalid arguments in CLDepthwiseConvolution3x3 for NHWC
- Alters the kernel/function selection process to use validate for selection.
- Fixes border kernel input in case of permutation.

Change-Id: Ia61df3a0ed661349114dc125f33ad53ee40d9c76
Reviewed-on: https://review.mlplatform.org/443
Reviewed-by: Anthony Barbier <Anthony.barbier@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp 8
-rw-r--r-- src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp 30
2 files changed, 12 insertions, 26 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 1fce14f215..4f3636b081 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -55,6 +55,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier > 1); // COMPMID-1071 Add depth multiplier support for NHWC
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) != 3 || weights->dimension(2) != 3);
+ ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1 || conv_info.stride().first > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON(std::max(conv_info.pad_top(), conv_info.pad_bottom()) > 1);
const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type());
@@ -155,10 +157,6 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info));
- const unsigned int conv_stride_x = conv_info.stride().first;
- ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 2);
- ARM_COMPUTE_ERROR_ON(std::max(conv_info.pad_top(), conv_info.pad_bottom()) > 1);
-
const bool is_qasymm = is_data_type_quantized_asymmetric(input->info()->data_type());
const bool is_stride_1 = ((conv_info.stride().first == conv_info.stride().second) && (conv_info.stride().first == 1));
const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());
@@ -243,7 +241,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input,
}
else
{
- build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(conv_stride_x));
+ build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(conv_info.stride().first));
build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y));
}
build_opts.add_option_if(_input->info()->tensor_shape().total_size_upper(3) > 1,
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index c2782aaa89..be13f500ea 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -95,7 +95,7 @@ void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor
if(_needs_permute)
{
// Configure the function to transform the convoluted output to ACL's native ordering format NCHW
- _permuted_output.info()->set_data_layout(DataLayout::NHWC);
+ _permuted_output.info()->set_data_layout(DataLayout::NCHW);
_permute_output_to_nhwc.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
// Allocate tensors
@@ -109,7 +109,7 @@ void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor
{
zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
}
- _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
+ _border_handler.configure(input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
}
Status CLDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
@@ -187,19 +187,6 @@ void CLDepthwiseConvolutionLayer3x3::prepare()
}
}
-namespace
-{
-inline bool can_run_optimised_3x3_kernel(const ITensorInfo *weights, unsigned int depth_multiplier)
-{
- const DataLayout data_layout = weights->data_layout();
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const Size2D weights_size(weights->dimension(idx_w), weights->dimension(idx_h));
- return weights_size == Size2D(3, 3) && (data_layout == DataLayout::NHWC && depth_multiplier <= 1);
-}
-
-} // namespace
-
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer()
: _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _output_stage_kernel(), _activationlayer_function(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(),
_input_reshaped(), _weights_reshaped(), _v2mm_output(), _output_reshaped(), _is_prepared(false), _is_quantized(false), _is_activationlayer_enabled(false), _original_weights(nullptr),
@@ -214,7 +201,12 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
- if(can_run_optimised_3x3_kernel(weights->info(), depth_multiplier))
+ const Status can_run_optimised_3x3_kernel = CLDepthwiseConvolutionLayer3x3::validate(input->info(),
+ weights->info(),
+ biases != nullptr ? biases->info() : nullptr,
+ output->info(),
+ conv_info, depth_multiplier, act_info);
+ if(bool(can_run_optimised_3x3_kernel))
{
auto f = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3>();
f->configure(input, weights, biases, output, conv_info, depth_multiplier, act_info);
@@ -323,11 +315,7 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w
Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier, const ActivationLayerInfo &act_info)
{
- if(can_run_optimised_3x3_kernel(weights, depth_multiplier))
- {
- ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info));
- }
- else
+ if(!bool(CLDepthwiseConvolutionLayer3x3::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info)))
{
const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);