aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp')
-rw-r--r--src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp30
1 files changed, 26 insertions, 4 deletions
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 451ccc4956..f01b58a8b3 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -142,9 +142,10 @@ Status CLDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
+ const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type());
const bool is_nhwc = input->data_layout() == DataLayout::NHWC;
const bool needs_permute = is_nhwc && (depth_multiplier > 1);
- const bool needs_weights_reshape = is_nhwc && (depth_multiplier == 1);
+ const bool needs_weights_reshape = is_nhwc && (depth_multiplier == 1) && is_quantized;
const bool is_stride_1 = ((conv_info.stride().first == conv_info.stride().second) && (conv_info.stride().first == 1));
const bool is_stride_1_dilation_1 = (is_stride_1 && dilation.x() == 1 && dilation.y() == 1);
const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());
@@ -152,6 +153,17 @@ Status CLDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const
info.c0 = 4;
info.transpose = is_stride_1_dilation_1 && is_dot8_supported;
+ if(is_quantized)
+ {
+ const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = (output->total_size() == 0) ? iq_info : output->quantization_info().uniform();
+
+ const float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
+ ARM_COMPUTE_UNUSED(multiplier);
+ ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
+ }
+
if(needs_permute)
{
TensorShape permuted_input_shape = input->tensor_shape();
@@ -177,7 +189,10 @@ Status CLDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const
ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(input, &weights->clone()->set_tensor_shape(reshaped_weights_shape), biases, output, conv_info, depth_multiplier,
act_info, dilation));
}
- ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
+ }
}
else
{
@@ -373,7 +388,7 @@ Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
const bool can_run_optimised_3x3_kernel = (weights->dimension(idx_w) == 3) && (weights->dimension(idx_h) == 3);
- if(can_run_optimised_3x3_kernel)
+ if(!can_run_optimised_3x3_kernel)
{
const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
@@ -415,6 +430,13 @@ Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
if(is_quantized)
{
+ const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = (output->total_size() == 0) ? iq_info : output->quantization_info().uniform();
+
+ const float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
+ ARM_COMPUTE_UNUSED(multiplier);
+ ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f);
ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output));
}
@@ -426,7 +448,7 @@ Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
}
else
{
- CLDepthwiseConvolutionLayer3x3::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, GPUTarget::MIDGARD, dilation);
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseConvolutionLayer3x3::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, GPUTarget::MIDGARD, dilation));
}
return Status{};
}