about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2018-07-02 15:29:57 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit eff8d95991205e874091576e2d225f63246dd0bb (patch)
tree a0dc2ab5544c7dbc68d7e2af3ae72101b8247e6a /src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
parent 74b671bc2da803ef60bcdec62923943960eb3acd (diff)
download ComputeLibrary-eff8d95991205e874091576e2d225f63246dd0bb.tar.gz
COMPMID-1316 Using 8 bit dot product instruction in CLDepthWiseConvolution with QASYMM8
Change-Id: I3fc37bdceaae8b4b1effa51129b71bf352388564
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/138374
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 8
1 files changed, 5 insertions, 3 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index 8bd62c69f7..e091e5c2cb 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -54,7 +54,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
"For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported");
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3);
- ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(2) * depth_multiplier) != output->dimension(2));
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1 || conv_info.stride().first > 3);
const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type());
@@ -170,9 +169,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
}
else
{
- kernel_name = is_qasymm ? "depthwise_convolution_3x3_quantized_nchw" : "depthwise_convolution_3x3";
+ const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());
+
+ kernel_name = is_qasymm ? (std::string("depthwise_convolution_3x3_quantized") + (is_dot8_supported ? "_dot8" : "") + "_nchw") : "depthwise_convolution_3x3";
num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
- num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y < 3) ? (2 / conv_stride_y) : 1;
+ num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y == 1) ? 2 : 1;
num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x;
num_elems_read_per_iteration_y = num_elems_written_per_iteration_y + 2;
}
@@ -210,6 +211,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input,
ActivationLayerInfo act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info));
bool is_qasymm = is_data_type_quantized_asymmetric(input->info()->data_type());