COMPMID-1316 Using 8 bit dot product instruction in CLDepthWiseConvolution with QASYMM8

Change-Id: I3fc37bdceaae8b4b1effa51129b71bf352388564
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/138374
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
author: Giorgio Arena <giorgio.arena@arm.com> 2018-07-02 15:29:57 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: eff8d95991205e874091576e2d225f63246dd0bb (patch)
tree: a0dc2ab5544c7dbc68d7e2af3ae72101b8247e6a /src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
parent: 74b671bc2da803ef60bcdec62923943960eb3acd (diff)
download: ComputeLibrary-eff8d95991205e874091576e2d225f63246dd0bb.tar.gz
1 files changed, 5 insertions, 3 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index 8bd62c69f7..e091e5c2cb 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -54,7 +54,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
                                     "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported");
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3);
-    ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(2) * depth_multiplier) != output->dimension(2));
     ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1 || conv_info.stride().first > 3);
 
     const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type());
@@ -170,9 +169,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     }
     else
     {
-        kernel_name                       = is_qasymm ? "depthwise_convolution_3x3_quantized_nchw" : "depthwise_convolution_3x3";
+        const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());
+
+        kernel_name                       = is_qasymm ? (std::string("depthwise_convolution_3x3_quantized") + (is_dot8_supported ? "_dot8" : "") + "_nchw") : "depthwise_convolution_3x3";
         num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
-        num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y < 3) ? (2 / conv_stride_y) : 1;
+        num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y == 1) ? 2 : 1;
         num_elems_read_per_iteration_x    = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x;
         num_elems_read_per_iteration_y    = num_elems_written_per_iteration_y + 2;
     }
@@ -210,6 +211,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input,
                                                          ActivationLayerInfo act_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info));
 
     bool is_qasymm = is_data_type_quantized_asymmetric(input->info()->data_type());
author	Giorgio Arena <giorgio.arena@arm.com>	2018-07-02 15:29:57 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:54:54 +0000
commit	eff8d95991205e874091576e2d225f63246dd0bb (patch)
tree	a0dc2ab5544c7dbc68d7e2af3ae72101b8247e6a /src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
parent	74b671bc2da803ef60bcdec62923943960eb3acd (diff)
download	ComputeLibrary-eff8d95991205e874091576e2d225f63246dd0bb.tar.gz