From eff8d95991205e874091576e2d225f63246dd0bb Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Mon, 2 Jul 2018 15:29:57 +0100 Subject: COMPMID-1316 Using 8 bit dot product instruction in CLDepthWiseConvolution with QASYMM8 Change-Id: I3fc37bdceaae8b4b1effa51129b71bf352388564 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/138374 Tested-by: Jenkins Reviewed-by: Anthony Barbier Reviewed-by: Gian Marco Iodice --- src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp') diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index 8bd62c69f7..e091e5c2cb 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -54,7 +54,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3); - ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(2) * depth_multiplier) != output->dimension(2)); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride().first < 1 || conv_info.stride().first > 3); const bool is_qasymm = is_data_type_quantized_asymmetric(input->data_type()); @@ -170,9 +169,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen } else { - kernel_name = is_qasymm ? "depthwise_convolution_3x3_quantized_nchw" : "depthwise_convolution_3x3"; + const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()); + + kernel_name = is_qasymm ? (std::string("depthwise_convolution_3x3_quantized") + (is_dot8_supported ? 
"_dot8" : "") + "_nchw") : "depthwise_convolution_3x3"; num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type()); - num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y < 3) ? (2 / conv_stride_y) : 1; + num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y == 1) ? 2 : 1; num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x; num_elems_read_per_iteration_y = num_elems_written_per_iteration_y + 2; } @@ -210,6 +211,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info)); bool is_qasymm = is_data_type_quantized_asymmetric(input->info()->data_type()); -- cgit v1.2.1