From a046e164b96a8441b2fa14ef578f7db46a0e97da Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Tue, 8 Oct 2019 09:36:26 +0100
Subject: COMPMID-2600: Implement a new and generic depthwise convolution for CL QASYMM8 NHWC

The NCHW case is supported at function level by permuting the
inputs/outputs to NHWC.

This patch also removes CLDirectConvolutionLayerOutputStageKernel which
is deprecated and some kernels which were only used in the generic case
of depthwise convolution.

Change-Id: I91e0f02d0a2f4a4a352e08c248e648944137fe68
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/2056
Reviewed-by: Giorgio Arena
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gian Marco Iodice
---
 .../direct_convolution_1x1_3x3_5x5_quantized.cl    | 78 +---------------------
 1 file changed, 1 insertion(+), 77 deletions(-)

(limited to 'src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl')

diff --git a/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl b/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
index 83da76785b..5ad9afb23c 100644
--- a/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
+++ b/src/core/CL/cl_kernels/direct_convolution_1x1_3x3_5x5_quantized.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -247,79 +247,3 @@ __kernel void direct_convolution_1x1_3x3_5x5_quantized(
     vstore8(convert_uchar8_sat(pixels0), 0, (__global uchar *)dst.ptr);
 }
 #endif // defined(DATA_TYPE) && defined(STRIDE_X) && defined(WEIGHTS_DEPTH)
-
-#if defined(VEC_SIZE)
-
-#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define CONVERT_SAT_UCHAR_STR(x, size) (convert_uchar##size##_sat((x)))
-#define CONVERT_SAT_UCHAR(x, size) CONVERT_SAT_UCHAR_STR(x, size)
-
-/** This function computes the output stage of a depthwise convolution.
- *
- * @param[in] src_ptr                             Pointer to the source image. Supported data types: QASYMM8
- * @param[in] src_stride_x                        Stride of the source image in X dimension (in bytes)
- * @param[in] src_step_x                          src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y                        Stride of the source image in Y dimension (in bytes)
- * @param[in] src_step_y                          src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes  The offset of the first element in the source image
- * @param[in] src_stride_z                        Stride of the source tensor in Z dimension (in bytes)
- * @param[in] src_step_z                          src_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_ptr                             Pointer to the destination tensor. Supported data types: QASYMM8
- * @param[in] dst_stride_x                        Stride of the destination tensor in X dimension (in bytes)
- * @param[in] dst_step_x                          dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y                        Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] dst_step_y                          dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_stride_z                        Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z                          dst_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes  The offset of the first element in the destination tensor
- * @param[in] bias_ptr                            (Optional) Pointer to the biases vector. Supported data types: S32
- * @param[in] bias_stride_x                       (Optional) Stride of the biases vector in X dimension (in bytes)
- * @param[in] bias_step_x                         (Optional) bias_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector
- * @param[in] output_offset                       Quantized offset of zero point of the output tensor data range
- * @param[in] output_multiplier                   Output scale multiplier
- * @param[in] output_shift                        Output scale divisor exponent
- */
-__kernel void output_stage_quantized(
-    TENSOR3D_DECLARATION(src),
-    TENSOR3D_DECLARATION(dst),
-#if defined(HAS_BIAS)
-    VECTOR_DECLARATION(bias),
-#endif //defined(HAS_BIAS)
-    int output_offset,
-    int output_multiplier,
-    int output_shift)
-{
-    Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
-    Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
-#if defined(HAS_BIAS)
-    Vector bias = CONVERT_TO_VECTOR_STRUCT_NO_STEP(bias);
-#endif //defined(HAS_BIAS)
-
-    // Load input
-    VEC_INT vals = VLOAD(VEC_SIZE)(0, (__global int *)(src.ptr));
-
-#if defined(HAS_BIAS)
-    // Load and add bias
-#if defined(NCHW)
-    int bias_value = *((__global int *)(vector_offset(&bias, get_global_id(2))));
-#else // defined(NCHW)
-    VEC_INT bias_value = VLOAD(VEC_SIZE)(0, ((__global int *)(vector_offset(&bias, get_global_id(0) * VEC_SIZE))));
-#endif // defined(NCHW)
-
-    vals += (VEC_INT)(bias_value);
-#endif //defined(HAS_BIAS)
-
-    vals = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(vals, output_multiplier, output_shift, VEC_SIZE);
-    vals = vals + output_offset;
-
-    // Store result in dst
-    VSTORE(VEC_SIZE)
-    (CONVERT_SAT_UCHAR(vals, VEC_SIZE), 0, (__global uchar *)dst.ptr);
-}
-
-#undef VEC_INT
-#undef CONVERT_SAT_UCHAR_STR
-#undef CONVERT_SAT_UCHAR
-
-#endif // defined(VEC_SIZE)
-- cgit v1.2.1
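
For reference, the sketch below is a scalar C model of the per-element arithmetic that the removed output_stage_quantized kernel performs: add the optional S32 bias to the 32-bit accumulator, requantize, add the output zero-point offset, and saturate to uchar. It assumes ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE expands to a gemmlowp-style saturating rounding doubling high multiply followed by a rounding right shift; the function names below are illustrative and not part of the patch or of ComputeLibrary.

/* Illustrative scalar reference only; not part of this patch. */
#include <stdint.h>

/* Saturating rounding doubling high multiply: round((a * b * 2) / 2^32). */
static int32_t sat_rdh_mul(int32_t a, int32_t b)
{
    if (a == INT32_MIN && b == INT32_MIN)
    {
        return INT32_MAX; /* the only case that would overflow */
    }
    int64_t ab    = (int64_t)a * (int64_t)b;
    int64_t nudge = (ab >= 0) ? (1LL << 30) : (1 - (1LL << 30));
    return (int32_t)((ab + nudge) >> 31);
}

/* Rounding arithmetic shift right by "exponent" (rounds half away from zero). */
static int32_t rounding_shift_right(int32_t x, int exponent)
{
    int32_t mask      = (int32_t)((1LL << exponent) - 1);
    int32_t remainder = x & mask;
    int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
    return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

/* Scalar model of the removed output stage: bias add, requantize, offset, saturate. */
static uint8_t output_stage_scalar(int32_t acc, int32_t bias_value,
                                   int32_t output_offset, int32_t output_multiplier, int32_t output_shift)
{
    int32_t v = acc + bias_value;                         /* optional bias add (HAS_BIAS path) */
    v         = rounding_shift_right(sat_rdh_mul(v, output_multiplier), output_shift);
    v        += output_offset;                            /* move into the QASYMM8 value range */
    if (v < 0)
    {
        v = 0;
    }
    if (v > 255)
    {
        v = 255;                                          /* convert_uchar_sat equivalent */
    }
    return (uint8_t)v;
}

In the new generic NHWC depthwise kernels this requantization is fused into the convolution kernel itself, which is why the separate output stage kernel above can be dropped.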