diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-02-02 12:52:07 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:47:18 +0000 |
commit | de5a1cc7e5c929b19fb1d3ed7d0d8783b9ac6860 (patch) | |
tree | a1787371cdaf4976d8781bd453551a85d2fc274a /src/core/CL/cl_kernels/depthwise_convolution_quantized.cl | |
parent | f36ac355e050a4714a951d04a72896e02cf5e2a1 (diff) | |
download | ComputeLibrary-de5a1cc7e5c929b19fb1d3ed7d0d8783b9ac6860.tar.gz |
COMPMID-856: CL Depthwise Convolution QASYMM8 support
Change-Id: Ic6097e7cf160e8b829fb521b7b99d9a57d9799d3
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118774
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/depthwise_convolution_quantized.cl')
-rw-r--r-- | src/core/CL/cl_kernels/depthwise_convolution_quantized.cl | 61 |
1 files changed, 60 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl index 450342ddfc..cd7f4f83d5 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl @@ -254,5 +254,64 @@ __kernel void depthwise_convolution_3x3_quantized( vstore8(pixels, 0, dst.ptr); } - #endif //defined(CONV_STRIDE_X) + +/** This function computes the output stage of a depthwise convolution. + * + * @param[in] src_ptr Pointer to the source image. Supported data types: QASYMM8 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: QASYMM8 + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] bias_ptr (Optional) Pointer to the biases vector. Supported data types: S32 + * @param[in] bias_stride_x (Optional) Stride of the biases vector in X dimension (in bytes) + * @param[in] bias_step_x (Optional) bias_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] bias_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector + * @param[in] output_offset Quantized offset of zero point of the output tensor data range + * @param[in] output_multiplier Output scale multiplier + * @param[in] output_shift Output scale divisor exponent + */ + +__kernel void output_stage_quantized( + TENSOR3D_DECLARATION(src), + TENSOR3D_DECLARATION(dst), +#if defined(HAS_BIAS) + VECTOR_DECLARATION(bias), +#endif //defined(HAS_BIAS) + int output_offset, + int output_multiplier, + int output_shift) +{ + Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); +#if defined(HAS_BIAS) + Vector bias = CONVERT_TO_VECTOR_STRUCT_NO_STEP(bias); +#endif //defined(HAS_BIAS) + + // Load input + int16 vals = vload16(0, (__global int *)(src.ptr)); + +#if defined(HAS_BIAS) + // Load and add bias + int bias_value = *((__global int *)(vector_offset(&bias, get_global_id(2)))); + vals += (int16)(bias_value); +#endif //defined(HAS_BIAS) + + vals = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(vals, output_multiplier, output_shift, 16); + vals = vals + output_offset; + vals = clamp(vals, 0, 255); + + // Store result in dst + vstore16(convert_uchar16(vals), 0, (__global uchar *)dst.ptr); +}
\ No newline at end of file |