diff options
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r-- | src/core/CL/cl_kernels/concatenate.cl | 14 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/helpers_asymm.h | 21 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/roi_align_layer_quantized.cl | 13 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/scale_quantized.cl | 2 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/warp_helpers_quantized.h | 4 |
5 files changed, 36 insertions, 18 deletions
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl index 5ccf746a4e..3684eb504f 100644 --- a/src/core/CL/cl_kernels/concatenate.cl +++ b/src/core/CL/cl_kernels/concatenate.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,15 +26,15 @@ #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) #define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE) #define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE) -#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE) +#define VEC_QUANT VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) #define CONVERT_RTE(x, type) (convert_##type##_rte((x))) #define CONVERT_DOWN(x, type) CONVERT_RTE(x, type) -inline VEC_UCHAR requantize(VEC_UCHAR input, float in_offset, float out_offset, float in_scale, float out_scale) +inline VEC_QUANT requantize(VEC_QUANT input, float in_offset, float out_offset, float in_scale, float out_scale) { const VEC_FLOAT in_f32 = (CONVERT(input, VEC_FLOAT) - (VEC_FLOAT)((float)in_offset)) * (VEC_FLOAT)((float)in_scale); const VEC_FLOAT out_f32 = in_f32 / ((VEC_FLOAT)(float)out_scale) + ((VEC_FLOAT)((float)out_offset)); - const VEC_UCHAR res_u8 = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT), VEC_UCHAR); - return res_u8; + const VEC_QUANT res_q8 = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT), VEC_QUANT); + return res_q8; } #endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */ @@ -319,7 +319,7 @@ __kernel void concatenate_width( source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) - const VEC_UCHAR out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT); + const VEC_QUANT out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT); VSTORE(VEC_SIZE) (out, 0, (__global DATA_TYPE *)(dst.ptr) + WIDTH_OFFSET); #else /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */ @@ -372,7 +372,7 @@ __kernel void concatenate_height( source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); #if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) - const VEC_UCHAR out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT); + const VEC_QUANT out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT); VSTORE(VEC_SIZE) (out, 0, (__global DATA_TYPE *)(dst.ptr + HEIGHT_OFFSET * dst_stride_y)); #else /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */ diff --git a/src/core/CL/cl_kernels/helpers_asymm.h b/src/core/CL/cl_kernels/helpers_asymm.h index 09409dc5e9..5a7c7126dc 100644 --- a/src/core/CL/cl_kernels/helpers_asymm.h +++ b/src/core/CL/cl_kernels/helpers_asymm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,19 @@ inline float dequantize_qasymm8(uchar input, float offset, float scale) return ((float)input - offset) * scale; } +/** Dequantize a scalar value from signed 8-bit asymmetric to floating-point + * + * @param[in] input Input value to quantize + * @param[in] offset Quantization offset + * @param[in] scale Quantization scale + * + * @return quantized value + */ +inline float dequantize_qasymm8_signed(char input, float offset, float scale) +{ + return ((float)input - offset) * scale; +} + /** Quantize a vector of values from floating-point * * @param[in] type Output data type. @@ -91,7 +104,7 @@ inline float dequantize_qasymm8(uchar input, float offset, float scale) #define DEQUANTIZE_IMPL(type, size) \ inline VEC_DATA_TYPE(float, size) dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \ { \ - return (CONVERT(input, VEC_DATA_TYPE(float, 4)) - offset) * scale; \ + return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \ } /** Correctly-rounded-to-nearest division by a power-of-two. @@ -384,10 +397,14 @@ inline float dequantize_qasymm8(uchar input, float offset, float scale) #define ASYMM_ROUNDING_HALF_SUM(a, b, size) asymm_rounding_half_sum##size(a, b) #define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) asymm_rescale##size(value, src_integer_bits, dst_integer_bits) +QUANTIZE_IMPL(uchar, 1) +QUANTIZE_IMPL(char, 1) QUANTIZE_IMPL(uchar, 4) QUANTIZE_IMPL(ushort, 4) QUANTIZE_IMPL(short, 4) +DEQUANTIZE_IMPL(uchar, 1) +DEQUANTIZE_IMPL(char, 1) DEQUANTIZE_IMPL(uchar, 4) DEQUANTIZE_IMPL(ushort, 4) DEQUANTIZE_IMPL(short, 4) diff --git a/src/core/CL/cl_kernels/roi_align_layer_quantized.cl b/src/core/CL/cl_kernels/roi_align_layer_quantized.cl index 030731b7d3..8093623904 100644 --- a/src/core/CL/cl_kernels/roi_align_layer_quantized.cl +++ b/src/core/CL/cl_kernels/roi_align_layer_quantized.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -86,16 +86,17 @@ inline DATA_TYPE roi_align_1x1(const Tensor3D *input, float region_start_x, const DATA_TYPE data3 = *(__global DATA_TYPE *)tensor3D_offset(input, x_low, y_high, pz); const DATA_TYPE data4 = *(__global DATA_TYPE *)tensor3D_offset(input, x_high, y_high, pz); #endif // defined(NHWC) - const float data1_f32 = dequantize_qasymm8(data1, OFFSET_IN, SCALE_IN); - const float data2_f32 = dequantize_qasymm8(data2, OFFSET_IN, SCALE_IN); - const float data3_f32 = dequantize_qasymm8(data3, OFFSET_IN, SCALE_IN); - const float data4_f32 = dequantize_qasymm8(data4, OFFSET_IN, SCALE_IN); + + const float data1_f32 = DEQUANTIZE(data1, OFFSET_IN, SCALE_IN, DATA_TYPE, 1); + const float data2_f32 = DEQUANTIZE(data2, OFFSET_IN, SCALE_IN, DATA_TYPE, 1); + const float data3_f32 = DEQUANTIZE(data3, OFFSET_IN, SCALE_IN, DATA_TYPE, 1); + const float data4_f32 = DEQUANTIZE(data4, OFFSET_IN, SCALE_IN, DATA_TYPE, 1); sum += w1 * data1_f32 + w2 * data2_f32 + w3 * data3_f32 + w4 * data4_f32; } } const float res_f32 = sum / (grid_size_x * grid_size_y); - return quantize_qasymm8(res_f32, OFFSET_OUT, SCALE_OUT); + return QUANTIZE(res_f32, OFFSET_OUT, SCALE_OUT, DATA_TYPE, 1); } /** Performs a roi align function. diff --git a/src/core/CL/cl_kernels/scale_quantized.cl b/src/core/CL/cl_kernels/scale_quantized.cl index 86dbf608f4..ccbd71e81c 100644 --- a/src/core/CL/cl_kernels/scale_quantized.cl +++ b/src/core/CL/cl_kernels/scale_quantized.cl @@ -165,7 +165,7 @@ __kernel void scale_bilinear_quantized_nhwc( const float fr = ((insf32.s0 * b * b1) + (insf32.s1 * a * b1) + (insf32.s2 * b * a1) + (insf32.s3 * a * a1)); - uchar res = convert_uchar_sat(convert_int_sat_rtp(fr / SCALE) + OFFSET); + DATA_TYPE res = CONVERT_SAT(convert_int_sat_rtp(fr / SCALE) + OFFSET, DATA_TYPE); *((__global DATA_TYPE *)out.ptr) = res; } diff --git a/src/core/CL/cl_kernels/warp_helpers_quantized.h b/src/core/CL/cl_kernels/warp_helpers_quantized.h index 48d6faef73..fc9788f45b 100644 --- a/src/core/CL/cl_kernels/warp_helpers_quantized.h +++ b/src/core/CL/cl_kernels/warp_helpers_quantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -117,7 +117,7 @@ inline const VEC_DATA_TYPE(DATA_TYPE, 4) bilinear_interpolate_with_border_quanti ((inf32.s8 * b.s4 * b.s5) + (inf32.s9 * a.s4 * b.s5) + (inf32.sa * b.s4 * a.s5) + (inf32.sb * a.s4 * a.s5)), ((inf32.sc * b.s6 * b.s7) + (inf32.sd * a.s6 * b.s7) + (inf32.se * b.s6 * a.s7) + (inf32.sf * a.s6 * a.s7))); - const uchar4 res = convert_uchar4_sat(convert_int4_sat_rtp(fr / scale) + offset_qasymm); + const VEC_DATA_TYPE(DATA_TYPE, 4) res = CONVERT_SAT(convert_int4_sat_rtp(fr / scale) + offset_qasymm, VEC_DATA_TYPE(DATA_TYPE, 4)); return res; } |