aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/cl_kernels')
-rw-r--r--src/core/CL/cl_kernels/concatenate.cl14
-rw-r--r--src/core/CL/cl_kernels/helpers_asymm.h21
-rw-r--r--src/core/CL/cl_kernels/roi_align_layer_quantized.cl13
-rw-r--r--src/core/CL/cl_kernels/scale_quantized.cl2
-rw-r--r--src/core/CL/cl_kernels/warp_helpers_quantized.h4
5 files changed, 36 insertions, 18 deletions
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl
index 5ccf746a4e..3684eb504f 100644
--- a/src/core/CL/cl_kernels/concatenate.cl
+++ b/src/core/CL/cl_kernels/concatenate.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,15 +26,15 @@
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE)
+#define VEC_QUANT VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
-inline VEC_UCHAR requantize(VEC_UCHAR input, float in_offset, float out_offset, float in_scale, float out_scale)
+inline VEC_QUANT requantize(VEC_QUANT input, float in_offset, float out_offset, float in_scale, float out_scale)
{
const VEC_FLOAT in_f32 = (CONVERT(input, VEC_FLOAT) - (VEC_FLOAT)((float)in_offset)) * (VEC_FLOAT)((float)in_scale);
const VEC_FLOAT out_f32 = in_f32 / ((VEC_FLOAT)(float)out_scale) + ((VEC_FLOAT)((float)out_offset));
- const VEC_UCHAR res_u8 = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT), VEC_UCHAR);
- return res_u8;
+ const VEC_QUANT res_q8 = CONVERT_SAT(CONVERT_DOWN(out_f32, VEC_INT), VEC_QUANT);
+ return res_q8;
}
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
@@ -319,7 +319,7 @@ __kernel void concatenate_width(
source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
- const VEC_UCHAR out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
+ const VEC_QUANT out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
VSTORE(VEC_SIZE)
(out, 0, (__global DATA_TYPE *)(dst.ptr) + WIDTH_OFFSET);
#else /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
@@ -372,7 +372,7 @@ __kernel void concatenate_height(
source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
- const VEC_UCHAR out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
+ const VEC_QUANT out = requantize(source_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
VSTORE(VEC_SIZE)
(out, 0, (__global DATA_TYPE *)(dst.ptr + HEIGHT_OFFSET * dst_stride_y));
#else /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
diff --git a/src/core/CL/cl_kernels/helpers_asymm.h b/src/core/CL/cl_kernels/helpers_asymm.h
index 09409dc5e9..5a7c7126dc 100644
--- a/src/core/CL/cl_kernels/helpers_asymm.h
+++ b/src/core/CL/cl_kernels/helpers_asymm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,6 +64,19 @@ inline float dequantize_qasymm8(uchar input, float offset, float scale)
return ((float)input - offset) * scale;
}
+/** Dequantize a scalar value from signed 8-bit asymmetric to floating-point
+ *
+ * @param[in] input Input value to quantize
+ * @param[in] offset Quantization offset
+ * @param[in] scale Quantization scale
+ *
+ * @return quantized value
+ */
+inline float dequantize_qasymm8_signed(char input, float offset, float scale)
+{
+ return ((float)input - offset) * scale;
+}
+
/** Quantize a vector of values from floating-point
*
* @param[in] type Output data type.
@@ -91,7 +104,7 @@ inline float dequantize_qasymm8(uchar input, float offset, float scale)
#define DEQUANTIZE_IMPL(type, size) \
inline VEC_DATA_TYPE(float, size) dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
{ \
- return (CONVERT(input, VEC_DATA_TYPE(float, 4)) - offset) * scale; \
+ return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
}
/** Correctly-rounded-to-nearest division by a power-of-two.
@@ -384,10 +397,14 @@ inline float dequantize_qasymm8(uchar input, float offset, float scale)
#define ASYMM_ROUNDING_HALF_SUM(a, b, size) asymm_rounding_half_sum##size(a, b)
#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) asymm_rescale##size(value, src_integer_bits, dst_integer_bits)
+QUANTIZE_IMPL(uchar, 1)
+QUANTIZE_IMPL(char, 1)
QUANTIZE_IMPL(uchar, 4)
QUANTIZE_IMPL(ushort, 4)
QUANTIZE_IMPL(short, 4)
+DEQUANTIZE_IMPL(uchar, 1)
+DEQUANTIZE_IMPL(char, 1)
DEQUANTIZE_IMPL(uchar, 4)
DEQUANTIZE_IMPL(ushort, 4)
DEQUANTIZE_IMPL(short, 4)
diff --git a/src/core/CL/cl_kernels/roi_align_layer_quantized.cl b/src/core/CL/cl_kernels/roi_align_layer_quantized.cl
index 030731b7d3..8093623904 100644
--- a/src/core/CL/cl_kernels/roi_align_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/roi_align_layer_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -86,16 +86,17 @@ inline DATA_TYPE roi_align_1x1(const Tensor3D *input, float region_start_x,
const DATA_TYPE data3 = *(__global DATA_TYPE *)tensor3D_offset(input, x_low, y_high, pz);
const DATA_TYPE data4 = *(__global DATA_TYPE *)tensor3D_offset(input, x_high, y_high, pz);
#endif // defined(NHWC)
- const float data1_f32 = dequantize_qasymm8(data1, OFFSET_IN, SCALE_IN);
- const float data2_f32 = dequantize_qasymm8(data2, OFFSET_IN, SCALE_IN);
- const float data3_f32 = dequantize_qasymm8(data3, OFFSET_IN, SCALE_IN);
- const float data4_f32 = dequantize_qasymm8(data4, OFFSET_IN, SCALE_IN);
+
+ const float data1_f32 = DEQUANTIZE(data1, OFFSET_IN, SCALE_IN, DATA_TYPE, 1);
+ const float data2_f32 = DEQUANTIZE(data2, OFFSET_IN, SCALE_IN, DATA_TYPE, 1);
+ const float data3_f32 = DEQUANTIZE(data3, OFFSET_IN, SCALE_IN, DATA_TYPE, 1);
+ const float data4_f32 = DEQUANTIZE(data4, OFFSET_IN, SCALE_IN, DATA_TYPE, 1);
sum += w1 * data1_f32 + w2 * data2_f32 + w3 * data3_f32 + w4 * data4_f32;
}
}
const float res_f32 = sum / (grid_size_x * grid_size_y);
- return quantize_qasymm8(res_f32, OFFSET_OUT, SCALE_OUT);
+ return QUANTIZE(res_f32, OFFSET_OUT, SCALE_OUT, DATA_TYPE, 1);
}
/** Performs a roi align function.
diff --git a/src/core/CL/cl_kernels/scale_quantized.cl b/src/core/CL/cl_kernels/scale_quantized.cl
index 86dbf608f4..ccbd71e81c 100644
--- a/src/core/CL/cl_kernels/scale_quantized.cl
+++ b/src/core/CL/cl_kernels/scale_quantized.cl
@@ -165,7 +165,7 @@ __kernel void scale_bilinear_quantized_nhwc(
const float fr = ((insf32.s0 * b * b1) + (insf32.s1 * a * b1) + (insf32.s2 * b * a1) + (insf32.s3 * a * a1));
- uchar res = convert_uchar_sat(convert_int_sat_rtp(fr / SCALE) + OFFSET);
+ DATA_TYPE res = CONVERT_SAT(convert_int_sat_rtp(fr / SCALE) + OFFSET, DATA_TYPE);
*((__global DATA_TYPE *)out.ptr) = res;
}
diff --git a/src/core/CL/cl_kernels/warp_helpers_quantized.h b/src/core/CL/cl_kernels/warp_helpers_quantized.h
index 48d6faef73..fc9788f45b 100644
--- a/src/core/CL/cl_kernels/warp_helpers_quantized.h
+++ b/src/core/CL/cl_kernels/warp_helpers_quantized.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -117,7 +117,7 @@ inline const VEC_DATA_TYPE(DATA_TYPE, 4) bilinear_interpolate_with_border_quanti
((inf32.s8 * b.s4 * b.s5) + (inf32.s9 * a.s4 * b.s5) + (inf32.sa * b.s4 * a.s5) + (inf32.sb * a.s4 * a.s5)),
((inf32.sc * b.s6 * b.s7) + (inf32.sd * a.s6 * b.s7) + (inf32.se * b.s6 * a.s7) + (inf32.sf * a.s6 * a.s7)));
- const uchar4 res = convert_uchar4_sat(convert_int4_sat_rtp(fr / scale) + offset_qasymm);
+ const VEC_DATA_TYPE(DATA_TYPE, 4) res = CONVERT_SAT(convert_int4_sat_rtp(fr / scale) + offset_qasymm, VEC_DATA_TYPE(DATA_TYPE, 4));
return res;
}