about summary refs log tree commit diff
path: root/arm_compute/core/NEON
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core/NEON')
-rw-r--r--arm_compute/core/NEON/NEAsymm.h17
-rw-r--r--arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h4
2 files changed, 10 insertions, 11 deletions
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index a3bd7e28f0..c75a58046b 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -325,23 +325,22 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, float scale, int32_t offs
return vdequantized_input;
}
-/** Dequantize following an asymmetric quantization scheme a neon vector holding 16 quantized values.
+/** Dequantize following symmetric quantization scheme a neon vector holding 16 quantized values.
*
- * @param[in] qv Input values to be dequantized.
- * @param[in] vscale Vector containing quantization scaling factors.
- * @param[in] voffset Vector containing quantization offset.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] vscale Vector containing quantization scaling factors.
*
* @return Dequantized values in a neon vector
*/
-inline float32x4x4_t vdequantize(const uint8x16_t &qv, const float32x4x4_t vscale, const int32x4x4_t voffset)
+inline float32x4x4_t vdequantize(const int8x16_t &qv, const float32x4x4_t vscale)
{
const float32x4x4_t vdequantized_input =
{
{
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[0])), vscale.val[0]),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset.val[1])), vscale.val[1]),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[2])), vscale.val[2]),
- vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(qv))))), voffset.val[3])), vscale.val[3]),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[0]),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_low_s8(qv))))), vscale.val[1]),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[2]),
+ vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(vmovl_s8(vget_high_s8(qv))))), vscale.val[3]),
}
};
return vdequantized_input;
diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
index 3e7feda650..7e65384677 100644
--- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
@@ -52,13 +52,13 @@ public:
~NEDequantizationLayerKernel() = default;
/** Set input, output tensors.
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
* @param[in] output Output tensor info. Data types supported: F16/F32.
*
* @return a status