author    Georgios Pinitas <georgios.pinitas@arm.com>  2019-04-15 15:44:17 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>  2019-04-15 15:57:47 +0000
commit    d66094e37ecd747e85f30130e1a678bdbaf30788 (patch)
tree      3587faa39d46fb344db03beee54f663185407678 /arm_compute
parent    17dae8765ba738c0d68fd3ed6af9eae8ae40798b (diff)
download  ComputeLibrary-d66094e37ecd747e85f30130e1a678bdbaf30788.tar.gz
COMPMID-1995: Fix NEPoolingLayer for quantized 3x3
The quantized 3x3 pooling layer on NEON did not support different quantization information for the input and output.

Change-Id: I38f8da6ec91c91ba37a21d9d0e1a14fd5bb99f86
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/992
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
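For context, the fix boils down to routing each value through the real domain: dequantize with the input quantization info, then quantize again with the output one. A minimal scalar sketch of that requantization, which the new vector helpers in the diff below perform eight lanes at a time (names and the exact rounding policy here are illustrative, not taken from the patch):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Requantize a single uint8 value from (in_scale, in_offset) to (out_scale, out_offset).
inline uint8_t requantize(uint8_t in, float in_scale, int in_offset,
                          float out_scale, int out_offset)
{
    const float real  = (static_cast<int>(in) - in_offset) * in_scale;                // dequantize with the input qinfo
    const int   quant = static_cast<int>(std::lround(real / out_scale)) + out_offset; // quantize with the output qinfo
    return static_cast<uint8_t>(std::min(255, std::max(0, quant)));                   // saturate to the uint8 range
}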
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/NEON/NEAsymm.h | 60
1 file changed, 55 insertions(+), 5 deletions(-)
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index f71626182c..253d0fdff7 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -175,10 +175,33 @@ inline uint8_t finalize_quantization(int32_t in_value, int result_fixedpoint_mul
return out_u8;
}
+/** Dequantize a neon vector holding 8 quantized values.
+ *
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return Dequantized values in a neon vector
+ */
+inline float32x4x2_t vdequantize(const uint8x8_t &qv, const QuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const int offset = qi.offset;
+ const int32x4_t voffset = vdupq_n_s32(offset);
+ const float32x4_t vscale = vdupq_n_f32(scale);
+ const float32x4x2_t vdequantized_input =
+ {
+ {
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(qv)))), voffset)), vscale),
+ vmulq_f32(vcvtq_f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(qv)))), voffset)), vscale),
+ }
+ };
+ return vdequantized_input;
+}
+
/** Dequantize a neon vector holding 16 quantized values.
*
- * @param qv Input values to be dequantized.
- * @param qi Quantization information to be used in the computation.
+ * @param[in] qv Input values to be dequantized.
+ * @param[in] qi Quantization information to be used in the computation.
*
* @return Dequantized values in a neon vector
*/
@@ -200,10 +223,38 @@ inline float32x4x4_t vdequantize(const uint8x16_t &qv, const QuantizationInfo &q
return vdequantized_input;
}
+/** Quantize a neon vector holding 8 floating point values.
+ *
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline uint8x8_t vquantize(const float32x4x2_t &qv, const QuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const int offset = qi.offset;
+ const float32x4_t voffset = vdupq_n_f32(offset);
+ const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+ const int32x4x4_t rf =
+ {
+ {
+#ifdef __aarch64__
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+#else //__aarch64__
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+#endif //__aarch64__
+ }
+ };
+ return vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[0]), vqmovn_s32(rf.val[1])));
+}
+
/** Quantize a neon vector holding 16 floating point values.
*
- * @param qv Input values to be quantized.
- * @param qi Quantization information to be used in the computation.
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
*
* @return A neon vector holding the quantized values
*/
@@ -233,7 +284,6 @@ inline uint8x16_t vquantize(const float32x4x4_t &qv, const QuantizationInfo &qi)
const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));
return vcombine_u8(pa, pb);
}
-
} // namespace arm_compute
#include "arm_compute/core/NEON/NEAsymm.inl"
#endif // __ARM_COMPUTE_NEASYMM_H__
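Below is a hedged usage sketch of the two new 8-lane helpers, combined into a hypothetical vrequantize() such as a quantized pooling kernel could call when the input and output QuantizationInfo differ. The vrequantize name and the Types.h include are assumptions for illustration, not part of the patch:

#include <arm_neon.h>

#include "arm_compute/core/Types.h"       // assumed location of QuantizationInfo at this revision
#include "arm_compute/core/NEON/NEAsymm.h"

// Hypothetical helper: requantize 8 uint8 values from the input quantization
// domain (src_qinfo) to the output quantization domain (dst_qinfo).
inline uint8x8_t vrequantize(const uint8x8_t                     &v,
                             const arm_compute::QuantizationInfo &src_qinfo,
                             const arm_compute::QuantizationInfo &dst_qinfo)
{
    // Dequantize with the input (scale, offset)...
    const float32x4x2_t vf = arm_compute::vdequantize(v, src_qinfo);
    // ...then quantize again with the output (scale, offset).
    return arm_compute::vquantize(vf, dst_qinfo);
}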