author    Michele Di Giorgio <michele.digiorgio@arm.com>  2019-10-01 12:25:49 +0100
committer Michele Di Giorgio <michele.digiorgio@arm.com>  2019-10-01 17:26:16 +0000
commit    d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a (patch)
tree      e4b2a1e670a6002cd70e920ad7043c090b5d25f1 /arm_compute
parent    79f88e6d825402388bb79fc123ee2dfe01985bda (diff)
COMPMID-2699: Add support for QASYMM16 in NEQuantizationLayer
Change-Id: Icb968e37551a9048040e9aaff5329e874c53a2ee
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2016
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/core/NEON/NEAsymm.h                             | 34
-rw-r--r--  arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h   | 25
-rw-r--r--  arm_compute/runtime/NEON/functions/NEQuantizationLayer.h    |  4
3 files changed, 58 insertions(+), 5 deletions(-)
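
For reference, QASYMM16 (16-bit asymmetric quantization) maps a real value x to an unsigned 16-bit integer as q = round(x / scale + offset), saturated to [0, 65535]. A scalar sketch of what the vectorised code below computes lane by lane; the helper name is hypothetical and not part of this patch, and its tie-breaking may differ slightly from NEON's round-to-nearest-even conversion:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Scalar reference for QASYMM16 quantization: q = round(x / scale + offset),
// saturated to the uint16_t range [0, 65535].
inline uint16_t quantize_qasymm16_scalar(float x, float scale, int32_t offset)
{
    const int32_t q = static_cast<int32_t>(std::lround(x / scale + offset));
    return static_cast<uint16_t>(std::min<int32_t>(std::max<int32_t>(q, 0), 65535));
}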
diff --git a/arm_compute/core/NEON/NEAsymm.h b/arm_compute/core/NEON/NEAsymm.h
index f2d20d373a..56d4c09f92 100644
--- a/arm_compute/core/NEON/NEAsymm.h
+++ b/arm_compute/core/NEON/NEAsymm.h
@@ -331,6 +331,40 @@ inline uint8x16_t vquantize(const float32x4x4_t &qv, const UniformQuantizationIn
const uint8x8_t pb = vqmovun_s16(vcombine_s16(vqmovn_s32(rf.val[2]), vqmovn_s32(rf.val[3])));
return vcombine_u8(pa, pb);
}
+
+/** Quantize a neon vector holding 16 floating point values to QASYMM16.
+ *
+ * @param[in] qv Input values to be quantized.
+ * @param[in] qi Quantization information to be used in the computation.
+ *
+ * @return A neon vector holding the quantized values
+ */
+inline uint16x8x2_t vquantize_qasymm16(const float32x4x4_t &qv, const UniformQuantizationInfo &qi)
+{
+ const float scale = qi.scale;
+ const int offset = qi.offset;
+ const float32x4_t voffset = vdupq_n_f32(offset);
+ const float32x4_t vinvscale = vdupq_n_f32(1.f / scale);
+ const int32x4x4_t rf =
+ {
+ {
+#ifdef __aarch64__
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
+ vcvtnq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
+#else //__aarch64__
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[0], vinvscale)),
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[1], vinvscale)),
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[2], vinvscale)),
+ vcvtq_s32_f32(vmlaq_f32(voffset, qv.val[3], vinvscale)),
+#endif //__aarch64__
+ }
+ };
+ const uint16x8_t pa = vcombine_u16(vqmovun_s32(rf.val[0]), vqmovun_s32(rf.val[1]));
+ const uint16x8_t pb = vcombine_u16(vqmovun_s32(rf.val[2]), vqmovun_s32(rf.val[3]));
+ return { pa, pb };
+}
} // namespace arm_compute
#include "arm_compute/core/NEON/NEAsymm.inl"
#endif // __ARM_COMPUTE_NEASYMM_H__
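
As a usage sketch, the new helper can be driven directly with NEON intrinsics. The driver below is hypothetical and not part of the patch; it assumes an Arm target with ASIMD/NEON support and the ComputeLibrary headers on the include path:

#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/QuantizationInfo.h"

#include <arm_neon.h>
#include <cstdio>

int main()
{
    // 16 input floats, loaded as four float32x4_t lanes.
    const float in[16] = { 0.f, 0.5f, 1.f, 1.5f, 2.f, 2.5f, 3.f, 3.5f,
                           4.f, 4.5f, 5.f, 5.5f, 6.f, 6.5f, 7.f, 7.5f };
    const float32x4x4_t qv = { { vld1q_f32(in + 0), vld1q_f32(in + 4),
                                 vld1q_f32(in + 8), vld1q_f32(in + 12) } };

    // scale = 0.5, offset = 10, i.e. q = round(x / 0.5 + 10).
    const arm_compute::UniformQuantizationInfo qi{ 0.5f, 10 };
    const uint16x8x2_t q = arm_compute::vquantize_qasymm16(qv, qi);

    uint16_t out[16];
    vst1q_u16(out + 0, q.val[0]);
    vst1q_u16(out + 8, q.val[1]);
    for(int i = 0; i < 16; ++i)
    {
        printf("%u ", (unsigned)out[i]);
    }
    printf("\n");
    return 0;
}

With scale = 0.5 and offset = 10, the inputs 0.0, 0.5, ..., 7.5 quantize to 10, 11, ..., 25.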
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index 391a72c6db..e1aaad5094 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -57,13 +57,15 @@ public:
/** Set the input, output.
*
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8.
+ * @param[out] output Destination tensor with the same dimensions as the input. Data types supported: QASYMM8/QASYMM16.
+ *
+ * @note Output auto initialization is not supported by this kernel
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel
*
* @param[in] input Input tensor info. Data types supported: F32/F16.
- * @param[in] output Output tensor info. Data types supported: QASYMM8.
+ * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16.
*
* @return a status
*/
@@ -73,11 +75,28 @@ public:
void run(const Window &window, const ThreadInfo &info) override;

private:
+ /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window);
+ /** Function to apply QASYMM8 quantization on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
template <typename T>
- void quantize(const Window &window, const QuantizationInfo &qinfo);
+ void run_quantize_qasymm8(const Window &window);
+ /** Function to apply QASYMM16 quantization on a tensor.
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ template <typename T>
+ void run_quantize_qasymm16(const Window &window);

const ITensor *_input;
ITensor *_output;
+
+ QuantizationFunctionExecutorPtr _func;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H__ */
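
The new _func member turns the kernel into a one-slot dispatch table: configure() is expected to pick the matching specialisation once (run_quantize_qasymm8 or run_quantize_qasymm16, instantiated for the F32 or F16 input type), and run() simply calls through the pointer. The kernel's real definitions live in the corresponding .cpp file, outside this diff, so the self-contained toy below only illustrates the member-function-pointer pattern with hypothetical names:

#include <cstdio>

// Minimal illustration of the QuantizationFunctionExecutorPtr/_func pattern:
// configure() selects a member function once, run() calls through the pointer.
class QuantKernel
{
public:
    void configure(bool use_qasymm16)
    {
        _func = use_qasymm16 ? &QuantKernel::run_qasymm16 : &QuantKernel::run_qasymm8;
    }
    void run()
    {
        (this->*_func)(); // dispatch to the specialisation chosen at configure time
    }

private:
    using ExecutorPtr = void (QuantKernel::*)();

    void run_qasymm8()  { printf("QASYMM8 path\n"); }
    void run_qasymm16() { printf("QASYMM16 path\n"); }

    ExecutorPtr _func{ nullptr };
};

int main()
{
    QuantKernel k;
    k.configure(/* use_qasymm16 = */ true);
    k.run(); // prints "QASYMM16 path"
    return 0;
}

Resolving the data-type switch once at configure time keeps run(), which executes per window on the hot path, free of per-call branching.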
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 46a62bd903..25609324a0 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -49,13 +49,13 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QSYMM16
+ * @param[out] output Destination tensor with the same dimensions as the input. Data types supported: QASYMM8/QASYMM16.
*/
void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayer
*
* @param[in] input Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: F32/F16.
- * @param[in] output Output tensor info. Data types supported: QASYMM8/QSYMM16
+ * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM16
*
* @return a status
*/
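
At the function level, an end-to-end sketch of the new QASYMM16 path based on these headers (assumed usage; tensor filling and error reporting omitted):

#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    const TensorShape shape(32U, 32U);

    // F32 source; QASYMM16 destination carrying the target quantization parameters.
    Tensor src, dst;
    src.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    dst.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM16, QuantizationInfo(0.5f, 10)));

    // Check the configuration first, then configure the function.
    const Status st = NEQuantizationLayer::validate(src.info(), dst.info());
    if(!st)
    {
        return 1;
    }

    NEQuantizationLayer quant;
    quant.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src with F32 data ...
    quant.run();
    return 0;
}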