From a668f9f8a4eab405df0fe8dd58e7d9425bcf9640 Mon Sep 17 00:00:00 2001
From: Jonathan Deakin
Date: Wed, 24 Jan 2024 09:15:38 +0000
Subject: Add s8f32 kernels and dynamic QuantizationInfo

- Add support for QASYMM8_SIGNED*QASYMM8_SIGNED->F32 in
  CpuGemmLowpMatrixMultiplyCore.
- Add an s8f32 kernel using the existing s8->s32 kernels with a new
  DequantizeFloat OutputStage; its structure is similar to Requantize32,
  but it converts in the opposite direction (a dequantize sketch follows
  the diff below).
- Add SME s8f32 kernels with integrated support for DequantizeFloat.
- Add scale to CpuGemmLowpOffsetContributionKernel.
- Add a virtual dequantize scale to gemm_common, currently only
  implemented for gemm_interleaved.
- Update the copyright year to 2024 in generate_build_files.
- Add a dynamic flag to QuantizationInfo, which signals to operators
  that the scale and offset may change after configuration (see the
  usage sketch after the trailers below).
- Add support for dynamic quantization in NEGEMMLowpMatrixMultiplyCore.
- Add a dynamic quantization fixture by extending
  GEMMLowpGenericMatrixMultiplyCoreValidationFixture.
- Add GEMMLowpDequantizedMatrixMultiplyValidationFixture.
- Store k (the number of columns of A) rather than k_offset in the
  offset contribution kernels, so that k_offset can be recomputed when
  the other offsets change.

Relates to: ONCPUML-1444, MLINFSW-439

Co-authored-by: Milos Puzovic
Co-authored-by: David Mansell
Change-Id: I58a3acf2c09289a303e52eea6b336a696a5bc8da
Signed-off-by: Jonathan Deakin
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11022
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
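A minimal usage sketch of the dynamic QuantizationInfo flag (not part of
the change itself; shapes, scales and offsets are illustrative, and the
sketch assumes the standard Tensor/TensorInfo allocation API):

    #include "arm_compute/core/QuantizationInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void dynamic_qinfo_example()
    {
        // Mark the input quantization as dynamic so the operator re-reads the
        // scale/offset on every run instead of baking them in at configure time.
        Tensor a, b, dst;
        a.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8_SIGNED,
                                       QuantizationInfo(0.5f, 10, /* is_dynamic */ true)));
        b.allocator()->init(TensorInfo(TensorShape(8U, 32U), 1, DataType::QASYMM8_SIGNED,
                                       QuantizationInfo(0.25f, -3, /* is_dynamic */ true)));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 16U), 1, DataType::F32));

        NEGEMMLowpMatrixMultiplyCore gemm;
        gemm.configure(&a, &b, nullptr, &dst); // configure once

        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill a and b with quantized data ...
        gemm.run();

        // The scale/offset may change after configuration; the new values are
        // picked up on the next run because the QuantizationInfo is dynamic.
        a.info()->set_quantization_info(QuantizationInfo(0.75f, 5, true));
        b.info()->set_quantization_info(QuantizationInfo(0.125f, 0, true));
        gemm.run();
    }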
 arm_compute/core/QuantizationInfo.h               | 72 +++++++++++++++++++---
 .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 15 ++---
 2 files changed, 70 insertions(+), 17 deletions(-)
(limited to 'arm_compute')

diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 471b8c57ab..aecba3712e 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023 Arm Limited.
+ * Copyright (c) 2019-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H
-#define ARM_COMPUTE_QUANTIZATION_INFO_H
+#ifndef ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
+#define ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
 
 #include "arm_compute/core/Rounding.h"
 #include "arm_compute/core/utils/misc/Utility.h"
@@ -84,10 +84,12 @@ public:
      *
      * @note Used for asymmetric quantization
      *
-     * @param[in] scale  Scale.
-     * @param[in] offset Offset.
+     * @param[in] scale      Scale.
+     * @param[in] offset     Offset.
+     * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
      */
-    QuantizationInfo(float scale, int offset) : _scale(1, scale), _offset(1, offset)
+    QuantizationInfo(float scale, int offset, bool is_dynamic = false)
+        : _scale(1, scale), _offset(1, offset), _is_dynamic(is_dynamic)
     {
     }
     /** Construct quantization info.
@@ -103,10 +105,12 @@ public:
      *
      * @note Used for asymmetric per channel quantization
      *
-     * @param[in] scale  Scale.
-     * @param[in] offset Offset.
+     * @param[in] scale      Scale.
+     * @param[in] offset     Offset.
+     * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
      */
-    QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset) : _scale(scale), _offset(offset)
+    QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset, bool is_dynamic = false)
+        : _scale(scale), _offset(offset), _is_dynamic(is_dynamic)
     {
     }
     /** Scale vector accessor
@@ -125,6 +129,14 @@ public:
     {
         return _offset;
     }
+    /** is_dynamic accessor
+     *
+     * @return If true, the scale and offset may change, so operators will need to read on every run
+     */
+    bool is_dynamic() const
+    {
+        return _is_dynamic;
+    }
     /** Indicates whether this QuantizationInfo has valid settings or not
      *
      * @return True if the this has invalid settings.
@@ -149,6 +161,8 @@ public:
 private:
     std::vector<float>   _scale;  /**< Vector containing scaling factors */
     std::vector<int32_t> _offset; /**< Vector containing zero offsets */
+    bool                 _is_dynamic =
+        false; /**< If true, the scale and offset may change, so operators will need to read on every run */
 };
 
 /** Check whether two quantization info are equal.
@@ -430,6 +444,19 @@ inline float dequantize(uint16_t value, float scale, int32_t offset)
     return (static_cast<float>(value) - offset) * scale;
 }
 
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value  Value to dequantize
+ * @param[in] scale  Scale to use for dequantization
+ * @param[in] offset Zero-offset to use for dequantization
+ *
+ * @return Dequantized value
+ */
+inline float dequantize(int32_t value, float scale, int32_t offset)
+{
+    return (static_cast<float>(value) - offset) * scale;
+}
+
 /** Quantize a value given a 16-bit symmetric quantization scheme
  *
  * @param[in] value Value to quantize
@@ -536,6 +563,31 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
     return dequantize_qasymm16(value, qinfo.uniform());
 }
 
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] qinfo Quantization information to use for dequantizing
+ *
+ * @return Dequantized value
+ */
+inline float dequantize_s32(int32_t value, const UniformQuantizationInfo &qinfo)
+{
+    return (static_cast<float>(value) - qinfo.offset) * qinfo.scale;
+}
+
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] qinfo Quantization information to use for dequantizing
+ *
+ * @return Dequantized value
+ */
+
+inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo)
+{
+    return dequantize_s32(value, qinfo.uniform());
+}
+
 /*
  * In case of requantization of a quantized input tensor to an output tensor with another quantization
  * instead of applying dequantization and then a quantization functions, we just compute new scale and
@@ -581,4 +633,4 @@ inline UniformQuantizationInfo compute_requantization_scale_offset(const Uniform
 }
 
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */
+#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 824c4443ad..6d07675d3d 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, 2023 Arm Limited.
+ * Copyright (c) 2017-2021, 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
-#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
 
 #include "arm_compute/core/Types.h"
 #include "arm_compute/function_info/GEMMInfo.h"
@@ -80,6 +80,7 @@ public:
      * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32            |S32           |
      * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32            |S32           |
      * |QASYMM8_SIGNED |QSYMM8             |S32            |S32           |
+     * |QASYMM8_SIGNED |QASYMM8_SIGNED     |F32            |F32           |
      *
      * @note GEMM_LOWP: low precision GEMM kernel
      * This kernel performs the following computations:
@@ -88,12 +89,12 @@ public:
      *  -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
      *  -# Compute the matrix product of the resulting a * b in int32.
      *
-     * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise
+     * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED/F32 otherwise
      *
      * @param[in]  a         First input tensor  (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
      * @param[in]  b         Second input tensor (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
-     * @param[in]  c         Third input tensor  (Matrix C). It can be a nullptr. Data type supported: S32
-     * @param[out] output    Output tensor. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED
+     * @param[in]  c         Third input tensor  (Matrix C). It can be a nullptr. Data type supported: S32/F32
+     * @param[out] output    Output tensor. Data type supported: S32/QASYMM8/QASYMM8_SIGNED/F32
      * @param[in]  gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
      *                       if the reshape of matrix B should be executed only for the first run
      */
@@ -120,4 +121,4 @@ private:
     std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEGEMMLOWPMATRIXMULTIPLYCORE_H
--
cgit v1.2.1
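A minimal sketch of the dequantize_s32 helpers added in
QuantizationInfo.h above (values are illustrative):

    #include "arm_compute/core/QuantizationInfo.h"

    #include <cstdint>
    #include <iostream>

    int main()
    {
        using namespace arm_compute;

        const int32_t acc = 12345; // e.g. an s32 accumulator from a lowp GEMM
        const UniformQuantizationInfo uqinfo(0.002f, 100); // scale, zero offset

        // (12345 - 100) * 0.002 = 24.49
        std::cout << dequantize_s32(acc, uqinfo) << "\n";

        // The QuantizationInfo overload simply forwards to uniform():
        std::cout << dequantize_s32(acc, QuantizationInfo(0.002f, 100)) << "\n";
        return 0;
    }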