aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core
diff options
context:
space:
mode:
authorJonathan Deakin <jonathan.deakin@arm.com>2024-01-24 09:15:38 +0000
committerRadu Salavat <radu.salavat@arm.com>2024-04-15 13:52:31 +0000
commita668f9f8a4eab405df0fe8dd58e7d9425bcf9640 (patch)
treedb16e6af9289897557a58755b88d2c337dcb8650 /arm_compute/core
parent34bdffb288d6367cb6dca652ebed60c450854039 (diff)
downloadComputeLibrary-a668f9f8a4eab405df0fe8dd58e7d9425bcf9640.tar.gz
Add s8f32 kernels and dynamic QuantizationInfo
- Add support for QASYMM_SIGNED*QASYMM8_SIGNED->F32 in CpuGemmLowpMatrixMultiplyCore - Add s8f32 kernel using existing s8->s32 kernels with a new DequantizeFloat OutputStage, the structure is similar to Requantize32 but the opposite way around. - Add SME s8f32 kernels with integrated support for DequantizeFloat. - Add scale to CpuGemmLowpOffsetContributionKernel. - Add virtual dequantize scale to gemm_common, only implemented for gemm_interleaved. - Update year to 2024 in generate_build_files. - Add dynamic flag to QuantizationInfo which signals to operators that it can change after configuration - Add support for dynamic quantization in NEGEMMLowpMatrixMultiplyCore - Add dynamic quantization fixture by extending GEMMLowpGenericMatrixMultiplyCoreValidationFixture - Add GEMMLowpDequantizedMatrixMultiplyValidationFixture - Store k (number of cols of A) rather than k_offset in the offset contribution kernels so that we can recompute it when the other offsets change relates to: ONCPUML-1444 MLINFSW-439 Co-authored-by: Milos Puzovic <Milos.Puzovic@arm.com> Co-authored-by: David Mansell <David.Mansell@arm.com> Change-Id: I58a3acf2c09289a303e52eea6b336a696a5bc8da Signed-off-by: Jonathan Deakin <jonathan.deakin@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11022 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/core')
-rw-r--r--arm_compute/core/QuantizationInfo.h72
1 files changed, 62 insertions, 10 deletions
diff --git a/arm_compute/core/QuantizationInfo.h b/arm_compute/core/QuantizationInfo.h
index 471b8c57ab..aecba3712e 100644
--- a/arm_compute/core/QuantizationInfo.h
+++ b/arm_compute/core/QuantizationInfo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2023 Arm Limited.
+ * Copyright (c) 2019-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_QUANTIZATION_INFO_H
-#define ARM_COMPUTE_QUANTIZATION_INFO_H
+#ifndef ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
+#define ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/utils/misc/Utility.h"
@@ -84,10 +84,12 @@ public:
*
* @note Used for asymmetric quantization
*
- * @param[in] scale Scale.
- * @param[in] offset Offset.
+ * @param[in] scale Scale.
+ * @param[in] offset Offset.
+ * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
*/
- QuantizationInfo(float scale, int offset) : _scale(1, scale), _offset(1, offset)
+ QuantizationInfo(float scale, int offset, bool is_dynamic = false)
+ : _scale(1, scale), _offset(1, offset), _is_dynamic(is_dynamic)
{
}
/** Construct quantization info.
@@ -103,10 +105,12 @@ public:
*
* @note Used for asymmetric per channel quantization
*
- * @param[in] scale Scale.
- * @param[in] offset Offset.
+ * @param[in] scale Scale.
+ * @param[in] offset Offset.
+ * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
*/
- QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset) : _scale(scale), _offset(offset)
+ QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset, bool is_dynamic = false)
+ : _scale(scale), _offset(offset), _is_dynamic(is_dynamic)
{
}
/** Scale vector accessor
@@ -125,6 +129,14 @@ public:
{
return _offset;
}
+ /** is_dynamic accessor
+ *
+ * @return If true, the scale and offset may change, so operators will need to read on every run
+ */
+ bool is_dynamic() const
+ {
+ return _is_dynamic;
+ }
/** Indicates whether this QuantizationInfo has valid settings or not
*
* @return True if the this has invalid settings.
@@ -149,6 +161,8 @@ public:
private:
std::vector<float> _scale; /**< Vector containing scaling factors */
std::vector<int32_t> _offset; /**< Vector containing zero offsets */
+ bool _is_dynamic =
+ false; /**< If true, the scale and offset may change, so operators will need to read on every run */
};
/** Check whether two quantization info are equal.
@@ -430,6 +444,19 @@ inline float dequantize(uint16_t value, float scale, int32_t offset)
return (static_cast<int>(value) - offset) * scale;
}
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] scale Scale to use for dequantization
+ * @param[in] offset Zero-offset to use for dequantization
+ *
+ * @return Dequantized value
+ */
+inline float dequantize(int32_t value, float scale, int32_t offset)
+{
+ return (static_cast<int>(value) - offset) * scale;
+}
+
/** Quantize a value given a 16-bit symmetric quantization scheme
*
* @param[in] value Value to quantize
@@ -536,6 +563,31 @@ inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
return dequantize_qasymm16(value, qinfo.uniform());
}
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] qinfo Quantization information to use for dequantizing
+ *
+ * @return Dequantized value
+ */
+inline float dequantize_s32(int32_t value, const UniformQuantizationInfo &qinfo)
+{
+ return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
+}
+
+/** Dequantize a value given a 32-bit asymmetric quantization scheme
+ *
+ * @param[in] value Value to dequantize
+ * @param[in] qinfo Quantization information to use for dequantizing
+ *
+ * @return Dequantized value
+ */
+
+inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo)
+{
+ return dequantize_s32(value, qinfo.uniform());
+}
+
/*
* In case of requantization of a quantized input tensor to an output tensor with another quantization
* instead of applying dequantization and then a quantization functions, we just compute new scale and
@@ -581,4 +633,4 @@ inline UniformQuantizationInfo compute_requantization_scale_offset(const Uniform
}
} // namespace arm_compute
-#endif /* ARM_COMPUTE_QUANTIZATION_INFO_H */
+#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H