aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorViet-Hoa Do <viet-hoa.do@arm.com>2023-04-11 17:16:27 +0100
committerViet-Hoa Do <viet-hoa.do@arm.com>2023-04-19 08:40:45 +0000
commit9c7c2d2d23693877867bb3284c577b33cfbff471 (patch)
treef470a88b23498c1b5d13c5f9578caaf9d0599b74 /src
parent9d0c4deb760efc2ca07e5e0b8218995201ad8a1f (diff)
downloadComputeLibrary-9c7c2d2d23693877867bb3284c577b33cfbff471.tar.gz
Add quantized support for CPU MatMul
Resolves: COMPMID-5899 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: I89d96e292c3492ba9b1900a3e5683f9dcd11dfc6 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9440 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/utils/quantization/AsymmHelpers.cpp34
-rw-r--r--src/cpu/operators/CpuFullyConnected.cpp34
-rw-r--r--src/cpu/operators/CpuMatMul.cpp52
3 files changed, 84 insertions, 36 deletions
diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp
index eb008639b1..ba9a97aef9 100644
--- a/src/core/utils/quantization/AsymmHelpers.cpp
+++ b/src/core/utils/quantization/AsymmHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -176,6 +176,38 @@ std::pair<int, int> get_min_max_values_from_quantized_data_type(DataType data_ty
}
return std::make_pair(min_quant_val, max_quant_val);
}
+
+std::tuple<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type)
+{
+ PixelValue type_min{};
+ PixelValue type_max{};
+ std::tie(type_min, type_max) = get_min_max(data_type);
+ const UniformQuantizationInfo q_unif = q_info.uniform();
+
+ if(act_info.enabled())
+ {
+ switch(act_info.activation())
+ {
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ type_min = PixelValue(q_unif.offset);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ type_min = PixelValue(q_unif.offset);
+ type_max = PixelValue(act_info.a(), data_type, q_info);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ type_min = PixelValue(act_info.b(), data_type, q_info);
+ type_max = PixelValue(act_info.a(), data_type, q_info);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Activation function not supported.");
+ break;
+ }
+ }
+
+ return std::make_pair(type_min, type_max);
+}
+
void compute_quantized_multipliers_and_shifts(const ITensorInfo *input,
const ITensorInfo *weights,
const ITensorInfo *output,
diff --git a/src/cpu/operators/CpuFullyConnected.cpp b/src/cpu/operators/CpuFullyConnected.cpp
index 70584a64f8..460257902d 100644
--- a/src/cpu/operators/CpuFullyConnected.cpp
+++ b/src/cpu/operators/CpuFullyConnected.cpp
@@ -48,38 +48,6 @@ using namespace arm_compute::misc::shape_calculator;
namespace
{
-// Get min, max bound of a quantized asymmetric dst tensor, with the effect of fused activation
-std::pair<PixelValue, PixelValue> get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type)
-{
- PixelValue type_min{};
- PixelValue type_max{};
- std::tie(type_min, type_max) = get_min_max(data_type);
- const UniformQuantizationInfo q_unif = q_info.uniform();
-
- if(act_info.enabled())
- {
- switch(act_info.activation())
- {
- case ActivationLayerInfo::ActivationFunction::RELU:
- type_min = PixelValue(q_unif.offset);
- break;
- case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
- type_min = PixelValue(q_unif.offset);
- type_max = PixelValue(act_info.a(), data_type, q_info);
- break;
- case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
- type_min = PixelValue(act_info.b(), data_type, q_info);
- type_max = PixelValue(act_info.a(), data_type, q_info);
- break;
- default:
- ARM_COMPUTE_ERROR("Activation function not supported.");
- break;
- }
- }
-
- return std::make_pair(type_min, type_max);
-}
-
Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const ActivationLayerInfo &act,
GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
{
@@ -97,7 +65,7 @@ Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo
PixelValue type_min{};
PixelValue type_max{};
- std::tie(type_min, type_max) = get_quantized_asymmetric_output_min_max(oq_info, act, data_type);
+ std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type);
gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
gemmlowp_output_stage_info.gemmlowp_shift = output_shift;
diff --git a/src/cpu/operators/CpuMatMul.cpp b/src/cpu/operators/CpuMatMul.cpp
index b5359e51af..369466b669 100644
--- a/src/cpu/operators/CpuMatMul.cpp
+++ b/src/cpu/operators/CpuMatMul.cpp
@@ -23,7 +23,9 @@
*/
#include "src/cpu/operators/CpuMatMul.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
@@ -40,6 +42,40 @@ namespace arm_compute
{
namespace cpu
{
+namespace
+{
+
+Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const ActivationLayerInfo &act,
+ GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
+{
+ const auto data_type = src->data_type();
+ const QuantizationInfo oq_info = dst->quantization_info();
+ const UniformQuantizationInfo iq_unif = src->quantization_info().uniform();
+ const UniformQuantizationInfo wq_unif = weights->quantization_info().uniform();
+ const UniformQuantizationInfo oq_unif = oq_info.uniform();
+
+ float multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale;
+ int32_t output_multiplier;
+ int32_t output_shift;
+
+ ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
+
+ PixelValue type_min{};
+ PixelValue type_max{};
+ std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type);
+
+ gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier;
+ gemmlowp_output_stage_info.gemmlowp_shift = output_shift;
+ gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset;
+ gemmlowp_output_stage_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+ gemmlowp_output_stage_info.gemmlowp_min_bound = type_min.get<int32_t>();
+ gemmlowp_output_stage_info.gemmlowp_max_bound = type_max.get<int32_t>();
+
+ return Status{};
+}
+
+}
+
CpuMatMul::CpuMatMul()
: _transpose_kernel_lhs(), _transpose_kernel_rhs(), _asm_glue(), _lhs_transposed(), _rhs_transposed(), _original_lhs_shape(), _original_rhs_shape(), _original_dst_shape()
{
@@ -47,8 +83,8 @@ CpuMatMul::CpuMatMul()
Status CpuMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulInfo &info, const CpuMatMulSettings &settings)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->are_values_constant(), "LHS Tensor must be dynamic.");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs->are_values_constant(), "RHS Tensor must be dynamic.");
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(lhs);
@@ -96,6 +132,12 @@ Status CpuMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs_to_use->dimension(i) != rhs_to_use->dimension(i), "Broadcasting in Batch dimension is unsupported by this operator.");
}
+ // Quantized-specific configuration
+ if(is_data_type_quantized(lhs->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(get_gemmlowp_output_stage_info(lhs_to_use, rhs_to_use, dst, gemm_info.activation_info, gemm_info.output_stage));
+ }
+
cpu::CpuGemmAssemblyDispatch::validate(lhs_to_use, rhs_to_use, nullptr, dst, gemm_info);
return Status{};
@@ -157,6 +199,12 @@ void CpuMatMul::configure(ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *dst,
lhs_to_use = (_adj_lhs) ? _lhs_transposed : lhs_to_use;
rhs_to_use = (_adj_rhs) ? _rhs_transposed : rhs_to_use;
+ // Quantized-specific configuration
+ if(is_data_type_quantized(lhs->data_type()))
+ {
+ get_gemmlowp_output_stage_info(&lhs_to_use, &rhs_to_use, &dst_to_use, _gemm_info.activation_info, _gemm_info.output_stage);
+ }
+
// Configure Asm Kernel
_asm_glue = std::make_unique<cpu::CpuGemmAssemblyDispatch>();
_asm_glue->configure(&lhs_to_use, &rhs_to_use, nullptr, &dst_to_use, _gemm_info); // c is nullptr as bias not supported in MatMul