aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/operators/CpuFullyConnected.cpp
diff options
context:
space:
mode:
authorcfRod <crefeda.rodrigues@arm.com>2021-11-05 11:29:53 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2021-11-09 10:19:03 +0000
commitf2c022e54ae65467cca4bc698b5b94e5b3c62c47 (patch)
treedcf3f38e85ff9761573d2e7e37cc746291f828af /src/cpu/operators/CpuFullyConnected.cpp
parentf349655cf75a686ed97e90d193a45602d6e82e6f (diff)
downloadComputeLibrary-f2c022e54ae65467cca4bc698b5b94e5b3c62c47.tar.gz
Enable fast_math in CpuFullyConnected
ONCPUML-529 * Add support for passing fast_math for fully connected layers via fc_info. * Add support for passing fast_math to run ACL benchmark graphs. * Add validation test and accuracy tests (updated fixtures). Note: abs. and rel. tolerances for fast math mode are set based on experimental data. Signed-off-by: cfRod <crefeda.rodrigues@arm.com> Change-Id: Ib107d6264d3ae5e36555334f39a13e678f8618df Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6521 Reviewed-by: SiCong Li <sicong.li@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/operators/CpuFullyConnected.cpp')
-rw-r--r--src/cpu/operators/CpuFullyConnected.cpp15
1 files changed, 11 insertions, 4 deletions
diff --git a/src/cpu/operators/CpuFullyConnected.cpp b/src/cpu/operators/CpuFullyConnected.cpp
index 03c53b001d..6d77c614f7 100644
--- a/src/cpu/operators/CpuFullyConnected.cpp
+++ b/src/cpu/operators/CpuFullyConnected.cpp
@@ -109,7 +109,7 @@ Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo
return Status{};
}
-Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ActivationLayerInfo &act)
+Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ActivationLayerInfo &act, bool enable_fast_math)
{
if(is_data_type_quantized_asymmetric(src->data_type()))
{
@@ -123,6 +123,7 @@ Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITe
GEMMInfo gemm_info;
gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
+ gemm_info.set_fast_math(enable_fast_math);
// Validate gemmlowp function
TensorInfo src_info = src->clone()->set_quantization_info(src_quantization_info);
@@ -135,7 +136,9 @@ Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITe
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(CpuGemm::validate(src, weights, biases, dst, 1.f, 1.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */)));
+ GEMMInfo gemm_info(false, false, true /* Reshape weights only for the first run */);
+ gemm_info.set_fast_math(enable_fast_math);
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuGemm::validate(src, weights, biases, dst, 1.f, 1.0f, gemm_info));
}
return Status{};
@@ -158,7 +161,8 @@ CpuFullyConnected::CpuFullyConnected()
_needs_weights_reshape(false),
_is_fc_after_conv(false),
_is_quantized_asymmetric(false),
- _is_prepared(false)
+ _is_prepared(false),
+ _enable_fast_math(false)
{
}
@@ -185,6 +189,7 @@ void CpuFullyConnected::configure_mm(const ITensorInfo *src, const ITensorInfo *
GEMMInfo gemm_info;
gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info);
gemm_info.set_activation_info(act);
+ gemm_info.set_fast_math(_enable_fast_math);
_mm_gemmlowp = std::make_unique<CpuGemmLowpMatrixMultiplyCore>();
_mm_gemmlowp->configure(&src_info, &weights_info, biases, dst, gemm_info);
}
@@ -193,6 +198,7 @@ void CpuFullyConnected::configure_mm(const ITensorInfo *src, const ITensorInfo *
// Configure matrix multiply kernel
GEMMInfo gemm_info(false, false, true /* Reshape weights only for the first run */);
gemm_info.set_activation_info(act);
+ gemm_info.set_fast_math(_enable_fast_math);
_mm_gemm = std::make_unique<CpuGemm>();
_mm_gemm->configure(src, weights, biases, dst, 1.f, 1.0f, gemm_info);
}
@@ -241,6 +247,7 @@ void CpuFullyConnected::configure(const ITensorInfo *src, const ITensorInfo *wei
_is_quantized_asymmetric = is_data_type_quantized_asymmetric(src->data_type());
_is_prepared = false;
_trans_weights_idx = AuxTensorIdx::Count;
+ _enable_fast_math = fc_info.enable_fast_math;
// With the Fully Connected layer we can have 4 different cases:
// 1) Convolution layer -> Fully Connected layer without batches
@@ -418,7 +425,7 @@ Status CpuFullyConnected::validate(const ITensorInfo *src, const ITensorInfo *we
ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != weights_to_use->dimension(1));
}
// Validate matrix multiply kernel
- ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(src_to_use, weights_to_use, biases, dst, fc_info.activation_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(src_to_use, weights_to_use, biases, dst, fc_info.activation_info, fc_info.enable_fast_math));
return Status{};
}