diff options
Diffstat (limited to 'src/cpu')
-rw-r--r-- | src/cpu/operators/CpuFullyConnected.cpp | 15 | ||||
-rw-r--r-- | src/cpu/operators/CpuFullyConnected.h | 1 |
2 files changed, 12 insertions, 4 deletions
diff --git a/src/cpu/operators/CpuFullyConnected.cpp b/src/cpu/operators/CpuFullyConnected.cpp index 03c53b001d..6d77c614f7 100644 --- a/src/cpu/operators/CpuFullyConnected.cpp +++ b/src/cpu/operators/CpuFullyConnected.cpp @@ -109,7 +109,7 @@ Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo return Status{}; } -Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ActivationLayerInfo &act) +Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ActivationLayerInfo &act, bool enable_fast_math) { if(is_data_type_quantized_asymmetric(src->data_type())) { @@ -123,6 +123,7 @@ Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITe GEMMInfo gemm_info; gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info); + gemm_info.set_fast_math(enable_fast_math); // Validate gemmlowp function TensorInfo src_info = src->clone()->set_quantization_info(src_quantization_info); @@ -135,7 +136,9 @@ Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITe } else { - ARM_COMPUTE_RETURN_ON_ERROR(CpuGemm::validate(src, weights, biases, dst, 1.f, 1.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */))); + GEMMInfo gemm_info(false, false, true /* Reshape weights only for the first run */); + gemm_info.set_fast_math(enable_fast_math); + ARM_COMPUTE_RETURN_ON_ERROR(CpuGemm::validate(src, weights, biases, dst, 1.f, 1.0f, gemm_info)); } return Status{}; @@ -158,7 +161,8 @@ CpuFullyConnected::CpuFullyConnected() _needs_weights_reshape(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), - _is_prepared(false) + _is_prepared(false), + _enable_fast_math(false) { } @@ -185,6 +189,7 @@ void CpuFullyConnected::configure_mm(const ITensorInfo *src, const ITensorInfo * GEMMInfo gemm_info; gemm_info.set_gemmlowp_output_stage(gemmlowp_output_stage_info); gemm_info.set_activation_info(act); + gemm_info.set_fast_math(_enable_fast_math); _mm_gemmlowp = std::make_unique<CpuGemmLowpMatrixMultiplyCore>(); _mm_gemmlowp->configure(&src_info, &weights_info, biases, dst, gemm_info); } @@ -193,6 +198,7 @@ void CpuFullyConnected::configure_mm(const ITensorInfo *src, const ITensorInfo * // Configure matrix multiply kernel GEMMInfo gemm_info(false, false, true /* Reshape weights only for the first run */); gemm_info.set_activation_info(act); + gemm_info.set_fast_math(_enable_fast_math); _mm_gemm = std::make_unique<CpuGemm>(); _mm_gemm->configure(src, weights, biases, dst, 1.f, 1.0f, gemm_info); } @@ -241,6 +247,7 @@ void CpuFullyConnected::configure(const ITensorInfo *src, const ITensorInfo *wei _is_quantized_asymmetric = is_data_type_quantized_asymmetric(src->data_type()); _is_prepared = false; _trans_weights_idx = AuxTensorIdx::Count; + _enable_fast_math = fc_info.enable_fast_math; // With the Fully Connected layer we can have 4 different cases: // 1) Convolution layer -> Fully Connected layer without batches @@ -418,7 +425,7 @@ Status CpuFullyConnected::validate(const ITensorInfo *src, const ITensorInfo *we ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != weights_to_use->dimension(1)); } // Validate matrix multiply kernel - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(src_to_use, weights_to_use, biases, dst, fc_info.activation_info)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(src_to_use, weights_to_use, biases, dst, fc_info.activation_info, fc_info.enable_fast_math)); return Status{}; } diff --git a/src/cpu/operators/CpuFullyConnected.h b/src/cpu/operators/CpuFullyConnected.h index 304ea3c62b..44fa21f9f8 100644 --- a/src/cpu/operators/CpuFullyConnected.h +++ b/src/cpu/operators/CpuFullyConnected.h @@ -141,6 +141,7 @@ private: bool _is_fc_after_conv; bool _is_quantized_asymmetric; bool _is_prepared; + bool _enable_fast_math; }; } // namespace cpu } // namespace arm_compute |