From 14e868e0fbb22622d70aa5529818ee4737c4a863 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 30 Sep 2020 00:33:05 +0100 Subject: COMPMID-3802: Remove templates from NEDirectConvolutionLayerOutputStageKernel Removing bool template reduces the binary size by 20Kb. Signed-off-by: Michalis Spyrou Change-Id: I652cea7d320a00b6c6e44cdacb61e77f3c10e56a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4053 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../NEDirectConvolutionLayerOutputStageKernel.h | 2 +- .../NEDirectConvolutionLayerOutputStageKernel.cpp | 36 +++++++++++----------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h index 165f5bd133..552a88ce42 100644 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h @@ -87,7 +87,7 @@ public: private: using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift); + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias); private: OutputStageKernel *_func; diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp index c2c85f81ef..8e2b88f5a5 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp @@ -86,10 +86,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con return Status{}; } -template +template typename std::enable_if::value, void>::type output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift) + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias) { /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t; @@ -147,10 +147,10 @@ output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITe in, out); } -template +template typename std::enable_if::value, void>::type output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift) + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias) { ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier); ARM_COMPUTE_UNUSED(result_shift); @@ -213,9 +213,9 @@ output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITe } // Quantized case -template < typename TOut, bool has_bias, typename std::enable_if < std::is_same::value || std::is_same::value, int >::type = 0 > +template < typename TOut, typename std::enable_if < std::is_same::value || std::is_same::value, int >::type = 0 > void output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift) + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias) { using VectorType = typename wrapper::traits::neon_bitvector_t; using TagType = typename wrapper::traits::neon_bitvector_tag_t; @@ -292,9 +292,9 @@ void output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window }, in, out); } -template < typename TOut, bool has_bias, typename std::enable_if < std::is_same::value || std::is_same::value, int >::type = 0 > +template < typename TOut, typename std::enable_if < std::is_same::value || std::is_same::value, int >::type = 0 > void output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift) + int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias) { using VectorType = typename wrapper::traits::neon_bitvector_t; using TagType = typename wrapper::traits::neon_bitvector_tag_t; @@ -419,7 +419,6 @@ void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const INEKernel::configure(win); - const bool has_bias = bias != nullptr; const bool is_qasymm8_signed = (output != nullptr) ? is_data_type_quantized_asymmetric_signed(output->info()->data_type()) : false; // Set appropriate function @@ -431,24 +430,24 @@ void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const { if(is_qasymm8_signed) { - _func = (has_bias) ? &output_stage_nchw : &output_stage_nchw; + _func = &output_stage_nchw; } else { - _func = (has_bias) ? &output_stage_nchw : &output_stage_nchw; + _func = &output_stage_nchw; } break; } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { - _func = (has_bias) ? &output_stage_nchw : &output_stage_nchw; + _func = &output_stage_nchw; break; } #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::F32: { - _func = (has_bias) ? &output_stage_nchw : &output_stage_nchw; + _func = &output_stage_nchw; break; } default: @@ -465,24 +464,24 @@ void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const { if(is_qasymm8_signed) { - _func = (has_bias) ? &output_stage_nhwc : &output_stage_nhwc; + _func = &output_stage_nhwc; } else { - _func = (has_bias) ? &output_stage_nhwc : &output_stage_nhwc; + _func = &output_stage_nhwc; } break; } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { - _func = (has_bias) ? &output_stage_nhwc : &output_stage_nhwc; + _func = &output_stage_nhwc; break; } #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::F32: { - _func = (has_bias) ? &output_stage_nhwc : &output_stage_nhwc; + _func = &output_stage_nhwc; break; } default: @@ -508,6 +507,7 @@ void NEDirectConvolutionLayerOutputStageKernel::run(const Window &window, const ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - (*_func)(_input, _bias, window, _output, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift); + const bool has_bias = _bias != nullptr; + (*_func)(_input, _bias, window, _output, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift, has_bias); } } // namespace arm_compute -- cgit v1.2.1