From 0bc80daf319ea3219ca6a6fa200118dc859ee460 Mon Sep 17 00:00:00 2001 From: morgolock Date: Mon, 10 Aug 2020 16:44:18 +0100 Subject: MLCE-229: Support for negative shifts in asm kernels Change-Id: I2c5e98aae7698963f106d7423df0e65cd00ee2a9 Signed-off-by: morgolock Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3710 Tested-by: Arm Jenkins Reviewed-by: Sheri Zhang Comments-Addressed: Arm Jenkins --- .../NEON/functions/NEGEMMAssemblyDispatch.cpp | 31 +++++++++++++++------- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 29 +++----------------- 2 files changed, 26 insertions(+), 34 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index 3b9dde2bf7..eeea3a45ee 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -182,8 +182,8 @@ public: * * @return A tuple with the pointers to the shift and multiplier data respectively */ - std::tuple set_requantize_data(const std::vector &shifts, - const std::vector &multipliers); + std::tuple set_requantize_data(const std::vector &shifts, + const std::vector &multipliers); // Inherited methods overridden: void run() override; @@ -235,18 +235,29 @@ private: arm_gemm::KernelDescription _kernel_info{}; /** Per channel quantization shifts */ std::vector _shifts{}; + std::vector right_shifts{}; + std::vector left_shifts{}; /** Per channel quantization multipliers */ std::vector _multipliers{}; }; template -std::tuple Fallback::set_requantize_data(const std::vector &shifts, - const std::vector &multipliers) +std::tuple Fallback::set_requantize_data(const std::vector &shifts, + const std::vector &multipliers) { - _multipliers = multipliers; - _shifts = shifts; - std::transform(_shifts.begin(), _shifts.end(), _shifts.begin(), std::negate()); - return std::make_tuple(_shifts.data(), _multipliers.data()); + _multipliers = multipliers; + _shifts = shifts; + bool need_left = false; + for(const auto s : _shifts) + { + left_shifts.push_back(std::max(-s, int32_t(0))); + right_shifts.push_back(std::min(-s, int32_t(0))); + if(s > 0 && !need_left) + { + need_left = true; + } + } + return std::make_tuple(need_left, left_shifts.data(), right_shifts.data(), _multipliers.data()); } template @@ -498,7 +509,9 @@ void create_arm_gemm_quant(std::unique_ptr &a const auto requantize_data = fallback->set_requantize_data(os_info.gemmlowp_shifts, os_info.gemmlowp_multipliers); gemm_requant_info = arm_gemm::Requantize32(nullptr, 0, a_offset, b_offset, os_info.gemmlowp_offset, - std::get<0>(requantize_data), std::get<1>(requantize_data), + (std::get<0>(requantize_data)) ? std::get<1>(requantize_data) : nullptr, + std::get<2>(requantize_data), + std::get<3>(requantize_data), os_info.gemmlowp_min_bound, os_info.gemmlowp_max_bound); } else diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index dada6d16da..83db146a8a 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -117,18 +117,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b, { if(is_data_type_quantized_asymmetric(a_to_use->info()->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) { - // Result shifts < 0 are not supported by asm kernels - const std::vector &shifts = info.gemmlowp_output_stage().gemmlowp_shifts; - const bool is_asm_supported = info.gemmlowp_output_stage().gemmlowp_shift >= 0 - && std::all_of(shifts.cbegin(), shifts.cend(), [](int32_t val) - { - return val >= 0; - }); - if(is_asm_supported) - { - _asm_glue.configure(a_to_use, b, c, output, gemm_info); - _fused_assembly_path = _asm_glue.is_configured(); - } + _asm_glue.configure(a_to_use, b, c, output, gemm_info); + _fused_assembly_path = _asm_glue.is_configured(); } else { @@ -339,19 +329,8 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso bool run_optimised_requantized = false; if(is_data_type_quantized_asymmetric(a_to_use->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) { - // Result shifts < 0 are not supported by asm kernels - const std::vector &shifts = info.gemmlowp_output_stage().gemmlowp_shifts; - const bool is_asm_supported = info.gemmlowp_output_stage().gemmlowp_shift >= 0 - && std::all_of(shifts.cbegin(), shifts.cend(), [](int32_t val) - { - return val >= 0; - }); - - if(is_asm_supported) - { - run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info)); - run_optimised_requantized = run_optimised; - } + run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info)); + run_optimised_requantized = run_optimised; } else { -- cgit v1.2.1