about | summary | refs | log | tree | commit | diff
path: root/src/runtime/NEON/functions
diff options
context:
space:
mode:
author morgolock <pablo.tello@arm.com> 2020-08-10 16:44:18 +0100
committer Pablo Marquez <pablo.tello@arm.com> 2020-08-19 10:53:52 +0000
commit 0bc80daf319ea3219ca6a6fa200118dc859ee460 (patch)
tree 32d9294334247d62b20b347ffb01e37bd1d5edd1 /src/runtime/NEON/functions
parent c58f0ad7ac6d91f2789a78049d3cec7355113f9a (diff)
download ComputeLibrary-0bc80daf319ea3219ca6a6fa200118dc859ee460.tar.gz
MLCE-229: Support for negative shifts in asm kernels
Change-Id: I2c5e98aae7698963f106d7423df0e65cd00ee2a9
Signed-off-by: morgolock <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3710
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions')
-rw-r--r-- src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp | 31
-rw-r--r-- src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp | 29
2 files changed, 26 insertions, 34 deletions
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index 3b9dde2bf7..eeea3a45ee 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -182,8 +182,8 @@ public:
*
* @return A tuple with the pointers to the shift and multiplier data respectively
*/
- std::tuple<const int32_t *, const int32_t *> set_requantize_data(const std::vector<int32_t> &shifts,
- const std::vector<int32_t> &multipliers);
+ std::tuple<bool, const int32_t *, const int32_t *, const int32_t *> set_requantize_data(const std::vector<int32_t> &shifts,
+ const std::vector<int32_t> &multipliers);
// Inherited methods overridden:
void run() override;
@@ -235,18 +235,29 @@ private:
arm_gemm::KernelDescription _kernel_info{};
/** Per channel quantization shifts */
std::vector<int32_t> _shifts{};
+ std::vector<int32_t> right_shifts{};
+ std::vector<int32_t> left_shifts{};
/** Per channel quantization multipliers */
std::vector<int32_t> _multipliers{};
};
template <typename TypeInput, typename TypeOutput, class OutputStage>
-std::tuple<const int32_t *, const int32_t *> Fallback<TypeInput, TypeOutput, OutputStage>::set_requantize_data(const std::vector<int32_t> &shifts,
- const std::vector<int32_t> &multipliers)
+std::tuple<bool, const int32_t *, const int32_t *, const int32_t *> Fallback<TypeInput, TypeOutput, OutputStage>::set_requantize_data(const std::vector<int32_t> &shifts,
+ const std::vector<int32_t> &multipliers)
{
- _multipliers = multipliers;
- _shifts = shifts;
- std::transform(_shifts.begin(), _shifts.end(), _shifts.begin(), std::negate<int32_t>());
- return std::make_tuple(_shifts.data(), _multipliers.data());
+ _multipliers = multipliers;
+ _shifts = shifts;
+ bool need_left = false;
+ for(const auto s : _shifts)
+ {
+ left_shifts.push_back(std::max(-s, int32_t(0)));
+ right_shifts.push_back(std::min(-s, int32_t(0)));
+ if(s > 0 && !need_left)
+ {
+ need_left = true;
+ }
+ }
+ return std::make_tuple(need_left, left_shifts.data(), right_shifts.data(), _multipliers.data());
}
template <typename TypeInput, typename TypeOutput, class OutputStage>
@@ -498,7 +509,9 @@ void create_arm_gemm_quant(std::unique_ptr<NEGEMMAssemblyDispatch::IFallback> &a
const auto requantize_data = fallback->set_requantize_data(os_info.gemmlowp_shifts, os_info.gemmlowp_multipliers);
gemm_requant_info = arm_gemm::Requantize32(nullptr, 0,
a_offset, b_offset, os_info.gemmlowp_offset,
- std::get<0>(requantize_data), std::get<1>(requantize_data),
+ (std::get<0>(requantize_data)) ? std::get<1>(requantize_data) : nullptr,
+ std::get<2>(requantize_data),
+ std::get<3>(requantize_data),
os_info.gemmlowp_min_bound, os_info.gemmlowp_max_bound);
}
else
diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
index dada6d16da..83db146a8a 100644
--- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp
@@ -117,18 +117,8 @@ void NEGEMMLowpMatrixMultiplyCore::configure(const ITensor *a, const ITensor *b,
{
if(is_data_type_quantized_asymmetric(a_to_use->info()->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
{
- // Result shifts < 0 are not supported by asm kernels
- const std::vector<int32_t> &shifts = info.gemmlowp_output_stage().gemmlowp_shifts;
- const bool is_asm_supported = info.gemmlowp_output_stage().gemmlowp_shift >= 0
- && std::all_of(shifts.cbegin(), shifts.cend(), [](int32_t val)
- {
- return val >= 0;
- });
- if(is_asm_supported)
- {
- _asm_glue.configure(a_to_use, b, c, output, gemm_info);
- _fused_assembly_path = _asm_glue.is_configured();
- }
+ _asm_glue.configure(a_to_use, b, c, output, gemm_info);
+ _fused_assembly_path = _asm_glue.is_configured();
}
else
{
@@ -339,19 +329,8 @@ Status NEGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
bool run_optimised_requantized = false;
if(is_data_type_quantized_asymmetric(a_to_use->data_type()) && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
{
- // Result shifts < 0 are not supported by asm kernels
- const std::vector<int32_t> &shifts = info.gemmlowp_output_stage().gemmlowp_shifts;
- const bool is_asm_supported = info.gemmlowp_output_stage().gemmlowp_shift >= 0
- && std::all_of(shifts.cbegin(), shifts.cend(), [](int32_t val)
- {
- return val >= 0;
- });
-
- if(is_asm_supported)
- {
- run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
- run_optimised_requantized = run_optimised;
- }
+ run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
+ run_optimised_requantized = run_optimised;
}
else
{