From a668f9f8a4eab405df0fe8dd58e7d9425bcf9640 Mon Sep 17 00:00:00 2001 From: Jonathan Deakin Date: Wed, 24 Jan 2024 09:15:38 +0000 Subject: Add s8f32 kernels and dynamic QuantizationInfo - Add support for QASYMM_SIGNED*QASYMM8_SIGNED->F32 in CpuGemmLowpMatrixMultiplyCore - Add s8f32 kernel using existing s8->s32 kernels with a new DequantizeFloat OutputStage, the structure is similar to Requantize32 but the opposite way around. - Add SME s8f32 kernels with integrated support for DequantizeFloat. - Add scale to CpuGemmLowpOffsetContributionKernel. - Add virtual dequantize scale to gemm_common, only implemented for gemm_interleaved. - Update year to 2024 in generate_build_files. - Add dynamic flag to QuantizationInfo which signals to operators that it can change after configuration - Add support for dynamic quantization in NEGEMMLowpMatrixMultiplyCore - Add dynamic quantization fixture by extending GEMMLowpGenericMatrixMultiplyCoreValidationFixture - Add GEMMLowpDequantizedMatrixMultiplyValidationFixture - Store k (number of cols of A) rather than k_offset in the offset contribution kernels so that we can recompute it when the other offsets change relates to: ONCPUML-1444 MLINFSW-439 Co-authored-by: Milos Puzovic Co-authored-by: David Mansell Change-Id: I58a3acf2c09289a303e52eea6b336a696a5bc8da Signed-off-by: Jonathan Deakin Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11022 Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- ...uGemmLowpOffsetContributionOutputStageKernel.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp') diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp index d008842398..3c113f2828 100644 --- a/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp +++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, 2023 Arm Limited. + * Copyright (c) 2019-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -919,7 +919,7 @@ void CpuGemmLowpOffsetContributionOutputStageKernel::configure(const ITensorInfo _a_offset = a_offset; _b_offset = b_offset; - _k_offset = a_offset * b_offset * k; + _k = k; _output_stage = output_stage; // If a_offset == 0, vector_sum_col can be a nullptr @@ -958,6 +958,16 @@ Status CpuGemmLowpOffsetContributionOutputStageKernel::validate(const ITensorInf return Status{}; } +void CpuGemmLowpOffsetContributionOutputStageKernel::set_a_offset(int32_t a_offset) +{ + _a_offset = a_offset; +} + +void CpuGemmLowpOffsetContributionOutputStageKernel::set_b_offset(int32_t b_offset) +{ + _b_offset = b_offset; +} + void CpuGemmLowpOffsetContributionOutputStageKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) @@ -993,10 +1003,11 @@ void CpuGemmLowpOffsetContributionOutputStageKernel::run_op(ITensorPack &te // Check if symmetric per-channel execution const bool is_symm = _output_stage.is_quantized_per_channel; + auto k_offset = _a_offset * _b_offset * _k; if (is_symm) { run_offset_contribution_output_stage_symm(window, mm_result, vector_sum_col, vector_sum_row, bias, dst, - _a_offset, _b_offset, _k_offset, _is_vector_sum_col_batched, + _a_offset, _b_offset, k_offset, _is_vector_sum_col_batched, _output_stage, reinterpret_as_3d, is_bounded_relu, is_fixed_point); } else @@ -1004,13 +1015,13 @@ void CpuGemmLowpOffsetContributionOutputStageKernel::run_op(ITensorPack &te if (is_signed) { run_offset_contribution_output_stage( - window, mm_result, vector_sum_col, vector_sum_row, bias, dst, _a_offset, _b_offset, _k_offset, + window, mm_result, vector_sum_col, vector_sum_row, bias, dst, _a_offset, _b_offset, k_offset, _is_vector_sum_col_batched, _output_stage, reinterpret_as_3d, is_bounded_relu, is_fixed_point); } else { run_offset_contribution_output_stage( - window, mm_result, vector_sum_col, vector_sum_row, bias, dst, _a_offset, _b_offset, _k_offset, + window, mm_result, vector_sum_col, vector_sum_row, bias, dst, _a_offset, _b_offset, k_offset, _is_vector_sum_col_batched, _output_stage, reinterpret_as_3d, is_bounded_relu, is_fixed_point); } } -- cgit v1.2.1