aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
diff options
context:
space:
mode:
authorJonathan Deakin <jonathan.deakin@arm.com>2024-01-24 09:15:38 +0000
committerRadu Salavat <radu.salavat@arm.com>2024-04-15 13:52:31 +0000
commita668f9f8a4eab405df0fe8dd58e7d9425bcf9640 (patch)
treedb16e6af9289897557a58755b88d2c337dcb8650 /src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
parent34bdffb288d6367cb6dca652ebed60c450854039 (diff)
downloadComputeLibrary-a668f9f8a4eab405df0fe8dd58e7d9425bcf9640.tar.gz
Add s8f32 kernels and dynamic QuantizationInfo
- Add support for QASYMM_SIGNED*QASYMM8_SIGNED->F32 in CpuGemmLowpMatrixMultiplyCore - Add s8f32 kernel using existing s8->s32 kernels with a new DequantizeFloat OutputStage, the structure is similar to Requantize32 but the opposite way around. - Add SME s8f32 kernels with integrated support for DequantizeFloat. - Add scale to CpuGemmLowpOffsetContributionKernel. - Add virtual dequantize scale to gemm_common, only implemented for gemm_interleaved. - Update year to 2024 in generate_build_files. - Add dynamic flag to QuantizationInfo which signals to operators that it can change after configuration - Add support for dynamic quantization in NEGEMMLowpMatrixMultiplyCore - Add dynamic quantization fixture by extending GEMMLowpGenericMatrixMultiplyCoreValidationFixture - Add GEMMLowpDequantizedMatrixMultiplyValidationFixture - Store k (number of cols of A) rather than k_offset in the offset contribution kernels so that we can recompute it when the other offsets change relates to: ONCPUML-1444 MLINFSW-439 Co-authored-by: Milos Puzovic <Milos.Puzovic@arm.com> Co-authored-by: David Mansell <David.Mansell@arm.com> Change-Id: I58a3acf2c09289a303e52eea6b336a696a5bc8da Signed-off-by: Jonathan Deakin <jonathan.deakin@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11022 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h')
-rw-r--r--src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h41
1 files changed, 35 insertions, 6 deletions
diff --git a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
index 08b2d47529..ecbfb0c282 100644
--- a/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
+++ b/src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2022,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,12 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_GEMMLOWP_OFFSETCONTRIBUTION_KERNEL_H
-#define ARM_COMPUTE_CPU_GEMMLOWP_OFFSETCONTRIBUTION_KERNEL_H
+#ifndef ACL_SRC_CPU_KERNELS_CPUGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
+#define ACL_SRC_CPU_KERNELS_CPUGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
+#include <cstdint>
+
namespace arm_compute
{
namespace cpu
@@ -62,13 +64,16 @@ public:
* @param[in] k Number of matrix A columns or Matrix B rows
* @param[in] a_offset Offset to be added to each element of the matrix A.
* @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] scale (Optional) multiplies the contribution to make it the same scale as the dst in the case where mm_result is float
+ * (and so has already been scaled). Default is 1.0
*/
void configure(ITensorInfo *mm_result,
ITensorInfo *vector_sum_col,
ITensorInfo *vector_sum_row,
int32_t k,
int32_t a_offset,
- int32_t b_offset);
+ int32_t b_offset,
+ float scale = 1.0f);
/** Static function to check if given info will lead to a valid configuration
*
* Similar to CpuGemmLowpOffsetContributionKernel::configure()
@@ -81,6 +86,29 @@ public:
int32_t a_offset,
int32_t b_offset);
+ /** Set the a offset
+ * Warning: if a_offset is non-zero then vector_sum_col must be set in run_op.
+ * Run configure or validate again if you aren't sure
+ *
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ */
+ void set_a_offset(int32_t a_offset);
+
+ /** Set the b offset
+ * Warning: if b_offset is non-zero then vector_sum_row must be set in run_op.
+ * Run configure or validate again if you aren't sure
+ *
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ */
+ void set_b_offset(int32_t b_offset);
+
+ /** Set the dequantize scale
+ *
+ * @param[in] scale Multiplies the contribution to make it the same scale as the dst in the case where
+ * mm_result is float (and so has already been scaled).
+ */
+ void set_scale(float scale);
+
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
@@ -88,10 +116,11 @@ public:
private:
int32_t _a_offset{0};
int32_t _b_offset{0};
- int32_t _k_offset{0};
+ int32_t _k{0}; // Number of columns of A or rows of B, used in last offset term
+ float _scale{1.0};
bool _slide_vector_sum_col{true};
};
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_GEMMLOWP_OFFSETCONTRIBUTION_KERNEL_H */
+#endif // ACL_SRC_CPU_KERNELS_CPUGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H