From f1f1f87132690a8061801ef1a4638d637c780df7 Mon Sep 17 00:00:00 2001 From: Radu Salavat Date: Tue, 27 Feb 2024 18:32:26 +0000 Subject: Add in place summation to CPU GEMM kernels Instead of dispatching the sum postop for GEMM kernels to a separate kernel + add, that requires an extra destination sized allocation, plus 3 extra load/stores per element, just do it in the GEMM kernel. Resolves: ONCPUML-1442 Signed-off-by: Radu Salavat Co-authored-by: Milos Puzovic Change-Id: I7a1f2da3300875fa1ac88b705a34390969518077 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11298 Reviewed-by: Gunes Bayir Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- tests/validation/reference/GEMMLowp.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'tests/validation/reference/GEMMLowp.h') diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h index 99015d71fb..6e471fdad1 100644 --- a/tests/validation/reference/GEMMLowp.h +++ b/tests/validation/reference/GEMMLowp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMMLOWP_H -#define ARM_COMPUTE_TEST_GEMMLOWP_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H +#define ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -38,6 +38,9 @@ namespace reference template SimpleTensor gemmlowp_matrix_multiply_core(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template +void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor &dst_); + template SimpleTensor gemmlowp(const SimpleTensor &a, const SimpleTensor &b, TensorShape shape_c); @@ -71,4 +74,4 @@ SimpleTensor gemmlowp_quantize_down_scale_by_float(const SimpleTensor } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWP_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H -- cgit v1.2.1