From fc94f4d23abd4bc427b701f54ad85282e9ec7872 Mon Sep 17 00:00:00 2001
From: Michael Tyler
Date: Tue, 4 Jun 2024 15:47:37 +0100
Subject: Update CPU kernels and add mixed sign GEMM support

- Add support for mixed sign quantized convolution.
- Add support for mixed sign dequantized GEMM.
- Add SME FP16 GEMV kernel.
- Change SME vector length function to use RDSVL instead of static variable.
- Add GEMM dilation support internally (not exposed yet).
- Remove unused "get_default_activation_values" functions.
- Add SVE fixed format interleaved BF16 DOT kernel.
- Updates and optimizations to assembly kernels.

Resolves COMPMID-6926

Change-Id: I227f502502611d4cc4111c89e30c53ce94079544
Signed-off-by: Michael Tyler
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11570
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 src/core/NEON/kernels/assembly/pooling.hpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'src/core/NEON/kernels/assembly/pooling.hpp')

diff --git a/src/core/NEON/kernels/assembly/pooling.hpp b/src/core/NEON/kernels/assembly/pooling.hpp
index 89d594298e..d64a59f4d0 100644
--- a/src/core/NEON/kernels/assembly/pooling.hpp
+++ b/src/core/NEON/kernels/assembly/pooling.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023 Arm Limited.
+ * Copyright (c) 2021-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,9 @@
  * SOFTWARE.
  */
 
+#ifndef ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOLING_HPP
+#define ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOLING_HPP
+
 #pragma once
 
 #include "arm_gemm_local.hpp"
@@ -136,7 +139,11 @@ public:
     PoolingCommon(PoolingCommon &) = delete;
     PoolingCommon &operator=(PoolingCommon &) = delete;
 
-    size_t get_working_size(unsigned int) const override = 0;
+    size_t get_working_size(unsigned int, unsigned int) const override = 0;
+    size_t get_working_size(unsigned int n_threads) const override
+    {
+        return this->get_working_size(n_threads, m_args.n_channels);
+    }
 
     // Execute pooling over the specified area of memory.
     void execute(const void *const input,
@@ -223,3 +230,5 @@ UniquePoolingCommon pooling(const PoolingArgs &, const OutputSt
 
 } // namespace pooling
 } // namespace arm_conv
+
+#endif // ACL_SRC_CORE_NEON_KERNELS_ASSEMBLY_POOLING_HPP
-- 
cgit v1.2.1