From 36a75dafdbe6d6a3a6f50bd075fe01f5b7dace38 Mon Sep 17 00:00:00 2001 From: Renato Arantes Date: Fri, 26 Jan 2024 17:31:18 +0000 Subject: =?UTF-8?q?[ONCPUML-1451]=20Add=20matmul=20kernel=20to=20enable=20?= =?UTF-8?q?bf16=20to=20bf16=20operations=20via=20PyTorch=C2=AE=20autocast(?= =?UTF-8?q?)=20function?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The full range of tests must be added with [MLINFSW-482] epic due to the lack of reordering kernels implemented in Acl. Co-Authored-By: David Mansell Change-Id: I820d316295a1ec94fdc89c37e4144a268f914c36 Signed-off-by: Renato Arantes Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11169 Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/runtime/NEON/functions/NEMatMul.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h index 414fc2f3fd..58dd7a6f6b 100644 --- a/arm_compute/runtime/NEON/functions/NEMatMul.h +++ b/arm_compute/runtime/NEON/functions/NEMatMul.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL -#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL +#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H +#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H #include "arm_compute/core/Types.h" #include "arm_compute/function_info/ActivationLayerInfo.h" @@ -41,15 +41,27 @@ public: { return _fast_math; } + // get fixed format flag + bool fixed_format() const + { + return _fixed_format; + } // Set fast math flag CpuMatMulSettings &fast_math(bool fmath) { _fast_math = fmath; return *this; - }; + } + // Set fixed format flag + CpuMatMulSettings &fixed_format(bool fixed_format) + { + _fixed_format = fixed_format; + return *this; + } private: bool _fast_math{false}; + bool _fixed_format{false}; }; // Forward declarations @@ -87,6 +99,7 @@ public: * |:--------------|:------------------|:--------------| * |F32 |F32 |F32 | * |F16 |F16 |F16 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 | * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED | * |QASYMM8 |QASYMM8 |QASYMM8 | * @@ -129,4 +142,4 @@ private: std::unique_ptr _impl; }; } // namespace arm_compute -#endif /* ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL_H -- cgit v1.2.1