From 01b0f9b6c63ac4161006a87f2603b8a661e74899 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Tue, 7 Nov 2023 15:42:46 +0000 Subject: Pooling changes to enable fp16 in armv8a multi_isa builds * Changes in filelist.json moving fp16 file from common to fp16 attribute * Changes in kernel CpuPool2dAssemblyWrapperKernel, replaced __ARM_FEATURE_FP16_VECTOR_ARITHMETIC by ENABLE_FP16_KERNELS to make sure the fp16 kernels are compiled in for multi_isa=1 * Partially resolves MLCE-1102 Change-Id: I327154ec5b1ddfb9f54d9096f00c35b3e05c678a Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10662 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins --- filelist.json | 14 ++++++++------ .../kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp | 6 +++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/filelist.json b/filelist.json index 6559ed2120..d8c1692e80 100644 --- a/filelist.json +++ b/filelist.json @@ -1945,16 +1945,11 @@ "neon": { "common": [ "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp", "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp", "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp", "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp", "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp", "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", - "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp", 
"src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp", "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", @@ -1971,7 +1966,14 @@ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp" ], "nchw": [ "src/cpu/kernels/pool2d/neon/nchw/all.cpp" ], - "fp16": [ "src/cpu/kernels/pool2d/neon/fp16.cpp" ], + "fp16": [ + "src/cpu/kernels/pool2d/neon/fp16.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp" + ], "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ], "qasymm8":[ "src/cpu/kernels/pool2d/neon/qasymm8.cpp" ], "qasymm8_signed":["src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp"] diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp index a161c800fd..9ba2451482 100644 --- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp +++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. 
 * * SPDX-License-Identifier: MIT * @@ -79,11 +79,11 @@ void CpuPool2dAssemblyWrapperKernel::configure(const ITensorInfo *src, create_arm_pooling(src, dst, info, cpu_info); } break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(ENABLE_FP16_KERNELS) case DataType::F16: create_arm_pooling<float16_t, float16_t>(src, dst, info, cpu_info); break; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +#endif // defined(ENABLE_FP16_KERNELS) case DataType::F32: create_arm_pooling<float, float>(src, dst, info, cpu_info); break; -- cgit v1.2.1