From 6b6ba9e443b700f9da1671d8ed2ea0c35750d806 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Fri, 22 Sep 2023 14:36:03 +0100 Subject: Maxunpooling changes to enable fp16 in armv8a multi_isa builds * Code guarded with __ARM_FEATURE_FP16_VECTOR_ARITHMETIC needs to be moved to an fp16.cpp file to allow compilation with -march=armv8.2-a+fp16 * fp16.cpp needs to use the template max_unpooling() which had to be moved from impl.cpp to impl.h * Partially resolves MLCE-1102 Change-Id: Iabf9a9ba9d2441032f931f33aad97acc3e332575 Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10362 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak --- Android.bp | 1 - filelist.json | 3 +- src/BUILD.bazel | 1 - src/CMakeLists.txt | 1 - src/cpu/kernels/maxunpool/generic/neon/impl.cpp | 54 ------------------------- src/cpu/kernels/maxunpool/generic/neon/impl.h | 25 ++++++++---- 6 files changed, 19 insertions(+), 66 deletions(-) delete mode 100644 src/cpu/kernels/maxunpool/generic/neon/impl.cpp diff --git a/Android.bp b/Android.bp index 696942c866..ae0c79bddc 100644 --- a/Android.bp +++ b/Android.bp @@ -534,7 +534,6 @@ cc_library_static { "src/cpu/kernels/lut/generic/neon/u8.cpp", "src/cpu/kernels/maxunpool/generic/neon/fp16.cpp", "src/cpu/kernels/maxunpool/generic/neon/fp32.cpp", - "src/cpu/kernels/maxunpool/generic/neon/impl.cpp", "src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp", "src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp", "src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp", diff --git a/filelist.json b/filelist.json index 215b363255..b74e2e25f1 100644 --- a/filelist.json +++ b/filelist.json @@ -1855,8 +1855,7 @@ "fp32":["src/cpu/kernels/maxunpool/generic/neon/fp32.cpp"], "fp16":["src/cpu/kernels/maxunpool/generic/neon/fp16.cpp"], "qasymm8":["src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp"], - "qasymm8_signed":[ "src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp"], - 
"common":["src/cpu/kernels/maxunpool/generic/neon/impl.cpp"] + "qasymm8_signed":[ "src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp"] } } }, diff --git a/src/BUILD.bazel b/src/BUILD.bazel index b989bd1963..f4fa950766 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -783,7 +783,6 @@ filegroup( "cpu/kernels/lut/generic/neon/u8.cpp", "cpu/kernels/maxunpool/generic/neon/fp16.cpp", "cpu/kernels/maxunpool/generic/neon/fp32.cpp", - "cpu/kernels/maxunpool/generic/neon/impl.cpp", "cpu/kernels/maxunpool/generic/neon/qasymm8.cpp", "cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp", "cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp", diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ee1ff476e6..eb17d51bbd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -774,7 +774,6 @@ target_sources( cpu/kernels/lut/generic/neon/u8.cpp cpu/kernels/maxunpool/generic/neon/fp16.cpp cpu/kernels/maxunpool/generic/neon/fp32.cpp - cpu/kernels/maxunpool/generic/neon/impl.cpp cpu/kernels/maxunpool/generic/neon/qasymm8.cpp cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp diff --git a/src/cpu/kernels/maxunpool/generic/neon/impl.cpp b/src/cpu/kernels/maxunpool/generic/neon/impl.cpp deleted file mode 100644 index 77e3b8594a..0000000000 --- a/src/cpu/kernels/maxunpool/generic/neon/impl.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020-2022 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/cpu/kernels/maxunpool/generic/neon/impl.h" -namespace arm_compute -{ -class ITensor; -class Window; -namespace cpu -{ -template -void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) -{ - Iterator input_itr(input, window); - Iterator indices_itr(indices, window); - auto out_ptr = reinterpret_cast(output->buffer()); - const int out_stride_w = static_cast(output->info()->strides_in_bytes()[3]); - execute_window_loop(window, [&](const Coordinates & id) - { - auto vindices = reinterpret_cast(indices_itr.ptr()); - auto vinput = reinterpret_cast(input_itr.ptr()); - out_ptr[id[3] * out_stride_w / sizeof(T) + *vindices] = *vinput; - }, - input_itr, indices_itr); -} -template void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); -template void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); -template void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); - -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) -template void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); -#endif //defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) -} // namespace cpu -} // namespace arm_compute diff --git a/src/cpu/kernels/maxunpool/generic/neon/impl.h b/src/cpu/kernels/maxunpool/generic/neon/impl.h index 3fea9cfcf3..5fe19c4707 100644 --- a/src/cpu/kernels/maxunpool/generic/neon/impl.h +++ b/src/cpu/kernels/maxunpool/generic/neon/impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,19 +21,30 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H -#define SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H +#ifndef ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H +#define ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/wrapper/wrapper.h" namespace arm_compute { -class ITensor; -class Window; namespace cpu { template <typename T> -void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); +void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) +{ + Iterator input_itr(input, window); + Iterator indices_itr(indices, window); + auto out_ptr = reinterpret_cast<T *>(output->buffer()); + const int out_stride_w = static_cast<int>(output->info()->strides_in_bytes()[3]); + execute_window_loop(window, [&](const Coordinates & id) + { + auto vindices = reinterpret_cast<uint32_t *>(indices_itr.ptr()); + auto vinput = reinterpret_cast<T *>(input_itr.ptr()); + out_ptr[id[3] * out_stride_w / sizeof(T) + *vindices] = *vinput; + }, + input_itr, indices_itr); +} } // namespace cpu } // namespace arm_compute -#endif //define SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H +#endif // ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H -- cgit v1.2.1