diff options
author | Pablo Marquez Tello <pablo.tello@arm.com> | 2023-09-22 14:36:03 +0100 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2023-09-26 09:19:38 +0000 |
commit | 6b6ba9e443b700f9da1671d8ed2ea0c35750d806 (patch) | |
tree | 56efa680653175ca0668f98913c85f58512b38ad /src/cpu/kernels/maxunpool/generic/neon/impl.h | |
parent | 8562a4ec48fc49fbacac9783530443b60955f5a4 (diff) | |
download | ComputeLibrary-6b6ba9e443b700f9da1671d8ed2ea0c35750d806.tar.gz |
Maxunpooling changes to enable fp16 in armv8a multi_isa builds
* Code guarded with __ARM_FEATURE_FP16_VECTOR_ARITHMETIC needs
to be moved to an fp16.cpp file to allow compilation with
-march=armv8.2-a+fp16
* fp16.cpp needs to use the template max_unpooling(), which had to be moved from impl.cpp to impl.h
* Partially resolves MLCE-1102
Change-Id: Iabf9a9ba9d2441032f931f33aad97acc3e332575
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10362
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Diffstat (limited to 'src/cpu/kernels/maxunpool/generic/neon/impl.h')
-rw-r--r-- | src/cpu/kernels/maxunpool/generic/neon/impl.h | 25 |
1 file changed, 18 insertions, 7 deletions
diff --git a/src/cpu/kernels/maxunpool/generic/neon/impl.h b/src/cpu/kernels/maxunpool/generic/neon/impl.h index 3fea9cfcf3..5fe19c4707 100644 --- a/src/cpu/kernels/maxunpool/generic/neon/impl.h +++ b/src/cpu/kernels/maxunpool/generic/neon/impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,19 +21,30 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H -#define SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H +#ifndef ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H +#define ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Window.h" #include "src/core/NEON/wrapper/wrapper.h" namespace arm_compute { -class ITensor; -class Window; namespace cpu { template <typename T> -void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); +void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) +{ + Iterator input_itr(input, window); + Iterator indices_itr(indices, window); + auto out_ptr = reinterpret_cast<T *>(output->buffer()); + const int out_stride_w = static_cast<int>(output->info()->strides_in_bytes()[3]); + execute_window_loop(window, [&](const Coordinates & id) + { + auto vindices = reinterpret_cast<uint32_t *>(indices_itr.ptr()); + auto vinput = reinterpret_cast<T *>(input_itr.ptr()); + out_ptr[id[3] * out_stride_w / sizeof(T) + *vindices] = *vinput; + }, + input_itr, indices_itr); +} } // namespace cpu } // namespace arm_compute -#endif //define SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H +#endif // ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H |