From 143310c6ec5e7c9f6035e9357bf0faa4020f2412 Mon Sep 17 00:00:00 2001 From: Dana Zlotnik Date: Thu, 30 Dec 2021 13:24:52 +0200 Subject: Decouple NEMaxUnpoolingLayerKernel Resolves COMPMID-4619 Change-Id: I9c43dcd3fb3a688e1c0ccc858a02376741381ba7 Signed-off-by: Dana Zlotnik Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6874 Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins --- .../NEON/kernels/NEMaxUnpoolingLayerKernel.cpp | 109 +++++++++++++-------- 1 file changed, 69 insertions(+), 40 deletions(-) (limited to 'src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp') diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp index 761fa15238..93da8a24c5 100644 --- a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,9 +28,10 @@ #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/CPP/Validate.h" +#include "src/core/common/Registrars.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - +#include "src/cpu/kernels/maxunpool/list.h" #include "support/ToolchainSupport.h" namespace arm_compute @@ -39,6 +40,67 @@ using namespace misc::shape_calculator; namespace { +struct MaxUnpoolingSelectorData +{ + DataType dt; +}; + +using MaxUnpoolingSelctorPtr = std::add_pointer::type; +using MaxUnpoolingUKernelPtr = std::add_pointer::type; + +struct MaxUnpoolingKernel +{ + const char *name; + const MaxUnpoolingSelctorPtr is_selected; + MaxUnpoolingUKernelPtr ukernel; +}; + +static const MaxUnpoolingKernel available_kernels[] = +{ + { + "fp32_neon_maxunpooling", + [](const MaxUnpoolingSelectorData & data) { return data.dt == DataType::F32; }, + 
REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_maxunpooling) + }, +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + { + "fp16_neon_maxunpooling", + [](const MaxUnpoolingSelectorData & data) { return data.dt == DataType::F16; }, + REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_maxunpooling) + }, +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(ARM_COMPUTE_ENABLE_NEON) + { + "qs8_neon_maxunpooling", /* NOTE(review): "qs8" is paired with QASYMM8 here and "qu8" with QASYMM8_SIGNED below; the prefixes look swapped relative to signedness, please confirm */ + [](const MaxUnpoolingSelectorData & data) { return data.dt == DataType::QASYMM8; }, + REGISTER_QASYMM8_NEON(arm_compute::cpu::neon_qs8_maxunpooling) + }, + { + "qu8_neon_maxunpooling", + [](const MaxUnpoolingSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::neon_qu8_maxunpooling) + }, +#endif //defined(ARM_COMPUTE_ENABLE_NEON) +}; + +/** Select the first micro-kernel in available_kernels whose is_selected predicate accepts @p data + * + * @param[in] data Selection data (the tensor data type) tested against each table entry in declaration order + * + * @return Pointer to the first matching table entry, or nullptr if no entry matches + */ +const MaxUnpoolingKernel *get_implementation(const MaxUnpoolingSelectorData &data) +{ + for(const auto &uk : available_kernels) + { + if(uk.is_selected(data)) + { + return &uk; + } + } + return nullptr; +} + Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, indices); @@ -69,7 +131,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } // namespace NEMaxUnpoolingLayerKernel::NEMaxUnpoolingLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _indices(nullptr) + : _input(nullptr), _output(nullptr), _indices(nullptr) { } @@ -82,46 +144,12 @@ void NEMaxUnpoolingLayerKernel::configure(const ITensor *input, const ITensor *i _output = output; _indices = indices; - switch(input->info()->data_type()) - { - case DataType::F32: - _func = &NEMaxUnpoolingLayerKernel::unpooling2; - break; - case DataType::QASYMM8: - 
_func = &NEMaxUnpoolingLayerKernel::unpooling2; - break; - case DataType::QASYMM8_SIGNED: - _func = &NEMaxUnpoolingLayerKernel::unpooling2; - break; -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: - _func = &NEMaxUnpoolingLayerKernel::unpooling2; - break; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - default: - break; - } const TensorShape output_shape = compute_unpool_shape(*input->info(), pool_info); auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); auto window = calculate_max_window(*input->info(), Steps()); INEKernel::configure(window); } -template -void NEMaxUnpoolingLayerKernel::unpooling2(const Window &window) -{ - Iterator input(_input, window); - Iterator indices(_indices, window); - auto out_ptr = reinterpret_cast(_output->buffer()); - const int out_stride_w = static_cast(_output->info()->strides_in_bytes()[3]); - execute_window_loop(window, [&](const Coordinates & id) - { - auto vindices = reinterpret_cast(indices.ptr()); - auto vinput = reinterpret_cast(input.ptr()); - out_ptr[id[3] * out_stride_w / sizeof(T) + *vindices] = *vinput; - }, - input, indices); -} Status NEMaxUnpoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info) { @@ -135,8 +163,9 @@ void NEMaxUnpoolingLayerKernel::run(const Window &window, const ThreadInfo &info ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - // Run function - (this->*_func)(window); + const auto *uk = get_implementation(MaxUnpoolingSelectorData{ _input->info()->data_type() }); + ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); + + uk->ukernel(_input, _output, _indices, window); } } // namespace arm_compute -- cgit v1.2.1