From 149203bc23d5c84fe1326d9dea4730750fab6710 Mon Sep 17 00:00:00 2001 From: Dana Zlotnik Date: Wed, 26 Jan 2022 12:38:03 +0200 Subject: Port MaxUnpoolingLayer kernel and add KernelSelect validation test Resolves COMPMID-4958 Change-Id: Ibed5155f2e3ece46635f6ea9617bf11cefc402b1 Signed-off-by: Dana Zlotnik Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7028 Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins --- src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp | 148 +++++++++++++++++++++ src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h | 92 +++++++++++++ src/cpu/kernels/maxunpool/generic/neon/fp16.cpp | 4 +- src/cpu/kernels/maxunpool/generic/neon/fp32.cpp | 4 +- src/cpu/kernels/maxunpool/generic/neon/impl.cpp | 10 +- src/cpu/kernels/maxunpool/generic/neon/impl.h | 2 +- src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp | 4 +- .../maxunpool/generic/neon/qasymm8_signed.cpp | 4 +- src/cpu/kernels/maxunpool/list.h | 2 +- 9 files changed, 255 insertions(+), 15 deletions(-) create mode 100644 src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp create mode 100644 src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h (limited to 'src/cpu/kernels') diff --git a/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp b/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp new file mode 100644 index 0000000000..604f22f6cc --- /dev/null +++ b/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2020-2022 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h" + +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/common/Registrars.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/cpu/kernels/maxunpool/list.h" +#include "support/ToolchainSupport.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +using namespace misc::shape_calculator; + +namespace +{ +static const std::vector available_kernels = +{ + { + "neon_fp32_maxunpooling", + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; }, + REGISTER_FP32_NEON(neon_fp32_maxunpooling) + }, + { + "neon_fp16_maxunpooling", + [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; }, + REGISTER_FP16_NEON(neon_fp16_maxunpooling) + }, + { + "neon_qu8_maxunpooling", + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; }, + REGISTER_QASYMM8_NEON(neon_qs8_maxunpooling) + }, + { + "neon_qs8_maxunpooling", + [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; }, + REGISTER_QASYMM8_SIGNED_NEON(neon_qu8_maxunpooling) + }, +}; + +Status validate_arguments(const ITensorInfo *src, const ITensorInfo *indices, const ITensorInfo *dst, const PoolingLayerInfo &pool_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, indices, dst); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, indices); + + int pool_stride_x = 0; + int pool_stride_y = 0; + PoolingType pool_type = 
pool_info.pool_type; + const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; + std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); + const int pool_size_x = pool_info.pool_size.width; + const int pool_size_y = pool_info.pool_size.height; + const Size2D pool_size(pool_size_x, pool_size_y); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, "Pooling indices only supported for MAX pooling method"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2"); + if(dst->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst); + } + + return Status{}; +} +} // namespace + +void CpuMaxUnpoolingLayerKernel::configure(const ITensorInfo *src, const ITensorInfo *indices, ITensorInfo *dst, const PoolingLayerInfo &pool_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst, indices); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, indices, dst, pool_info)); + ARM_COMPUTE_UNUSED(indices); + + const auto uk = CpuMaxUnpoolingLayerKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() }); + ARM_COMPUTE_ERROR_ON_NULLPTR(uk); + _run_method = uk->ukernel; + + const TensorShape output_shape = compute_unpool_shape(*src, pool_info); + auto_init_if_empty(*dst, src->clone()->set_tensor_shape(output_shape)); + + auto window = calculate_max_window(*src, Steps()); + ICpuKernel::configure(window); +} + +Status CpuMaxUnpoolingLayerKernel::validate(const ITensorInfo *src, const ITensorInfo *indices, const ITensorInfo *dst, const PoolingLayerInfo &pool_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, indices, dst); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, indices, dst, pool_info)); + return Status{}; +} + +void CpuMaxUnpoolingLayerKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + 
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0); + const auto indices = tensors.get_const_tensor(TensorType::ACL_SRC_1); + const auto dst = tensors.get_tensor(TensorType::ACL_DST); + + _run_method(src, indices, dst, window); +} + +const char *CpuMaxUnpoolingLayerKernel::name() const +{ + return "CpuMaxUnpoolingLayerKernel"; +} + +const std::vector &CpuMaxUnpoolingLayerKernel::get_available_kernels() +{ + return available_kernels; +} +} // namespace kernels +} // namespace cpu +} // namespace arm_compute \ No newline at end of file diff --git a/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h b/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h new file mode 100644 index 0000000000..d0c13471c8 --- /dev/null +++ b/src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020-2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CPUMAXUNPOOLINGLAYERKERNEL_H +#define ARM_COMPUTE_CPUMAXUNPOOLINGLAYERKERNEL_H + +#include "src/core/common/Macros.h" +#include "src/cpu/ICpuKernel.h" + +namespace arm_compute +{ +namespace cpu +{ +namespace kernels +{ +/** Interface for the max unpooling layer kernel */ +class CpuMaxUnpoolingLayerKernel : public ICpuKernel +{ +private: + using MaxUnpoolingUKernelPtr = std::add_pointer::type; + +public: + /** Default constructor */ + CpuMaxUnpoolingLayerKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuMaxUnpoolingLayerKernel); + + /** Configure kernel for a given list of arguments + * + * @note Dst shape must be equal to the shape of the original src to pool. + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] indices Tensor containing the offset to store the src elements in the dst tensor. + * @ref CpuMaxUnpooling with indices should precede this function in order to + * properly reconstruct the output tensor. + * The tensor shape of this tensor has to be equal to the src tensor shape. Data type supported: U32. + * @param[out] dst Destination tensor. Data types supported: Same as @p src + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(const ITensorInfo *src, const ITensorInfo *indices, ITensorInfo *dst, const PoolingLayerInfo &pool_info); + /** Static function to check if given info will lead to a valid configuration of @ref CpuMaxUnpoolingLayerKernel + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] indices Tensor info of the indices of the maximal values. 
Data type supported: U32. + * @param[in] dst Destination tensor info. Data types supported: Same as @p src + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *indices, const ITensorInfo *dst, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + + struct MaxUnpoolingKernel + { + const char *name; + const DataTypeISASelectorPtr is_selected; + MaxUnpoolingUKernelPtr ukernel; + }; + + static const std::vector &get_available_kernels(); + + const char *name() const override; + +private: + MaxUnpoolingUKernelPtr _run_method{ nullptr }; +}; + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute +#endif /*ARM_COMPUTE_CPUMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/src/cpu/kernels/maxunpool/generic/neon/fp16.cpp b/src/cpu/kernels/maxunpool/generic/neon/fp16.cpp index d43503aa2f..e81ff92311 100644 --- a/src/cpu/kernels/maxunpool/generic/neon/fp16.cpp +++ b/src/cpu/kernels/maxunpool/generic/neon/fp16.cpp @@ -27,9 +27,9 @@ namespace arm_compute { namespace cpu { -void neon_fp16_maxunpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window) +void neon_fp16_maxunpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) { - return max_unpooling(input, output, indices, window); + return max_unpooling(input, indices, output, window); } } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/maxunpool/generic/neon/fp32.cpp b/src/cpu/kernels/maxunpool/generic/neon/fp32.cpp index 2f96e86695..ba0d7851a9 100644 --- a/src/cpu/kernels/maxunpool/generic/neon/fp32.cpp +++ b/src/cpu/kernels/maxunpool/generic/neon/fp32.cpp @@ -26,9 +26,9 @@ namespace arm_compute { namespace cpu { -void neon_fp32_maxunpooling(const ITensor *input, ITensor 
*output, const ITensor *indices, const Window &window) +void neon_fp32_maxunpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) { - return max_unpooling(input, output, indices, window); + return max_unpooling(input, indices, output, window); } } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/maxunpool/generic/neon/impl.cpp b/src/cpu/kernels/maxunpool/generic/neon/impl.cpp index 8bbc8d128f..77e3b8594a 100644 --- a/src/cpu/kernels/maxunpool/generic/neon/impl.cpp +++ b/src/cpu/kernels/maxunpool/generic/neon/impl.cpp @@ -29,7 +29,7 @@ class Window; namespace cpu { template -void max_unpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window) +void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) { Iterator input_itr(input, window); Iterator indices_itr(indices, window); @@ -43,12 +43,12 @@ void max_unpooling(const ITensor *input, ITensor *output, const ITensor *indices }, input_itr, indices_itr); } -template void max_unpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window); -template void max_unpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window); -template void max_unpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window); +template void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); +template void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); +template void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) -template void max_unpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window); +template void max_unpooling(const ITensor *input, 
const ITensor *indices, ITensor *output, const Window &window); #endif //defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/maxunpool/generic/neon/impl.h b/src/cpu/kernels/maxunpool/generic/neon/impl.h index 6a14c66b33..3fea9cfcf3 100644 --- a/src/cpu/kernels/maxunpool/generic/neon/impl.h +++ b/src/cpu/kernels/maxunpool/generic/neon/impl.h @@ -33,7 +33,7 @@ class Window; namespace cpu { template -void max_unpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window); +void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window); } // namespace cpu } // namespace arm_compute #endif //define SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H diff --git a/src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp b/src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp index b6d0f48fda..53e601bba6 100644 --- a/src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp +++ b/src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp @@ -26,9 +26,9 @@ namespace arm_compute { namespace cpu { -void neon_qs8_maxunpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window) +void neon_qs8_maxunpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) { - return max_unpooling(input, output, indices, window); + return max_unpooling(input, indices, output, window); } } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp b/src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp index 79f3013805..a3c346fba7 100644 --- a/src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp +++ b/src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp @@ -26,9 +26,9 @@ namespace arm_compute { namespace cpu { -void neon_qu8_maxunpooling(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window) 
+void neon_qu8_maxunpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) { - return max_unpooling(input, output, indices, window); + return max_unpooling(input, indices, output, window); } } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/maxunpool/list.h b/src/cpu/kernels/maxunpool/list.h index 0f9bb499d3..2c4fe940d9 100644 --- a/src/cpu/kernels/maxunpool/list.h +++ b/src/cpu/kernels/maxunpool/list.h @@ -28,7 +28,7 @@ namespace arm_compute namespace cpu { #define DECLARE_MAXUNPOOL_KERNEL(func_name) \ - void func_name(const ITensor *input, ITensor *output, const ITensor *indices, const Window &window) + void func_name(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window) DECLARE_MAXUNPOOL_KERNEL(neon_fp32_maxunpooling); DECLARE_MAXUNPOOL_KERNEL(neon_fp16_maxunpooling); DECLARE_MAXUNPOOL_KERNEL(neon_qs8_maxunpooling); -- cgit v1.2.1