diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2020-10-27 10:56:31 +0000 |
---|---|---|
committer | Michele Di Giorgio <michele.digiorgio@arm.com> | 2021-01-20 16:39:53 +0000 |
commit | d556d7bafe6ad943f4aca0f5285ada7b8ce497f7 (patch) | |
tree | 11c7077daf97b46c47a4eac821830b37a7ce9e76 /src/core/NEON/kernels/assembly | |
parent | 7d61ff041826782d14e67b7f5b7a2864905ff38b (diff) | |
download | ComputeLibrary-d556d7bafe6ad943f4aca0f5285ada7b8ce497f7.tar.gz |
Integrate improved pooling layer on NEON
Resolves COMPMID-4035
Change-Id: I559f8c4208fba9193dfe5012f03ddaf26c746215
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4855
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/assembly')
-rw-r--r-- | src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp | 250 | ||||
-rw-r--r-- | src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.h | 116 | ||||
-rw-r--r-- | src/core/NEON/kernels/assembly/arm_gemm_local.hpp | 3 | ||||
-rw-r--r-- | src/core/NEON/kernels/assembly/pool_common.hpp | 123 | ||||
-rw-r--r-- | src/core/NEON/kernels/assembly/pooling.hpp | 117 |
5 files changed, 606 insertions, 3 deletions
diff --git a/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp b/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp new file mode 100644 index 0000000000..3c84f36435 --- /dev/null +++ b/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.cpp @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + +#include <arm_neon.h> + +namespace arm_compute +{ +using namespace arm_compute::misc::shape_calculator; + +void NEPoolingAssemblyWrapperKernel::configure(const ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + // Output initialization if not yet initialized + auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_pool_shape(*input, info))); + + const bool requantize = input->quantization_info() != output->quantization_info(); + + switch(input->data_type()) + { + case DataType::QASYMM8: + if(requantize) + { + create_arm_pooling_requant<uint8_t, uint8_t>(input, output, info, cpu_info); + } + else + { + create_arm_pooling<uint8_t, uint8_t>(input, output, info, cpu_info); + } + break; + case DataType::QASYMM8_SIGNED: + if(requantize) + { + create_arm_pooling_requant<int8_t, int8_t>(input, output, info, cpu_info); + } + else + { + create_arm_pooling<int8_t, int8_t>(input, output, info, cpu_info); + } + break; +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: + create_arm_pooling<float16_t, float16_t>(input, output, info, cpu_info); + break; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + case DataType::F32: + create_arm_pooling<float, float>(input, output, info, cpu_info); + break; + default: + break; + } + + Window win = calculate_max_window(*output, Steps()); + INEKernel::configure(win); +} + +Status NEPoolingAssemblyWrapperKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->data_layout() != DataLayout::NHWC) || (info.data_layout != DataLayout::NHWC), "Only NHWC is supported by assembly kernels"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((info.pool_type != PoolingType::AVG) && (info.pool_type != PoolingType::MAX), + "Only AVG and MAX pooling are supported by assembly kernels"); + + if(output->total_size() > 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + const auto input_qinfo = input->quantization_info().uniform(); + const auto output_qinfo = output->quantization_info().uniform(); + + if(input_qinfo != output_qinfo) + { + const float multiplier = input_qinfo.scale / output_qinfo.scale; + int32_t output_multiplier{}; + int32_t output_shift{}; + ARM_COMPUTE_RETURN_ERROR_ON(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); + } + else + { + if(input->data_type() == DataType::QASYMM8) + { + const bool has_padding = info.pad_stride_info.has_padding(); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!info.exclude_padding && has_padding, "Assembly kernels do not support padding for QASYMM8 with same input/output quantization info"); + } + } + } + else + { + if(input->data_type() == DataType::QASYMM8) + { + // If output is not configured, the quantization info are the same + const bool has_padding = info.pad_stride_info.has_padding(); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!info.exclude_padding && has_padding, "Assembly kernels do not support padding for QASYMM8 with same input/output quantization info"); + } + } + return Status{}; +} + +void NEPoolingAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(_kernel_asm.get()); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_UNUSED(window); + ARM_COMPUTE_UNUSED(info); + + ARM_COMPUTE_ERROR_ON(tensors.empty()); + + const ITensor *input = tensors.get_const_tensor(TensorType::ACL_SRC); + ITensor *output = tensors.get_tensor(TensorType::ACL_DST_0); + ITensor *workspace = tensors.get_tensor(TensorType::ACL_DST_1); + + const auto in_ptr = input->buffer() + input->info()->offset_first_element_in_bytes(); + auto out_ptr = output->buffer() + output->info()->offset_first_element_in_bytes(); + auto working_space = workspace->buffer() + workspace->info()->offset_first_element_in_bytes(); + + _kernel_asm->execute(in_ptr, out_ptr, working_space, info.thread_id, info.num_threads); +} + +size_t NEPoolingAssemblyWrapperKernel::get_working_size(unsigned int num_threads) const +{ + return _kernel_asm->get_working_size(num_threads); +} + +bool NEPoolingAssemblyWrapperKernel::is_configured() const +{ + return _kernel_asm != nullptr; +} + +template <typename TypeInput, typename TypeOutput> +void NEPoolingAssemblyWrapperKernel::create_arm_pooling(const ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +{ + const arm_conv::pooling::PoolingType pool_type = (info.pool_type == PoolingType::AVG) ? arm_conv::pooling::PoolingType::AVERAGE : arm_conv::pooling::PoolingType::MAX; + + arm_conv::pooling::PoolingWindow window{}; + window.cols = static_cast<unsigned int>(info.pool_size.x()); + window.rows = static_cast<unsigned int>(info.pool_size.y()); + + arm_conv::pooling::PoolingStride stride{}; + std::tie(stride.cols, stride.rows) = info.pad_stride_info.stride(); + + const arm_conv::pooling::PaddingValues padding{ info.pad_stride_info.pad_left(), info.pad_stride_info.pad_top(), info.pad_stride_info.pad_right(), info.pad_stride_info.pad_bottom() }; + + constexpr unsigned int idx_width = 1; + constexpr unsigned int idx_height = 2; + constexpr unsigned int idx_channels = 0; + constexpr unsigned int idx_batches = 3; + + const unsigned int n_batches = input->dimension(idx_batches); + const unsigned int input_rows = input->dimension(idx_height); + const unsigned int input_cols = input->dimension(idx_width); + const unsigned int n_channels = input->dimension(idx_channels); + const unsigned int output_rows = output->dimension(idx_height); + const unsigned int output_cols = output->dimension(idx_width); + + arm_conv::pooling::PoolingArgs args(&cpu_info, pool_type, window, stride, info.exclude_padding, n_batches, input_rows, input_cols, n_channels, output_rows, output_cols, padding, nullptr); + + // Configure assembly pooling kernel + auto pooling_kernel_asm = arm_conv::pooling::pooling<TypeInput, TypeOutput>(args); + if(pooling_kernel_asm == nullptr) + { + // Configuration not supported: Leave function unconfigured: + return; + } + + _kernel_asm = std::move(pooling_kernel_asm); +} + +template <typename TypeInput, typename TypeOutput> +void NEPoolingAssemblyWrapperKernel::create_arm_pooling_requant(const ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +{ + const arm_conv::pooling::PoolingType pool_type = (info.pool_type == PoolingType::AVG) ? arm_conv::pooling::PoolingType::AVERAGE : arm_conv::pooling::PoolingType::MAX; + + arm_conv::pooling::PoolingWindow window{}; + window.cols = static_cast<unsigned int>(info.pool_size.x()); + window.rows = static_cast<unsigned int>(info.pool_size.y()); + + arm_conv::pooling::PoolingStride stride{}; + std::tie(stride.cols, stride.rows) = info.pad_stride_info.stride(); + + const arm_conv::pooling::PaddingValues padding{ info.pad_stride_info.pad_left(), info.pad_stride_info.pad_top(), info.pad_stride_info.pad_right(), info.pad_stride_info.pad_bottom() }; + + constexpr unsigned int idx_width = 1; + constexpr unsigned int idx_height = 2; + constexpr unsigned int idx_channels = 0; + constexpr unsigned int idx_batches = 3; + + const unsigned int n_batches = input->dimension(idx_batches); + const unsigned int input_rows = input->dimension(idx_height); + const unsigned int input_cols = input->dimension(idx_width); + const unsigned int n_channels = input->dimension(idx_channels); + const unsigned int output_rows = output->dimension(idx_height); + const unsigned int output_cols = output->dimension(idx_width); + + arm_conv::pooling::PoolingArgs args(&cpu_info, pool_type, window, stride, info.exclude_padding, n_batches, input_rows, input_cols, n_channels, output_rows, output_cols, padding, nullptr); + + const auto input_qinfo = input->quantization_info().uniform(); + const auto output_qinfo = output->quantization_info().uniform(); + + const float multiplier = input_qinfo.scale / output_qinfo.scale; + int32_t output_multiplier{}; + int32_t output_shift{}; + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + + const arm_conv::pooling::Requantize32 requant_args(input_qinfo.offset, + output_qinfo.offset, + output_shift, // left shift + 0, // right shift + output_multiplier); + + // Configure assembly pooling kernel with requantization + auto pooling_kernel_asm = arm_conv::pooling::pooling<TypeInput, TypeOutput, arm_conv::pooling::Requantize32>(args, requant_args); + if(pooling_kernel_asm == nullptr) + { + // Configuration not supported: Leave function unconfigured: + return; + } + + _kernel_asm = std::move(pooling_kernel_asm); +} +} // namespace arm_compute diff --git a/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.h b/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.h new file mode 100644 index 0000000000..b2fa5b5714 --- /dev/null +++ b/src/core/NEON/kernels/assembly/NEPoolingAssemblyWrapperKernel.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ASSEMBLY_POOLING_KERNEL_WRAPPER_KERNEL_H +#define ARM_COMPUTE_ASSEMBLY_POOLING_KERNEL_WRAPPER_KERNEL_H + +#include "src/core/NEON/INEKernel.h" +#include "src/core/NEON/kernels/assembly/pooling.hpp" + +#include "pool_common.hpp" + +namespace arm_compute +{ +class ITensor; + +/** This class is a wrapper for the assembly kernels. + * + * Some kernels were written in assembly and highly optimised for specific + * CPUs like A53 or A55. The arm compute library creates an instance of + * NEPoolingAssemblyWrapperKernel and other auxiliary data structures to + * execute a single assembly kernel in the context of an NEFunction. + * + */ +class NEPoolingAssemblyWrapperKernel final : public INEKernel +{ +public: + /** Constructor + */ + NEPoolingAssemblyWrapperKernel() = default; + NEPoolingAssemblyWrapperKernel(NEPoolingAssemblyWrapperKernel &) = delete; + NEPoolingAssemblyWrapperKernel(NEPoolingAssemblyWrapperKernel &&) = default; + NEPoolingAssemblyWrapperKernel &operator=(NEPoolingAssemblyWrapperKernel &) = delete; + + const char *name() const override + { + return "NEPoolingAssemblyWrapperKernel"; + } + + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[out] output Output tensor to store the result of pooling. Data types supported: same as @p input. + * @param[in] info Pooling meta-data + */ + void configure(const ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &info, const CPUInfo &cpu_info); + + /** Indicates whether or not this function can be used to process the given parameters. + * + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Output tensor to store the result of pooling. Data types supported: same as @p input. + * @param[in] info Pooling meta-data + * + * @return a status. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &info); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; + + /** Get size of the workspace needed by the assembly kernel. + * + * @param[in] num_threads Maximum number of threads that are going to be spawned. + * + * @return size of workspace + */ + size_t get_working_size(unsigned int num_threads) const; + + /** Was the asm kernel successfully configured? + * + * @return True if the asm kernel is configured and ready to run + */ + bool is_configured() const; + +private: + /** Helper function to create the assembly kernel. + * + * @param[in] input Input tensor info. + * @param[in] output Output tensor info. + * @param[in] info Pooling layer meta-data. + */ + template <typename TypeInput, typename TypeOutput> + void create_arm_pooling(const ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &info, const CPUInfo &cpu_info); + + /** Helper function to create the assembly kernel with requantization support + * + * @param[in] input Input tensor info. + * @param[in] output Output tensor info. + * @param[in] info Pooling layer meta-data. + */ + template <typename TypeInput, typename TypeOutput> + void create_arm_pooling_requant(const ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &info, const CPUInfo &cpu_info); + + std::unique_ptr<arm_conv::pooling::IPoolingCommon> _kernel_asm{ nullptr }; +}; +} // namespace arm_compute +#endif /* ARM_COMPUTE_ASSEMBLY_POOLING_KERNEL_WRAPPER_KERNEL_H */ diff --git a/src/core/NEON/kernels/assembly/arm_gemm_local.hpp b/src/core/NEON/kernels/assembly/arm_gemm_local.hpp index 4715f2500a..c08ed2d5e3 100644 --- a/src/core/NEON/kernels/assembly/arm_gemm_local.hpp +++ b/src/core/NEON/kernels/assembly/arm_gemm_local.hpp @@ -27,8 +27,5 @@ #include "arm_compute/core/CPP/CPPTypes.h" -namespace arm_gemm -{ using CPUModel = arm_compute::CPUModel; using CPUInfo = arm_compute::CPUInfo; -} // namespace arm_compute diff --git a/src/core/NEON/kernels/assembly/pool_common.hpp b/src/core/NEON/kernels/assembly/pool_common.hpp new file mode 100644 index 0000000000..fdc18aef39 --- /dev/null +++ b/src/core/NEON/kernels/assembly/pool_common.hpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#ifdef CYCLE_PROFILING +#include "profiler.hpp" +#endif // CYCLE_PROFILING + +namespace arm_conv +{ +namespace pooling +{ +enum class PoolingType +{ + AVERAGE, + MAX, +}; + +enum class PoolingMethod +{ + DEFAULT, + DEPTHFIRST, + PLANAR, +}; + +struct PoolingWindow +{ + unsigned int rows, cols; +}; + +struct PoolingStride +{ + unsigned int rows, cols; +}; + +struct PaddingValues +{ + unsigned int left, top, right, bottom; +}; + +class IPoolingCommon +{ +public: + virtual ~IPoolingCommon() = default; + + // Determine the amount of working space required. + virtual size_t get_working_size(unsigned int num_threads) const = 0; + + // Execute pooling over the specified area of memory. + virtual void execute( + const void *const input, + void *const output, + void *working_space, + unsigned int thread_id, + unsigned int num_threads) const = 0; + + virtual void execute( + const void *const input, + size_t ld_input_col, + size_t ld_input_row, + size_t ld_input_batch, + void *const output, + size_t ld_output_col, + size_t ld_output_row, + size_t ld_output_batch, + void *working_space, + unsigned int thread_id, + unsigned int num_threads) const = 0; + + virtual void execute( + unsigned int batches, + unsigned int height, + unsigned int width, + unsigned int channels, + const void *const input, + size_t ld_input_col, + size_t ld_input_row, + size_t ld_input_batch, + const PaddingValues &, + unsigned int output_height, + unsigned int output_width, + void *const output, + size_t ld_output_col, + size_t ld_output_row, + size_t ld_output_batch, + void *working_space, + unsigned int thread_id, + unsigned int num_threads) const = 0; +}; + +struct Nothing +{ +}; + +template <typename TInput, typename TOutput, class OutputStage = Nothing> +class PoolingCommon : public IPoolingCommon +{ +}; + +} // namespace pooling +} // namespace arm_conv diff --git a/src/core/NEON/kernels/assembly/pooling.hpp b/src/core/NEON/kernels/assembly/pooling.hpp new file mode 100644 index 0000000000..2325bd08ca --- /dev/null +++ b/src/core/NEON/kernels/assembly/pooling.hpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#pragma once + +#include "arm_gemm_local.hpp" +#include "pool_common.hpp" + +#include <memory> + +namespace arm_conv +{ +namespace pooling +{ +struct PoolingConfig +{ + PoolingMethod method = PoolingMethod::DEFAULT; + std::string filter = ""; + + PoolingConfig(PoolingMethod method) + : method(method) {}; + PoolingConfig() {}; +}; + +struct PoolingArgs +{ + const CPUInfo *cpu_info; + + PoolingType pool_type; + PoolingWindow pool_window; + PoolingStride pool_stride; + bool exclude_padding; + + unsigned int n_batches, input_rows, input_cols, n_channels; + unsigned int output_rows, output_cols; + + PaddingValues padding; + + const PoolingConfig *config; + + PoolingArgs( + const CPUInfo *cpu_info, + PoolingType pool_type, + const PoolingWindow &window, + const PoolingStride &stride, + bool exclude_padding, + unsigned int n_batches, + unsigned int input_rows, + unsigned int input_cols, + unsigned int n_channels, + unsigned int output_rows, + unsigned int output_cols, + const PaddingValues &padding, + const PoolingConfig *cfg) + : cpu_info(cpu_info), pool_type(pool_type), pool_window(window), pool_stride(stride), exclude_padding(exclude_padding), n_batches(n_batches), input_rows(input_rows), input_cols(input_cols), + n_channels(n_channels), output_rows(output_rows), output_cols(output_cols), padding(padding), config(cfg) + { + // If either of the pooling window dimensions are set to zero, meaning + // "pool everything", then replace with the corresponding input dimension. + if(pool_window.rows == 0) + { + pool_window.rows = input_rows; + } + if(pool_window.cols == 0) + { + pool_window.cols = input_cols; + } + } +}; + +struct Requantize32 +{ + int32_t input_offset = 0; + int32_t output_offset = 0; + + int32_t per_layer_left_shift = 0; + int32_t per_layer_right_shift = 0; + int32_t per_layer_mul = 0; + + Requantize32(int32_t input_offset, int32_t output_offset, + int32_t per_layer_left_shift, int32_t per_layer_right_shift, + int32_t per_layer_mul) + : input_offset(input_offset), output_offset(output_offset), per_layer_left_shift(per_layer_left_shift), per_layer_right_shift(per_layer_right_shift), per_layer_mul(per_layer_mul) + { + } +}; + +template <typename TInput, typename TOutput, class OutputStage = Nothing> +using UniquePoolingCommon = std::unique_ptr<PoolingCommon<TInput, TOutput, OutputStage>>; + +// Get a pooling engine +template <typename TInput, typename TOutput = TInput, class OutputStage = Nothing> +UniquePoolingCommon<TInput, TOutput, OutputStage> pooling(const PoolingArgs &, const OutputStage & = {}); + +} // namespace pooling +} // namespace arm_conv |