/* * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "src/core/cpu/kernels/CpuPoolingKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/AccessWindowStatic.h" #include "src/core/CPP/Validate.h" #include "src/core/NEON/NEAsymm.h" #include "src/core/NEON/NEFixedPoint.h" #include "src/core/NEON/NEMath.h" #include "src/core/common/Registrars.h" #include "src/core/cpu/kernels/pooling/neon/list.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/ToolchainSupport.h" #include "src/core/NEON/wrapper/wrapper.h" #include namespace arm_compute { namespace cpu { namespace kernels { namespace { using namespace misc::shape_calculator; struct PoolingSelectorData { DataType dt; DataLayout dl; int pool_stride_x; Size2D pool_size; }; using PoolingSelectorPtr = std::add_pointer::type; using PoolingKernelPtr = std::add_pointer::type; struct PoolingKernel { const char *name; const PoolingSelectorPtr is_selected; PoolingKernelPtr ukernel; }; static const PoolingKernel available_kernels[] = { { "poolingMxN_qasymm8_neon_nhwc", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8)); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_qasymm8_neon_nhwc) }, { "poolingMxN_qasymm8_signed_neon_nhwc", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::QASYMM8_SIGNED)); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_qasymm8_signed_neon_nhwc) }, #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "poolingMxN_fp16_neon_nhwc", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F16)); }, REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nhwc) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "poolingMxN_fp32_neon_nhwc", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NHWC) && (data.dt == DataType::F32)); }, REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nhwc) }, #if defined(ENABLE_NCHW_KERNELS) { "pooling2_qasymm8_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw) }, { "pooling3_qasymm8_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw) }, { "poolingMxN_qasymm8_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8)); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw) }, { "pooling2_qasymm8_signed_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2) && (data.pool_stride_x < 3)); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling2_quantized_neon_nchw) }, { "pooling3_qasymm8_signed_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3) && (data.pool_stride_x < 3)); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::pooling3_quantized_neon_nchw) }, { "poolingMxN_qasymm8_signed_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::QASYMM8_SIGNED)); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::poolingMxN_quantized_neon_nchw) }, #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) { "pooling2_fp16_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); }, REGISTER_FP16_NEON(arm_compute::cpu::pooling2_fp16_neon_nchw) }, { "pooling3_fp16_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); }, REGISTER_FP16_NEON(arm_compute::cpu::pooling3_fp16_neon_nchw) }, { "poolingMxN_fp16_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F16)); }, REGISTER_FP16_NEON(arm_compute::cpu::poolingMxN_fp16_neon_nchw) }, #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) */ { "pooling2_fp32_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 2)); }, REGISTER_FP32_NEON(arm_compute::cpu::pooling2_fp32_neon_nchw) }, { "pooling3_fp32_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 3)); }, REGISTER_FP32_NEON(arm_compute::cpu::pooling3_fp32_neon_nchw) }, { "pooling7_fp32_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32) && (data.pool_size.x() == data.pool_size.y()) && (data.pool_size.x() == 7)); }, REGISTER_FP32_NEON(arm_compute::cpu::pooling7_fp32_neon_nchw) }, { "poolingMxN_fp32_neon_nchw", [](const PoolingSelectorData & data) { return ((data.dl == DataLayout::NCHW) && (data.dt == DataType::F32)); }, REGISTER_FP32_NEON(arm_compute::cpu::poolingMxN_fp32_neon_nchw) }, #endif /* defined(ENABLE_NCHW_KERNELS) */ }; /** Micro-kernel selector * * @param[in] data Selection data passed to help pick the appropriate micro-kernel * * @return A matching micro-kernel else nullptr */ const PoolingKernel *get_implementation(DataType dt, DataLayout dl, int pool_stride_x, Size2D pool_size) { for(const auto &uk : available_kernels) { if(uk.is_selected({ dt, dl, pool_stride_x, pool_size })) { return &uk; } } return nullptr; } Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h, const ITensorInfo *indices, Size2D pool_size) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); int pool_stride_x = 0; int pool_stride_y = 0; PoolingType pool_type = pool_info.pool_type; const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); if(indices) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32, DataType::F16); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, "Pooling indices only supported for MAX pooling method"); } ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(src->data_type())); ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(src->data_type()) && !pool_info.exclude_padding && (pool_info.pool_type == PoolingType::AVG) && pool_info.pad_stride_info.has_padding() && (src->data_layout() == DataLayout::NHWC), "exclude_padding equal false is not supported for AVG Pooling with padding on quantized types"); if(dst->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst); ARM_COMPUTE_RETURN_ERROR_ON((dst->dimension(get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w) || (dst->dimension(get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h)); if(indices) { ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2"); ARM_COMPUTE_RETURN_ERROR_ON((indices->dimension(get_data_layout_dimension_index(indices->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w) || (indices->dimension(get_data_layout_dimension_index(indices->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h)); } } const auto *uk = get_implementation(src->data_type(), src->data_layout(), pool_stride_x, pool_size); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); return Status{}; } Status validate_arguments_pool_info(const unsigned int pool_size_x, const unsigned int pool_size_y) { ARM_COMPUTE_RETURN_ERROR_ON(pool_size_x == 0); ARM_COMPUTE_RETURN_ERROR_ON(pool_size_y == 0); return Status{}; } std::pair validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst, ITensorInfo *indices, const PoolingLayerInfo &pool_info, unsigned int &num_elems_processed_per_iteration, BorderSize &border_size, unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y) { // dst auto inizialitation if not yet initialized auto_init_if_empty(*dst, src->clone()->set_tensor_shape(compute_pool_shape(*src, pool_info))); if(indices) { // Indices auto inizialitation if not yet initialized auto_init_if_empty(*indices, (src->clone()->set_tensor_shape(compute_pool_shape(*src, pool_info))) .set_data_type(DataType::U32) /* we store the offset to the element */); } const auto data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout; unsigned int num_elems_read_per_iteration = 0; unsigned int num_elems_horizontal_window = 0; int pool_stride_x = 0; int pool_stride_y = 0; const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const int src_width = src->dimension(idx_width); const int src_height = src->dimension(idx_height); const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); const int pool_pad_right = pad_stride_info.pad_right(); const int pool_pad_top = pad_stride_info.pad_top(); const int pool_pad_left = pad_stride_info.pad_left(); const int pool_pad_bottom = pad_stride_info.pad_bottom(); const bool is_square = pool_size_x == pool_size_y; // Check dst dimensions std::tie(pooled_w, pooled_h) = scaled_dimensions(src->dimension(idx_width), src->dimension(idx_height), pool_size_x, pool_size_y, pad_stride_info); //If it's not squared and optimized will be executed the MxN num_elems_read_per_iteration = 1; num_elems_processed_per_iteration = 1; num_elems_horizontal_window = 1; if(is_square) { switch(src->data_type()) { case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: switch(pool_size_x) { case 2: num_elems_read_per_iteration = 16; num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15; num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16; break; case 3: num_elems_read_per_iteration = 16; num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14; num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16; break; default: break; } break; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: switch(pool_size_x) { case 2: case 3: num_elems_read_per_iteration = 4; num_elems_processed_per_iteration = 1; num_elems_horizontal_window = 1; break; default: break; } break; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::F32: switch(pool_size_x) { case 2: num_elems_read_per_iteration = 2; break; case 3: num_elems_read_per_iteration = 4; // We use vload4 for pooling3 break; case 7: num_elems_read_per_iteration = 8; // We use vload8 for pooling7 break; default: break; } num_elems_processed_per_iteration = 1; num_elems_horizontal_window = 1; break; default: ARM_COMPUTE_ERROR("Element size not supported"); break; } } bool window_changed = false; Window win{}; if(data_layout == DataLayout::NCHW) { // Number of iterations in X dimension const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration; // Upper limit for the number of right/bottom border elements that are accessed const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - src_width; const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - src_height; border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left); border_size.right = std::max(upper_bound_w, pool_pad_right); border_size.bottom = std::max(upper_bound_h, pool_pad_bottom); TensorShape dst_shape{ src->tensor_shape() }; dst_shape.set(0, pooled_w); dst_shape.set(1, pooled_h); TensorInfo dst_info(src->clone()->set_tensor_shape(dst_shape)); win = calculate_max_window(dst_info, Steps(num_elems_processed_per_iteration)); AccessWindowStatic src_access(src, -pool_pad_left, -pool_pad_top, ceil_to_multiple(src_width + border_size.right, pool_size_x), src_height + border_size.bottom); AccessWindowHorizontal dst_access(dst, 0, num_elems_horizontal_window); if(indices) { AccessWindowHorizontal indices_access(indices, 0, num_elems_horizontal_window); window_changed = update_window_and_padding(win, src_access, dst_access, indices_access); } else { window_changed = update_window_and_padding(win, src_access, dst_access); } dst_access.set_valid_region(win, ValidRegion(Coordinates(), dst->tensor_shape())); border_size = src->padding(); } Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } } // namespace BorderSize CpuPoolingKernel::border_size() const { return _border_size; } void CpuPoolingKernel::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; const bool is_global_pooling = pool_info.is_global_pooling; // Get data layout const auto data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout; const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); // Update pool size in case of global pooling const Size2D pool_size( is_global_pooling ? src->dimension(idx_width) : pool_info.pool_size.width, is_global_pooling ? src->dimension(idx_height) : pool_info.pool_size.height); // Validate pool info before calling scaled_dimensions ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(pool_size.x(), pool_size.y())); // Check dst dimensions unsigned int pooled_w; unsigned int pooled_h; std::tie(pooled_w, pooled_h) = scaled_dimensions(src->dimension(idx_width), src->dimension(idx_height), pool_size.x(), pool_size.y(), pad_stride_info); // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, pool_info, pooled_w, pooled_h, indices, pool_size)); // Set instance variables _pool_info = pool_info; _data_layout = src->data_layout(); _pool_size = pool_size; _pool_stride_x = pad_stride_info.stride().first; if(_data_layout == DataLayout::NHWC) { // Configure kernel window Window win = calculate_max_window(*dst, Steps()); ICpuKernel::configure(win); } else { // Configure kernel window auto win_config = validate_and_configure_window(src, dst, indices, pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size.x(), pool_size.y()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICpuKernel::configure(win_config.second); } } Status CpuPoolingKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); unsigned int pooled_w = 0; unsigned int pooled_h = 0; unsigned int num_elems_processed_per_iteration = 0; BorderSize border_size(0); const bool is_global_pooling = pool_info.is_global_pooling; unsigned int pool_size_x = 0; unsigned int pool_size_y = 0; // Get data layout const auto data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout; const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); pool_size_x = is_global_pooling ? src->dimension(idx_width) : pool_info.pool_size.width; pool_size_y = is_global_pooling ? src->dimension(idx_height) : pool_info.pool_size.height; // Validate pool info before calling scaled_dimensions ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_pool_info(pool_size_x, pool_size_y)); // Check dst dimensions std::tie(pooled_w, pooled_h) = scaled_dimensions(src->dimension(idx_width), src->dimension(idx_height), pool_size_x, pool_size_y, pool_info.pad_stride_info); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, pool_info, pooled_w, pooled_h, indices, Size2D(pool_size_x, pool_size_y))); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(src->clone().get(), dst->clone().get(), (indices) ? indices->clone().get() : nullptr, pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h, pool_size_x, pool_size_y) .first); return Status{}; } void CpuPoolingKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); const ITensor *src = tensors.get_const_tensor(TensorType::ACL_SRC_0); ITensor *dst = tensors.get_tensor(TensorType::ACL_DST_0); ITensor *indices = tensors.get_tensor(TensorType::ACL_DST_1); const unsigned int pool_stride_x = _pool_info.pad_stride_info.stride().first; const unsigned int pool_stride_y = _pool_info.pad_stride_info.stride().second; const unsigned int pool_size = _pool_info.pool_size.width; Window window_src(window); if(_data_layout == DataLayout::NCHW) { // Set step for src in x and y direction for the src unsigned int window_x_inc = 0; switch(src->info()->data_type()) { case DataType::QASYMM8: case DataType::QASYMM8_SIGNED: { window_x_inc = pool_stride_x; if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3) { window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration; } break; } case DataType::F16: case DataType::F32: { window_x_inc = pool_stride_x; break; } default: { ARM_COMPUTE_ERROR("Not supported"); } } window_src.set(Window::DimX, Window::Dimension(window.x().start() * pool_stride_x, window.x().end() * pool_stride_x, window_x_inc)); window_src.set(Window::DimY, Window::Dimension(window.y().start() * pool_stride_y, window.y().end() * pool_stride_y, pool_stride_y)); } else { window_src.set(Window::DimX, Window::Dimension(0, 1, 1)); window_src.set(Window::DimY, Window::Dimension(0, src->info()->dimension(1), pool_stride_x)); window_src.set(Window::DimZ, Window::Dimension(0, src->info()->dimension(2), pool_stride_y)); } const auto *uk = get_implementation(src->info()->data_type(), _data_layout, _pool_stride_x, _pool_size); ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); uk->ukernel(src, dst, indices, _pool_info, window_src, window); } const char *CpuPoolingKernel::name() const { return "CpuPoolingKernel"; } } // namespace kernels } // namespace cpu } // namespace arm_compute