From 8fce496a715929372b3c448a233713d87d65f768 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Wed, 1 Sep 2021 14:05:00 +0100
Subject: Remove padding from ClPool2dKernel NCHW

- Simplify NCHW kernel structure by removing old optimized paths
- Merge quantized with fp kernels

Resolve COMPMID-4722

Signed-off-by: Giorgio Arena
Change-Id: I79016b119619aed6a6193295601cd6517f14b88c
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6183
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gian Marco Iodice
---
 src/gpu/cl/ClKernelLibrary.cpp        |  16 +--
 src/gpu/cl/kernels/ClPool2dKernel.cpp | 255 +++++++---------------------------
 src/gpu/cl/kernels/ClPool2dKernel.h   |   2 -
 src/gpu/cl/operators/ClPool2d.cpp     |  52 +------
 src/gpu/cl/operators/ClPool2d.h       |   8 --
 5 files changed, 56 insertions(+), 277 deletions(-)

(limited to 'src/gpu/cl')

diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp
index 5cd969e7f2..c05bb96753 100644
--- a/src/gpu/cl/ClKernelLibrary.cpp
+++ b/src/gpu/cl/ClKernelLibrary.cpp
@@ -328,10 +328,6 @@ const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map =
     { "pixelwise_mul_float", "common/pixelwise_mul_float.cl" },
     { "pixelwise_mul_int", "common/pixelwise_mul_int.cl" },
     { "pixelwise_mul_quantized", "common/pixelwise_mul_int.cl" },
-    { "pooling_layer_2", "common/pooling_layer.cl" },
-    { "pooling_layer_3", "common/pooling_layer.cl" },
-    { "pooling_layer_optimized_3", "common/pooling_layer.cl" },
-    { "pooling_layer_7", "common/pooling_layer.cl" },
     { "qlstm_layer_normalization", "common/qlstm_layer_normalization.cl" },
     { "quantization_layer", "common/quantization_layer.cl" },
     { "range", "common/range.cl" },
@@ -385,9 +381,7 @@ const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map =
     { "normalize_planar_yuv_layer_nchw", "nchw/normalize_planar_yuv_layer.cl" },
     { "normalize_planar_yuv_layer_q8_nchw", "nchw/normalize_planar_yuv_layer_quantized.cl" },
     { "pooling_layer_MxN_nchw", "nchw/pooling_layer.cl" },
-    { "pooling_layer_2_nchw_indices_fp32", "nchw/pooling_layer.cl" },
-    { "pooling_layer_2_nchw_indices_fp16", "nchw/pooling_layer.cl" },
-    { "pooling_layer_MxN_quantized_nchw", "nchw/pooling_layer_quantized.cl" },
+    { "pooling_layer_2_nchw_indices", "nchw/pooling_layer.cl" },
     { "prior_box_layer_nchw", "nchw/prior_box_layer.cl" },
     { "remap_nearest_neighbour_nchw", "nchw/remap.cl" },
     { "remap_bilinear_nchw", "nchw/remap.cl" },
@@ -666,10 +660,6 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
     {
         "common/pixelwise_mul_int.cl",
 #include "./cl_kernels/common/pixelwise_mul_int.clembed"
-    },
-    {
-        "common/pooling_layer.cl",
-#include "./cl_kernels/common/pooling_layer.clembed"
     },
     {
         "common/qlstm_layer_normalization.cl",
@@ -803,10 +793,6 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
     {
         "nchw/pooling_layer.cl",
 #include "./cl_kernels/nchw/pooling_layer.clembed"
-    },
-    {
-        "nchw/pooling_layer_quantized.cl",
-#include "./cl_kernels/nchw/pooling_layer_quantized.clembed"
     },
     {
         "nchw/prior_box_layer.cl",
diff --git a/src/gpu/cl/kernels/ClPool2dKernel.cpp b/src/gpu/cl/kernels/ClPool2dKernel.cpp
index 04f2b142bd..5e53799f30 100644
--- a/src/gpu/cl/kernels/ClPool2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClPool2dKernel.cpp
@@ -23,18 +23,13 @@
  */
 #include "src/gpu/cl/kernels/ClPool2dKernel.h"
 
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
"arm_compute/core/utils/misc/ShapeCalculator.h" #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/Cast.h" -#include "support/StringSupport.h" namespace arm_compute { @@ -46,19 +41,6 @@ using namespace arm_compute::misc::shape_calculator; namespace { -// Internal window config info -using ClPoolingConfig = std::pair; //num_elems_processed_per_iteration, border_size - -void auto_init(const ITensorInfo *src, ITensorInfo *dst, ITensorInfo *indices, PoolingLayerInfo pool_info) -{ - TensorShape out_shape = compute_pool_shape(*src, pool_info); - auto_init_if_empty(*dst, src->clone()->set_tensor_shape(out_shape)); - if(indices) - { - auto_init_if_empty(*indices, src->clone()->set_tensor_shape(out_shape).set_data_type(DataType::U32)); - } -} - Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); @@ -104,102 +86,6 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const return Status{}; } - -std::tuple validate_and_configure_window(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices = nullptr) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - - // Get data layout - const DataLayout data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout; - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - int pool_stride_x = 0; - int pool_stride_y = 0; - unsigned int pooled_w = 0; - unsigned int pooled_h = 0; - int pool_size_x = pool_info.is_global_pooling ? src->dimension(idx_width) : pool_info.pool_size.width; - int pool_size_y = pool_info.is_global_pooling ? src->dimension(idx_height) : pool_info.pool_size.height; - const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; - std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); - const int pool_pad_right = pad_stride_info.pad_right(); - const int pool_pad_top = pad_stride_info.pad_top(); - const int pool_pad_left = pad_stride_info.pad_left(); - const int pool_pad_bottom = pad_stride_info.pad_bottom(); - BorderSize border_size = BorderSize(); - - auto_init(src, dst, indices, pool_info); - pooled_w = dst->tensor_shape()[idx_width]; - pooled_h = dst->tensor_shape()[idx_height]; - - const DataType data_type = src->data_type(); - - const int src_width = src->dimension(idx_width); - const int src_height = src->dimension(idx_height); - - unsigned int num_elems_processed_per_iteration = 0; - bool window_changed = false; - Window win{}; - switch(data_layout) - { - case DataLayout::NCHW: - { - // Initialize border size - border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left); - // Change the number of elements processed per iteration - // for pooling 3x3 with stride less equal than 3 - const bool can_optimize = (pool_size_x == 3) && (pool_size_y == 3) && (pool_stride_x <= 3) && !is_data_type_quantized(data_type); - num_elems_processed_per_iteration = can_optimize ? 
-            num_elems_processed_per_iteration = can_optimize ? 4 : 1;
-            const unsigned int num_elems_read_per_iteration = (num_elems_processed_per_iteration - 1) * pool_stride_x + pool_size_x;
-
-            // Number of iterations in X dimension
-            const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
-
-            // Upper limit for the number of right/bottom border elements that are accessed
-            const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - src_width;
-            const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - src_height;
-
-            border_size.right  = std::max(upper_bound_w, pool_pad_right);
-            border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
-
-            win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
-
-            AccessWindowRectangle src_access(src, -pool_pad_left, -pool_pad_top, num_elems_read_per_iteration, pool_size_y,
-                                             pool_stride_x, pool_stride_y);
-            AccessWindowHorizontal dst_access(dst, 0, num_elems_processed_per_iteration);
-
-            // Update indices window
-            if(indices)
-            {
-                AccessWindowHorizontal indices_access(indices, 0, num_elems_processed_per_iteration);
-                window_changed = update_window_and_padding(win, src_access, dst_access, indices_access);
-                indices_access.set_valid_region(win, ValidRegion(Coordinates(), indices->tensor_shape()));
-            }
-            else
-            {
-                window_changed = update_window_and_padding(win, src_access, dst_access);
-            }
-
-            dst_access.set_valid_region(win, ValidRegion(Coordinates(), dst->tensor_shape()));
-            break;
-        }
-        case DataLayout::NHWC:
-        {
-            const size_t vec_size = dst->data_type() == DataType::F32 ? 2 : 4;
-
-            // Initialize border size
-            border_size = BorderSize();
-            num_elems_processed_per_iteration = adjust_vec_size(vec_size, dst->dimension(0));
-            win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
-            break;
-        }
-        default:
-            ARM_COMPUTE_ERROR("Not implemented");
-    }
-
-    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_tuple(err, win, ClPoolingConfig(num_elems_processed_per_iteration, border_size));
-}
 } // namespace
 
 ClPool2dKernel::ClPool2dKernel()
@@ -207,20 +93,27 @@
 {
     _type = CLKernelType::POOL;
 }
 
-BorderSize ClPool2dKernel::border_size() const
-{
-    return _border_size;
-}
-
 void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, pool_info, indices));
 
     auto padding_info = get_padding_info({ src, dst, indices });
 
+    // Auto init if empty
+    TensorShape out_shape = compute_pool_shape(*src, pool_info);
+    auto_init_if_empty(*dst, src->clone()->set_tensor_shape(out_shape));
+    if(indices)
+    {
+        auto_init_if_empty(*indices, src->clone()->set_tensor_shape(out_shape).set_data_type(DataType::U32));
+    }
+
     // Set instance variables
-    _pool_info   = pool_info;
-    _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout;
+    _pool_info                          = pool_info;
+    _data_layout                        = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout;
+    _num_elems_processed_per_iteration  = (_data_layout == DataLayout::NCHW) ? 1 : ((dst->data_type() == DataType::F32) ? 2 : 4);
+    _num_elems_processed_per_iteration  = adjust_vec_size(_num_elems_processed_per_iteration, dst->dimension(0));
+
     int               pool_stride_x = 0;
     int               pool_stride_y = 0;
     const PoolingType pool_type     = pool_info.pool_type;
@@ -233,61 +126,47 @@ void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorI
     const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
     const bool          exclude_padding = pool_info.exclude_padding;
     std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
-    const int pool_pad_top  = pad_stride_info.pad_top();
-    const int pool_pad_left = pad_stride_info.pad_left();
+    const int      pool_pad_top  = pad_stride_info.pad_top();
+    const int      pool_pad_left = pad_stride_info.pad_left();
+    const DataType data_type     = src->data_type();
 
     // Set build options
     CLBuildOptions build_opts;
-    const DataType data_type = src->data_type();
-
-    // Configure kernel window
-    auto win_config = validate_and_configure_window(src, dst, pool_info, indices);
-
-    ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-    ICLKernel::configure_internal(std::get<1>(win_config));
-
-    ClPoolingConfig pooling_config     = std::get<2>(win_config);
-    _num_elems_processed_per_iteration = pooling_config.first;
-    _border_size                       = pooling_config.second;
-
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(_num_elems_processed_per_iteration));
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+    build_opts.add_option("-DPOOL_" + string_from_pooling_type(pool_type));
+    build_opts.add_option("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x));
+    build_opts.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(pool_stride_y));
+    build_opts.add_option("-DPAD_X=" + support::cpp11::to_string(pool_pad_left));
+    build_opts.add_option("-DPAD_Y=" + support::cpp11::to_string(pool_pad_top));
+    build_opts.add_option("-DPOOL_SIZE_X=" + support::cpp11::to_string(pool_size_x));
+    build_opts.add_option("-DPOOL_SIZE_Y=" + support::cpp11::to_string(pool_size_y));
+    build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(idx_width)));
+    build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(idx_height)));
+    build_opts.add_option("-DMAX_WIDTH=" + support::cpp11::to_string(src->dimension(idx_width) + (exclude_padding ? 0 : pool_pad_left)));
+    build_opts.add_option("-DMAX_HEIGHT=" + support::cpp11::to_string(src->dimension(idx_height) + (exclude_padding ? 0 : pool_pad_top)));
 
     // Tensor paddings are used to calculate the indicies for MAX pooling
     if(pool_info.pool_size == Size2D(2, 2) && pool_type == PoolingType::MAX && indices && is_data_type_float(data_type))
     {
-        build_opts.add_option("-DPAD_TENSOR_LEFT=" + support::cpp11::to_string(src->padding().left));
-        build_opts.add_option("-DPAD_TENSOR_RIGHT=" + support::cpp11::to_string(src->padding().right));
-        build_opts.add_option("-DPAD_TENSOR_TOP=" + support::cpp11::to_string(src->padding().top));
-        build_opts.add_option("-DPAD_TENSOR_BOTTOM=" + support::cpp11::to_string(src->padding().bottom));
-        build_opts.add_option("-DTENSOR_CHANNEL=" + support::cpp11::to_string(src->dimension(idx_channel)));
-        build_opts.add_option("-DTENSOR_WIDTH=" + support::cpp11::to_string(src->dimension(idx_width)));
-        build_opts.add_option("-DTENSOR_HEIGHT=" + support::cpp11::to_string(src->dimension(idx_height)));
+        build_opts.add_option("-DSRC_BATCH=" + support::cpp11::to_string(src->tensor_shape().total_size_lower(3)));
     }
 
-    if(is_data_type_quantized_asymmetric(data_type) && src->quantization_info() != dst->quantization_info())
+    if(is_data_type_quantized_asymmetric(data_type))
     {
-        const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
-        const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
-
-        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
-        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
-        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
-        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
-    }
-
-    // Check dst dimensions
-    auto_init(src, dst, indices, pool_info);
+        build_opts.add_option("-DQUANTIZED");
 
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, pool_info, indices));
+        if(src->quantization_info() != dst->quantization_info())
+        {
+            const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
+            const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
 
-    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
-    build_opts.add_option("-DPOOL_" + string_from_pooling_type(pool_type));
-    build_opts.add_option("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x));
-    build_opts.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(pool_stride_y));
-    build_opts.add_option("-DPAD_X=" + support::cpp11::to_string(pool_pad_left));
-    build_opts.add_option("-DPAD_Y=" + support::cpp11::to_string(pool_pad_top));
-    build_opts.add_option("-DPOOL_SIZE_X=" + support::cpp11::to_string(pool_size_x));
-    build_opts.add_option("-DPOOL_SIZE_Y=" + support::cpp11::to_string(pool_size_y));
+            build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+            build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+            build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+            build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
+        }
+    }
 
     // Set the initial value for the pooling operation accordingly with the data type
     if(pool_type == PoolingType::MAX)
     {
@@ -309,9 +188,6 @@ void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorI
         build_opts.add_option("-DINITIAL_VALUE=0");
     }
 
-    build_opts.add_option("-DMAX_WIDTH=" + support::cpp11::to_string(src->dimension(idx_width) + (exclude_padding ? 0 : pool_pad_left)));
-    build_opts.add_option("-DMAX_HEIGHT=" + support::cpp11::to_string(src->dimension(idx_height) + (exclude_padding ? 0 : pool_pad_top)));
-
     // Create kernel
     switch(_data_layout)
     {
@@ -319,7 +195,7 @@ void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorI
         {
            const auto use_fp_mixed_precision = (data_type == DataType::F16) && pool_info.fp_mixed_precision;
            const auto use_wider_accumulator  = use_fp_mixed_precision && (pool_type != PoolingType::MAX);
-           const auto acc_data_type          = get_cl_type_from_data_type(use_wider_accumulator ? DataType::F32 : data_type);
+           const auto acc_data_type          = get_cl_type_from_data_type(use_wider_accumulator ? DataType::F32 : (is_data_type_quantized(data_type) ? DataType::S32 : data_type));
            build_opts.add_option("-DACC_DATA_TYPE=" + acc_data_type);
            build_opts.add_option_if(use_wider_accumulator, "-DFP_MIXED_PRECISION");
@@ -328,33 +204,15 @@ void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorI
                build_opts.add_option_if(exclude_padding, "-DEXCLUDE_PADDING");
            }
 
-           if((pool_size_x == 3) && (pool_size_y == 3) && !is_data_type_quantized_asymmetric(data_type))
-           {
-               // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenCL kernel where
-               // each thread computes 4 dst elements
-               const bool is_pool3x3_stride_le3 = (pool_size_x == 3) && (pool_size_y == 3) && (pool_stride_x <= 3);
-
-               std::string kernel_name = ((is_pool3x3_stride_le3) ? "pooling_layer_optimized_" : "pooling_layer_")
-                                         + support::cpp11::to_string(pool_size_x);
-               _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
-           }
-           else if(pool_info.pool_size == Size2D(2, 2) && pool_type == PoolingType::MAX && indices && is_data_type_float(data_type))
+           if(pool_info.pool_size == Size2D(2, 2) && pool_type == PoolingType::MAX && indices && is_data_type_float(data_type))
            {
                // For max pooling with pool2x2, store indicies which will be used in max unpooling
-               if(data_type == DataType::F32)
-               {
-                   std::string kernel_name = "pooling_layer_2_nchw_indices_fp32";
-                   _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
-               }
-               else if(data_type == DataType::F16)
-               {
-                   std::string kernel_name = "pooling_layer_2_nchw_indices_fp16";
-                   _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
-               }
+               std::string kernel_name = "pooling_layer_2_nchw_indices";
+               _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
            }
            else // Run general case
            {
"pooling_layer_MxN_quantized_nchw" : "pooling_layer_MxN_nchw"; + std::string kernel_name = "pooling_layer_MxN_nchw"; _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); } break; @@ -405,6 +263,10 @@ void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorI ARM_COMPUTE_ERROR("Not implemented"); } + // Configure kernel window + Window win = calculate_max_window(*dst, Steps(_num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win); + // Set config_id for enabling LWS tuning _config_id = "pooling_layer_"; _config_id += lower_string(string_from_data_type(data_type)); @@ -419,14 +281,12 @@ void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorI _config_id += "_"; _config_id += lower_string(string_from_data_layout(src->data_layout())); - ARM_COMPUTE_ERROR_ON(src->data_layout() == DataLayout::NHWC && has_padding_changed(padding_info)); + ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } Status ClPool2dKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, pool_info, indices)); - ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(src->clone().get(), dst->clone().get(), pool_info))); - return Status{}; } @@ -453,18 +313,9 @@ void ClPool2dKernel::run_op(ITensorPack &tensors, const Window &window, cl::Comm Window slice = window_collapsed.first_slice_window_3D(); do { - // Upsample src by pool size - Window in_slice(slice); - in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - _pool_info.pad_stride_info.pad_left(), - (in_slice.x().end() - _pool_info.pad_stride_info.pad_left()) * pool_stride_x, - pool_stride_x * _num_elems_processed_per_iteration)); - in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - _pool_info.pad_stride_info.pad_top(), - (in_slice.y().end() - _pool_info.pad_stride_info.pad_top()) * pool_stride_y, - pool_stride_y)); - // Set srcs unsigned int idx = 0; - add_3D_tensor_argument(idx, src, in_slice); + add_3D_tensor_argument(idx, src, slice); add_3D_tensor_argument(idx, dst, slice); if(indices && is_data_type_float(src->info()->data_type()) && (_pool_info.pool_size == Size2D(2, 2))) { diff --git a/src/gpu/cl/kernels/ClPool2dKernel.h b/src/gpu/cl/kernels/ClPool2dKernel.h index 61d204dc68..f5bb0687e8 100644 --- a/src/gpu/cl/kernels/ClPool2dKernel.h +++ b/src/gpu/cl/kernels/ClPool2dKernel.h @@ -61,12 +61,10 @@ public: // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; public: PoolingLayerInfo _pool_info{}; DataLayout _data_layout{ DataLayout::UNKNOWN }; - BorderSize _border_size{ 0 }; unsigned int _num_elems_processed_per_iteration{ 1 }; }; } // namespace kernels diff --git a/src/gpu/cl/operators/ClPool2d.cpp b/src/gpu/cl/operators/ClPool2d.cpp index fdadd199fc..a5b18a2340 100644 --- a/src/gpu/cl/operators/ClPool2d.cpp +++ b/src/gpu/cl/operators/ClPool2d.cpp @@ -25,7 +25,6 @@ #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/gpu/cl/ClCompileContext.h" #include "src/gpu/cl/kernels/ClPool2dKernel.h" @@ -40,62 +39,15 @@ void ClPool2d::configure(const ClCompileContext &compile_context, ITensorInfo *s auto k = std::make_unique(); k->set_target(CLScheduler::get().target()); k->configure(compile_context, src, dst, info, indices); - _pooling = 
-    _pooling        = std::move(k);
-
-    const DataType data_type = src->data_type();
-
-    // Configure border depending on operation required (quantize border in case of asymmetric data_type)
-    BorderMode border_mode{};
-    PixelValue pixel_value(0.f);
-    if(is_data_type_quantized_asymmetric(data_type) && !info.exclude_padding)
-    {
-        pixel_value = PixelValue(0, data_type, src->quantization_info());
-    }
-
-    // Data layout
-    const auto data_layout = info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : info.data_layout;
-
-    switch(data_layout)
-    {
-        case DataLayout::NCHW:
-            border_mode = (PoolingType::MAX == info.pool_type) ? BorderMode::REPLICATE : BorderMode::CONSTANT;
-            break;
-        case DataLayout::NHWC:
-            border_mode = BorderMode::CONSTANT;
-            if(PoolingType::MAX == info.pool_type)
-            {
-                if(is_data_type_quantized(data_type))
-                {
-                    std::tie(pixel_value, std::ignore) = get_min_max(data_type);
-                }
-                else
-                {
-                    pixel_value = PixelValue(std::numeric_limits<float>::lowest());
-                }
-            }
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Data layout not supported");
-    }
-    auto b = std::make_unique<CLFillBorderKernel>();
-    b->configure(compile_context, src, _pooling->border_size(), border_mode, pixel_value);
-    _border_handler = std::move(b);
+    _kernel = std::move(k);
 
    // Tune kernels
-    CLScheduler::get().tune_kernel_static(*_pooling);
+    CLScheduler::get().tune_kernel_static(*_kernel);
 }
 
 Status ClPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices)
 {
    return kernels::ClPool2dKernel::validate(src, dst, info, indices);
 }
-
-void ClPool2d::run(ITensorPack &tensors)
-{
-    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
-
-    CLScheduler::get().enqueue_op(*_border_handler.get(), tensors, false);
-    CLScheduler::get().enqueue_op(*_pooling.get(), tensors, false);
-}
 } // namespace opencl
 } // namespace arm_compute
diff --git a/src/gpu/cl/operators/ClPool2d.h b/src/gpu/cl/operators/ClPool2d.h
index a041053bb3..f353ba262e 100644
--- a/src/gpu/cl/operators/ClPool2d.h
+++ b/src/gpu/cl/operators/ClPool2d.h
@@ -35,7 +35,6 @@ namespace opencl
 {
 /** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
  *
- * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
  * -# @ref opencl::ClPool2d
  */
 class ClPool2d : public IClOperator
@@ -59,13 +58,6 @@ public:
     * @return a status
     */
    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices = nullptr);
-
-    // Inherited method overridden
-    void run(ITensorPack &tensors) override;
-
-private:
-    std::unique_ptr<kernels::ClPool2dKernel> _pooling{ nullptr };
-    std::unique_ptr<CLFillBorderKernel>      _border_handler{ nullptr };
 };
 } // namespace opencl
 } // namespace arm_compute
-- cgit v1.2.1
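
After this change the operator is a thin wrapper: ClPool2d::configure() builds a single ClPool2dKernel (no CLFillBorderKernel any more) and validation is forwarded straight to the kernel. The sketch below shows one way the kernel's validate entry point, whose signature appears in the patch, might be exercised in isolation; the tensor shape, the pooling parameters and the TensorInfo/PoolingLayerInfo constructors used here are illustrative assumptions, not part of the commit.

    // Illustrative sketch (not part of the patch): probe whether a given NCHW
    // pooling configuration is accepted by the reworked kernel.
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "src/gpu/cl/kernels/ClPool2dKernel.h"

    using namespace arm_compute;

    bool pool3x3_nchw_f32_supported()
    {
        // Assumed example source: 32x32 spatial, 16 channels, single batch, FP32, NCHW
        TensorInfo src(TensorShape(32U, 32U, 16U), 1, DataType::F32);
        src.set_data_layout(DataLayout::NCHW);

        // Destination left empty; the validate/configure path shown above is assumed
        // to auto-initialise it from compute_pool_shape()
        TensorInfo dst;

        // 3x3 average pooling, stride 1, 1-pixel padding, padding excluded from the average
        PoolingLayerInfo pool_info(PoolingType::AVG, Size2D(3, 3), DataLayout::NCHW,
                                   PadStrideInfo(1, 1, 1, 1), /* exclude_padding */ true);

        const Status status = opencl::kernels::ClPool2dKernel::validate(&src, &dst, pool_info, nullptr);
        return status.error_code() == ErrorCode::OK;
    }

The same check could be routed through ClPool2d::validate(), which, as the patch shows, now simply forwards to kernels::ClPool2dKernel::validate().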